说明:Alertmanager 在启动时会尝试加载默认的配置文件路径,如果没有指定其他位置的话。在 Linux 系统上,它通常会在 /etc/alertmanager/config/ 目录下查找默认的 alertmanager.yaml 配置文件。

针对上面163邮箱告警通知并不很容易阅读,此时可以通过Alertmanager的模板功能配置一些自定义模板。

自定义告警模板-1

下面通过添加自定义模板便于阅读:

1.修改alertmanager-secret.yaml添加自定义模板

[root@k8s-master01 ~]# cd /root/kube-prometheus/manifests
[root@k8s-master01 manifests]# vim alertmanager-secret.yaml

添加告警模板

  email.tmpl: |-
    {{ define "email.default.message" }}
    {{- if gt (len .Alerts.Firing) 0 -}}
    {{- range $index, $alert := .Alerts -}}
    {{- if eq $index 0 }}
    ==========异常告警==========
    告警类型: {{ $alert.Labels.alertname }}
    告警级别: {{ $alert.Labels.severity }}
    告警详情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description}};{{$alert.Annotations.summary}}
    故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    {{- if gt (len $alert.Labels.instance) 0 }}
    实例信息: {{ $alert.Labels.instance }}
    {{- end }}
    {{- if gt (len $alert.Labels.namespace) 0 }}
    命名空间: {{ $alert.Labels.namespace }}
    {{- end }}
    {{- if gt (len $alert.Labels.node) 0 }}
    节点信息: {{ $alert.Labels.node }}
    {{- end }}
    {{- if gt (len $alert.Labels.pod) 0 }}
    实例名称: {{ $alert.Labels.pod }}
    {{- end }}
    ============END============
    {{- end }}
    {{- end }}
    {{- end }}
    {{- if gt (len .Alerts.Resolved) 0 -}}
    {{- range $index, $alert := .Alerts -}}
    {{- if eq $index 0 }}
    ==========异常恢复==========
    告警类型: {{ $alert.Labels.alertname }}
    告警级别: {{ $alert.Labels.severity }}
    告警详情:
    {{ $alert.Annotations.message }}{{ $alert.Annotations.description}};{{$alert.Annotations.summary}}
    故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    恢复时间: {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    {{- if gt (len $alert.Labels.instance) 0 }}
    实例信息: {{ $alert.Labels.instance }}
    {{- end }}
    {{- if gt (len $alert.Labels.namespace) 0 }}
    命名空间: {{ $alert.Labels.namespace }}
    {{- end }}
    {{- if gt (len $alert.Labels.node) 0 }}
    节点信息: {{ $alert.Labels.node }}
    {{- end }}
    {{- if gt (len $alert.Labels.pod) 0 }}
    实例名称: {{ $alert.Labels.pod }}
    {{- end }}
    ============END============
    {{- end }}
    {{- end }}
    {{- end }}
    {{- end }}

注意:wechat.tmpl和alertmanager.yaml需要对齐

在templates字段添加模板位置

    templates:
    - '/etc/alertmanager/config/*.tmpl'

配置wechat-ops receiver使用该模板

        html: '{{ template "email.default.message" . }}'

配置警报重复发送的时间间隔为5m

      "repeat_interval": "5m"

修改完成后的文件具体如下:

[root@k8s-master01 manifests]# vim alertmanager-secret.yaml
apiVersion: v1
kind: Secret
metadata:
  labels:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 0.24.0
  name: alertmanager-main
  namespace: monitoring
stringData:
#添加告警模板
  email.tmpl: |-
    {{ define "email.default.message" }}
    {{- if gt (len .Alerts.Firing) 0 -}}
    {{- range $index, $alert := .Alerts -}}
    {{- if eq $index 0 }}
    ==========异常告警==========
    告警类型: {{ $alert.Labels.alertname }}
    告警级别: {{ $alert.Labels.severity }}
    告警详情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description}};{{$alert.Annotations.summary}}
    故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    {{- if gt (len $alert.Labels.instance) 0 }}
    实例信息: {{ $alert.Labels.instance }}
    {{- end }}
    {{- if gt (len $alert.Labels.namespace) 0 }}
    命名空间: {{ $alert.Labels.namespace }}
    {{- end }}
    {{- if gt (len $alert.Labels.node) 0 }}
    节点信息: {{ $alert.Labels.node }}
    {{- end }}
    {{- if gt (len $alert.Labels.pod) 0 }}
    实例名称: {{ $alert.Labels.pod }}
    {{- end }}
    ============END============
    {{- end }}
    {{- end }}
    {{- end }}
    {{- if gt (len .Alerts.Resolved) 0 -}}
    {{- range $index, $alert := .Alerts -}}
    {{- if eq $index 0 }}
    ==========异常恢复==========
    告警类型: {{ $alert.Labels.alertname }}
    告警级别: {{ $alert.Labels.severity }}
    告警详情:
    {{ $alert.Annotations.message }}{{ $alert.Annotations.description}};{{$alert.Annotations.summary}}
    故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    恢复时间: {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    {{- if gt (len $alert.Labels.instance) 0 }}
    实例信息: {{ $alert.Labels.instance }}
    {{- end }}
    {{- if gt (len $alert.Labels.namespace) 0 }}
    命名空间: {{ $alert.Labels.namespace }}
    {{- end }}
    {{- if gt (len $alert.Labels.node) 0 }}
    节点信息: {{ $alert.Labels.node }}
    {{- end }}
    {{- if gt (len $alert.Labels.pod) 0 }}
    实例名称: {{ $alert.Labels.pod }}
    {{- end }}
    ============END============
    {{- end }}
    {{- end }}
    {{- end }}
    {{- end }}
#至此结束
  alertmanager.yaml: |-
    "global":
      "resolve_timeout": "5m"
      smtp_from: "your-alert@163.com"
      smtp_smarthost: "smtp.163.com:465"
      smtp_hello: "163.com"
      smtp_auth_username: "your-alert@163.com"
      smtp_auth_password: "YOUR_SMTP_AUTH_CODE"
      smtp_require_tls: false
#在templates字段添加模板位置
    templates:
    - '/etc/alertmanager/config/*.tmpl'
#至此结束
    "inhibit_rules":
    - "equal":
      - "namespace"
      - "alertname"
      "source_matchers":
      - "severity = critical"
      "target_matchers":
      - "severity =~ warning|info"
    - "equal":
      - "namespace"
      - "alertname"
      "source_matchers":
      - "severity = warning"
      "target_matchers":
      - "severity = info"
    - "equal":
      - "namespace"
      "source_matchers":
      - "alertname = InfoInhibitor"
      "target_matchers":
      - "severity = info"
    "receivers":
    - "name": "Default"
      email_configs:
      - to: "your-alert@163.com"
        send_resolved: true
#配置wechat-ops receiver使用该模板
        html: '{{ template "email.default.message" . }}'
#至此结束
    - "name": "Watchdog"
    - "name": "Critical"
    - "name": "null"
    "route":
      "group_by":
      - "namespace"
      - job
      - alertname
      "group_interval": "5m"
      "group_wait": "30s"
      "receiver": "Default"
      "repeat_interval": "5m"
      "routes":
      - "matchers":
        - "alertname = Watchdog"
        "receiver": "Watchdog"
      - "matchers":
        - "alertname = InfoInhibitor"
        "receiver": "null"
      - "matchers":
        - "severity = critical"
        "receiver": "Critical"
type: Opaque

2.将更改好的 Alertmanager 配置加载到 Alertmanager

[root@k8s-master01 manifests]# kubectl replace -f alertmanager-secret.yaml

3.查看alertmanager-secret.yaml 文件挂载到/etc/alertmanager/config/目录下

[root@k8s-master01 manifests]# kubectl exec -it alertmanager-main-0 -n monitoring -- ls /etc/alertmanager/config/

alertmanager.yaml.gz  email.tmpl

4.更新完成后,可以在 Secret 中查看该配置

[root@k8s-master01 manifests]# kubectl describe secret alertmanager-main -n monitoring
Name:         alertmanager-main
Namespace:    monitoring
Labels:       app.kubernetes.io/component=alert-router
              app.kubernetes.io/instance=main
              app.kubernetes.io/name=alertmanager
              app.kubernetes.io/part-of=kube-prometheus
              app.kubernetes.io/version=0.25.0
Annotations:  <none>

Type:  Opaque

Data
====
alertmanager.yaml:  1314 bytes
email.tmpl:         3702 bytes

5.打开163邮箱,查看告警内容,具体如下

自定义告警模板-2