一、DNS 监控

```yaml
# Add the following configuration
[root@master01 7]# vim prometheus-config.yaml
########## DNS monitoring ##########
- job_name: "kubernetes-dns"
  metrics_path: /probe              # the path is /probe, not /metrics
  params:
    module: [dns_tcp]               # use the DNS TCP module
  static_configs:
    - targets:
        - kube-dns.kube-system:53   # do not omit the port
        - 8.8.4.4:53
        - 8.8.8.8:53
        - 223.5.5.5:53
  relabel_configs:
    # Hand the configured address to the exporter as the "target" URL parameter.
    # The reserved labels need their double underscores: __address__, __param_target.
    - source_labels: [__address__]
      target_label: __param_target
    # Use the probed address as the instance label.
    - source_labels: [__param_target]
      target_label: instance
    # Send the scrape itself to the blackbox exporter Service.
    - target_label: __address__
      replacement: blackbox-exporter.monitor:9115  # Service address; keep it consistent with the Service definition above

# Full configuration file
[root@master01 7]# vim prometheus-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitor
data:
  # Everything that belongs to prometheus.yml must be indented deeper than
  # this key (4 spaces here) to stay inside the block scalar.
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
      external_labels:
        cluster: "kubernetes"

 ############ Scrape jobs ############

 scrape_configs:
 ########## Prometheus monitoring ##########
 - job_name: prometheus
   static_configs:
     # Single static target, labelled instance=prometheus.
     - labels:
         instance: prometheus
       targets:
         - '127.0.0.1:9090'

 ########## kube-apiserver monitoring ##########
 - job_name: kube-apiserver
   scheme: https
   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
   tls_config:
     ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
   kubernetes_sd_configs:
     - role: endpoints
   relabel_configs:
     # Keep only targets whose "<namespace>;<service>" pair is default;kubernetes.
     - action: keep
       regex: default;kubernetes
       source_labels:
         - __meta_kubernetes_namespace
         - __meta_kubernetes_service_name
     # Copy discovery metadata into the endpoint/pod/service/namespace labels.
     - action: replace
       target_label: endpoint
       source_labels:
         - __meta_kubernetes_endpoints_name
     - action: replace
       target_label: pod
       source_labels:
         - __meta_kubernetes_pod_name
     - action: replace
       target_label: service
       source_labels:
         - __meta_kubernetes_service_name
     - action: replace
       target_label: namespace
       source_labels:
         - __meta_kubernetes_namespace

 ########## kube-controller-manager monitoring ##########
 - job_name: 'kube-controller-manager'
   # Discover targets through Kubernetes pod discovery.
   kubernetes_sd_configs:
     - role: pod
   # Scrape over HTTPS.
   scheme: https
   # TLS settings: certificate verification is skipped (test environment).
   tls_config:
     insecure_skip_verify: true
   # Authenticate with the ServiceAccount token.
   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
   relabel_configs:
     # Keep only pods labelled component=kube-controller-manager.
     - source_labels: [__meta_kubernetes_pod_label_component]
       regex: kube-controller-manager
       action: keep
     # Rewrite the scrape address to <pod IP>:10257.
     - source_labels: [__meta_kubernetes_pod_ip]
       regex: (.+)
       target_label: __address__
       replacement: "${1}:10257"
     # Force HTTPS (redundant with `scheme`, kept to be explicit).
     - source_labels: []
       regex: .*
       target_label: __scheme__
       replacement: https
     # Attach metadata labels. `role: pod` does not expose
     # __meta_kubernetes_endpoints_name or __meta_kubernetes_service_name,
     # so no `endpoint` / `service` label is derived in this job.
     - source_labels: [__meta_kubernetes_pod_name]
       action: replace
       target_label: pod
     - source_labels: [__meta_kubernetes_namespace]
       action: replace
       target_label: namespace

 ########## kube-scheduler monitoring ##########
 - job_name: 'kube-scheduler'
   kubernetes_sd_configs:
     - role: pod
   scheme: https
   # Certificate verification is skipped.
   tls_config:
     insecure_skip_verify: true
   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
   relabel_configs:
     # Keep only pods labelled component=kube-scheduler.
     - source_labels: [__meta_kubernetes_pod_label_component]
       regex: kube-scheduler
       action: keep
     # Rewrite the scrape address to <pod IP>:10259.
     - source_labels: [__meta_kubernetes_pod_ip]
       regex: (.+)
       target_label: __address__
       replacement: "${1}:10259"
     # Set the scheme label to https as well (duplicates `scheme` above).
     - source_labels: []
       regex: .*
       target_label: __scheme__
       replacement: https
     # Attach metadata labels. `role: pod` does not expose
     # __meta_kubernetes_endpoints_name or __meta_kubernetes_service_name,
     # so no `endpoint` / `service` label is derived in this job.
     - source_labels: [__meta_kubernetes_pod_name]
       action: replace
       target_label: pod
     - source_labels: [__meta_kubernetes_namespace]
       action: replace
       target_label: namespace

 ########## kube-state-metrics monitoring ##########
 - job_name: kube-state-metrics
   kubernetes_sd_configs:
     - role: endpoints
   relabel_configs:
     # Keep only endpoints that belong to the kube-state-metrics Service.
     - action: keep
       regex: kube-state-metrics
       source_labels:
         - __meta_kubernetes_service_name
     # Rewrite the scrape address to <pod IP>:8080.
     - target_label: __address__
       replacement: "${1}:8080"
       regex: (.+)
       source_labels:
         - __meta_kubernetes_pod_ip
     # Copy discovery metadata into the endpoint/pod/service/namespace labels.
     - action: replace
       target_label: endpoint
       source_labels:
         - __meta_kubernetes_endpoints_name
     - action: replace
       target_label: pod
       source_labels:
         - __meta_kubernetes_pod_name
     - action: replace
       target_label: service
       source_labels:
         - __meta_kubernetes_service_name
     - action: replace
       target_label: namespace
       source_labels:
         - __meta_kubernetes_namespace

 ########## CoreDNS monitoring ##########
 - job_name: coredns
   kubernetes_sd_configs:
     - role: endpoints
   relabel_configs:
     # Keep only endpoints whose Service carries the label k8s_app=kube-dns.
     - action: keep
       regex: kube-dns
       source_labels: [__meta_kubernetes_service_label_k8s_app]
     # Rewrite the scrape address to <pod IP>:9153.
     - target_label: __address__
       replacement: "${1}:9153"
       regex: (.+)
       source_labels:
         - __meta_kubernetes_pod_ip
     # Copy discovery metadata into the endpoint/pod/service/namespace labels.
     - action: replace
       target_label: endpoint
       source_labels:
         - __meta_kubernetes_endpoints_name
     - action: replace
       target_label: pod
       source_labels:
         - __meta_kubernetes_pod_name
     - action: replace
       target_label: service
       source_labels:
         - __meta_kubernetes_service_name
     - action: replace
       target_label: namespace
       source_labels:
         - __meta_kubernetes_namespace

 ########## etcd monitoring ##########
 - job_name: etcd
   kubernetes_sd_configs:
   - role: pod
   relabel_configs:
   # Keep only pods labelled component=etcd.
   - source_labels:
       - __meta_kubernetes_pod_label_component
     regex: etcd
     action: keep
   # Rewrite the scrape address to <pod IP>:2381.
   - source_labels: [__meta_kubernetes_pod_ip]
     regex: (.+)
     target_label: __address__
     replacement: "${1}:2381"
   # Attach metadata labels. `role: pod` does not expose
   # __meta_kubernetes_endpoints_name, so no `endpoint` label is derived here.
   - source_labels: [__meta_kubernetes_pod_name]
     action: replace
     target_label: pod
   - source_labels: [__meta_kubernetes_namespace]
     action: replace
     target_label: namespace

 ########## kubelet monitoring ##########
 - job_name: kubelet
   # Scrapes the /metrics/cadvisor path of each discovered node.
   metrics_path: /metrics/cadvisor
   scheme: https
   # Certificate verification is skipped.
   tls_config:
     insecure_skip_verify: true
   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
   kubernetes_sd_configs:
   - role: node
   relabel_configs:
   # Copy every Kubernetes node label onto the target.
   - action: labelmap
     regex: __meta_kubernetes_node_label_(.+)
   # `role: node` exposes no endpoints, pod or namespace meta labels, so no
   # `endpoint`, `pod` or `namespace` target label is derived in this job.

 ########## k8s-node monitoring ##########
 - job_name: k8s-nodes
   kubernetes_sd_configs:
   - role: node
   relabel_configs:
   # Rewrite <host>:10250 (the discovered node address) to <host>:9100.
   - source_labels: [__address__]
     regex: '(.*):10250'
     replacement: '${1}:9100'
     target_label: __address__
     action: replace
   # Copy every Kubernetes node label onto the target.
   - action: labelmap
     regex: __meta_kubernetes_node_label_(.+)
   # `role: node` exposes no endpoints, pod or namespace meta labels, so no
   # `endpoint`, `pod` or `namespace` target label is derived in this job.

 ########## DNS monitoring ##########
 - job_name: "kubernetes-dns"
   # The path is /probe, not /metrics.
   metrics_path: /probe
   params:
     # Use the DNS TCP module.
     module:
       - dns_tcp
   static_configs:
     - targets:
         # Do not omit the port.
         - kube-dns.kube-system:53
         - 8.8.4.4:53
         - 8.8.8.8:53
         - 223.5.5.5:53
   relabel_configs:
     # Copy the configured address into __param_target.
     - target_label: __param_target
       source_labels:
         - __address__
     # Use __param_target as the instance label.
     - target_label: instance
       source_labels:
         - __param_target
     # Service address; keep it consistent with the Service definition above.
     - replacement: blackbox-exporter.monitor:9115
       target_label: __address__

 ############ Location of the alerting rule files ############
 rule_files:
   # Glob quoted so it is always read as a plain string.
   - "/etc/prometheus/rules/*.rules"

#应用
[root@master01 7]# kaf prometheus-config.yaml
```

参数解释:

################ DNS server monitoring ###################
- job_name: "kubernetes-dns"
  metrics_path: /probe
  params:
    ## Module to use; it must match the blackbox exporter configuration.
    ## The DNS module is used here.
    module: [dns_tcp]
  static_configs:
    ## Addresses to probe (every entry carries an explicit port).
    - targets:
      - kube-dns.kube-system:53
      - 8.8.4.4:53
      - 8.8.8.8:53
      - 223.5.5.5:53
  relabel_configs:
    ## Copy the static DNS server address configured above into the temporary label "__param_target".
    - source_labels: [__address__]
      target_label: __param_target
    ## Use the content of "__param_target" as the instance name.
    - source_labels: [__param_target]
      target_label: instance
    ## Service address of the blackbox exporter.
    - target_label: __address__
      replacement: blackbox-exporter.monitor:9115

更新 prometheus-config.yaml 配置后,重新加载 Prometheus:

curl -XPOST http://prometheus.zhang-qing.com/-/reload

打开 Prometheus 的 Targets 页面,就会看到上面定义的 kubernetes-dns 任务;

image-20250411193136865

在 Graph 页面,可以使用 probe_success、probe_duration_seconds 等指标来检查历史结果。

image-20250411193320587