一、scheduler¶
[root@tiaoban prometheus]# kubectl describe pod -n kube-system kube-scheduler-master01
Name: kube-scheduler-master01
Namespace: kube-system
...
Labels: component=kube-scheduler
tier=control-plane
...
由上可知,匹配pod对象,lable标签为component=kube-scheduler即可scheduler和controller-manager一样,默认监听0端口,需要注释
所有Master节点修改 /etc/kubernetes/manifests/kube-scheduler.yaml
# cat /etc/kubernetes/manifests/kube-scheduler.yaml
...
- command:
- --bind-address=0.0.0.0 # 端口改为0.0.0.0
#- --port=0 # 注释0端口
...
# 查看暴露的端口
[root@master01 7]# netstat -lntup | grep kube-schedul
tcp6 0 0 :::10259 :::* LISTEN 115708/kube-schedul
- 编写prometheus配置文件,需要注意的是,他默认匹配到的是80端口,需要手动指定为10259端口,同时指定token,否则会提示
server returned HTTP status 400 Bad Request;
########## kube-scheduler 监控配置 ##########
- job_name: 'kube-scheduler'
kubernetes_sd_configs:
- role: pod
scheme: https
tls_config:
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_component]
regex: kube-scheduler
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: "${1}:10259"
- source_labels: []
regex: .*
target_label: __scheme__
replacement: https
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
# 完整配置文件
[root@master01 7]# cat prometheus-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: monitor
data:
prometheus.yml: |
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
cluster: "kubernetes"
############ 数据采集job ###################
scrape_configs:
########## prometheus 监控配置 ##########
- job_name: prometheus
static_configs:
- targets: ['127.0.0.1:9090']
labels:
instance: prometheus
########## kube-apiserver 监控配置 ##########
- job_name: kube-apiserver
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
action: keep
regex: default;kubernetes
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## kube-controller-manager 监控配置 ##########
- job_name: 'kube-controller-manager'
# 使用 Kubernetes Pod 发现机制
kubernetes_sd_configs:
- role: pod
# 强制使用 HTTPS 协议
scheme: https
# TLS 配置(测试环境跳过验证)
tls_config:
insecure_skip_verify: true
# 使用 ServiceAccount 的 Token 认证
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
# 保留标签为 component=kube-controller-manager 的 Pod
- source_labels: [__meta_kubernetes_pod_label_component]
regex: kube-controller-manager
action: keep
# 重写目标地址为 Pod IP + 10257 端口
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: "${1}:10257"
# 强制使用 HTTPS 协议(冗余但明确)
- source_labels: []
regex: .*
target_label: __scheme__
replacement: https
# 附加元数据标签
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## kube-scheduler 监控配置 ##########
- job_name: 'kube-scheduler'
kubernetes_sd_configs:
- role: pod
scheme: https
tls_config:
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_component]
regex: kube-scheduler
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: "${1}:10259"
- source_labels: []
regex: .*
target_label: __scheme__
replacement: https
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
############ 指定告警规则文件路径位置 ###################
rule_files:
- /etc/prometheus/rules/*.rules
# 配置生效
[root@master01 7]# kaf prometheus-config.yaml
# 手动热加载
[root@master01 7]# curl -XPOST http://prometheus.zhang-qing.com/-/reload
二、kube-state-metrics¶
- 编写prometheus配置文件,需要注意的是,他默认匹配到的是8080和801两个端口,需要手动指定为8080端口;
#添加如下配置
########## kube-state-metrics 监控配置 ##########
- job_name: kube-state-metrics
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_name]
regex: kube-state-metrics
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: ${1}:8080
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
#完整配置文件如下
[root@master01 7]# cat prometheus-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: monitor
data:
prometheus.yml: |
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
cluster: "kubernetes"
############ 数据采集job ###################
scrape_configs:
########## prometheus 监控配置 ##########
- job_name: prometheus
static_configs:
- targets: ['127.0.0.1:9090']
labels:
instance: prometheus
########## kube-apiserver 监控配置 ##########
- job_name: kube-apiserver
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
action: keep
regex: default;kubernetes
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## kube-controller-manager 监控配置 ##########
- job_name: 'kube-controller-manager'
# 使用 Kubernetes Pod 发现机制
kubernetes_sd_configs:
- role: pod
# 强制使用 HTTPS 协议
scheme: https
# TLS 配置(测试环境跳过验证)
tls_config:
insecure_skip_verify: true
# 使用 ServiceAccount 的 Token 认证
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
# 保留标签为 component=kube-controller-manager 的 Pod
- source_labels: [__meta_kubernetes_pod_label_component]
regex: kube-controller-manager
action: keep
# 重写目标地址为 Pod IP + 10257 端口
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: "${1}:10257"
# 强制使用 HTTPS 协议(冗余但明确)
- source_labels: []
regex: .*
target_label: __scheme__
replacement: https
# 附加元数据标签
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## kube-scheduler 监控配置 ##########
- job_name: 'kube-scheduler'
kubernetes_sd_configs:
- role: pod
scheme: https
tls_config:
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_component]
regex: kube-scheduler
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: "${1}:10259"
- source_labels: []
regex: .*
target_label: __scheme__
replacement: https
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## kube-state-metrics 监控配置 ##########
- job_name: kube-state-metrics
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_name]
regex: kube-state-metrics
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: ${1}:8080
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
############ 指定告警规则文件路径位置 ###################
rule_files:
- /etc/prometheus/rules/*.rules
# 配置生效
[root@master01 7]# kaf prometheus-config.yaml
# 手动热加载
[root@master01 7]# curl -XPOST http://prometheus.zhang-qing.com/-/reload
三、coredns¶
- 编写prometheus配置文件,需要注意的是,他默认匹配到的是53端口,需要手动指定为9153端口
########## coredns 监控配置 ##########
- job_name: coredns
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels:
- __meta_kubernetes_service_label_k8s_app
regex: kube-dns
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: ${1}:9153
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
#完整配置文件如下
[root@master01 7]# cat prometheus-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: monitor
data:
prometheus.yml: |
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
cluster: "kubernetes"
############ 数据采集job ###################
scrape_configs:
########## prometheus 监控配置 ##########
- job_name: prometheus
static_configs:
- targets: ['127.0.0.1:9090']
labels:
instance: prometheus
########## kube-apiserver 监控配置 ##########
- job_name: kube-apiserver
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
action: keep
regex: default;kubernetes
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## kube-controller-manager 监控配置 ##########
- job_name: 'kube-controller-manager'
# 使用 Kubernetes Pod 发现机制
kubernetes_sd_configs:
- role: pod
# 强制使用 HTTPS 协议
scheme: https
# TLS 配置(测试环境跳过验证)
tls_config:
insecure_skip_verify: true
# 使用 ServiceAccount 的 Token 认证
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
# 保留标签为 component=kube-controller-manager 的 Pod
- source_labels: [__meta_kubernetes_pod_label_component]
regex: kube-controller-manager
action: keep
# 重写目标地址为 Pod IP + 10257 端口
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: "${1}:10257"
# 强制使用 HTTPS 协议(冗余但明确)
- source_labels: []
regex: .*
target_label: __scheme__
replacement: https
# 附加元数据标签
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## kube-scheduler 监控配置 ##########
- job_name: 'kube-scheduler'
kubernetes_sd_configs:
- role: pod
scheme: https
tls_config:
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_component]
regex: kube-scheduler
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: "${1}:10259"
- source_labels: []
regex: .*
target_label: __scheme__
replacement: https
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## kube-state-metrics 监控配置 ##########
- job_name: kube-state-metrics
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_name]
regex: kube-state-metrics
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: ${1}:8080
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
########## coredns 监控配置 ##########
- job_name: coredns
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels:
- __meta_kubernetes_service_label_k8s_app
regex: kube-dns
action: keep
- source_labels: [__meta_kubernetes_pod_ip]
regex: (.+)
target_label: __address__
replacement: ${1}:9153
- source_labels: [__meta_kubernetes_endpoints_name]
action: replace
target_label: endpoint
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
# 配置生效
[root@master01 7]# kaf prometheus-config.yaml
# 手动热加载
[root@master01 7]# curl -XPOST http://prometheus.zhang-qing.com/-/reload