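本文主要介绍kubeadm安装方式和二进制安装方式下如何使用临时容器(Ephemeral Containers)在线debug。需要注意:Kubernetes 1.25及以上版本中临时容器功能已正式发布(GA)并默认启用,无需再手动添加--feature-gates=EphemeralContainers=true,可直接跳到使用临时容器的步骤。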

一、Kubeadm安装方式使用临时容器在线debug

1.所有master节点编辑/etc/kubernetes/manifests/kube-apiserver.yaml,添加- --feature-gates=EphemeralContainers=true

$ vim /etc/kubernetes/manifests/kube-apiserver.yaml

apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/kube-apiserver.advertise-address.endpoint: 192.168.100.32:6443
  creationTimestamp: null
  labels:
    component: kube-apiserver
    tier: control-plane
  name: kube-apiserver
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-apiserver
    - --advertise-address=192.168.100.32
    - --allow-privileged=true
    - --authorization-mode=Node,RBAC
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --enable-admission-plugins=NodeRestriction
    - --enable-bootstrap-token-auth=true
    - --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
    - --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
    - --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key
    - --etcd-servers=https://127.0.0.1:2379
    - --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt
    - --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key
    - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
    - --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.crt
    - --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client.key
    - --requestheader-allowed-names=front-proxy-client
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --requestheader-extra-headers-prefix=X-Remote-Extra-
    - --requestheader-group-headers=X-Remote-Group
    - --requestheader-username-headers=X-Remote-User
    - --secure-port=6443
    - --service-account-issuer=https://kubernetes.default.svc.cluster.local
    - --service-account-key-file=/etc/kubernetes/pki/sa.pub
    - --service-account-signing-key-file=/etc/kubernetes/pki/sa.key
    - --service-cluster-ip-range=10.0.0.0/16
    - --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
    - --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
    - --feature-gates=EphemeralContainers=true  #添加此项即可,其他不动
    image: registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.23.14
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 192.168.100.32
        path: /livez
        port: 6443
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: kube-apiserver
    readinessProbe:
      failureThreshold: 3
      httpGet:
        host: 192.168.100.32
        path: /readyz
        port: 6443
        scheme: HTTPS
      periodSeconds: 1
      timeoutSeconds: 15
    resources:
      requests:
        cpu: 250m
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 192.168.100.32
        path: /livez
        port: 6443
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /etc/ssl/certs
      name: ca-certs
      readOnly: true
    - mountPath: /etc/pki
      name: etc-pki
      readOnly: true
    - mountPath: /etc/kubernetes/pki
      name: k8s-certs
      readOnly: true
  hostNetwork: true
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
  - hostPath:
      path: /etc/ssl/certs
      type: DirectoryOrCreate
    name: ca-certs
  - hostPath:
      path: /etc/pki
      type: DirectoryOrCreate
    name: etc-pki
  - hostPath:
      path: /etc/kubernetes/pki
      type: DirectoryOrCreate
    name: k8s-certs
status: {}

2.所有master节点编辑/etc/kubernetes/manifests/kube-controller-manager.yaml,添加- --feature-gates=EphemeralContainers=true

$ vim /etc/kubernetes/manifests/kube-controller-manager.yaml

apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --allocate-node-cidrs=true
    - --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --bind-address=127.0.0.1
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --cluster-cidr=172.16.0.0/12
    - --cluster-name=kubernetes
    - --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
    - --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
    - --controllers=*,bootstrapsigner,tokencleaner
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --root-ca-file=/etc/kubernetes/pki/ca.crt
    - --service-account-private-key-file=/etc/kubernetes/pki/sa.key
    - --service-cluster-ip-range=10.0.0.0/16
    - --use-service-account-credentials=true
    - --feature-gates=EphemeralContainers=true #添加此项即可,其他不动
    image: registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.23.14
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10257
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: kube-controller-manager
    resources:
      requests:
        cpu: 200m
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10257
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /etc/ssl/certs
      name: ca-certs
      readOnly: true
    - mountPath: /etc/pki
      name: etc-pki
      readOnly: true
    - mountPath: /usr/libexec/kubernetes/kubelet-plugins/volume/exec
      name: flexvolume-dir
    - mountPath: /etc/kubernetes/pki
      name: k8s-certs
      readOnly: true
    - mountPath: /etc/kubernetes/controller-manager.conf
      name: kubeconfig
      readOnly: true
  hostNetwork: true
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
  - hostPath:
      path: /etc/ssl/certs
      type: DirectoryOrCreate
    name: ca-certs
  - hostPath:
      path: /etc/pki
      type: DirectoryOrCreate
    name: etc-pki
  - hostPath:
      path: /usr/libexec/kubernetes/kubelet-plugins/volume/exec
      type: DirectoryOrCreate
    name: flexvolume-dir
  - hostPath:
      path: /etc/kubernetes/pki
      type: DirectoryOrCreate
    name: k8s-certs
  - hostPath:
      path: /etc/kubernetes/controller-manager.conf
      type: FileOrCreate
    name: kubeconfig
status: {}

3.所有master节点编辑/etc/kubernetes/manifests/kube-scheduler.yaml,添加- --feature-gates=EphemeralContainers=true

$ vim /etc/kubernetes/manifests/kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
    - --feature-gates=EphemeralContainers=true #添加此项即可,其他不动
    image: registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.23.14
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10259
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: kube-scheduler
    resources:
      requests:
        cpu: 100m
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10259
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /etc/kubernetes/scheduler.conf
      name: kubeconfig
      readOnly: true
  hostNetwork: true
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
  - hostPath:
      path: /etc/kubernetes/scheduler.conf
      type: FileOrCreate
    name: kubeconfig
status: {}

4.所有master节点和node节点编辑/var/lib/kubelet/kubeadm-flags.env文件,添加--feature-gates=EphemeralContainers=true

$ vim /var/lib/kubelet/kubeadm-flags.env

KUBELET_KUBEADM_ARGS="--container-runtime=remote --container-runtime-endpoint=/run/containerd/containerd.sock --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.6 --feature-gates=EphemeralContainers=true"

5.所有master节点和node节点重新加载daemon文件并重启kubelet

$ systemctl daemon-reload
$ systemctl restart kubelet

6.完成后,重新查看node节点状态

[root@k8s-master01 ~]# kubectl get node
NAME           STATUS   ROLES                  AGE    VERSION
k8s-master01   Ready    control-plane,master   6d1h   v1.23.14
k8s-master02   Ready    control-plane,master   6d1h   v1.23.14
k8s-master03   Ready    control-plane,master   6d1h   v1.23.14
k8s-node01     Ready    <none>                 6d1h   v1.23.14
k8s-node02     Ready    <none>                 6d1h   v1.23.14

7.查找一个pod做测试用,这里选择metrics-server-5cf8885b66-99jwg作为测试Pod

[root@k8s-master01 ~]# kubectl get po -n kube-system
NAME                                       READY   STATUS    RESTARTS      AGE
...
...
...
metrics-server-5cf8885b66-99jwg            1/1     Running   1 (29h ago)   6d

8.使用临时容器

[root@k8s-master01 ~]# kubectl debug metrics-server-5cf8885b66-99jwg  -ti --image=registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools -n kube-system

(07:19 metrics-server-5cf8885b66-99jwg:/) netstat -lntp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name
tcp6       0      0 :::4443                 :::*                    LISTEN      -

9.如果终端卡死可以另起一个窗口使用kubectl exec进入到临时容器

[root@k8s-master01 ~]# kubectl describe po metrics-server-5cf8885b66-99jwg -n kube-system
...(前面的输出已省略)... "registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools"
  Warning  Failed   2m32s                  kubelet  Error: ImagePullBackOff
  Normal   Pulling  2m17s (x3 over 3m28s)  kubelet  Pulling image "registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools"
  Normal   Pulled   17s                    kubelet  Successfully pulled image "registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools" in 2m0.031448466s
  Normal   Created  17s                    kubelet  Created container debugger-z8sj5
  Normal   Started  17s                    kubelet  Started container debugger-z8sj5

[root@k8s-master01 ~]# kubectl exec metrics-server-5cf8885b66-99jwg  -ti -c debugger-z8sj5 -n kube-system -- bash

二、二进制安装方式使用临时容器在线debug

1.所有master节点编辑/usr/lib/systemd/system/kube-apiserver.service文件,添加--feature-gates=EphemeralContainers=true

$ vim /usr/lib/systemd/system/kube-apiserver.service

2.所有master节点编辑/usr/lib/systemd/system/kube-controller-manager.service文件,添加--feature-gates=EphemeralContainers=true

$ vim /usr/lib/systemd/system/kube-controller-manager.service

3.所有master节点编辑/usr/lib/systemd/system/kube-scheduler.service文件,添加--feature-gates=EphemeralContainers=true

$ vim /usr/lib/systemd/system/kube-scheduler.service

4.所有master节点和node节点编辑/usr/lib/systemd/system/kube-proxy.service文件,添加--feature-gates=EphemeralContainers=true

$ vim /usr/lib/systemd/system/kube-proxy.service

5.所有master节点和node节点编辑/etc/kubernetes/kubelet-conf.yml文件,添加以下参数

$ vim /etc/kubernetes/kubelet-conf.yml
...
...
...
featureGates:
  EphemeralContainers: true

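6.所有master节点和node节点重启相关服务(master节点执行下面的完整命令;node节点上没有kube-apiserver、kube-controller-manager、kube-scheduler服务,只需重启kubelet和kube-proxy)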

$ systemctl daemon-reload
$ systemctl restart kubelet kube-apiserver kube-controller-manager kube-scheduler kube-proxy

7.在master01节点上查找一个pod做测试用,这里选择metrics-server-5cf8885b66-99jwg作为测试Pod

[root@k8s-master01 ~]# kubectl get po -n kube-system
NAME                                       READY   STATUS    RESTARTS      AGE
...
...
...
metrics-server-5cf8885b66-99jwg            1/1     Running   1 (29h ago)   6d

8.在master01节点上使用临时容器

[root@k8s-master01 ~]# kubectl debug metrics-server-5cf8885b66-99jwg  -ti --image=registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools -n kube-system

(07:19 metrics-server-5cf8885b66-99jwg:/) netstat -lntp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name
tcp6       0      0 :::4443                 :::*                    LISTEN      -

9.在master01节点上如果终端卡死可以另起一个窗口使用kubectl exec进入到临时容器

[root@k8s-master01 ~]# kubectl describe po metrics-server-5cf8885b66-99jwg -n kube-system
...(前面的输出已省略)... "registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools"
  Warning  Failed   2m32s                  kubelet  Error: ImagePullBackOff
  Normal   Pulling  2m17s (x3 over 3m28s)  kubelet  Pulling image "registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools"
  Normal   Pulled   17s                    kubelet  Successfully pulled image "registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools" in 2m0.031448466s
  Normal   Created  17s                    kubelet  Created container debugger-z8sj5
  Normal   Started  17s                    kubelet  Started container debugger-z8sj5

[root@k8s-master01 ~]# kubectl exec metrics-server-5cf8885b66-99jwg  -ti -c debugger-z8sj5 -n kube-system -- bash