Friday, July 21, 2017

How to Set Up High Availability Kubernetes


This post shows how to set up Kubernetes HA with two masters.

Environment

Kubernetes Version 1.6.

We have two servers (etcd runs on the first one):

Master1: 172.16.155.158
Master2: 172.16.155.165
Etcd:    172.16.155.158

Here we only have a single etcd instance; we focus on how to set up two Kubernetes masters and do not consider etcd clustering.

There are two paths to the two API servers:

1. How a container connects to the Kubernetes API server -> the kubernetes.default service
2. How a minion node connects to the Kubernetes API server -> a load balancer in front of the host IPs

We first discuss how to set up the kubernetes.default service so that all containers can reach the API server, then how a minion node connects to the API server through a real load balancer, and finally how this works when a container is launched.
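
Later on, one quick way to see the first path in action is to look at the NAT rules kube-proxy programs for the kubernetes service. This is only a sketch and assumes kube-proxy runs in iptables mode; the generated KUBE-SVC-* chain names differ per cluster.

# On a node, list the service rules for the cluster IP of kubernetes.default
iptables -t nat -L KUBE-SERVICES -n | grep 172.18.0.1
# Or search by the comment kube-proxy attaches to its rules
iptables-save -t nat | grep 'default/kubernetes'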

Master1 Setting

root@kuberm:~/kube1.6config/deploy/webscale# cat /lib/systemd/system/kube-apiserver.service
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target

[Service]
User=root
ExecStart=/opt/bin/kube-apiserver \
 --insecure-bind-address=0.0.0.0 \
 --insecure-port=8080 \
 --etcd-servers=http://172.16.155.158:2379 \
 --logtostderr=false \
 --allow-privileged=false \
 --service-cluster-ip-range=172.18.0.0/16 \
 --admission-control=NamespaceLifecycle,ServiceAccount,LimitRanger,SecurityContextDeny,ResourceQuota \
 --service-node-port-range=30000-32767 \
 --advertise-address=172.16.155.158 \
 --v=6 \
 --storage-backend="etcd2" \
 --log-dir="/var/log/kubernetes" \
 --client-ca-file=/srv/kubernetes/ca.crt \
 --tls-private-key-file=/srv/kubernetes/server.key \
 --tls-cert-file=/srv/kubernetes/server.cert \
 --service_account_key_file=/srv/kubernetes/server.key \
 --secure-port=6443 \
 --apiserver-count=2

Restart=on-failure
Type=notify
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
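
Once the unit is started, you can quickly confirm this API server answers on both the insecure and the secure port. The paths below are standard apiserver endpoints; adjust the IP for the host you are checking, and note that the secure port may require credentials depending on your authorization settings.

curl -s http://172.16.155.158:8080/healthz
curl -sk https://172.16.155.158:6443/version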

Note that --advertise-address=172.16.155.158 is critical: it is the host IP this API server publishes for itself. That is why kubernetes.default routes to the host IP to reach the API server.

The kubernetes service itself lives on a cluster (container-side) IP address and routes to the host IP addresses declared in the config files to reach the API servers:

root@kuberm:~/kube1.6config/deploy/webscale# kubectl get endpoints kubernetes
NAME         ENDPOINTS                                 AGE
kubernetes   172.16.155.158:6443,172.16.155.165:6443   34d
root@kuberm:~/kube1.6config/deploy/webscale# cat /lib/systemd/system/kube-controller-manager.service
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/kubernetes/kubernetes

[Service]
User=root
ExecStart=/opt/bin/kube-controller-manager \
  --master=172.16.155.158:8080 \
  --root-ca-file=/srv/kubernetes/ca.crt \
  --service-account-private-key-file=/srv/kubernetes/server.key \
  --logtostderr=false \
  --log-dir="/var/log/kubernetes" \
  --v=3 \
  --leader-elect=true
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
root@kuberm:~/kube1.6config/deploy/webscale# cat /lib/systemd/system/kube-scheduler.service
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/kubernetes/kubernetes

[Service]
User=root
ExecStart=/opt/bin/kube-scheduler \
  --logtostderr=true \
  --master=172.16.155.158:8080 \
  --leader-elect=true
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
root@kuberm:~/kube1.6config/deploy/webscale# cat /lib/systemd/system/kube-proxy.service
[Unit]
Description=Kubernetes Proxy
After=network.target

[Service]
ExecStart=/opt/bin/kube-proxy  \
  --hostname-override=172.16.155.158 \
  --master=http://172.16.155.158:8080 \
  --logtostderr=true
Restart=on-failure

[Install]
WantedBy=multi-user.target
root@kuberm:~/kube1.6config/deploy/webscale# cat /lib/systemd/system/kubelet.service
[Unit]
Description=Kubernetes Kubelet
After=docker.service
Requires=docker.service

[Service]
ExecStart=/opt/bin/kubelet \
  --hostname-override=172.16.155.158 \
  --api-servers=http://172.16.155.158:8080 \
  --register-node=true \
  --logtostderr=false \
  --log-dir="/var/log/kubernetes" \
  --v=3 \
  --cluster_dns=172.18.0.5 \
  --cluster_domain=cluster.local
Restart=on-failure
KillMode=process

[Install]
WantedBy=multi-user.target
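
With all the unit files in place, reload systemd and start the components on the master. This sketch simply enables every unit shown above; whether kubelet and kube-proxy should also run on the masters depends on your topology.

systemctl daemon-reload
systemctl enable kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet
systemctl start kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet
systemctl status kube-apiserver --no-pager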

Master2 Setting

root@kuberm2:~# cat /lib/systemd/system/kube-apiserver.service
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target

[Service]
User=root
ExecStart=/opt/bin/kube-apiserver \
 --insecure-bind-address=0.0.0.0 \
 --insecure-port=8080 \
 --etcd-servers=http://172.16.155.158:2379 \
 --logtostderr=false \
 --allow-privileged=true \
 --service-cluster-ip-range=172.18.0.0/16 \
 --admission-control=NamespaceLifecycle,ServiceAccount,LimitRanger,SecurityContextDeny,ResourceQuota \
 --service-node-port-range=30000-32767 \
 --advertise-address=172.16.155.165 \
 --v=6 \
 --storage-backend="etcd2" \
 --log-dir="/var/log/kubernetes" \
 --client-ca-file=/srv/kubernetes/ca.crt \
 --tls-private-key-file=/srv/kubernetes/server.key \
 --tls-cert-file=/srv/kubernetes/server.cert \
 --service_account_key_file=/srv/kubernetes/server.key \
 --secure-port=6443 \
 --apiserver-count=2

Restart=on-failure
Type=notify
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
root@kuberm2:~# cat /lib/systemd/system/kube-controller-manager.service
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/kubernetes/kubernetes

[Service]
User=root
ExecStart=/opt/bin/kube-controller-manager \
  --master=172.16.155.165:8080 \
  --root-ca-file=/srv/kubernetes/ca.crt \
  --service-account-private-key-file=/srv/kubernetes/server.key \
  --logtostderr=false \
  --log-dir="/var/log/kubernetes" \
  --v=3 \
  --leader-elect=true

Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
root@kuberm2:~# cat /lib/systemd/system/kube-scheduler.service
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/kubernetes/kubernetes

[Service]
User=root
ExecStart=/opt/bin/kube-scheduler \
  --logtostderr=true \
  --master=172.16.155.165:8080 \
  --leader-elect=true
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
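
For the second path from the introduction, the minion nodes reach the API servers through a load balancer in front of the two host IPs. A minimal HAProxy sketch is shown below; the bind address 172.16.155.200 and the HAProxy host are assumptions, not part of this environment. Kubelet and kube-proxy on the minions would then point their --api-servers / --master flags at that address instead of a single master.

# /etc/haproxy/haproxy.cfg (sketch; the frontend address is hypothetical)
frontend kube-apiserver
    bind 172.16.155.200:6443
    mode tcp
    default_backend kube-masters

backend kube-masters
    mode tcp
    balance roundrobin
    option tcp-check
    server master1 172.16.155.158:6443 check
    server master2 172.16.155.165:6443 check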

Check Result

Let's check the API server endpoints that containers use, i.e. the kubernetes.default service.
It should show two API servers behind the kube-proxy load balancer.

root@kuberm:~/kube1.6config/deploy/webscale# kubectl get svc kubernetes
NAME         CLUSTER-IP   EXTERNAL-IP   PORT(S)   AGE
kubernetes   172.18.0.1   <none>        443/TCP   34d
root@kuberm:~/kube1.6config/deploy/webscale# kubectl get endpoints kubernetes
NAME         ENDPOINTS                                 AGE
kubernetes   172.16.155.158:6443,172.16.155.165:6443   34d
root@kuberm:~/kube1.6config/deploy/webscale# kubectl get svc kubernetes -o yaml
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: 2017-05-03T05:08:44Z
  labels:
    component: apiserver
    provider: kubernetes
  name: kubernetes
  namespace: default
  resourceVersion: "397592"
  selfLink: /api/v1/namespaces/default/services/kubernetes
  uid: 94c098f6-2fbe-11e7-9a3a-000c295cb5bb
spec:
  clusterIP: 172.18.0.1
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: 6443
  sessionAffinity: ClientIP
  type: ClusterIP
status:
  loadBalancer: {}
root@kuberm:~/kube1.6config/deploy/webscale# /opt/bin/etcdctl get /registry/services/endpoints/default/kubernetes
{"kind":"Endpoints","apiVersion":"v1","metadata":{"name":"kubernetes","namespace":"default","selfLink":"/api/v1/namespaces/default/endpoints/kubernetes","uid":"94c24aba-2fbe-11e7-9a3a-000c295cb5bb","creationTimestamp":"2017-05-03T05:08:44Z"},"subsets":[{"addresses":[{"ip":"172.16.155.158"},{"ip":"172.16.155.165"}],"ports":[{"name":"https","port":6443,"protocol":"TCP"}]}]}

Check multiple kube-scheduler and kube-controller

With --leader-elect=true configured for both kube-scheduler and kube-controller-manager, both processes keep running, but only one of each is actually doing work. The logs make this clear: only one instance holds the lease and is active, even though the other process still exists.
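
To see which instance currently holds each lock, inspect the endpoints objects that --leader-elect uses in kube-system; in this release the holder is recorded in the control-plane.alpha.kubernetes.io/leader annotation (the grep below is just a convenience, adjust if your build records it differently).

kubectl -n kube-system get endpoints kube-scheduler -o yaml | grep leader
kubectl -n kube-system get endpoints kube-controller-manager -o yaml | grep leader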

Slave Of Kube-Scheduler

ube-scheduler 200 OK in 1 milliseconds
I0607 11:00:05.179156   17298 leaderelection.go:248] lock is held by kuberm and has not yet expired
I0607 11:00:05.179162   17298 leaderelection.go:185] failed to acquire lease kube-system/kube-scheduler
I0607 11:00:09.334061   17298 round_trippers.go:417] GET http://172.16.155.165:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 2 milliseconds
I0607 11:00:09.334184   17298 leaderelection.go:248] lock is held by kuberm and has not yet expired
I0607 11:00:09.334191   17298 leaderelection.go:185] failed to acquire lease kube-system/kube-scheduler
I0607 11:00:12.110069   17298 round_trippers.go:417] GET http://172.16.155.165:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 2 milliseconds
I0607 11:00:12.110194   17298 leaderelection.go:248] lock is held by kuberm and has not yet expired
I0607 11:00:12.110201   17298 leaderelection.go:185] failed to acquire lease kube-system/kube-scheduler

Master of Kube-Scheduler

I0607 11:00:53.679760   18257 round_trippers.go:417] GET http://172.16.155.158:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 1 milliseconds
I0607 11:00:53.684782   18257 round_trippers.go:417] PUT http://172.16.155.158:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 4 milliseconds
I0607 11:00:53.684914   18257 leaderelection.go:204] succesfully renewed lease kube-system/kube-scheduler
I0607 11:00:55.686845   18257 round_trippers.go:417] GET http://172.16.155.158:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 1 milliseconds
I0607 11:00:55.693945   18257 round_trippers.go:417] PUT http://172.16.155.158:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 6 milliseconds
I0607 11:00:55.694060   18257 leaderelection.go:204] succesfully renewed lease kube-system/kube-scheduler

Master of Kube-controller

607 11:03:56.630699   17442 nodecontroller.go:1044] node kubermnode2 hasn't been updated for 8m30.112359775s. Last OutOfDisk is: &NodeCondition{Type:OutOfDisk,Status:Unknown,LastHeartbeatTime:2017-05-12 18:15:25 +0800 CST,LastTransitionTime:2017-06-06 14:35:21 +0800 CST,Reason:NodeStatusUnknown,Message:Kubelet stopped posting node status.,}
I0607 11:03:56.630725   17442 nodecontroller.go:1044] node kubermnode2 hasn't been updated for 8m30.112386319s. Last MemoryPressure is: &NodeCondition{Type:MemoryPressure,Status:Unknown,LastHeartbeatTime:2017-05-12 18:15:25 +0800 CST,LastTransitionTime:2017-06-06 14:35:21 +0800 CST,Reason:NodeStatusUnknown,Message:Kubelet stopped posting node status.,}
I0607 11:03:56.630739   17442 nodecontroller.go:1044] node kubermnode2 hasn't been updated for 8m30.112399821s. Last DiskPressure is: &NodeCondition{Type:DiskPressure,Status:Unknown,LastHeartbeatTime:2017-05-12 18:15:25 +0800 CST,LastTransitionTime:2017-06-06 14:35:21 +0800 CST,Reason:NodeStatusUnknown,Message:Kubelet stopped posting node status.,}

Slave of kube-controller

ube-controller-manager 200 OK in 1 milliseconds
I0607 11:04:32.485502   17291 leaderelection.go:248] lock is held by kuberm and has not yet expired
I0607 11:04:32.485506   17291 leaderelection.go:185] failed to acquire lease kube-system/kube-controller-manager
I0607 11:04:36.263032   17291 round_trippers.go:417] GET http://172.16.155.165:8080/api/v1/namespaces/kube-system/endpoints/kube-controller-manager 200 OK in 1 milliseconds
I0607 11:04:36.263122   17291 leaderelection.go:248] lock is held by kuberm and has not yet expired
I0607 11:04:36.263125   17291 leaderelection.go:185] failed to acquire lease kube-system/kube-controller-manager

Failover of Kube-Scheduler

Stop the kube-scheduler on the master; on the slave you will see the failover after about 10 seconds. You can try the same with kube-controller-manager; the two components fail over independently.
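
A minimal way to exercise this failover by hand, using the hosts in this environment:

# On the current leader (kuberm):
systemctl stop kube-scheduler
# On the other master (kuberm2), follow the scheduler log and wait for it to
# acquire the lease; the takeover looks like the output below:
journalctl -u kube-scheduler -f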

I0607 11:27:38.747508    1487 leaderelection.go:248] lock is held by kuberm and has not yet expired
I0607 11:27:38.747513    1487 leaderelection.go:185] failed to acquire lease kube-system/kube-scheduler
I0607 11:27:41.168987    1487 round_trippers.go:417] GET http://172.16.155.165:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 3 milliseconds
I0607 11:27:41.176275    1487 round_trippers.go:417] PUT http://172.16.155.165:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 4 milliseconds
I0607 11:27:41.176565    1487 leaderelection.go:189] successfully acquired lease kube-system/kube-scheduler
I0607 11:27:41.179086    1487 event.go:217] Event(v1.ObjectReference{Kind:"Endpoints", Namespace:"kube-system", Name:"kube-scheduler", UID:"758d2812-4b2d-11e7-9e10-000c295cb5bb", APIVersion:"v1", ResourceVersion:"558042", FieldPath:""}): type: 'Normal' reason: 'LeaderElection' kuberm2 became leader
I0607 11:27:41.181071    1487 round_trippers.go:417] POST http://172.16.155.165:8080/api/v1/namespaces/kube-system/events 201 Created in 2 milliseconds
I0607 11:27:43.178612    1487 round_trippers.go:417] GET http://172.16.155.165:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 1 milliseconds
I0607 11:27:43.182060    1487 round_trippers.go:417] PUT http://172.16.155.165:8080/api/v1/namespaces/kube-system/endpoints/kube-scheduler 200 OK in 3 milliseconds
I0607 11:27:43.182111    1487 leaderelection.go:204] succesfully renewed lease kube-system/kube-scheduler
