A problem that came up during a datacenter move: the cluster's nodes were spread across two machine rooms connected by a leased line, and everything had to be consolidated into one room.
That meant the pods had to be migrated while machine resources were tight, and the ZooKeeper migration ran into trouble.
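
For reference, the usual way to evacuate pods from a node being decommissioned is to cordon it and then drain it. A minimal sketch, using one of this cluster's node names; note that the --delete-local-data flag matches kubectl of this era, while newer versions call it --delete-emptydir-data:

//mark the old node unschedulable so nothing new lands on it
kubectl cordon ht5.node
//evict the pods it currently runs; the StatefulSet controller recreates them elsewhere
kubectl drain ht5.node --ignore-daemonsets --delete-local-data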

//check the zk pods from the master

[root@master-web-38 ~]# kubectl get pod -o wide | grep zk
zk-0 1/1 Running 0 7m 172.17.60.200 ht5.node
zk-1 1/1 Running 0 5h 172.17.205.86 ht23.node
zk-2 1/1 Running 0 5d 172.17.157.39 ht2.node
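
Before deleting any replica it is worth checking which one is the leader, so the followers can be moved first. A sketch, assuming the image answers ZooKeeper four-letter-word commands (enabled by default in 3.4.10) and ships nc, which is worth verifying first:

//print each replica's role (Mode: leader or Mode: follower)
for i in 0 1 2; do kubectl exec zk-$i -- sh -c 'echo srvr | nc 127.0.0.1 2181' | grep Mode; done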

Export the zk YAML and inspect it.
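
The export itself is presumably a plain kubectl get:

kubectl get statefulset zk -o yaml > zk.yaml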

apiVersion: apps/v1
kind: StatefulSet
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"apps/v1","kind":"StatefulSet","metadata":{"annotations":{},"creationTimestamp":"2018-09-06T10:24:03Z","generation":2,"name":"zk","namespace":"default","resourceVersion":"123118858","selfLink":"/apis/apps/v1/namespaces/default/statefulsets/zk","uid":"fa5e482b-b1be-11e8-a33b-060eb4000e9d"},"spec":{"podManagementPolicy":"Parallel","replicas":3,"revisionHistoryLimit":10,"selector":{"matchLabels":{"app":"zk"}},"serviceName":"zk-cluster-svc","template":{"metadata":{"creationTimestamp":null,"labels":{"app":"zk"}},"spec":{"affinity":{"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["zk"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"containers":[{"command":["sh","-c","start-zookeeper --servers=3 --data_dir=/var/lib/zookeeper/data --data_log_dir=/var/lib/zookeeper/data/log --conf_dir=/opt/zookeeper/conf --client_port=2181 --election_port=3888 --server_port=2888 --tick_time=2000 --init_limit=10 --sync_limit=5 --heap=4G --max_client_cnxns=60 --snap_retain_count=3 --purge_interval=12 --max_session_timeout=40000 --min_session_timeout=4000 --log_level=INFO"],"image":"127.0.0.1:35000/k8s.gcr.io/kubernetes-zookeeper:1.0-3.4.10","imagePullPolicy":"IfNotPresent","livenessProbe":{"exec":{"command":["sh","-c","zookeeper-ready 2181"]},"failureThreshold":3,"initialDelaySeconds":10,"periodSeconds":10,"successThreshold":1,"timeoutSeconds":5},"name":"kubernetes-zookeeper","ports":[{"containerPort":2181,"name":"client","protocol":"TCP"},{"containerPort":2888,"name":"server","protocol":"TCP"},{"containerPort":3888,"name":"leader-election","protocol":"TCP"}],"readinessProbe":{"exec":{"command":["sh","-c","zookeeper-ready 2181"]},"failureThreshold":3,"initialDelaySeconds":10,"periodSeconds":10,"successThreshold":1,"timeoutSeconds":5},"resources":{"requests":{"cpu":"2","memory":"6Gi"}},"terminationMessagePath":"/dev/termination-log","terminationMessagePolicy":"File","volumeMounts":[{"mountPath":"/var/lib/zookeeper","name":"datadir"}]}],"dnsPolicy":"ClusterFirst","nodeSelector":{"zk":"ht"},"restartPolicy":"Always","schedulerName":"default-scheduler","securityContext":{"fsGroup":1000,"runAsUser":1000},"terminationGracePeriodSeconds":30,"volumes":[{"hostPath":{"path":"/zookeeper","type":""},"name":"datadir"}]}},"updateStrategy":{"type":"RollingUpdate"}},"status":{"collisionCount":0,"currentReplicas":2,"currentRevision":"zk-59fd64cc84","observedGeneration":2,"readyReplicas":2,"replicas":3,"updateRevision":"zk-cc7b55c88","updatedReplicas":1}}
  creationTimestamp: null
  generation: 1
  name: zk
  selfLink: /apis/apps/v1/namespaces/default/statefulsets/zk
spec:
  podManagementPolicy: Parallel
  replicas: 3
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: zk
  serviceName: zk-cluster-svc
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: zk
    spec:
      affinity:
        # the anti-affinity rule forbids two zk pods on the same node, so each
        # replica needs its own node; with only one schedulable node the pod
        # cannot be placed and stays Pending
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: app
                operator: In
                values:
                - zk
            topologyKey: kubernetes.io/hostname
      containers:
      - command:
        - sh
        - -c
        - start-zookeeper --servers=3 --data_dir=/var/lib/zookeeper/data --data_log_dir=/var/lib/zookeeper/data/log
          --conf_dir=/opt/zookeeper/conf --client_port=2181 --election_port=3888 --server_port=2888
          --tick_time=2000 --init_limit=10 --sync_limit=5 --heap=4G --max_client_cnxns=60
          --snap_retain_count=3 --purge_interval=12 --max_session_timeout=40000 --min_session_timeout=4000
          --log_level=INFO
        image: 127.0.0.1:35000/k8s.gcr.io/kubernetes-zookeeper:1.0-3.4.10
        imagePullPolicy: IfNotPresent
        livenessProbe:
          exec:
            command:
            - sh
            - -c
            - zookeeper-ready 2181
          failureThreshold: 3
          initialDelaySeconds: 10
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 5
        name: kubernetes-zookeeper
        ports:
        - containerPort: 2181
          name: client
          protocol: TCP
        - containerPort: 2888
          name: server
          protocol: TCP
        - containerPort: 3888
          name: leader-election
          protocol: TCP
        readinessProbe:
          exec:
            command:
            - sh
            - -c
            - zookeeper-ready 2181
          failureThreshold: 3
          initialDelaySeconds: 10
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 5
        resources:
          requests:
            cpu: "2"
            memory: 6Gi
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        - mountPath: /var/lib/zookeeper
          name: datadir
      dnsPolicy: ClusterFirst
      nodeSelector:
        zk: ht
        # note: each candidate node must carry this label, e.g.
        #   kubectl label nodes ht23.node zk=ht
        # if the zk label already exists, overwrite it:
        #   kubectl label nodes ht23.node zk=ht --overwrite
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext:
        fsGroup: 1000
        runAsUser: 1000
      terminationGracePeriodSeconds: 30
      volumes:
      - hostPath:
          path: /zookeeper
          type: ""
        name: datadir
  updateStrategy:
    type: RollingUpdate
status:
  replicas: 0
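
Since scheduling depends on both the anti-affinity rule and the nodeSelector, it is worth confirming which nodes actually carry the zk=ht label before moving pods. A quick check against the spec above:

//nodes that satisfy nodeSelector zk: ht
kubectl get nodes -l zk=ht
//all labels on a single node
kubectl get node ht23.node --show-labels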

Two problems appeared while the pods were rebuilt:
1. Deleting pod zk-1 failed with a zookeeper data-directory error.
2. Deleting pod zk-0 required a free node because of the anti-affinity rule; without one, scheduling fails.

kubectl describe pod zk-1 shows the following events:

Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal SuccessfulMountVolume 46m kubelet, ht23.node MountVolume.SetUp succeeded for volume "datadir"
Normal SuccessfulMountVolume 46m kubelet, ht23.node MountVolume.SetUp succeeded for volume "default-token-lgcgp"
Normal Created 46m (x4 over 46m) kubelet, ht23.node Created container
Normal Started 46m (x4 over 46m) kubelet, ht23.node Started container
Warning BackOff 45m (x10 over 46m) kubelet, ht23.node Back-off restarting failed container
Normal Pulled 45m (x5 over 46m) kubelet, ht23.node Container image "127.0.0.1:35000/k8s.gcr.io/kubernetes-zookeeper:1.0-3.4.10" already present on machine
Normal Scheduled 1m default-scheduler Successfully assigned zk-1 to ht23.node

//pod zk-1 failed on creation, so check its logs

[root@master-web-38 yaml]# kubectl log zk-1
log is DEPRECATED and will be removed in a future version. Use logs instead.
#This file was autogenerated DO NOT EDIT
clientPort=2181
dataDir=/var/lib/zookeeper/data
dataLogDir=/var/lib/zookeeper/data/log
tickTime=2000
initLimit=10
syncLimit=5
maxClientCnxns=60
minSessionTimeout=4000
maxSessionTimeout=40000
autopurge.snapRetainCount=3
autopurge.purgeInteval=12
server.1=zk-0.zk-cluster-svc.default.svc.cluster.local.:2888:3888
server.2=zk-1.zk-cluster-svc.default.svc.cluster.local.:2888:3888
server.3=zk-2.zk-cluster-svc.default.svc.cluster.local.:2888:3888
Creating ZooKeeper log4j configuration
mkdir: cannot create directory '/var/lib/zookeeper/data': Permission denied
chown: cannot access '/var/lib/zookeeper/data': No such file or directory
mkdir: cannot create directory '/var/lib/zookeeper/data': Permission denied
chown: invalid group: 'zookeeper:USER'
/usr/bin/start-zookeeper: line 176: /var/lib/zookeeper/data/myid: No such file or directory

Fix: on the node where zk-1 is scheduled, create the /zookeeper hostPath directory if it is missing and fix its permissions. The root cause is visible in the spec and the log together: /zookeeper on the node is owned by root, while the container runs as UID 1000 (runAsUser/fsGroup in the securityContext), so mkdir inside the mount is denied.

On ht23.node, /zookeeper already exists but belongs to root:
[root@ht23 /]# ll
total 32
lrwxrwxrwx 1 root root 7 Sep 23 10:24 bin -> usr/bin
dr-xr-xr-x. 5 root root 4096 Sep 23 10:27 boot
drwxr-xr-x 3 root root 23 Feb 21 16:06 data
drwxr-xr-x 19 root root 3100 Feb 19 11:32 dev
drwxr-xr-x. 103 root root 8192 Feb 21 15:44 etc
drwxr-xr-x. 3 root root 19 Sep 23 11:18 home
lrwxrwxrwx 1 root root 7 Sep 23 10:24 lib -> usr/lib
lrwxrwxrwx 1 root root 9 Sep 23 10:24 lib64 -> usr/lib64
drwxr-xr-x. 2 root root 6 Apr 11 2018 media
drwxr-xr-x. 3 root root 18 Apr 11 2018 mnt
drwxr-xr-x. 4 root root 25 Sep 30 15:06 opt
dr-xr-xr-x 558 root root 0 Feb 19 11:32 proc
dr-xr-x---. 7 root root 4096 Feb 21 10:11 root
drwxr-xr-x 36 root root 1120 Feb 21 14:03 run
lrwxrwxrwx 1 root root 8 Sep 23 10:24 sbin -> usr/sbin
drwxr-xr-x. 2 root root 6 Apr 11 2018 srv
dr-xr-xr-x 13 root root 0 Feb 21 16:33 sys
drwxrwxrwt. 8 root root 4096 Feb 21 16:47 tmp
drwxr-xr-x. 13 root root 4096 Sep 23 10:24 usr
drwxr-xr-x. 22 root root 4096 Sep 23 10:24 var
drwxr-xr-x 2 root root 6 Dec 17 00:07 zookeeper
//as a quick fix I just opened it up to 777
[root@ht23 /]# chmod 777 zookeeper/
[root@ht23 /]# chmod -R 777 zookeeper/
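
chmod 777 works, but it is broader than needed. Since the StatefulSet runs the container as UID 1000 with fsGroup 1000, handing the hostPath to that UID/GID is a tighter equivalent; a sketch, to be run on every node that can host a zk pod:

//match the securityContext (runAsUser: 1000, fsGroup: 1000) instead of opening the directory to everyone
chown -R 1000:1000 /zookeeper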

//deleting pod zk-0 then produced this error:

Warning FailedScheduling 1m (x12 over 3m) default-scheduler 0/18 nodes are available: 1 node(s) didn't match pod affinity/anti-affinity, 1 node(s) didn't satisfy existing pods anti-affinity rules, 15 node(s) were unschedulable, 4 node(s) were not ready, 4 node(s) were out of disk space.
Warning FailedScheduling 14s (x7 over 4m) default-scheduler 0/18 nodes are available: 1 node(s) didn't match pod affinity/anti-affinity, 1 node(s) didn't satisfy existing pods anti-affinity rules, 15 node(s) were unschedulable, 4 node(s) were out of disk space, 5 node(s) were not ready.
The per-reason counts exceed 18 because one node can fail several predicates: most nodes are cordoned, not ready, or out of disk, and the remaining candidates are rejected by the zk anti-affinity rule. After I added a freshly installed node to the cluster and zk-0 was scheduled onto it, the problem was resolved.
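
Note that the new node only becomes a scheduling candidate once it carries the nodeSelector label; <new-node> below is a placeholder for its name:

//without this label the scheduler keeps ignoring the node (nodeSelector zk: ht)
kubectl label nodes <new-node> zk=ht
//confirm zk-0 landed there
kubectl get pod zk-0 -o wide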
