# -*- coding: utf-8 -*-
'''
Watchdog that restarts escheduler master/worker servers missing from ZooKeeper.

NOTE(review): this text was recovered from a diff view; the opening lines of
the original module docstring are elided there and are not reproduced.

Dependencies (either command installs kazoo):
    pip install kazoo
    conda install -c conda-forge kazoo

Usage:
    nohup python -u monitor_server.py /data1_1T/escheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /escheduler/masters /escheduler/workers > nohup.out 2>&1 &

Positional arguments and the install.sh variable each value comes from:
    /data1_1T/escheduler      installPath
    192.168.xx.xx:2181,...    zkQuorum
    /escheduler/masters       zkMasters
    /escheduler/workers       zkWorkers

install.sh (changed in the same diff) defines monitorServerState="true" and,
when that value is "true", launches this script right after start_all.sh:
    nohup python -u ${workDir}/script/monitor_server.py $installPath $zkQuorum $zkMasters $zkWorkers > ${workDir}/monitor_server.log 2>&1 &
'''
import sys
import socket
import os
import sched
import subprocess
import time
from datetime import datetime

from kazoo.client import KazooClient

# Single scheduler shared by ZkClient.main() and ZkClient.restart_server().
schedule = sched.scheduler(time.time, time.sleep)


class ZkClient:
    '''ZooKeeper-backed monitor for the configured master and worker hosts.'''

    def __init__(self):
        '''Connect to the ZooKeeper hosts named by the module-level `zookeepers`.'''
        # `zookeepers` is assigned in the __main__ block from sys.argv[2].
        print(zookeepers)
        self.zk = KazooClient(hosts=zookeepers)
        print("ready start")
        self.zk.start()

    # Read the config file and assemble it into a dict.
    # NOTE(review): the diff view elides the lines that follow this comment in
    # the original file. The definitions behind self.read_file() and
    # self.get_ip_by_hostname() are not visible and are NOT reproduced here;
    # restore them from the real file.

    def _restart_missing(self, zk_path, expected_ips, role):
        '''Start `role`-server over ssh on every expected IP absent from zk_path.

        zk_path      -- ZooKeeper node whose children are the live servers.
        expected_ips -- addresses (already resolved by the caller) that should
                        be registered.
        role         -- 'master' or 'worker'; used in the log line and in the
                        daemon command.

        Nothing happens when zk_path does not exist, as in the original code.
        '''
        if not self.zk.exists(zk_path):
            return
        # The part of each child name before the first '_' is compared with
        # the expected addresses.
        registered = set(
            node.split('_')[0] for node in self.zk.get_children(zk_path))
        for ip in set(expected_ips) - registered:
            print(role + " " + ip + " 服务已经掉了")
            # Argument list instead of a concatenated shell string: no local
            # shell parses the host or the install path.
            subprocess.call([
                'ssh', ip, 'sh',
                install_path + '/bin/escheduler-daemon.sh',
                'start', role + '-server',
            ])

    # Restart services.
    def restart_server(self, inc):
        '''Run one check/restart pass and re-schedule itself `inc` seconds later.

        Reads the 'masters' and 'workers' comma-separated host lists from
        <install_path>/conf/config/run_config.conf, resolves every entry with
        self.get_ip_by_hostname(), and restarts the servers that are not
        registered under masters_zk_path / workers_zk_path.
        '''
        config_dict = self.read_file(install_path + '/conf/config/run_config.conf')

        master_list = config_dict.get('masters').split(',')
        print(master_list)
        master_ips = [self.get_ip_by_hostname(host) for host in master_list]

        worker_list = config_dict.get('workers').split(',')
        print(worker_list)
        worker_ips = [self.get_ip_by_hostname(host) for host in worker_list]

        self._restart_missing(masters_zk_path, master_ips, 'master')
        self._restart_missing(workers_zk_path, worker_ips, 'worker')

        print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        schedule.enter(inc, 0, self.restart_server, (inc,))

    # NOTE(review): the `def` line of this method is elided by the diff view;
    # its name and parameter are taken from the call `zkClient.main(300)` and
    # from the use of `inc` in the two visible statements.
    def main(self, inc):
        '''Queue the first restart_server pass immediately and run the scheduler.'''
        schedule.enter(0, 0, self.restart_server, (inc,))
        schedule.run()


def _parse_args(argv):
    '''Return (install_path, zookeepers, masters_zk_path, workers_zk_path).

    Four positional arguments are required, so argv needs at least five
    entries; otherwise print the usage message and exit with status 1 instead
    of failing later with an IndexError.
    '''
    if len(argv) < 5:
        print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
        sys.exit(1)
    return argv[1], argv[2], argv[3], argv[4]


if __name__ == '__main__':
    # These stay module-level globals because ZkClient reads them directly.
    install_path, zookeepers, masters_zk_path, workers_zk_path = _parse_args(sys.argv)
    zkClient = ZkClient()
    zkClient.main(300)
# NOTE(review): reconstructed from two hunks of a diff of install.sh (around
# lines 98 and 366 of the new file). Everything outside these hunks is not
# visible and is omitted.

# ---- hunk 1 (diff section context: xlsFilePath="/tmp/xls") ----

# Set to "false" to disable; when "false", the settings below need no changes.
hdfsStartupSate="false"

# Whether to launch the auto-restart monitor script (script/monitor_server.py).
monitorServerState="true"

# NameNode address. HA is supported; for HA, put core-site.xml and
# hdfs-site.xml into the conf directory.
namenodeFs="hdfs://mycluster:8020"

# ---- hunk 2 (diff section context: the closing `fi` of the previous step) ----

# 6. Start all services.
echo "6,启动"
sh "${workDir}/script/start_all.sh"

# 7. Start the auto-restart monitor script.
# The variable is quoted so an empty or unset monitorServerState compares as
# "not true" instead of making `[` fail with a syntax error.
if [ "true" = "$monitorServerState" ]; then
    echo 'start monitor server'
    # monitor_server.py reads its arguments by position, so each one is quoted:
    # an empty value must stay in its slot rather than shift the others.
    nohup python -u "${workDir}/script/monitor_server.py" "$installPath" "$zkQuorum" "$zkMasters" "$zkWorkers" > "${workDir}/monitor_server.log" 2>&1 &
fi
# -*- coding: utf-8 -*-
'''
Watchdog that restarts escheduler master/worker servers missing from ZooKeeper.

NOTE(review): this text was recovered from a diff view; the opening lines of
the original module docstring are elided there and are not reproduced.

Dependencies (either command installs kazoo):
    pip install kazoo
    conda install -c conda-forge kazoo

Usage:
    nohup python -u monitor_server.py /data1_1T/escheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /escheduler/masters /escheduler/workers > nohup.out 2>&1 &

Positional arguments and the install.sh variable each value comes from:
    /data1_1T/escheduler      installPath
    192.168.xx.xx:2181,...    zkQuorum
    /escheduler/masters       zkMasters
    /escheduler/workers       zkWorkers
'''
import sys
import socket
import os
import sched
import subprocess
import time
from datetime import datetime

from kazoo.client import KazooClient

# Single scheduler shared by ZkClient.main() and ZkClient.restart_server().
schedule = sched.scheduler(time.time, time.sleep)


class ZkClient:
    '''ZooKeeper-backed monitor for the configured master and worker hosts.'''

    def __init__(self):
        '''Connect to the ZooKeeper hosts named by the module-level `zookeepers`.'''
        # `zookeepers` is assigned in the __main__ block from sys.argv[2].
        print(zookeepers)
        self.zk = KazooClient(hosts=zookeepers)
        print("ready start")
        self.zk.start()

    # Read the config file and assemble it into a dict.
    # NOTE(review): the diff view elides the lines that follow this comment in
    # the original file. The definitions behind self.read_file() and
    # self.get_ip_by_hostname() are not visible and are NOT reproduced here;
    # restore them from the real file.

    def _restart_missing(self, zk_path, expected_ips, role):
        '''Start `role`-server over ssh on every expected IP absent from zk_path.

        zk_path      -- ZooKeeper node whose children are the live servers.
        expected_ips -- addresses (already resolved by the caller) that should
                        be registered.
        role         -- 'master' or 'worker'; used in the log line and in the
                        daemon command.

        Nothing happens when zk_path does not exist, as in the original code.
        '''
        if not self.zk.exists(zk_path):
            return
        # The part of each child name before the first '_' is compared with
        # the expected addresses.
        registered = set(
            node.split('_')[0] for node in self.zk.get_children(zk_path))
        for ip in set(expected_ips) - registered:
            print(role + " " + ip + " 服务已经掉了")
            # Argument list instead of a concatenated shell string: no local
            # shell parses the host or the install path.
            subprocess.call([
                'ssh', ip, 'sh',
                install_path + '/bin/escheduler-daemon.sh',
                'start', role + '-server',
            ])

    # Restart services.
    def restart_server(self, inc):
        '''Run one check/restart pass and re-schedule itself `inc` seconds later.

        Reads the 'masters' and 'workers' comma-separated host lists from
        <install_path>/conf/config/run_config.conf, resolves every entry with
        self.get_ip_by_hostname(), and restarts the servers that are not
        registered under masters_zk_path / workers_zk_path.
        '''
        config_dict = self.read_file(install_path + '/conf/config/run_config.conf')

        master_list = config_dict.get('masters').split(',')
        print(master_list)
        master_ips = [self.get_ip_by_hostname(host) for host in master_list]

        worker_list = config_dict.get('workers').split(',')
        print(worker_list)
        worker_ips = [self.get_ip_by_hostname(host) for host in worker_list]

        self._restart_missing(masters_zk_path, master_ips, 'master')
        self._restart_missing(workers_zk_path, worker_ips, 'worker')

        print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        schedule.enter(inc, 0, self.restart_server, (inc,))

    # NOTE(review): the `def` line of this method is elided by the diff view;
    # its name and parameter are taken from the call `zkClient.main(300)` and
    # from the use of `inc` in the two visible statements.
    def main(self, inc):
        '''Queue the first restart_server pass immediately and run the scheduler.'''
        schedule.enter(0, 0, self.restart_server, (inc,))
        schedule.run()


def _parse_args(argv):
    '''Return (install_path, zookeepers, masters_zk_path, workers_zk_path).

    Four positional arguments are required, so argv needs at least five
    entries; otherwise print the usage message and exit with status 1 instead
    of failing later with an IndexError.
    '''
    if len(argv) < 5:
        print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
        sys.exit(1)
    return argv[1], argv[2], argv[3], argv[4]


if __name__ == '__main__':
    # These stay module-level globals because ZkClient reads them directly.
    install_path, zookeepers, masters_zk_path, workers_zk_path = _parse_args(sys.argv)
    zkClient = ZkClient()
    zkClient.main(300)