利用SSH或者Zabbix监控,配合Django开发框架,改造出属于自己的监控平台,实现包括主机图形,自动发现,计划任务,批量cmd执行,服务监控,日志监控等功能,由于公司机器混乱,基本上市面上的所有设备都能找到,监控这些不同型号不同系统的主机需要分别对待,我们就借助各种开源项目来做一个简单的巡检工具,方便工作需要。
由于公司的机器都是老式的,性能不高,所以全程我都没敢加多线程,生怕把服务器搞到拒绝服务。
首先客户端还是需要安装zabbix-agent这个监控工具,该工具可以监控所有的平台,windows linux mac unix 等,利用该工具完成一整套自动化平台一点问题都没有,连ansible都不需要了。
[root@localhost ~]# wget http://repo.zabbix.com/zabbix/4.4/rhel/7/x86_64/zabbix-agent-4.4.3-1.el7.x86_64.rpm
[root@localhost ~]# rpm -ivh zabbix-agent-4.4.3-1.el7.x86_64.rpm
修改配置文件,并启动服务完事。
[root@localhost ~]# vim /etc/zabbix/zabbix_agentd.conf
PidFile=/var/run/zabbix/zabbix_agentd.pid
LogFile=/var/log/zabbix/zabbix_agentd.log
LogFileSize=0
Server=192.168.1.2
ServerActive=192.168.1.2
Hostname=centos1
Timeout=1
Include=/etc/zabbix/zabbix_agentd.d/*.conf
EnableRemoteCommands=1
[root@localhost ~]# systemctl restart zabbix-agent
[root@localhost ~]# systemctl enable zabbix-agent
服务端下载https://www.zabbix.com/cn/download_agents
下载好之后将里面的 zabbix_get.exe 拖入项目中,直接调用就好。
基本的数据采集命令:
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.hostname
localhost.localdomain        // 此处才是主机名称
zabbix_get.exe -s 192.168.1.20 -p 10050 -k agent.hostname
centos1                      // 此处是我们在 zabbix_agentd.conf 中自定义的 Hostname
zabbix_get.exe -s 192.168.1.20 -p 10050 -k agent.ping
1
zabbix_get.exe -s 192.168.1.20 -p 10050 -k agent.version
4.4.3                        // zabbix 的version
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.boottime
1578564779                   // 系统启动的时间戳
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.cpu.num
1                            // 处理器个数
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.localtime
1578569019                   // 当前系统时间戳
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.sw.arch
x86_64                       // 返回系统架构
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.sw.os
Linux version 3.10.0-1062.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-36) (GCC) ) #1 SMP Wed Aug 7 18:08:02 UTC 2019   // 返回系统详细架构
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.sw.packages[ssh]   // 已安装软件列表
[rpm] libssh2-1.8.0-3.el7.x86_64,openssh-7.4p1-21.el7.x86_64,openssh-clients-7.4p1-21.el7.x86_64,openssh-server-7.4p1-21.el7.x86_64
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.uname
Linux localhost.localdomain 3.10.0-1062.el7.x86_64 #1 SMP Wed Aug 7 18:08:02 UTC 2019 x86_64
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.uptime
4438                         // 系统运行时长(秒),使用 s/uptime 单位可得到可读的值
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.users.num
0                            // 登陆用户数
zabbix_get.exe -s 192.168.1.20 -p 10050 -k kernel.maxfiles
96437                        // 系统支持最大的open files整数
zabbix_get.exe -s 192.168.1.20 -p 10050 -k kernel.maxproc
65536                        // 系统支持最大进程数
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.hw.macaddr
[ens32] 00:50:56:22:6f:d3    // mac 地址列表
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.sw.os
Linux version 3.10.0-1062.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-36) (GCC) ) #1 SMP Wed Aug 7 18:08:02 UTC 2019   // 操作系统信息
允许远程执行命令:
[root@localhost ~]# vim /etc/zabbix/zabbix_agentd.conf
EnableRemoteCommands=1
[root@localhost ~]# systemctl restart zabbix-agent

D:\zabbix> zabbix_get.exe -s 192.168.1.20 -p 10050 -k "system.run[df -h]"
Filesystem               Size  Used Avail Use% Mounted on
devtmpfs                 480M     0  480M   0% /dev
tmpfs                    491M     0  491M   0% /dev/shm
tmpfs                    491M  7.4M  484M   2% /run
tmpfs                    491M     0  491M   0% /sys/fs/cgroup
/dev/mapper/centos-root   27G  1.7G   26G   7% /
/dev/sda1               1014M  136M  879M  14% /boot
tmpfs                     99M     0   99M   0% /run/user/0
监控cpu
system.cpu.util[<cpu>,<type>,<mode>]
- cpu: cpu编号(默认是所有cpu)
- type: 可用值,idle,nice,user,system,iowait,interrupt,softirq,steal
- mode: 可用值,avg1(一分钟平均值),avg5,avg15
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k system.cpu.util[,,avg1]
0.016692
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.cpu.num
1                            // cpu 个数
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.cpu.util
0.016461                     // cpu 利用率百分比
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.hw.cpu   // cpu 详细信息
processor 0: GenuineIntel Intel(R) Celeron(R) CPU E3500 @ 2.70GHz working at 2700MHz
监控内存
proc.mem[<name>,<user>,<mode>,<cmdline>,<memtype>]
- name 进程名(默认所有进程)
- user 用户名(默认所有用户)
- mode 可选值,avg,max,min,sum
- cmdline 命令过滤
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k proc.mem[httpd,]
1376378880
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k proc.mem[]
6479925248
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.swap.in
0                            // Swap in(从磁盘换入内存),数字
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.swap.out
0                            // Swap out(从内存换出到磁盘),数字
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.swap.size
2147479552                   // 交换分区大小,字节或者百分比
vm.memory.size 的 mode - total (默认),active,anon,buffers,cached,exec,file,free,inactive,pinned,shared,wired,used,pused,available
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vm.memory.size[free]
598949888                    // 监控内存变化
监控网卡
net.if.in[if,<mode>]      # 网卡入口流量
net.if.out[if,<mode>]     # 网卡出口流量
net.if.total[if,<mode>]   # 网卡进/出流量总和
- if 网卡名称
- mode 可用值(如下): bytes 字节数,packets 包数量,errors 错误数量,dropped 丢包数量
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k net.if.in[ens32,bytes]
165983
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k net.if.in[ens32,packets]
2166
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k net.if.out[ens32,packets]
2067
zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.if.discovery
[{"{#IFNAME}":"ens32"},{"{#IFNAME}":"lo"}]   // 列出系统网卡信息
zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.if.total[ens32]
20785083                     // 列出网卡总流量
zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.tcp.listen[10050]
1                            // 检测指定端口是否开启
zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.tcp.port[,80]
0                            // 检测本机web服务是否开启
zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.tcp.service[ssh,,22]
1                            // 检测指定服务是否在运行
监控IO/读写
vfs.dev.read[<device>,<type>,<mode>]    # 磁盘读取
vfs.dev.write[<device>,<type>,<mode>]   # 磁盘写入
- device 磁盘设备(默认all)
- type sectors,operations,bytes,sps,ops,bps
- mode 可用值 avg1(默认),avg5,avg15
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.read[/dev/sda,,avg1]
0.000000
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.read[all]
195.200000
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.read[all,,avg1]
195.200000
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.read[all,,avg5]
177.758242
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.write[all,,avg5]
465.284483
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.write[all,,avg1]
877.200000
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.write[/dev/sda,]
374.800000
文件操作:
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.file.exists[/etc/passwd]
1                            // 检测指定文件是否存在
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.file.md5sum[/etc/passwd]
52f5db4f5688b79b9c07ef5a42ea29af   // md5验证
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.file.size[/etc/passwd]
870                          // 文件大小字节数
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.file.time[/etc/passwd]
1578566854                   // 文件日期时间戳
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.fs.discovery   // 列出已挂载的文件系统
[{"{#FSNAME}":"/","{#FSTYPE}":"rootfs"},{"{#FSNAME}":"/sys","{#FSTYPE}":"sysfs"},{"{#FSNAME}":"/proc","{#FSTYPE}":"proc"},{"{#FSNAME}":"/dev","{#FSTYPE}":"devtmpfs"},{"{#FSNAME}":"/sys/kernel/security","{#FSTYPE}":"securityfs"},{"{#FSNAME}":"/dev/shm"}]
mode - total (默认),free,used,pfree (空闲百分比),pused (使用百分比)
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.fs.inode[/dev,free]
122355                       // 检测 /dev 的空闲 inode 数
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.fs.size[/dev,free]
502525952                    // 磁盘空间,返回本地文件系统的空闲量(字节)
磁盘利用率
监控磁盘使用情况需要 vfs.fs.size[fs,<mode>] 键值。
fs:文件系统
mode:模式,total(默认)总量,free 空闲,used 已使用,pfree 空闲百分比,pused 使用百分比
名称:c盘总量 键值:vfs.fs.size[c:,total]
名称:c盘剩余量 键值:vfs.fs.size[c:,free]
名称:c盘使用量 键值:vfs.fs.size[c:,used]
名称:c盘剩余百分比 键值:vfs.fs.size[c:,pfree]
名称:c盘使用百分比 键值:vfs.fs.size[c:,pused]
先来完成一个Ping操作
import datetime
import os
import signal
import sqlite3
import subprocess
import sys
import time


def Ping(address, port, timeout):
    """Check whether the Zabbix agent on a host answers ``agent.ping``.

    Args:
        address: IP address or host name of the machine running the agent.
        port: TCP port the agent listens on.
        timeout: Maximum number of seconds to wait for ``get.exe`` to finish.

    Returns:
        1 when ``get.exe`` exited successfully and printed ``1`` in time,
        otherwise 0 (timeout, missing executable, agent unreachable).
    """
    # An argument list without shell=True keeps the address and port from
    # being interpreted by a command shell.
    command = ["get.exe", "-s", str(address), "-p", str(port), "-k", "agent.ping"]
    try:
        # subprocess.run() kills the child itself when the timeout expires,
        # so a hung probe no longer leaves an orphaned process behind.
        finished = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
    except (OSError, subprocess.TimeoutExpired):
        return 0
    # Finishing quickly is not enough: the agent must actually reply "1".
    if finished.returncode == 0 and finished.stdout.strip() == b"1":
        return 1
    return 0


if __name__ == "__main__":
    for i in range(10, 22):
        a = Ping("192.168.1.{}".format(i), "10050", 1)
        print(a)
封装connect方法 connect.py
import datetime
import math
import os
import subprocess
import sys
import time

# Defaults used when the caller does not pass a port / timeout explicitly.
_DEFAULT_PORT = 10050
_DEFAULT_TIMEOUT = 2


def _query(addr, port, key, timeout):
    """Run ``engine.exe`` for one item key and return its text reply.

    Returns the stripped, decoded stdout, or None when the executable is
    missing, the call timed out, the exit status is non-zero, the reply is
    empty, or the agent answered ``ZBX_NOTSUPPORTED``.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", key]
    try:
        # run() terminates the child on timeout instead of leaking it.
        finished = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
    except (OSError, subprocess.TimeoutExpired):
        return None
    text = finished.stdout.decode("utf-8", errors="replace").strip()
    if finished.returncode != 0 or not text or text.startswith("ZBX_NOTSUPPORTED"):
        return None
    return text


def _word(addr, port, key, timeout):
    """Return the first whitespace-separated token of the reply, or 0."""
    text = _query(addr, port, key, timeout)
    return text.split()[0] if text else 0


def _number(addr, port, key, timeout, convert):
    """Return ``convert(first token of the reply)``, or None on any failure."""
    text = _query(addr, port, key, timeout)
    if text is None:
        return None
    try:
        return convert(text.split()[0])
    except ValueError:
        return None


# Check host status
def GetPing(addr, timeout, port=_DEFAULT_PORT):
    """Return 1 when the agent answers ``agent.ping`` with ``1``, else 0."""
    return 1 if _query(addr, port, "agent.ping", timeout) == "1" else 0


# Get the host name of the target
def GetHostname(addr, timeout, port=_DEFAULT_PORT):
    """Return the first token of ``system.hostname`` as str, or 0 on failure."""
    return _word(addr, port, "system.hostname", timeout)


# Get the system family, e.g. Windows / Linux
def GetSysUname(addr, timeout, port=_DEFAULT_PORT):
    """Return the first token of ``system.uname`` as str, or 0 on failure."""
    return _word(addr, port, "system.uname", timeout)


# Get the platform architecture
def GetSysArch(addr, timeout, port=_DEFAULT_PORT):
    """Return the first token of ``system.sw.arch`` as str, or 0 on failure."""
    return _word(addr, port, "system.sw.arch", timeout)


# Get the number of CPU cores
def GetcpuCoreNumber(addr, timeout, port=_DEFAULT_PORT):
    """Return ``system.cpu.num[online]`` as str, or 0 on failure."""
    return _word(addr, port, "system.cpu.num[online]", timeout)


# Get the CPU utilisation
def GetcpuCoreInfo(addr, timeout, port=_DEFAULT_PORT):
    """Return ``system.cpu.util`` rounded up to an int, or 0 on failure."""
    value = _number(addr, port, "system.cpu.util", timeout, float)
    return math.ceil(value) if value is not None else 0


# Get the CPU load: avg1 / avg5 / avg15
def GetcpuLoadAvg(addr, avg, timeout=_DEFAULT_TIMEOUT, port=_DEFAULT_PORT):
    """Return ``system.cpu.load[,<avg>]`` as float, or 0 on failure.

    ``avg`` must be one of ``"avg1"``, ``"avg5"``, ``"avg15"``; any other
    value yields 0 without contacting the agent.
    """
    if avg not in ("avg1", "avg5", "avg15"):
        return 0
    value = _number(addr, port, "system.cpu.load[,{}]".format(avg), timeout, float)
    return value if value is not None else 0


# Get memory data: total / free
def GetMemInfo(addr, timeout, port=_DEFAULT_PORT):
    """Return ``(total_mb, free_mb)`` rounded up, or 0 when either query fails."""
    total = _number(addr, port, "vm.memory.size[total]", timeout, int)
    free = _number(addr, port, "vm.memory.size[free]", timeout, int)
    if total is None or free is None:
        return 0
    return math.ceil(total / 1024 / 1024), math.ceil(free / 1024 / 1024)


# Get the free-space percentage of the root file system
def GetdiskInfo(self):
    """Return ``vfs.fs.size[/,pfree]`` as float, or 0 on failure.

    ``self`` is any object exposing ``addr`` and ``port`` attributes.
    """
    value = _number(self.addr, self.port, "vfs.fs.size[/,pfree]", _DEFAULT_TIMEOUT, float)
    return value if value is not None else 0


# Check whether a given process is running
def GetProcessstatus(addr, procname, port=_DEFAULT_PORT, timeout=_DEFAULT_TIMEOUT):
    """Return 1 when ``proc.num`` reports at least one ``procname``, else 0."""
    count = _number(addr, port, 'proc.num["{}"]'.format(procname), timeout, int)
    # Any positive count means "running"; comparing against exactly 1 would
    # report multi-process services as stopped.
    return 1 if count else 0


# Check whether a port is listening
def GetNetworkPort(addr, check_port, port=_DEFAULT_PORT, timeout=_DEFAULT_TIMEOUT):
    """Return 1 when ``net.tcp.listen[check_port]`` reports 1, else 0."""
    state = _number(addr, port, "net.tcp.listen[{}]".format(check_port), timeout, int)
    return 1 if state == 1 else 0
调用命令,获取系统基本信息。
import connect


def SysInfo():
    """Print one summary row (type, arch, CPU, load, memory, ping) per host.

    Every entry of the built-in host list is queried through ``connect``;
    each query uses a 2 second timeout.
    """
    lis = [["127.0.0.1","MCP服务器"],["192.168.1.1","CTI"]]
    print("IP地址 \t\t 主机作用 \t\t 主机类型 \t\t 主机架构 \t\t 核心数 \t\t cpu利用率 \t\t cpu Avg1 \t Avg5 \t Avg15 \t 内存利用率 \t Ping")
    # Walk the whole list; reading only lis[0] silently skipped the other hosts.
    for host_address, host_user_type in lis:
        host_Ping = connect.GetPing(host_address,2)
        host_type = connect.GetSysUname(host_address,2)
        host_arch = connect.GetSysArch(host_address,2)
        host_cpu_number = connect.GetcpuCoreNumber(host_address,2)
        host_cpu_core = connect.GetcpuCoreInfo(host_address,2)
        host_cpu_load1 = connect.GetcpuLoadAvg(host_address,"avg1",2)
        host_cpu_load5 = connect.GetcpuLoadAvg(host_address,"avg5",2)
        host_cpu_load15 = connect.GetcpuLoadAvg(host_address,"avg15",2)
        host_memory = connect.GetMemInfo(host_address,2)
        # GetMemInfo yields (total, free) on success and 0 on failure; the
        # column is labelled as a utilisation, so turn the pair into a percentage.
        if isinstance(host_memory, tuple) and host_memory[0]:
            total, free = host_memory
            host_memory = "{}%".format(100 - int(free * 100 / total))
        else:
            host_memory = "0%"
        print("{} \t {} \t\t {} \t\t {} \t\t\t {} \t\t\t {}% \t\t {} \t\t {} \t {} \t {} \t {}".
              format(host_address,host_user_type,host_type,host_arch,host_cpu_number,host_cpu_core,host_cpu_load1,host_cpu_load5,host_cpu_load15,host_memory,host_Ping))


if __name__ == '__main__':
    SysInfo()
批量Ping检测
import ast
import os
import subprocess
import sys
import threading
import time

lock = threading.RLock()


def GetPing(command, timeout):
    """Run a ready-made ``agent.ping`` command line and classify the reply.

    Args:
        command: Full command line, split on whitespace into an argv list.
        timeout: Seconds to wait before the probe is treated as failed.

    Returns:
        "正常" when the command printed ``1``, otherwise "异常".
    """
    try:
        # run() kills the child on timeout; no shell is involved.
        finished = subprocess.run(
            command.split(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
    except (OSError, ValueError, IndexError, subprocess.TimeoutExpired):
        return "异常"
    # strip() accepts both "\r\n" (Windows) and "\n" line endings.
    if finished.stdout.strip() == b"1":
        return "正常"
    return "异常"


def MyThread(ptr):
    """Probe one host record and print its result row.

    Args:
        ptr: One line of base.db, a Python list literal such as
            ``["127.0.0.1","ANA2048567","1M-2F","MCP服务器"]``.
    """
    # literal_eval parses the list literal without executing arbitrary code.
    each = ast.literal_eval(ptr)
    command = "engine.exe -s {} -p 10050 -k agent.ping".format(each[0])
    ref = GetPing(command,1)
    row = "{0:15}\t\t {1:15}\t {2:10}\t\t {3:10}\t {4:4}".format(each[0],each[1],each[2],each[3],ref)
    if ref == "异常":
        row += " <--"  # flag failing hosts so they stand out in the table
    # Serialise printing so rows from different threads do not interleave.
    with lock:
        print(row)


if __name__ == "__main__":
    # base.db content: ["127.0.0.1","ANA2048567","1M-2F","MCP服务器"]
    with open("base.db","r",encoding="utf-8") as fp:
        records = [line for line in fp if line.strip()]
    print("-" * 100)
    print("{0:13}\t\t {1:15}\t {2:10}\t\t {3:8}\t {4:4}".format("IP地址","SN号码","机房位置","作用","Ping"))
    print("-" * 100)
    workers = []
    for ptr in records:
        thread = threading.Thread(target=MyThread,args=(ptr,))
        thread.start()
        workers.append(thread)
    # Wait for every probe so the program ends only after all rows are printed.
    for thread in workers:
        thread.join()
批量进程检测
import ast
import subprocess
import sys


# Check whether a given process is running
def GetProcessstatus(addr, port, timeout, procname):
    """Return how many processes named ``procname`` run on the agent host.

    Args:
        addr: Address of the Zabbix agent.
        port: Agent port (10050 in the article's setup).
        timeout: Seconds to wait for ``engine.exe``.
        procname: Process name passed to ``proc.num``.

    Returns:
        The process count as int, or 0 when nothing is running or the
        query failed / timed out.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", 'proc.num["{}"]'.format(procname)]
    try:
        finished = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
        return int(finished.stdout.strip())
    except (OSError, ValueError, subprocess.TimeoutExpired):
        # Covers a missing executable, a timeout and non-numeric replies.
        return 0


if __name__ == "__main__":
    # Each process.db line is a list literal: [address, process, process, ...]
    with open("process.db", encoding="utf-8") as proc_fp:
        records = [ast.literal_eval(line) for line in proc_fp if line.strip()]
    for proc in records:
        print("-" * 70)
        print("---> 巡检地址: {}".format(proc[0]))
        print("-" * 70)
        for name in proc[1:]:
            ref = GetProcessstatus(proc[0],10050,3,name)
            if ref != 0:
                print("进程: {0:18} \t 进程数: {1:5} \t 状态: {2}".format(name,ref,"√"))
            else:
                print("进程: {0:18} \t 进程数: {1:5} \t 状态: {2}".format(name,ref,"×"))
        print()
批量cpu负载检测等
import ast
import math
import subprocess
import sys
import threading

lock = threading.RLock()


def _query(addr, port, key, timeout):
    """Return the first token printed by ``engine.exe`` for ``key``, or None."""
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", key]
    try:
        finished = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
    except (OSError, subprocess.TimeoutExpired):
        return None
    tokens = finished.stdout.decode("utf-8", errors="replace").split()
    return tokens[0] if tokens else None


# Get the CPU utilisation in percent
def GetcpuCoreInfo(addr, timeout, port=10050):
    """Return ``system.cpu.util`` rounded up as ``"N%"``; ``"0%"`` on failure."""
    try:
        return str(math.ceil(float(_query(addr, port, "system.cpu.util", timeout)))) + "%"
    except (TypeError, ValueError):
        return str("0%")


# Get the CPU load: avg1 / avg5 / avg15
def GetcpuLoadAvg(addr, avg, timeout=1, port=10050):
    """Return ``system.cpu.load[,<avg>]`` as float; 0 on failure."""
    try:
        return float(_query(addr, port, "system.cpu.load[,{}]".format(avg), timeout))
    except (TypeError, ValueError):
        return 0


# Get the memory utilisation in percent
def GetMemInfo(addr, timeout, port=10050):
    """Return used memory as ``"N%"``; ``"0%"`` when a query fails."""
    try:
        total = int(_query(addr, port, "vm.memory.size[total]", timeout))
        free = int(_query(addr, port, "vm.memory.size[free]", timeout))
        # Work on the byte counts directly; dividing by int(total_mb / 100)
        # raised ZeroDivisionError for hosts with less than 100 MB of memory.
        return str(100 - int(free * 100 / total)) + "%"
    except (TypeError, ValueError, ZeroDivisionError):
        return str(0) + "%"


def MyThread(ptr):
    """Collect CPU, memory and load figures for one base.db record and print them."""
    # literal_eval parses the list literal without executing arbitrary code.
    address = ast.literal_eval(ptr)
    cpu_info = GetcpuCoreInfo(address[0],1)
    mem_info = GetMemInfo(address[0],1)
    cpu_load1 = GetcpuLoadAvg(address[0],"avg1",1)
    cpu_load5 = GetcpuLoadAvg(address[0],"avg5",1)
    cpu_load15 = GetcpuLoadAvg(address[0],"avg15",1)
    # Serialise printing so rows from different threads do not interleave.
    with lock:
        print("{0:10} \t {1:10} \t {2:10} \t {3:10} \t {4:10} \t {5:10} \t".
              format(address[0],cpu_info,mem_info,cpu_load1,cpu_load5,cpu_load15))


if __name__ == "__main__":
    with open("base.db",encoding="utf-8") as fp:
        records = [line for line in fp if line.strip()]
    print("-" * 100)
    print("IP地址 \t\t cpu利用率 \t 内存利用率 \t 1分钟负载 \t 5分钟负载 \t 15分钟负载 \t")
    print("-" * 100)
    workers = []
    for ptr in records:
        thread = threading.Thread(target=MyThread,args=(ptr,))
        thread.start()
        workers.append(thread)
    for thread in workers:
        thread.join()
封装一个Zabbix调用类:
import json
import math
import subprocess


class Engine():
    """Thin wrapper around ``get.exe`` (zabbix_get) for one agent.

    Args:
        address: Address of the Zabbix agent.
        port: Port the agent listens on.
        timeout: Seconds to wait for each query (defaults to 2, the value
            that used to be hard-coded).
    """

    def __init__(self, address, port, timeout=2):
        self.address = address
        self.port = port
        self.timeout = timeout

    def _raw(self, key):
        """Return the complete stripped reply for ``key``, or None on failure."""
        command = ["get.exe", "-s", str(self.address), "-p", str(self.port), "-k", key]
        try:
            # stdout must be piped to be readable; run() also kills the child
            # when the timeout expires.
            finished = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=self.timeout,
            )
        except (OSError, subprocess.TimeoutExpired):
            return None
        text = finished.stdout.decode("utf-8", errors="replace").strip()
        return text or None

    def GetValue(self, key):
        """Return the first whitespace-separated token of the reply, or 0."""
        text = self._raw(key)
        return text.split()[0] if text else 0

    # Basic host information
    def GetSystem(self):
        """Return a dict with Address, Hostname, Uname and Ping."""
        return {
            "Address": self.address,
            "Hostname": self.GetValue("system.hostname"),
            "Uname": self.GetValue("system.uname"),
            "Ping": self.GetValue("agent.ping"),
        }

    # CPU utilisation and load
    def Getcpu(self):
        """Return a dict with Address, Core, Active, Avg1, Avg5 and Avg15."""
        try:
            active = math.ceil(float(self.GetValue("system.cpu.util")))
        except ValueError:
            active = 0
        return {
            "Address": self.address,
            "Core": self.GetValue("system.cpu.num"),
            "Active": active,
            "Avg1": self.GetValue("system.cpu.load[,avg1]"),
            "Avg5": self.GetValue("system.cpu.load[,avg5]"),
            "Avg15": self.GetValue("system.cpu.load[,avg15]"),
        }

    # Memory utilisation
    def GetMemory(self):
        """Return a dict with Address, Total, Free and Percentage (used, ``"N%"``)."""
        ref_dict = { "Address":0,"Total":0,"Free":0,"Percentage":0 }
        ref_dict["Address"] = self.address
        ref_dict["Total"] = self.GetValue("vm.memory.size[total]")
        ref_dict["Free"] = self.GetValue("vm.memory.size[free]")
        try:
            # percentage = 100 - free / (total / 100)
            used = 100 - int(int(ref_dict["Free"]) / (int(ref_dict["Total"]) / 100))
        except (ValueError, ZeroDivisionError):
            # A failed query yields Total == 0; report 0% instead of crashing.
            used = 0
        ref_dict["Percentage"] = str(used) + "%"
        return ref_dict

    # Disk data
    def Getdisk(self):
        """Return one dict per discovered file system (Address, name, Type, Free)."""
        try:
            # The discovery reply is JSON; json.loads parses it without
            # evaluating agent-supplied text as Python code.
            disk_ = json.loads(self._raw("vfs.fs.discovery") or "[]")
        except ValueError:
            disk_ = []
        if isinstance(disk_, dict):
            # Some agent versions wrap the list as {"data": [...]}.
            disk_ = disk_.get("data", [])
        ref_List = []
        for entry in disk_:
            dict_ = { "Address":0,"name":0,"Type":0,"Free":0}
            dict_["Address"] = self.address
            dict_["name"] = entry.get("{#FSNAME}")
            dict_["Type"] = entry.get("{#FSTYPE}")
            if dict_["Type"] != "UNKNOWN":
                pfree = self.GetValue("vfs.fs.size[\"{0}\",pfree]".format(dict_["name"]))
                try:
                    dict_["Free"] = str(math.ceil(float(pfree)))
                except ValueError:
                    dict_["Free"] = "0"
            else:
                dict_["Free"] = 0
            ref_List.append(dict_)
        return ref_List


if __name__ == "__main__":
    ptr_windows = Engine("132.35.93.2","10050")
    ret = ptr_windows.Getdisk()
    print(ret)
简单的监控脚本编写 (无脑写法)
首先是连接脚本 win32_connect.py
import math
import subprocess


def _run(argv, timeout):
    """Run ``argv`` and return its stripped stdout as text, or None on failure."""
    try:
        # run() kills the child when the timeout expires.
        finished = subprocess.run(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
    except (OSError, ValueError, IndexError, subprocess.TimeoutExpired):
        return None
    return finished.stdout.decode("utf-8", errors="replace").strip()


def _query(addr, port, key, timeout):
    """Query one item key through ``engine.exe``; text reply or None."""
    return _run(["engine.exe", "-s", str(addr), "-p", str(port), "-k", key], timeout)


# Get the ping status
def GetPing(command, timeout):
    """Run a ready-made ``agent.ping`` command line.

    Returns "正常" when it printed ``1`` within ``timeout`` seconds,
    otherwise "异常".
    """
    return "正常" if _run(command.split(), timeout) == "1" else "异常"


# Check whether a given process is running
def GetProcessstatus(addr, procname, timeout=3, port=10050):
    """Return the number of ``procname`` processes as int; 0 on failure."""
    try:
        return int(_query(addr, port, 'proc.num["{}"]'.format(procname), timeout))
    except (TypeError, ValueError):
        return 0


# Get the CPU utilisation in percent
def GetcpuCoreInfo(addr, timeout, port=10050):
    """Return ``system.cpu.util`` rounded up as ``"N%"``; ``"0%"`` on failure."""
    try:
        return str(math.ceil(float(_query(addr, port, "system.cpu.util", timeout)))) + "%"
    except (TypeError, ValueError):
        return str("0%")


# Get the CPU load: avg1 / avg5 / avg15
def GetcpuLoadAvg(addr, avg, timeout=1, port=10050):
    """Return ``system.cpu.load[,<avg>]`` as float; 0 on failure."""
    try:
        return float(_query(addr, port, "system.cpu.load[,{}]".format(avg), timeout))
    except (TypeError, ValueError):
        return 0


# Get the memory utilisation in percent
def GetMemInfo(addr, timeout, port=10050):
    """Return used memory as ``"N%"``; ``"0%"`` when a query fails."""
    try:
        total = int(_query(addr, port, "vm.memory.size[total]", timeout))
        free = int(_query(addr, port, "vm.memory.size[free]", timeout))
        return str(100 - int(free * 100 / total)) + "%"
    except (TypeError, ValueError, ZeroDivisionError):
        return str(0)+"%"


# Get disk usage: (free percent, used percent)
def Getdisk(addr, timeout, diskname, port=10050):
    """Return ``(pfree, pused)`` for ``diskname`` rounded up; ``(0, 0)`` on failure.

    The failure value is always a pair so callers can unpack it safely.
    """
    try:
        pfree_disk = math.ceil(float(
            _query(addr, port, "vfs.fs.size[\"{}\",pfree]".format(diskname), timeout)))
    except (TypeError, ValueError):
        return 0,0
    return pfree_disk, math.ceil(100 - pfree_disk)


# Check whether a port is listening
def GetListenPort(addr, check_port, timeout=2, port=10050):
    """Return the int reply of ``net.tcp.listen[check_port]``; 0 on failure."""
    try:
        return int(_query(addr, port, "net.tcp.listen[{}]".format(check_port), timeout))
    except (TypeError, ValueError):
        return 0


# Check a web server: the agent on Local_Addr probes check_addr:check_port
def CheckWebServerStatus(Local_Addr, check_addr, check_port, timeout=2):
    """Return the int reply of ``net.tcp.port`` asked of the agent on ``Local_Addr``.

    Returns 0 when that agent does not answer ``agent.ping`` or the query fails.
    """
    test_Ping = "engine.exe -s {} -p 10050 -k agent.ping".format(Local_Addr)
    if GetPing(test_Ping,1) == "异常":
        return 0
    key = "net.tcp.port[\"{}\",{}]".format(check_addr, check_port)
    try:
        return int(_query(Local_Addr, 10050, key, timeout))
    except (TypeError, ValueError):
        return 0
接着是win32_core.py
import ast

import win32_connect


def _records(path):
    """Read ``path`` and return one parsed list per non-empty line.

    Every line is a Python list literal; literal_eval parses it without
    executing arbitrary code, and the file is closed before returning.
    """
    with open(path, encoding="utf-8") as fp:
        return [ast.literal_eval(line) for line in fp if line.strip()]


def _alive(address):
    """Return "正常" / "异常" for the agent on ``address`` (1 second timeout)."""
    command = "engine.exe -s {} -p 10050 -k agent.ping".format(address)
    return win32_connect.GetPing(command,1)


# Report which hosts are alive
def MyPing():
    """Print one liveness row per record of win32_base.db."""
    print("-" * 100)
    print("{0:20} \t {1:10} \t {2:13} \t {3:5} \t {4:9} \t {5:40}".format("IP地址","机器系统","设备SN","机房位置","存活状态","主机作用"))
    print("-" * 100)
    for eval_List in _records("win32_base.db"):
        ref = _alive(eval_List[0])
        print("{0:20} \t {1:15} \t {2:13} \t {3:10} \t {4:5} \t {5:40}".
              format(eval_List[0],eval_List[1],eval_List[2],eval_List[3],ref,eval_List[4]))


# Report the watched processes of every host
def MyProcessCheck():
    """Print the process count of every process listed in win32_process.db."""
    for proc in _records("win32_process.db"):
        print("-" * 70)
        print("---> 巡检地址: {}".format(proc[0]))
        print("-" * 70)
        for name in proc[1:]:
            ref = win32_connect.GetProcessstatus(proc[0],name)
            if ref != 0:
                print("进程: {0:18} \t 进程数: {1:5} \t 状态: {2}".format(name,ref,"√"))
            else:
                print("进程: {0:18} \t 进程数: {1:5} \t 状态: {2}".format(name,ref,"×"))
        print()


# Report CPU, memory and load of every host
def GetLoadAvg():
    """Print CPU, memory and load figures per record of win32_base.db.

    Hosts whose agent does not answer get "-1" in every metric column.
    """
    print("-" * 120)
    print("IP地址 \t\t\t 系统类型 \t\t cpu利用率 \t 内存利用率 \t 1分钟负载 \t 5分钟负载 \t 15分钟负载 \t 主机位置 \t\t 主机作用")
    print("-" * 120)
    row = "{0:10} \t {1:15} \t {2:4} \t {3:4} \t {4:7} \t {5:7} \t {6:7} \t\t {7:10} \t {8:30}"
    for ptr in _records("win32_base.db"):
        if _alive(ptr[0]) == "正常":
            cpu_info = win32_connect.GetcpuCoreInfo(ptr[0],1)
            mem_info = win32_connect.GetMemInfo(ptr[0],1)
            cpu_load1 = win32_connect.GetcpuLoadAvg(ptr[0],"avg1",1)
            cpu_load5 = win32_connect.GetcpuLoadAvg(ptr[0],"avg5",1)
            cpu_load15 = win32_connect.GetcpuLoadAvg(ptr[0],"avg15",1)
            print(row.format(ptr[0],ptr[1],cpu_info,mem_info,cpu_load1,cpu_load5,cpu_load15,ptr[3],ptr[4]))
        else:
            print(row.format(ptr[0],ptr[1],"-1","-1","-1","-1","-1",ptr[3],ptr[4]))


# Report disk usage
def GetFdisk():
    """Print free / used percentages for each partition listed in win32_disk.db."""
    for eval_List in _records("win32_disk.db"):
        print()
        print("-" * 80)
        print("检测主机: {}".format(eval_List[0]))
        print("-" * 80)
        # One ping per host is enough; it does not change between partitions.
        if _alive(eval_List[0]) != "正常":
            continue
        for disk in eval_List[1:]:
            pfree,ptotal = win32_connect.Getdisk(eval_List[0],1,disk)
            print("---> 磁盘分区: {0:10} \t 剩余空间: {1:5} \t 已用空间: {2:5}".format(disk,str(pfree)+"%",str(ptotal)+"%"))


# Report whether the web servers are reachable
def CheckWebServer():
    """Probe every host/port listed in WebServer.db from the jump host's agent."""
    for fp_List in _records("WebServer.db"):
        ref = win32_connect.CheckWebServerStatus("132.35.93.2",fp_List[0],fp_List[1])
        if ref == 1:
            print("[成功] -> Web主机: {0:20} \t 检测端口: {1:5} \t 业务名称: {2:20}".format(fp_List[0],fp_List[1],fp_List[2]))
        else:
            print("*失败* -> Web主机: {0:20} \t 检测端口: {1:5} \t 业务名称: {2:20}".format(fp_List[0],fp_List[1],fp_List[2]))
最后的win32_engine.py
import os,stderr=subprocess.PIPE) Free = float(Free.stdout.readlines()[0].split()[0]) return Free except Exception: return 0# 获取特定进程是否运行def GetProcessstatus(addr,shell=True) ref = process.stdout.readlines()[0] if ref == b"1\r\n": return 1 return 0
监控unix系列,Unix_core.py
import ast
import shlex

import paramiko

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

# Shown instead of the real password in report headers.
_MASK = "******"


def _records(path):
    """Return one parsed list per non-empty line of ``path``.

    Lines are Python list literals; literal_eval parses them without
    executing arbitrary code, and the file is closed before returning.
    """
    with open(path, encoding="utf-8") as fp:
        return [ast.literal_eval(line) for line in fp if line.strip()]


def _text(result):
    """Decode a BatchCMD result to str; a failed call (-1) becomes ''."""
    if isinstance(result, bytes):
        return result.decode("utf-8", errors="replace")
    return ""


# Run a command over SSH
def BatchCMD(address, username, password, port, command):
    """Run ``command`` on ``address`` over SSH.

    Returns:
        The command's stdout as bytes, or -1 when the connection failed,
        the command raised, or it produced no output.
    """
    try:
        ssh.connect(hostname=address,username=username,password=password,port=port,timeout=2)
        stdin,stdout,stderr = ssh.exec_command(command)
        result = stdout.read()
        if len(result) != 0:
            return result
        return -1
    except Exception:
        return -1
    finally:
        # Drop the session so the shared client can connect to the next host.
        ssh.close()


# Report which hosts accept an SSH login
def GetPing():
    """Print one liveness row per record of unix_base.db.

    A host counts as alive when a trivial command run over SSH prints 0.
    """
    print("-" * 100)
    print("{0:20} \t {1:10} \t {2:13} \t {3:5} \t {4:9} \t {5:40}".format("IP地址","机器系统","设备SN","机房位置","存活状态","主机作用"))
    print("-" * 100)
    for ref in _records("unix_base.db"):
        ret = BatchCMD(ref[0],ref[5],ref[6],22,"pwd | echo $?")
        try:
            alive = int(ret) == 0
        except (TypeError, ValueError):
            alive = False
        if alive:
            print("{0:20} \t {1:10} \t {2:11} \t {3:5} \t {4:9} \t {5:40}".
                  format(ref[0],ref[1],ref[2],ref[3],"正常",ref[4]))
        else:
            print("{0:20} \t {1:10} \t {2:13} \t {3:5} \t {4:9} \t {5:40}".
                  format(ref[0],ref[1],ref[2],ref[3],"异常",ref[4]))


# ps aux | grep "usbCfgDev" | grep -v "grep" | awk '{print $2}'
def GetProcessstatus():
    """Print the PID of every watched process listed in unix_process.db.

    Each record is ``[address, user, password, process, process, ...]``.
    """
    for proc in _records("unix_process.db"):
        print("-" * 70)
        # The password is masked so it does not end up in terminals or logs.
        print("---> 巡检地址: {0:10} \t 登录用户: {1:7} \t 登录密码: {2:10}".format(proc[0],proc[1],_MASK))
        print("-" * 70)
        for name in proc[3:]:
            # shlex.quote keeps a process name from breaking out of the remote shell command.
            command = "ps aux | grep {} | grep -v 'grep' | awk '{{print $2}}' | head -1".format(shlex.quote(name))
            try:
                pid = int(BatchCMD(proc[0],proc[1],proc[2],22,command))
            except (TypeError, ValueError):
                pid = -1
            if pid != -1:
                print("进程: {0:18} \t PID: {1:10} \t 状态: {2}".format(name,pid,"√"))
            else:
                print("进程: {0:18} \t PID:{1:10} \t 状态: {2}".format(name,"-","×"))
        print()


def GetdiskStatus():
    """Print ``df`` output for every host listed in unix_disk.db.

    Each record is ``[address, system, user, password]``.
    """
    for proc in _records("unix_disk.db"):
        print("-" * 100)
        print("---> 巡检地址: {0:10} \t 登录系统: {1:7} \t 登录账号: {2:10} 登录密码: {3:10}".
              format(proc[0],proc[1],proc[2],_MASK))
        print("-" * 100)
        ref = BatchCMD(proc[0],proc[2],proc[3],22,"df | grep -v 'Filesystem'")
        # Decode the bytes instead of printing their repr ("b'...\\n...'").
        print(_text(ref))
        print()


# Run a command on every host of one system type
def runcmd(command,system):
    """Run ``command`` on each unix_disk.db host whose system field equals ``system``."""
    for proc in _records("unix_disk.db"):
        if proc[1] != system:
            continue
        print("-" * 100)
        print("---> 巡检地址: {0:10} \t 登录系统: {1:7} \t 登录账号: {2:10} 登录密码: {3:10}".
              format(proc[0],proc[1],proc[2],_MASK))
        print("-" * 100)
        ref = BatchCMD(proc[0],proc[2],proc[3],22,command)
        print(_text(ref))
配置文件
unix_base.db
["127.0.0.1","Suse","CN11111","C-F-04","国漫 CTI3.6","root","1111111"]
unix_disk.db(地址、系统、账号、密码)
["127.0.0.1","Suse","root","123123"]
unix_process.db(地址、账号、密码、进程名…)
["127.0.0.1","root","123123","oracle","mysqld"]
webserver.db
["127.0.0.1",8005,"跨省投诉-在线客服跨省协办"]
win32_base.db
["127.0.0.1","windows XP","0000000","0-0-0","本地跳板机"]
win32_disk.db
["127.0.0.1","c:","d:"]
win32_port.db
["127.0.0.1","80","3421","8080"]
win32_process.db
["127.0.0.1","ilmt_tray.exe","awhost32.exe"]
比较好的改进写法
上方代码中每个表都有一份账号密码,很麻烦,第二次重写我通过一个UUID序号,来定位用户名密码等登录信息,然后在一个配置文件中包含了所有配置项,程序中对不同的项目进行解析,来实现一个配置文件配置所有的配置项,只有基础base结构存储账号密码,每个base结构对应一个UUID号,当添加进程等表时,自动根据UUID号码,来解析账号密码,这样通过序号关联,只需写一份密码即可,通过UUID相关联。
使用Django开发图形化界面
收集目标主机数据的类:
import math
import os
import subprocess


class GetSysInfo(object):
    """Collect basic metrics from one Zabbix agent through ``get.exe``.

    Args:
        addr: Address of the agent.
        port: Port the agent listens on.
    """

    # Seconds to wait for each get.exe call before giving up.
    _TIMEOUT = 3

    def __init__(self, addr, port):
        self.addr = addr
        self.port = port

    def _first(self, key):
        """Return the first token of the reply for ``key`` as str.

        Raises on any failure (missing executable, timeout, empty reply);
        the public methods translate that into their ``0`` result.
        """
        command = ["get.exe", "-s", str(self.addr), "-p", str(self.port), "-k", key]
        finished = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=self._TIMEOUT,
        )
        return finished.stdout.decode("utf-8", errors="replace").split()[0]

    def GetHostname(self):
        """Return the agent's ``system.hostname`` as str, or 0 on failure."""
        try:
            # Decoded text, not str(bytes), which would yield "b'name'".
            return self._first("system.hostname")
        except Exception:
            return 0

    def GetcpuInfo(self):
        """Return ``system.cpu.util`` rounded up to an int, or 0 on failure."""
        try:
            return math.ceil(float(self._first("system.cpu.util")))
        except Exception:
            return 0

    def GetMemInfo(self):
        """Return ``(total_mb, free_mb)`` rounded up, or 0 on failure."""
        try:
            Total = math.ceil(int(self._first("vm.memory.size[total]"))/1024/1024)
            Free = math.ceil(int(self._first("vm.memory.size[free]"))/1024/1024)
            return Total,Free
        except Exception:
            return 0

    def GetdiskInfo(self):
        """Return the free-space percentage of ``/`` as float, or 0 on failure."""
        try:
            return float(self._first("vfs.fs.size[/,pfree]"))
        except Exception:
            return 0

    def GetNetInfo(self, iface="ens32"):
        """Return ``(bytes_in, bytes_out)`` for ``iface``, or 0 on failure."""
        try:
            InModule = int(self._first("net.if.in[{},bytes]".format(iface)))
            OutModule = int(self._first("net.if.out[{},bytes]".format(iface)))
            return InModule,OutModule
        except Exception:
            return 0


if __name__ == "__main__":
    a = GetSysInfo("192.168.1.20","10050")
    b = a.GetNetInfo()
    print(b)
admin.py
from django.contrib import admin
from MyWeb.models import *


@admin.register(HostInfo)
class Myadmin(admin.ModelAdmin):
    """Admin page for HostInfo records."""

    # Django only honours the lowercase ``list_display`` attribute; these are
    # the columns shown in the change list.
    list_display = ("HostAddr","Hostname","Hostcpu","HostMem","Hostdisk","HostNet")
models.py
from django.db import models


class HostInfo(models.Model):
    """One monitored host and the latest values collected from it."""

    ID = models.AutoField(primary_key = True)
    HostAddr = models.CharField(max_length=64,verbose_name="主机地址")
    Hostname = models.CharField(max_length=64,verbose_name="主机名称")
    Hostcpu = models.CharField(max_length=64,verbose_name="cpu利用率")
    HostMem = models.CharField(max_length=64,verbose_name="内存数据")
    Hostdisk = models.CharField(max_length=64,verbose_name="磁盘空闲")
    HostNet = models.CharField(max_length=64,verbose_name="网卡流量")
先来判断是否有指定的IP地址,有的话后端直接爬取数据并填充到数据库中,要求是只需要输入IP地址即可取出所有的数据,并自动填充,有几台自动填充几台。
if __name__ == "__main__":
    import sqlite3

    # Read every registered host address from the Django database and print
    # its network counters.
    conn = sqlite3.connect("C:/Users/LyShark/PycharmProjects/MyProject/db.sqlite3")
    try:
        cursor = conn.cursor()
        # Select the address column by name instead of relying on its position.
        cursor.execute('select HostAddr from MyWeb_hostinfo;')
        for (addr,) in cursor.fetchall():
            info = GetSysInfo(addr,"10050")
            print(info.GetNetInfo())
    finally:
        # Release the database file even when a query or probe raises.
        conn.close()
fabric的使用技巧 fabric工具也是自动化运维利器,其默认依赖于paramiko的二次封装.
# Simple command execution
from fabric import Connection

conn = Connection(host="192.168.1.10",user="root",port="22",connect_kwargs={"password":"123"})
try:
    with conn.cd("/var/www/html/"):
        ret = conn.run("ls -lh",hide=True)
    # conn.port is an int, so it is converted before concatenation.
    print("主机:" + conn.host + "端口:" + str(conn.port) + "完成")
except Exception:
    print("主机:" + conn.host + "端口:" + str(conn.port) + "失败")
finally:
    conn.close()

# Read remote data back to the local side
from fabric import Connection

conn = Connection(host="192.168.1.20",connect_kwargs={"password":"123"})
uname = conn.run('uname -s',hide=True)
# `uname -s` prints "Linux" with a capital L.
if 'Linux' in uname.stdout:
    # Fifth column of `df -h` is the Use% value of the root file system.
    command = "df -h / | tail -n1 | awk '{print $5}'"
    print(conn.run(command,hide=True).stdout.strip())
conn.close()

# File upload and download
from fabric import Connection

conn = Connection(host="192.168.1.20",connect_kwargs={"password":"123"})
conn.put("D://zabbix_get.exe","/tmp/zabbix.exe")    # upload a file
conn.get("/tmp/zabbix.exe","./zab.exe")             # download a file
conn.close()
总结 以上是内存溢出为你收集整理的Python 开发简易巡检工具全部内容,希望文章能够帮你解决Python 开发简易巡检工具所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)