python 进程监控告警脚本

葫芦的运维日志

下一篇 搜索 上一篇

2019/01/26 22:47


业务上的一个需求:

监控进程是否存在,如果检测时不存在则发送告警短信,如果连续10次检测都不存在则发送告警电话。

最近一直在学前端vue.js,今天试着拿python练下,以免手生。

里面的一些参数可能需要根据自身需求作相应调整。

 

#!/usr/bin/env python
# coding=utf-8
# author: wz
# mail: 277215243@qq.com
# datetime:2019/1/26 22:34 PM
# web: https://www.bthlt.com

"""
SYNOPSIS
    ./procmonitor.py <sms_id> <uwork_id>
DESCRIPTION
    解析进程xml,监控其中列出的进程是否存在:检测到不存在时发送sms告警,连续多次检测都不存在时发送uwork电话告警
DEMO
    ./procmonitor.py 123456 654321
NOTE
   None
"""

import re
import os
import sys


class procmonitor(object):
    """Check that the processes listed in an XML file are running and alarm if not.

    For every process identifier found in the XML file a small counter file
    is kept under ``path``.  Each check that finds the process missing bumps
    the counter and sends an SMS alarm; once the counter reaches
    ``threshold`` a phone (uwork) alarm is sent as well and the counter is
    reset.  A check that finds the process running resets the counter.
    """

    def __init__(self, parse_xml, path, sms_id, uwork_id, threshold):
        """
        parse_xml: process file to parse
        path: directory prefix under which the per-process counters are stored
              (it is concatenated with the process name as-is, so it should
              end with a path separator)
        sms_id: SMS alarm id
        uwork_id: phone alarm id
        threshold: number of consecutive misses that triggers the phone alarm
        """
        self.parse_xml = parse_xml
        self.path = path
        self.sms_id = sms_id
        self.uwork_id = uwork_id
        self.threshold = threshold

    def parse_proc_xml(self):
        """Return the set of process identifiers found in ``self.parse_xml``.

        Lines containing ``Address`` are consumed in pairs: the first line of
        each pair is skipped, and from the second one the text between the
        first ``>`` and the last ``<`` is collected.
        """
        proc_names = set()
        take_this_one = False
        with open(self.parse_xml) as procfile:
            for line in procfile:
                if 'Address' not in line:
                    continue
                if take_this_one:
                    proc_names.update(re.findall(r'>(.*)<', line))
                # Alternate between "skip" and "collect" on every match.
                take_this_one = not take_this_one
        return proc_names

    def write_proc_count(self, proc_file, tag_number):
        """Store ``tag_number`` as the counter for ``proc_file``.

        ``tag_number`` may be an int or a string; it is converted with
        ``str()`` because file objects only accept text.  The file is created
        if it does not exist yet.
        """
        with open(self.path + proc_file, 'w') as tag_file:
            tag_file.write(str(tag_number))

    def read_proc_count(self, proc_file):
        """Return the counter stored for ``proc_file`` as an int.

        Raises:
            ValueError: if the counter file is missing, unreadable, empty or
                does not contain an integer.
        """
        try:
            with open(self.path + proc_file) as tag_file:
                raw = tag_file.read()
        except (IOError, OSError):
            # Treat a missing/unreadable file like an empty one so callers
            # only ever have to deal with ValueError.
            raw = ''
        return int(raw)

    def alarm_sms(self, proc):
        """Run ``/gentRep`` with the SMS alarm id for ``proc`` and print its output."""
        print(os.popen('/gentRep {0} '
                       '"{1} proc not exist!"'.format(self.sms_id, proc)).read())

    def alarm_uwork(self, proc):
        """Run ``/gentRep`` with the phone alarm id for ``proc`` and print its output."""
        print(os.popen('/gentRep {0} '
                       '"{1} check 10 times proc not exist!"'.format(self.uwork_id, proc)).read())

    def _proc_running(self, proc):
        """Return True when ``ps -ef`` shows at least one line matching ``proc``.

        The match is a plain ``grep``, so any ``ps`` line that contains the
        identifier (other than the grep commands themselves) counts.
        """
        listing = os.popen('ps -ef | grep {0}|grep -v grep'.format(proc)).read()
        return listing != ''

    def run_alarm(self):
        """Check every configured process once and send the alarms that are due.

        Returns:
            ``'have proc alarm'`` if at least one process was missing,
            ``'no proc alarm!'`` if all were running, or ``None`` if an
            unexpected error occurred (the error is printed).
        """
        alarm_string = 'no proc alarm!'
        try:
            for proc in self.parse_proc_xml():
                if self._proc_running(proc):
                    # Process is back (or never left): restart the streak.
                    self.write_proc_count(proc, '0')
                    continue

                alarm_string = 'have proc alarm'
                try:
                    count = self.read_proc_count(proc)
                except ValueError:
                    # No usable counter yet: start counting from zero.
                    count = 0
                count += 1
                self.write_proc_count(proc, count)
                self.alarm_sms(proc)
                # Escalate on the threshold-th consecutive miss, then start
                # a fresh streak.
                if count >= self.threshold:
                    self.alarm_uwork(proc)
                    self.write_proc_count(proc, '0')
            return alarm_string
        except Exception as ex:
            # Top-level boundary of the check: report and signal failure to
            # the caller through the None return value.
            print(ex)
            return None


def usage():
    """Print this module's docstring, which serves as the command-line help."""
    # Call form with a single argument prints the same text on Python 2
    # and Python 3.
    print(sys.modules[__name__].__doc__)


if __name__ == '__main__':

    # Exactly two arguments are expected after the script name:
    # <sms_id> <uwork_id>.
    if len(sys.argv) != 3:
        print('Error: illegal args.')
        usage()
        sys.exit(-1)

    proc_monitor = procmonitor(parse_xml='/data/proc.xml',
                               path='/data/monitor/proc/',
                               sms_id=sys.argv[1],
                               uwork_id=sys.argv[2],
                               threshold=10)
    result = proc_monitor.run_alarm()
    # run_alarm() returns None when it hit an error, which it has already
    # printed itself; only a real status string is echoed here.
    if result is not None:
        print(result)
    sys.exit(0)

 

[test@test ~]$ vim a.py
[test@test ~]$ python a.py
Error: illegal args.

SYNOPSIS
    ./procmonitor.py <sms_id> <uwork_id>
DESCRIPTION
    解析进程xml,监控进程是否存在,不存在sms告警,不存在此uwork告警
DEMO
    ./procmonitor.py 123456 654321
NOTE
   None

[test@test ~]$ python a.py testnumber testnumber
no proc alarm!

 

葫芦的运维日志

打赏

上一篇 搜索 下一篇
© 冰糖葫芦甜(bthlt.com) 2019 王梓 打赏联系方式 陕ICP备17005322号