Summing and Averaging with AWK and Python

The file is as follows:

# cat cesc 
a,1
a,2
b,3
b,4
c,2
d,5

We need the number of times a, b, c, and d each appear, along with the sum and the average of the numbers after the comma.

With shell:

# grep -E ^a cesc |awk -F ',' '{sum+=$2} END {print "a, Count:" NR " Sum: " sum " Average: " sum/NR}'
a, Count:2 Sum: 3 Average: 1.5
# grep -E ^b cesc |awk -F ',' '{sum+=$2} END {print "b, Count:" NR " Sum: " sum " Average: " sum/NR}'
b, Count:2 Sum: 7 Average: 3.5
# grep -E ^c cesc |awk -F ',' '{sum+=$2} END {print "c, Count:" NR " Sum: " sum " Average: " sum/NR}'
c, Count:1 Sum: 2 Average: 2
# grep -E ^d cesc |awk -F ',' '{sum+=$2} END {print "d, Count:" NR " Sum: " sum " Average: " sum/NR}'
d, Count:1 Sum: 5 Average: 5

Or write it as a for loop, which is more portable. Also, there are two ways to reference a shell variable inside awk: one is to wrap the variable in double and single quotes, like "'$var'"; the other is to declare it up front with awk's -v option, like awk -v var="$var".

# for i in `cat cesc |cut -d, -f1|sort|uniq`;do grep -E ^$i cesc |awk -F ',' '{sum+=$2} END {print "'$i'" " Count: " NR ", Sum: " sum ", Average: " sum/NR}';done
a Count: 2, Sum: 3, Average: 1.5
b Count: 2, Sum: 7, Average: 3.5
c Count: 1, Sum: 2, Average: 2
d Count: 1, Sum: 5, Average: 5

Or:

# for i in `cat cesc |cut -d, -f1|sort|uniq`;do grep -E ^$i cesc |awk -v i="$i" -F ',' '{sum+=$2} END {print i " Count: " NR ", Sum: " sum ", Average: " sum/NR}';done
a Count: 2, Sum: 3, Average: 1.5
b Count: 2, Sum: 7, Average: 3.5
c Count: 1, Sum: 2, Average: 2
d Count: 1, Sum: 5, Average: 5


With python: (Python 2's integer division is floor division by default and returns an integer; use from __future__ import division to get true division)

from __future__ import division

alist = []
blist = []
clist = []
dlist = []
for i in open('cesc'):
    ss = i.split(',')
    if ss[0] == 'a':
        alist.append(int(ss[1]))
    elif ss[0] == 'b':
        blist.append(int(ss[1]))
    elif ss[0] == 'c':
        clist.append(int(ss[1]))
    elif ss[0] == 'd':
        dlist.append(int(ss[1]))

print 'a, Count: ' + str(len(alist)) + ', Sum: ' + str(sum(alist)) + ', Average: ' + str(sum(alist)/len(alist))
print 'b, Count: ' + str(len(blist)) + ', Sum: ' + str(sum(blist)) + ', Average: ' + str(sum(blist)/len(blist))
print 'c, Count: ' + str(len(clist)) + ', Sum: ' + str(sum(clist)) + ', Average: ' + str(sum(clist)/len(clist))
print 'd, Count: ' + str(len(dlist)) + ', Sum: ' + str(sum(dlist)) + ', Average: ' + str(sum(dlist)/len(dlist))
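
For comparison, here is a minimal single-pass sketch in Python that groups everything with dictionaries instead of keeping one list per letter; it assumes the same cesc file in the current directory:

from __future__ import division
from collections import defaultdict

sums = defaultdict(int)    # running sum per key
counts = defaultdict(int)  # number of occurrences per key

for line in open('cesc'):
    line = line.strip()
    if not line:
        continue
    key, value = line.split(',')
    sums[key] += int(value)
    counts[key] += 1

for key in sorted(counts):
    print key, 'Count:', counts[key], 'Sum:', sums[key], 'Average:', sums[key] / counts[key]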


Copying the Common and Differing Files of Two Folders into Other Directories

The task: put the files that both folder A and folder B contain into directory D, and put the files that only A has or only B has into directory C.

With shell:

#!/bin/bash

for a in `ls A`
    do 
    echo $a
    if [ -f /Users/cescwu/test/B/$a ];then
        cp /Users/cescwu/test/A/$a /Users/cescwu/test/D/
    else
        cp /Users/cescwu/test/A/$a /Users/cescwu/test/C/
    fi
    done

for a in `ls B`
    do
    echo $a
    if [ -f /Users/cescwu/test/A/$a ];then
        cp /Users/cescwu/test/B/$a /Users/cescwu/test/D/
    else
        cp /Users/cescwu/test/B/$a /Users/cescwu/test/C/
    fi
    done

Or:

#!/bin/bash

for a in `ls A`
    do 
    echo $a
    if [ -f /Users/cescwu/test/B/$a ];then
        cp /Users/cescwu/test/A/$a /Users/cescwu/test/D/
    fi
    done

for a in `diff A B|grep 'Only in A'|awk -F ':' '{print $2}'`
    do
    cp /Users/cescwu/test/A/$a /Users/cescwu/test/C/
    done

for a in `diff A B|grep 'Only in B'|awk -F ':' '{print $2}'`
    do
    cp /Users/cescwu/test/B/$a /Users/cescwu/test/C/
    done

With python:

import os, shutil

pdir = '/Users/cescwu/test/'

def walkdir(dirname, filelist):
    # Collect the file names under dirname into filelist.
    for name in os.listdir(dirname):
        filelist.append(name)
    return filelist

# Build the file lists for folders A and B.
Alist = walkdir(pdir + 'A', [])
Blist = walkdir(pdir + 'B', [])

for i in Alist:
    if i in Blist:
        shutil.copyfile(pdir + 'A/' + i, pdir + 'D/' + i)
    else:
        shutil.copyfile(pdir + 'A/' + i, pdir + 'C/' + i)

for i in Blist:
    if i in Alist:
        shutil.copyfile(pdir + 'B/' + i, pdir + 'D/' + i)
    else:
        shutil.copyfile(pdir + 'B/' + i, pdir + 'C/' + i)
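
A shorter sketch of the same idea using set operations on the two directory listings, assuming the same /Users/cescwu/test/ layout as above:

import os, shutil

pdir = '/Users/cescwu/test/'
a_files = set(os.listdir(pdir + 'A'))
b_files = set(os.listdir(pdir + 'B'))

# Files present in both A and B go to D (copied from A here).
for name in a_files & b_files:
    shutil.copyfile(pdir + 'A/' + name, pdir + 'D/' + name)

# Files unique to either side go to C.
for name in a_files - b_files:
    shutil.copyfile(pdir + 'A/' + name, pdir + 'C/' + name)
for name in b_files - a_files:
    shutil.copyfile(pdir + 'B/' + name, pdir + 'C/' + name)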

It is really a pretty simple problem.

Core Python Programming, Chapter 9, Exercise 9.4

The exercise: read a file and display 25 lines at a time, prompting the user to press any key to continue.

Below is the code I wrote; it passes testing, but the drawback is that it builds a fairly large list, which uses too much memory when the file is very big.

import os

f = open(os.path.expanduser('~/PycharmProjects/MyPython/test.conf'))
flist = f.readlines()

# Starting offsets of each 25-line page.
offsets = [x for x in range(0, len(flist), 25)]

for i in range(1, len(offsets)):
    for x in flist[offsets[i - 1]:offsets[i]]:
        print x,
    print 'press any key to continue'
    os.system('read -s -n 1')
# Print the last (possibly partial) page.
for x in flist[offsets[-1]:]:
    print x,

f.close()


The code below, from 运维生存时间, is better than the one above:

import os
fobj = open(os.path.expanduser('~/PycharmProjects/MyPython/test.conf'))

count = 0
for eachline in fobj:
    print eachline,
    count += 1
    if count%25 == 0:
        print  "Press any key to continue"
        os.system('read -s -n 1')
        #print
        continue
fobj.close()
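
Note that os.system() runs /bin/sh, which may not understand bash's read -s -n 1. A rough sketch of the same pager that pauses with raw_input instead (it needs Enter rather than any key, but has no shell dependency):

import os

fobj = open(os.path.expanduser('~/PycharmProjects/MyPython/test.conf'))

for count, eachline in enumerate(fobj, 1):
    print eachline,
    if count % 25 == 0:
        # raw_input blocks until Enter is pressed; no external shell involved.
        raw_input('Press Enter to continue')
fobj.close()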


Implementing Linux's tail -n in Python

#!/usr/bin/python
# -*- coding: cp936 -*-

__author__ = "$Author:$"
__version__ = "$Revision:$"
__date__ = "$Date:$"

import os

while True:
    filename = raw_input('Enter filename:')
    if not os.path.exists(filename):
        print 'This file does not exist, please try again'
    else:
        lines = raw_input('Enter the number of rows you want:')
        lines = int(lines)
        block_size = 1024
        block = ''
        nl_count = 0
        start = 0
        newfile = filename + '_last_' + str(lines) + '_lines' + '.txt'
        newfsock = open(newfile, 'w')
        fsock = file(filename, 'rU')
        try:
            fsock.seek(0, 2)
            curpos = fsock.tell()
            while(curpos > 0):
                curpos = curpos - (block_size + len(block))
                if curpos < 0:
                    curpos = 0
                fsock.seek(curpos)
                block = fsock.read()
                nl_count = block.count('\n')
                # A trailing newline only terminates the last line, so it should
                # not be counted as a separator between lines.
                if block.endswith('\n'):
                    nl_count -= 1
                if nl_count >= lines:
                    break
            # Skip everything before the last `lines` lines.
            for n in range(nl_count - lines + 1):
                start = block.find('\n', start) + 1
        finally:
            fsock.close()
        newfsock.write(block[start:])
        newfsock.close()
        break
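
For comparison, a much shorter sketch of the same idea using collections.deque with maxlen, which streams through the file once and keeps only the last n lines in memory; the file name and line count here are just example values:

from collections import deque

def tail(filename, n):
    # A deque with maxlen=n keeps only the last n lines seen while iterating.
    with open(filename) as f:
        return list(deque(f, maxlen=n))

for line in tail('test.txt', 10):
    print line,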


Python Script: GUI-Selecting an Excel File and Loading It into a Database

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import xlrd, tkFileDialog

def insertdb(list):
    #appConn = MySQLdb.connect(host='xx.xx.xx.xx', user='root', passwd='123456', db='xxx_db', port=3306)
    #appConn.set_character_set('utf8')
    #appCur = appConn.cursor()
    #appCur.execute('SET NAMES utf8;')
    #appCur.execute('SET CHARACTER SET utf8;')
    #appCur.execute('SET character_set_connection=utf8;')
    for i in list:
        print i
        #appCur.execute("""xxxxxx""")

def getvalue(filename):
    data = xlrd.open_workbook(filename)
    #sheetname = data.sheet_names()
    sheet = data.sheet_by_index(0)
    rows = sheet.nrows
    cols = sheet.ncols
    #print cols
    valuelist = []
    for row in range(1, rows):  # skip the header row, keep every remaining data row
        value = sheet.row_values(row)
        #print value
        valuelist.append(value)
    print valuelist
    insertdb(valuelist)

def guirun():

    initialdir = '/Users/cescwu/Downloads/'
    #initialdir = 'd:/'
    filename = tkFileDialog.askopenfilename(initialdir = initialdir, filetypes=[("Text files","*.xlsx")], message = '请选择需要入库的文件')
    print filename
    getvalue(filename)

if __name__ == '__main__':
    guirun()

This mainly uses the xlrd package for reading Excel files in Python, together with the tkFileDialog module; the database-insert step is omitted.
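
For the omitted insert step, a rough sketch of what insertdb might look like using MySQLdb's executemany; the table and column names are purely hypothetical and would need to match the real schema:

import MySQLdb

def insertdb(valuelist):
    # Connection parameters mirror the commented-out ones above; the table and
    # column names are placeholders.
    conn = MySQLdb.connect(host='xx.xx.xx.xx', user='root', passwd='123456',
                           db='xxx_db', port=3306, charset='utf8')
    cur = conn.cursor()
    cur.executemany("INSERT INTO excel_data (col1, col2, col3) VALUES (%s, %s, %s)",
                    [tuple(row[:3]) for row in valuelist])
    conn.commit()
    cur.close()
    conn.close()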

Also, you can use PyInstaller to package the Python script into an exe.

Download: https://github.com/pyinstaller/pyinstaller/releases/download/3.0/PyInstaller-3.0.tar.gz

Extract it and install the module:

python setup.py install

Put your script in the extracted directory and run python pyinstaller.py -F scripts.py.

MySQL Master-Slave Monitoring Python Script

The script judges replication health by monitoring three fields of MySQL slave status: Slave_IO_Running, Slave_SQL_Running, and Seconds_Behind_Master, and sends an alert email when something looks wrong. Update 2015-07-24: in practice the Seconds_Behind_Master value went above 0 a few times a day and triggered alerts, so the script now alerts only when Seconds_Behind_Master is greater than 0 on two consecutive checks.
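
The "two consecutive checks" rule just means remembering the previous Seconds_Behind_Master value between runs. Here is a minimal sketch of that check alone, using a hypothetical state file (the full script below keeps this state in its own log file instead):

import os

STATE_FILE = '/tmp/seconds_behind_master.last'  # hypothetical location

def lagging_twice(seconds_behind_master):
    # Read the value recorded by the previous run (0 if this is the first run).
    last = 0
    if os.path.isfile(STATE_FILE):
        last = int(open(STATE_FILE).read().strip() or 0)
    # Remember the current value for the next run.
    open(STATE_FILE, 'w').write(str(seconds_behind_master))
    # Alert only when both the previous and the current readings are above zero.
    return seconds_behind_master > 0 and last > 0

The full script: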

#!/usr/bin/python
# -*- coding: cp936 -*-

import os
import time
import datetime
import string
import MySQLdb
import smtplib
from email.Header import Header
from email.MIMEText import MIMEText
import urllib


def sendMail(ip, ioStatus, sqlStatus, lastErrNo, lastError, masterLogFile, masterLogFilePos, relayLogFile, relayLogFilePos, relayMasterLogFile, secondsBehindMaster):

    mailto_list = ["abc@xyz.com"]
    mail_host = "mail.xyz.com"
    mail_user = "mon@xyz.com"
    mail_pass = "passwd"
    mail_from = "<mon@xyz.com>"
    mail_postfix = "xyz.com"
    body = ip + " mysql slave status:\n\n IO Thread Status: " + ioStatus + "\n SQL Thread Status: " + sqlStatus + "\n LastErrorNo: " + str(lastErrNo)
    body = body + "\n LastError: " + str(lastError) + "\n Master Log File: " + str(masterLogFile) + "\n Read Master Log File Pos: " + str(masterLogFilePos)
    body = body + "\n Relay Log File: " + relayLogFile + "\n Relay Log File Pos: " + str(relayLogFilePos) +  "\n Relay Master Log File: " + relayMasterLogFile
    body = body + "\n Seconds_Behind_Master: " + str(secondsBehindMaster)

    mail_subject = "(" +ip + ") MYSQL REPLICATION ERROR!!!"
    me = mail_from+"<"+mail_user+">"
    msg = MIMEText(body,_subtype='plain',_charset='gb2312')
    msg['Subject'] = mail_subject
    msg['From'] = mail_from
    msg['To'] = ";".join(mailto_list)
    try:
        s = smtplib.SMTP()
        s.connect(mail_host)
        s.login(mail_user,mail_pass)
        s.sendmail(me, mailto_list, msg.as_string())
        s.close()
        return True
    except Exception, e:
        print str(e)
        return False

def check(ip, username, pwd):
    conn = MySQLdb.connect(host=ip, user=username, passwd=pwd,use_unicode=True,charset='utf8')
    cur = conn.cursor()
    cur.execute("show slave status")
    conn.commit()
    rows = cur.fetchall()

    ioStatus = ""
    sqlStatus = ""
    lastErrNo = 0
    secondsBehindMaster = 0

    logfile = "/var/log/mysql_db_replication_mon_" + str(ip) + ".log"
    if os.path.isfile(logfile):
        #print logfile + ' exists.'
        # Read the Seconds_Behind_Master value recorded by the previous run.
        cmd = "sed -n '$p' " + logfile + "|awk -F ':' '{print $6}'"
        secondsBehindMaster_last = int(os.popen(cmd).read().strip() or 0)
    else:
        print logfile + ' does not exist, creating it.'
        initf = open(logfile,'w')
        initf.close()
        secondsBehindMaster_last = 0

    if len(rows) > 0:
        ioStatus = rows[0][10]
        sqlStatus = rows[0][11]
        lastErrNo = rows[0][18]
        secondsBehindMaster = rows[0][32]
        today = (datetime.date.today().isoformat())
        timestamp = datetime.datetime.strftime(datetime.datetime.today(),"%y%m%d%H%M%S")

    if "Yes" == ioStatus and "Yes" == sqlStatus:
        print >> open(logfile,'a+'), timestamp, ip + ": ioStatus:" + ioStatus + "; sqlStatus:" + sqlStatus + "; lastErrNo:" + str(lastErrNo) + "; Seconds_Behind_Master:" + str(secondsBehindMaster)
        open(logfile,'a+').close()
        if 0 == secondsBehindMaster or 0 == secondsBehindMaster_last:
            cur.close()
            conn.close()
            return

    if "No" == ioStatus and lastErrNo == 23:
        cur.execute("stop slave")
        conn.commit()
        cur.execute("start slave")
        conn.commit()
    cur.close()
    conn.close()
    lastError=rows[0][19]
    masterLogFile=rows[0][5]
    masterLogFilePos=rows[0][6]
    relayLogFile=rows[0][7]
    relayLogFilePos=rows[0][8]
    relayMasterLogFile=rows[0][9]

    try:
        sendMail(ip, ioStatus, sqlStatus, lastErrNo, lastError, masterLogFile, masterLogFilePos, relayLogFile, relayLogFilePos, relayMasterLogFile, secondsBehindMaster)
    except Exception, e:
        print str(e)
        pass

def main():
    SLAVE_ARRAY = ["192.168.0.1", "192.168.0.2", "192.168.0.3"]
    for slave in SLAVE_ARRAY:
        check(slave, "repuser", "reppasswd")

if __name__ == "__main__":
    main()


Monitoring Nginx Access Logs for Abusive Traffic

Sometimes the site gets hammered without my noticing, so I came up with a simple approach: a shell script that records each unique IP's access count to a file once an hour.

#!/bin/bash

if [ $# -ne 2 ];then
echo "Missing Operand..."
exit 1
fi

awk '{print $1,$4}' /usr/local/nginx/logs/$1 |grep "`date +%Y:%H`"|awk '{print $1}'|sort |uniq -c |sort -rn|head -$2 > /usr/local/uniqIpAccessCnt/`date +%Y-%m-%d-%H`.txt

The first argument is the log file name, and the second is how many unique IPs to record. Each hour it creates a file under /usr/local/uniqIpAccessCnt/ named like "2014-12-02-09.txt", holding the unique-IP access counts for 9:00 on December 2, 2014. The file content looks like this:

10304 111.206.12.85
 3351 112.9.28.220
 2191 124.152.184.80
 1706 221.214.13.179
 1407 120.192.232.82
 1208 119.190.40.169
 1137 58.222.187.194
  927 122.224.148.178
  915 218.3.162.66
  899 27.13.83.173
  806 113.4.197.208
  800 114.239.129.160
  763 175.1.35.110
  720 180.89.233.72
  714 118.197.131.232
  652 221.192.232.38
  596 175.171.126.215
  509 117.174.26.117
  470 118.144.66.196
  465 218.241.217.203

Then a Python script monitors the newest generated file; I set the threshold at 10000 and send an email alert when it is exceeded. Both jobs are scheduled with cron:

58 * * * * /usr/local/bin/get_uniq_ip_access_cnt.sh www.abc.com.log 20 
0 * * * * /usr/local/bin/malicious_access_mon.py

The monitoring script:

#!/usr/bin/python
# -*- coding: cp936 -*-

import os
import time
import datetime
import string
import email
import smtplib
import mimetypes
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText

def SendMail():

    server = 'Web-1.2.3.4'
    mail_to = ["san.zhang@abc.com", "si.li@abc.com"]
    mail_host = "mail.abc.com"
    mail_user = "webmaster@abc.com"
    mail_pass = "passwd"
    mail_from = "webmaster@abc.com"
    mail_postfix = "abc.com"
    mail_subject = "Malicious Access - " + server +" !!!"
    mail_body = mailbody

    msg = MIMEMultipart()
    body = MIMEText(mail_body,_subtype='plain',_charset='utf-8')
    msg.attach(body)
    
    msg['Subject'] = mail_subject
    msg['From'] = mail_from
    msg['To'] = ";".join(mail_to)
    
    try:
        s = smtplib.SMTP()
        s.connect(mail_host)
        s.login(mail_user,mail_pass)
        s.sendmail(mail_from, mail_to, msg.as_string())
        s.close()
        return True
    except Exception, e:
        print str(e)
        return False  

def handlefile():

    path=r"/usr/local/uniqIpAccessCnt/"
    files=[(os.path.getmtime(path+x),path+x) for x in os.listdir(path)]
    files.sort()
    fname = files[-1][1]
    print fname

    f = open(fname, 'r')
    line = f.readline()
    cnt = line.split()[0]  # the hit count is the first field of the top line
    print cnt
    if int(cnt) > 10000:
        global mailbody
        mailbody = 'Access record as follows in the last hour:\n\n\n  Count      IP\n\n' 
        lines = f.readlines()
        f.close()
        mailbody = mailbody + line
        for x in lines:
            mailbody = mailbody + x
        print mailbody
        SendMail()

def main():
    handlefile()

main()


A Log Backup Script in Python Combined with Shell

It compresses and archives the log files from the week starting about a month ago and uploads them to an FTP server for safekeeping, taking directory names as arguments, e.g.: /usr/local/bin/log_backup.py /usr/local/apache/logs/accesslog/ /usr/local/tomcat/logs/

#!/usr/bin/python
# -*- coding: cp936 -*-

import os
import sys
import time
import datetime
from ftplib import FTP

def getip():
    #cmd3 = "/sbin/ifconfig eth0 |grep \"inet addr:\" |awk \'{print $2}\'|cut -c 6-"
    cmd3 = "ifconfig|grep 'inet addr'|grep Bcast|grep -v '192.168'|awk '{print $2}'|cut -c 6-"
    ip = os.popen(cmd3).read().rstrip()
    #iplist=ip.split('.',4)
    #fileip=iplist[3]
    return ip

def ftp(bakfile):
    ftp=FTP()

    ftp.set_debuglevel(2)
    ftp.connect('192.168.0.1','21')
    ftp.login('user','pass')

    ftp.cwd('log_bak')
    bufsize = 1024
    file_handler = open(bakfile,'rb')
    ftp.storbinary('STOR %s' % os.path.basename(bakfile),file_handler,bufsize)
    ftp.set_debuglevel(0)

    file_handler.close()
    ftp.quit()

def main(dirname):

    stamp = datetime.datetime.now().strftime('%Y%m%d')
    tarname = dirname + getip() + dirname.replace('/','_') + str(stamp) + ".tar.gz"
    # Group the -name tests so the mtime window applies to both patterns.
    findcmd = "find " + dirname + " -maxdepth 1 -type f -mtime +30 -mtime -38 \\( -name '*.log*' -o -name '*.txt*' \\)"
    tarcmd = "tar -zcvf " + tarname + " $(" + findcmd + "|sort)"
    # Run tar once and check its exit status.
    ret = os.system(tarcmd)
    print ret
    if ret == 0:
        ftp(tarname)
        rmcmd = findcmd + "|xargs rm -f"
        #print rmcmd
        #os.system(rmcmd)
    else:
        print "Process error"
        sys.exit(0)


if len(sys.argv) < 2:
    print 'Error: One parameter at least!!!'
    sys.exit(1)

print sys.argv[1:]
for dirname in sys.argv[1:]:
    print dirname
    main(dirname)

A Python Script That Dumps Java Thread Stacks to a File

Saving Java thread dumps makes troubleshooting easier.

This script is a cleaned-up version of an earlier one, to make it easier to manage.

Pass the script a single argument: a keyword that uniquely identifies the Java process. The script also takes care of cleaning up old dump files.

Taking GlassFish as an example, add a cron job:

*/1 * * * * /usr/local/bin/thread_dump.py domain1

This dumps domain1's thread stacks to a file every minute.

The script:

#!/usr/bin/python

import commands
import os
import sys
import string
import datetime

def main():

    if len(sys.argv) == 1:
        print "Missing Operand..."
        return

    dumpdir = "/usr/local/thread_dump/"
    if os.path.exists(dumpdir) == False:
        os.makedirs(dumpdir)
    if os.path.exists(dumpdir + '__bak') == False:
        os.makedirs(dumpdir + '__bak')

    getpid = "ps -ef|grep " + sys.argv[1] + "|grep -v -E 'grep|more|tail|thread_dump'|awk '{print $2}'"
    pid = os.popen(getpid).read().rstrip()
    if pid != "":
        dumpThread(pid, dumpdir)
    else:
       print "Can't find " + sys.argv[1] + " Process"

    cleardump = "/usr/bin/find " + dumpdir + " -maxdepth 1 -mmin +30 -name '*dump*.txt' -exec mv {} " + dumpdir + "/__bak \;&&/usr/bin/find " + dumpdir + "__bak -maxdepth 1 -mtime +3 -name '*dump*.txt' -exec rm {} \;"
    os.system(cleardump)

def dumpThread(pid, dumpdir):

    try:
        filename = dumpdir + sys.argv[1] + "_dump_"+ datetime.datetime.strftime(datetime.datetime.today(),'%y%m%d_%H%M')+".txt"
        dump = "/bin/su - anyuser -c \'/usr/local/jdk/bin/jstack " + pid + " > " + filename + "\'"
        os.system(dump)
        print filename+" has been created!"
    except Exception, ex:
        print str(ex)
        pass

main()


A Python Script That Monitors a File's Modification Time

#!/usr/bin/python
# -*- coding: cp936 -*-

import os
import socket
import time
import datetime
import string
import email
import smtplib
import mimetypes
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText

authInfo = {}
authInfo['server'] = '******'
authInfo['user'] = '******'
authInfo['password'] = '******'
fromAdd = '<******>'
toaddmon = ['******']

subject = 'Log Error!'

def getip():
    global ip
    if os.name == 'nt':
        ip=socket.gethostbyname(socket.gethostname())
    if os.name == 'posix':
        cmd3 = "/sbin/ifconfig eth0 |grep \"inet addr:\" |awk \'{print $2}\'|cut -c 6-"
        ip = os.popen(cmd3).read().rstrip()
    return ip

def sendemail(authInfo, fromAdd, toAdd, subject, body):
    strFrom = fromAdd
    strTo = ', '.join(toAdd)
    server = authInfo.get('server')
    user = authInfo.get('user')
    passwd = authInfo.get('password')
    if not (server and user and passwd) :
        print 'incomplete login info, exit now'
        return
    msgRoot = MIMEMultipart('related')
    msgRoot['Subject'] = subject
    msgRoot['From'] = strFrom
    msgRoot['To'] = strTo
    msgAlternative = MIMEMultipart('alternative')
    msgRoot.attach(msgAlternative)
    msgText = MIMEText(body, 'html', 'gb2312')
    msgAlternative.attach(msgText)
    smtp = smtplib.SMTP()
    smtp.set_debuglevel(1)
    smtp.connect("mail.xyz.com")
    smtp.login(user, passwd)
    smtp.sendmail(strFrom, strTo, msgRoot.as_string())
    smtp.quit()
    return

def gettimediff(path):
    temptime = time.ctime(os.stat(path).st_mtime)
    modifiedth = temptime.split(':')[0][len(temptime.split(':')[0])-2:len(temptime.split(':')[0])]
    modifiedtm = temptime.split(':')[1][0:2]
    now = time.strftime("%H:%M:%S",time.localtime())
    nowth = now.split(':')[0]
    nowtm = now.split(':')[1]
    tempm = int(nowth)*60 + int(nowtm) - (int(modifiedth)*60 + int(modifiedtm))
    tempm = abs(tempm)
    print tempm
    body = ip + ' ' + path + ' ' + str(tempm) + ' '
    if tempm > 5:
        sendemail(authInfo, fromAdd, toaddmon, subject, body)

def main ():
    today = (datetime.date.today().isoformat())
    timestamp = datetime.datetime.strftime(datetime.datetime.today(),"%y%m%d")
    getip()
    gettimediff('/usr/local/apache/logs/access/access_log.' + timestamp + '.txt')

main()
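
For reference, a simpler sketch of the same check that compares the file's st_mtime directly against the current time, which also behaves correctly across day boundaries; it uses the same log path as the script above:

import os
import time
import datetime

timestamp = datetime.datetime.strftime(datetime.datetime.today(), "%y%m%d")
path = '/usr/local/apache/logs/access/access_log.' + timestamp + '.txt'

# Age of the file in whole minutes, taken straight from its modification time.
age_minutes = int(time.time() - os.stat(path).st_mtime) / 60
if age_minutes > 5:
    print path, 'has not been updated for', age_minutes, 'minutes'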