Python-5: 统计数据

Python-5: 统计数据,第1张

基于Spyder 4.2.5

ref

追加数据

https://www.geeksforgeeks.org/adding-new-column-to-existing-dataframe-in-pandas/

时间转换
https://www.runoob.com/python3/python-str-timestamp.html

列重命名
https://stackoverflow.com/questions/11346283/renaming-column-names-in-pandas

去掉行列
https://blog.csdn.net/songyunli1111/article/details/79306639

编译

.cmd文件

pyinstaller --onefile -i "logo.ico" .\tongji2204.py
源码
# -*- coding: utf-8 -*-

import time
import pandas as pd

# Settings file, read without a header row so its columns are labelled
# 0, 1, 2, ...  Cell (0, 0) holds the number of libraries; for library
# number lid the loop below reads the data file names from column
# 2*lid + 1 and the class ids from column 2*lid + 2.
fileName = 'set.csv'

mset = pd.read_csv(fileName, header=None)

# Number of libraries to process.
ln = int(mset.at[0, 0])

# Columns of each library data file (CSV header): 序号,图书馆,日期,时间,星期,星期,班级,读者编号,性别,姓名,操作,操作,定位码,书籍编号,ISBN,书名


# Headers of the exported per-book CSVs.  A book record is a list with one
# slot per header, in this order.
BOOK_CSV_HEADER = ['定位', '借出次数', '归还次数', '状态', '时间戳',
                   '时间', '班级', '学生编号', '书编号', '姓名', '定位码', '书名']

# Slots of a book record that are addressed by name.  Slots 1 and 2 (the
# borrow and return counters) are addressed directly by the operation code.
SLOT_SHELF = 0     # integer in front of '#' in the location code
SLOT_STATUS = 3    # 1 after a borrow, 0 after a later return
SLOT_STAMP = 4     # Unix timestamp of the last status change
SLOT_TIME = 5      # text time of the last borrow; 0 means never borrowed
SLOT_CLASS = 6
SLOT_READER = 7
SLOT_BOOK = 8
SLOT_NAME = 9
SLOT_LOCATION = 10
SLOT_TITLE = 11

OP_BORROW = 1
OP_RETURN = 2

# Columns read from every data file, in the order they are unpacked below.
# NOTE(review): the published listing showed ' *** 作' for the operation
# column, which looks like a censored '操作'; confirm against a real data file.
RECORD_FIELDS = ['班级', '读者编号', '操作', '书籍编号', '姓名', '书名',
                 '日期', '时间', '定位码']


def _read_records(path):
    """Load one library data file, without malformed lines or empty rows.

    The column names are taken from the second line of the file (header=1).
    """
    options = {'header': 1, 'encoding': 'utf-8', 'engine': 'python'}
    try:
        # pandas >= 1.3.  'warn' skips a malformed line and reports it, which
        # is what error_bad_lines=False did with its default warnings on.
        frame = pd.read_csv(path, on_bad_lines='warn', **options)
    except TypeError:
        # Older pandas does not accept on_bad_lines.
        frame = pd.read_csv(path, error_bad_lines=False, **options)
    return frame.dropna(axis=0, how='all')


def _tally_file(books, records):
    """Fold the rows of one data file into *books*, updating it in place.

    books maps a book id to its record list (see BOOK_CSV_HEADER).
    """
    # The final row is never read, as in the original loops over
    # range(len(data) - 1).  Slicing by position keeps that and stays correct
    # when dropna() has left gaps in the row labels.
    rows = records.iloc[:-1]

    # NOTE(review): a book that already has a record from an earlier file is
    # zeroed again here, exactly as before -- confirm that this is intended.
    for raw_id in rows['书籍编号']:
        books[int(raw_id)] = [0] * len(BOOK_CSV_HEADER)

    columns = [rows[name] for name in RECORD_FIELDS]
    for cid, uid, op, bid, uname, bname, rdate, rtime, loc in zip(*columns):
        cid = int(cid)
        uid = int(uid)
        op = int(op)
        bid = int(bid)
        shelf_text = loc.split('#')[0]
        rtstr = (rdate + ' ' + rtime).replace('/', '-')
        rt = int(time.mktime(time.strptime(rtstr, '%Y-%m-%d %H:%M:%S')))
        print(rt)

        record = books[bid]
        record[op] += 1  # slot 1 counts borrows, slot 2 counts returns

        if op == OP_BORROW:
            record[SLOT_SHELF] = int(shelf_text)
            record[SLOT_STATUS] = 1
            record[SLOT_STAMP] = rt
            record[SLOT_TIME] = rtstr
            record[SLOT_CLASS] = cid
            record[SLOT_READER] = uid
            record[SLOT_BOOK] = bid
            record[SLOT_NAME] = uname
            record[SLOT_LOCATION] = loc
            record[SLOT_TITLE] = bname
        elif op == OP_RETURN and rt > record[SLOT_STAMP]:
            record[SLOT_STATUS] = 0
            record[SLOT_STAMP] = rt


def _export_books(frame, path):
    """Write the book records (one column per book) as one CSV row per book."""
    frame.T.to_csv(path, header=BOOK_CSV_HEADER,
                   encoding='UTF-8-sig', errors='ignore')


for lid in range(ln):
    print(lid)  # library id

    # Data file names of this library: column 2*lid + 1 of the settings.
    file_ = mset[2*lid+1].dropna(axis=0, how='all')
    print(file_)

    # Class ids of this library: column 2*lid + 2 of the settings.
    cid_ = mset[2*lid+2].dropna(axis=0, how='all').astype(int)
    print(cid_)

    # Records are built in plain lists and only turned into a DataFrame once
    # complete; this avoids chained assignment into the frame.
    books = {}
    for file in file_:
        print(file)
        _tally_file(books, _read_records(file))

    # Keep only books that were borrowed at least once (time slot filled).
    borrowed = {}
    for bid, record in books.items():
        print(record[SLOT_TIME])
        if record[SLOT_TIME] != 0:
            borrowed[bid] = record

    b = pd.DataFrame(borrowed, index=range(len(BOOK_CSV_HEADER)))
    prefix = 'lib' + str(lid + 1)

    # First export: ordered by status, then class.
    b = b.sort_values(axis=1, by=[SLOT_STATUS, SLOT_CLASS])
    _export_books(b, prefix + 'book.csv')

    # Second export: ordered by shelf number, then full location code.
    b = b.sort_values(axis=1, by=[SLOT_SHELF, SLOT_LOCATION])
    _export_books(b, prefix + 'book1.csv')

    # Per-class summary: row 0 counts books with status 0 (returned), row 1
    # books with status 1 (unreturned), row 2 is their sum.  A class id that
    # is not listed in the settings raises KeyError, as before.
    d = pd.DataFrame(0, index=range(3), columns=list(cid_))
    for bid in b:
        status = b.at[SLOT_STATUS, bid]
        cid = b.at[SLOT_CLASS, bid]
        d.at[status, cid] += 1
    d.loc[2] = d.loc[0] + d.loc[1]

    # Column 0 carries the row labels and is exported under the name 'class'.
    # If class id 0 is not listed in the settings, the label column is
    # appended at the end instead of raising KeyError.
    d[0] = ['returned', 'unreturned', 'all']
    d = d.rename(columns={0: 'class'})
    d.T.to_csv(prefix + 'out_num.csv', header=False)

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/943138.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-18
下一篇 2022-05-18

发表评论

登录后才能评论

评论列表(0条)

保存