Excel表格处理-----Python

Excel表格处理-----Python,第1张

Excel表格处理-----Python

文件1 data.py

'''
data:2021-08-21
Author: Ling Li
Description:使用Pandas拼接多个CSV文件到一个文件(即合并)
'''
import pandas as pd
import os
Folder_Path = r'C:UserselxxlngVideosTicketData'          #要拼接的文件夹及其完整路径,注意不要包含中文
SaveFile_Path =  r'C:UserselxxlngVideosTicketData'       #拼接后要保存的文件路径
SaveFile_Name = r'all.csv'              #合并后要保存的文件名
 
#修改当前工作目录
os.chdir(Folder_Path)
#将该文件夹下的所有文件名存入一个列表
file_list = os.listdir()
 
#读取第一个CSV文件并包含表头
df = pd.read_csv(Folder_Path +'\'+ file_list[0])   #编码默认UTF-8,若乱码自行更改
 
#将读取的第一个CSV文件写入合并后的文件保存
df.to_csv(SaveFile_Path+'\'+ SaveFile_Name,encoding="utf_8_sig",index=False)
 
#循环遍历列表中各个CSV文件名,并追加到合并后的文件
for i in range(1,len(file_list)):
    df = pd.read_csv(Folder_Path + '\'+ file_list[i])
    df.to_csv(SaveFile_Path+'\'+ SaveFile_Name,encoding="utf_8_sig",index=False, header=False, mode='a+')

文件2 create_solved.py

'''
data:2021-08-21
Author: Ling Li
Description:查看ticket的create和solved曲线
'''
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dates
from Ini_set import *



#time_conver:导入的日期格式为字符串形式,转换成日期的形式
def time_conver(data):
    data['Resolved'] = pd.to_datetime(data['Resolved'])
    data['Created'] = pd.to_datetime(data['Created'])
    data['Resolved'] = data['Resolved'].dt.strftime('%Y-%m-%d')
    data['Created'] = data['Created'].dt.strftime('%Y-%m-%d')
    return data
    
#time_conver_month:导入的日期格式为字符串形式,转换成日期的形式
# %W 每年的第几周,把星期一做为第一天(值从0到53)
# %V 每年的第几周,使用基于周的年
# %h 简写的月份名
def time_conver_month(data):
    data['Resolved'] = pd.to_datetime(data['Resolved'])
    data['Created'] = pd.to_datetime(data['Created'])
    data['Resolved'] = data['Resolved'].dt.strftime('%Y-%W')
    data['Created'] = data['Created'].dt.strftime('%Y-%W')
    return data

#Resolved, Created 曲线绘制准备
def plot_solve_create(data,average):
    Number_resolve = data.loc[:,'Resolved'].value_counts()
    Number_create = data.loc[:,'Created'].value_counts()
    
    ##时间变量处理,保证横坐标保持一致
    for idx in (set(Number_create.index) - set(Number_resolve.index)):
        Number_resolve[idx]=0
    for idx in  (set(Number_resolve.index) - set(Number_create.index)):
        Number_create[idx]=0
    #the type of temp_resolve&temp_create are 'pandas.core.series.Series'
    temp_resolve =  Number_resolve.sort_index(ascending=True)
    temp_create =  Number_create.sort_index(ascending=True)
    
    #准备横纵坐标
    x1_dates = temp_resolve.index
    y1 = temp_resolve.tolist()
    x2_dates = temp_create.index
    y2 = temp_create.tolist()
    
    fig,ax=plt.subplots(figsize=(6,6))
    fig.suptitle('Created VS Resolved ticket number of every day')  # 添加标题以便我们辨别
    ax.plot_date(x2_dates,y2,'r-x',label='Created')
    ax.plot_date(x1_dates, y1, 'g-o',label='Resolved')
    t = []
    for i in x1_dates:
        t.append(average)
    ax.plot_date(x1_dates, t, 'p-',label='Average ticket number every working day')
    #增加注释
    x0 = x1_dates[0]
    y0 = max(y1)
    plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1),
             fontsize=12, color = "royalblue",weight="bold")
    plt.text(x0, y0-1, "Average ticket numbet solved by everyday is: %.4f"%average,
             fontsize=12, color = "royalblue",weight="bold")
    
    #如果坐标轴上面的tick过于密集
    fig.autofmt_xdate()#自动调整xtick的间距
    #网格
    ax.xaxis.grid(True)
    ax.yaxis.grid(True)
    #添加文本 #x轴文本
    plt.xlabel('Date')
    #y轴文本
    plt.ylabel('Ticket number')
    #设置日期为每个月
    #Location也就是以什么样的频率
    ax.xaxis.set_major_locator(dates.WeekdayLocator())
    plt.tight_layout()
    plt.legend()
    plt.show()

#................................................................. 
#Resolved, Created 按照月度曲线绘制准备
def plot_solve_create_month(data):
    Number_resolve = data.loc[:,'Resolved'].value_counts()
    Number_create = data.loc[:,'Created'].value_counts()
    
    ##时间变量处理,保证横坐标保持一致
    for idx in (set(Number_create.index) - set(Number_resolve.index)):
        Number_resolve[idx]=0
    for idx in  (set(Number_resolve.index) - set(Number_create.index)):
        Number_create[idx]=0
    #the type of temp_resolve&temp_create are 'pandas.core.series.Series'
    temp_resolve =  Number_resolve.sort_index(ascending=True)
    temp_create =  Number_create.sort_index(ascending=True)
    
    #准备横纵坐标
    x1_dates = temp_resolve.index
    y1 = temp_resolve.tolist()
    x2_dates = temp_create.index
    y2 = temp_create.tolist()
    
    fig,ax=plt.subplots(figsize=(6,6))
    fig.suptitle('Created VS Resolved ticket number of every month')  # 添加标题以便我们辨别
    ax.plot_date(x2_dates,y2,'r-x',label='Created')
    ax.plot_date(x1_dates, y1, 'g-o',label='Resolved')
    

    #增加注释
    x0 = x1_dates[0]
    y0 = max(y1)
    plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1),
             fontsize=12, color = "royalblue",weight="bold")
    # plt.text(x0, y0-1, "Average ticket numbet solved by everyday is: %.4f"%average,
    #          fontsize=12, color = "royalblue",weight="bold")
    
    #如果坐标轴上面的tick过于密集
    fig.autofmt_xdate()#自动调整xtick的间距
    #网格
    ax.xaxis.grid(True)
    ax.yaxis.grid(True)
    #添加文本 #x轴文本
    plt.xlabel('Date')
    #y轴文本
    plt.ylabel('Ticket number')
    #设置日期为每个月
    #Location也就是以什么样的频率
    ax.xaxis.set_major_locator(dates.WeekdayLocator())
    plt.tight_layout()
    plt.legend()
    plt.show()
    

if __name__=="__main__":
    data = DATA   
    # data = time_conver_month(data)
    data = time_conver(data)
    total,average = aver_ticket_num(START_DAY,END_DAY,data)
    # plot_solve_create_month(data)
    plot_solve_create(data,average)

文件 3 ticket_every_pyecharts.py

'''
data:2021-08-21
Author: Ling Li
Description:使用create查看每个人在每个function下解决的ticket情况
'''
from pyecharts import Bar
# from pyecharts.charts import Bar
# from pyecharts import options as ops
from Ini_set import *


def coord_func_num(name_support,Function):
    name_support_data = name_support.loc[:,'Functional Area'].value_counts()    
    for idx in (Function - set(name_support_data.index)):
        name_support_data[idx] = 0
    name_support_data = name_support_data.sort_index(ascending=True)
    # print(name_support_data)
    columns = name_support_data.index.tolist()
    name = name_support_data.tolist()
    return columns,name

def plot_person_function(data):
    Function = data.loc[:,'Functional Area'].value_counts()
    Function = set(Function.index)
    user = set(data.Assignee)

    total_width, n = 1, len(user)
    width = total_width / n 
    
    bar = Bar('Ticket solved by everyone')
    for u in user:        
        name_support = data.loc[data['Assignee'] == u]
        x,y = coord_func_num(name_support,Function)
        bar.add(u,x,y,mark_line=['min','max']) 
    bar.render("type_ticket.html")
    bar


if __name__=="__main__":
    data = DATA
    plot_person_function(data)

文件 4 ticket_function.py

'''
data:2021-08-21
Author: Ling Li
Description:查看每个Function下的ticket数量
'''
import matplotlib.pyplot as plt
from Ini_set import *


def autolabel(rects):
    for rect in rects:
        height = rect.get_height()
        plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height))

        
data = DATA
#Tickets are distributed by function
Function = data.loc[:,'Functional Area'].value_counts()
Function = Function.sort_index(ascending=True)

axis_x = Function.index.tolist()
print(axis_x)
x = list(range(len(axis_x)))
print(x)
y = Function.tolist()
print(y)

total_width, n = 1, 2
width = total_width / n 

plt.title("Tickets are distributed by function")
P = plt.bar(x,y,width=width,tick_label = axis_x,color ='darkcyan')
autolabel(P)

#网格
plt.grid()
# 设置刻度字体大小
plt.xticks(fontsize=11)
plt.yticks(fontsize=12)
# 设置坐标标签字体大小
plt.ylabel('Ticket Number',fontsize=15)
plt.xlabel('Function Area',fontsize=15)
plt.tight_layout()
plt.show()

文件5 ticket_month.py

'''
data:2021-08-21
Author: Ling Li
Description:查看每个月解决的ticket数量
'''
import pandas as pd
import matplotlib.pyplot as plt
from Ini_set import *

data = DATA
#time_conver:导入的日期格式为字符串形式,转换成日期的形式
def time_conver(data):
    data['Resolved'] = pd.to_datetime(data['Resolved'])
    data['Created'] = pd.to_datetime(data['Created'])
    data['Resolved'] = data['Resolved'].dt.strftime('%Y%m')
    data['Created'] = data['Created'].dt.strftime('%Y%m')
    return data
time_conver(data)

Function = data.loc[:,'Functional Area'].value_counts()
Function = set(Function.index)


#图像数表
def autolabel(rects):
    for rect in rects:
        height = rect.get_height()
        plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height))

#caculate the total ticket numbers and average number
Number_resolve = data.loc[:,'Resolved'].value_counts()


#parameters for (tickets by every month)
# plt.subplot(1,2,1)
plt.title("Resolved tickets by everymonth")
temp_resolve =  Number_resolve.sort_index(ascending=True)
axis_x = temp_resolve.index.tolist()
x1 = list(range(len(axis_x)))
y1 = temp_resolve.tolist()
total_width, n = 1, len(axis_x)
# 调节柱形图宽度
width = total_width / 3 
P1 = plt.bar(x1,y1,width=width,tick_label = axis_x,color='skyblue')
autolabel(P1)
plt.xlabel("Date by Month")
plt.ylabel("Ticket Number")

#增加注释
x0 = x1[0]
y0 = max(y1)
plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1),
         fontdict=dict(fontsize=12, color='r',family='monospace',),#字体属性字典
         
         #添加文字背景色
         bbox={'facecolor': '#74C476', #填充色
              'edgecolor':'b',#外框色
               'alpha': 0.5, #框透明度
               'pad': 8,#本文与框周围距离 
              }
        )
    
#网格
plt.grid()
plt.tight_layout()
plt.legend()
plt.show()

文件 6 ticket_month

import pandas as pd
import matplotlib.pyplot as plt
from Ini_set import *


data = DATA
#time_conver:导入的日期格式为字符串形式,转换成日期的形式
def time_conver(data):
    data['Resolved'] = pd.to_datetime(data['Resolved'])
    data['Created'] = pd.to_datetime(data['Created'])
    data['Resolved'] = data['Resolved'].dt.strftime('%Y%m')
    data['Created'] = data['Created'].dt.strftime('%Y%m')
    return data
time_conver(data)

Function = data.loc[:,'Functional Area'].value_counts()
Function = set(Function.index)


#图像数表
def autolabel(rects):
    for rect in rects:
        height = rect.get_height()
        plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height))

#caculate the total ticket numbers and average number
Number_resolve = data.loc[:,'Resolved'].value_counts()


#parameters for (tickets by every month)
# plt.subplot(1,2,2)
plt.title("Resolved tickets by everybody")
Number_assignee = data.loc[:,'Assignee'].value_counts()
temp_assignee = Number_assignee.sort_index(ascending=True)
axis_x = temp_assignee.index.tolist()
x2 = list(range(len(axis_x)))
y2 = temp_assignee.tolist()
total_width, n = 1, len(axis_x)
width = total_width / n 
P2 = plt.bar(x2,y2,width=width,tick_label = axis_x,color='darkcyan')
autolabel(P2)
plt.xlabel("Name")
plt.ylabel("Ticket Number")

#增加注释
x0 = x2[0]
y0 = max(y2)
plt.text(x0, y0, "Total ticket number solved: %d"%sum(y2),
         fontdict=dict(fontsize=12, color='r',family='monospace',),#字体属性字典
         
         #添加文字背景色
         bbox={'facecolor': '#74C476', #填充色
              'edgecolor':'b',#外框色
               'alpha': 0.5, #框透明度
               'pad': 8,#本文与框周围距离 
              }
        )

plt.tight_layout()
plt.legend()
plt.show()

文件 7wordclounds.py

'''
data:2021-08-21
Author: Ling Li
Description:生成创建ticket的人的词云图
'''
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator,STOPWORDS #词云,颜色生成器,停止词
import numpy as np #科学计算
from PIL import Image #处理图片
from Ini_set import *



data = DATA
word_counts = data.loc[:,'Reporter'].value_counts()

backgroud = np.array(Image.open(r'C:UserselxxlngdocumentsALINGduola.jpg'))
#定义词频背景
background_image = np.array(Image.open(r'C:UserselxxlngdocumentsALINGduola.jpg'))
font_path=r'C:\Windows\fonts\BASKVILL.TTF'

wd = WordCloud(
    font_path=font_path,  #设置字体格式,不然会乱码
    background_color="white",  #设置背景颜色
    mask=background_image   #设置背景图
).generate_from_frequencies(word_counts)

#保存词云图
wd.to_file('test.png')
#显示词云图
plt.imshow(wd,interpolation="bilinear")
plt.axis("off")
plt.show()

文件8 ticket_function.py

'''
data:2021-08-21
Author: Ling Li
Description:查看每个人在每个function下的ticket数量
'''

import pandas as pd
import math
import matplotlib.pyplot as plt
from Ini_set import *


#time_conver:导入的日期格式为字符串形式,转换成日期的形式
def time_conver(data):
    data['Resolved'] = pd.to_datetime(data['Resolved'])
    data['Created'] = pd.to_datetime(data['Created'])
    data['Resolved'] = data['Resolved'].dt.strftime('%Y%m')
    data['Created'] = data['Created'].dt.strftime('%Y%m')
    return data
##定义函数来显示柱状上的数值
def autolabel(rects):
    for rect in rects:
        height = rect.get_height()
        plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height))


def coord_func_num(name_support,Function):
    name_support_data = name_support.loc[:,'Functional Area'].value_counts()
    for idx in (Function - set(name_support_data.index)):
        name_support_data[idx] = 0
    name_support_data = name_support_data.sort_index(ascending=True)
    columns = name_support_data.index.tolist()
    name = name_support_data.tolist()
    return columns,name

def plot_person_function(data):
    Function = data.loc[:,'Functional Area'].value_counts()
    # tick_lable
    axis_x = Function.index.tolist()

    Function = set(Function.index)
    user = set(data.Assignee)

    total_width, n = 1, len(user)
    width = total_width / n 
    
    x = list(range(len(Function)))
    # print('Initial axis: ',x) 
    # 增加两个Function之间的间隙   
    x = [i*2 for i in x]    
    plt.title("Resolved tickets by everybody in Function")

    count = 0 
    # 保证颜色唯一
    colorlist = list()  
    for u in user:        
        count = count + 1
        name_support = data.loc[data['Assignee'] == u]
        x1,y = coord_func_num(name_support,Function)
        temp = randomcolor()
        while temp not in colorlist:
            colorlist.append(temp)
        else:
            temp =  randomcolor()

        # 横坐标位置居中        
        if count == math.ceil( len(user)/2 ):
            p = plt.bar(x,y,width=width,tick_label = axis_x, label=u,color=temp)
        else:
            p = plt.bar(x,y,width=width, label=u,color=temp)                  
        
        x = [i + width for i in x]
        # print('Distribution arrangement x-axis coordinate:', x)
        plt.legend(fontsize=14)
        autolabel(p)



    #网格
    plt.grid()
    # plt.set_xticks(x +width/2)#将坐标设置在指定位置
    #  设置刻度字体大小
    plt.xticks(fontsize=11)
    plt.yticks(fontsize=12)
    # 设置坐标标签字体大小
    plt.ylabel('Ticket Number',fontsize=15)
    plt.xlabel('Function Area',fontsize=15)
    plt.show()


if __name__=="__main__":
    data = DATA   
    data = time_conver(data)
    plot_person_function(data)

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/5690089.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-12-17
下一篇 2022-12-17

发表评论

登录后才能评论

评论列表(0条)

保存