文件1 data.py
''' data:2021-08-21 Author: Ling Li Description:使用Pandas拼接多个CSV文件到一个文件(即合并) ''' import pandas as pd import os Folder_Path = r'C:UserselxxlngVideosTicketData' #要拼接的文件夹及其完整路径,注意不要包含中文 SaveFile_Path = r'C:UserselxxlngVideosTicketData' #拼接后要保存的文件路径 SaveFile_Name = r'all.csv' #合并后要保存的文件名 #修改当前工作目录 os.chdir(Folder_Path) #将该文件夹下的所有文件名存入一个列表 file_list = os.listdir() #读取第一个CSV文件并包含表头 df = pd.read_csv(Folder_Path +'\'+ file_list[0]) #编码默认UTF-8,若乱码自行更改 #将读取的第一个CSV文件写入合并后的文件保存 df.to_csv(SaveFile_Path+'\'+ SaveFile_Name,encoding="utf_8_sig",index=False) #循环遍历列表中各个CSV文件名,并追加到合并后的文件 for i in range(1,len(file_list)): df = pd.read_csv(Folder_Path + '\'+ file_list[i]) df.to_csv(SaveFile_Path+'\'+ SaveFile_Name,encoding="utf_8_sig",index=False, header=False, mode='a+')
文件2 create_solved.py
''' data:2021-08-21 Author: Ling Li Description:查看ticket的create和solved曲线 ''' import pandas as pd import matplotlib.pyplot as plt import matplotlib.dates as dates from Ini_set import * #time_conver:导入的日期格式为字符串形式,转换成日期的形式 def time_conver(data): data['Resolved'] = pd.to_datetime(data['Resolved']) data['Created'] = pd.to_datetime(data['Created']) data['Resolved'] = data['Resolved'].dt.strftime('%Y-%m-%d') data['Created'] = data['Created'].dt.strftime('%Y-%m-%d') return data #time_conver_month:导入的日期格式为字符串形式,转换成日期的形式 # %W 每年的第几周,把星期一做为第一天(值从0到53) # %V 每年的第几周,使用基于周的年 # %h 简写的月份名 def time_conver_month(data): data['Resolved'] = pd.to_datetime(data['Resolved']) data['Created'] = pd.to_datetime(data['Created']) data['Resolved'] = data['Resolved'].dt.strftime('%Y-%W') data['Created'] = data['Created'].dt.strftime('%Y-%W') return data #Resolved, Created 曲线绘制准备 def plot_solve_create(data,average): Number_resolve = data.loc[:,'Resolved'].value_counts() Number_create = data.loc[:,'Created'].value_counts() ##时间变量处理,保证横坐标保持一致 for idx in (set(Number_create.index) - set(Number_resolve.index)): Number_resolve[idx]=0 for idx in (set(Number_resolve.index) - set(Number_create.index)): Number_create[idx]=0 #the type of temp_resolve&temp_create are 'pandas.core.series.Series' temp_resolve = Number_resolve.sort_index(ascending=True) temp_create = Number_create.sort_index(ascending=True) #准备横纵坐标 x1_dates = temp_resolve.index y1 = temp_resolve.tolist() x2_dates = temp_create.index y2 = temp_create.tolist() fig,ax=plt.subplots(figsize=(6,6)) fig.suptitle('Created VS Resolved ticket number of every day') # 添加标题以便我们辨别 ax.plot_date(x2_dates,y2,'r-x',label='Created') ax.plot_date(x1_dates, y1, 'g-o',label='Resolved') t = [] for i in x1_dates: t.append(average) ax.plot_date(x1_dates, t, 'p-',label='Average ticket number every working day') #增加注释 x0 = x1_dates[0] y0 = max(y1) plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1), fontsize=12, color = "royalblue",weight="bold") plt.text(x0, y0-1, "Average ticket numbet solved by everyday is: %.4f"%average, fontsize=12, color = "royalblue",weight="bold") #如果坐标轴上面的tick过于密集 fig.autofmt_xdate()#自动调整xtick的间距 #网格 ax.xaxis.grid(True) ax.yaxis.grid(True) #添加文本 #x轴文本 plt.xlabel('Date') #y轴文本 plt.ylabel('Ticket number') #设置日期为每个月 #Location也就是以什么样的频率 ax.xaxis.set_major_locator(dates.WeekdayLocator()) plt.tight_layout() plt.legend() plt.show() #................................................................. #Resolved, Created 按照月度曲线绘制准备 def plot_solve_create_month(data): Number_resolve = data.loc[:,'Resolved'].value_counts() Number_create = data.loc[:,'Created'].value_counts() ##时间变量处理,保证横坐标保持一致 for idx in (set(Number_create.index) - set(Number_resolve.index)): Number_resolve[idx]=0 for idx in (set(Number_resolve.index) - set(Number_create.index)): Number_create[idx]=0 #the type of temp_resolve&temp_create are 'pandas.core.series.Series' temp_resolve = Number_resolve.sort_index(ascending=True) temp_create = Number_create.sort_index(ascending=True) #准备横纵坐标 x1_dates = temp_resolve.index y1 = temp_resolve.tolist() x2_dates = temp_create.index y2 = temp_create.tolist() fig,ax=plt.subplots(figsize=(6,6)) fig.suptitle('Created VS Resolved ticket number of every month') # 添加标题以便我们辨别 ax.plot_date(x2_dates,y2,'r-x',label='Created') ax.plot_date(x1_dates, y1, 'g-o',label='Resolved') #增加注释 x0 = x1_dates[0] y0 = max(y1) plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1), fontsize=12, color = "royalblue",weight="bold") # plt.text(x0, y0-1, "Average ticket numbet solved by everyday is: %.4f"%average, # fontsize=12, color = "royalblue",weight="bold") #如果坐标轴上面的tick过于密集 fig.autofmt_xdate()#自动调整xtick的间距 #网格 ax.xaxis.grid(True) ax.yaxis.grid(True) #添加文本 #x轴文本 plt.xlabel('Date') #y轴文本 plt.ylabel('Ticket number') #设置日期为每个月 #Location也就是以什么样的频率 ax.xaxis.set_major_locator(dates.WeekdayLocator()) plt.tight_layout() plt.legend() plt.show() if __name__=="__main__": data = DATA # data = time_conver_month(data) data = time_conver(data) total,average = aver_ticket_num(START_DAY,END_DAY,data) # plot_solve_create_month(data) plot_solve_create(data,average)
文件 3 ticket_every_pyecharts.py
''' data:2021-08-21 Author: Ling Li Description:使用create查看每个人在每个function下解决的ticket情况 ''' from pyecharts import Bar # from pyecharts.charts import Bar # from pyecharts import options as ops from Ini_set import * def coord_func_num(name_support,Function): name_support_data = name_support.loc[:,'Functional Area'].value_counts() for idx in (Function - set(name_support_data.index)): name_support_data[idx] = 0 name_support_data = name_support_data.sort_index(ascending=True) # print(name_support_data) columns = name_support_data.index.tolist() name = name_support_data.tolist() return columns,name def plot_person_function(data): Function = data.loc[:,'Functional Area'].value_counts() Function = set(Function.index) user = set(data.Assignee) total_width, n = 1, len(user) width = total_width / n bar = Bar('Ticket solved by everyone') for u in user: name_support = data.loc[data['Assignee'] == u] x,y = coord_func_num(name_support,Function) bar.add(u,x,y,mark_line=['min','max']) bar.render("type_ticket.html") bar if __name__=="__main__": data = DATA plot_person_function(data)
文件 4 ticket_function.py
''' data:2021-08-21 Author: Ling Li Description:查看每个Function下的ticket数量 ''' import matplotlib.pyplot as plt from Ini_set import * def autolabel(rects): for rect in rects: height = rect.get_height() plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height)) data = DATA #Tickets are distributed by function Function = data.loc[:,'Functional Area'].value_counts() Function = Function.sort_index(ascending=True) axis_x = Function.index.tolist() print(axis_x) x = list(range(len(axis_x))) print(x) y = Function.tolist() print(y) total_width, n = 1, 2 width = total_width / n plt.title("Tickets are distributed by function") P = plt.bar(x,y,width=width,tick_label = axis_x,color ='darkcyan') autolabel(P) #网格 plt.grid() # 设置刻度字体大小 plt.xticks(fontsize=11) plt.yticks(fontsize=12) # 设置坐标标签字体大小 plt.ylabel('Ticket Number',fontsize=15) plt.xlabel('Function Area',fontsize=15) plt.tight_layout() plt.show()
文件5 ticket_month.py
''' data:2021-08-21 Author: Ling Li Description:查看每个月解决的ticket数量 ''' import pandas as pd import matplotlib.pyplot as plt from Ini_set import * data = DATA #time_conver:导入的日期格式为字符串形式,转换成日期的形式 def time_conver(data): data['Resolved'] = pd.to_datetime(data['Resolved']) data['Created'] = pd.to_datetime(data['Created']) data['Resolved'] = data['Resolved'].dt.strftime('%Y%m') data['Created'] = data['Created'].dt.strftime('%Y%m') return data time_conver(data) Function = data.loc[:,'Functional Area'].value_counts() Function = set(Function.index) #图像数表 def autolabel(rects): for rect in rects: height = rect.get_height() plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height)) #caculate the total ticket numbers and average number Number_resolve = data.loc[:,'Resolved'].value_counts() #parameters for (tickets by every month) # plt.subplot(1,2,1) plt.title("Resolved tickets by everymonth") temp_resolve = Number_resolve.sort_index(ascending=True) axis_x = temp_resolve.index.tolist() x1 = list(range(len(axis_x))) y1 = temp_resolve.tolist() total_width, n = 1, len(axis_x) # 调节柱形图宽度 width = total_width / 3 P1 = plt.bar(x1,y1,width=width,tick_label = axis_x,color='skyblue') autolabel(P1) plt.xlabel("Date by Month") plt.ylabel("Ticket Number") #增加注释 x0 = x1[0] y0 = max(y1) plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1), fontdict=dict(fontsize=12, color='r',family='monospace',),#字体属性字典 #添加文字背景色 bbox={'facecolor': '#74C476', #填充色 'edgecolor':'b',#外框色 'alpha': 0.5, #框透明度 'pad': 8,#本文与框周围距离 } ) #网格 plt.grid() plt.tight_layout() plt.legend() plt.show()
文件 6 ticket_month
import pandas as pd import matplotlib.pyplot as plt from Ini_set import * data = DATA #time_conver:导入的日期格式为字符串形式,转换成日期的形式 def time_conver(data): data['Resolved'] = pd.to_datetime(data['Resolved']) data['Created'] = pd.to_datetime(data['Created']) data['Resolved'] = data['Resolved'].dt.strftime('%Y%m') data['Created'] = data['Created'].dt.strftime('%Y%m') return data time_conver(data) Function = data.loc[:,'Functional Area'].value_counts() Function = set(Function.index) #图像数表 def autolabel(rects): for rect in rects: height = rect.get_height() plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height)) #caculate the total ticket numbers and average number Number_resolve = data.loc[:,'Resolved'].value_counts() #parameters for (tickets by every month) # plt.subplot(1,2,2) plt.title("Resolved tickets by everybody") Number_assignee = data.loc[:,'Assignee'].value_counts() temp_assignee = Number_assignee.sort_index(ascending=True) axis_x = temp_assignee.index.tolist() x2 = list(range(len(axis_x))) y2 = temp_assignee.tolist() total_width, n = 1, len(axis_x) width = total_width / n P2 = plt.bar(x2,y2,width=width,tick_label = axis_x,color='darkcyan') autolabel(P2) plt.xlabel("Name") plt.ylabel("Ticket Number") #增加注释 x0 = x2[0] y0 = max(y2) plt.text(x0, y0, "Total ticket number solved: %d"%sum(y2), fontdict=dict(fontsize=12, color='r',family='monospace',),#字体属性字典 #添加文字背景色 bbox={'facecolor': '#74C476', #填充色 'edgecolor':'b',#外框色 'alpha': 0.5, #框透明度 'pad': 8,#本文与框周围距离 } ) plt.tight_layout() plt.legend() plt.show()
文件 7wordclounds.py
''' data:2021-08-21 Author: Ling Li Description:生成创建ticket的人的词云图 ''' import matplotlib.pyplot as plt from wordcloud import WordCloud,ImageColorGenerator,STOPWORDS #词云,颜色生成器,停止词 import numpy as np #科学计算 from PIL import Image #处理图片 from Ini_set import * data = DATA word_counts = data.loc[:,'Reporter'].value_counts() backgroud = np.array(Image.open(r'C:UserselxxlngdocumentsALINGduola.jpg')) #定义词频背景 background_image = np.array(Image.open(r'C:UserselxxlngdocumentsALINGduola.jpg')) font_path=r'C:\Windows\fonts\BASKVILL.TTF' wd = WordCloud( font_path=font_path, #设置字体格式,不然会乱码 background_color="white", #设置背景颜色 mask=background_image #设置背景图 ).generate_from_frequencies(word_counts) #保存词云图 wd.to_file('test.png') #显示词云图 plt.imshow(wd,interpolation="bilinear") plt.axis("off") plt.show()
文件8 ticket_function.py
''' data:2021-08-21 Author: Ling Li Description:查看每个人在每个function下的ticket数量 ''' import pandas as pd import math import matplotlib.pyplot as plt from Ini_set import * #time_conver:导入的日期格式为字符串形式,转换成日期的形式 def time_conver(data): data['Resolved'] = pd.to_datetime(data['Resolved']) data['Created'] = pd.to_datetime(data['Created']) data['Resolved'] = data['Resolved'].dt.strftime('%Y%m') data['Created'] = data['Created'].dt.strftime('%Y%m') return data ##定义函数来显示柱状上的数值 def autolabel(rects): for rect in rects: height = rect.get_height() plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height)) def coord_func_num(name_support,Function): name_support_data = name_support.loc[:,'Functional Area'].value_counts() for idx in (Function - set(name_support_data.index)): name_support_data[idx] = 0 name_support_data = name_support_data.sort_index(ascending=True) columns = name_support_data.index.tolist() name = name_support_data.tolist() return columns,name def plot_person_function(data): Function = data.loc[:,'Functional Area'].value_counts() # tick_lable axis_x = Function.index.tolist() Function = set(Function.index) user = set(data.Assignee) total_width, n = 1, len(user) width = total_width / n x = list(range(len(Function))) # print('Initial axis: ',x) # 增加两个Function之间的间隙 x = [i*2 for i in x] plt.title("Resolved tickets by everybody in Function") count = 0 # 保证颜色唯一 colorlist = list() for u in user: count = count + 1 name_support = data.loc[data['Assignee'] == u] x1,y = coord_func_num(name_support,Function) temp = randomcolor() while temp not in colorlist: colorlist.append(temp) else: temp = randomcolor() # 横坐标位置居中 if count == math.ceil( len(user)/2 ): p = plt.bar(x,y,width=width,tick_label = axis_x, label=u,color=temp) else: p = plt.bar(x,y,width=width, label=u,color=temp) x = [i + width for i in x] # print('Distribution arrangement x-axis coordinate:', x) plt.legend(fontsize=14) autolabel(p) #网格 plt.grid() # plt.set_xticks(x +width/2)#将坐标设置在指定位置 # 设置刻度字体大小 plt.xticks(fontsize=11) plt.yticks(fontsize=12) # 设置坐标标签字体大小 plt.ylabel('Ticket Number',fontsize=15) plt.xlabel('Function Area',fontsize=15) plt.show() if __name__=="__main__": data = DATA data = time_conver(data) plot_person_function(data)
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)