题记:自己定义excel分表里的数量,大表分解成若干小表。例如:
大表数据量 | 小表数据量 | 小表个数 |
---|---|---|
100条 | 30条 | 4张 |
500条 | 50条 | 10张 |
1650条 | 1000条 | 2张 |
import pandas as pd
from time import *
# Split one large Excel workbook into several smaller workbooks that each hold
# at most `excel_data_num` records. Every output file is named
# "<BarCode>-<first SerialNumber>-<last SerialNumber>.xlsx".

begin_time = time()  # start of the wall-clock measurement for the whole run

# A raw string literal cannot end in a single backslash (that is a
# SyntaxError), so the trailing path separator is appended as its own '\\'
# literal; the resulting string value is unchanged.
# NOTE(review): "Desktop20-04-30" looks like it may have lost a path separator
# somewhere upstream -- confirm the real folder name before running.
dirPath = r'C:\Users\user\Desktop20-04-30 epc生成=820909030065' '\\'
dirPath2 = r'C:\Users\user\Desktop20-04-30 epc生成=820909030065\表格拆分' '\\'
path = dirPath + '00820909030065-1-392500.xlsx'  # workbook to be split

orgName = pd.read_excel(path)
SerialNumber_list = list(orgName['SerialNumber'].drop_duplicates())
BarCode_list = list(orgName['BarCode'].drop_duplicates())
BarCode = BarCode_list[0]
print(SerialNumber_list)
print(SerialNumber_list[0])

excel_alldata_num = len(SerialNumber_list)  # total number of records
excel_data_num = 2000  # number of records written to each output workbook

# Ceiling division: a partially filled last workbook still counts as one.
excel_num = excel_alldata_num // excel_data_num
if excel_alldata_num % excel_data_num != 0:
    excel_num += 1
print('表格总数量:' + str(excel_num))


def SequenceNums(sta, end):
    """Return the consecutive integers from ``sta`` to ``end``, inclusive.

    Kept for backward compatibility: it used to build the ``skiprows`` list
    for ``pd.read_excel``. An empty list is returned when ``end < sta``.
    """
    return list(range(sta, end + 1))


# Step 2: carry out the split.
# The workbook used to be re-read for every chunk, e.g.
#   pd.read_excel(path, skiprows=0, nrows=80)   -> skip nothing, take 80 rows
#   pd.read_excel(path, skiprows=80, nrows=20)  -> skip 80 rows, take 20 rows
# Slicing the frame that is already in memory selects the same rows without
# parsing the file again for each output workbook.
# NOTE(review): file names index SerialNumber_list by row position, which
# assumes the SerialNumber column has no duplicates -- confirm for real data.
for i in range(excel_num):
    print(i)
    first = excel_data_num * i
    is_last = i == excel_num - 1
    if is_last:
        # The final workbook takes every remaining row. Clamping the index of
        # the last serial number also covers the case where the whole input is
        # smaller than one chunk, which used to raise IndexError.
        last = excel_alldata_num - 1
        tempdata = orgName.iloc[first:]
    else:
        last = first + excel_data_num - 1
        tempdata = orgName.iloc[first:last + 1]

    out_file = (
        dirPath2 + str(BarCode)
        + '-' + str(SerialNumber_list[first])
        + '-' + str(SerialNumber_list[last])
        + '.xlsx'
    )
    # The context manager saves and closes the workbook even if writing fails;
    # ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter(out_file) as writer:
        tempdata.to_excel(writer, index=False)

end_time = time()  # end of the wall-clock measurement
run_time = end_time - begin_time
print('程序运行总时间:', run_time)
# Disabled code kept inside a bare triple-quoted string: the interpreter
# evaluates the string and discards it, so none of the statements below run.
# The snippet refers to `org_list`, which is not defined in the visible part
# of this file. NOTE(review): looks like an earlier draft of the split logic
# -- confirm whether it can be removed.
'''
for i in org_list:
if orgName['upc'][i] >5:
writer = pd.ExcelWriter(dirPath + str(i) + '指标体系.xlsx')
tempdata = orgName[orgName['upc'] <5]
tempdata.to_excel(writer, index=False)
writer.save()
writer.close()
'''
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)