Python合并或拆分Excel文件——基于pandas实现

应用场景1：某个文件夹中有多个Excel文件，现在需要将其合成一个Excel文件，一个工作簿中包含多个表单页

应用场景2：某Excel文件包含多个表单页，现在需要将其分解成多个独立的Excel文件，每个工作簿中包含一个表单页

pandas具有多种文件类型的输入输出功能，话不多说，直接上代码：

#coding: utf-8
import os
import pandas as pd

#使用pandas库将指定路径文件夹中的所有Excel(*.xlsx和*.xls)文件合并为一个文件
def merge_ExcelFiles(dirPath, outputFileName='合并结果.xlsx',includeFilename=False):
    dirPath+='\\'
    file_list=os.listdir(dirPath)
    wbo=pd.ExcelWriter(dirPath+outputFileName, engine='xlsxwriter')
    sheetlist=[]#利用列表来收集已有的sheet名字
    for file in file_list:
        #pandas同时支持.xls和.xlsx文件
        if file.endswith('.xls') or file.endswith('.xlsx'):#参数sheet_name默认为0，此时函数返回的直接就是DataFrame类型，但是那样的话只能读取第一个sheet，设置为None可以读取所有的sheet，返回数据则是字典类型
            print(file)
            df=pd.read_excel(dirPath+file, sheet_name=None, dtype='object')#参数dtype默认是None，但为了保护数据（例如身份z号被科学计数法丢失后面几位数字），需要使用object类型
            for key, value in df.items():#
                if includeFilename:
                    sheetname=os.path.splitext(file)[0]+'-'+key #使用"文件名+sheet_name",仍有重名风险，例如xxx.xlsx与xxx.xls中相同的sheet_name
                else:
                    sheetname=key #直接使用原来的sheet_name,有重名的风险
                namelen=len(sheetname)
                #在Excel中的sheet_name最长不能超过31个字符(must be <= 31 chars),否则会出错
                if namelen>=28:#考虑冗余，如果重名了后面还要加数字
                    sheetname=r'...'+sheetname[namelen-25:] #截取后面的25位
                #当sheet_name重名时保存会丢失数据，需要对重复sheet_name进行自增处理，最多100个重复sheet，不然sheet_name超过31个字符会出错
                sheetlist.append(sheetname)
                if sheetlist.count(sheetname)>1:#说明存在重复的sheet_name
                    sheetname=sheetname+'-'+str(sheetlist.count(sheetname))#利用列表中元素重复的数目来对同名sheetname进行重命名
                pd.DataFrame(value).to_excel(wbo, sheet_name=sheetname, index=False, header=True)
                print(sheetname)
    print('OK!')
    wbo.save()    

# Split every sheet of one Excel workbook (*.xlsx or *.xls) into its own
# standalone Excel file with pandas.
def split_ExcelFiles(filePath):
    """Write each sheet of the workbook at ``filePath`` to a separate file.

    Every output file is named ``<source path without extension>-<sheet name>``
    followed by the source extension, and contains that single sheet under its
    original name. Each written path is printed, followed by ``OK!``.
    """
    # sheet_name=None returns all sheets as {sheet name: DataFrame}.
    # dtype='object' is the author's safeguard against long digit strings
    # (e.g. ID numbers) losing trailing digits to scientific notation.
    workbook = pd.read_excel(filePath, sheet_name=None, dtype='object')
    for sheet_title in workbook:
        stem, extension = os.path.splitext(filePath)
        target = stem + '-' + sheet_title + extension
        frame = pd.DataFrame(workbook[sheet_title])
        frame.to_excel(target, sheet_name=sheet_title, index=False, header=True)
        print(target)
    print('OK!')
    
    
if __name__=="__main__":
    import time

    # Example inputs: a folder of workbooks to merge and a workbook to split.
    dirPath=r'D:\课程\学生基础信息'
    filePath=r'D:\课程\学生基础信息\成绩汇总.xlsx'

    # Time whichever operation is enabled below.
    started=time.time()
    merge_ExcelFiles(dirPath,'合并New.xlsx')
    # split_ExcelFiles(filePath)
    elapsed=time.time()-started
    print("Spending Time：{:.3f}s".format(elapsed))

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/langs/876289.html

Python合并或拆分Excel文件——基于pandas实现

发表评论

评论列表（0条）