CSV 切割 demo:实现了按行数切割以及按文件数切割
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2021/4/2 12:17 PM
# @Author : Xinlong Chen
# @file : test.py
"""Split a CSV file into several smaller CSV files.

Two strategies are offered: a fixed number of data lines per output file,
or a target number of output files. Every output file starts with a copy of
the source file's first line (the header).
"""
import itertools
import math
import os


class CsvSplit:
    """Split a CSV file by line count or by file count.

    The source is treated as plain text and split on physical lines, so a
    quoted CSV field that contains an embedded newline may be cut in two.
    """

    def __mkSubfile(self, lines, dir, head, srcname, sub):
        """Write one output file and return the index for the next one.

        Args:
            lines: Data lines (each normally ending in a newline) to write.
            dir: Existing directory that receives the output file.
            head: Header line written before ``lines``.
            srcname: Source file name or path; only its base name is used
                to derive the output name ``<stem>_<sub><ext>``.
            sub: 1-based index of this output file.

        Returns:
            ``sub + 1``.
        """
        # Use only the base name: a source path such as 'data/in.csv' must
        # not leak its directory part into the output location.
        stem, extname = os.path.splitext(os.path.basename(srcname))
        filename = os.path.join(dir, '%s_%s%s' % (stem, sub, extname))
        print('make file: %s' % filename)
        with open(filename, 'w') as fout:
            fout.write(head)
            fout.writelines(lines)
        return sub + 1

    def __sourceExists(self, filename):
        """Return True if ``filename`` exists; otherwise report it and return False."""
        if os.path.exists(filename):
            return True
        print("error filename")
        return False

    def __writeChunks(self, lines, dir, head, srcname, count):
        """Write ``lines`` into consecutive files of at most ``count`` lines.

        ``lines`` may be any iterable (a list or an open file); it is
        consumed lazily so only one chunk is held in memory at a time.
        """
        os.makedirs(dir, exist_ok=True)
        remaining = iter(lines)
        sub = 1
        while True:
            chunk = list(itertools.islice(remaining, count))
            if not chunk:
                break
            sub = self.__mkSubfile(chunk, dir, head, srcname, sub)

    def splitBylineCount(self, filename, dir, count: int, lines=None, head=""):
        """Split into files that each hold at most ``count`` data lines.

        Args:
            filename: Source CSV path. Also used to name the output files.
            dir: Output directory; created if it does not exist.
            count: Maximum number of data lines per output file (>= 1).
            lines: Optional pre-read data lines. When omitted or empty, the
                header and data lines are read from ``filename`` instead.
            head: Header line to use together with ``lines``.

        Raises:
            ValueError: If ``count`` is smaller than 1.

        If the source file has to be read but does not exist, an error
        message is printed and nothing is written.
        """
        if count < 1:
            raise ValueError('count must be a positive integer, got %r' % (count,))

        if lines:
            self.__writeChunks(lines, dir, head, filename, count)
            return

        if not self.__sourceExists(filename):
            return
        # Stream straight from the file so large inputs are never fully
        # loaded into memory.
        with open(filename, 'r') as src:
            head = src.readline()
            self.__writeChunks(src, dir, head, filename, count)

    def splitByfileCount(self, filename, dir, filecount=10):
        """Split into at most ``filecount`` files of near-equal size.

        Each output file receives ``ceil(data_lines / filecount)`` lines, so
        fewer than ``filecount`` files may be produced when the lines do not
        divide evenly.

        Args:
            filename: Source CSV path. Also used to name the output files.
            dir: Output directory; created if it does not exist.
            filecount: Upper bound on the number of output files (>= 1).

        Raises:
            ValueError: If ``filecount`` is smaller than 1.

        If the source file does not exist, an error message is printed and
        nothing is written.
        """
        if filecount < 1:
            raise ValueError(
                'filecount must be a positive integer, got %r' % (filecount,))
        if not self.__sourceExists(filename):
            return

        # First pass: count the data lines (everything after the header).
        with open(filename, 'r') as src:
            src.readline()
            all_length = sum(1 for _ in src)

        # max(1, ...) keeps the chunk size valid for a header-only file.
        each_file = max(1, math.ceil(all_length / filecount))
        self.splitBylineCount(filename, dir, each_file)


if __name__ == '__main__':
    CsvSplit().splitByfileCount('weibo.csv', dir='weibo', filecount=10)
    CsvSplit().splitBylineCount('weibo.csv', dir='weibo1', count=500000)
总结 以上是内存溢出为你收集整理的大文件切割Demo Python全部内容,希望文章能够帮你解决大文件切割Demo Python所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)