最近需要一个txt文件的批量转码功能,在网上找到一段批量处理java源文件的py程序如下:
#-*- coding: utf-8 -*- import codecs
import os
import shutil
import re
import chardet def convert_encoding(filename, target_encoding):
# Backup the origin file.
shutil.copyfile(filename, filename + '.bak') # convert file from the source encoding to target encoding
content = codecs.open(filename, 'r').read()
source_encoding = chardet.detect(content)['encoding']
print source_encoding, filename
content = content.decode(source_encoding) #.encode(source_encoding)
codecs.open(filename, 'w', encoding=target_encoding).write(content) def main():
for root, dirs, files in os.walk(os.getcwd()):
for f in files:
if f.lower().endswith('.txt'):
filename = os.path.join(root, f)
try:
convert_encoding(filename, 'utf-8')
except Exception, e:
print filename def process_bak_files(action='restore'):
for root, dirs, files in os.walk(os.getcwd()):
for f in files:
if f.lower().endswith('.txt.bak'):
source = os.path.join(root, f)
target = os.path.join(root, re.sub('\.txt\.bak$', '.txt', f, flags=re.IGNORECASE))
try:
if action == 'restore':
shutil.move(source, target)
elif action == 'clear':
os.remove(source)
except Exception, e:
print source if __name__ == '__main__':
# process_bak_files(action='clear')
main()
其中import了python的chardet模块,此模块需要单独安装,到http://pypi.python.org/pypi/chardet#downloads下载chardet-2.1.1.tar.gz,
解压后其中有setup.py文件,用于模块的安装,执行命令:python setup.py install ,报错:ImportError: No module named setuptools ,
需要安装setup tools,网上下载ez_setup.py,cmd执行之。
重跑一遍setup.py,chardet安装成功!
另:ubuntu安装命令: # apt-get install python-setuptools
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)