#文件名:
#BatchConverWords2Html.py
#说明:
#批量将一个文件夹下的所有.doc/.docx文件转为.html文件,需要安装对应的win32模块
#调用方式:进入源程序目录,命令:python BatchConverWords2Html.py RootDir
from win32com import client as wc
import os
word = wc.Dispatch('Word.Application')
def wordsToHtml(dir):
for path, subdirs, files in os.walk(dir):
for wordFile in files:
wordFullName = os.path.join(path, wordFile)
#print "word:" + wordFullName
doc = word.Documents.Open(wordFullName)
wordFile2 = unicode(wordFile, "gbk")
dotIndex = wordFile2.rfind(".")
if(dotIndex == -1):
print "********************ERROR: 未取得后缀名!"
fileSuffix = wordFile2[(dotIndex + 1) : ]
if(fileSuffix == "doc" or fileSuffix == "docx"):
fileName = wordFile2[ : dotIndex]
htmlName = fileName + ".html"
htmlFullName = os.path.join(unicode(path, "gbk"), htmlName)
#htmlFullName = unicode(path, "gbk") + "\\" + htmlName
print "generate html:" + htmlFullName
doc.SaveAs(htmlFullName, 10)
doc.Close()
word.Quit()
print ""
print "Finished!"
if __name__ == '__main__':
import sys
if len(sys.argv) != 2:
print "Usage: python funcName.py rootdir"
sys.exit(100)
wordsToHtml(sys.argv[1])运行结果就是在rootdir目录下的所有word文档转为简洁版的html网页文件,生成的文件存在原word同目录下,生成 xxx.files 文件夹。
pip install pydocx
from pydocx import PyDocX
html = PyDocX.to_html("test.docx")
f = open("test.html", 'w', encoding="utf-8")
f.write(html)
f.close()
通过网页上传word文档,只接收docx
<form method="post" enctype="multipart/form-data">
<input type="file" name="file" accept="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
</form>
windows下,将doc转为docx
pip3 install pypiwin32
from win32com import client
word = client.Dispatch("Word.Application")
doc = word.Documents.Open("D:\ \ .doc") //绝对路径 doc文件
doc.SaveAs("D:\ \ .docx",16) //保存的docx 文件,绝对路径
doc.Close()
word.Quit()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)