首先在项目中引用Spire.Doc.dll,然后用下面的代码提取Word文档中的文本并保存为txt文件:
using System.IO;
using System.Text;
using Spire.Doc;
using Spire.Doc.Documents;

namespace ReadText
{
    /// <summary>
    /// Console sample that copies the paragraph text of a Word document
    /// into a plain-text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "input.docx" from the working directory, collects the text of
        /// every paragraph in every section, and writes the result to "output.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create the Document object and load the Word file.
            Document doc = new Document();
            doc.LoadFromFile("input.docx");

            // Accumulates the extracted text, one paragraph per line.
            StringBuilder sb = new StringBuilder();

            // Walk every section of the document.
            foreach (Section section in doc.Sections)
            {
                // Walk every paragraph of the current section.
                foreach (Paragraph paragraph in section.Paragraphs)
                {
                    sb.AppendLine(paragraph.Text);
                }
            }

            // Write the collected text to the output file.
            File.WriteAllText("output.txt", sb.ToString());
        }
    }
}
提取Word文档中的图片,保存到本地:
using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;

namespace ReadImage
{
    /// <summary>
    /// Console sample that saves every picture found in the paragraphs of a
    /// Word document as a PNG file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "Image.doc" from the working directory and saves each picture
        /// that is a direct child of a paragraph to "images\Image-N.png",
        /// where N is a zero-based running counter.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create the Document object and load the Word file.
            Document doc = new Document();
            doc.LoadFromFile(@"Image.doc");

            // Make sure the output folder exists before saving into it;
            // CreateDirectory is a no-op when the folder is already there.
            System.IO.Directory.CreateDirectory("images");

            int index = 0;

            // Walk every section of the document.
            foreach (Section section in doc.Sections)
            {
                // Walk every paragraph of the current section.
                foreach (Paragraph paragraph in section.Paragraphs)
                {
                    // Walk every child object of the current paragraph.
                    foreach (DocumentObject docObject in paragraph.ChildObjects)
                    {
                        // Only picture objects are of interest.
                        if (docObject.DocumentObjectType != DocumentObjectType.Picture)
                        {
                            continue;
                        }

                        // Guard the cast so an unexpected object type is skipped
                        // instead of causing a NullReferenceException.
                        DocPicture picture = docObject as DocPicture;
                        if (picture == null)
                        {
                            continue;
                        }

                        // Save the picture as PNG under a numbered file name.
                        string imageName = string.Format(@"images\Image-{0}.png", index);
                        picture.Image.Save(imageName, System.Drawing.Imaging.ImageFormat.Png);
                        index++;
                    }
                }
            }
        }
    }
}
你是指从扩展名是docx的Word文件中提取xml文件?可以用WinRAR打开docx文件,然后在里面的word文件夹中找到document.xml文件,解压出来。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)