Java 操作 Word_软件运维

最近在做项目的时候需要这么一个功能：客户有一大堆 Word 格式的模板，需要我们用程序向模板里面填充一些数据。如果是直接重新写一个 Word，用 POI 或 iText 都可以搞定；关键是读取并解析，而且 Word 里有表格、图片等其他东西，这两个框架要解析就比较麻烦。然而用 jacob 却可以轻松搞定。

下面是借鉴了别人已经包装好了的代码

import com.jacob.activeX.ActiveXComponent;

import com.jacob.com.Dispatch;

import com.jacob.com.Variant;

public class WordHandle{

//运行时的Word程序

private ActiveXComponent app

//word对象

private Dispatch words

//当前的word文档

private Dispatch doc

//当前光标位置

private Dispatch cursor

//当前文档是否只读

private boolean readOnly

//当前文档中所有表格

private Dispatch tables

//当前所在表格

private Dispatch table

private int count

public WordHandle()

{

this app = new ActiveXComponent( Word Application )

this app setProperty( Visible new Variant(false)) // 设置word不可见

words = this app getProperty( Documents ) toDispatch()

this doc = null

this cursor = null

this readOnly = true

unt =

}

public boolean open(String fileName boolean readOnly) throws Exception

{

if (doc != null)

{

System out println( 当前文件未关闭 )

return false

}

this doc = Dispatch invoke(this words Open Dispatch Method new Object[] {fileName new Variant(false) new Variant(readOnly)} new int[ ]) toDispatch()

this cursor = app getProperty( Selection ) toDispatch()

this tables = Dispatch get(this doc Tables ) toDispatch()

this readOnly = readOnly

unt = Dispatch get(Dispatch get(this doc Words ) toDispatch() Count ) getInt()

System out println( 打开文件 + fileName + (readOnly ? ReadOnly : Writable ))

return true

}

public boolean newFile() throws Exception

{

if (doc != null)

{

System out println( 当前文件未关闭 )

return false

}

this doc = Dispatch call(this words Add ) toDispatch()

this readOnly = false

this cursor = app getProperty( Selection ) toDispatch()

this tables = Dispatch get(this doc Tables ) toDispatch()

System out println( 新建word文档 )

return true

}

public boolean close()

{

String fileName = null

if (this doc != null)

{

try

{

fileName = Dispatch get(this doc Name ) getString()

Dispatch call(this doc Close new Variant(false))

}

catch (Exception e)

{

e printStackTrace()

}

finally

{

this doc = null

}

System out println( 关闭文件 + fileName)

return true

}

public boolean quit()

{

try

{

this app invoke( Quit new Variant[] {})

}

catch (Exception e)

{

e printStackTrace()

}

System out println( 退出word )

return true

}

public boolean saveAs(String fileName) throws Exception

{

if (this doc == null)

{

System out println( 当前无文件 )

return false

}

else

{

Dispatch call(this doc SaveAs fileName)

System out println( 另存为 + fileName)

return true

}

public boolean save() throws Exception

{

if (this doc == null)

{

System out println( 当前无文档无法保存 )

return false

}

else

{

if (this readOnly)

{

System out println( 只读文档保存失败 )

return false

}

Dispatch call(this doc Save )

System out println( 保存完成 )

return true

}

public boolean moveRight(int steps) throws Exception

{

//int start = Dispatch get(this cursor Start ) getInt()

//Dispatch put(this cursor Start start + steps)

for (int i= i<stepsi++)

{

Dispatch call(cursor MoveRight )

}

return true

}

public boolean moveLeft(int steps) throws Exception

{

for (int i= i<stepsi++)

{

Dispatch call(cursor MoveLeft )

}

return true

}

public int search(String str) throws Exception

{

// 从cursor所在位置开始查询

Dispatch find = Dispatch call(this cursor Find ) toDispatch()

// 设置要查找的内容

Dispatch put(find Text str)

// 向前查找

Dispatch put(find Forward True )

// 设置格式

Dispatch put(find Format True )

// 大小写匹配

Dispatch put(find MatchCase True )

// 全字匹配

Dispatch put(find MatchWholeWord True )

// 查找

if (!Dispatch call(find Execute ) getBoolean())

return

else

{

return Dispatch get(this cursor Start ) getInt()

}

public int searchOnly(String str) throws Exception

{

// 从cursor所在位置开始查询

Dispatch find = Dispatch call(this cursor Find ) toDispatch()

// 设置要查找的内容

Dispatch put(find Text str)

// 向前查找

Dispatch put(find Forward True )

// 大小写匹配

Dispatch put(find MatchCase True )

// 全字匹配

Dispatch put(find MatchWholeWord True )

if (!Dispatch call(find Execute ) getBoolean())

return

else

{

int start = Dispatch get(this cursor Start ) getInt()

Dispatch put(this cursor End unt)

//System out println(start)

return start

}

public String getBeeen(int start int end) throws Exception

{

Dispatch range = Dispatch get(this cursor Range ) toDispatch()

Dispatch call(range SetRange start end)

return Dispatch get(range Text ) getString()

}

public String getLineAfter(int start) throws Exception

{

Dispatch put(this cursor Start start)

int length = Dispatch call(this cursor EndKey ) getInt() + start

return getBeeen(start length)

}

public String getLine(int position) throws Exception

{

Dispatch put(this cursor Start position)

Dispatch call(this cursor SelectRow )

int start = Dispatch get(this cursor Start ) getInt()

int end = Dispatch get(this cursor End ) getInt()

return getBeeen(start start + end)

}

public boolean gotoPage(int index) throws Exception

{

Dispatch invoke(this cursor Goto Dispatch Method new Object[] { String valueOf(index)} new int[ ])

//Dispatch call(this cursor GoTo wdGoToLine wdGoToNext String valueOf(index) null)

return true

}

public int getCurrentCursor() throws Exception

{

return Dispatch get(this cursor Start ) getInt()

}

public boolean setCursorMode() throws Exception

{

Dispatch put(this cursor End Dispatch get(this cursor Start ) getInt())

return true

}

public boolean gotoHome() throws Exception

{

Dispatch put(this cursor Start )

return true

}

public boolean insert(int steps String str) throws Exception

{

int start = Dispatch get(this cursor Start ) getInt() + steps

Dispatch put(this cursor Start start)

Dispatch call(this cursor InsertBefore str)

//this getCount()

Dispatch put(this cursor Start start + str length())

//System out println(Dispatch get(this cursor Start ) getInt() + + (Dispatch get(this cursor Start ) getInt()+Dispatch get(this cursor End ) getInt()))

return true

}

public boolean replace(String str) throws Exception

{

Dispatch put(this cursor Text str)

return true

}

public int getTableNum() throws Exception

{

return Dispatch get(this tables Count ) getInt()

}

public boolean setCurrentTable(int index) throws Exception

{

this table = Dispatch call(this tables Item new Variant(index)) toDispatch()

Dispatch call(this table Select )

return true

}

public String getCell(int row int col) throws Exception

{

Dispatch cell = Dispatch call(table Cell Integer toString(row) Integer toString(col)) toDispatch()

Dispatch call(cell Select )

String tmp = Dispatch get(this cursor Text ) getString()

//System out println( + tmp)

if (tmp length() >)

{

return tmp substring( tmp length() )

}

else

return

}

public boolean replaceCell(int row int col String str) throws Exception

{

Dispatch cell = Dispatch call(table Cell Integer toString(row) Integer toString(col)) toDispatch()

Dispatch call(cell Select )

Dispatch put(this cursor Text str)

return true

}

public static void main(String args[])

{

WordHandle word = new WordHandle()

try

{

word open( D://doc//tax//开业登记合并事项实地调查表 doc false)

System out println(word getTableNum())

word setCurrentTable( )

word replaceCell( old Name )

word replaceCell( 经营范围 )

word replaceCell( )

word saveAs( D://开业登记合并事项实地调查表 doc )

word close()

word quit()

}

catch (Exception e)

{

}

当然，要运行上面的代码，需要先下载 jacob，下载地址为：https://sourceforge.net/projects/jacob-project/

lishixinzhi/Article/program/Java/hx/201311/26729

将Word转Html的原理是这样的：

1、客户上传Word文档到服务器

2、服务器调用OpenOffice程序打开上传的Word文档

3、OpenOffice将Word文档另存为Html格式

4、Over

至此可见，这要求服务器端安装OpenOffice软件，其实也可以是MS Office，不过OpenOffice的优势是跨平台，你懂的。恩，说明一下，本文的测试基于 MS Win7 Ultimate X64 系统。

下面就是规规矩矩的实现。

1、下载OpenOffice，

2、下载 Jodconverter，这是一个开启 OpenOffice 进行格式转化的第三方 jar 包。

3、泡杯热茶，等待下载。

4、安装OpenOffice，安装结束后，调用cmd，启动OpenOffice的一项服务：C:\Program Files (x86)\OpenOffice.org 3\program>soffice -headless -accept="socket,port=8100;urp;"

5、打开eclipse

6、喝杯热茶，等待eclipse打开。

7、新建eclipse项目，导入 Jodconverter/lib 下的 jar 包。

* commons-io

* jodconverter

* juh

* jurt

* ridl

* slf4j-api

* slf4j-jdk14

* unoil

* xstream

8、Coding...

查看代码

package com.mzule.doc2html.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.ConnectException;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;

/**

* 将Word文档转换成html字符串的工具类

* @author MZULE

public class Doc2Html {

public static void main(String[] args) {

System.out

.println(toHtmlString(new File("C:/test/test.doc"), "C:/test"))

}

/**

* 将word文档转换成html文档

* @param docFile

*需要转换的word文档

* @param filepath

*转换之后html的存放路径

* @return 转换之后的html文件

public static File convert(File docFile, String filepath) {

// 创建保存html的文件

File htmlFile = new File(filepath + "/" + new Date().getTime()

+ ".html")

// 创建Openoffice连接

OpenOfficeConnection con = new SocketOpenOfficeConnection(8100)

try {

// 连接

con.connect()

} catch (ConnectException e) {

System.out.println("获取OpenOffice连接失败...")

e.printStackTrace()

}

// 创建转换器

DocumentConverter converter = new OpenOfficeDocumentConverter(con)

// 转换文档问html

converter.convert(docFile, htmlFile)

// 关闭openoffice连接

con.disconnect()

return htmlFile

}

/**

* 将word转换成html文件，并且获取html文件代码。

* @param docFile

*需要转换的文档

* @param filepath

*文档中图片的保存位置

* @return 转换成功的html代码

public static String toHtmlString(File docFile, String filepath) {

// 转换word文档

File htmlFile = convert(docFile, filepath)

// 获取html文件流

StringBuffer htmlSb = new StringBuffer()

try {

BufferedReader br = new BufferedReader(new InputStreamReader(

new FileInputStream(htmlFile)))

while (br.ready()) {

htmlSb.append(br.readLine())

}

br.close()

// 删除临时文件

htmlFile.delete()

} catch (FileNotFoundException e) {

e.printStackTrace()

} catch (IOException e) {

e.printStackTrace()

}

// HTML文件字符串

String htmlStr = htmlSb.toString()

// 返回经过清洁的html文本

return clearFormat(htmlStr, filepath)

}

/**

* 清除一些不需要的html标记

* @param htmlStr

*带有复杂html标记的html语句

* @return 去除了不需要html标记的语句

protected static String clearFormat(String htmlStr, String docImgPath) {

// 获取body内容的正则

String bodyReg = "<BODY .*</BODY>"

Pattern bodyPattern = Pattern.compile(bodyReg)

Matcher bodyMatcher = bodyPattern.matcher(htmlStr)

if (bodyMatcher.find()) {

// 获取BODY内容，并转化BODY标签为DIV

htmlStr = bodyMatcher.group().replaceFirst("<BODY", "<DIV")

.replaceAll("</BODY>", "</DIV>")

}

// 调整图片地址

htmlStr = htmlStr.replaceAll("<IMG SRC=\"", "<IMG SRC=\"" + docImgPath

+ "/")

// 把<P></P>转换成</div></div>保留样式

// content = content.replaceAll("(<P)([^>]*>.*?)(<\\/P>)",

// "<div$2</div>")

// 把<P></P>转换成</div></div>并删除样式

htmlStr = htmlStr.replaceAll("(<P)([^>]*)(>.*?)(<\\/P>)", "<p$3</p>")

// 删除不需要的标签

htmlStr = htmlStr

.replaceAll(

"<[/]?(font|FONT|span|SPAN|xml|XML|del|DEL|ins|INS|meta|META|[ovwxpOVWXP]:\\w+)[^>]*?>",

"")

// 删除不需要的属性

htmlStr = htmlStr

.replaceAll(

"<$1$2>")

return htmlStr

}

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/yw/8135669.html

java *** 作word

发表评论

评论列表（0条）