Java 操作 Word

Java 操作 Word,第1张

最近在做项目的时候需要这么一个功能:客户有一大堆 Word 格式的模板,需要我们用程序向模板里面填充一些数据。如果是直接重新写一个 Word,用 POI 或 iText 都可以搞定;关键是读取并解析,而且 Word 里有表格、图片等其他东西,这两个框架要解析就比较麻烦,然而用 Jacob 却可以轻松搞定。

下面是借鉴了别人已经包装好了的代码

import jacob activeX ActiveXComponent

import Dispatch

import Variant

// Wrapper around a Word automation session: it keeps the application object,
// its Documents property, the currently open document, the Selection (used as
// a cursor) and the current table, and exposes open/save/search/edit helpers.
// NOTE(review): this listing appears to have lost punctuation (dots, commas,
// semicolons, string quotes) and numeric literals during extraction, so it
// does not compile as shown. The code lines are left exactly as found; lost
// tokens are flagged with review notes instead of being guessed.
public class WordHandle{

// The Word program in use at runtime

private ActiveXComponent app

// Word object (assigned from the application's Documents property)

private Dispatch words

// The current Word document

private Dispatch doc

// The current cursor position (assigned from the application's Selection)

private Dispatch cursor

// Whether the current document is read-only

private boolean readOnly

// All tables in the current document

private Dispatch tables

// The table currently being worked on

private Dispatch table

private int count

// Creates the Word Application component, sets its Visible property to false
// and fetches its Documents property. No document is open afterwards.
public WordHandle()

{

this app = new ActiveXComponent( Word Application )

this app setProperty( Visible new Variant(false))    // make Word invisible

words = this app getProperty( Documents ) toDispatch()

this doc = null

this cursor = null

this readOnly = true

// NOTE(review): truncated assignment; presumably initialises the count field,
// but both the target and the value were lost.
unt =

}

// Opens fileName through the Documents object and caches the Selection, the
// document's Tables and the readOnly flag. Prints a message and returns false
// when a document is already open; returns true otherwise.
public boolean open(String fileName boolean readOnly) throws Exception

{

if (doc != null)

{

System out println( 当前文件未关闭 )

return false

}

this doc = Dispatch invoke(this words Open Dispatch Method new Object[] {fileName new Variant(false) new Variant(readOnly)} new int[ ]) toDispatch()

this cursor = app getProperty( Selection ) toDispatch()

this tables = Dispatch get(this doc Tables ) toDispatch()

this readOnly = readOnly

// NOTE(review): the left-hand side is truncated; presumably the document's
// Words Count is stored in the count field.
unt = Dispatch get(Dispatch get(this doc Words ) toDispatch() Count ) getInt()

System out println( 打开文件 + fileName + (readOnly ? ReadOnly : Writable ))

return true

}

// Adds a new document through the Documents object, marks it writable and
// caches the Selection and the document's Tables. Prints a message and returns
// false when a document is already open; returns true otherwise.
public boolean newFile() throws Exception

{

if (doc != null)

{

System out println( 当前文件未关闭 )

return false

}

this doc = Dispatch call(this words Add ) toDispatch()

this readOnly = false

this cursor = app getProperty( Selection ) toDispatch()

this tables = Dispatch get(this doc Tables ) toDispatch()

System out println( 新建word文档 )

return true

}

// Closes the current document, if any. Exceptions are printed rather than
// propagated, doc is reset to null in the finally block, and the method
// always returns true.
public boolean close()

{

String fileName = null

if (this doc != null)

{

try

{

fileName = Dispatch get(this doc Name ) getString()

// NOTE(review): the false argument presumably means "do not save changes";
// confirm against the Word object model.
Dispatch call(this doc Close new Variant(false))

}

catch (Exception e)

{

e printStackTrace()

}

finally

{

this doc = null

}

}

System out println( 关闭文件 + fileName)

return true

}

// Invokes Quit on the application. Exceptions are printed rather than
// propagated and the method always returns true.
public boolean quit()

{

try

{

this app invoke( Quit new Variant[] {})

}

catch (Exception e)

{

e printStackTrace()

}

System out println( 退出word )

return true

}

// Calls SaveAs on the current document with fileName. Returns false when no
// document is open, true otherwise.
public boolean saveAs(String fileName) throws Exception

{

if (this doc == null)

{

System out println( 当前无文件 )

return false

}

else

{

Dispatch call(this doc SaveAs fileName)

System out println( 另存为 + fileName)

return true

}

}

// Calls Save on the current document. Returns false when no document is open
// or when the readOnly flag is set; returns true after saving.
public boolean save() throws Exception

{

if (this doc == null)

{

System out println( 当前无文档 无法保存 )

return false

}

else

{

if (this readOnly)

{

System out println( 只读文档 保存失败 )

return false

}

Dispatch call(this doc Save )

System out println( 保存完成 )

return true

}

}

// Calls MoveRight on the cursor inside a loop bounded by steps; always
// returns true.
public boolean moveRight(int steps) throws Exception

{

//int start = Dispatch get(this cursor Start ) getInt()

//Dispatch put(this cursor Start start + steps)

// NOTE(review): the loop header lost its initial value and separators.
for (int i= i<stepsi++)

{

Dispatch call(cursor MoveRight )

}

return true

}

// Calls MoveLeft on the cursor inside a loop bounded by steps; always
// returns true.
public boolean moveLeft(int steps) throws Exception

{

// NOTE(review): the loop header lost its initial value and separators.
for (int i= i<stepsi++)

{

Dispatch call(cursor MoveLeft )

}

return true

}

// Configures the cursor's Find object (Text, Forward, Format, MatchCase,
// MatchWholeWord) and executes it. Returns the cursor's Start when Execute
// reports success.
public int search(String str) throws Exception

{

// Start searching from the cursor's position

Dispatch find = Dispatch call(this cursor Find ) toDispatch()

// Set the text to look for

Dispatch put(find Text str)

// Search forward

Dispatch put(find Forward True )

// Set Format

Dispatch put(find Format True )

// Match case

Dispatch put(find MatchCase True )

// Match whole words

Dispatch put(find MatchWholeWord True )

// Run the search

if (!Dispatch call(find Execute ) getBoolean())

// NOTE(review): the value returned when nothing is found was lost.
return

else

{

return Dispatch get(this cursor Start ) getInt()

}

}

// Same Find set-up as search but without the Format property. On success it
// reads the cursor's Start, then sets the cursor's End and returns the start.
public int searchOnly(String str) throws Exception

{

// Start searching from the cursor's position

Dispatch find = Dispatch call(this cursor Find ) toDispatch()

// Set the text to look for

Dispatch put(find Text str)

// Search forward

Dispatch put(find Forward True )

// Match case

Dispatch put(find MatchCase True )

// Match whole words

Dispatch put(find MatchWholeWord True )

if (!Dispatch call(find Execute ) getBoolean())

// NOTE(review): the value returned when nothing is found was lost.
return

else

{

int start = Dispatch get(this cursor Start ) getInt()

// NOTE(review): "unt" is a truncated name, presumably the count field.
Dispatch put(this cursor End unt)

//System out println(start)

return start

}

}

// Returns the Text of the cursor's Range after SetRange(start, end).
// NOTE(review): the method name looks garbled (possibly "getBetween"); it is
// kept because getLineAfter and getLine call it by this name.
public String getBeeen(int start int end) throws Exception

{

Dispatch range = Dispatch get(this cursor Range ) toDispatch()

Dispatch call(range SetRange start end)

return Dispatch get(range Text ) getString()

}

// Sets the cursor's Start to start, calls EndKey, adds its integer result to
// start and returns the text between start and that sum.
public String getLineAfter(int start) throws Exception

{

Dispatch put(this cursor Start start)

int length = Dispatch call(this cursor EndKey ) getInt() + start

return getBeeen(start length)

}

// Sets the cursor's Start to position, calls SelectRow, then returns text via
// getBeeen using the selection's Start and End.
public String getLine(int position) throws Exception

{

Dispatch put(this cursor Start position)

Dispatch call(this cursor SelectRow )

int start = Dispatch get(this cursor Start ) getInt()

int end = Dispatch get(this cursor End ) getInt()

// NOTE(review): the upper bound passed is start + end, not end; confirm this
// is intended.
return getBeeen(start start + end)

}

// Invokes Goto on the cursor with index converted to a string; always
// returns true.
public boolean gotoPage(int index) throws Exception

{

// NOTE(review): leading arguments of the Object[] and the int[] size may have
// been lost.
Dispatch invoke(this cursor Goto Dispatch Method new Object[] { String valueOf(index)} new int[ ])

//Dispatch call(this cursor GoTo wdGoToLine wdGoToNext String valueOf(index) null)

return true

}

// Returns the cursor's Start.
public int getCurrentCursor() throws Exception

{

return Dispatch get(this cursor Start ) getInt()

}

// Sets the cursor's End to its Start; always returns true.
public boolean setCursorMode() throws Exception

{

Dispatch put(this cursor End Dispatch get(this cursor Start ) getInt())

return true

}

// Sets the cursor's Start; always returns true.
public boolean gotoHome() throws Exception

{

// NOTE(review): the value assigned to Start was lost.
Dispatch put(this cursor Start )

return true

}

// Moves the cursor's Start forward by steps, calls InsertBefore with str, then
// sets Start to the earlier position plus str's length. Always returns true.
public boolean insert(int steps String str) throws Exception

{

int start = Dispatch get(this cursor Start ) getInt() + steps

Dispatch put(this cursor Start start)

Dispatch call(this cursor InsertBefore str)

//this getCount()

Dispatch put(this cursor Start start + str length())

//System out println(Dispatch get(this cursor Start ) getInt() +    + (Dispatch get(this cursor Start ) getInt()+Dispatch get(this cursor End ) getInt()))

return true

}

// Sets the cursor's Text to str; always returns true.
public boolean replace(String str) throws Exception

{

Dispatch put(this cursor Text str)

return true

}

// Returns the Count of the cached tables object.
public int getTableNum() throws Exception

{

return Dispatch get(this tables Count ) getInt()

}

// Fetches Item(index) from the tables object, stores it as the current table
// and selects it. Always returns true.
public boolean setCurrentTable(int index) throws Exception

{

this table = Dispatch call(this tables Item new Variant(index)) toDispatch()

Dispatch call(this table Select )

return true

}

// Selects Cell(row, col) of the current table and reads the cursor's Text.
// Returns a substring of that text when its length passes a threshold.
public String getCell(int row int col) throws Exception

{

Dispatch cell = Dispatch call(table Cell Integer toString(row) Integer toString(col)) toDispatch()

Dispatch call(cell Select )

String tmp = Dispatch get(this cursor Text ) getString()

//System out println( + tmp)

// NOTE(review): the length threshold, the substring bounds and the fallback
// return value were all lost.
if (tmp length() >)

{

return tmp substring( tmp length() )

}

else

return

}

// Selects Cell(row, col) of the current table and sets the cursor's Text to
// str. Always returns true.
public boolean replaceCell(int row int col String str) throws Exception

{

Dispatch cell = Dispatch call(table Cell Integer toString(row) Integer toString(col)) toDispatch()

Dispatch call(cell Select )

Dispatch put(this cursor Text str)

return true

}

// Demo entry point: opens a document, prints its table count, selects a table,
// replaces several cells, saves under another name, then closes and quits.
// NOTE(review): most call arguments were lost. The empty catch hides any
// failure, and an exception also skips the close/quit calls.
public static void main(String args[])

{

WordHandle word = new WordHandle()

try

{

word open( D://doc//tax//开业登记合并事项实地调查表 doc false)

System out println(word getTableNum())

word setCurrentTable( )

word replaceCell( old Name )

word replaceCell( 经营范围 )

word replaceCell( )

word replaceCell( )

word replaceCell( )

word saveAs( D://开业登记合并事项实地调查表 doc )

word close()

word quit()

}

catch (Exception e)

{

}

}

}

当然,要运行上面的代码需要先下载 Jacob,下载地址为 https://sourceforge.net/projects/jacob-project/

lishixinzhi/Article/program/Java/hx/201311/26729

将Word转Html的原理是这样的:

1、客户上传Word文档到服务器

2、服务器调用OpenOffice程序打开上传的Word文档

3、OpenOffice将Word文档另存为Html格式

4、Over

至此可见,这要求服务器端安装OpenOffice软件,其实也可以是MS Office,不过OpenOffice的优势是跨平台,你懂的。恩,说明一下,本文的测试基于 MS Win7 Ultimate X64 系统。

下面就是规规矩矩的实现。

1、下载OpenOffice,

2、下载Jodconverter 这是一个开启OpenOffice进行格式转化的第三方jar包。

3、泡杯热茶,等待下载。

4、安装OpenOffice,安装结束后,调用cmd,启动OpenOffice的一项服务:C:\Program Files (x86)\OpenOffice.org 3\program>soffice -headless -accept="socket,port=8100;urp;"

5、打开eclipse

6、喝杯热茶,等待eclipse打开。

7、新建eclipse项目,导入Jodconverter/lib 下的jar包。

* commons-io

* jodconverter

* juh

* jurt

* ridl

* slf4j-api

* slf4j-jdk14

* unoil

* xstream

8、Coding...

查看代码

package com.mzule.doc2html.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.ConnectException;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;

/**

* 将Word文档转换成html字符串的工具类

*

* @author MZULE

*

*/

public class Doc2Html {

public static void main(String[] args) {

System.out

.println(toHtmlString(new File("C:/test/test.doc"), "C:/test"))

}

/**

* 将word文档转换成html文档

*

* @param docFile

*需要转换的word文档

* @param filepath

*转换之后html的存放路径

* @return 转换之后的html文件

*/

public static File convert(File docFile, String filepath) {

// 创建保存html的文件

File htmlFile = new File(filepath + "/" + new Date().getTime()

+ ".html")

// 创建Openoffice连接

OpenOfficeConnection con = new SocketOpenOfficeConnection(8100)

try {

// 连接

con.connect()

} catch (ConnectException e) {

System.out.println("获取OpenOffice连接失败...")

e.printStackTrace()

}

// 创建转换器

DocumentConverter converter = new OpenOfficeDocumentConverter(con)

// 转换文档问html

converter.convert(docFile, htmlFile)

// 关闭openoffice连接

con.disconnect()

return htmlFile

}

/**

* 将word转换成html文件,并且获取html文件代码。

*

* @param docFile

*需要转换的文档

* @param filepath

*文档中图片的保存位置

* @return 转换成功的html代码

*/

public static String toHtmlString(File docFile, String filepath) {

// 转换word文档

File htmlFile = convert(docFile, filepath)

// 获取html文件流

StringBuffer htmlSb = new StringBuffer()

try {

BufferedReader br = new BufferedReader(new InputStreamReader(

new FileInputStream(htmlFile)))

while (br.ready()) {

htmlSb.append(br.readLine())

}

br.close()

// 删除临时文件

htmlFile.delete()

} catch (FileNotFoundException e) {

e.printStackTrace()

} catch (IOException e) {

e.printStackTrace()

}

// HTML文件字符串

String htmlStr = htmlSb.toString()

// 返回经过清洁的html文本

return clearFormat(htmlStr, filepath)

}

/**

* 清除一些不需要的html标记

*

* @param htmlStr

*带有复杂html标记的html语句

* @return 去除了不需要html标记的语句

*/

protected static String clearFormat(String htmlStr, String docImgPath) {

// 获取body内容的正则

String bodyReg = "<BODY .*</BODY>"

Pattern bodyPattern = Pattern.compile(bodyReg)

Matcher bodyMatcher = bodyPattern.matcher(htmlStr)

if (bodyMatcher.find()) {

// 获取BODY内容,并转化BODY标签为DIV

htmlStr = bodyMatcher.group().replaceFirst("<BODY", "<DIV")

.replaceAll("</BODY>", "</DIV>")

}

// 调整图片地址

htmlStr = htmlStr.replaceAll("<IMG SRC=\"", "<IMG SRC=\"" + docImgPath

+ "/")

// 把<P></P>转换成</div></div>保留样式

// content = content.replaceAll("(<P)([^>]*>.*?)(<\\/P>)",

// "<div$2</div>")

// 把<P></P>转换成</div></div>并删除样式

htmlStr = htmlStr.replaceAll("(<P)([^>]*)(>.*?)(<\\/P>)", "<p$3</p>")

// 删除不需要的标签

htmlStr = htmlStr

.replaceAll(

"<[/]?(font|FONT|span|SPAN|xml|XML|del|DEL|ins|INS|meta|META|[ovwxpOVWXP]:\\w+)[^>]*?>",

"")

// 删除不需要的属性

htmlStr = htmlStr

.replaceAll(

"<([^>]*)(?:lang|LANG|class|CLASS|style|STYLE|size|SIZE|face|FACE|[ovwxpOVWXP]:\\w+)=(?:'[^']*'|\"\"[^\"\"]*\"\"|[^>]+)([^>]*)>",

"<$1$2>")

return htmlStr

}

}


欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/yw/8135669.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2023-04-13
下一篇 2023-04-13

发表评论

登录后才能评论

评论列表(0条)

保存