JAVA使用spire.doc将富文本和latex公式生成word文档

JAVA使用spire.doc将富文本和latex公式生成word文档,第1张

需求说明:数据库中存放的信息为富文本数据,并且包含了latex公式的代码,现在需要将富文本和latex公式数据输出到word文档中。

生成逻辑:获取数据后用 jsoup 解析富文本,逐个遍历其中的节点,并把节点内容追加到段落中。图片标签需要单独处理:先根据图片地址获取图片数据,再把图片输出到文档中。这里使用 spire.doc 包生成文档。

主要类:LatexToMathMLUtils(生成文档的主要逻辑)和 LatexUtils(LaTeX 公式相关的工具方法)。

LatexToMathMLUtils.java代码:

package com.xxx.util.pdf;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

import com.itextpdf.io.source.ByteArrayOutputStream;
import com.lowagie.text.Image;
import com.minxue.util.common.PropertisUtil;
import com.minxue.util.common.StringUtil;
import com.spire.doc.Document;
import com.spire.doc.FileFormat;
import com.spire.doc.Section;
import com.spire.doc.documents.HorizontalAlignment;
import com.spire.doc.documents.Paragraph;
import com.spire.doc.documents.ParagraphStyle;
import com.spire.doc.documents.UnderlineStyle;
import com.spire.doc.fields.DocPicture;
import com.spire.doc.fields.TextRange;
import com.spire.doc.fields.omath.OfficeMath;


/**
 * @author Administrator
 * @date 2022年5月10日
 */
public class LatexToMathMLUtils {
    protected static Logger logger = LogManager.getLogger(LatexToMathMLUtils.class);
    
    //将试题富文本信息输出到文档中
    private static Paragraph writeQuestion(Document document, Paragraph paragraph, String question ,String questionId) throws IOException {
        //试题默认添加段落标签
        if(question != null && !question.startsWith("";
        }
        //去除公式中的换行
,保证公式为一个整体 question = LatexUtils.dealLatexTitle(question); org.jsoup.nodes.Document doc = Jsoup.parse(question); // html解析 Element body = doc.body(); Elements elements = body.children(); analyseElements(document, paragraph, elements,questionId); return paragraph; } private static String latexFormat(String latex) { if (latex.contains("leqslant")) { latex = latex.replace("leqslant", "leq"); } if (latex.contains("geqslant")) { latex = latex.replace("geqslant", "geq"); } StringBuilder latexBuilder = new StringBuilder(); boolean isChinese = false; String regexStr = "[\u4E00-\u9FA5]"; for (Character c : latex.toCharArray()) { Matcher chineseMatch = Pattern.compile(regexStr).matcher(c.toString()); if (chineseMatch.find()) { if (isChinese) { latexBuilder.append(c); } else { latexBuilder.append("\\mbox{").append(c); isChinese = true; } continue; } else { if (isChinese) { isChinese = false; latexBuilder.append("}"); } latexBuilder.append(c); } } return latexBuilder.toString(); } //解析富文本数据 private static void analyseElements(Document document, Paragraph paragraph, Elements elements,String questionId) throws IOException { for (Element element : elements) { if (element.children().size() == 0) { if ("p".equalsIgnoreCase(element.tagName())) { latexTextDeal(paragraph, element.text(),questionId); logger.error(element.toString()); if(itemCheck(element.toString())) { } else { paragraph.appendText("\n"); } } else if ("img".equalsIgnoreCase(element.tagName())) { byte[] img = addImg2(document, element); if (null != img) { DocPicture appendPicture = paragraph.appendPicture(img); String width=""; if(element!=null){ width = element.attr("width"); } if (null != width && !"".equals(width)) { if(Float.parseFloat(element.attr("width"))* 0.65f > 460) { appendPicture.setWidth(Float.parseFloat(element.attr("width"))* 0.5f); appendPicture.setHeight(Float.parseFloat(element.attr("height"))* 0.5f); } else { appendPicture.setWidth(Float.parseFloat(element.attr("width"))* 0.65f); 
appendPicture.setHeight(Float.parseFloat(element.attr("height"))* 0.65f); } } else { /*appendPicture.setWidth(Float.parseFloat(element.attr("width"))* 0.5f); appendPicture.setHeight(Float.parseFloat(element.attr("height"))* 0.5f);*/ Image addImg = addImg(document, element); appendPicture.setWidth(addImg.getWidth() * 0.5f); appendPicture.setHeight(addImg.getHeight() * 0.5f); } } paragraph.appendText("\n"); } else { //判断是否含有Latex公式表达式 latexTextDeal(paragraph, element.text(),questionId); paragraph.appendText("\n"); } } else { analyseChildrenElement(document, paragraph, (Node) element,questionId); paragraph.appendText("\n"); } } } //解析富文本的字节点数据 private static void analyseChildrenElement(Document document, Paragraph paragraph, Node parent,String questionId) throws IOException { List childNodes = parent.childNodes(); for (Node node : childNodes) { if (node.childNodes().size() > 0) { analyseChildrenElement(document, paragraph, node,questionId); //递归到最低一级元素 } else { if ("#text".equalsIgnoreCase(node.nodeName()) && !"sub".equalsIgnoreCase(node.parent().nodeName()) && !"sup".equalsIgnoreCase(node.parent().nodeName())) { //判断是普通文本 String content = node.attr("text"); // paragraph.appendText(content); //判断是否含有Latex公式表达式 latexTextDeal(paragraph, content,questionId); } else if ("img".equalsIgnoreCase(node.nodeName())) { //判断是图片 byte[] img = addImg2(document, node); if (null != img) { DocPicture appendPicture = paragraph.appendPicture(img); String width=""; if(node!=null){ width = node.attr("width"); } if (null != width && !"".equals(width)) { if(Float.parseFloat(node.attr("width"))* 0.65f > 460) { appendPicture.setWidth(Float.parseFloat(node.attr("width"))* 0.5f); appendPicture.setHeight(Float.parseFloat(node.attr("height"))* 0.5f); } else { appendPicture.setWidth(Float.parseFloat(node.attr("width"))* 0.65f); appendPicture.setHeight(Float.parseFloat(node.attr("height"))* 0.65f); } } else { /*appendPicture.setWidth(Float.parseFloat(node.attr("width"))* 0.5f); 
appendPicture.setHeight(Float.parseFloat(node.attr("height"))* 0.5f);*/ Image addImg = addImg(document, node); appendPicture.setWidth(addImg.getWidth() * 0.5f); appendPicture.setHeight(addImg.getHeight() * 0.5f); } } } else if ("br".equalsIgnoreCase(node.nodeName())) { //判断是换行 if (node.parent().childNodes().size() > 1) //判断是文本中换行,而非单纯换行,避免无用换行过多导致文本过长 paragraph.appendText("\n"); } } } } //获取网络图片的二级制数据 private static byte[] addImg2(Document document, Node element) { byte[] img = null; String src =""; if(element != null && StringUtil.checkNotNull(element.attr("src"))){ String paltForm=PropertisUtil.getInstance().getPlatform(); src = element.attr("src"); if("shaanxi".equals(paltForm)){ src=src.replace("http://res.minxuejiaoyu.cn", "http://192.168.0.252:83"); src=src.replace("http://res2.minxuejiaoyu.cn", "http://192.168.0.252:83"); } logger.debug("element.attr2(src):--"+src); try { if(src != null && !src.equals("")){ src = src.trim(); img = getImageFromNetByUrl(src); // img = wordHttpImage(src); } } catch (Exception e) { logger.error("src:"+src); logger.error(e); return null; } } return img; } /* * 获取网络图片 */ private static Image addImg(Document document, Node element) { Image img = null; String src =""; if(element != null && StringUtil.checkNotNull(element.attr("src"))){ String paltForm=PropertisUtil.getInstance().getPlatform(); src = element.attr("src"); if("shaanxi".equals(paltForm)){ src=src.replace("http://res.minxuejiaoyu.cn", "http://192.168.0.252:83"); src=src.replace("http://res2.minxuejiaoyu.cn", "http://192.168.0.252:83"); } logger.debug("element.attr2(src):--"+src); try { if(src != null && !src.equals("")){ src = src.trim(); img = wordHttpImage(src); } } catch (MalformedURLException e) { logger.error(e.getMessage()); } catch (Exception e) { logger.error("src:"+src); logger.error(e); return null; } } String width=""; //String height=""; if(element!=null){ width = element.attr("width"); } if (null != width && !"".equals(width)) { 
if(Float.parseFloat(element.attr("width"))* 0.65f > 460) { img.scalePercent(50,50); } else { img.scalePercent(65,65); } } else { img.scalePercent(50,50); } return img; } /** * 文档线上图片获取 * */ public static Image wordHttpImage(String imageUrl) throws Exception{ //声明图片 Image img = Image.getInstance(new URL(imageUrl)); //img.scaleAbsolute(img.getWidth()/3, img.getHeight()/3); img.scalePercent(50,50); img.setAbsolutePosition(0, 0); img.setAlignment(Image.LEFT | Image.TEXTWRAP); return img; } private static void latexTextDeal(Paragraph paragraph,String latexText,String questionId){ String formulaStr = null; try { logger.debug("latexText:"+latexText); //判断是否含有Latex公式表达式 String[] titleSplit = LatexUtils.titleGroup(latexText); if(titleSplit == null){ return; } //按照顺序写入文档 for (int i = 0 ; i < titleSplit.length ; i++ ) { formulaStr = titleSplit[i]; if(formulaStr != null){ formulaStr = formulaStr.trim(); } if(formulaStr != null && !formulaStr.equals("")){ Matcher mather = LatexUtils.compile.matcher(formulaStr);//匹配是否是公式 if (!mather.find()){ paragraph.appendText(titleSplit[i]); }else{//是,接着写入图片 //调用latexImage方法将公式转换成图片,wordLocalImage方法处理图片,图片生成 /*String no = System.currentTimeMillis()+WordUtils.getRandomNickname(10); Image img = LatexUtils.wordLocalImage(LatexUtils.latexImage(formulaStr,"/data/mxjy_pdf/wordtemp/w_"+questionId+"_"+no+".png")); paragraph.add(new Phrase(new Chunk(img, 0, 0, true)));*/ OfficeMath math = new OfficeMath(paragraph.getDocument()); paragraph.getItems().add(math); //logger.error(titleSplit[i]); String subSequence = titleSplit[i].subSequence(2, titleSplit[i].length()-2).toString(); //logger.error(subSequence); math.fromLatexMathCode(latexFormat(subSequence)); } } } } catch (Exception e) { logger.error(e.getMessage()); } } //图片数据请求 public static byte[] getImageFromNetByUrl(String strUrl){ try { URL url = new URL(strUrl); HttpURLConnection conn = (HttpURLConnection)url.openConnection(); conn.setRequestMethod("GET"); conn.setConnectTimeout(3 * 1000); 
InputStream inStream = conn.getInputStream();//通过输入流获取图片数据 byte[] btImg = readInputStream(inStream);//得到图片的二进制数据 return btImg; } catch (Exception e) { e.printStackTrace(); } return null; } public static byte[] readInputStream(InputStream inStream) throws Exception{ ByteArrayOutputStream outStream = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len = 0; while( (len=inStream.read(buffer)) != -1 ){ outStream.write(buffer, 0, len); } inStream.close(); outStream.close(); return outStream.toByteArray(); } //生成文档逻辑 public static void produceItextWord(String paperName,String path,Map quesMap, Map quesAnsMap) throws Exception { if(quesMap == null){ return; } //调用wordStart方法,文档名称,生成路径 Document document = new Document(); Section section = document.addSection(); Paragraph pName = section.addParagraph(); pName.getFormat().setHorizontalAlignment(HorizontalAlignment.Center); pName.getFormat().setLineSpacing(15); TextRange appendText = pName.appendText(paperName); appendText.getCharacterFormat().setFontSize(20f); appendText.getCharacterFormat().setBold(true); //题干 if(quesMap != null && quesMap.size() > 0){ for (String id : quesMap.keySet()){ //题干 String title = quesMap.get(id); logger.error("题干信息:"+title); Paragraph qc = section.addParagraph(); writeQuestion(document, qc, title,id); } } //答案列表 if(quesAnsMap != null && quesAnsMap.size() > 0){ //答案 // 设置字体,字号,加粗,颜色 // 设置新的段落,使其字体为font Paragraph qa1 = section.addParagraph(); TextRange appendText2 = qa1.appendText("答案"); qa1.getFormat().setHorizontalAlignment(HorizontalAlignment.Center); appendText2.getCharacterFormat().setFontSize(20f); appendText2.getCharacterFormat().setBold(true); appendText2.getCharacterFormat().setUnderlineStyle(UnderlineStyle.None); for (String id : quesAnsMap.keySet()){ //答案 String ans = quesAnsMap.get(id); logger.error("答案信息:"+ans); Paragraph qa = section.addParagraph(); writeQuestion(document, qa, ans,id); } } ParagraphStyle style1 = new ParagraphStyle(document); style1.setName("style"); 
style1.getCharacterFormat().setFontName("Courier New"); document.getStyles().add(style1); pName.applyStyle(style1.getName()); document.saveToFile(path, FileFormat.Docx); // 关闭document document.close(); } public static boolean itemCheck(String elementContent) { //String st = "

.

"; Pattern pattern=Pattern.compile("\\

\\d{1,2}\\.

"); Matcher matcher = pattern.matcher(elementContent); return matcher.find(); } public static void main(String[] args) throws Exception { Map quesMap = new HashMap<>(); Map quesAnsMap = new HashMap<>(); String questionAns = "1.

设函数\\(f(x)=\\left\\{\\begin{array}{c}x+a, x \\leq 0 \\\\ \\ln x, x>0\\end{array}\\right.\\), 已知 \\(x_{1}<x_{2}\\), 且 \\(f\\left(x_{1}\\right)=f\\left(x_{2}\\right)\\), 若 \\(x_{2}-x_{1}\\) 的最小值为 \\(\\frac{1}{e^{2}}\\), 则 \\(a\\) 的值为_______.

"; quesMap.put("3", questionAns); quesAnsMap.put("1", questionAns); produceItextWord("重庆八中高2023级高二(下)数学周考(三)测试", "D:\\ParagraphAlignment.doc", quesMap, quesAnsMap); } }

LatexUtils.java代码:

package com.xxx.util.pdf;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.imageio.ImageIO;
import javax.swing.JLabel;

import org.scilab.forge.jlatexmath.TeXConstants;
import org.scilab.forge.jlatexmath.TeXFormula;
import org.scilab.forge.jlatexmath.TeXIcon;

import com.lowagie.text.Image;

/**
 * Latex 公式处理工具类
 * @author liujg
 * @date 2021年10月15日 下午1:51:59
 * @version V1.0
 */
public class LatexUtils {
	
	public static Pattern pattern = Pattern.compile("\\\\\\\\\\((.*?)\\\\\\\\\\)|\\\\\\\\\\[(.*?)\\\\\\\\\\]");
	public static Pattern compile = Pattern.compile("(\\\\\\()|(\\\\\\[)|(\\\\\\begin\\{equation\\})|(\\\\\\begin\\{aligned\\})");
	
	//试题列表
	private static Map quesMap = new LinkedHashMap();
	
	/**
	 * latex公式转图片
	 * */
	public static String latexImage(String formulaStr,String path){
	    TeXFormula tf = new TeXFormula(formulaStr);
	    TeXIcon ti = tf.createTeXIcon(TeXConstants.STYLE_DISPLAY, 40);
	    BufferedImage bimg = new BufferedImage(ti.getIconWidth(), ti.getIconHeight(), BufferedImage.TYPE_4BYTE_ABGR);
	    Graphics2D g2d = bimg.createGraphics();
	    g2d.setColor(Color.white);
	    g2d.fillRect(0,0,ti.getIconWidth(),ti.getIconHeight());
	    JLabel jl = new JLabel();
	    jl.setForeground(new Color(0, 0, 0));
	    ti.paintIcon(jl, g2d, 0, 0);
	    File out = new File(path);
	    try {
			ImageIO.write(bimg, "png", out);
		} catch (IOException e) {
		}
	    return path;
	}
	
	/**
	 * 文档线下图片获取
	 * */
	public static Image wordLocalImage(String imageUrl) throws Exception{
	    //声明图片
	    Image img = Image.getInstance(imageUrl);
	    //绝对大小设置
	    //img.scaleAbsolute(img.getWidth()/3,img.getHeight()/3);
	    //比例大小
	    img.scalePercent(30,30);
	    //图片位置坐标
	    img.setAbsolutePosition(0, 0);
	    //图片位置靠左并且文字绕图形显示| Image.TEXTWRAP ,图片背景Image.UNDERLYING
	    img.setAlignment(Image.LEFT | Image.TEXTWRAP);
	    return img;
	}
	
	/**
	 * 文档线上图片获取
	 * */
	public static Image wordHttpImage(String imageUrl) throws Exception{
	    //声明图片
	    Image img = Image.getInstance(new URL(imageUrl));
	    //img.scaleAbsolute(img.getWidth()/3, img.getHeight()/3);
	    img.scalePercent(50,50);
	    img.setAbsolutePosition(0, 0);
	    img.setAlignment(Image.LEFT | Image.TEXTWRAP);
	    return img;
	}
	
	/**
	 * 去除Latex公式中的换行
* @param title * @return * @author liujg * @date 2021年10月27日 上午11:51:06 */ public static String dealLatexTitle(String title){ if(title == null){ return null; } title = title.replaceAll("\\\\","\\\\\\\\"); //\[ \] latex其实是没有的,为了转换公式不出错需要去掉,把整个公式替换成@@@,再把去掉\[ \]的然后以@@@截取后,就是 文字-公式-文字-公式的数组形式,最后依次写入文档 String titleNot = title.replaceAll("\\\\\\\\\\((.*?)\\\\\\\\\\)|\\\\\\\\\\[(.*?)\\\\\\\\\\]","@@@"); Matcher mather = pattern.matcher(title); while (mather.find()){ String formula = mather.group(); if(formula != null){ formula = formula.replace("
", ""); formula = formula.replace("\\ ", ""); } titleNot = titleNot.replaceFirst("@@@", formula); } return titleNot; } /** * 题干类容段落处理 * */ public static String[] titleGroup(String title){ if(title == null){ return null; } //公式处理\[(\cfrac{x-y}{x+y}-\cfrac{x+y}{x-y})\div \cfrac{2x}{{{x}^{2}}y-{{y}^{3}}}\] 将\cfrac转换成\\,否者后面\cfrac转换成mathml其实转换的事cfrac,报错 title = title.replace("

", ""); title = title.replace("

", ""); title = title.replaceAll("\\\\","\\\\\\\\"); //\[ \] latex其实是没有的,为了转换公式不出错需要去掉,把整个公式替换成@@@,再把去掉\[ \]的然后以@@@截取后,就是 文字-公式-文字-公式的数组形式,最后依次写入文档 String titleNot = title.replaceAll("\\\\\\\\\\((.*?)\\\\\\\\\\)|\\\\\\\\\\[(.*?)\\\\\\\\\\]","@@@"); Matcher mather = pattern.matcher(title); while (mather.find()){ String formula = mather.group(); if(formula != null){ formula = formula.replace("
", ""); } titleNot = titleNot.replaceFirst("@@@", "#####"+formula+"#####"); } String[] titleSplit = null; if(titleNot != null){ titleNot = titleNot.replace("
", "\n"); titleSplit = titleNot.split("#####"); } return titleSplit; } public static void main(String[] args) { String url = "xxxx"; try { for(int i=0 ;i < 1 ;i++){ Image img = LatexUtils.wordHttpImage(url); System.out.println(i+"--"+img); } } catch (Exception e) { e.printStackTrace(); } } }

输出结果:

 富文本html页面展示方式:

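页面引入 MathJax(原文此处的 <script> 引入标签在转载时丢失;下文调用的 MathJax.typesetPromise() 属于 MathJax 3 的接口,因此页面需要引入 MathJax 3 的脚本):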






数据在页面展示后使用js进行渲染:

MathJax.typesetPromise();

展示结果:

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/web/940855.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-17
下一篇 2022-05-17

发表评论

登录后才能评论

评论列表(0条)

保存