需求说明:数据库中存放的信息为富文本数据,并且包含了latex公式的代码,现在需要将富文本和latex公式数据输出到word文档中。
生成逻辑:将数据获取后进行遍历,获取每个节点,然后将节点增加到段落中,特别需要区别图片标签,需要获取图片的数据输出到文档中。这里使用了spire.doc包生成文档。
主要类:LatexToMathMLUtils(生成文档的主要逻辑)、LatexUtils(LaTeX 公式相关工具)。
LatexToMathMLUtils.java代码:
package com.xxx.util.pdf;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import com.itextpdf.io.source.ByteArrayOutputStream;
import com.lowagie.text.Image;
import com.minxue.util.common.PropertisUtil;
import com.minxue.util.common.StringUtil;
import com.spire.doc.Document;
import com.spire.doc.FileFormat;
import com.spire.doc.Section;
import com.spire.doc.documents.HorizontalAlignment;
import com.spire.doc.documents.Paragraph;
import com.spire.doc.documents.ParagraphStyle;
import com.spire.doc.documents.UnderlineStyle;
import com.spire.doc.fields.DocPicture;
import com.spire.doc.fields.TextRange;
import com.spire.doc.fields.omath.OfficeMath;
/**
 * Writes rich-text (HTML) content that may contain LaTeX formulas into a Word document
 * built with the Spire.Doc API.
 *
 * @author Administrator
 * @date 2022-05-10
 */
public class LatexToMathMLUtils {
// Shared log4j2 logger used by every static helper in this class.
protected static Logger logger = LogManager.getLogger(LatexToMathMLUtils.class);
//将试题富文本信息输出到文档中
private static Paragraph writeQuestion(Document document, Paragraph paragraph, String question ,String questionId) throws IOException {
//试题默认添加段落标签
if(question != null && !question.startsWith("";
}
//去除公式中的换行
,保证公式为一个整体
question = LatexUtils.dealLatexTitle(question);
org.jsoup.nodes.Document doc = Jsoup.parse(question); // html解析
Element body = doc.body();
Elements elements = body.children();
analyseElements(document, paragraph, elements,questionId);
return paragraph;
}
/**
 * Normalizes a LaTeX fragment before it is converted to an Office math object.
 *
 * <p>Two rewrites are applied: the names {@code leqslant}/{@code geqslant} are replaced by
 * {@code leq}/{@code geq}, and every run of consecutive characters in the range
 * U+4E00..U+9FA5 is wrapped in {@code \mbox{...}}.
 *
 * @param latex the LaTeX source without its surrounding delimiters; must not be {@code null}
 * @return the normalized LaTeX source
 */
private static String latexFormat(String latex) {
    String normalized = latex.replace("leqslant", "leq").replace("geqslant", "geq");
    StringBuilder latexBuilder = new StringBuilder(normalized.length() + 16);
    boolean insideChineseRun = false;
    for (int i = 0; i < normalized.length(); i++) {
        char c = normalized.charAt(i);
        // Plain range check instead of compiling a regex for every single character.
        boolean chinese = c >= '\u4E00' && c <= '\u9FA5';
        if (chinese && !insideChineseRun) {
            latexBuilder.append("\\mbox{");
        } else if (!chinese && insideChineseRun) {
            latexBuilder.append('}');
        }
        insideChineseRun = chinese;
        latexBuilder.append(c);
    }
    if (insideChineseRun) {
        // A run that reaches the end of the input must be closed too; otherwise the
        // emitted \mbox{ has no matching brace.
        latexBuilder.append('}');
    }
    return latexBuilder.toString();
}
/**
 * Walks the top-level elements of a parsed rich-text fragment and appends their content to
 * {@code paragraph}.
 *
 * <p>Elements with child elements are delegated to {@code analyseChildrenElement}. Leaf
 * {@code img} elements are downloaded and embedded; the text of every other leaf element is
 * written through {@code latexTextDeal}. A line break is appended after each element, except
 * after a leaf {@code p} for which {@code itemCheck} returns {@code true}.
 *
 * @param document   the Word document being built
 * @param paragraph  the paragraph that receives the content
 * @param elements   the top-level elements to write
 * @param questionId id of the question, passed through to the formula writer
 */
private static void analyseElements(Document document, Paragraph paragraph, Elements elements,String questionId) throws IOException {
    for (Element element : elements) {
        if (!element.children().isEmpty()) {
            analyseChildrenElement(document, paragraph, (Node) element, questionId);
            paragraph.appendText("\n");
            continue;
        }
        String tag = element.tagName();
        if ("p".equalsIgnoreCase(tag)) {
            latexTextDeal(paragraph, element.text(), questionId);
            // Diagnostic output only; this is not an error condition.
            logger.debug(element.toString());
            if (!itemCheck(element.toString())) {
                paragraph.appendText("\n");
            }
        } else if ("img".equalsIgnoreCase(tag)) {
            appendScaledPicture(document, paragraph, element);
            paragraph.appendText("\n");
        } else {
            // Any other leaf tag: write its text, converting LaTeX formulas if present.
            latexTextDeal(paragraph, element.text(), questionId);
            paragraph.appendText("\n");
        }
    }
}

/**
 * Recursively walks the child nodes of {@code parent} down to the leaves and appends them to
 * {@code paragraph}: text nodes (except those directly under {@code sub}/{@code sup}, which
 * are skipped), images, and line breaks.
 *
 * @param document   the Word document being built
 * @param paragraph  the paragraph that receives the content
 * @param parent     the node whose children are written
 * @param questionId id of the question, passed through to the formula writer
 */
private static void analyseChildrenElement(Document document, Paragraph paragraph, Node parent,String questionId) throws IOException {
    // The element type is required: iterating a raw List as Node does not compile.
    List<Node> childNodes = parent.childNodes();
    for (Node node : childNodes) {
        if (node.childNodes().size() > 0) {
            // Recurse until the lowest-level node is reached.
            analyseChildrenElement(document, paragraph, node, questionId);
            continue;
        }
        String nodeName = node.nodeName();
        if ("#text".equalsIgnoreCase(nodeName)) {
            String parentName = node.parent().nodeName();
            boolean insideSubOrSup = "sub".equalsIgnoreCase(parentName) || "sup".equalsIgnoreCase(parentName);
            if (!insideSubOrSup) {
                // NOTE(review): reading the text through attr("text") depends on the jsoup
                // version in use - confirm it returns the node text on the deployed version.
                String content = node.attr("text");
                latexTextDeal(paragraph, content, questionId);
            }
        } else if ("img".equalsIgnoreCase(nodeName)) {
            appendScaledPicture(document, paragraph, node);
        } else if ("br".equalsIgnoreCase(nodeName)) {
            // Only break the line when the <br> sits between other content; a lone <br>
            // would just add empty lines and lengthen the document.
            if (node.parent().childNodes().size() > 1) {
                paragraph.appendText("\n");
            }
        }
    }
}

/**
 * Downloads the image referenced by {@code imgNode} and appends it to {@code paragraph},
 * scaled from its {@code width}/{@code height} attributes when both are usable, otherwise
 * from the intrinsic size reported by {@code addImg}. Nothing is appended when the download
 * fails.
 */
private static void appendScaledPicture(Document document, Paragraph paragraph, Node imgNode) {
    byte[] data = addImg2(document, imgNode);
    if (data == null) {
        return;
    }
    DocPicture picture = paragraph.appendPicture(data);
    float width = parsePositiveFloat(imgNode.attr("width"));
    float height = parsePositiveFloat(imgNode.attr("height"));
    if (width > 0 && height > 0) {
        // Wide pictures are shrunk further so that they still fit the page width.
        float scale = width * 0.65f > 460 ? 0.5f : 0.65f;
        picture.setWidth(width * scale);
        picture.setHeight(height * scale);
        return;
    }
    Image intrinsic = null;
    try {
        intrinsic = addImg(document, imgNode);
    } catch (RuntimeException e) {
        // Keep the picture at its default size instead of aborting the whole document.
        logger.error(e.getMessage(), e);
    }
    if (intrinsic != null) {
        picture.setWidth(intrinsic.getWidth() * 0.5f);
        picture.setHeight(intrinsic.getHeight() * 0.5f);
    }
}

/**
 * Parses an HTML dimension attribute.
 *
 * @return the parsed value, or {@code -1} when the attribute is missing, not a plain number,
 *         or not positive
 */
private static float parsePositiveFloat(String raw) {
    if (raw == null || raw.trim().isEmpty()) {
        return -1f;
    }
    try {
        float value = Float.parseFloat(raw.trim());
        return value > 0 ? value : -1f;
    } catch (NumberFormatException e) {
        return -1f;
    }
}
/**
 * Fetches the binary data of the image referenced by the {@code src} attribute of
 * {@code element}.
 *
 * @param document the Word document being built (not used here)
 * @param element  the image node; may be {@code null}
 * @return the image bytes, or {@code null} when there is no usable {@code src} or the
 *         download did not produce data
 */
private static byte[] addImg2(Document document, Node element) {
    if (element == null || !StringUtil.checkNotNull(element.attr("src"))) {
        return null;
    }
    String platform = PropertisUtil.getInstance().getPlatform();
    String imageUrl = element.attr("src");
    if ("shaanxi".equals(platform)) {
        // On this platform the public resource hosts are rewritten to the intranet address.
        imageUrl = imageUrl
                .replace("http://res.minxuejiaoyu.cn", "http://192.168.0.252:83")
                .replace("http://res2.minxuejiaoyu.cn", "http://192.168.0.252:83");
    }
    logger.debug("element.attr2(src):--"+imageUrl);
    try {
        if (imageUrl == null || imageUrl.equals("")) {
            return null;
        }
        imageUrl = imageUrl.trim();
        return getImageFromNetByUrl(imageUrl);
    } catch (Exception e) {
        logger.error("src:"+imageUrl);
        logger.error(e);
        return null;
    }
}
/**
 * Loads the image referenced by the {@code src} attribute of {@code element} and scales it
 * according to the element's {@code width} attribute.
 *
 * <p>The image is scaled to 65 percent when a numeric {@code width} is present and
 * {@code width * 0.65} does not exceed 460; in every other case it is scaled to 50 percent.
 *
 * @param document the Word document being built (not used here)
 * @param element  the image node; may be {@code null}
 * @return the scaled image, or {@code null} when the node has no usable {@code src} or the
 *         image could not be loaded
 */
private static Image addImg(Document document, Node element) {
    if (element == null || !StringUtil.checkNotNull(element.attr("src"))) {
        // Without a source there is no image to scale; previously this path dereferenced null.
        return null;
    }
    String paltForm = PropertisUtil.getInstance().getPlatform();
    String src = element.attr("src");
    if ("shaanxi".equals(paltForm)) {
        // On this platform the public resource hosts are rewritten to the intranet address.
        src = src.replace("http://res.minxuejiaoyu.cn", "http://192.168.0.252:83");
        src = src.replace("http://res2.minxuejiaoyu.cn", "http://192.168.0.252:83");
    }
    logger.debug("element.attr2(src):--"+src);
    if (src == null || src.equals("")) {
        return null;
    }
    Image img;
    try {
        src = src.trim();
        img = wordHttpImage(src);
    } catch (MalformedURLException e) {
        logger.error(e.getMessage());
        return null;
    } catch (Exception e) {
        logger.error("src:"+src);
        logger.error(e);
        return null;
    }
    if (img == null) {
        return null;
    }
    int percent = 50;
    String width = element.attr("width");
    if (width != null && !"".equals(width)) {
        try {
            if (Float.parseFloat(width) * 0.65f <= 460) {
                percent = 65;
            }
        } catch (NumberFormatException e) {
            // Non-numeric width such as "100px": keep the default scale.
            logger.debug("width:"+width);
        }
    }
    img.scalePercent(percent, percent);
    return img;
}
/**
 * Loads an image from a URL and applies the default layout used for document images:
 * 50 percent scale, absolute position (0, 0), left-aligned with text wrap.
 *
 * @param imageUrl the URL of the image
 * @return the configured image
 * @throws Exception if the URL is malformed or the image cannot be loaded
 */
public static Image wordHttpImage(String imageUrl) throws Exception{
    final URL location = new URL(imageUrl);
    final Image picture = Image.getInstance(location);
    picture.scalePercent(50,50);
    picture.setAbsolutePosition(0, 0);
    final int alignment = Image.LEFT | Image.TEXTWRAP;
    picture.setAlignment(alignment);
    return picture;
}
/**
 * Writes a piece of text into {@code paragraph}, converting embedded LaTeX formulas into
 * Office math objects.
 *
 * <p>The text is split by {@code LatexUtils.titleGroup} into alternating plain-text and
 * formula segments, which are written in order. Segments that are blank after trimming are
 * skipped. A failure while writing one segment is logged and does not prevent the remaining
 * segments from being written.
 *
 * @param paragraph  the paragraph that receives the content
 * @param latexText  the text to write; may be {@code null}
 * @param questionId id of the question (not used by the current implementation)
 */
private static void latexTextDeal(Paragraph paragraph,String latexText,String questionId){
    logger.debug("latexText:"+latexText);
    String[] titleSplit;
    try {
        titleSplit = LatexUtils.titleGroup(latexText);
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
        return;
    }
    if (titleSplit == null) {
        return;
    }
    for (String segment : titleSplit) {
        if (segment == null || segment.trim().isEmpty()) {
            continue;
        }
        try {
            Matcher mather = LatexUtils.compile.matcher(segment.trim());
            // A formula needs room for a two-character delimiter on each side; anything
            // shorter cannot be stripped and is written as plain text instead.
            boolean isFormula = mather.find() && segment.length() >= 4;
            if (!isFormula) {
                paragraph.appendText(segment);
                continue;
            }
            OfficeMath math = new OfficeMath(paragraph.getDocument());
            paragraph.getItems().add(math);
            // Drop the two-character opening and closing delimiters around the formula.
            String latexBody = segment.subSequence(2, segment.length() - 2).toString();
            math.fromLatexMathCode(latexFormat(latexBody));
        } catch (Exception e) {
            // Keep going: one bad formula must not drop the rest of the text.
            logger.error(e.getMessage(), e);
        }
    }
}
/**
 * Downloads the content behind an HTTP URL.
 *
 * @param strUrl the URL to request with GET
 * @return the response body as bytes, or {@code null} when the request fails for any reason
 */
public static byte[] getImageFromNetByUrl(String strUrl){
    HttpURLConnection conn = null;
    try {
        URL url = new URL(strUrl);
        conn = (HttpURLConnection)url.openConnection();
        conn.setRequestMethod("GET");
        conn.setConnectTimeout(3 * 1000);
        // Without a read timeout a stalled server would block document generation forever.
        conn.setReadTimeout(10 * 1000);
        // try-with-resources closes the stream even when reading fails half-way.
        try (InputStream inStream = conn.getInputStream()) {
            return readInputStream(inStream);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (conn != null) {
            conn.disconnect();
        }
    }
    return null;
}
/**
 * Reads {@code inStream} to its end and returns everything that was read.
 *
 * <p>The stream is always closed before this method returns, including when reading fails.
 *
 * @param inStream the stream to drain; must not be {@code null}
 * @return all bytes read from the stream (empty when the stream has no data)
 * @throws Exception if reading from the stream fails
 */
public static byte[] readInputStream(InputStream inStream) throws Exception{
    try (InputStream in = inStream;
         ByteArrayOutputStream outStream = new ByteArrayOutputStream()) {
        byte[] buffer = new byte[1024];
        int len;
        while ((len = in.read(buffer)) != -1) {
            outStream.write(buffer, 0, len);
        }
        return outStream.toByteArray();
    }
}
/**
 * Generates a Word document containing a centered title, every question, and - when answers
 * are supplied - an answer section.
 *
 * @param paperName  title written at the top of the document
 * @param path       file path the document is saved to (saved in Docx format)
 * @param quesMap    question id to question rich text; nothing is generated when {@code null}
 * @param quesAnsMap question id to answer rich text; the answer section is omitted when
 *                   {@code null} or empty
 * @throws Exception if writing the content or saving the file fails
 */
public static void produceItextWord(String paperName,String path,Map<String, String> quesMap,
        Map<String, String> quesAnsMap) throws Exception {
    if (quesMap == null) {
        return;
    }
    Document document = new Document();
    try {
        Section section = document.addSection();

        // Title: centered, bold, 20pt.
        Paragraph pName = section.addParagraph();
        pName.getFormat().setHorizontalAlignment(HorizontalAlignment.Center);
        pName.getFormat().setLineSpacing(15);
        TextRange titleRange = pName.appendText(paperName);
        titleRange.getCharacterFormat().setFontSize(20f);
        titleRange.getCharacterFormat().setBold(true);

        // Questions, one paragraph each.
        for (Map.Entry<String, String> question : quesMap.entrySet()) {
            String title = question.getValue();
            // Content dump for diagnostics; not an error condition.
            logger.debug("题干信息:"+title);
            Paragraph qc = section.addParagraph();
            writeQuestion(document, qc, title, question.getKey());
        }

        // Answers, preceded by a centered heading.
        if (quesAnsMap != null && quesAnsMap.size() > 0) {
            Paragraph answerHeading = section.addParagraph();
            TextRange headingRange = answerHeading.appendText("答案");
            answerHeading.getFormat().setHorizontalAlignment(HorizontalAlignment.Center);
            headingRange.getCharacterFormat().setFontSize(20f);
            headingRange.getCharacterFormat().setBold(true);
            headingRange.getCharacterFormat().setUnderlineStyle(UnderlineStyle.None);
            for (Map.Entry<String, String> answer : quesAnsMap.entrySet()) {
                String ans = answer.getValue();
                logger.debug("答案信息:"+ans);
                Paragraph qa = section.addParagraph();
                writeQuestion(document, qa, ans, answer.getKey());
            }
        }

        ParagraphStyle style1 = new ParagraphStyle(document);
        style1.setName("style");
        style1.getCharacterFormat().setFontName("Courier New");
        document.getStyles().add(style1);
        pName.applyStyle(style1.getName());

        document.saveToFile(path, FileFormat.Docx);
    } finally {
        // Release the document even when building or saving fails.
        document.close();
    }
}
public static boolean itemCheck(String elementContent) {
//String st = ".
";
Pattern pattern=Pattern.compile("\\\\d{1,2}\\.
");
Matcher matcher = pattern.matcher(elementContent);
return matcher.find();
}
public static void main(String[] args) throws Exception {
Map quesMap = new HashMap<>();
Map quesAnsMap = new HashMap<>();
String questionAns = "1.设函数\\(f(x)=\\left\\{\\begin{array}{c}x+a, x \\leq 0 \\\\ \\ln x, x>0\\end{array}\\right.\\), 已知 \\(x_{1}<x_{2}\\), 且 \\(f\\left(x_{1}\\right)=f\\left(x_{2}\\right)\\), 若 \\(x_{2}-x_{1}\\) 的最小值为 \\(\\frac{1}{e^{2}}\\), 则 \\(a\\) 的值为_______.
";
quesMap.put("3", questionAns);
quesAnsMap.put("1", questionAns);
produceItextWord("重庆八中高2023级高二(下)数学周考(三)测试", "D:\\ParagraphAlignment.doc", quesMap, quesAnsMap);
}
}
LatexUtils.java代码:
package com.xxx.util.pdf;
import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.imageio.ImageIO;
import javax.swing.JLabel;
import org.scilab.forge.jlatexmath.TeXConstants;
import org.scilab.forge.jlatexmath.TeXFormula;
import org.scilab.forge.jlatexmath.TeXIcon;
import com.lowagie.text.Image;
/**
 * Utility class for handling LaTeX formulas.
 *
 * @author liujg
 * @date 2021-10-15 13:51:59
 * @version V1.0
 */
public class LatexUtils {
// Matches one complete formula in text whose backslashes have been doubled (as done by
// dealLatexTitle and titleGroup before matching): two backslashes and "(" up to the next two
// backslashes and ")", or the same with "[" and "]". The match is lazy and "." does not
// cross line terminators.
public static Pattern pattern = Pattern.compile("\\\\\\\\\\((.*?)\\\\\\\\\\)|\\\\\\\\\\[(.*?)\\\\\\\\\\]");
// Detects the start of a formula: a single backslash followed by "(" or "[".
// NOTE(review): in the third and fourth alternatives the regex reads as an escaped backslash,
// then \b (a word boundary), then "egin{equation}" / "egin{aligned}", so they do not look
// like they can match "\begin{...}" - confirm the intended number of backslashes.
public static Pattern compile = Pattern.compile("(\\\\\\()|(\\\\\\[)|(\\\\\\begin\\{equation\\})|(\\\\\\begin\\{aligned\\})");
// Question list. Not referenced by any method visible in this class.
private static Map quesMap = new LinkedHashMap();
/**
 * Renders a LaTeX formula to a PNG file with a white background.
 *
 * @param formulaStr the LaTeX source of the formula
 * @param path       the file the PNG is written to
 * @return {@code path}
 * @throws IllegalStateException if the image file cannot be written
 */
public static String latexImage(String formulaStr,String path){
    TeXFormula tf = new TeXFormula(formulaStr);
    TeXIcon ti = tf.createTeXIcon(TeXConstants.STYLE_DISPLAY, 40);
    int width = ti.getIconWidth();
    int height = ti.getIconHeight();
    BufferedImage bimg = new BufferedImage(width, height, BufferedImage.TYPE_4BYTE_ABGR);
    Graphics2D g2d = bimg.createGraphics();
    try {
        g2d.setColor(Color.white);
        g2d.fillRect(0, 0, width, height);
        JLabel jl = new JLabel();
        jl.setForeground(new Color(0, 0, 0));
        ti.paintIcon(jl, g2d, 0, 0);
    } finally {
        // Release the graphics context once painting is done.
        g2d.dispose();
    }
    try {
        ImageIO.write(bimg, "png", new File(path));
    } catch (IOException e) {
        // Returning the path of a file that was never written would only move the failure
        // to the caller, so report it here with its cause.
        throw new IllegalStateException("Failed to write formula image to " + path, e);
    }
    return path;
}
/**
 * Loads an image from a local file and applies the layout used for formula images:
 * 30 percent scale, absolute position (0, 0), left-aligned with text wrap.
 *
 * @param imageUrl path of the local image file
 * @return the configured image
 * @throws Exception if the image cannot be loaded
 */
public static Image wordLocalImage(String imageUrl) throws Exception{
    final Image picture = Image.getInstance(imageUrl);
    // Proportional scaling, both axes.
    picture.scalePercent(30,30);
    // Position of the picture on the page.
    picture.setAbsolutePosition(0, 0);
    // Left-aligned, with text wrapping around the picture.
    final int alignment = Image.LEFT | Image.TEXTWRAP;
    picture.setAlignment(alignment);
    return picture;
}
/**
 * Loads an image from a URL and applies the default layout used for document images:
 * 50 percent scale, absolute position (0, 0), left-aligned with text wrap.
 *
 * @param imageUrl the URL of the image
 * @return the configured image
 * @throws Exception if the URL is malformed or the image cannot be loaded
 */
public static Image wordHttpImage(String imageUrl) throws Exception{
    final URL location = new URL(imageUrl);
    final Image picture = Image.getInstance(location);
    picture.scalePercent(50,50);
    picture.setAbsolutePosition(0, 0);
    final int alignment = Image.LEFT | Image.TEXTWRAP;
    picture.setAlignment(alignment);
    return picture;
}
/**
* 去除Latex公式中的换行
* @param title
* @return
* @author liujg
* @date 2021年10月27日 上午11:51:06
*/
public static String dealLatexTitle(String title){
if(title == null){
return null;
}
title = title.replaceAll("\\\\","\\\\\\\\");
//\[ \] latex其实是没有的,为了转换公式不出错需要去掉,把整个公式替换成@@@,再把去掉\[ \]的然后以@@@截取后,就是 文字-公式-文字-公式的数组形式,最后依次写入文档
String titleNot = title.replaceAll("\\\\\\\\\\((.*?)\\\\\\\\\\)|\\\\\\\\\\[(.*?)\\\\\\\\\\]","@@@");
Matcher mather = pattern.matcher(title);
while (mather.find()){
String formula = mather.group();
if(formula != null){
formula = formula.replace("
", "");
formula = formula.replace("\\ ", "");
}
titleNot = titleNot.replaceFirst("@@@", formula);
}
return titleNot;
}
/**
* 题干类容段落处理
* */
public static String[] titleGroup(String title){
if(title == null){
return null;
}
//公式处理\[(\cfrac{x-y}{x+y}-\cfrac{x+y}{x-y})\div \cfrac{2x}{{{x}^{2}}y-{{y}^{3}}}\] 将\cfrac转换成\\,否者后面\cfrac转换成mathml其实转换的事cfrac,报错
title = title.replace("", "");
title = title.replace("
", "");
title = title.replaceAll("\\\\","\\\\\\\\");
//\[ \] latex其实是没有的,为了转换公式不出错需要去掉,把整个公式替换成@@@,再把去掉\[ \]的然后以@@@截取后,就是 文字-公式-文字-公式的数组形式,最后依次写入文档
String titleNot = title.replaceAll("\\\\\\\\\\((.*?)\\\\\\\\\\)|\\\\\\\\\\[(.*?)\\\\\\\\\\]","@@@");
Matcher mather = pattern.matcher(title);
while (mather.find()){
String formula = mather.group();
if(formula != null){
formula = formula.replace("
", "");
}
titleNot = titleNot.replaceFirst("@@@", "#####"+formula+"#####");
}
String[] titleSplit = null;
if(titleNot != null){
titleNot = titleNot.replace("
", "\n");
titleSplit = titleNot.split("#####");
}
return titleSplit;
}
/**
 * Manual check: loads the image behind a placeholder URL once and prints the result; any
 * failure is printed as a stack trace.
 */
public static void main(String[] args) {
    final String url = "xxxx";
    final int attempts = 1;
    try {
        int i = 0;
        while (i < attempts) {
            Image loaded = LatexUtils.wordHttpImage(url);
            System.out.println(i+"--"+loaded);
            i++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
}
输出结果:
富文本html页面展示方式:
页面引入 MathJax 脚本(原文此处的脚本引入代码在转载时丢失):
数据在页面展示后使用js进行渲染:
MathJax.typesetPromise();
展示结果:
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)