// Read the uploaded Word file from the submitted form and extract its plain text.
TextFileForm fileForm = (TextFileForm) form;
FormFile formFile = fileForm.getTxtFile();
if (formFile.getFileData().length == 0)
{
    // Empty upload: answer with an empty body declared as gb2312.
    response.setCharacterEncoding("gb2312");
    response.getWriter().write("");
    // NOTE(review): execution still falls through to the extraction below even
    // though the upload is empty; the enclosing method (not visible here)
    // presumably should return at this point - confirm against the caller.
}
InputStream in = formFile.getInputStream();
WordExtractor extractor = new WordExtractor();
String str;
try {
    // The whole document text is returned as one String; it can then be split
    // into the three data sections.
    str = extractor.extractText(in);
} finally {
    // Always release the upload stream, even when extraction fails.
    in.close();
}
再用 String 的 split() 方法将这三类数据分割开,分别存放在三个 String 或 String 数组里。
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
//import org.jdom2.Document;
import org.xml.sax.SAXException;

/**
 * Small DOM demo: parses an XML file, prints a few values from it, rewrites the
 * first attribute found under the first {@code StepDescription} element and
 * saves the modified document to a second file.
 */
public class tt {

    /** Input file used when no command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "E:/workspace/XMLTest/src/tt.xml";

    /** Output file used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT_PATH = "E:/workspace/XMLTest/src/tt_11.xml";

    /**
     * Runs the demo.
     *
     * @param arg optional arguments: {@code arg[0]} overrides the input XML path and
     *            {@code arg[1]} overrides the output XML path; the built-in defaults
     *            are used for whichever is missing
     * @throws IllegalStateException if the input cannot be parsed or the output
     *                               cannot be written (the original cause is attached)
     */
    public static void main(String[] arg) {
        String inputPath = (arg != null && arg.length > 0) ? arg[0] : DEFAULT_INPUT_PATH;
        String outputPath = (arg != null && arg.length > 1) ? arg[1] : DEFAULT_OUTPUT_PATH;

        org.w3c.dom.Document document = load(inputPath);

        Element root = document.getDocumentElement();
        NodeList nList = root.getChildNodes();
        System.out.println(nList.getLength() + root.getNodeName());

        // Index 1 is used on both levels, as in the original demo. item() returns
        // null when the index is out of range, so guard before dereferencing.
        Node secondChild = nList.item(1);
        Node nested = (secondChild == null) ? null : secondChild.getChildNodes().item(1);
        if (nested != null) {
            System.out.println(nested.getTextContent() + "hahah");
        } else {
            System.err.println("No node at child index 1/1 under <" + root.getNodeName() + ">");
        }

        Node attribute = findStepAttribute(document);
        if (attribute != null) {
            attribute.setNodeValue("ranqi");
            System.out.println(attribute.getNodeValue() + "7777");
        } else {
            System.err.println("No attribute found under the first StepDescription element");
        }

        save(document, outputPath);
    }

    /** Parses the XML file at {@code inputPath} into a DOM document. */
    private static org.w3c.dom.Document load(String inputPath) {
        try {
            DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            // parse(File) avoids treating a drive-letter path as a URI string.
            return dBuilder.parse(new File(inputPath));
        } catch (ParserConfigurationException e) {
            throw new IllegalStateException("Cannot create an XML parser", e);
        } catch (SAXException e) {
            throw new IllegalStateException("Malformed XML in " + inputPath, e);
        } catch (IOException e) {
            throw new IllegalStateException("Cannot read " + inputPath, e);
        }
    }

    /**
     * Returns the first attribute of the child at index 1 of the first
     * {@code StepDescription} element, or {@code null} when any step of that
     * path is missing.
     */
    private static Node findStepAttribute(org.w3c.dom.Document document) {
        Node stepDescription = document.getElementsByTagName("StepDescription").item(0);
        if (stepDescription == null) {
            return null;
        }
        Node step = stepDescription.getChildNodes().item(1);
        // getAttributes() is null for anything that is not an element (e.g. text nodes).
        if (step == null || step.getAttributes() == null) {
            return null;
        }
        return step.getAttributes().item(0);
    }

    /** Serializes {@code document} to the file at {@code outputPath}. */
    private static void save(org.w3c.dom.Document document, String outputPath) {
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform(new DOMSource(document), new StreamResult(new File(outputPath)));
        } catch (TransformerException e) {
            // Also covers TransformerConfigurationException, which is a subclass.
            throw new IllegalStateException("Cannot write XML to " + outputPath, e);
        }
    }
}
以上是我读取 E:/workspace/XMLTest/src/tt.xml 这个路径下的 XML 文档的示例代码。
你可以用免费版的 Free Spire.Doc for Java 直接读取 Word 文档里面的文本,参考代码:
import comspiredocDocument;
import javaioFileWriter;
import javaioIOException;
public class ExtractText {
public static void main(String[] args) throws IOException {
//加载Word文档
Document document = new Document();
documentloadFromFile("C:\\Users\\Administrator\\Desktop\\sampledocx");
//获取文档中的文本保存为String
String text=documentgetText();
//将String写入Txt文件
writeStringToTxt(text,"ExtractedTexttxt");
}
public static void writeStringToTxt(String content, String txtFileName) throws IOException {
FileWriter fWriter= new FileWriter(txtFileName,true);
try {
fWriterwrite(content);
}catch(IOException ex){
exprintStackTrace();
}finally{
try{
fWriterflush();
fWriterclose();
} catch (IOException ex) {
exprintStackTrace();
}
}
}
}
参考自官网原文。
以上就是关于“现有一个 Word 文档,里面有 3 个大部分的数据,现在要用 Java 来读取,并把 3 类数据分别读取出来,求思路方法”的全部内容,包括:“现有一个 Word 文档,里面有 3 个大部分的数据,现在要用 Java 来读取,并把 3 类数据分别读取出来,求思路方法”、“Java 读取一个路径下的所有 XML 类型的文件”、“Java 有什么好的方法可以将 Word 里的文本读取出来”等相关内容解答。如果想了解更多相关内容,可以关注我们,你们的支持是我们更新的动力!
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)