java怎样读取html文件_框架

Java读取HTML文件与读取普通文件一样，都是使用输入输出流；不同的是，读取之后还需要解析，这里使用Jsoup对HTML进行解析。下面是一个用Java读取任意带表格的HTML文件，并把它转换成Excel文件的例子。

要求：给出任意带table表格的html文件，能够生成与表格内容相同的excel文件。附件可以作为测试文件：对于给定的roster.html文件，通过java代码生成与html页面中table样式相同的roster.xls文件。

首先看roster.html：

java代码：

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;

import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableCellFormat;
import jxl.write.WritableFont;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class HTMLTOExcel {

public static void main(String args[]) throws IOException{

///读取classpath目录下面的路径

String path=HTMLTOExcelclassgetResource("/")getPath();

path+="rosterhtml";

toExcel(path,"roster");

}

//得到Document并且设置编码格式

public static Document getDoc(String fileName) throws IOException{

File myFile=new File(fileName);

Document doc= Jsoupparse(myFile, "GBK","");

return doc;

}

///这个方法用于根据trs行数和sheet画出整个表格

public static void mergeColRow(Elements trs,WritableSheet sheet) throws RowsExceededException, WriteException{

int[][] rowhb=new int[300][50];

for(int i=0;i<trssize();i++){

Element tr=trsget(i);

Elements tds=trgetElementsByTag("td");

int realColNum=0;

for(int j=0;j<tdssize();j++){

Element td=tdsget(j);

if(rowhb[i][realColNum]!=0){

realColNum=getRealColNum(rowhb,i,realColNum);

}

int rowspan=1;

int colspan=1;

if(tdattr("rowspan")!=""){

rowspan = IntegerparseInt(tdattr("rowspan"));

}

if(tdattr("colspan")!=""){

colspan = IntegerparseInt(tdattr("colspan"));

}

String text=tdtext();

drawMegerCell(rowspan,colspan,sheet,realColNum,i,text,rowhb);

realColNum=realColNum+colspan;

}

///这个方法用于根据样式画出单元格，并且根据rowpan和colspan合并单元格

public static void drawMegerCell(int rowspan,int colspan,WritableSheet sheet,int realColNum,int realRowNum,String text,int[][] rowhb) throws RowsExceededException, WriteException{

for(int i=0;i<rowspan;i++){

for(int j=0;j<colspan;j++){

if(i!=0||j!=0){

text="";

}

Label label = new Label(realColNum+j,realRowNum+i,text);

WritableFont countents = new WritableFont(WritableFontTIMES,10); // 设置单元格内容，字号12

WritableCellFormat cellf = new WritableCellFormat(countents );

cellfsetAlignment(jxlformatAlignmentCENTRE);//把水平对齐方式指定为居中

cellfsetVerticalAlignment(jxlformatVerticalAlignmentCENTRE);//把垂直对齐方式指定为居

labelsetCellFormat(cellf);

sheetaddCell(label);

rowhb[realRowNum+i][realColNum+j]=1;

}

sheetmergeCells(realColNum,realRowNum, realColNum+colspan-1,realRowNum+rowspan-1);

}

public static int getRealColNum(int[][] rowhb,int i,int realColNum){

while(rowhb[i][realColNum]!=0){

realColNum++;

}

return realColNum;

}

///根据colgroups设置表格的列宽

public static void setColWidth(Elements colgroups,WritableSheet sheet){

if(colgroupssize()>0){

Element colgroup=colgroupsget(0);

Elements cols=colgroupgetElementsByTag("col");

for(int i=0;i<colssize();i++){

Element col=colsget(i);

String strwd=colattr("width");

if(colattr("width")!=""){

int wd=IntegerparseInt(strwd);

sheetsetColumnView(i,wd/8);

}

//toExcel是根据html文件地址生成对应的xls

public static void toExcel(String fileName,String excelName)throws IOException{

Document doc=getDoc(fileName);

String title = doctitle();

///得到样式，以后可以根据正则表达式解析css，暂且没有找到cssparse

Elements style= docgetElementsByTag("style");

///得到Table，demo只演示输入一个table，以后可以用循环遍历tables集合输入所有table

Elements tables= docgetElementsByTag("TABLE");

if(tablessize()==0){

return;

}

Element table=tablesget(0);

//得到所有行

Elements trs = tablegetElementsByTag("tr");

///得到列宽集合

Elements colgroups=tablegetElementsByTag("colgroup");

try {

//文件保存到classpath目录下面

String path=HTMLTOExcelclassgetResource("/")getPath();

path+=excelName+"xls";

Systemoutprintln(path);

WritableWorkbook book = WorkbookcreateWorkbook(new File(path));

WritableSheet sheet = bookcreateSheet("人事关系", 0);

setColWidth(colgroups,sheet);

mergeColRow(trs,sheet);

bookwrite();

bookclose();

} catch (RowsExceededException e) {

eprintStackTrace();

} catch (WriteException e) {

eprintStackTrace();

}

解析html文件的例子文档地址：>

交互方法：

1. <applet name="appletName" ... />

2. // JavaScript访问Applet属性

3. window.document.appletName.appletField

(属性必须是public的，"window.document"也可以不写)

4. // JavaScript访问Applet方法

5. window.document.appletName.appletMethod

(方法必须是public的，"window.document"也可以不写)

简单实现：

HtmlRequest类的内容：

[java] view plaincopy

package comcapinfotechnet;

import javaioByteArrayOutputStream;

import javaioIOException;

import javaioInputStream;

import javanet>

1 使用表单的方式传参

把表单元素放在<form> 标签里面，直接提交表单就行

2 使用URL重定向方式传值

比如：location.href = "login.do?method=checktel&name=lisi&age=10";

后台通过 request 对象接收参数

String name = request.getParameter("name");

String age = request.getParameter("age");

您好，这样的：

解析HTML文档的Java程序

下面是一个解析HTML字符串、网络上下载的HTML文件，以及本地文件系统中的HTML文件的完整的Java程序。你可以使用Eclipse IDE或者别的IDE甚至命令行来运行这个程序。在Eclipse里面则很简单，拷贝这份代码，新建一个Java工程，在src包上右键并粘贴进去就可以了。Eclipse会去创建正确的包及同名的Java源文件，因此工作量最小。如果你已经有一个Java示例工程了，那么仅需一步就可以了。下面的这个Java程序展示了解析及遍历HTML文件的三个不同例子。第一个例子中，我们直接解析了一个内容为HTML的字符串；第二个例子中，我们解析了一个从URL中下载的HTML文件；第三个例子中，我们从本地文件系统中加载了一个HTML文档并进行解析。第一和第三个例子中都用到了Jsoup.parse方法来获取一个Document对象，你可以查询它来提取出任何的标签值或者属性值。第二个例子中，我们用到了Jsoup.connect方法，它会去创建URL的连接，下载HTML并进行解析。这个方法也会返回Document，它可以用于后续的查询及获取标签或者属性的值。

import javaioIOException;

import orgjsoupJsoup;

import orgjsoupnodesDocument;

import orgjsoupnodesElement;

/**
 * Java Program to parse/read HTML documents from File using Jsoup library.
 * Jsoup is an open source library which allows Java developer to parse HTML
 * files and extract elements, manipulate data, change style using DOM, CSS and
 * JQuery like method.
 *
 * @author Javin Paul
 */

public class HTMLParser{

public static void main(String args[]) {

// Parse HTML String using JSoup library

String HTMLSTring = "<!DOCTYPE html>"

+ "<html>"

+ "<head>"

+ "<title>JSoup Example</title>"

+ "</head>"

+ "<body>"

+ "|[b]HelloWorld[/b]"

+ ""

+ "</body>"

+ "</html>";

Document html = Jsoupparse(HTMLSTring);

String title = htmltitle();

String h1 = htmlbody()getElementsByTag("h1")text();

Systemoutprintln("Input HTML String to JSoup :" + HTMLSTring);

Systemoutprintln("After parsing, Title : " + title);

Systemoutprintln("Afte parsing, Heading : " + h1);

// JSoup Example 2 - Reading HTML page from URL

Document doc;

try {

doc = Jsoupconnect(">

以上就是关于java怎样读取html文件全部的内容，包括:java怎样读取html文件、如何使用java提取html页面中script标签里面的值、给了一个HTML HTML里有个Button 点击让调用java类中的方法 java类继承了Applet 没有框架不是web项目等相关内容解答，如果想了解更多相关内容，可以关注我们，你们的支持是我们更新的动力！

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/web/9742368.html

java怎样读取html文件

发表评论

评论列表（0条）