这份源码我之前开源过,昨晚又重新整理了一遍,现在再次公开。这里讲一下思路以及真实的源码实现。我们将使用一个类 HttpClient,这个类的基本用法可以参照:
http://www.ibm.com/developerworks/cn/opensource/os-httpclient/
我们会使用 HttpClient 以及 HttpWatch 工具。
HttpWatch 的下载地址可以在 http://www.ij2ee.com/what-to-use-to-develop 找到。
package com.thief.parser.impl;

import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HTTP;
import org.apache.log4j.Logger;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import com.thief.parser.IMail163Parser;
import com.thief.po.Contact;
import com.thief.util.HttpUtil;
import com.thief.util.StringUtil;

/**
 * Logs in to a 163 mailbox over HTTP and extracts the contacts listed on the
 * printable address-book page.
 *
 * <p>The flow is: POST the login form to {@code loginUrl}, follow the redirect
 * notice page if one is returned, pull the {@code sid} out of the resulting
 * frameset HTML, request the address-book print page for that {@code sid}, and
 * regex-match each contact's name and e-mail address out of the returned HTML.
 */
public class Mail163ParserImpl implements IMail163Parser {

    static final Logger log = Logger.getLogger(Mail163ParserImpl.class);

    /** Charset used for the login POST, for URL-encoding and for decoding the redirect response. */
    private static final String charCode = HTTP.UTF_8;

    /** Captures the session id (group 1) from the {@code <iframe src="index.jsp?sid=...">} tag. */
    private static final String regex = "iframe src=\"index.jsp\\?sid=([^\"]+)";

    /**
     * Matches one contact entry of the address-book print page.
     * Group 1 is the contact name, group 2 the e-mail address, group 3 the remaining table rows.
     * Compiled once because the expression never changes between calls.
     */
    private static final Pattern CONTACT_PATTERN = Pattern.compile(
            "<div class=\"gTitleSub\"><div align=\"left\"><b class=\"mTT\">(.*?)</b></div><div class=\"Extra\"></div></div><table class=\"gTable\"><tr id=\"tr_base_0\" style=\"\"><th>邮件地址:</th><td>(.*?)</td></tr>(.*?)</table>",
            Pattern.DOTALL);

    /** URL the login form is posted to; must be set via {@link #setLoginUrl(String)} before use. */
    private String loginUrl;

    /** Address-book print page; the session id is appended to this prefix. */
    String getUsers = "http://tg4a84.mail.163.com/jy3/address/addrprint.jsp?sid=";

    /** Selects {@code <table class="gTable">} nodes for the htmlparser-based experiment. */
    NodeFilter filter = new AndFilter(new NodeClassFilter(TableTag.class),
            new HasAttributeFilter("class", "gTable"));

    /**
     * Logs in with the given credentials and returns the mailbox's contacts.
     *
     * <p>A fresh {@link DefaultHttpClient} is created per call and its connection
     * manager is always shut down. Failures are propagated to the caller instead
     * of being swallowed, so a wrong password or a missing session id surfaces as
     * the {@link RuntimeException} raised by the steps below rather than as a
     * silent {@code null}.
     *
     * @param email    mailbox address used as the login user name
     * @param password mailbox password
     * @return the contacts found on the address-book page (possibly empty)
     * @throws RuntimeException if the login is rejected or no session id can be found
     */
    public List<Contact> parser(String email, String password)
            throws HttpException, IOException, InterruptedException, URISyntaxException {
        DefaultHttpClient client = new DefaultHttpClient();
        try {
            String loginRes = login(email, password, client);
            return parser(client, loginRes, email);
        } finally {
            // Release pooled connections whether or not the scrape succeeded.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Posts the login form and returns the HTML of the page reached after login.
     *
     * <p>If the response contains the redirect notice ("跳转提示"), the mailbox entry
     * URL is fetched with the same client and that page is returned instead.
     *
     * @param email    mailbox address used as the login user name
     * @param password mailbox password
     * @param client   HTTP client that carries the session across requests
     * @return the HTML of the post-login page
     * @throws RuntimeException if the response contains {@code errorType}
     */
    public String login(String email, String password, HttpClient client)
            throws IllegalStateException, URISyntaxException, IOException, HttpException, InterruptedException {
        Map<String, String> form = new HashMap<String, String>();
        form.put(".verifycookie", "1");
        form.put("style", "35");
        form.put("product", "mail163");
        form.put("username", email);
        form.put("password", password);
        // The key previously carried a stray trailing '=', which posted a field named "selType=".
        form.put("selType", "jy");
        form.put("remUser", "on");
        form.put("secure", "on");

        String res = HttpUtil.doPost(client, loginUrl, form, charCode);
        if (res.indexOf("跳转提示") != -1) {
            // Encode the address so characters such as '&' or '+' cannot corrupt the query string.
            String entryUrl = "http://entry.mail.163.com/coremail/fcg/ntesdoor2?username="
                    + URLEncoder.encode(email, charCode)
                    + "&lightweight=1&verifycookie=1&language=-1&style=-1";
            HttpGet get = new HttpGet(entryUrl);
            res = StringUtil.readInputStream(client.execute(get).getEntity().getContent(), charCode);
        } else if (res.indexOf("errorType") != -1) {
            throw new RuntimeException("帐号或密码错误");
        }
        return res;
    }

    /**
     * Extracts the session id from the post-login page, downloads the address-book
     * print page and parses the contacts out of it.
     *
     * @param client  HTTP client that already holds the logged-in session
     * @param content HTML of the post-login page, e.g. containing
     *                {@code <iframe src="index.jsp?sid=zBObqxwciWMxDZiIlwccEFhCuYOLgipm"}
     * @param email   mailbox address; currently unused by this method
     * @return one {@link Contact} per matched address-book entry, in page order
     * @throws RuntimeException if no session id can be extracted from {@code content}
     */
    public List<Contact> parser(HttpClient client, String content, String email)
            throws IllegalStateException, URISyntaxException, IOException, HttpException, InterruptedException {
        String id = StringUtil.getByRegex(regex, 1, content);
        if (id == null || "".equals(id.trim())) {
            throw new RuntimeException("没能获取到关键ID");
        }

        String userJson = getUsers + id;
        // The URL embeds the session id and the response holds the whole address book,
        // so both are logged at debug level only.
        log.debug(userJson);
        // Timestamp parameter makes every request URL unique.
        userJson = userJson + "&dd=" + System.currentTimeMillis();
        String res = HttpUtil.doGet(client, userJson, null);
        log.debug(res);

        List<Contact> contactList = new ArrayList<Contact>();
        Matcher m = CONTACT_PATTERN.matcher(res);
        while (m.find()) {
            String name = m.group(1);
            String email1 = m.group(2);
            contactList.add(new Contact(name, email1));
        }
        return contactList;
    }

    /**
     * Experimental htmlparser-based alternative to the regex extraction.
     * It only logs the HTML of every {@code gTable} table and always returns an
     * empty list; nothing in this class calls it.
     *
     * @param content HTML of the address-book print page
     * @return an empty list
     */
    private List<Contact> parseByHtmlParser(String content) throws ParserException {
        List<Contact> contactList = new ArrayList<Contact>();
        Parser p = new Parser();
        p.setInputHTML(content);
        NodeList nodeList = p.extractAllNodesThatMatch(filter);
        if (nodeList != null && nodeList.size() != 0) {
            for (int i = 0; i < nodeList.size(); i++) {
                Node node = nodeList.elementAt(i);
                log.debug(node.toHtml());
            }
        }
        return contactList;
    }

    /**
     * Sets the URL the login form is posted to.
     *
     * @param loginUrl login endpoint
     */
    public void setLoginUrl(String loginUrl) {
        this.loginUrl = loginUrl;
    }

    /**
     * Manual check of the session-id regex against a captured post-login page;
     * prints the extracted {@code sid}.
     */
    public static void main(String[] args) {
        String content = "<!doctype html><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /><meta name=\"application-name\" content=\"网易电子邮箱 - 极速4.0\" /><link rel=\"shortcut icon\" href=\"http://mimg.127.net/p/images/favicon3.ico\" type=\"image/x-icon\"/><title>网易电子邮箱 - 极速4.0</title><style type=\"text/css\">.Patch118-safe-tit{ border-bottom:#DADADA 1px solid; padding:15px 0 25px 86px; position:relative; zoom:1}.Patch118-safe-tit .ico{ position:absolute; left:20px; top:10px}.Patch118-safe-ct{ padding:20px 25px; line-height:22px}</style></head><body style=\"margin:0;padding:0;overflow:hidden\" scroll=\"no\"><iframe src=\"index.jsp?sid=zBObqxwciWMxDZiIlwccEFhCuYOLgipm\" name=\"index\" style=\"width:100%;height:100%;position:absolute\" frameborder=\"0\" border=\"0\"></iframe></body></html>";
        String res = StringUtil.getByRegex(regex, 1, content);
        System.out.println(res);
    }
}
// This snippet comes from http://outofmemory.cn
收藏
0人收藏
- 2012-11-05 21:59:42java获得随机数代码 by 怪兽狂殴奥特曼
- 2014-11-24 10:33:35java web 分页 by 朱凯迪
- 2014-06-01 18:57:33连接池方式测试ehcache 独立server的性能 by 云香水识
- 2014-08-17 21:57:20得到网站的IP地址 by sdcool
- 2014-08-30 13:52:13采用JSCH实现对linux远程操作,执行简单的命令 by clt
- 2014-09-16 13:56:29sina微博删除微博 by 落叶随风
- 2014-10-09 11:54:31用Java实现断点续传(HTTP) by liuyan814
- 2014-10-25 11:48:30java调用wordpress xmlrpc发布图片博客 by Hugh
- 2014-06-01 15:41:49全站压缩 过滤器 by 香格里拉登
- 2014-08-30 14:05:44简单dos聊天工具 by Hugh
- 2014-09-26 10:51:10文件上传类 by 落叶随风
相关聚客文章
- 《HttpClient 官方文档》第三章 HTTP 状态管理
- Java 网络教程: JarURLConnection
- java 网络程序员真的不需要知道 servlet了吗?
- 《HttpClient 官方文档》第五章 Fluent API
- Java网络教程:URL + URLConnection
- 今日力推:阿里巴巴Java开发手册 / 简洁优雅的网络状态提示
- Netty源码注释翻译-Channel类
- 使用java web start网络启动应用程序
- Java网络IO编程
- IO模型解惑
- Java网络编程之HttpURLConnection你了解多少?
- Fail Javax.net.ssl.SSLPeerUnverifiedException:Host