返回顶部

收藏

Java实现网易163邮箱好友通讯录的解析功能(带源码)

更多

这份源码我之前开源过,昨晚又重新整理了一遍,现在公开出来。这里讲一下思路以及真实的源码实现。我们将使用 HttpClient 这个类,它的基本用法可以参照:

http://www.ibm.com/developerworks/cn/opensource/os-httpclient/

我们会使用 HttpClient 以及 HttpWatch 工具。

HttpWatch 的下载地址见 http://www.ij2ee.com/what-to-use-to-develop 。

package com.thief.parser.impl;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HTTP;
import org.apache.log4j.Logger;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import com.thief.parser.IMail163Parser;
import com.thief.po.Contact;
import com.thief.util.HttpUtil;
import com.thief.util.StringUtil;

public class Mail163ParserImpl implements IMail163Parser{
    private String loginUrl;
    private static final String charCode = HTTP.UTF_8;

    public List<Contact> parser(String email, String password) throws HttpException,
            IOException, InterruptedException, URISyntaxException {
        DefaultHttpClient client = new DefaultHttpClient();
        try {
        String loginRes = login(email, password,client);
        return parser(client,loginRes, email);
        } catch (Exception e) {
            // TODO: handle exception
        }finally{
            client.getConnectionManager().shutdown();
        }
        return null;
    }

    public String login(String email, String password, HttpClient client)
            throws IllegalStateException, URISyntaxException, IOException, HttpException, InterruptedException{
        Map<String,String> map = new HashMap<String, String>();
        map.put(".verifycookie", "1");
        map.put("style", "35");
        map.put("product", "mail163");
        map.put("username", email);
        map.put("password", password);
        map.put("selType=", "jy");
        map.put("remUser", "on");
        map.put("secure", "on");

            String res = HttpUtil.doPost(client, loginUrl,map, charCode);
            if (res.indexOf("跳转提示") != -1) {
                HttpGet get = new HttpGet(
                        "http://entry.mail.163.com/coremail/fcg/ntesdoor2?username="+email+"&lightweight=1&verifycookie=1&language=-1&style=-1");
                res = StringUtil.readInputStream(client.execute(get)
                        .getEntity().getContent(), charCode);
            }else if(res.indexOf("errorType")!=-1){
                throw new RuntimeException("帐号或密码错误");
            }
            return res;

    }

    String getUsers="http://tg4a84.mail.163.com/jy3/address/addrprint.jsp?sid=";
    private static String regex = "iframe src=\\"index.jsp\\\\?sid=([^\\"]+)";
        public List<Contact> parser(HttpClient client, String content, String email) throws IllegalStateException, URISyntaxException, IOException, HttpException, InterruptedException {
        //iframe src="index.jsp?sid=zBObqxwciWMxDZiIlwccEFhCuYOLgipm"
        String id = StringUtil.getByRegex(regex, 1, content);
        if(id == null || "".equals(id.trim())){
            throw new RuntimeException("没能获取到关键ID");
        }
        String userJson = getUsers+id;
        Map<String, String> map = new HashMap<String, String>();
        //String res = HttpUtil.doPost(client, userJson,map , charCode);
        log.info(userJson);
        userJson = userJson+"&dd=" + System.currentTimeMillis();
        String res = HttpUtil.doGet(client, userJson, null);
        log.info(res);
        List<Contact> contactList = new ArrayList<Contact>();
        /*try {
            parseByHtmlParser(res);
        } catch (ParserException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        if(1==1 )return contactList;
*/      String aa = "<div class=\\"gTitleSub\\"><div align=\\"left\\"><b class=\\"mTT\\">(.*?)</b></div><div class=\\"Extra\\"></div></div><table class=\\"gTable\\"><tr id=\\"tr_base_0\\" style=\\"\\"><th>邮件地址:</th><td>(.*?)</td></tr>(.*?)</table>";
        Pattern p = Pattern.compile(aa,Pattern.DOTALL);
        Matcher m = p.matcher(res);
        int groupNum = 0;
        int firstIndex = 0;
            while(m.find(firstIndex))
            {
                String name = m.group(1);
                String email1 = m.group(2);
                contactList.add(new Contact(name,email1));
                firstIndex = m.end();
                groupNum++;
            }
        return contactList;
    }

    NodeFilter filter = new AndFilter(new NodeClassFilter(TableTag.class),    new HasAttributeFilter("class", "gTable"));//new HasAttributeFilter("class","gTable");
    private List<Contact> parseByHtmlParser(String content) throws ParserException{
        List<Contact> contactList = new ArrayList<Contact>();
        Parser p = new Parser();
        p.setInputHTML(content);
        NodeList nodeList = p.extractAllNodesThatMatch(filter);
        if(nodeList != null && nodeList.size()!=0){
            for(int i=0;i<nodeList.size();i++){
                Node node =  nodeList.elementAt(i);
                System.out.println(node.toHtml());
            }
        }
        return contactList;
    }

    public void setLoginUrl(String loginUrl) {
        this.loginUrl = loginUrl;
    }

    public static void main(String[] args) {
        String content = "<!doctype html><html><head><meta http-equiv=\\"Content-Type\\" content=\\"text/html; charset=utf-8\\" /><meta name=\\"application-name\\" content=\\"网易电子邮箱 - 极速4.0\\" /><link rel=\\"shortcut icon\\" href=\\"http://mimg.127.net/p/images/favicon3.ico\\" type=\\"image/x-icon\\"/><title>网易电子邮箱 - 极速4.0</title><style type=\\"text/css\\">.Patch118-safe-tit{ border-bottom:#DADADA 1px solid; padding:15px 0 25px 86px; position:relative; zoom:1}.Patch118-safe-tit .ico{ position:absolute; left:20px; top:10px}.Patch118-safe-ct{ padding:20px 25px; line-height:22px}</style></head><body style=\\"margin:0;padding:0;overflow:hidden\\" scroll=\\"no\\"><iframe src=\\"index.jsp?sid=zBObqxwciWMxDZiIlwccEFhCuYOLgipm\\" name=\\"index\\" style=\\"width:100%;height:100%;position:absolute\\" frameborder=\\"0\\" border=\\"0\\"></iframe></body></html>";
        String res = StringUtil.getByRegex(regex, 1, content);
        System.out.println(res);
    }

    Logger log = Logger.getLogger(Mail163ParserImpl.class);
}
//该片段来自于http://outofmemory.cn

标签:java,网络

收藏

0人收藏

支持

0

反对

0

发表评论