CSDN 涨粉攻略代码

csdn 涨粉攻略 代码,第1张

获取粉丝用户名

package com.web;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.JsonPathSelector;


/**
 * WebMagic page processor that extracts fan usernames from a CSDN fan-list
 * API response and accumulates them in {@link #usernames}.
 */
public class UsernameSpider implements PageProcessor {

    /*
     * Fan-list API this processor is meant to be pointed at (append the blog username):
     * https://blog.csdn.net/community/home-api/v2/get-fans-list?page=1&pageSize=100&id=0&noMore=true&blogUsername= + username;
     */

    private static final String TAG = UsernameSpider.class.getSimpleName();

    /**
     * Matches a {@code "username": "value"} pair; group 1 is the value without
     * quotes. Whitespace around the colon is tolerated so that both compact
     * and pretty-printed JSON are handled.
     */
    private static final Pattern USERNAME_PATTERN =
            Pattern.compile("\"username\"\\s*:\\s*\"([^\"]*)\"");

    /**
     * Usernames collected so far. Package-private because the test driver in
     * the same package reads it directly after the crawl.
     */
    final List<String> usernames = new ArrayList<>();

    private final Site site = Site
            .me()
            .setDomain("blog.csdn.net")
            // Sleep fairly long between requests to make testing easier.
            .setSleepTime(1000)
            .setUserAgent(
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");

    /**
     * Reads the {@code data.list} array of the response and stores the
     * username of every entry that has one.
     *
     * @param page the downloaded fan-list API response
     */
    @Override
    public void process(Page page) {
        String rawText = page.getRawText();
        String data = new JsonPathSelector("$.data").select(rawText);
        if (data == null) {
            // No "data" node (e.g. an error response): nothing to collect.
            return;
        }

        // The selected node may be rendered in map style ("{list=[...]}");
        // turn the leading key back into JSON so "$.list" can be evaluated.
        String json = data.replace("{list=", "{\"list\":");
        List<String> entries = new JsonPathSelector("$.list").selectList(json);
        for (String entry : entries) {
            String username = getUsername(entry);
            // Skip entries without a username instead of storing empty strings.
            if (!username.isEmpty()) {
                usernames.add(username);
            }
        }
    }

    /**
     * Extracts the value of the first {@code "username"} property in a JSON
     * fragment.
     *
     * @param line JSON text of a single fan entry; may be {@code null}
     * @return the username without quotes, or an empty string when
     *         {@code line} is {@code null} or has no username property
     */
    public String getUsername(String line) {
        if (line == null) {
            return "";
        }
        Matcher matcher = USERNAME_PATTERN.matcher(line);
        return matcher.find() ? matcher.group(1) : "";
    }

    @Override
    public Site getSite() {
        return site;
    }

}

测试类

/*
 * File name: T2.java
 *
 * Project description: ALIPAY transaction system
 *
 * Company name: Hangzhou Ali Technology Co., Ltd.
 *
 * Copyright: (C) 2022-2023
 */
package com.web;

import java.util.List;

import us.codecraft.webmagic.Spider;

/**
 * Manual test driver: crawls the first page of a CSDN user's fan list with
 * {@link UsernameSpider} and prints the collected usernames.
 *
 * @author globalcoding
 * @since 2022/5/9 15:28
 */
public class T2 {

    /**
     * Runs the crawl for a hard-coded blog username and prints the result.
     *
     * <p>The method is named {@code main2}, so the JVM does not treat it as a
     * program entry point; it has to be invoked explicitly.
     *
     * @param args unused
     */
    public static void main2(String[] args) {
        String username = "weixin_45781381";
        String fanUrl = "https://blog.csdn.net/community/home-api/v2/get-fans-list"
                + "?page=1&pageSize=100&id=0&noMore=true&blogUsername=" + username;

        UsernameSpider usernameSpider = new UsernameSpider();
        // The list is read right after run(), so run() is expected to return
        // only once the crawl has finished -- confirm against the WebMagic docs.
        Spider.create(usernameSpider).addUrl(fanUrl).thread(1).run();

        List<String> usernames = usernameSpider.usernames;
        System.out.println(usernames);
    }
}

欢迎分享,转载请注明来源:内存溢出

原文地址: https://outofmemory.cn/langs/905969.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-15
下一篇 2022-05-15

发表评论

登录后才能评论

评论列表(0条)

保存