java模拟爬虫登录业务系统带传统验证码

java模拟爬虫登录业务系统带传统验证码,第1张

java模拟爬虫登录业务系统带传统验证码

1、使用 tess4j 识别验证码;

2、使用 jsoup 模拟浏览器发送登录请求。

package com.test.tess;

import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.SerializableString;
import com.jst.tess.constants.Constants;
import com.jst.tess.util.FileUtils;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.struts2.ServletActionContext;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;
import sun.net.www.http.HttpClient;

import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class test4 extends HttpServlet{

    //登录链接
    private static String baseUrl = "http://192.168.0.20:8080/test/login.jsp";

    //验证码保存路径
    private static String verCodePath = "D:\img\codeimg";

    //验证码请求地址
    private static String codeimgurl = "http://192.168.0.20:8080/test/login/getCode.do";


    //登录地址
    private static String loginUrl = "http://192.168.0.20:8080/test/login/login.do";

    //注销地址
    private static String logoutUrl = "http://192.168.0.20:8080/test/login/logout.do";

    //测试数据列表路径
    private static String listUrl = "http://192.168.0.20:8080/test/testList/getList.do";

    //测试数据详情路径
    private static String getoneUrl = "http://192.168.0.20:8080/test/testView/view.do";

    //用户名
    private static String userName = "test";

    //密码
    private static String passWord = "96af831e99ef1788b04c84d0a7782e855d700d4d6e7938722cfbcbaa";

    //判断是否进入首页标识,根据id属性获取
    private static String ifIndexPage = "index-menu";

    //全局session信息
    private static String baseSessions ="";

    /**
     * Command-line entry point for trying out the crawler by hand.
     *
     * <p>Every sample call in the body is commented out, so running this method
     * currently does nothing.
     *
     * @param args command-line arguments; not read by this method
     * @throws IOException declared for the disabled sample calls; never thrown by the current body
     * @throws TesseractException declared for the disabled sample calls; never thrown by the current body
     */
    public static void main(String[] args) throws IOException, TesseractException {
       
        // Disabled sample invocations. login as declared in this class is an
        // instance method taking four arguments, so the no-argument call below
        // would need updating before it can be re-enabled.
//        login();
//        getList();
//        getOne("9");
        
    }

    
    public Map login(String url, String user, String pwd, String tess4jpath) {
        System.out.println("begin:");
        Map map = null;
        Connection.Response LoginResponse = null;
        try {
            LoginResponse = Jsoup.connect(url).method(Connection.Method.GET).execute();
            map = LoginResponse.cookies();//获取会话,登录后需要保持会话
            String sessName = "JSESSIONID";
            String sessions = (String) map.get("JSESSIONID");
            System.out.println("sessions="+sessions);
//            System.out.println("map1:"+map.toString());
//            document document = LoginResponse.parse();
//            Element element = document.getElementById("varifyCodeImg");
//            String codeimgurl2 = element.attr("id");
//            System.out.println("222222:"+codeimgurl2);
            String codeimgurl = "http://192.168.0.37:8080/test/login/getCode.do";
            String connectPath = "http://192.168.0.37:8080/test/login/login.do";
            String codeimgpath = tess4jpath+"\codeimg";
            //下载验证码图片
            byte[] codeimgdata = Jsoup.connect(codeimgurl).header("cookie",sessName + "=" + sessions).ignoreContentType(true).execute().bodyAsBytes();
            FileUtils.saveImg(codeimgdata, codeimgpath, "codeimg.jpg");
            //识别样本输出地址
            String ocrResult = codeimgpath+"\codetmpimgtmp.jpg";
            String OriginalImg = codeimgpath+"\codeimg.jpg";
            //去噪点
            FileUtils.removeBackground(OriginalImg, ocrResult);
            ITesseract instance =new Tesseract();
            instance.setDatapath(tess4jpath);
            //获得Tesseract的文字库
            URL url2 = ClassLoader.getSystemResource("tessdata");
            String tesspath = url2.getPath().substring(1);
            instance.setDatapath(tesspath);//进行读取,默认是英文,如果要使用中文包,加上instance.setLanguage("chi_sim");
            File imgDir =new File(OriginalImg);
            String code = instance.doOCR(imgDir);//识别验证码
            code = replaceBlank(code);
            System.out.println("codeLength:"+code.length()+",code:"+code);
            Map datas = new HashMap();
            datas.put("username", user);
            datas.put("loginkey", pwd);
            datas.put("verifycode",code);
//            Connection.Response connection = Jsoup.connect(connectPath).header("cookie",sessName + "=" + sessions).data(datas).execute();
//            connection.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*
        /**
         * Makes sure a directory exists at {@code path}, creating it together with
         * any missing parent directories when nothing exists at that location yet.
         *
         * @param path file-system path of the directory to create
         */
        public static void creatDir(String path) {
            final File target = new File(path);
            if (target.exists()) {
                // Something is already present at this path; leave it untouched.
                return;
            }
            // Best-effort: the boolean result of mkdirs() is not checked.
            target.mkdirs();
        }
        
        public static void removeBackground(String imgUrl, String resUrl){
            //定义一个临界阈值
            int threshold = 400;
            try{
                BufferedImage img = ImageIO.read(new File(imgUrl));
                int width = img.getWidth();
                int height = img.getHeight();
                for(int i = 1;i < width;i++){
                    for (int x = 0; x < width; x++){
                        for (int y = 0; y < height; y++){
                            Color color = new Color(img.getRGB(x, y));
                            //System.out.println("red:"+color.getRed()+" | green:"+color.getGreen()+" | blue:"+color.getBlue());
                            int num = color.getRed()+color.getGreen()+color.getBlue();
                            if(num >= threshold){
                                img.setRGB(x, y, Color.WHITE.getRGB());
                            }
                        }
                    }
                }
                for(int i = 1;i 

部分代码参考自:Java识别验证码和图像处理_梁康h的博客-CSDN博客

Java 爬虫之识别图片验证码后登录_JavaBigADog的博客-CSDN博客

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/4025822.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-10-22
下一篇 2022-10-22

发表评论

登录后才能评论

评论列表(0条)

保存