1、使用 Tess4J 识别验证码;
2、使用 jsoup 模拟浏览器发送登录请求。
package com.test.tess; import com.alibaba.fastjson.JSONObject; import com.fasterxml.jackson.core.SerializableString; import com.jst.tess.constants.Constants; import com.jst.tess.util.FileUtils; import net.sourceforge.tess4j.ITesseract; import net.sourceforge.tess4j.Tesseract; import net.sourceforge.tess4j.TesseractException; import org.apache.struts2.ServletActionContext; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.web.context.request.RequestContextHolder; import org.springframework.web.context.request.ServletRequestAttributes; import sun.net.www.http.HttpClient; import javax.servlet.ServletException; import javax.servlet.ServletRequest; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpSession; import java.io.File; import java.io.IOException; import java.net.URL; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; public class test4 extends HttpServlet{ //登录链接 private static String baseUrl = "http://192.168.0.20:8080/test/login.jsp"; //验证码保存路径 private static String verCodePath = "D:\img\codeimg"; //验证码请求地址 private static String codeimgurl = "http://192.168.0.20:8080/test/login/getCode.do"; //登录地址 private static String loginUrl = "http://192.168.0.20:8080/test/login/login.do"; //注销地址 private static String logoutUrl = "http://192.168.0.20:8080/test/login/logout.do"; //测试数据列表路径 private static String listUrl = "http://192.168.0.20:8080/test/testList/getList.do"; //测试数据详情路径 private static String getoneUrl = "http://192.168.0.20:8080/test/testView/view.do"; //用户名 private static String userName = "test"; //密码 private static String passWord = "96af831e99ef1788b04c84d0a7782e855d700d4d6e7938722cfbcbaa"; //判断是否进入首页标识,根据id属性获取 private static String ifIndexPage = 
"index-menu"; //全局session信息 private static String baseSessions =""; public static void main(String[] args) throws IOException, TesseractException { // login(); // getList(); // getOne("9"); } public Map login(String url, String user, String pwd, String tess4jpath) { System.out.println("begin:"); Mapmap = null; Connection.Response LoginResponse = null; try { LoginResponse = Jsoup.connect(url).method(Connection.Method.GET).execute(); map = LoginResponse.cookies();//获取会话,登录后需要保持会话 String sessName = "JSESSIONID"; String sessions = (String) map.get("JSESSIONID"); System.out.println("sessions="+sessions); // System.out.println("map1:"+map.toString()); // document document = LoginResponse.parse(); // Element element = document.getElementById("varifyCodeImg"); // String codeimgurl2 = element.attr("id"); // System.out.println("222222:"+codeimgurl2); String codeimgurl = "http://192.168.0.37:8080/test/login/getCode.do"; String connectPath = "http://192.168.0.37:8080/test/login/login.do"; String codeimgpath = tess4jpath+"\codeimg"; //下载验证码图片 byte[] codeimgdata = Jsoup.connect(codeimgurl).header("cookie",sessName + "=" + sessions).ignoreContentType(true).execute().bodyAsBytes(); FileUtils.saveImg(codeimgdata, codeimgpath, "codeimg.jpg"); //识别样本输出地址 String ocrResult = codeimgpath+"\codetmpimgtmp.jpg"; String OriginalImg = codeimgpath+"\codeimg.jpg"; //去噪点 FileUtils.removeBackground(OriginalImg, ocrResult); ITesseract instance =new Tesseract(); instance.setDatapath(tess4jpath); //获得Tesseract的文字库 URL url2 = ClassLoader.getSystemResource("tessdata"); String tesspath = url2.getPath().substring(1); instance.setDatapath(tesspath);//进行读取,默认是英文,如果要使用中文包,加上instance.setLanguage("chi_sim"); File imgDir =new File(OriginalImg); String code = instance.doOCR(imgDir);//识别验证码 code = replaceBlank(code); System.out.println("codeLength:"+code.length()+",code:"+code); Map datas = new HashMap(); datas.put("username", user); datas.put("loginkey", pwd); datas.put("verifycode",code); // 
Connection.Response connection = Jsoup.connect(connectPath).header("cookie",sessName + "=" + sessions).data(datas).execute(); // connection.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,* public static void creatDir(String path) { File file = new File(path); if(!file.exists()) { file.mkdirs(); } } public static void removeBackground(String imgUrl, String resUrl){ //定义一个临界阈值 int threshold = 400; try{ BufferedImage img = ImageIO.read(new File(imgUrl)); int width = img.getWidth(); int height = img.getHeight(); for(int i = 1;i < width;i++){ for (int x = 0; x < width; x++){ for (int y = 0; y < height; y++){ Color color = new Color(img.getRGB(x, y)); //System.out.println("red:"+color.getRed()+" | green:"+color.getGreen()+" | blue:"+color.getBlue()); int num = color.getRed()+color.getGreen()+color.getBlue(); if(num >= threshold){ img.setRGB(x, y, Color.WHITE.getRGB()); } } } } for(int i = 1;i 部分代码参考自:Java识别验证码和图像处理_梁康h的博客-CSDN博客
Java 爬虫之识别图片验证码后登录_JavaBigADog的博客-CSDN博客
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)