依赖:
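<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.11.0</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.14.3</version>
</dependency>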
源码:
import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import static org.jsoup.Jsoup.connect;
/**
* 爬虫爬取图片
*/
public class Picture {
public static void main(String[] args) throws IOException {
int max = 10;//网站页码
String url; //观察网站页码规律编辑
for (int j = 1; j < max ;j++){
//获取连接
Connection connect = connect(url);
//网页内容
Document document = connect.get();
//通过class属性获取页面元素
Elements titles = document.getElementsByClass("lazy");
System.out.println("ElementsCount:"+titles.size());
List urls = new ArrayList();
for (int i = 0; i < titles.size(); i++) {
System.out.println("src=="+titles.get(i).attr("src"));
//获取src属性内容
urls.add(titles.get(i).attr("src"));
}
int a = 1;
//文件存储位置
File file = new File("F:\pic\weimei\"+new Date().getTime());
if(!file.exists()){
file.mkdirs();
}
for (String prcUrl: urls) {
String geshi = prcUrl.substring(prcUrl.lastIndexOf("."));//获取图片格式
Connection.Response execute = connect(prcUrl).ignoreContentType(true).execute();//获取图片
FileOutputStream out = (new FileOutputStream(new java.io.File(file,a+geshi)));//设置输出流
out.write(execute.bodyAsBytes());
out.close();
a++;
}
}
}
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)