基于golang的爬虫,爬取QQ邮箱号、链接、手机号、身份证号

基于golang的爬虫,爬取QQ邮箱号、链接、手机号、身份证号,第1张

基于golang的爬虫,爬取QQ邮箱号、链接、手机号、身份证号
爬虫基础方案:基本的接口封装和使用,并未使用并发处理
代码篇
package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"regexp"
)

var (
	// reEmail matches an e-mail-like token: one or more word characters
	// (\w = letters, digits, underscore), an '@', more word characters,
	// a literal dot, and more word characters.
	reEmail = `\w+@\w+\.\w+`
	// reLinke matches an href attribute holding an absolute http/https URL.
	//   s?      the 's' is optional, so both http and https match
	//   +       one or more repetitions
	//   [\s\S]  any character at all, including newlines
	//   +?      NON-greedy repetition: stop at the first closing quote
	// Capture group 1 is the URL itself, without the surrounding href="...".
	reLinke  = `href="(https?://[\s\S]+?)"`
	// rePhone matches 11 digits starting with 1 followed by 3-9, with an
	// optional whitespace character after the 3rd and after the 7th digit.
	rePhone  = `1[3456789]\d\s?\d{4}\s?\d{4}`
	// reIdcard matches an 18-character ID number: a 6-digit prefix not
	// starting with 0, a year 1900-2019, a month 01-12, a day 01-31, three
	// more digits, and a final digit or X/x. The year, month and day parts
	// are wrapped in nested capture groups, so submatch results contain
	// several extra (possibly empty) entries besides the full match.
	reIdcard = `[123456789]\d{5}((19\d{2})|(20[01]\d))((0[1-9])|(1[012]))((0[1-9])|([12]\d)|(3[01]))\d{3}[\dXx]`
	// reImg matches an http/https URL ending in a common image extension.
	// NOTE(review): not referenced by any function visible in this file.
	reImg    = `https?://[^"]+?(\.((jpg)|(png)|(jpeg)|(gif)|(bmp)))`
)

// GetEmail2 downloads the page at url via GetPageStr and prints every
// substring that matches reEmail, one match per line. Each line is the
// submatch slice as formatted by fmt.Println, i.e. wrapped in brackets.
func GetEmail2(url string) {
	body := GetPageStr(url)
	matches := regexp.MustCompile(reEmail).FindAllStringSubmatch(body, -1)
	for i := range matches {
		fmt.Println(matches[i])
	}
}
// GetLink downloads the page at url via GetPageStr and prints the URL
// captured by group 1 of reLinke for every href match, one per line.
func GetLink(url string) {
	body := GetPageStr(url)
	matches := regexp.MustCompile(reLinke).FindAllStringSubmatch(body, -1)
	for i := range matches {
		// Index 1 is the captured URL; index 0 would be the whole href="...".
		link := matches[i][1]
		fmt.Println(link)
	}
}

// GetPhone downloads the page at url via GetPageStr and prints every
// substring that matches rePhone, one match per line. Each line is the
// submatch slice as formatted by fmt.Println, i.e. wrapped in brackets.
func GetPhone(url string) {
	body := GetPageStr(url)
	matches := regexp.MustCompile(rePhone).FindAllStringSubmatch(body, -1)
	for i := range matches {
		fmt.Println(matches[i])
	}
}

// GetIdCard downloads the page at url via GetPageStr and prints every
// reIdcard match, one per line. The whole submatch slice is printed, so
// each line holds the full ID number followed by the contents of the
// pattern's capture groups (empty for groups that did not participate).
func GetIdCard(url string) {
	body := GetPageStr(url)
	matches := regexp.MustCompile(reIdcard).FindAllStringSubmatch(body, -1)
	for i := range matches {
		fmt.Println(matches[i])
	}
}

// GetPageStr fetches url with an HTTP GET and returns the response body as a
// string.
//
// Any failure terminates the process through log.Fatalf: a transport error
// from http.Get, a status code other than 200, or an error while reading the
// body. Previously the http.Get error was discarded, so a failed request
// left resp nil and the deferred Body.Close panicked with a nil-pointer
// dereference; a read error was also dropped, returning a truncated page.
func GetPageStr(url string) (pageStr string) {
	resp, err := http.Get(url)
	if err != nil {
		// resp is nil here; it must not be touched.
		log.Fatalf("http.Get: %v", err)
	}
	// log.Fatalf below exits without running deferred calls; that is fine
	// because the process is ending anyway. On the normal path this closes
	// the body so the connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Fatalf("status code error: %d %s", resp.StatusCode, resp.Status)
	}

	// Read the whole page into memory and convert the bytes to a string.
	pageBytes, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatalf("reading response body from %q: %v", url, err)
	}
	return string(pageBytes)
}

// main runs the four scrapers in sequence, each against its own sample page:
// e-mail addresses, links, phone numbers, then ID-card numbers.
func main() {
	jobs := []struct {
		scrape func(url string)
		target string
	}{
		// E-mail addresses.
		{GetEmail2, "https://tieba.baidu.com/p/6051076813?red_tag=1573533731"},
		// Links.
		{GetLink, "http://www.baidu.com/s?wd=%E8%B4%B4%E5%90%A7%20%E7%95%99%E4%B8%8B%E9%82%AE%E7%AE%B1&rsv_spt=1&rsv_iqid=0x98ace53400003985&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=ib&rsv_sug2=0&inputT=5197&rsv_sug4=6345"},
		// Phone numbers.
		{GetPhone, "https://www.zhaohaowang.com/"},
		// ID-card numbers.
		{GetIdCard, "https://henan.qq.com/a/20171107/069413.htm"},
	}
	for _, job := range jobs {
		job.scrape(job.target)
	}
}
结果:
[1184822807@qq.com]
[1184822807@qq.com]
[598088118@qq.com]
[598088118@qq.com]
[835428013@qq.com]
------
https://passport.baidu.com/v2/?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F
https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_1
http://news.baidu.com
https://www.hao123.com
http://map.baidu.com
http://v.baidu.com
http://tieba.baidu.com
http://xueshu.baidu.com
http://help.baidu.com/question
http://www.baidu.com/search/jubao.html
----
[16050271557]
----
[410222198706134038 1987 1987  06 06  13  13 ]

Process finished with exit code 0

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/996056.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-21
下一篇 2022-05-21

发表评论

登录后才能评论

评论列表(0条)

保存