糗事百科案例
import requests
from fake_useragent import UserAgent
import re

# Scrape the joke texts from page 1 of qiushibaike.com and append them
# to a local file, three blank lines between entries.
url = "https://www.qiushibaike.com/text/page/1/"
headers = {
    # Random desktop User-Agent so the request looks like a browser.
    "User-Agent": UserAgent().random
}
response = requests.get(url, headers=headers)
info = response.text

# NOTE(review): the original pattern was garbled by page extraction (its HTML
# tags were stripped, leaving only the `\s*` pieces). Reconstructed to match
# the site's <div class="content"><span>...</span> markup — verify against
# the live page before relying on it.
infos = re.findall(r'<div class="content">\s*<span>\s*(.+)\s*</span>', info)

with open('duanzi.txt', 'a', encoding='utf-8') as f:
    for joke in infos:  # renamed from `info`, which shadowed the page text
        f.write(joke + "\n\n\n")
### BeautifulSoup 案例
#!usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: yifan
@file: 19beautifulsoup.py
@time: 2021/11/12
@desc: BeautifulSoup node access (tag, attrs, string/text, Comment
       detection) and find_all examples.
"""
from bs4 import BeautifulSoup
from bs4.element import Comment

# NOTE(review): the original HTML literal was stripped by the page
# extraction, which also swallowed the `soup = BeautifulSoup(...)` line and
# left `soup` undefined. Reconstructed so that every access below resolves
# (title, div class/float attrs, a[href], a Comment inside <strong>,
# class_='info', div[float=left]) — verify against the original tutorial.
html = '''
<html>
  <head>
    <title>尚学堂</title>
  </head>
  <body>
    <div class="info" float="left">欢迎来到尚学堂</div>
    <div class="info" float="right">
      <span>好好学习,天天向上</span>
      <a href="http://www.sxt.cn">官网</a>
      <strong><!--没有人!--></strong>
    </div>
  </body>
</html>
'''
# Stdlib parser; the tutorial may have used 'lxml' — behavior is the same here.
soup = BeautifulSoup(html, 'html.parser')

print(soup.title)                 # first <title> tag
print(soup.div)                   # first <div> tag
print(soup.div.attrs)             # all attributes as a dict
print(soup.div.get('class'))      # class attribute (list)
print(soup.div['float'])          # attribute by subscript
print(soup.a['href'])             # first <a>'s href
print(soup.div.string)            # .string: only when the tag has one child
print(type(soup.div.string))
print(soup.div.text)              # .text: concatenated descendant text

# An HTML comment is parsed as a Comment node, a subclass of NavigableString;
# distinguish it so comments are not mistaken for real text.
if type(soup.strong.string) == Comment:
    print(soup.strong.string)
    print(soup.strong.prettify())
else:
    print(soup.strong.text)

print("------------------find_all----------------------")
print(soup.find_all('title'))                         # by tag name
print(soup.find_all(id='title'))                      # by id attribute
print(soup.find_all(class_='info'))                   # by class (class_ avoids keyword)
print(soup.find_all("div", attrs={'float': 'left'}))  # by arbitrary attrs

# NOTE(review): a second snippet (`str2 = '''...'''` and code using it)
# followed here in the original file but was lost in extraction.
#!usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: yifan
@file: 正则表达.py
@time: 2021/11/12
@desc: re.match / re.search / re.findall / re.sub examples.
"""
import re

str1 = "I Study Python3.6 Everyday"

print("-------------match()-----------------")
# match() anchors at position 0.  The page extraction stripped every
# backslash from these patterns; restored to the escapes the comments imply.
m1 = re.match(r'I', str1)         # literal 'I'
m2 = re.match(r'\w', str1)        # any word character
m3 = re.match(r'.', str1)         # any character
m4 = re.match(r'\D', str1)        # any non-digit
m5 = re.match(r'i', str1, re.I)   # case-insensitive literal
m6 = re.match(r'\S', str1)        # any non-whitespace (bare 'S' would not match at pos 0)
print(m6.group())

print("-------------search()-----------------")
# search() scans the whole string for the first match.
s1 = re.search(r'Study', str1)
s2 = re.search(r'S\w+', str1)
s3 = re.search(r'P\w+\.\d', str1)  # 'Python3' + literal '.' + digit -> 'Python3.6'
print(s3.group())

print("-------------findall()-----------------")
f1 = re.findall(r'y', str1)  # every occurrence, as a list
print(f1)

print("-------------test()-----------------")
# NOTE(review): the original HTML literal and these three patterns were
# garbled by extraction; reconstructed from the surviving fragments
# ([\u4e00-\u9fa5]\w+ and the (.+) group) — verify against the original file.
str2 = '<span><a href="http://www.sxt.cn">尚学堂</a></span>'
t1 = re.findall(r'[\u4e00-\u9fa5]\w+', str2)           # runs of Chinese text
t2 = re.findall(r'<a href="http://www.sxt.cn">(.+)</a>', str2)
t3 = re.findall(r'<a href=".+">(.+)</a>', str2)        # link text only
print(t3)

print("-------------sub()-----------------")
# Replace the <a> element with a <strong> keeping the captured link text (\1).
su1 = re.sub(r'<a href=".+">(.+)</a>', r'<strong>\1</strong>', str2)
print(su1)
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)