def check_flag(flag):
    """Return True when *flag* starts with the ``images/`` path prefix.

    The check is anchored at the beginning of the string, so a path that
    merely contains ``images/`` further along (e.g. ``static/images/a.png``)
    does not match.

    Args:
        flag: The string to test, e.g. the ``src`` value of an ``<img>`` tag.

    Returns:
        bool: True if *flag* begins with ``images/``, otherwise False.
    """
    # A plain prefix test is equivalent to the anchored regex match used
    # before, and needs neither the ``re`` module nor a compiled pattern.
    return flag.startswith('images/')
#soup = BeautifulSoup(open('index.html'))
from bs4 import BeautifulSoup
# Sample markup kept from the original snippet; nothing below reads it.
html_content = '''
<a href="https://xxx.com">测试01</a>
<a href="https://yyy.com/123">测试02</a>
<a href="https://xxx.com">测试01</a>
<a href="https://xxx.com">测试01</a>
'''

# Parse the source page. The context manager closes the input handle as soon
# as BeautifulSoup has consumed it instead of leaving it open until exit.
with open(r'favour-en.html', 'r', encoding="UTF-8") as file:
    soup = BeautifulSoup(file, 'html.parser')

# Prefix every <img> whose src starts with "images/" with the new base path.
for element in soup.find_all('img'):
    if 'src' not in element.attrs:
        continue  # <img> without a src attribute: nothing to rewrite
    src = element.attrs['src']
    print(src)
    if check_flag(src):
        element.attrs['src'] = "michenxxxxxxxxxxxx" + '/' + src

# NOTE(review): the original indentation was lost; this separator is assumed
# to be printed once after the loop rather than once per image - confirm.
print("##################################")

with open('index.html', 'w', encoding="UTF-8") as fp:
    # prettify() re-indents the parsed tree so the written HTML is readable.
    fp.write(soup.prettify())
// The download helper below comes from CI (presumably CodeIgniter - confirm).
// It works by emitting HTTP headers that make the browser download the
// response, and then sending the file data as the body. It should be usable
// as-is; if anything still goes wrong, it can be looked at again.
$data = file_get_contents("/path/to/photo.jpg"); // read the file contents
$name = 'myphoto.jpg';
force_download($name, $data);
/**
 * Send $data to the client as a file download named $filename, then stop.
 *
 * The MIME type is chosen from the file extension using a small built-in
 * table of image types; unknown extensions fall back to
 * application/octet-stream. Cache-related headers differ for user agents
 * that contain "MSIE".
 *
 * @param string $filename Name offered to the browser; must contain a '.'.
 * @param string $data     Raw file contents to send as the response body.
 * @return bool FALSE when either argument is empty or $filename has no
 *              extension. On success the function does not return: it
 *              calls exit() with $data as the output.
 */
function force_download($filename = '', $data = '')
{
    if ($filename == '' OR $data == '')
    {
        return FALSE;
    }

    // The extension drives the MIME lookup, so a name without one is rejected.
    if (FALSE === strpos($filename, '.'))
    {
        return FALSE;
    }

    // Grab the file extension; lower-cased so "PHOTO.JPG" still hits the table.
    $x = explode('.', $filename);
    $extension = strtolower(end($x));

    $mimes = array(
        'bmp'  => array('image/bmp', 'application/octet-stream'),
        'gif'  => array('image/gif', 'application/octet-stream'),
        'jpeg' => array('image/jpeg', 'image/pjpeg', 'application/octet-stream'),
        'jpg'  => array('image/jpeg', 'image/pjpeg', 'application/octet-stream'),
        'jpe'  => array('image/jpeg', 'image/pjpeg', 'application/octet-stream'),
        'png'  => array('image/png', 'image/x-png', 'application/octet-stream'),
    );

    // Set a default mime if we can't find it
    if ( ! isset($mimes[$extension]))
    {
        $mime = 'application/octet-stream';
    }
    else
    {
        $mime = (is_array($mimes[$extension])) ? $mimes[$extension][0] : $mimes[$extension];
    }

    // Strip characters that would terminate the quoted filename parameter
    // or start a new header line.
    $safe_name = str_replace(array('"', "\r", "\n"), '', $filename);

    // The user agent header is optional (CLI, some bots); treat it as empty.
    $user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

    // Generate the server headers. The media type is sent unquoted, and the
    // disposition type is separated from its filename parameter by "; ".
    header('Content-Type: '.$mime);
    header('Content-Disposition: attachment; filename="'.$safe_name.'"');
    header('Content-Transfer-Encoding: binary');
    header('Expires: 0');
    header('Content-Length: '.strlen($data));

    if (strpos($user_agent, 'MSIE') !== FALSE)
    {
        header('Cache-Control: must-revalidate, post-check=0, pre-check=0');
        header('Pragma: public');
    }
    else
    {
        header('Pragma: no-cache');
    }

    // exit() with a string argument prints it before terminating the script.
    exit($data);
}
<a href="picName.jpg" id="pic1" onclick="savepic(); return false;" style="cursor:pointer">点击下载</a>
<script type="text/javascript">
/**
 * Save the file that the #pic1 link points to instead of navigating to it.
 *
 * A temporary, hidden <a> carrying the standard `download` attribute is
 * added to the page, clicked programmatically and removed again. This
 * replaces the earlier approach, which depended on Internet-Explorer-only
 * APIs (document.all, execCommand("SaveAs"), removeNode) and on an
 * undefined `imageName` variable.
 *
 * Does nothing when the #pic1 element is missing or has no href.
 */
function savepic() {
    var source = document.getElementById("pic1");
    if (!source || !source.href) {
        return;
    }

    var link = document.createElement("a");
    link.href = source.href;
    // An empty `download` value lets the browser derive the file name
    // from the URL.
    link.setAttribute("download", "");
    link.style.display = "none";

    // The link is attached to the document for the duration of the click.
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
}
</script>
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)