下载全国城市空气质量历史数据

下载全国城市空气质量历史数据,第1张

概述:下载全国城市空气质量历史数据

下面是内存溢出 jb51.cc 通过网络收集整理的代码片段。

内存溢出小编现在分享给大家,也给大家做个参考。

"""Download historical city air-quality (AQI) tables from tianqihoubao.com.

The script reads the site's ``/aqi/`` index page, follows every city link it
finds, and for each history page linked from a city page saves the page's
table as a plain-text file under ``data/<city code>/`` in the current working
directory.  Cities that completed successfully are recorded in
``data/index.txt`` so a later run can skip them.
"""

import os
import shutil
import re
import urllib.request

home = "http://www.tianqihoubao.com"

# Seconds to wait on a single HTTP request before giving up, so one stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Number of table cells that are grouped into one output line.
CELLS_PER_ROW = 9

# Text sitting between two tags on a single line.  Excluding "<" from the
# capture keeps directly adjacent tags ("</td><td>") from being picked up as
# cell text.
CELL_RE = re.compile(r">([^<\n]+)<")

# City links on the /aqi/ index page; the capture runs from the file name up
# to (not including) the closing </a>.
# NOTE(review): the upper-case ".HTML" literals in this file are kept exactly
# as found; confirm they match the URLs the site actually serves.
CITY_LINK_RE = re.compile('href="/aqi/(.+?)</a>')


def decode_page(raw):
    """Decode downloaded bytes as GBK, falling back to UTF-8.

    Args:
        raw: The response body as ``bytes``.

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If the bytes are valid in neither encoding.
    """
    try:
        return raw.decode("gbk")
    except UnicodeDecodeError:
        return raw.decode("utf-8")


def fetch_page(url):
    """Download *url* and return its decoded text (see ``decode_page``)."""
    with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as response:
        return decode_page(response.read())


def parse_table_rows(page, cells_per_row=CELLS_PER_ROW):
    """Extract the first ``<table>`` of *page* as rows of cell strings.

    Non-empty text fragments found between tags are collected in document
    order and grouped ``cells_per_row`` at a time.  A trailing group with
    fewer cells is dropped.

    Args:
        page: HTML text of the page.
        cells_per_row: How many cells form one row.

    Returns:
        A list of rows, each a list of ``cells_per_row`` strings.  Empty when
        the page has no complete ``<table ...> ... </table>`` section.
    """
    start = page.find("<table")
    end = page.find("</table>")
    if start == -1 or end < start:
        return []

    table = page[start:end]
    # Skip the attributes of the opening <table ...> tag itself.
    table = table[table.find(">"):]
    table = table.replace("\r\n", "")
    # Bold markers would otherwise split header cells into extra fragments.
    table = table.replace("</b>", "").replace("<b>", "")

    stripped = (fragment.strip(" ") for fragment in CELL_RE.findall(table))
    cells = [cell for cell in stripped if cell]

    complete = len(cells) - len(cells) % cells_per_row
    return [cells[i:i + cells_per_row] for i in range(0, complete, cells_per_row)]


def parse_month_links(page, code):
    """Find the history-page links of city *code* in a city page.

    Args:
        page: HTML text of the city page.
        code: The city code used in the site's URLs.

    Returns:
        A list of ``(title, path)`` tuples in page order, where ``path`` is
        the site-relative link (``/aqi/<code>-...HTML``) and ``title`` is the
        same link without the ``/aqi/`` prefix and ``.HTML`` suffix.
    """
    # re.escape keeps a code containing regex metacharacters literal.
    link_re = re.compile("href='(/aqi/" + re.escape(code) + "-" + ".+?HTML)'")
    return [
        (path.replace("/aqi/", "").replace(".HTML", ""), path)
        for path in link_re.findall(page)
    ]


def parse_city_link(fragment):
    """Split one ``CITY_LINK_RE`` capture into ``(code, name)``.

    Args:
        fragment: Text captured by ``CITY_LINK_RE``.

    Returns:
        A ``(code, name)`` tuple, or ``None`` when the fragment does not split
        into exactly two parts (i.e. it is not a city link).
    """
    parts = fragment.replace(" ", "").replace('.HTML">', " ").strip(" ").split(" ")
    if len(parts) != 2:
        return None
    return parts[0], parts[1]


def load_index(path):
    """Return the set of ``"<code> <name>"`` records stored in the index file.

    Args:
        path: Location of the index file.

    Returns:
        A set of record strings; empty when the file does not exist.
    """
    if not os.path.exists(path):
        return set()
    with open(path, "rb") as f:
        raw = f.read()
    try:
        text = raw.decode("utf-8")
    except UnicodeDecodeError:
        # Tolerate an index written earlier with a GBK default encoding.
        text = raw.decode("gbk")
    return set(text.splitlines())


def down2txt(code, tit, url):
    """Download one history page and save its table as a text file.

    The file is written to ``<cwd>/data/<code>/<tit>.txt`` as UTF-8, one table
    row per line with every cell followed by a single space.  If the file
    already exists nothing is downloaded.  If the page yields no complete row,
    no file is created, so a later run can retry.

    Args:
        code: City code; names the sub-directory.
        tit: Output file name without extension.
        url: Address of the page to download.
    """
    folder = os.path.join(os.getcwd(), "data", code)
    path = os.path.join(folder, tit + ".txt")

    # Check before touching the network so finished files cost nothing.
    if os.path.exists(path):
        print("文件已存在:" + tit + ".txt")
        return

    rows = parse_table_rows(fetch_page(url))
    if not rows:
        print("no table data: " + url)
        return

    os.makedirs(folder, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        for row in rows:
            f.write(" ".join(row) + " \n")


def down_city(name, code):
    """Download every history page linked from one city's page.

    Args:
        name: Display name of the city (not used for the download itself).
        code: City code used in the site's URLs and as the output directory.
    """
    url = home + "/aqi/" + code + ".HTML"
    print(url)
    page = fetch_page(url)
    for tit, path in parse_month_links(page, code):
        url = home + path
        print(url)
        down2txt(code, tit, url)


def main():
    """Walk the city index and download every city not yet recorded as done."""
    page = fetch_page(home + "/aqi/")
    matches = CITY_LINK_RE.findall(page)

    index_path = os.path.join(os.getcwd(), "data", "index.txt")
    done = load_index(index_path)

    index = 0
    for fragment in matches:
        city = parse_city_link(fragment)
        if city is None:
            continue
        code, name = city
        record = code + " " + name

        index += 1
        print(str(index) + "/" + str(len(matches)) + ": " + record)

        if record in done:
            print(name + " 已下载")
            continue

        try:
            down_city(name, code)
            # Record the city only after all of its pages were handled.
            os.makedirs(os.path.dirname(index_path), exist_ok=True)
            with open(index_path, "a", encoding="utf-8") as f:
                f.write(record + "\n")
            done.add(record)
        except Exception as exc:
            # Keep going with the next city, but say what went wrong.
            print("error! " + record + ": " + repr(exc))
    print("finished!")


if __name__ == "__main__":
    main()

以上是内存溢出(jb51.cc)为你收集整理的全部代码内容,希望文章能够帮你解决所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。

总结

以上是内存溢出为你收集整理的下载全国城市空气质量历史数据全部内容,希望文章能够帮你解决下载全国城市空气质量历史数据所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。

欢迎分享,转载请注明来源:内存溢出

原文地址: https://outofmemory.cn/langs/1199294.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-06-04
下一篇 2022-06-04

发表评论

登录后才能评论

评论列表(0条)

保存