爬取意林杂志 && Python 操作 Excel

爬取意林杂志 && Python 操作 Excel,第1张

概述:前言——刘太太对一篇意林的文章印象很深,但现在只记得两句话;想着爬完所有意林文章应该能找到,于是就写了个小玩意。BTW,刘太太已经是我 npy 啦!代码节选:import re;import requests;from bs4 import BeautifulSoup;import os;def GetlinkSum(): url='https://www.yilinzazhi.com/' ……

前言

刘太太对一篇意林的文章印象很深,但现在只记得两句话
想着爬完所有意林文章应该能找到
于是就写了个小玩意
BTW
刘太太已经是我npy啦!

import re
import requests
from bs4 import BeautifulSoup
import os

# Root of the magazine site; every relative link found while crawling is
# appended to this prefix.
BASE_URL = 'https://www.yilinzazhi.com/'
# Folder that receives one sub-folder per URL path segment (see artical()).
OUTPUT_DIR = "F:\\PyCode\\Notes\\Yilin"
# Phrases searched for in every article; a hit prints the article and its URL.
KEYWORDS = ('异乡的情侣', '共同的兰州', '兰州')
# Seconds to wait for the server before giving up, so one stalled request
# cannot hang the whole crawl.
REQUEST_TIMEOUT = 30
# Characters that are not allowed in Windows file names.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')
# The issue links end in "index.html"; matched case-insensitively.
_INDEX_PAGE = re.compile(r'index\.html', re.IGNORECASE)


def GetlinkSum():
    """Collect the issue links from the site's home page.

    Returns:
        A list of ``{'link': href}`` dicts, one per ``td > a`` anchor on the
        home page that carries an ``href``.
    """
    response = requests.get(BASE_URL, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'lxml')
    # Anchors without an href cannot be followed, so they are left out.
    result = [
        {'link': anchor.get('href')}
        for anchor in soup.select('td>a')
        if anchor.get('href')
    ]
    print('read:', len(result))
    return result


def articalTitle(aim):
    """Visit every issue page in *aim* and process each article it links to.

    Args:
        aim: list of ``{'link': href}`` dicts as returned by GetlinkSum().
    """
    for i, entry in enumerate(aim):
        link = entry['link']
        if not link:
            continue
        # Directory part of the issue link: everything before "index.html".
        # If the link does not contain it, the whole link is used, as before.
        issue_dir = _INDEX_PAGE.split(link, maxsplit=1)[0]
        response = requests.get(BASE_URL + link, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'lxml')
        for anchor in soup.select('span > a'):
            href = anchor.get('href')
            if not href:
                continue
            artical(BASE_URL + issue_dir + href)
            print('第{0}期第{1}篇'.format(issue_dir, href))
        print('i:', i)


def artical(url, keywords=KEYWORDS, output_dir=OUTPUT_DIR):
    """Download one article, report keyword hits and save it as a text file.

    Args:
        url: absolute URL of the article page.
        keywords: phrases to look for in the article's ``div > p`` elements.
        output_dir: folder under which ``<segment>/<title>.txt`` is written,
            where ``<segment>`` is the first path segment of *url*.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Decode the raw bytes as UTF-8 directly instead of round-tripping the
    # text through ISO-8859-1, which only works when requests guessed latin-1.
    html = response.content.decode('utf-8', errors='replace')
    soup = BeautifulSoup(html, 'lxml')
    data = soup.select('div> p')
    body = str(data)

    for number, keyword in enumerate(keywords, start=1):
        if keyword in body:
            print(body)
            print('str{0}:'.format(number), url)

    # "https://host/<segment>/..." -> parts[3] is the first path segment.
    dictionary = url.split('/')[3]

    heading = soup.select_one('body>div.wrap>div>div.blkContainer>div>h1')
    title = heading.get_text(strip=True) if heading is not None else ''
    title = _ILLEGAL_FILENAME_CHARS.sub('_', title).strip()
    if not title:
        # Without a title there is no file name to save under.
        print('no title found, skipped:', url)
        return

    path = os.path.join(output_dir, dictionary)
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, '{0}.txt'.format(title)), "w",
              encoding="utf-8") as f:
        # Break the serialized paragraph list at every <p> / </p> so each
        # fragment lands on its own line.
        for chunk in body.split('<p>'):
            for piece in chunk.split('</p>'):
                f.write(piece)
                f.write("\n")


if __name__ == '__main__':
    status = GetlinkSum()
    articalTitle(status)
import xlwt

# Every bank occupies a fixed block of this many rows in the sheet.
ROWS_PER_BANK = 16


def ReadParameter():
    """Ask on stdin for the bank and valve counts of the three sections.

    Returns:
        ``[[banks, valves], [banks, valves], [banks, valves]]`` in the order
        the values were entered (the layout expected by First()).

    Raises:
        ValueError: if an answer is not an integer.
    """
    JMTaiJiaNum = int(input("请输入加密段台架个数:"))
    JMSFNum = int(input("请输入加密段每台架水阀数:"))
    CGTaiJiaNum = int(input("请输入常规冷却段台架个数:"))
    CGSFNum = int(input("请输入常规冷却段每台架水阀数:"))
    WTTaiJiaNum = int(input("请输入微调段台架个数:"))
    WTSFNum = int(input("请输入微调段每台架水阀个数:"))
    print("加密段台架个数:{0},水阀个数{1}".format(JMTaiJiaNum, JMSFNum))
    print("常规冷却段台架个数:{0},水阀个数{1}".format(CGTaiJiaNum, CGSFNum))
    print("微调段台架个数:{0},水阀个数{1}".format(WTTaiJiaNum, WTSFNum))
    return [[JMTaiJiaNum, JMSFNum],
            [CGTaiJiaNum, CGSFNum],
            [WTTaiJiaNum, WTSFNum]]


def change8(demo):
    """Return the next bit index: ``demo + 1``, wrapping to 0 at 8."""
    demo = demo + 1
    if demo >= 8:
        demo = 0
    return demo


def change16(demo):
    """Return the next 1-based index: ``demo + 1``, wrapping to 1 after 16."""
    demo = demo + 1
    if demo > 16:
        demo = 1
    return demo


def _valves_per_bank(base):
    """Yield the valve count of every bank, section after section."""
    for section in base[:3]:
        for _ in range(section[0]):
            yield section[1]


def First(base, name="DaDongHai"):
    """Generate the L1->L2 variable table workbook and save it as .xls.

    Args:
        base: three ``[bank_count, valves_per_bank]`` pairs, as returned by
            ReadParameter().
        name: project name inserted into the output file name.
    """
    xls = xlwt.Workbook()
    sht1 = xls.add_sheet('L1->L2(CTC)', cell_overwrite_ok=True)
    # The second sheet is created but left empty.
    sht2 = xls.add_sheet('L2->L1(CTC)', cell_overwrite_ok=True)
    tittle = ['Group name', 'Pin Comment', '管脚注释', 'Type', 'Unit',
              'Offset', 'L2 Var name', 'Wincc Var name', 'Note']
    firstdata = ['', '数据包ID', '', 'INT', '', '0', '', '',
                 'IOMASTER读配置文件,自动添加']

    # xlwt reads the column width from the ``width`` attribute.
    for column, width in ((0, 200), (1, 450), (6, 450), (7, 450), (8, 400)):
        sht1.col(column).width = width * 20

    startpoint = 5   # first data row; the rows above hold the headers
    offset = 2       # value written to the Offset column, +1 per new BYTE row
    spr = 0          # running index used in the L2 variable name
    bit = 0          # bit position within the current byte (0..7)
    CTCL2 = 1        # running 1..16 index used in the Wincc variable name

    sht1.write_merge(startpoint - 5, startpoint - 5, 0, 8,
                     'DC TO CTC Data.Communication table(CTC)')
    sht1.write_merge(startpoint - 4, startpoint - 4, 0, 8,
                     'CTC IP:10.{0}.10.161 端口:1611 二级服务器主机IP:10.{0}.10.184 端口:4611 数据长度:224 byte ID:1 DB:159'.format(24))
    sht1.write_merge(startpoint - 3, startpoint - 3, 0, 8,
                     'CTC IP:10.{0}.10.161 端口:2611 二级服务器主机IP:10.{0}.10.186 端口:4611 数据长度:224 byte ID:1 DB:159'.format(24))

    # Column titles and the fixed first data line.
    for column, (title, first) in enumerate(zip(tittle, firstdata)):
        sht1.write(startpoint - 2, column, title)
        sht1.write(startpoint - 1, column, first)

    # Top-spray signals: one block of rows per bank. Rows beyond the bank's
    # valve count are written as reserved ('预留') up to ROWS_PER_BANK.
    for bank_index, valve_count in enumerate(_valves_per_bank(base)):
        bank_no = bank_index + 1
        first_row = startpoint + ROWS_PER_BANK * bank_index
        sht1.write_merge(first_row, first_row + ROWS_PER_BANK - 1, 0, 0, '')
        for slot in range(max(valve_count, ROWS_PER_BANK)):
            row = first_row + slot
            is_valve = slot < valve_count
            if is_valve:
                sht1.write(row, 1,
                           'BANK {0}-{1}上喷有效信号'.format(bank_no, slot + 1))
            else:
                sht1.write(row, 1, '预留')
            if bit == 0:
                # Bit 0 opens a new byte: record its type and offset.
                sht1.write(row, 3, 'BYTE')
                sht1.write(row, 5, offset)
                offset = offset + 1
            else:
                sht1.write(row, 3, '---')
            sht1.write(row, 4, 'BIT{0}'.format(bit))
            bit = change8(bit)
            if is_valve:
                sht1.write(row, 6,
                           'pIO->ctcRead.rtsAvl.spr[0][{0}]'.format(spr))
                spr = spr + 1
            sht1.write(row, 7,
                       'CTCL2_OUT_Bank{0}_top.spr{1}'.format(bank_no, CTCL2))
            CTCL2 = change16(CTCL2)

    xls.save('./层冷L1-L2通讯变量表({0}).xls'.format(name))


if __name__ == '__main__':
    # Use ``base = ReadParameter()`` instead to enter the layout interactively.
    base = [[8, 10], [8, 4], [2, 8]]
    First(base)
总结

以上是内存溢出为你收集整理的《爬取意林杂志 && Python 操作 Excel》全部内容,希望文章能够帮你解决爬取意林杂志和 Python 操作 Excel 所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/1184558.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-06-03
下一篇 2022-06-03

发表评论

登录后才能评论

评论列表(0条)

保存