【Python】Python获取TIOBE排行榜绘制图表及词云

【Python】Python获取TIOBE排行榜绘制图表及词云,第1张

概述:TIOBE排行榜是根据互联网上有经验的程序员、课程和第三方厂商的数量,并使用搜索引擎(如Google、Bing、Yahoo!)以及Wikipedia、Amazon、YouTube统计出排名数据,只是反映某个编程语言的热门程度,并不能说明一门编程语言好不好,或者一门语言所编写的代码数量多少。

✨TIOBE

TIOBE排行榜是根据互联网上有经验的程序员、课程和第三方厂商的数量,并使用搜索引擎(如Google、Bing、Yahoo!)以及Wikipedia、Amazon、YouTube统计出排名数据,只是反映某个编程语言的热门程度,并不能说明一门编程语言好不好,或者一门语言所编写的代码数量多少。

TIOBE开发语言排行榜每月更新一次,依据的指数是基于世界范围内的资深软件工程师和第三方供应商提供,其结果作为当前业内程序开发语言的流行使用程度的有效指标。

该指数可以用来检阅开发者的编程技能能否跟上趋势,或是否有必要作出战略改变,以及什么编程语言是应该及时掌握的。观察认为,该指数反映的虽并非当前最流行或应用最广的语言,但对世界范围内开发语言的走势仍具有重要参考意义。


✨目标网站

https://www.tiobe.com/tiobe-index/


✨目标数据
TIOBE Index for April 2021


Very Long Term History


✨需求分析

⭐利用Python解析页面信息⭐

⭐分析相关数据绘制图表⭐

⭐保存相关榜单信息⭐


✨绘制图表效果
top20 编程语言使用率

top20 编程语言变化趋势

各类编程语言长期排名变化情况

✨绘制词云效果
top20 编程语言使用率词云

✨代码实现
"""Scrape the TIOBE index, print/save the rankings and draw charts.

The script downloads https://www.tiobe.com/tiobe-index/, extracts the top-20
table and the "Very Long Term History" table, writes both to text files and
renders a pie chart, a bar chart, a word cloud and a line chart.
"""
from typing import List, Optional, Sequence, Tuple

# NOTE: the third-party packages (requests, lxml, matplotlib, wordcloud) are
# imported inside the functions that use them, so the pure data-cleaning
# helpers remain importable when the scraping/plotting stack is missing.

TIOBE_URL = 'https://www.tiobe.com/tiobe-index/'

RANK_COLUMNS = 5    # text cells consumed per row of the top-20 table
RANK_ROWS = 20      # maximum number of top-20 rows printed/saved
VLTH_COLUMNS = 9    # language name followed by 8 period columns
VLTH_ROWS = 13      # maximum number of long-term-history rows handled
MISSING_RANK = 40   # value substituted for '-' (no rank) before plotting

RANK_FILE = 'top20 编程语言情况.txt'
VLTH_FILE = '各类编程语言长期排名变化情况.txt'


def _texts(nodes) -> List[str]:
    """Return the stripped text of *nodes*, dropping whitespace-only entries."""
    return [str(node).strip() for node in nodes if str(node).strip()]


def _rows(cells: Sequence, size: int, limit: int) -> List[list]:
    """Split *cells* into at most *limit* complete rows of *size* items."""
    if size <= 0:
        return []
    count = min(limit, len(cells) // size)
    return [list(cells[k * size:(k + 1) * size]) for k in range(count)]


def _centered(cells: Sequence, wide_index: int, wide: int, narrow: int) -> List[str]:
    """Centre each cell in *narrow* columns; column *wide_index* uses *wide*."""
    return [
        format(cell, f'^{wide if k == wide_index else narrow}')
        for k, cell in enumerate(cells)
    ]


def _table_lines(head, cells, columns, max_rows, wide_index, wide, narrow):
    """Build the header line and the row lines shared by print and save.

    Every header cell is followed by a tab; row cells are joined by tabs.
    """
    header = ''.join(
        cell + '\t' for cell in _centered(head, wide_index, wide, narrow)
    )
    rows = [
        '\t'.join(_centered(row, wide_index, wide, narrow))
        for row in _rows(cells, columns, max_rows)
    ]
    return header, rows


def _percent(text: str) -> float:
    """Convert a string such as '14.32%' or '-2.40%' to a fraction."""
    return float(text.strip().rstrip('%')) / 100


def _pyplot():
    """Import pyplot and apply the font settings and style used by all charts."""
    import matplotlib.pyplot as plt

    # Use a font with CJK glyphs so the Chinese titles are not garbled.
    plt.rcParams['font.sans-serif'] = ['SimHei']  # Windows
    # plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']  # macOS
    plt.rcParams['axes.unicode_minus'] = False
    plt.style.use('bmh')
    return plt


def getHTMLText(url):
    """Download *url* and return its text, or ``None`` if the request fails."""
    import requests
    from requests.exceptions import RequestException

    headers = {
        'user-agent': 'Mozilla/5.0 (windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'
    }
    try:
        # A timeout keeps the script from hanging forever on a dead connection.
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except RequestException as e:
        print('error', e)
        return None


def analyzeRankHTML(HTML):
    """Extract the header cells and body cells of the top-20 table.

    Returns ``(uhead, uList)``: the header texts and the flat list of body
    texts (consumed in groups of ``RANK_COLUMNS`` by the other functions).
    """
    from lxml import etree

    tree = etree.HTML(HTML, etree.HTMLParser())
    uhead = _texts(tree.xpath('//table[contains(@class,"table-top20")]/thead/tr//text()'))
    uList = _texts(tree.xpath('//table[contains(@class,"table-top20")]/tbody/tr//text()'))
    # The third header has no text counterpart in the body rows (presumably
    # an icon-only column -- verify against the live page), so drop it to keep
    # the header aligned with the five text cells per row.
    if len(uhead) > 2:
        uhead.pop(2)
    return uhead, uList


def dataPreTreat1(uinfo: List[str]) -> Tuple[List[str], List[float], List[float]]:
    """Data cleaning 1: split the flat top-20 cells into columns.

    Within every group of five cells, index 2 is the language, index 3 the
    rating and index 4 the change; the two percentages become fractions.
    """
    language = list(uinfo[2::RANK_COLUMNS])
    ratings_data = [_percent(cell) for cell in uinfo[3::RANK_COLUMNS]]
    change_data = [_percent(cell) for cell in uinfo[4::RANK_COLUMNS]]
    return language, ratings_data, change_data


def printRank(uhead, uList):
    """Print the top-20 table (at most ``RANK_ROWS`` complete rows)."""
    header, rows = _table_lines(uhead, uList, RANK_COLUMNS, RANK_ROWS, 2, 25, 10)
    print(header)
    for line in rows:
        print(line)
    print()


def saveRank(uhead, uList):
    """Save the top-20 table to ``RANK_FILE`` as tab-separated text."""
    header, rows = _table_lines(uhead, uList, RANK_COLUMNS, RANK_ROWS, 2, 25, 10)
    # Explicit UTF-8 so the output does not depend on the platform default.
    with open(RANK_FILE, 'w', encoding='utf-8') as f:
        f.write(header + '\n')
        f.writelines(line + '\t\n' for line in rows)
    print('{} 已保存...\n'.format(RANK_FILE))


def analyzeVlthHTML(HTML):
    """Extract the header and body cells of the long-term-history table.

    Returns ``(vhead, vlth)``: the first ``VLTH_COLUMNS`` texts of the table
    and the remaining flat list of cell texts.
    """
    from lxml import etree

    tree = etree.HTML(HTML, etree.HTMLParser())
    cells = _texts(tree.xpath('//table[contains(@id,"VLTH")]//tr//text()'))
    return cells[:VLTH_COLUMNS], cells[VLTH_COLUMNS:]


def printLongTermHistoryRank(vhead, vlth):
    """Print the long-term-history table (at most ``VLTH_ROWS`` rows)."""
    header, rows = _table_lines(vhead, vlth, VLTH_COLUMNS, VLTH_ROWS, 0, 25, 5)
    print(header)
    for line in rows:
        print(line)
    print()


def saveLongTermHistoryRank(vhead, vlth):
    """Save the long-term-history table to ``VLTH_FILE`` as tab-separated text."""
    header, rows = _table_lines(vhead, vlth, VLTH_COLUMNS, VLTH_ROWS, 0, 25, 5)
    with open(VLTH_FILE, 'w', encoding='utf-8') as f:
        f.write(header + '\n')
        f.writelines(line + '\t\n' for line in rows)
    # Report the name of the file that was actually written.
    print('{} 已保存...\n'.format(VLTH_FILE))


def dataPreTreat2(vhead, vlth: List[str]) -> Tuple[List[str], List[str], List[List[float]]]:
    """Data cleaning 2: turn the long-term-history cells into plot data.

    Returns ``(language, month, data)`` where ``month`` holds the period
    labels in ascending order and ``data[i]`` holds the ranks of
    ``language[i]`` in that same order. A '-' cell becomes ``MISSING_RANK``.
    Neither *vhead* nor *vlth* is modified.
    """
    periods = list(vhead[1:])  # the first header is the language column
    if not periods:
        return [], [], []
    # Sort the labels and remember the permutation so every row's values are
    # reordered the same way and stay paired with their own period.
    order = sorted(range(len(periods)), key=periods.__getitem__)
    month = [periods[k] for k in order]

    language = []
    data = []
    for row in _rows(list(vlth), len(vhead), VLTH_ROWS):
        language.append(row[0])
        ranks = [MISSING_RANK if cell == '-' else cell for cell in row[1:]]
        data.append([float(ranks[k]) for k in order])
    return language, month, data


def drawPic(ratings_data, change_data, language: Optional[List[str]] = None):
    """Draw the usage pie chart and the change bar chart for the top 20.

    *language* supplies the labels. When omitted, the module-level
    ``language`` variable is used, as the original two-argument call did.
    """
    if language is None:
        language = globals().get('language')
    if language is None:
        raise ValueError('drawPic requires the list of language names')

    plt = _pyplot()

    print('开始绘制->top20 编程语言使用率...\n')
    plt.figure(figsize=(12, 8))
    plt.pie(x=ratings_data, explode=ratings_data, labels=language, autopct='%1.1f%%', normalize=True, shadow=False,
            startangle=150)
    plt.title("top20 编程语言使用率")
    plt.legend(loc='best', fontsize='x-small')
    plt.savefig('top20 编程语言使用率.png')
    plt.show()
    print('top20 编程语言使用率.png 已保存...\n')

    print('开始绘制->top20 编程语言变化趋势...\n')
    # Red marks a rise (or no change), green marks a fall.
    color = ['red' if delta >= 0 else 'green' for delta in change_data]
    plt.figure(figsize=(16, 10))
    bar_width = 0.5
    plt.bar(language, change_data, bar_width,
            color=color,
            align="center", label="change", alpha=0.5)
    plt.xlabel("Programming Language")
    plt.ylabel("Percentage")
    plt.xticks(rotation=30)
    plt.title("top20 编程语言变化趋势")
    plt.savefig('top20 编程语言变化趋势.png')
    plt.show()
    print('top20 编程语言变化趋势.png 已保存...\n')


def drawWordcloud(language, ratings):
    """Draw a word cloud in which each language is weighted by its rating."""
    import wordcloud

    print('开始绘制词云图...\n')
    # Feed the weights directly instead of tokenizing repeated text, so names
    # such as "C++", "C#" or "Visual Basic" are kept as single words.
    frequencies = {
        name: weight for name, weight in zip(language, ratings) if weight > 0
    }
    w = wordcloud.WordCloud(width=800, height=600, background_color="white",
                            max_words=15,
                            collocations=False
                            )
    w.generate_from_frequencies(frequencies)
    w.to_file("top20 编程语言使用率词云.png")
    print('top20 编程语言使用率词云.png 已保存...\n')


def drawlineChart(language, month, data):
    """Draw one rank-over-time line per language (rank 1 at the top)."""
    print('开始绘制->各类编程长期排名情况...\n')
    plt = _pyplot()
    for name, ranks in zip(language, data):
        plt.plot(month, ranks, label=name)
    plt.title("各类编程语言长期排名变化情况")
    plt.legend(loc='best', fontsize='x-small')
    plt.ylim(0, 40)  # set the y-axis range
    plt.gca().invert_yaxis()  # reverse the y axis
    plt.savefig('各类编程语言长期排名变化情况.png')
    plt.show()
    print('各类编程语言长期排名变化情况.png 已保存...\n')


def main():
    """Run the whole pipeline: download, parse, print, save and plot."""
    print('正在解析页面信息...请稍等...\n')
    HTML = getHTMLText(TIOBE_URL)
    if HTML is None:
        # getHTMLText already printed the error; nothing can be parsed.
        raise SystemExit(1)

    uhead, uinfo = analyzeRankHTML(HTML)
    language, ratings, change = dataPreTreat1(uinfo)
    print('解析:https://www.tiobe.com/tiobe-index/ 获取到 top20 编程语言情况如下:\n')
    printRank(uhead, uinfo)
    saveRank(uhead, uinfo)
    drawPic(ratings, change, language)
    drawWordcloud(language, ratings)

    print('正在解析各类编程语言长期排名情况...请稍等...\n')
    vhead, vlth = analyzeVlthHTML(HTML)
    language, month, data = dataPreTreat2(vhead, vlth)
    print('解析:https://www.tiobe.com/tiobe-index/ 获取到 各类编程语言长期排名情况如下:\n')
    printLongTermHistoryRank(vhead, vlth)
    saveLongTermHistoryRank(vhead, vlth)
    drawlineChart(language, month, data)


if __name__ == '__main__':
    main()
✨参考及引用

https://baike.baidu.com/item/Tiobe/2830870

https://blog.csdn.net/handuoduo123/article/details/105319321


⭐转载请注明出处

本文作者:双份浓缩馥芮白

原文链接:https://www.cnblogs.com/Flat-White/p/14704210.html

版权所有,如需转载请注明出处。

总结

以上是内存溢出为你收集整理的【Python】Python获取TIOBE排行榜 绘制图表及词云全部内容,希望文章能够帮你解决【Python】Python获取TIOBE排行榜 绘制图表及词云所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/1187062.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-06-03
下一篇 2022-06-03

发表评论

登录后才能评论

评论列表(0条)

保存