Python 英文文本词性分析

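导入需要的库（首次使用 NLTK 前，需要先用 nltk.download() 下载分词和词性标注所需的数据：旧版本的资源名为 punkt 和 averaged_perceptron_tagger，较新版本为 punkt_tab 和 averaged_perceptron_tagger_eng）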

from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize

代码：

def getText(path='comment.text'):
    """Read a text file and return it lowercased with punctuation blanked out.

    Args:
        path: File to read, decoded as UTF-8. Defaults to ``'comment.text'``,
            the file name this script previously hard-coded.

    Returns:
        The file content converted to lower case, with every character
        listed in ``punctuation`` replaced by a single space.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Same character set as before, written as one ordinary literal so it no
    # longer depends on implicit string concatenation or the invalid "\|"
    # escape sequence.
    punctuation = "`~!\\'@#$%^&*()_+-={}[];\":,./<>?|"
    # The context manager closes the file handle even if read() raises.
    with open(path, 'r', encoding='utf-8') as f:
        txt = f.read().lower()
    # One translate() pass instead of one replace() call per character.
    return txt.translate(str.maketrans(punctuation, " " * len(punctuation)))

# Tokenise the cleaned text and tag every token with its part of speech.
txt = getText()
words = pos_tag(word_tokenize(txt))

# Tally how often each tag (the second element of every pair) occurs.
counts = {}
for _token, tag in words:
    counts[tag] = counts.get(tag, 0) + 1

# Order the (tag, frequency) pairs from most to least frequent.
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

print("该文本含有的词种类以及数量如下：")
for pos, count in items:
    print("{0:<10}{1:>5}".format(pos, count))

可以新增查询功能：

# Interactive lookup: print every token that carries a tag chosen by the user.
a = input("是否需要查询某种词的具体内容？(Y/N)")
# Answer to the "continue?" prompt. It is kept separate from the tag input and
# initialised here so that the farewell check below never reads an undefined
# name (the loop may not run at all) and never mistakes a mistyped tag of
# "n" for a request to quit.
again = ''
while a in ['Y', 'y']:
    b = input("请输入你要查询的词性：(输入all查看含有的词性种类)")
    # "all" lists the available tags and then asks again.
    while b in ['all', 'All', 'ALL']:
        print(counts.keys())
        b = input("请输入你要查询的词性：(输入all查看含有的词性种类)")
    if b in counts:
        # Each entry of words is a (token, tag) pair, so compare against the
        # tag and print only the token text; the pairs have no .flag attribute.
        for token, tag in words:
            if tag == b:
                print(token, end=" ")
        print()
        again = input("是否还要继续查询？(Y/N)")
        if again in ['N', 'n']:
            break
    else:
        print("你的输入不正确！请重新输入！")
        a = input("是否需要查询某种词的具体内容？(Y/N)")
# A clean exit means the user explicitly answered "no" to one of the prompts.
if a in ['N', 'n'] or again in ['N', 'n']:
    print("感谢使用！程序退出")
else:
    print("你的输入有误！程序退出！")

结果如下：

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/langs/943267.html

python英文文本词性分析

发表评论

评论列表（0条）