# Install (or upgrade) spaCy from the Douban PyPI mirror.
# NOTE(review): the mirror is reached over plain HTTP (hence --trusted-host);
# prefer an HTTPS index if one is available.
pip3 install -U spacy -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
# Download the small English pipeline under its full package name: the script
# loads "en_core_web_sm", and spaCy v3+ no longer accepts the "en" shortcut.
python3 -m spacy download en_core_web_sm
在终端里依次执行以上两条命令,即可完成 spaCy 及其英文模型的安装。
# import stanfordnlp
# nlp = stanfordnlp.Pipeline()
# doc = nlp("Barack Obama was born in Hawaii. He was elected president in 2008.")
# doc.sentences[0].print_dependencies()
import spacy
# Demo: compare plain whitespace splitting with spaCy tokenization.
s = "Everything will be OK in the end,if it's not OK,it's not the end."
nlp = spacy.load("en_core_web_sm")
doc = nlp(s)

# Split on whitespace only.
print(doc.text.split())

# Use each token's .orth_ attribute (its verbatim text); unlike the plain
# split above, this lets punctuation be recognized.
print([token.orth_ for token in doc])

# Attributes ending in an underscore return strings; the same attribute
# without the underscore returns an integer.
print([(token, token.orth_, token.orth) for token in doc])

# Tokenize, dropping punctuation and whitespace tokens.
# The condition is written as an explicit `not (a or b)` rather than relying on
# bitwise `|` binding tighter than `not`.
print([token.orth_ for token in doc if not (token.is_punct or token.is_space)])

# Normalize words to their base form (lemmatization).
#practice = "practice practiced practicing"
#nlp_practice = nlp(practice)
#print([word.lemma_ for word in nlp_practice])
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)