源码如下: import jieba import io import re #jieba.load_userdict("E:/xinxi2.txt") patton=re.compile(r'..') #添加字典 def add_dict(): f=open("E:/xinxi2.txt","r+",encoding="utf-8") #百度爬取的字典 for line in f: jieba.suggest_freq(li
python 去除停用词 结巴分词 import jieba #stopwords = {}.fromkeys([ line.rstrip() for line in open('stopword.txt') ]) stopwords = {}.fromkeys(['的', '附近']) segs = jieba.cut('北京附近的租房', cut_all=False)final = ''for seg in segs: seg = seg.encode('gbk') if se