# String-to-integer encoding.
def str_num(data):
    """Encode every column of *data* in place: each distinct value is
    mapped to an integer label 1..n, in order of first appearance.

    Parameters
    ----------
    data : pandas.DataFrame
        Mutated in place; every column is replaced by its integer codes.

    Returns
    -------
    None
    """
    # BUG FIX: the original body operated on a global ``df`` instead of the
    # ``data`` parameter, and reused the name ``lis`` for both the column
    # list and the per-column unique values.
    for col in data.columns:
        # ``unique()`` preserves order of first appearance, so codes are
        # assigned deterministically for a given column.
        uniques = data[col].unique().tolist()
        mapping = {value: code for code, value in enumerate(uniques, start=1)}
        data[col] = data[col].map(mapping)
# Split the dataset into training and test partitions.
from sklearn.model_selection import train_test_split
# NOTE(review): ``data`` (feature matrix) and ``target`` (labels) are not
# defined in this fragment — presumably built earlier in the original post;
# confirm before running. 20% of rows go to the test split; no random_state
# is set, so the split differs on every run.
X_train,X_test,y_train,y_test = train_test_split(data,target,test_size=0.2)
# Randomly shuffle an index array in place.
# FIX: the original imported from ``sklearn.utiles`` — a typo; the module is
# ``sklearn.utils``. The import is unused here anyway, since the line below
# uses numpy's in-place shuffle rather than sklearn.utils.shuffle.
from sklearn.utils import shuffle
# NOTE(review): ``indexs`` is not defined in this fragment — presumably an
# index array such as np.arange(len(data)); confirm against the caller.
np.random.shuffle(indexs)
# 画边界图 — plot the decision boundary
# Data needed for a decision-boundary plot: X, Y, XY.
def get_XY(data):
    """Build a dense 1000x1000 evaluation grid spanning the range of the
    first two columns of *data*.

    Returns
    -------
    X, Y : ndarray, shape (1000, 1000)
        Meshgrid coordinate matrices.
    XY : ndarray, shape (1000000, 2)
        The grid points stacked as (x, y) rows, ready to feed a classifier.
    """
    xs = np.linspace(data[:, 0].min(), data[:, 0].max(), 1000)
    ys = np.linspace(data[:, 1].min(), data[:, 1].max(), 1000)
    grid_x, grid_y = np.meshgrid(xs, ys)
    # column_stack of the two raveled grids is equivalent to np.c_[...]
    points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    return grid_x, grid_y, points
# Evaluate the grid and draw the filled decision-boundary plot.
X,Y,XY = get_XY(data)
# NOTE(review): ``axes`` (presumably a matplotlib Axes) and ``y`` (presumably
# the model's predictions for the 1,000,000 XY grid points, e.g.
# model.predict(XY)) are not defined in this fragment — confirm before
# running. The 1000x1000 reshape must match the grid size used in get_XY.
axes.pcolormesh(X,Y,y.reshape(1000,1000),shading='auto')
# Search-engine mode: further splits long words into finer-grained tokens.
# NOTE(review): ``jieba`` (third-party segmenter) and the input string ``s``
# are not defined in this fragment; both calls discard their return values,
# so these lines are illustrative only.
jieba.lcut_for_search(s)
# Full mode: returns every possible word found in the text.
jieba.lcut(s,cut_all=True)
# (scraped blog footer, not code:)
# 欢迎分享,转载请注明来源:内存溢出
# 评论列表(0条)