# Raw (original) data
# Fit K-Means with three clusters on `df`; random_state=0 fixes the seed.
kmeans = KMeans(n_clusters=3, random_state=0).fit(df)

# Visualization
labels = kmeans.labels_
centers = kmeans.cluster_centers_
# One marker shape and one colour per cluster, indexed by cluster id.
markers = ['o', '^', '*']
colors = ['r', 'b', 'y']

# Work around Chinese text rendering problems in matplotlib: use the SimHei
# font and disable the Unicode minus sign.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.figure(figsize=(12, 8), dpi=500)
plt.xlabel('总订单数')
plt.ylabel('云豆数')
plt.title("聚类结果")

# Draw the members of each of the three clusters as a separate scatter series.
# NOTE(review): the x-axis label is '总订单数' but the plotted column is
# 'Integral' -- confirm which column is intended.
for c in range(3):
    cluster = df[labels == c]
    plt.scatter(cluster['Integral'], cluster['云豆数'],
                marker=markers[c], s=20, c=colors[c])

# Mark the cluster centres: a large white disc first, then the cluster index
# drawn on top of it as a mathtext marker ('$0$', '$1$', '$2$').
# NOTE(review): assumes the first two columns of `df` are the plotted x/y
# columns, since centers[:, 0] / centers[:, 1] are used -- TODO confirm.
plt.scatter(centers[:, 0], centers[:, 1],
            marker='o', c="white", alpha=0.9, s=300)
for i, c in enumerate(centers):
    plt.scatter(c[0], c[1], marker='$%d$' % i, s=50, c=colors[i])
# Sharing is welcome; when reposting, please credit the source: 内存溢出
# Comments (0)