虽然结果出来了,但由于数据太少,看不出什么规律。所以,我们把机器学习的重点放在评论数据上,将其聚类为三个范围(k = 3)。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Software: PyCharm
# @virtualenv:workon
# @contact: contact information
# @Desc:Code description
__author__ = '未昔/AngelFate/1040691703@qq'
__date__ = '2019/8/24 22:53'

# K-means clustering of the samples in `x`, a textual summary of the clusters,
# and a 2-D t-SNE scatter plot coloured by cluster label.
#
# NOTE(review): `KMeans`, `TSNE`, `pd`, `x` and `data2` are not defined in this
# section; presumably they are imported / built earlier in the file -- confirm.
# `x` is presumably the numeric matrix behind the `data2` DataFrame (same rows,
# same order), since labels fitted on `x` are attached to `data2` below -- verify.

k = 3  # number of clusters
iteration = 500  # upper bound on K-means iterations

# `n_jobs` was deprecated in scikit-learn 0.23 and removed in 1.0, where passing
# it raises TypeError. Older releases default to a single job, so dropping the
# argument keeps the same behaviour everywhere.
model = KMeans(n_clusters=k, max_iter=iteration)
y = model.fit_predict(x)
label_pred = model.labels_
centroids = model.cluster_centers_  # cluster centres
inertia = model.inertia_

print('y:\n', y)
print('label_pred:\n', label_pred)
print('centroids:\n', centroids)
print('inertia:\n', inertia)

print('----分类结果----:')
# Pair every sample with the label it was assigned.
result = list(zip(y, x))
for i in result:
    print(i)

# Summary table: one row per cluster, centre coordinates plus member count.
r1 = pd.Series(model.labels_).value_counts()
r2 = pd.DataFrame(model.cluster_centers_)  # 2-D centre array -> DataFrame
print('r2: \n', r2)
# Column-wise concat aligns on the cluster id (index starts at 0 by default).
r = pd.concat([r2, r1], axis=1)
r.columns = data2.columns.tolist() + ['类别数目']  # rename the header row
print('r: \n', r)

# Original rows with their cluster label appended as the last column.
output_data = pd.concat([data2, pd.Series(model.labels_, index=data2.index)], axis=1)
output_data.columns = list(data2.columns) + ['聚类类别']  # rename the header row
# output_data.to_excel(output_path)  # save the result

# Reduce the data with t-SNE and visualise the clusters.
tsne = TSNE()
tsne.fit_transform(data2)
# tsne.embedding_ holds the reduced coordinates.
print('tsne.embedding_: \n', tsne.embedding_)
# Index by data2 (the frame that was embedded) so that the boolean mask taken
# from `output_data` below aligns row-for-row with the embedding.
tsn = pd.DataFrame(tsne.embedding_, index=data2.index)
# Show the embedding DataFrame; the TSNE estimator's repr carries no results.
print('tsne: \n', tsn)

import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the CJK labels
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign renderable with that font
color_style = ['r.', 'go', 'b*']
for i in range(k):
    d = tsn[output_data[u'聚类类别'] == i]
    # Wrap around the style list so a larger `k` does not raise IndexError.
    plt.plot(d[0], d[1], color_style[i % len(color_style)], label='聚类' + str(i + 1))
plt.legend()
plt.show()
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
聚类结果
分类结果----:
(2, array([2.01000e+04, 1.28361e+05, 3.00000e+01, 2.21753e+04]))
(1, array([2.301119e+04, 1.567950e+05, 3.900000e+01, 3.459470e+04]))
(1, array([2.867732e+04, 1.667870e+05, 4.700000e+01, 4.088600e+04]))
(1, array([1.88020e+04, 1.69018e+05, 3.10000e+01, 2.33768e+04]))
(1, array([1.934163e+04, 1.783780e+05, 3.000000e+01, 2.363760e+04]))
(1, array([1.964646e+04, 1.846340e+05, 3.000000e+01, 2.368560e+04]))
(2, array([1.764494e+04, 1.531700e+05, 3.200000e+01, 3.188840e+04]))
(2, array([2.019723e+04, 1.453720e+05, 3.800000e+01, 3.594430e+04]))
(1, array([3.387688e+04, 1.693550e+05, 5.400000e+01, 5.230970e+04]))
(1, array([3.408296e+04, 1.795520e+05, 5.200000e+01, 5.270440e+04]))
(1, array([1.668626e+04, 1.726620e+05, 2.700000e+01, 2.785250e+04]))
(1, array([1.541501e+04, 1.727600e+05, 2.500000e+01, 2.604260e+04]))
(2, array([2.51734e+04, 1.43915e+05, 4.90000e+01, 5.66489e+04]))
(2, array([1.221956e+04, 1.446960e+05, 2.400000e+01, 2.578140e+04]))
(0, array([1.123695e+04, 1.096660e+05, 2.900000e+01, 3.040230e+04]))
(2, array([1.813015e+04, 1.400030e+05, 3.600000e+01, 3.664440e+04]))
(0, array([1.790766e+04, 1.065170e+05, 2.900000e+01, 2.748880e+04]))
(2, array([8.91428e+03, 1.42105e+05, 1.80000e+01, 1.84165e+04]))
(2, array([7.83286e+03, 1.42299e+05, 1.60000e+01, 1.63741e+04]))
(2, array([6.97618e+03, 1.41605e+05, 1.40000e+01, 1.46528e+04]))
(2, array([6.15493e+03, 1.41263e+05, 1.30000e+01, 1.31160e+04]))
(0, array([6.87670e+03, 8.48370e+04, 2.30000e+01, 2.33302e+04]))
(0, array([1.108646e+04, 1.065170e+05, 2.900000e+01, 2.748880e+04]))
(0, array([1.089267e+04, 1.117420e+05, 2.800000e+01, 2.562580e+04]))
(0, array([5.43425e+03, 1.10661e+05, 1.50000e+01, 1.33031e+04]))
(0, array([5.01389e+03, 1.11588e+05, 1.40000e+01, 1.20931e+04]))
标签
y:
[2 1 1 1 1 1 2 2 1 1 1 1 2 2 0 2 0 2 2 2 2 0 0 0 0 0]
123456789101112131415161718192021222324252627282930
聚类中心
centroids:
[[9.77836857e+03 1.05932571e+05 2.38571429e+01 2.28188714e+04]
[2.32821900e+04 1.72215667e+05 3.72222222e+01 3.38988778e+04]
[1.43343530e+04 1.42278900e+05 2.70000000e+01 2.71642100e+04]]
版权所有,未经允许,禁止转载!!!
,