import numpy as np
import matplotlib.pyplot as plt 
from sklearn.datasets import make_blobs
 
1.数据预处理
 
# Generate a synthetic data set: 100 samples spread around 6 blob centres
# (fixed random_state so the figure is reproducible).
x, y = make_blobs(n_samples=100, centers=6, random_state=100, cluster_std=0.6)
plt.figure(figsize=(6, 6))
# Plot the first two feature columns, coloured by the generated blob label.
plt.scatter(x[:, 0], x[:, 1], c=y)
# show must be *called*; the bare name `plt.show` only evaluates to the
# function object and never renders the figure.
plt.show()
 
<function matplotlib.pyplot.show(close=None, block=None)>
 

 
2.模型实现
 
from scipy.spatial.distance import cdist
class KMeansModel():
    """Minimal k-means clustering model built on NumPy and SciPy's ``cdist``.

    Parameters
    ----------
    k_cluster : int
        Number of clusters to create when the starting centroids are drawn
        from the training data.
    max_iteration : int
        Upper bound on the number of assign/update passes run by ``fit``.
    centroids : array-like or None
        Optional initial centroids, one row per cluster (stored as float32).
        When omitted or empty, ``fit`` picks ``k_cluster`` distinct rows of
        the training data as starting centroids.
    """

    def __init__(self, k_cluster=6, max_iteration=100, centroids=None):
        self.k_cluster = k_cluster
        self.max_iteration = max_iteration
        # None stands in for the former mutable ``[]`` default; an explicit
        # empty sequence is still accepted and means the same thing.
        if centroids is None:
            centroids = []
        self.centroids = np.array(centroids, dtype=np.float32)

    def fit(self, points):
        """Fit the centroids to ``points`` by alternating assign/update passes.

        Parameters
        ----------
        points : array-like
            Training samples, one row per sample.

        Raises
        ------
        ValueError
            If centroids must be drawn from ``points`` and ``k_cluster``
            exceeds the number of samples.
        """
        points = np.asarray(points)

        if self.centroids.size == 0:
            # Sample WITHOUT replacement so two clusters never start on the
            # same point (randint could return duplicate row indices).
            chosen = np.random.choice(points.shape[0], self.k_cluster, replace=False)
            seeds = points[chosen]  # fancy indexing copies; points is not aliased
            # Integer data would otherwise truncate every mean written below.
            if not np.issubdtype(seeds.dtype, np.floating):
                seeds = seeds.astype(np.float64)
            self.centroids = seeds

        for iteration in range(self.max_iteration):
            distances = cdist(points, self.centroids)
            # index of the nearest centroid for every sample
            c_index = np.argmin(distances, axis=1)
            if iteration == 0:
                print("c shape", c_index.shape, c_index[0])

            previous = self.centroids.copy()
            # Walk over every stored centroid (not just k_cluster of them) so
            # user-supplied centroids beyond k_cluster are updated as well.
            for cluster in range(self.centroids.shape[0]):
                members = points[c_index == cluster]
                # a centroid with no assigned samples keeps its position
                if len(members):
                    self.centroids[cluster] = members.mean(axis=0)

            # Unchanged centroids reproduce the same assignment on the next
            # pass, so further iterations cannot alter the result.
            if np.array_equal(previous, self.centroids):
                break

    def predict(self, points):
        """Return, for each row of ``points``, the index of the nearest centroid.

        Raises
        ------
        ValueError
            If the model has no centroids yet.
        """
        if self.centroids.size == 0:
            raise ValueError("no centroids available: call fit() or pass centroids first")
        return np.argmin(cdist(points, self.centroids), axis=1)
 
3.测试
 
def plot_kmeans(x, y, centroids, subplot):
    """Draw samples and centroids into one subplot of the current figure.

    ``x`` supplies the sample coordinates (columns 0 and 1), ``y`` the
    per-sample colour values, ``centroids`` the centroid coordinates
    (columns 0 and 1) and ``subplot`` the position passed to ``plt.subplot``.
    """
    plt.subplot(subplot)

    # samples, coloured by y
    sample_x, sample_y = x[:, 0], x[:, 1]
    plt.scatter(sample_x, sample_y, c=y)

    # centroids as larger red markers on top
    centre_x, centre_y = centroids[:, 0], centroids[:, 1]
    plt.scatter(centre_x, centre_y, s=100, c='r')
# Start from six hand-picked centroids on the diagonal: (1, 1) ... (6, 6).
kmean_model = KMeansModel(centroids=np.array([[v, v] for v in range(1, 7)]))
plt.figure(figsize=(18, 8))
# Left panel: the data together with the centroids as passed in.
plot_kmeans(x, y, kmean_model.centroids, 121)

kmean_model.fit(x)
print(kmean_model.centroids)
# Right panel: the same data with the centroids after fitting.
plot_kmeans(x, y, kmean_model.centroids, 122)

# Assign two unseen points to their nearest centroid and draw them in black
# on the axes made current by the last plot_kmeans call.
x_new = np.array([[10, 7], [0, 0]])
y_predict = kmean_model.predict(x_new)
print("predict y ", y_predict)
plt.scatter(*x_new.T, s=100, c="black")
 
c shape (100,) 0
[[ 4.343336  -5.112518 ]
 [-1.6609049  6.7436223]
 [-8.57988   -3.3460388]
 [ 2.7469435  6.05025  ]
 [ 2.490612   7.7450833]
 [ 4.1287684  6.6914167]]
predict y  [5 3]
<matplotlib.collections.PathCollection at 0x1576e5a9850>
 
