聚类K-Means

import numpy as np

# Toy 1-D data: 52 random integers drawn from [0, 52).
x=np.random.randint(0,52,52)

x

# Number of clusters to look for.
k=3

# Label buffer holding 20 zeros.
# NOTE(review): x holds 52 samples but y only has 20 slots; confirm the
# intended length before passing both to xclassfy, which indexes y[i] per sample.
y=np.zeros(20)

y

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,

        0.,  0.,  0.,  0.,  0.,  0.,  0.])

def intcent(x,k):
    """Return the first k entries of x, reshaped to a flat array of length k.

    These entries serve as the initial cluster centres. x must support
    slicing and the slice must expose ``reshape`` (e.g. a NumPy array)
    holding exactly k values in its first k positions.
    """
    head = x[:k]
    return head.reshape(k)

# Seed the k cluster centres with whatever intcent returns for (x, k).
kc=intcent(x,k)

kc

array([21,  8, 45])

# Scratch check of the nearest-centre rule for the sample value 2:
# absolute distance from 2 to every centre.
d=abs(2-kc)

# Index of the first centre whose distance equals the minimum.
np.where(d==np.min(d))[0][0]

1

def nearest(kc,i):
    """Return the index of the centre in kc that lies closest to sample i.

    Distance is the absolute difference ``|kc - i|``. When several centres
    are equally close, the first matching index along axis 0 is returned.
    """
    dist = np.abs(kc - i)
    smallest = np.min(dist)
    # np.nonzero yields one index array per axis; take the first hit on axis 0.
    hits = np.nonzero(dist == smallest)
    return hits[0][0]

def xclassfy(x,y,kc):
    """Label every sample in x with the index of its nearest centre.

    For each row index i of x, stores ``nearest(kc, x[i])`` in ``y[i]``.

    x  -- array of samples; iterated along its first axis.
    y  -- label buffer, modified in place; needs at least x.shape[0] slots.
    kc -- current cluster centres, passed straight to nearest().

    Returns the same y object that was passed in.
    """
    for i in range(x.shape[0]):
        # The original called `nearst`, an undefined name that raised
        # NameError on the first iteration; the helper is `nearest`.
        y[i]=nearest(kc,x[i])
    return y

from sklearn.datasets import load_iris

# Load the iris dataset bundled with scikit-learn.
iris=load_iris()

iris

# Rebind x to the iris feature matrix (the echoed output below shows
# four columns per row); this replaces the random toy data above.
x=iris.data

x

Out[1]:

array([[5.1, 3.5, 1.4, 0.2],

       [4.9, 3. , 1.4, 0.2],

       [4.7, 3.2, 1.3, 0.2],

       [4.6, 3.1, 1.5, 0.2],

       [5. , 3.6, 1.4, 0.2],

       [5.4, 3.9, 1.7, 0.4],

       [4.6, 3.4, 1.4, 0.3],

       [5. , 3.4, 1.5, 0.2],

       [4.4, 2.9, 1.4, 0.2],

       [4.9, 3.1, 1.5, 0.1],

       [5.4, 3.7, 1.5, 0.2],

       [4.8, 3.4, 1.6, 0.2],

       [4.8, 3. , 1.4, 0.1],

       [4.3, 3. , 1.1, 0.1],

       [5.8, 4. , 1.2, 0.2],

       [5.7, 4.4, 1.5, 0.4],

       [5.4, 3.9, 1.3, 0.4],

       [5.1, 3.5, 1.4, 0.3],

       [5.7, 3.8, 1.7, 0.3],

       [5.1, 3.8, 1.5, 0.3],

       [5.4, 3.4, 1.7, 0.2],

       [5.1, 3.7, 1.5, 0.4],

       [4.6, 3.6, 1. , 0.2],

       [5.1, 3.3, 1.7, 0.5],

       [4.8, 3.4, 1.9, 0.2],

       [5. , 3. , 1.6, 0.2],

       [5. , 3.4, 1.6, 0.4],

       [5.2, 3.5, 1.5, 0.2],

       [5.2, 3.4, 1.4, 0.2],

       [4.7, 3.2, 1.6, 0.2],

       [4.8, 3.1, 1.6, 0.2],

       [5.4, 3.4, 1.5, 0.4],

       [5.2, 4.1, 1.5, 0.1],

       [5.5, 4.2, 1.4, 0.2],

       [4.9, 3.1, 1.5, 0.1],

       [5. , 3.2, 1.2, 0.2],

       [5.5, 3.5, 1.3, 0.2],

       [4.9, 3.1, 1.5, 0.1],

       [4.4, 3. , 1.3, 0.2],

       [5.1, 3.4, 1.5, 0.2],

       [5. , 3.5, 1.3, 0.3],

       [4.5, 2.3, 1.3, 0.3],

       [4.4, 3.2, 1.3, 0.2],

       [5. , 3.5, 1.6, 0.6],

       [5.1, 3.8, 1.9, 0.4],

       [4.8, 3. , 1.4, 0.3],

       [5.1, 3.8, 1.6, 0.2],

       [4.6, 3.2, 1.4, 0.2],

       [5.3, 3.7, 1.5, 0.2],

       [5. , 3.3, 1.4, 0.2],

       [7. , 3.2, 4.7, 1.4],

       [6.4, 3.2, 4.5, 1.5],

       [6.9, 3.1, 4.9, 1.5],

       [5.5, 2.3, 4. , 1.3],

       [6.5, 2.8, 4.6, 1.5],

       [5.7, 2.8, 4.5, 1.3],

       [6.3, 3.3, 4.7, 1.6],

       [4.9, 2.4, 3.3, 1. ],

       [6.6, 2.9, 4.6, 1.3],

       [5.2, 2.7, 3.9, 1.4],

       [5. , 2. , 3.5, 1. ],

       [5.9, 3. , 4.2, 1.5],

       [6. , 2.2, 4. , 1. ],

       [6.1, 2.9, 4.7, 1.4],

       [5.6, 2.9, 3.6, 1.3],

       [6.7, 3.1, 4.4, 1.4],

       [5.6, 3. , 4.5, 1.5],

       [5.8, 2.7, 4.1, 1. ],

       [6.2, 2.2, 4.5, 1.5],

       [5.6, 2.5, 3.9, 1.1],

       [5.9, 3.2, 4.8, 1.8],

       [6.1, 2.8, 4. , 1.3],

       [6.3, 2.5, 4.9, 1.5],

       [6.1, 2.8, 4.7, 1.2],

       [6.4, 2.9, 4.3, 1.3],

       [6.6, 3. , 4.4, 1.4],

       [6.8, 2.8, 4.8, 1.4],

       [6.7, 3. , 5. , 1.7],

       [6. , 2.9, 4.5, 1.5],

       [5.7, 2.6, 3.5, 1. ],

       [5.5, 2.4, 3.8, 1.1],

       [5.5, 2.4, 3.7, 1. ],

       [5.8, 2.7, 3.9, 1.2],

       [6. , 2.7, 5.1, 1.6],

       [5.4, 3. , 4.5, 1.5],

       [6. , 3.4, 4.5, 1.6],

       [6.7, 3.1, 4.7, 1.5],

       [6.3, 2.3, 4.4, 1.3],

       [5.6, 3. , 4.1, 1.3],

       [5.5, 2.5, 4. , 1.3],

       [5.5, 2.6, 4.4, 1.2],

       [6.1, 3. , 4.6, 1.4],

       [5.8, 2.6, 4. , 1.2],

       [5. , 2.3, 3.3, 1. ],

       [5.6, 2.7, 4.2, 1.3],

       [5.7, 3. , 4.2, 1.2],

       [5.7, 2.9, 4.2, 1.3],

       [6.2, 2.9, 4.3, 1.3],

       [5.1, 2.5, 3. , 1.1],

       [5.7, 2.8, 4.1, 1.3],

       [6.3, 3.3, 6. , 2.5],

       [5.8, 2.7, 5.1, 1.9],

       [7.1, 3. , 5.9, 2.1],

       [6.3, 2.9, 5.6, 1.8],

       [6.5, 3. , 5.8, 2.2],

       [7.6, 3. , 6.6, 2.1],

       [4.9, 2.5, 4.5, 1.7],

       [7.3, 2.9, 6.3, 1.8],

       [6.7, 2.5, 5.8, 1.8],

       [7.2, 3.6, 6.1, 2.5],

       [6.5, 3.2, 5.1, 2. ],

       [6.4, 2.7, 5.3, 1.9],

       [6.8, 3. , 5.5, 2.1],

       [5.7, 2.5, 5. , 2. ],

       [5.8, 2.8, 5.1, 2.4],

       [6.4, 3.2, 5.3, 2.3],

       [6.5, 3. , 5.5, 1.8],

       [7.7, 3.8, 6.7, 2.2],

       [7.7, 2.6, 6.9, 2.3],

       [6. , 2.2, 5. , 1.5],

       [6.9, 3.2, 5.7, 2.3],

       [5.6, 2.8, 4.9, 2. ],

       [7.7, 2.8, 6.7, 2. ],

       [6.3, 2.7, 4.9, 1.8],

       [6.7, 3.3, 5.7, 2.1],

       [7.2, 3.2, 6. , 1.8],

       [6.2, 2.8, 4.8, 1.8],

       [6.1, 3. , 4.9, 1.8],

       [6.4, 2.8, 5.6, 2.1],

       [7.2, 3. , 5.8, 1.6],

       [7.4, 2.8, 6.1, 1.9],

       [7.9, 3.8, 6.4, 2. ],

       [6.4, 2.8, 5.6, 2.2],

       [6.3, 2.8, 5.1, 1.5],

       [6.1, 2.6, 5.6, 1.4],

       [7.7, 3. , 6.1, 2.3],

       [6.3, 3.4, 5.6, 2.4],

       [6.4, 3.1, 5.5, 1.8],

       [6. , 3. , 4.8, 1.8],

       [6.9, 3.1, 5.4, 2.1],

       [6.7, 3.1, 5.6, 2.4],

       [6.9, 3.1, 5.1, 2.3],

       [5.8, 2.7, 5.1, 1.9],

       [6.8, 3.2, 5.9, 2.3],

       [6.7, 3.3, 5.7, 2.5],

       [6.7, 3. , 5.2, 2.3],

       [6.3, 2.5, 5. , 1.9],

       [6.5, 3. , 5.2, 2. ],

       [6.2, 3.4, 5.4, 2.3],

       [5.9, 3. , 5.1, 1.8]])

# First feature column of the iris matrix, as a 1-D array.
x1=x[:,0]

x1

array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,

       4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,

       5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,

       5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,

       6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,

       6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,

       6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,

       6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,

       6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,

       7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,

       7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,

       6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])

from sklearn.cluster import KMeans

# K-Means estimator configured for three clusters.
est=KMeans(n_clusters=3)

# Fit on all feature columns of x.
est.fit(x)

# Centres found by the fit.
est.cluster_centers_

# Cluster index assigned to every sample; rebinds y (previously the zero buffer).
y=est.predict(x)

y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

       0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2,

       2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2,

       2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1])

import matplotlib.pyplot as plt

# Plot the first two feature columns against each other, coloured by cluster label.
plt.scatter(x[:,0],x[:,1],c=y,cmap='rainbow')

plt.show()

# Cluster on a single feature: four clusters over the first column only.
est1=KMeans(n_clusters=4)

# reshape(-1,1) turns the 1-D column into an (n, 1) matrix for fit().
x1=x[:,0].reshape(-1,1)

est1.fit(x1)

# Cluster index per sample from the fit above.
y=est1.labels_

# Colour the points by their cluster label; the original call omitted c=y,
# so the labels computed above were never shown in the plot.
plt.scatter(x1,x1,c=y,cmap='rainbow')

plt.show()

# NOTE(review): est1 and x1 are assigned here but not used before being
# reassigned further down; looks like leftover notebook experimentation.
est1=KMeans(n_clusters=4)

# 1-D view of the first feature column (not reshaped to a column matrix here).
x1=x[:,0]

# Four-cluster K-Means over all feature columns of x.
est=KMeans(n_clusters=4)

est.fit(x)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,

    n_clusters=4, n_init=10, n_jobs=1, precompute_distances='auto',

    random_state=None, tol=0.0001, verbose=0)

# Fresh four-cluster estimator for the single-feature experiment.
est1=KMeans(n_clusters=4)

# First feature column reshaped to an (n, 1) column matrix.
x1=x[:,0].reshape(-1,1)

x1

array([[5.1],

       [4.9],

       [4.7],

       [4.6],

       [5. ],

       [5.4],

       [4.6],

       [5. ],

       [4.4],

       [4.9],

       [5.4],

       [4.8],

       [4.8],

       [4.3],

       [5.8],

       [5.7],

       [5.4],

       [5.1],

       [5.7],

       [5.1],

       [5.4],

       [5.1],

       [4.6],

       [5.1],

       [4.8],

       [5. ],

       [5. ],

       [5.2],

       [5.2],

       [4.7],

       [4.8],

       [5.4],

       [5.2],

       [5.5],

       [4.9],

       [5. ],

       [5.5],

       [4.9],

       [4.4],

       [5.1],

       [5. ],

       [4.5],

       [4.4],

       [5. ],

       [5.1],

       [4.8],

       [5.1],

       [4.6],

       [5.3],

       [5. ],

       [7. ],

       [6.4],

       [6.9],

       [5.5],

       [6.5],

       [5.7],

       [6.3],

       [4.9],

       [6.6],

       [5.2],

       [5. ],

       [5.9],

       [6. ],

       [6.1],

       [5.6],

       [6.7],

       [5.6],

       [5.8],

       [6.2],

       [5.6],

       [5.9],

       [6.1],

       [6.3],

       [6.1],

       [6.4],

       [6.6],

       [6.8],

       [6.7],

       [6. ],

       [5.7],

       [5.5],

       [5.5],

       [5.8],

       [6. ],

       [5.4],

       [6. ],

       [6.7],

       [6.3],

       [5.6],

       [5.5],

       [5.5],

       [6.1],

       [5.8],

       [5. ],

       [5.6],

       [5.7],

       [5.7],

       [6.2],

       [5.1],

       [5.7],

       [6.3],

       [5.8],

       [7.1],

       [6.3],

       [6.5],

       [7.6],

       [4.9],

       [7.3],

       [6.7],

       [7.2],

       [6.5],

       [6.4],

       [6.8],

       [5.7],

       [5.8],

       [6.4],

       [6.5],

       [7.7],

       [7.7],

       [6. ],

       [6.9],

       [5.6],

       [7.7],

       [6.3],

       [6.7],

       [7.2],

       [6.2],

       [6.1],

       [6.4],

       [7.2],

       [7.4],

       [7.9],

       [6.4],

       [6.3],

       [6.1],

       [7.7],

       [6.3],

       [6.4],

       [6. ],

       [6.9],

       [6.7],

       [6.9],

       [5.8],

       [6.8],

       [6.7],

       [6.7],

       [6.3],

       [6.5],

       [6.2],

       [5.9]])

# NOTE(review): est1 is created twice in this cell; only the second
# instance is the one that gets fitted.
est1=KMeans(n_clusters=4)

# First feature column as an (n, 1) column matrix.
x1=x[:,0].reshape(-1,1)

est1=KMeans(n_clusters=4)

est1.fit(x1)

# Cluster index assigned to each sample by the fit.
est1.labels_

array([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,

       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,

       1, 1, 1, 1, 1, 1, 2, 3, 3, 0, 3, 0, 3, 1, 3, 1, 1, 0, 0, 0, 0, 3,

       0, 0, 3, 0, 0, 0, 3, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3,

       0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0, 3, 0, 2, 3, 3, 2, 1, 2, 3, 2,

       3, 3, 3, 0, 0, 3, 3, 2, 2, 0, 3, 0, 2, 3, 3, 2, 3, 0, 3, 2, 2, 2,

       3, 3, 0, 2, 3, 3, 0, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 0])

(1)设定好K的大小，随机选取K个点作为初始中心点；

　　(2)计算每个点到这K个中心点的距离大小，选取最近的中心点，划分到以该中心点为中心的集群中去；

　　(3)重新计算K个新集群的中心点；

　　(4)如果中心点保持不变，则结束K-Means过程。否则，重复进行(2)、(3)步；

复制代码

import numpy as np

# Toy data: 20 random integers drawn from [1, 50), stored as a (20, 1) column.
x = np.random.randint(1,50,[20,1])

# One label slot per sample; all start as 0.
y = np.zeros(20)

# Number of clusters.
k = 3

#1) Pick K objects from the data as the initial centres; each one stands for one cluster centre.
def initcen(x,k):
    """Return the first k entries of x to use as the initial cluster centres."""
    leading = slice(0, k)
    return x[leading]

#2) Assign every sample to the class of the cluster centre nearest to it (the most similar one).
def nearest(kc,i):
    """Return the index of the centre in kc closest to sample i.

    Closeness is the absolute difference ``|kc - i|``; on a tie the first
    matching index along axis 0 wins.
    """
    gaps = abs(kc - i)
    is_closest = np.equal(gaps, np.min(gaps))
    # One index array per axis comes back; the first entry on axis 0 is the answer.
    rows = np.nonzero(is_closest)[0]
    return rows[0]

def xclassify(x,y,kc):
    """Label every sample in x with the index of its nearest centre.

    For each row index i of x, stores ``nearest(kc, x[i])`` in ``y[i]``.

    x  -- array of samples; iterated along its first axis.
    y  -- label buffer, modified in place; needs at least x.shape[0] slots.
    kc -- current cluster centres, passed straight to nearest().

    Returns the same y object that was passed in.
    """
    for i in range(x.shape[0]):
        y[i] = nearest(kc,x[i])
    # The return sits after the loop: the original had it inside the loop
    # body, so only sample 0 was ever labelled.
    return y

#3) Update the cluster centres: each centre becomes the mean of the samples currently assigned to it.
def kcmean(x,y,kc,k):
    """Recompute the k cluster centres from the current labels.

    x  -- sample array; indexed with the positions where y equals a cluster id.
    y  -- per-sample cluster labels (values in 0..k-1).
    kc -- current centres; flattened, so both a (k, 1) slice and a 1-D
          array of k values are accepted.
    k  -- number of clusters.

    Returns ``(centres, flag)``: centres is a 1-D float array of length k,
    and flag is True when at least one centre moved.
    """
    # Work on plain floats so the list never mixes shape-(1,) arrays with
    # scalars, which would make np.array(l) ragged.
    l = [float(v) for v in np.asarray(kc, dtype=float).reshape(-1)]
    flag = False
    for c in range(k):
        # Select the members of cluster c (the original always selected cluster 0).
        m = np.where(y == c)
        members = x[m]
        if members.size == 0:
            # An empty cluster has no mean (NaN), and NaN != NaN would keep
            # flag True forever; leave that centre where it is.
            continue
        n = float(np.mean(members))
        if l[c] != n:
            l[c] = n
            flag = True
            print(l,flag)
    return (np.array(l),flag)

#4) Check whether the cluster centres changed: if not, output the result; if so, go back to step 2)

# Seed the centres via initcen.
kc = initcen(x,k)

# Start as True so the loop body runs at least once.
flag = True

print(x,y,kc,flag)

# Alternate assignment and centre update until kcmean reports no change.
while flag:

    y = xclassify(x,y,kc)

    kc,flag = kcmean(x,y,kc,k)

print(y,kc)

复制代码

聚类K-Means的更多相关文章

软件——机器学习与Python，聚类，K——means
K-means是一种聚类算法: 这里运用k-means进行31个城市的分类城市的数据保存在city.txt文件中,内容如下: BJ,2959.19,730.79,749.41,513.34,467. ...
KMeans聚类 K值以及初始类簇中心点的选取转
本文主要基于Anand Rajaraman和Jeffrey David Ullman合著,王斌翻译的<大数据-互联网大规模数据挖掘与分布式处理>一书. KMeans算法是最常用的聚类算法, ...
聚类-K均值
数据来源:http://archive.ics.uci.edu/ml/datasets/seeds 15.26 14.84 0.871 5.763 3.312 2.221 5.22 Kama 14.8 ...
【机器学习笔记五】聚类 - k均值聚类
参考资料: [1]Spark Mlib 机器学习实践 [2]机器学习 [3]深入浅出K-means算法 http://www.csdn.net/article/2012-07-03/2807073- ...
聚类--K均值算法
import numpy as np from sklearn.datasets import load_iris iris = load_iris() x = iris.data[:,1] y = ...
第八次作业：聚类--K均值算法：自主实现与sklearn.cluster.KMeans调用
import numpy as np x = np.random.randint(1,100,[20,1]) y = np.zeros(20) k = 3 def initcenter(x,k): r ...
聚类--K均值算法：自主实现与sklearn.cluster.KMeans调用
1.用python实现K均值算法 import numpy as np x = np.random.randint(1,100,20)#产生的20个一到一百的随机整数 y = np.zeros(20) ...
KNN 与 K - Means 算法比较
KNN K-Means 1.分类算法聚类算法 2.监督学习非监督学习 3.数据类型:喂给它的数据集是带label的数据,已经是完全正确的数据喂给它的数据集是无label的数据,是杂乱无章的,经过 ...
机器学习方法（七）：Kmeans聚类K值如何选，以及数据重抽样方法Bootstrapping
欢迎转载,转载请注明:本文出自Bin的专栏blog.csdn.net/xbinworld. 技术交流QQ群:433250724,欢迎对算法.技术感兴趣的同学加入.我的博客写一些自己用得到东西,并分享给 ...
kmeans 聚类 k 值优化
kmeans 中k值一直是个令人头疼的问题,这里提出几种优化策略. 手肘法核心思想 1. 肉眼评价聚类好坏是看每类样本是否紧凑,称之为聚合程度: 2. 类别数越大,样本划分越精细,聚合程度越高,当类 ...

随机推荐

MySQL创建数据库时指定编码utf8mb4和添加用户
CREATE DATABASE `wordpress` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci; CREATE DATABASE `wor ...
JS常见兼容性问题
兼容性问题:函数(方法)兼容描述:部分W3C指定的函数,有部分老的浏览器不支持解决: 条件判断,如果有,则使用,添加原型方法,例如 String 的 trim 方法 if(!String.prot ...
Mac系统如何显示隐藏文件？
显示全部文件 defaults write com.apple.finder AppleShowAllFiles -bool true osascript -e 'tell application & ...
CAtia_打开提示：许可证过期怎么办
CAtia_许可证过期怎么办:进计算机管理,点开服务和应用程序,点服务,找到DS License Server,在启动此服务的地方点启动,从而开启DS License Server.
ArcGIS按选定线分割面-案例教程
ArcGIS按选定线分割面-案例教程联系方式:谢老师,135-4855-4328,xiexiaokui#qq.com 功能方法:高级编辑实例: 分割前后联系方式:谢老师,135-4855-43 ...
[原创] debian 9.3 搭建Jira+Confluence+Bitbucket项目管理工具(二) -- 安装jira 7.5.4
[原创] debian 9.3 搭建Jira+Confluence+Bitbucket项目管理工具(二) -- 安装jira 7.5.4 环境都配置好以后, 开始搭建Jira的环境, 这里参考了一篇文 ...
【C++】C++中的string类的用法总结
相信使用过MFC编程的朋友对CString这个类的印象应该非常深刻吧?的确,MFC中的CString类使用起来真的非常的方便好用.但是如果离开了MFC框架,还有没有这样使用起来非常方便的类呢?答案是肯 ...
20175213 2018-2019-2 《Java程序设计》第7周学习总结
教材学习内容总结 (1)String (char a[])用一个字符数组a创建一个String对象. (2)String(char a[],int startIndex,int count) 提取字符 ...
OpenCV4.0学习笔记
1.读取显示图像 #include<opencv2/opencv.hpp> #include<iostream> using namespace cv; using names ...
python 网络编程粘包解决方案2 + ftp上传 + socketserver
一.struct 神奇的打包工具 struct 代码: import struct num = 156 #将int类型的数据打包成4个字节的数据 num_stru = struct.pack('i', ...

聚类K-Means

聚类K-Means的更多相关文章

随机推荐

热门专题