3.聚类–K-means的Java实现
K-means的步骤
输入: 含 n 个样本的数据集,簇的数目 K
输出: K 个簇
算法步骤:
1. 初始化 K 个簇类中心 C1, C2, ……, Ck (通常随机选择)
2.repeat 步骤3,4
3. 将数据集中的每个样本分配到与之最近的中心 Ci 所在的簇 Ci;
4. 更新聚类中心Ci,即计算各个簇的样本均值;
5. 直到样本分配不再改变
上代码:
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * Marks a field of a sample object as a dimension to be used by the k-means algorithm.
 *
 * <p>Only numeric properties are supported. The annotation is retained at run time
 * ({@link RetentionPolicy#RUNTIME}) so it can be discovered through reflection, and it may only
 * be placed on fields ({@link ElementType#FIELD}).
 *
 * @author 阿飞哥
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.FIELD)
public @interface KmeanField {
}
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List; /**
*
* @author 阿飞哥
*
*/
public class Kmeans<T> { /**
* 所有数据列表
*/
private List<T> players = new ArrayList<T>(); /**
* 数据类别
*/
private Class<T> classT; /**
* 初始化列表
*/
private List<T> initPlayers; /**
* 需要纳入kmeans算法的属性名称
*/
private List<String> fieldNames = new ArrayList<String>(); /**
* 分类数
*/
private int k = 1; public Kmeans() { } /**
* 初始化列表
*
* @param list
* @param k
*/
public Kmeans(List<T> list, int k) {
this.players = list;
this.k = k;
T t = list.get(0);
this.classT = (Class<T>) t.getClass();
Field[] fields = this.classT.getDeclaredFields();
System.out.println("fields---------------------------------------------="+fields.length);
for (int i = 0; i < fields.length; i++) {
Annotation kmeansAnnotation = fields[i]
.getAnnotation(KmeanField.class);
if (kmeansAnnotation != null) {
fieldNames.add(fields[i].getName());
System.out.println("fieldNames.add"+ fields[i].getName()); } } initPlayers = new ArrayList<T>();
for (int i = 0; i < k; i++) {
initPlayers.add(players.get(i));
}
} public List<T>[] comput() {
List<T>[] results = new ArrayList[k]; boolean centerchange = true;
while (centerchange) {
centerchange = false;
for (int i = 0; i < k; i++) {
results[i] = new ArrayList<T>();
}
for (int i = 0; i < players.size(); i++) {
T p = players.get(i);
double[] dists = new double[k];
for (int j = 0; j < initPlayers.size(); j++) {
T initP = initPlayers.get(j);
/* 计算距离 */
double dist = distance(initP, p);
// double dist = 1.0;
// double dist = LevenshteinDistance.levenshteinDistance(initP, p);
// System.out.println("dist="+dist); dists[j] = dist;
} int dist_index = computOrder(dists);
// System.out.println("dist_index="+dist_index);
results[dist_index].add(p);
} // System.out.println("results[0].size()="+results[0].size()); for (int i = 0; i < k; i++) { // 在每一个簇中寻找中心点
T player_new = findNewCenter(results[i]);
// System.out.println( "results[i]"+i+"----"+k+"---===="+results[i].size() +"===="+player_new.toString());
T player_old = initPlayers.get(i);
if (!IsPlayerEqual(player_new, player_old)) {
centerchange = true;
initPlayers.set(i, player_new);
}
}
}
// System.out.println( "results+"+results.length);
return results;
} /**
* 比较是否两个对象是否属性一致
*
* @param p1
* @param p2
* @return
*/
public boolean IsPlayerEqual(T p1, T p2) {
if (p1 == p2) {
return true;
}
if (p1 == null || p2 == null) {
return false;
} boolean flag = true;
try {
for (int i = 0; i < fieldNames.size(); i++) { String fieldName=fieldNames.get(i);
String getName = "get"
+ fieldName.substring(0, 1).toUpperCase()
+ fieldName.substring(1);
// System.out.println(fieldNames);
Object value1 = invokeMethod(p1,getName,null);
Object value2 = invokeMethod(p2,getName,null);
if (!value1.equals(value2)) {
flag = false;
break;
}
}
} catch (Exception e) {
e.printStackTrace();
flag = false;
} return flag;
} /**
* 得到新聚类中心对象
*
* @param ps
* @return
*/
public T findNewCenter(List<T> ps) {
try {
T t = classT.newInstance();
if (ps == null || ps.size() == 0) {
return t;
} double[] ds = new double[fieldNames.size()];
for (T vo : ps) {
for (int i = 0; i < fieldNames.size(); i++) {
String fieldName=fieldNames.get(i);
String getName = "get"
+ fieldName.substring(0, 1).toUpperCase()
+ fieldName.substring(1);
Object obj=invokeMethod(vo,getName,null);
Double fv=(obj==null?0:Double.parseDouble(obj+""));
ds[i] += fv;
} } // System.out.println("-----------------");
for (int i = 0; i < fieldNames.size(); i++) {
ds[i] = ds[i] / ps.size(); // 平均距离
String fieldName = fieldNames.get(i); /* 给对象设值 */
String setName = "set"
+ fieldName.substring(0, 1).toUpperCase()
+ fieldName.substring(1); // invokeMethod(t,setName,new Class[]{double.class},ds[i]);
System.out.println("ds[i] ++="+ds[i]+"----ps.size()"+ps.size());
invokeMethod(t,setName,new Class[]{double.class},ds[i]); } return t;
} catch (Exception ex) {
ex.printStackTrace();
}
return null; } /**
* 得到最短距离,并返回最短距离索引
*
* @param dists
* @return
*/
public int computOrder(double[] dists) {
double min = 0;
int index = 0;
for (int i = 0; i < dists.length - 1; i++) {
double dist0 = dists[i];
if (i == 0) {
min = dist0;
index = 0;
}
double dist1 = dists[i + 1];
if (min > dist1) {
min = dist1;
index = i + 1;
}
} return index;
} /**
* 计算距离(相似性) 采用欧几里得算法
*
* @param p0
* @param p1
* @return
*/
public double distance(T p0, T p1) {
double dis = 0;
try { for (int i = 0; i < fieldNames.size(); i++) {
String fieldName = fieldNames.get(i);
String getName = "get"
+ fieldName.substring(0, 1).toUpperCase()
+ fieldName.substring(1); // System.out.println("fieldNames-----="+fieldNames.size());
Double field0Value=Double.parseDouble(invokeMethod(p0,getName,null)+"");
Double field1Value=Double.parseDouble(invokeMethod(p1,getName,null)+"");
// System.out.println("field0Value="+field0Value);
dis += Math.pow(field0Value - field1Value, 2); } } catch (Exception ex) {
ex.printStackTrace();
}
return Math.sqrt(dis); } /*------公共方法-----*/
public Object invokeMethod(Object owner, String methodName,Class[] argsClass,
Object... args) {
Class ownerClass = owner.getClass(); try {
Method method=ownerClass.getDeclaredMethod(methodName,argsClass); return method.invoke(owner, args);
} catch (SecurityException e) {
e.printStackTrace();
} catch (NoSuchMethodException e) {
e.printStackTrace();
} catch (Exception ex) {
ex.printStackTrace();
} return null;
} }.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
public class Player { private int id;
//@KmeanField
private String name; private int age; /* 得分 */
@KmeanField
private double goal; /* 助攻 */
//@KmeanField
private double assists; /* 篮板 */
//@KmeanField
private double backboard; /* 抢断 */
//@KmeanField
private double steals; public int getId() {
return id;
} public void setId(int id) {
this.id = id;
} public String getName() {
return name;
} public void setName(String name) {
this.name = name;
} public int getAge() {
return age;
} public void setAge(int age) {
this.age = age;
} public double getGoal() {
return goal;
} public void setGoal(double goal) {
this.goal = goal;
} public double getAssists() {
return assists;
} public void setAssists(double assists) {
this.assists = assists;
} public double getBackboard() {
return backboard;
} public void setBackboard(double backboard) {
this.backboard = backboard;
} public double getSteals() {
return steals;
} public void setSteals(double steals) {
this.steals = steals;
} @Override
public String toString() {
// TODO Auto-generated method stub
return name;
}
}.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
import java.util.ArrayList;
import java.util.List;
import java.util.Random; public class TestMain { public static void main(String[] args) {
List<Player> listPlayers=new ArrayList<Player>(); for(int i=0;i<15;i++){ Player p1=new Player();
p1.setName("afei-"+i);
p1.setAssists(i);
p1.setBackboard(i); //p1.setGoal(new Random(100*i).nextDouble());
p1.setGoal(i*10);
p1.setSteals(i);
//listPlayers.add(p1);
} Player p1=new Player();
p1.setName("afei1");
p1.setGoal(1);
p1.setAssists(8);
listPlayers.add(p1); Player p2=new Player();
p2.setName("afei2");
p2.setGoal(2);
listPlayers.add(p2); Player p3=new Player();
p3.setName("afei3");
p3.setGoal(3);
listPlayers.add(p3); Player p4=new Player();
p4.setName("afei4");
p4.setGoal(7);
listPlayers.add(p4); Player p5=new Player();
p5.setName("afei5");
p5.setGoal(8);
listPlayers.add(p5); Player p6=new Player();
p6.setName("afei6");
p6.setGoal(25);
listPlayers.add(p6); Player p7=new Player();
p7.setName("afei7");
p7.setGoal(26);
listPlayers.add(p7); Player p8=new Player();
p8.setName("afei8");
p8.setGoal(27);
listPlayers.add(p8); Player p9=new Player();
p9.setName("afei9");
p9.setGoal(28);
listPlayers.add(p9); Kmeans<Player> kmeans = new Kmeans<Player>(listPlayers,2);
List<Player>[] results = kmeans.comput();
for (int i = 0; i < results.length; i++) {
System.out.println("===========类别" + (i + 1) + "================");
List<Player> list = results[i];
for (Player p : list) {
System.out.println(p.getName() + "--->"
+ p.getGoal() + "," + p.getAssists() + ","
+ p.getSteals() + "," + p.getBackboard());
}
} } }.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
源码:https://github.com/chaoren399/dkdemo/tree/master/kmeans/src
3.聚类–K-means的Java实现的更多相关文章
- 软件——机器学习与Python,聚类,K——means
K-means是一种聚类算法: 这里运用k-means进行31个城市的分类 城市的数据保存在city.txt文件中,内容如下: BJ,2959.19,730.79,749.41,513.34,467. ...
- 2.交通聚类 -层次聚类(agnes)Java实现
1.项目背景 在做交通路线分析的时候,客户需要找出车辆的行车规律,我们将车辆每天的行车路线当做一个数据样本,总共有365天或是更多,从这些数据中通过聚类来获得行车路线规律统计分析. 我首先想到是K-m ...
- 聚类-K均值
数据来源:http://archive.ics.uci.edu/ml/datasets/seeds 15.26 14.84 0.871 5.763 3.312 2.221 5.22 Kama 14.8 ...
- 剑指Offer:面试题15——链表中倒数第k个结点(java实现)
问题描述 输入一个链表,输出该链表中倒数第k个结点.(尾结点是倒数第一个) 结点定义如下: public class ListNode { int val; ListNode next = null; ...
- KMeans聚类 K值以及初始类簇中心点的选取 转
本文主要基于Anand Rajaraman和Jeffrey David Ullman合著,王斌翻译的<大数据-互联网大规模数据挖掘与分布式处理>一书. KMeans算法是最常用的聚类算法, ...
- 【机器学习笔记五】聚类 - k均值聚类
参考资料: [1]Spark Mlib 机器学习实践 [2]机器学习 [3]深入浅出K-means算法 http://www.csdn.net/article/2012-07-03/2807073- ...
- 聚类--K均值算法
import numpy as np from sklearn.datasets import load_iris iris = load_iris() x = iris.data[:,1] y = ...
- 第八次作业:聚类--K均值算法:自主实现与sklearn.cluster.KMeans调用
import numpy as np x = np.random.randint(1,100,[20,1]) y = np.zeros(20) k = 3 def initcenter(x,k): r ...
- 聚类--K均值算法:自主实现与sklearn.cluster.KMeans调用
1.用python实现K均值算法 import numpy as np x = np.random.randint(1,100,20)#产生的20个一到一百的随机整数 y = np.zeros(20) ...
- 在n个数字中求为k的和————Java
给出N个正整数组成的数组A,求能否从中选出若干个,使他们的和为K.如果可以,输出:"YES",否则输出"NO".用Java实现 import java.util ...
随机推荐
- 360doc的文章不能复制的解决办法
按F12打开控制台,执行: document.body.oncopy=null;
- 加州大学伯克利分校Stat2.2x Probability 概率初步学习笔记: Section 2 Random sampling with and without replacement
Stat2.2x Probability(概率)课程由加州大学伯克利分校(University of California, Berkeley)于2014年在edX平台讲授. PDF笔记下载(Acad ...
- Linux 下的常用工具
Useful Linux Utilities (This article is under constant construction) ssh 相关文章 How To Change OpenSSH ...
- 01C语言基础知识
C语言知识要点总结 1在C语言中,关系运算的结果为“真”,就返回1,"假"就返回0 printf("input value is %d" , 5 > 4) ...
- css的核心内容 标准流、盒子模型、浮动、定位等分析
1.块级元素:如:<div></div>2.行内元素:如:<span></span>从效果中看块级元素与行内元素的区别: 通过CSS的设置把行内元素转换 ...
- 10月17日下午MySQl数据库CRUD高级查询
高级查询:1.连接查询 #适用于有外键关系的 没有任何关系没法用select * from Info,Nation #同时查询这俩表并把两表每个数据相互组合,形成笛卡尔积 select * from ...
- 找到一款不错的网站压力测试工具webbench
webbench最多可以模拟3万个并发连接去测试网站的负载能力,个人感觉要比Apache自带的ab压力测试工具好,安装使用也特别方便. 1.适用系统:Linux 2.编译安装: 引用 wget htt ...
- thinkphp连接数据库
版本:3.1.1 连接数据库的具体位置 thinkphp/Config/convention.php,默认修改数据库在这里就可以了 但是为了方便,把数据库配置写到Index/Conf/config.p ...
- CSS3 让图片镜像对称
1.HTML代码 <!DOCTYPE html> <html> <head> <title>test</title> </head&g ...
- ELMAH日志组件数据库脚本
CREATE TABLE dbo.ELMAH_Error ( ErrorId UNIQUEIDENTIFIER NOT NULL, Application NVARCHAR() COLLATE SQL ...