
  • 需求:计算两点间的欧几里得距离、曼哈顿距离、切比雪夫距离、堪培拉距离
  • 实现:利用commons.math3库相应函数

 1 import org.apache.commons.math3.ml.distance.*;
3 public class TestMetrics {
4 public static void main(String[] args) {
5 double[] x = {1, 3}, y = {5, 6};
7 EuclideanDistance eD = new EuclideanDistance();
8 System.out.printf("Euclidean distance = %.2f%n", eD.compute(x,y));
10 ManhattanDistance mD = new ManhattanDistance();
11 System.out.printf("Manhattan distance = %.2f%n", mD.compute(x,y));
13 ChebyshevDistance cD = new ChebyshevDistance();
14 System.out.printf("Chebyshev distance = %.2f%n", cD.compute(x,y));
16 CanberraDistance caD = new CanberraDistance();
17 System.out.printf("Canberra distance = %.2f%n", caD.compute(x,y));
18 }
19 }

Euclidean distance = 5.00
Manhattan distance = 7.00
Chebyshev distance = 4.00
Canberra distance = 1.00


  • 需求:将13个样本点分为3类
  • 实现:凝聚式(自底向上)层次聚类——要把m个点划分为k类,先令每个点各自成为一类,然后每次找出中心(质心)距离最近的两个类,把它们合并成一个新类来替换原来的两个类;重复m-k次后恰好剩下k个类


 1 import java.util.HashSet;
3 public class HierarchicalClustering {
4 private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},
5 {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};
6 private static final int M = DATA.length; // number of points
7 private static final int K = 3; // number of clusters
9 public static void main(String[] args) {
10 HashSet<Cluster> clusters = load(DATA);
11 for (int i = 0; i < M - K; i++) {
12 System.out.printf("%n%2d clusters:%n", M-i-1);
13 coalesce(clusters);
14 System.out.println(clusters);
15 }
16 }
18 private static HashSet<Cluster> load(double[][] data) {
19 HashSet<Cluster> clusters = new HashSet();
20 for (double[] datum : DATA) {
21 clusters.add(new Cluster(datum[0], datum[1]));
22 }
23 return clusters;
24 }
26 private static void coalesce(HashSet<Cluster> clusters) {
27 Cluster cluster1=null, cluster2=null;
28 double minDist = Double.POSITIVE_INFINITY;
29 for (Cluster c1 : clusters) {
30 for (Cluster c2 : clusters) {
31 if (!c1.equals(c2) && Cluster.distance(c1, c2) < minDist) {
32 cluster1 = c1;
33 cluster2 = c2;
34 minDist = Cluster.distance(c1, c2);
35 }
36 }
37 }
38 clusters.remove(cluster1);
39 clusters.remove(cluster2);
40 clusters.add(Cluster.union(cluster1, cluster2));
41 }
42 }


 1 public class Point {
2 private final double x, y;
4 public Point(double x, double y) {
5 this.x = x;
6 this.y = y;
7 }
9 public double getX() {
10 return x;
11 }
13 public double getY() {
14 return y;
15 }
17 @Override
18 public int hashCode() {
19 int xhC = new Double(x).hashCode();
20 int yhC = new Double(y).hashCode();
21 return (int)(xhC + 79*yhC);
22 }
24 @Override
25 public boolean equals(Object object) {
26 if (object == null) {
27 return false;
28 } else if (object == this) {
29 return true;
30 } else if (!(object instanceof Point)) {
31 return false;
32 }
33 Point that = (Point)object;
34 return bits(that.x) == bits(this.x) && bits(that.y) == bits(this.y);
35 }
37 private long bits(double d) {
38 return Double.doubleToLongBits(d);
40 }
42 @Override
43 public String toString() {
44 return String.format("(%.2f,%.2f)", x,y);
45 }
46 }


 1 import java.util.HashSet;
3 public class Cluster {
4 private final HashSet<Point> points;
5 private Point centroid;
7 public Cluster(HashSet points, Point centroid) {
8 this.points = points;
9 this.centroid = centroid;
10 }
12 public Cluster(Point point) {
13 this.points = new HashSet();
14 this.points.add(point);
15 this.centroid = point;
16 }
18 public Cluster(double x, double y) {
19 this(new Point(x,y));
20 }
22 public Point getCentroid() {
23 return centroid;
24 }
26 public void add(Point point) {
27 points.add(point);
28 recomputeCentroid();
29 }
31 public void recomputeCentroid() {
32 double xSum=0.0, ySum=0.0;
33 for (Point point : points) {
34 xSum += point.getX();
35 ySum += point.getY();
36 }
37 centroid = new Point(xSum/points.size(), ySum/points.size());
38 }
40 public static double distance(Cluster c1, Cluster c2) {
41 double dx = c1.centroid.getX() - c2.centroid.getX();
42 double dy = c1.centroid.getY() - c2.centroid.getY();
43 return Math.sqrt(dx*dx + dy*dy);
44 }
46 public static Cluster union(Cluster c1, Cluster c2) {
47 Cluster cluster = new Cluster(c1.points, c1.centroid);
48 cluster.points.addAll(c2.points);
49 cluster.recomputeCentroid();
50 return cluster;
51 }
53 @Override
54 public int hashCode() {
55 return points.hashCode();
56 }
58 @Override
59 public boolean equals(Object object) {
60 if (object == null) {
61 return false;
62 } else if (object == this) {
63 return true;
64 } else if (!(object instanceof Cluster)) {
65 return false;
66 }
67 final Cluster that = (Cluster)object;
68 return that.points.equals(this.points);
69 }
71 @Override
72 public String toString() {
73 return String.format("%n{%s,%s}", centroid, points);
74 }
75 }


  1 12 clusters:
2 [
3 {(1.00,1.00),[(1.00,1.00)]},
4 {(1.00,3.00),[(1.00,3.00)]},
5 {(2.00,6.00),[(2.00,6.00)]},
6 {(3.00,2.00),[(3.00,2.00)]},
7 {(4.00,3.00),[(4.00,3.00)]},
8 {(6.00,4.00),[(6.00,4.00)]},
9 {(7.00,1.00),[(7.00,1.00)]},
10 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},
11 {(6.00,3.00),[(6.00,3.00)]},
12 {(3.00,4.00),[(3.00,4.00)]},
13 {(1.00,5.00),[(1.00,5.00)]},
14 {(5.00,6.00),[(5.00,6.00)]}]
16 11 clusters:
17 [
18 {(1.00,1.00),[(1.00,1.00)]},
19 {(1.00,3.00),[(1.00,3.00)]},
20 {(2.00,6.00),[(2.00,6.00)]},
21 {(3.00,2.00),[(3.00,2.00)]},
22 {(4.00,3.00),[(4.00,3.00)]},
23 {(7.00,1.00),[(7.00,1.00)]},
24 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},
25 {(3.00,4.00),[(3.00,4.00)]},
26 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},
27 {(1.00,5.00),[(1.00,5.00)]},
28 {(5.00,6.00),[(5.00,6.00)]}]
30 10 clusters:
31 [
32 {(1.00,1.00),[(1.00,1.00)]},
33 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},
34 {(1.00,3.00),[(1.00,3.00)]},
35 {(3.00,2.00),[(3.00,2.00)]},
36 {(4.00,3.00),[(4.00,3.00)]},
37 {(7.00,1.00),[(7.00,1.00)]},
38 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},
39 {(3.00,4.00),[(3.00,4.00)]},
40 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},
41 {(5.00,6.00),[(5.00,6.00)]}]
43 9 clusters:
44 [
45 {(1.00,1.00),[(1.00,1.00)]},
46 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},
47 {(1.00,3.00),[(1.00,3.00)]},
48 {(7.00,1.00),[(7.00,1.00)]},
49 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},
50 {(3.50,2.50),[(3.00,2.00), (4.00,3.00)]},
51 {(3.00,4.00),[(3.00,4.00)]},
52 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},
53 {(5.00,6.00),[(5.00,6.00)]}]
55 8 clusters:
56 [
57 {(1.00,1.00),[(1.00,1.00)]},
58 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},
59 {(1.00,3.00),[(1.00,3.00)]},
60 {(3.33,3.00),[(3.00,2.00), (4.00,3.00), (3.00,4.00)]},
61 {(7.00,1.00),[(7.00,1.00)]},
62 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},
63 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},
64 {(5.00,6.00),[(5.00,6.00)]}]
66 7 clusters:
67 [
68 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},
69 {(3.33,3.00),[(3.00,2.00), (4.00,3.00), (3.00,4.00)]},
70 {(1.00,2.00),[(1.00,1.00), (1.00,3.00)]},
71 {(7.00,1.00),[(7.00,1.00)]},
72 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},
73 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},
74 {(5.00,6.00),[(5.00,6.00)]}]
76 6 clusters:
77 [
78 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},
79 {(3.33,3.00),[(3.00,2.00), (4.00,3.00), (3.00,4.00)]},
80 {(1.00,2.00),[(1.00,1.00), (1.00,3.00)]},
81 {(6.33,5.67),[(7.00,6.00), (7.00,5.00), (5.00,6.00)]},
82 {(7.00,1.00),[(7.00,1.00)]},
83 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]}]
85 5 clusters:
86 [
87 {(6.20,4.80),[(6.00,3.00), (7.00,6.00), (7.00,5.00), (6.00,4.00), (5.00,6.00)]},
88 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},
89 {(3.33,3.00),[(3.00,2.00), (4.00,3.00), (3.00,4.00)]},
90 {(1.00,2.00),[(1.00,1.00), (1.00,3.00)]},
91 {(7.00,1.00),[(7.00,1.00)]}]
93 4 clusters:
94 [
95 {(6.20,4.80),[(6.00,3.00), (7.00,6.00), (7.00,5.00), (6.00,4.00), (5.00,6.00)]},
96 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},
97 {(7.00,1.00),[(7.00,1.00)]},
98 {(2.40,2.60),[(1.00,1.00), (3.00,2.00), (4.00,3.00), (3.00,4.00), (1.00,3.00)]}]
100 3 clusters:
101 [
102 {(6.20,4.80),[(6.00,3.00), (7.00,6.00), (7.00,5.00), (6.00,4.00), (5.00,6.00)]},
103 {(7.00,1.00),[(7.00,1.00)]},
104 {(2.14,3.43),[(1.00,1.00), (2.00,6.00), (3.00,2.00), (4.00,3.00), (3.00,4.00), (1.00,3.00), (1.00,5.00)]}]


 1 import java.util.ArrayList;
2 import weka.clusterers.HierarchicalClusterer;
3 import static weka.clusterers.HierarchicalClusterer.TAGS_LINK_TYPE;
4 import weka.core.Attribute;
5 import weka.core.Instance;
6 import weka.core.Instances;
7 import weka.core.SelectedTag;
8 import weka.core.SparseInstance;
10 public class WekaHierarchicalClustering {
11 private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},
12 {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};
13 private static final int M = DATA.length; // number of points
14 private static final int K = 3; // number of clusters
16 public static void main(String[] args) {
17 Instances dataset = load(DATA);
18 HierarchicalClusterer hc = new HierarchicalClusterer();
19 hc.setLinkType(new SelectedTag(4, TAGS_LINK_TYPE)); // CENTROID
20 hc.setNumClusters(3);
21 try {
22 hc.buildClusterer(dataset);
23 for (Instance instance : dataset) {
24 System.out.printf("(%.0f,%.0f): %s%n",
25 instance.value(0), instance.value(1),
26 hc.clusterInstance(instance));
27 }
28 } catch (Exception e) {
29 System.err.println(e);
30 }
31 }
33 private static Instances load(double[][] data) {
34 ArrayList<Attribute> attributes = new ArrayList<Attribute>();
35 attributes.add(new Attribute("X"));
36 attributes.add(new Attribute("Y"));
37 Instances dataset = new Instances("Dataset", attributes, M);
38 for (double[] datum : data) {
39 Instance instance = new SparseInstance(2);
40 instance.setValue(0, datum[0]);
41 instance.setValue(1, datum[1]);
42 dataset.add(instance);
43 }
44 return dataset;
45 }
46 }


(1,1): 0
(1,3): 0
(1,5): 0
(2,6): 0
(3,2): 0
(3,4): 0
(4,3): 0
(5,6): 1
(6,3): 1
(6,4): 1
(7,1): 2
(7,5): 1
(7,6): 1


 1 import java.awt.BorderLayout;
2 import java.awt.Container;
3 import java.util.ArrayList;
4 import javax.swing.JFrame;
5 import weka.clusterers.HierarchicalClusterer;
6 import static weka.clusterers.HierarchicalClusterer.TAGS_LINK_TYPE;
7 import weka.core.Attribute;
8 import weka.core.Instance;
9 import weka.core.Instances;
10 import weka.core.SelectedTag;
11 import weka.core.SparseInstance;
12 import weka.gui.hierarchyvisualizer.HierarchyVisualizer;
14 public class WekaHierarchicalClustering2 {
15 private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},
16 {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};
17 private static final int M = DATA.length; // number of points
18 private static final int K = 3; // number of clusters
20 public static void main(String[] args) {
21 Instances dataset = load(DATA);
22 HierarchicalClusterer hc = new HierarchicalClusterer();
23 hc.setLinkType(new SelectedTag(4, TAGS_LINK_TYPE)); // CENTROID
24 hc.setNumClusters(1);
25 try {
26 hc.buildClusterer(dataset);
27 for (Instance instance : dataset) {
28 System.out.printf("(%.0f,%.0f): %s%n",
29 instance.value(0), instance.value(1),
30 hc.clusterInstance(instance));
31 }
32 displayDendrogram(hc.graph());
33 } catch (Exception e) {
34 System.err.println(e);
35 }
36 }
38 private static Instances load(double[][] data) {
39 ArrayList<Attribute> attributes = new ArrayList<Attribute>();
40 attributes.add(new Attribute("X"));
41 attributes.add(new Attribute("Y"));
42 Instances dataset = new Instances("Dataset", attributes, M);
43 for (double[] datum : data) {
44 Instance instance = new SparseInstance(2);
45 instance.setValue(0, datum[0]);
46 instance.setValue(1, datum[1]);
47 dataset.add(instance);
48 }
49 return dataset;
50 }
52 public static void displayDendrogram(String graph) {
53 JFrame frame = new JFrame("Dendrogram");
54 frame.setSize(500, 400);
55 frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
56 Container pane = frame.getContentPane();
57 pane.setLayout(new BorderLayout());
58 pane.add(new HierarchyVisualizer(graph));
59 frame.setVisible(true);
60 }
61 }


  • 需求:同上
  • 实现:从数据集中选k个点创建k个聚类,其余点添加到最近的聚类中,重新计算中心


 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
 import java.util.Set;
5 public class KMeans {
6 private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},
7 {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};
8 private static final int M = DATA.length;
9 private static final int K = 3;
10 private static HashSet<Point> points;
11 private static HashSet<Cluster> clusters = new HashSet();
12 private static Random RANDOM = new Random();
14 public static void main(String[] args){
15 points = load(DATA);
17 int i0 = RANDOM.nextInt(M);
18 Point p = new Point(DATA[i0][0],DATA[i0][1]);
19 points.remove(p);
21 HashSet<Point> initSet = new HashSet();
22 initSet.add(p);
24 for(int i = 1; i < K; i ++){
25 p = farthestFrom(initSet);
26 initSet.add(p);
27 points.remove(p);
28 }
30 for(Point point:initSet){
31 Cluster cluster = new Cluster(point);
32 clusters.add(cluster);
33 }
35 for(Point point:points){
36 Cluster cluster = closestTo(point);
37 cluster.add(point);
38 cluster.recomputeCentroid();
39 }
40 System.out.println(clusters);
41 }
43 private static HashSet<Point> load(double[][] data) {
44 HashSet<Point> points = new HashSet();
45 for (double[] datum : DATA) {
46 points.add(new Point(datum[0], datum[1]));
47 }
48 return points;
49 }
51 // return the cluster whose centroid is closet to the specified point
52 private static Cluster closestTo(Point point){
53 double minDist = Double.POSITIVE_INFINITY;
54 Cluster c = null;
55 for(Cluster cluster:clusters){
56 double d = distance2(cluster.getCentroid(),point);
57 if(d < minDist){
58 minDist = d;
59 c = cluster;
60 }
61 }
62 return c;
63 }
65 // return the point that is farthest from the specified set
66 private static Point farthestFrom(Set<Point> set){
67 Point p = null;
68 double maxDist = 0.0;
69 for(Point point:points){
70 if(set.contains(point)){
71 continue;
72 }
73 double d = dist(point,set);
74 if(d > maxDist){
75 p = point;
76 maxDist = d;
77 }
78 }
79 return p;
80 }
82 // return the distance from p to the nearest point in the set
83 public static double dist(Point p, Set<Point> set){
84 double minDist = Double.POSITIVE_INFINITY;
85 for(Point point:set){
86 double d = distance2(p,point);
87 minDist = (d < minDist ? d : minDist);
88 }
89 return minDist;
90 }
92 public static double distance2(Point p, Point q){
93 double dx = p.getX() - q.getX();
94 double dy = p.getY() - q.getY();
95 return dx*dx + dy*dy;
96 }
97 }

[{(2.40,2.60),[(1.00,1.00), (1.00,3.00), (3.00,2.00), (4.00,3.00), (3.00,4.00)]},
{(6.33,4.17),[(6.00,3.00), (7.00,6.00), (6.00,4.00), (7.00,5.00), (7.00,1.00), (5.00,6.00)]},
{(1.50,5.50),[(2.00,6.00), (1.00,5.00)]}]

KMeans.java(Weka 实现)

 1 import java.util.ArrayList;
2 import weka.clusterers.SimpleKMeans;
3 import weka.core.Attribute;
4 import weka.core.Instance;
5 import weka.core.Instances;
6 import weka.core.SparseInstance;
8 public class KMeans {
9 private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},
10 {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};
11 private static final int M = DATA.length; // number of points
12 private static final int K = 3; // number of clusters
14 public static void main(String[] args) {
15 Instances dataset = load(DATA);
16 SimpleKMeans skm = new SimpleKMeans();
17 System.out.printf("%d clusters:%n", K);
18 try {
19 skm.setNumClusters(K);
20 skm.buildClusterer(dataset);
21 for (Instance instance : dataset) {
22 System.out.printf("(%.0f,%.0f): %s%n",
23 instance.value(0), instance.value(1),
24 skm.clusterInstance(instance));
25 }
26 } catch (Exception e) {
27 System.err.println(e);
28 }
29 }
31 private static Instances load(double[][] data) {
32 ArrayList<Attribute> attributes = new ArrayList<Attribute>();
33 attributes.add(new Attribute("X"));
34 attributes.add(new Attribute("Y"));
35 Instances dataset = new Instances("Dataset", attributes, M);
36 for (double[] datum : data) {
37 Instance instance = new SparseInstance(2);
38 instance.setValue(0, datum[0]);
39 instance.setValue(1, datum[1]);
40 dataset.add(instance);
41 }
42 return dataset;
43 }
44 }


(1,1): 1
(1,3): 1
(1,5): 0
(2,6): 0
(3,2): 1
(3,4): 0
(4,3): 0
(5,6): 0
(6,3): 2
(6,4): 2
(7,1): 2
(7,5): 2
(7,6): 2

KMeansPlusPlus.java(Apache Common Math 实现)

 1 import java.util.ArrayList;
2 import java.util.List;
3 import org.apache.commons.math3.ml.clustering.CentroidCluster;
4 import org.apache.commons.math3.ml.clustering.DoublePoint;
5 import org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer;
6 import org.apache.commons.math3.ml.distance.EuclideanDistance;
8 public class KMeansPlusPlus {
9 private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},
10 {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};
11 private static final int M = DATA.length; // number of points
12 private static final int K = 3; // number of clusters
13 private static final int MAX = 100; // maximum number of iterations
14 private static final EuclideanDistance ED = new EuclideanDistance();
16 public static void main(String[] args) {
17 List<DoublePoint> points = load(DATA);
18 KMeansPlusPlusClusterer<DoublePoint> clusterer;
19 clusterer = new KMeansPlusPlusClusterer(K, MAX, ED);
20 List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);
22 for (CentroidCluster<DoublePoint> cluster : clusters) {
23 System.out.println(cluster.getPoints());
24 }
25 }
27 private static List<DoublePoint> load(double[][] data) {
28 List<DoublePoint> points = new ArrayList(M);
29 for (double[] pair : data) {
30 points.add(new DoublePoint(pair));
31 }
32 return points;
33 }
34 }

[[5.0, 6.0], [6.0, 3.0], [6.0, 4.0], [7.0, 5.0], [7.0, 6.0]]
[[1.0, 1.0], [1.0, 3.0], [1.0, 5.0], [2.0, 6.0], [3.0, 2.0], [3.0, 4.0], [4.0, 3.0]]
[[7.0, 1.0]]


  • 需求:同上
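  • 实现:以负的平方欧氏距离作为点与点之间的相似度,迭代更新吸引度(responsibility)矩阵r和归属度(availability)矩阵a(每次更新都用阻尼系数与旧值加权平均),迭代结束后为每个点选取使a+r最大的点作为它的代表点(exemplar)
  • 特点:不同于KMeans,聚类个数k不需事先确定,而是由算法根据数据自动得出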

 /**
  * Affinity propagation on a small fixed set of 2-D points.
  *
  * <p>Similarities are negative squared Euclidean distances, with every
  * self-similarity set to the average off-diagonal similarity. Responsibility
  * and availability matrices are updated with damping for a fixed number of
  * rounds. Each point then reports as its exemplar the point that maximizes
  * availability plus responsibility.
  */
 public class AffinityPropagation {
     private static double[][] x = {{1,2}, {2,3}, {4,1}, {4,4}, {5,3}};
     private static int n = x.length;                 // number of points
     private static double[][] s = new double[n][n];  // similarities
     private static double[][] r = new double[n][n];  // responsibilities
     private static double[][] a = new double[n][n];  // availabilities
     private static final int ITERATIONS = 10;
     private static final double DAMPER = 0.5;

     public static void main(String[] args) {
         initSimilarities();
         int round = 0;
         while (round < ITERATIONS) {
             updateResponsibilities();
             updateAvailabilities();
             round++;
         }
         printResults();
     }

     /**
      * Fills the similarity matrix: off-diagonal entries are the negative
      * squared distances, diagonal entries are their average.
      */
     private static void initSimilarities() {
         double total = 0;
         // Row 0 has no column below the diagonal, so start at row 1.
         for (int row = 1; row < n; row++) {
             for (int col = 0; col < row; col++) {
                 double similarity = negSqEuclidDist(x[row], x[col]);
                 s[col][row] = similarity;
                 s[row][col] = similarity;
                 total += similarity;
             }
         }
         // Average over the n*(n-1)/2 unordered pairs.
         double average = 2*total/(n*n - n);
         for (int d = 0; d < n; d++) {
             s[d][d] = average;
         }
     }

     /**
      * Damped update of every responsibility r[i][k]: the similarity s[i][k]
      * minus the largest a[i][j] + s[i][j] over all j other than k.
      */
     private static void updateResponsibilities() {
         for (int i = 0; i < n; i++) {
             for (int k = 0; k < n; k++) {
                 double strongestRival = Double.NEGATIVE_INFINITY;
                 for (int j = 0; j < n; j++) {
                     if (j == k) {
                         continue;
                     }
                     strongestRival = Math.max(strongestRival, a[i][j] + s[i][j]);
                 }
                 double previous = r[i][k];
                 double target = s[i][k] - strongestRival;
                 r[i][k] = DAMPER*previous + (1 - DAMPER)*target;
             }
         }
     }

     /**
      * Damped update of every availability a[i][k]. Off the diagonal the
      * target is min(0, r[k][k] + sumOfPos(i,k)); on the diagonal it is
      * sumOfPos(k,k).
      */
     private static void updateAvailabilities() {
         for (int i = 0; i < n; i++) {
             for (int k = 0; k < n; k++) {
                 double target;
                 if (k == i) {
                     target = sumOfPos(k, k);
                 } else {
                     target = Math.min(0, r[k][k] + sumOfPos(i, k));
                 }
                 double previous = a[i][k];
                 a[i][k] = DAMPER*previous + (1 - DAMPER)*target;
             }
         }
     }

     /* Returns the negative square of the Euclidean distance from p to q.
      */
     private static double negSqEuclidDist(double[] p, double[] q) {
         double deltaX = p[0] - q[0];
         double deltaY = p[1] - q[1];
         return -(deltaX*deltaX + deltaY*deltaY);
     }

     /* Returns the sum of the positive r[j][k] excluding r[i][k] and r[k][k].
      */
     private static double sumOfPos(int i, int k) {
         double total = 0;
         for (int j = 0; j < n; j++) {
             if (j == i || j == k) {
                 continue;
             }
             total += Math.max(0, r[j][k]);
         }
         return total;
     }

     /** Prints, for every point, the index of its exemplar. */
     private static void printResults() {
         for (int i = 0; i < n; i++) {
             System.out.printf("point %d has exemplar point %d%n", i, exemplarOf(i));
         }
     }

     /**
      * Returns the index j that maximizes a[i][j] + r[i][j]. On ties the
      * lowest index wins.
      */
     private static int exemplarOf(int i) {
         int best = 0;
         double bestScore = a[i][0] + r[i][0];
         for (int j = 1; j < n; j++) {
             double score = a[i][j] + r[i][j];
             if (score > bestScore) {
                 bestScore = score;
                 best = j;
             }
         }
         return best;
     }
 }

point 0 has exemplar point 1
point 1 has exemplar point 1
point 2 has exemplar point 4
point 3 has exemplar point 4
point 4 has exemplar point 4




  1. Java实现聚类算法k-means

    2016-07 java简单实现聚类算法 但是有一个小问题,,,,我其实每次迭代之后(就是达不到我的收敛标准之前,聚类中心的误差达不到指定小的时候),虽然重新算了聚类中心,但是其实我的那些点并没有变, ...

  2. [Java] 数据分析 -- 大数据

    单词计数 需求:输入小说文本,输出每个单词出现的次数 实现:分map.combine.reduce三个阶段实现 1 /* Data Analysis with Java 2 * John R. Hub ...

  3. [Java] 数据分析 -- NoSQL数据库

    MongoDB概念:与关系型数据库对应 database(数据库):数据库 collection(集合):表 document(文档):行 field(域):列/字段 注意事项 文档是一组键值(key ...

  4. [Java] 数据分析--分类

    ID3算法 思路:分类算法的输入为训练集,输出为对数据进行分类的函数.ID3算法为分类函数生成分类树 需求:对水果训练集的一个维度(是否甜)进行预测 实现:决策树,熵函数,ID3,weka库 J48类 ...

  5. [Java] 数据分析 -- 回归分析

    线性回归 需求:从文件读取数据对,计算回归函数及系数 实现1:commons.math的SimpleRegression,定义函数getData从文件读取数据返回SimpleRegression类 1 ...

  6. [Java] 数据分析--统计

    二项分布 需求:5个四面体筛子,筛子三面绿色,一面红色,模拟1000000次,统计每次试验红色落地筛子个数的分布 实现:用循环实现5个筛子和1000000次试验,定义函数numRedDown模拟5个筛 ...

  7. [Java]数据分析--数据可视化

    时间序列 需求:将一组字符顺序添加到时间序列中 实现:定义时间序列类TimeSeries,包含静态类Entry表示序列类中的各项,以及add,get,iterator,entry方法 TimeSeri ...

  8. [Java] 数据分析--数据预处理

    数据结构 键-值对:HashMap 1 import java.io.File; 2 import java.io.FileNotFoundException; 3 import java.util. ...

  9. 5-Spark高级数据分析-第五章 基于K均值聚类的网络流量异常检测

    据我们所知,有‘已知的已知’,有些事,我们知道我们知道:我们也知道,有 ‘已知的未知’,也就是说,有些事,我们现在知道我们不知道.但是,同样存在‘不知的不知’——有些事,我们不知道我们不知道. 上一章 ...


  1. ISP算法:深入聊聊lens shading

    一.简介 关于什么是成像中的lens shading这里直接引用一句英文的definition:"The term shading describes the light fall-off ...

  2. 「Spring Boot 2.4 新特性」一键构建Docker镜像

    背景 在我们开发过程中为了支持 Docker 容器化,一般使用 Maven 编译打包然后生成镜像,能够大大提供上线效率,同时能够快速动态扩容,快速回滚,着实很方便.docker-maven-plugi ...

  3. 【macOS】Homebrew & Homebrew cask macOS软件包管理神器

    Homebrew Homebrew 与 Homebrew Cask Homebrew 是基于 OS X 的套件管理工具,是一个开源的 Ruby 脚本,专门用于快速下载软件.更通俗地讲,Homebrew ...

  4. 王炸!!IDEA 2021.1 推出语音、视频功能,边写代码边聊天,我真的服了…

    IDEA 2020.3 刚没用多久,2021.1 又陆续给我推送更新了: 启动就提醒更新,麻烦,那不如更新下,体验下新版本. 如上图所示,2021.1 更新了 9 个新特性,下面栈长会一一体验给大家介 ...

  5. jasypt-spring-boot提示Failed to bind properties

    1 问题描述 在Spring Boot中使用jasypt-spring-boot进行加密,但是提示: Description: Failed to bind properties under 'spr ...

  6. k8s cronjob

    k8s cronjob 只存在于v1beta1中 可以周期性 定时执行任务, 事例 [root@master01 ~]# kubectl apply -f mycronjob-busybox.yaml ...

  7. pickle json模块

    pickle --- Python 对象序列化 通过pickle模块的序列化操作我们能够将程序中运行的对象信息保存到文件中去,永久存储. 通过pickle模块的反序列化操作,我们能够从文件中创建上一次 ...

  8. 我的自定义多交互live2d折腾经历

    在@m0d1 大佬的督促(?)下有了这篇复盘.不过因为可能很多地方讲得不全面+理解不够深入,故不打算把这篇当成是教程/指南,那就算是一个指北吧= = (划重点:不是教程!不是教程!不是教程! 省流简介 ...

  9. 1055 The World's Richest

    Forbes magazine publishes every year its list of billionaires based on the annual ranking of the wor ...

  10. liunx从0开始部署vue+spring boot项目(包含:安装jdk、mysql、nginx)

    单纯记录,若有不合理不规范的地方请忽略. 0.配置JDK 0.下载liunx的jdk解压到/usr/local目录下. tar -xzvf jdk-8u291-linux-x64.tar.gz -C ...