
数据: 7 万多条(共 70143 条观测),数据结构如下:

  1. > head(car.train)
  2. DV DC RV RC SOC HV LV HT LT Type TypeName
  3. 1 379 85.09 0.00 0.0 62.99 3.99 0.00 12 0 10f689e8-e6cc-47a3-be5a-dbc3833428ef EV200
  4. 2 379 85.09 370.89 59.9 63.99 4.01 0.00 12 0 10f689e8-e6cc-47a3-be5a-dbc3833428ef EV200
  5. 3 379 85.09 0.00 0.0 64.99 4.01 0.00 12 0 10f689e8-e6cc-47a3-be5a-dbc3833428ef EV200
  6. 4 379 85.09 0.00 0.0 66.00 4.03 1.55 12 11 10f689e8-e6cc-47a3-be5a-dbc3833428ef EV200
  7. 5 379 85.09 0.00 0.0 67.00 4.03 0.00 12 0 10f689e8-e6cc-47a3-be5a-dbc3833428ef EV200
  8. 6 379 85.09 0.00 0.0 68.00 4.05 0.00 13 0 10f689e8-e6cc-47a3-be5a-dbc3833428ef EV200


R version:

  1. > version
  2. _
  3. platform x86_64-w64-mingw32
  4. arch x86_64
  5. os mingw32
  6. system x86_64, mingw32
  7. status
  8. major 3
  9. minor 2.5
  10. year 2016
  11. month 04
  12. day 14
  13. svn rev 70478
  14. language R
  15. version.string R version 3.2.5 (2016-04-14)
  16. nickname Very, Very Secure Dishes



  1. ##----------------------全局设置-------------------------------
  2. remove(list=ls())
  3. space_path <- c("E:\\RScore\\kmeans\\")
  4. setwd(space_path)
  5. Sys.setlocale(category = "LC_ALL",local="chinese")
  7. ##table 行列转换函数
  8. tblView <- function (tbl)
  9. {
  10. ##install.packages("tidyr")
  11. library(tidyr)
  12. df <- as.data.frame(tbl)
  13. df <- spread(data = df, key = Var2, value = Freq)
  14. datatable(df)
  15. }
  17. ## 公共函数:数据读写及计算
  18. source("core.R",encoding="utf-8")
  21. ##训练样本
  22. car.train <-"D_Cluster")
  23. newdata <- car.train[1:8]


source code:

  1. > ################################################stats::kmeans######################################
  2. > startTime <- Sys.time();
  3. >
  4. > library(stats)
  5. > kc <- kmeans(x=newdata, centers = 13)
  6. > #plot(newdata[,c("DV","DC")],col=kc$cluster)
  7. > tbl <- table(car.train$TypeName,kc$cluster)
  8. > tblView(tbl)
  9. >
  10. > ##耗时间
  11. > endTime <- Sys.time()
  12. > difTime <- difftime(endTime,startTime,units = "secs")
  13. > print(paste0("stats::kmeans total time:", difTime))
  14. [1] "stats::kmeans total time:0.195545196533203"
  1. stats::kmeans total time:0.195545196533203, result view:


source code:

  1. > ################################################fpc::kmeansruns######################################
  2. > startTime <- Sys.time();
  3. >
  4. > library(fpc)
  5. > kc1 <- kmeansruns(data = newdata,krange = 1:15,critout = TRUE)
  6. 2 clusters 9394.437
  7. 3 clusters 185919.7
  8. 4 clusters 482630.4
  9. 5 clusters 414875.3
  10. 6 clusters 376338
  11. 7 clusters 334493.6
  12. 8 clusters 303976.7
  13. 9 clusters 279036.3
  14. 10 clusters 432009.9
  15. 11 clusters 363074.8
  16. 12 clusters 405784.7
  17. 13 clusters 397422.8
  18. 14 clusters 371842.5
  19. 15 clusters 408561.7
  20. Warning messages:
  21. 1: Quick-TRANSfer stage steps exceeded maximum (= 3507150)
  22. 2: Quick-TRANSfer stage steps exceeded maximum (= 3507150)
  23. 3: Quick-TRANSfer stage steps exceeded maximum (= 3507150)
  24. > tbl<- table(car.train$TypeName,kc1$cluster)
  25. > tblView(tbl)
  26. >
  27. > ##耗时间
  28. > endTime <- Sys.time()
  29. > difTime <- difftime(endTime,startTime,units = "secs")
  30. > print(paste0("fpc::kmeansruns total time:", difTime))
  31. [1] "fpc::kmeansruns total time:107.454074859619"
  1. [1] "fpc::kmeansruns total time:107.454074859619" result view:


source code:

  1. > ################################################cluster::pam######################################
  2. >
  3. > library(cluster)
  4. > cPam <- pam(x=newdata,k=13)
  5. Error in pam(x = newdata, k = 13) :
  6. have 70143 observations, but not more than 65536 are allowed

Error: 待确认


source code:

  1. > ################################################fpc::pamk######################################
  2. >
  3. > library(fpc)
  4. > fPamk <- pamk(newdata,krang=1:15)
  5. Error in pam(sdata, k, diss = diss, ...) :
  6. have 70143 observations, but not more than 65536 are allowed

Error: 待确认


source code:

  1. > ################################################fpc::pamk######################################
  2. >
  3. > library(fpc)
  4. > fPamk <- pamk(newdata,krang=1:15)
  5. Error in pam(sdata, k, diss = diss, ...) :
  6. have 70143 observations, but not more than 65536 are allowed

Error: 待确认


source code:

  1. > ################################################mclust::Mclust######################################
  2. > library(mclust)
  3. > EM<-Mclust(newdata)
  4. Error in hcVVV(data = c(379, 379, 379, 379, 379, 379, 379, 379, 379, 379, :
  5. NAs in foreign function call (arg 13)
  6. In addition: Warning message:
  7. In hcVVV(data = c(379, 379, 379, 379, 379, 379, 379, 379, 379, 379, :
  8. NAs introduced by coercion to integer range

Error: 待确认


source code:

  1. > ################################################cluster::fanny######################################
  2. > library(cluster)
  3. > fannyz=fanny(newdata,13,metric="SqEuclidean")
  4. Error in fanny(newdata, 13, metric = "SqEuclidean") :
  5. long vectors (argument 5) are not supported in .Fortran

Error: 待确认


source code:

  1. > ################################################e1071::cmeans######################################
  2. > startTime <- Sys.time();
  3. >
  4. > library("e1071")
  5. > eCm<-cmeans(newdata,15)
  6. > tbl <- table(car.train$TypeName,eCm$cluster)
  7. > tblView(tbl)
  8. >
  9. > ##耗时间
  10. > endTime <- Sys.time()
  11. > difTime <- difftime(endTime,startTime,units = "secs")
  12. > print(paste0("e1071::cmeans total time:", difTime))
  13. [1] "e1071::cmeans total time:8.7237401008606"

[1] "e1071::cmeans total time:8.7237401008606"   result view:


