rm(list=ls())
gc() memory.limit(4000)
library(corrplot)
library(rpart)
data_health<-read.csv("D:/smart_data0608/smart_data_section_good_15.txt",header=FALSE,sep="\t",na.strings="None")#读健康数据
data_fault<-read.csv("D:/smart_data0608/smart_data_section_failTrainSet_last24h.txt",header=FALSE,sep="\t",na.strings="None")#读故障数据-训练数据
data_fault_test<-read.csv("D:/smart_data0608/smart_data_section_failTestSet_last24h.txt",header=FALSE,sep="\t",na.strings="None")#读故障数据—测试数据 colnames(data_health) <- c("id","serial_number","update_time","smart_health_status","current_drive_temperature","drive_trip_temperature","elements_in_grown_defect_list","manufactured_time","cycle_count","load_unload_count","load_unload_count","load_unload_cycles","blocks_sent_to_initiator","blocks_received_from_initiator","blocks_read_from_cache","num_commands_size_not_larger_than_segment_size ","num_commands_size_larger_than_segment_size","num_hours_powered_up","num_minutes_next_test","read_corrected_ecc_fast","read_corrected_ecc_delayed","read_corrected_re","read_total_errors_corrected","read_correction_algo_invocations","read_gigabytes_processed","read_total_uncorrected_errors","write_corrected_ecc_fast","write_corrected_ecc_delayed","write_corrected_re","write_total_errors_corrected","write_correction_algo_invocations","write_gigabytes_processed","write_total_uncorrected_errors","verify_corrected_ecc_fast","verify_corrected_ecc_delayed","verify_corrected_re","verify_total_errors_corrected","verify_correction_algo_invocations","verify_gigabytes_processed","verify_total_uncorrected_errors","non_medium_error_count") #列改名 colnames(data_fault) <- c("id","serial_number","update_time","smart_health_status","current_drive_temperature","drive_trip_temperature","elements_in_grown_defect_list","manufactured_time","cycle_count","load_unload_count","load_unload_count","load_unload_cycles","blocks_sent_to_initiator","blocks_received_from_initiator","blocks_read_from_cache","num_commands_size_not_larger_than_segment_size ","num_commands_size_larger_than_segment_size","num_hours_powered_up","num_minutes_next_test","read_corrected_ecc_fast","read_corrected_ecc_delayed","read_corrected_re","read_total_errors_corrected","read_correction_algo_invocations","read_gigabytes_processed","read_total_uncorrected_errors","write_corrected_ecc_fast","write_corrected_ecc_delayed","write_corrected_re","write_total_errors_corrected","write_correction_algo_invocations","write_gigabytes_processed","write_total_uncorrected_errors","verify_corrected_ecc_fast","verify_corrected_ecc_delayed","verify_corrected_re","verify_total_errors_corrected","verify_correction_algo_invocations","verify_gigabytes_processed","verify_total_uncorrected_errors","non_medium_error_count") #列改名 colnames(data_fault_test) <- c("id","serial_number","update_time","smart_health_status","current_drive_temperature","drive_trip_temperature","elements_in_grown_defect_list","manufactured_time","cycle_count","load_unload_count","load_unload_count","load_unload_cycles","blocks_sent_to_initiator","blocks_received_from_initiator","blocks_read_from_cache","num_commands_size_not_larger_than_segment_size ","num_commands_size_larger_than_segment_size","num_hours_powered_up","num_minutes_next_test","read_corrected_ecc_fast","read_corrected_ecc_delayed","read_corrected_re","read_total_errors_corrected","read_correction_algo_invocations","read_gigabytes_processed","read_total_uncorrected_errors","write_corrected_ecc_fast","write_corrected_ecc_delayed","write_corrected_re","write_total_errors_corrected","write_correction_algo_invocations","write_gigabytes_processed","write_total_uncorrected_errors","verify_corrected_ecc_fast","verify_corrected_ecc_delayed","verify_corrected_re","verify_total_errors_corrected","verify_correction_algo_invocations","verify_gigabytes_processed","verify_total_uncorrected_errors","non_medium_error_count") #列改名 data_health$label <- 0
data_fault$label <- 1
data_fault_test$label <- 1 #决策树
n <- nrow(data_fault)
dataNewTraining<-rbind(data_fault,data_health[sample(1:(nrow(data_health[1:(nrow(data_health)*0.7),])),n*20),])
dataNewTest<-rbind(data_fault_test,data_health[-(1:(nrow(data_health)*0.7)),]) pdf(file='D:/smart_data0608/smartDT_last24h.pdf',family="GB1")
dt <- rpart(label~ current_drive_temperature + elements_in_grown_defect_list + read_corrected_ecc_fast + read_corrected_ecc_delayed + read_corrected_re + read_total_errors_corrected + read_correction_algo_invocations + read_gigabytes_processed + read_total_uncorrected_errors + write_corrected_ecc_fast + write_corrected_ecc_delayed + write_corrected_re + write_total_errors_corrected + write_correction_algo_invocations + write_gigabytes_processed + write_total_uncorrected_errors,data = dataNewTraining, method = "class")
plot(dt,main="smartDT");text(dt)
dev.off() rawPredictScore = predict(dt,dataNewTest)
predictScore <- data.frame(rawPredictScore)
predictScore$label <- 2
predictScore[predictScore$X0 > predictScore$X1,][,"label"]=0
predictScore[predictScore$X0 <= predictScore$X1,][,"label"]=1 write.table(data.frame(predictScore$label,dataNewTest$label,dataNewTest$update_time,dataNewTest$serial_number), file="D:/smart_data0608/smartTestSetWithSerNO_last24h.txt",row.names= F ,col.names= F ,sep="\t")

  

分类结果:

//smartTestSetWithSerNO_last24h
健康样本数/健康判为故障样本数:583670/978
健康磁盘数/健康判为故障磁盘数:4150/12
健康样本预测率为:0.9983243956345195
健康盘预测率为:0.9971084337349397
--------------------------------
故障样本数/故障判为故障样本数:170/169
故障磁盘数/故障判为故障磁盘数:11/11
故障样本预测率为:0.9941176470588236
故障盘预测率为:1.0

R语言决策树分类模型的更多相关文章

  1. R语言︱LDA主题模型——最优主题...

    R语言︱LDA主题模型——最优主题...:https://blog.csdn.net/sinat_26917383/article/details/51547298#comments

  2. 基于R语言的ARIMA模型

    A IMA模型是一种著名的时间序列预测方法,主要是指将非平稳时间序列转化为平稳时间序列,然后将因变量仅对它的滞后值以及随机误差项的现值和滞后值进行回归所建立的模型.ARIMA模型根据原序列是否平稳以及 ...

  3. R语言︱决策树族——随机森林算法

    每每以为攀得众山小,可.每每又切实来到起点,大牛们,缓缓脚步来俺笔记葩分享一下吧,please~ --------------------------- 笔者寄语:有一篇<有监督学习选择深度学习 ...

  4. R语言与分类算法的绩效评估(转)

    关于分类算法我们之前也讨论过了KNN.决策树.naivebayes.SVM.ANN.logistic回归.关于这么多的分类算法,我们自然需要考虑谁的表现更加的优秀. 既然要对分类算法进行评价,那么我们 ...

  5. R语言︱LDA主题模型——最优主题数选取(topicmodels)+LDAvis可视化(lda+LDAvis)

    每每以为攀得众山小,可.每每又切实来到起点,大牛们,缓缓脚步来俺笔记葩分享一下吧,please~ --------------------------- 笔者寄语:在自己学LDA主题模型时候,发现该模 ...

  6. Spark 决策树--分类模型

    package Spark_MLlib import org.apache.spark.ml.Pipeline import org.apache.spark.ml.classification.{D ...

  7. R语言的ARIMA模型预测

    R通过RODBC连接数据库 stats包中的st函数建立时间序列 funitRoot包中的unitrootTest函数检验单位根 forecast包中的函数进行预测 差分用timeSeries包中di ...

  8. Redhat 5.8系统安装R语言作Arima模型预测

    请见Github博客:http://wuxichen.github.io/Myblog/timeseries/2014/09/02/RJavaonLinux.html

  9. 不知道怎么改的尴尬R语言的ARIMA模型预测

    数据还有很多没弄好,程序还没弄完全好. > read.xlsx("H:/ProjectPaper/论文/1.xlsx","Sheet1") > it ...

随机推荐

  1. 常用HTML转义字符,

    HTML字符实体(Character Entities),转义字符串(Escape Sequence) 为什么要用转义字符串? HTML中<,>,&等有特殊含义(<,> ...

  2. MySQL使用随笔

    001 查看版本 mysql --version mysql > select version(); mysql > status; 002 新建MySQL用户.授权 insert int ...

  3. 文本信息“welcome to java programming!”

    import javax.swing.JOptionPanepublic class welcome {public static void main(string[] arg){JOptionPan ...

  4. ZOJ 3644 Kitty's Game dfs,记忆化搜索,map映射 难度:2

    http://acm.zju.edu.cn/onlinejudge/showProblem.do?problemId=4834 从点1出发,假设现在在i,点数为sta,则下一步的点数必然不能是sta的 ...

  5. TOP 10开源的推荐系统简介

    最近这两年推荐系统特别火,本文搜集整理了一些比较好的开源推荐系统,即有轻量级的适用于做研究的SVDFeature.LibMF.LibFM等,也有重量级的适用于工业系统的 Mahout.Oryx.Eas ...

  6. struts2 ModelDriven 和 Preparable 拦截器

    Struts2 运行流程图-1

  7. iOS9/iOS8界面对比 XCode7

    Xcde7 bate 无需开发这账号(99¥)可以调试程序 目前是测试版 iOS9/iOS8界面对比 (注:左边为iOS8界面,右边为iOS9界面.) 1.新字体 苹果在 iOS9 中使用旧金山字体取 ...

  8. DropMaster

    DropMaster 是4个原生 VCL 控件的集合,在 Delphi 和 C++Builder 中使用.虽然包含在 Delphi 和 C++Builder 中的 VCL 组件允许同一程序内窗口之间的 ...

  9. navtab方法参数以及事件

    参数(options) DOM方式初始化navtab的,推荐使用集合属性data-options定义参数,如果使用data属性定义参数,注意转换成对应的名称. 名称 类型 默认值 描述 id stri ...

  10. poj2777 线段树

    //Accepted 4768 KB 391 ms //线段树,延时标记的应用 //对于每一段,用一个int表示被着色的情况,change标记该段的颜色是否发生整体的改变,即这一段 //用没用被全部涂 ...