rm(list=ls())
gc() memory.limit(4000)
library(corrplot)
library(rpart)
data_health<-read.csv("D:/smart_data0608/smart_data_section_good_15.txt",header=FALSE,sep="\t",na.strings="None")#读健康数据
data_fault<-read.csv("D:/smart_data0608/smart_data_section_failTrainSet_last24h.txt",header=FALSE,sep="\t",na.strings="None")#读故障数据-训练数据
data_fault_test<-read.csv("D:/smart_data0608/smart_data_section_failTestSet_last24h.txt",header=FALSE,sep="\t",na.strings="None")#读故障数据—测试数据 colnames(data_health) <- c("id","serial_number","update_time","smart_health_status","current_drive_temperature","drive_trip_temperature","elements_in_grown_defect_list","manufactured_time","cycle_count","load_unload_count","load_unload_count","load_unload_cycles","blocks_sent_to_initiator","blocks_received_from_initiator","blocks_read_from_cache","num_commands_size_not_larger_than_segment_size ","num_commands_size_larger_than_segment_size","num_hours_powered_up","num_minutes_next_test","read_corrected_ecc_fast","read_corrected_ecc_delayed","read_corrected_re","read_total_errors_corrected","read_correction_algo_invocations","read_gigabytes_processed","read_total_uncorrected_errors","write_corrected_ecc_fast","write_corrected_ecc_delayed","write_corrected_re","write_total_errors_corrected","write_correction_algo_invocations","write_gigabytes_processed","write_total_uncorrected_errors","verify_corrected_ecc_fast","verify_corrected_ecc_delayed","verify_corrected_re","verify_total_errors_corrected","verify_correction_algo_invocations","verify_gigabytes_processed","verify_total_uncorrected_errors","non_medium_error_count") #列改名 colnames(data_fault) <- c("id","serial_number","update_time","smart_health_status","current_drive_temperature","drive_trip_temperature","elements_in_grown_defect_list","manufactured_time","cycle_count","load_unload_count","load_unload_count","load_unload_cycles","blocks_sent_to_initiator","blocks_received_from_initiator","blocks_read_from_cache","num_commands_size_not_larger_than_segment_size ","num_commands_size_larger_than_segment_size","num_hours_powered_up","num_minutes_next_test","read_corrected_ecc_fast","read_corrected_ecc_delayed","read_corrected_re","read_total_errors_corrected","read_correction_algo_invocations","read_gigabytes_processed","read_total_uncorrected_errors","write_corrected_ecc_fast","write_corrected_ecc_delayed","write_corrected_re","write_total_errors_corrected","write_correction_algo_invocations","write_gigabytes_processed","write_total_uncorrected_errors","verify_corrected_ecc_fast","verify_corrected_ecc_delayed","verify_corrected_re","verify_total_errors_corrected","verify_correction_algo_invocations","verify_gigabytes_processed","verify_total_uncorrected_errors","non_medium_error_count") #列改名 colnames(data_fault_test) <- c("id","serial_number","update_time","smart_health_status","current_drive_temperature","drive_trip_temperature","elements_in_grown_defect_list","manufactured_time","cycle_count","load_unload_count","load_unload_count","load_unload_cycles","blocks_sent_to_initiator","blocks_received_from_initiator","blocks_read_from_cache","num_commands_size_not_larger_than_segment_size ","num_commands_size_larger_than_segment_size","num_hours_powered_up","num_minutes_next_test","read_corrected_ecc_fast","read_corrected_ecc_delayed","read_corrected_re","read_total_errors_corrected","read_correction_algo_invocations","read_gigabytes_processed","read_total_uncorrected_errors","write_corrected_ecc_fast","write_corrected_ecc_delayed","write_corrected_re","write_total_errors_corrected","write_correction_algo_invocations","write_gigabytes_processed","write_total_uncorrected_errors","verify_corrected_ecc_fast","verify_corrected_ecc_delayed","verify_corrected_re","verify_total_errors_corrected","verify_correction_algo_invocations","verify_gigabytes_processed","verify_total_uncorrected_errors","non_medium_error_count") #列改名 data_health$label <- 0
data_fault$label <- 1
data_fault_test$label <- 1 #决策树
n <- nrow(data_fault)
dataNewTraining<-rbind(data_fault,data_health[sample(1:(nrow(data_health[1:(nrow(data_health)*0.7),])),n*20),])
dataNewTest<-rbind(data_fault_test,data_health[-(1:(nrow(data_health)*0.7)),]) pdf(file='D:/smart_data0608/smartDT_last24h.pdf',family="GB1")
dt <- rpart(label~ current_drive_temperature + elements_in_grown_defect_list + read_corrected_ecc_fast + read_corrected_ecc_delayed + read_corrected_re + read_total_errors_corrected + read_correction_algo_invocations + read_gigabytes_processed + read_total_uncorrected_errors + write_corrected_ecc_fast + write_corrected_ecc_delayed + write_corrected_re + write_total_errors_corrected + write_correction_algo_invocations + write_gigabytes_processed + write_total_uncorrected_errors,data = dataNewTraining, method = "class")
plot(dt,main="smartDT");text(dt)
dev.off() rawPredictScore = predict(dt,dataNewTest)
predictScore <- data.frame(rawPredictScore)
predictScore$label <- 2
predictScore[predictScore$X0 > predictScore$X1,][,"label"]=0
predictScore[predictScore$X0 <= predictScore$X1,][,"label"]=1 write.table(data.frame(predictScore$label,dataNewTest$label,dataNewTest$update_time,dataNewTest$serial_number), file="D:/smart_data0608/smartTestSetWithSerNO_last24h.txt",row.names= F ,col.names= F ,sep="\t")

  

分类结果:

//smartTestSetWithSerNO_last24h
健康样本数/健康判为故障样本数:583670/978
健康磁盘数/健康判为故障磁盘数:4150/12
健康样本预测率为:0.9983243956345195
健康盘预测率为:0.9971084337349397
--------------------------------
故障样本数/故障判为故障样本数:170/169
故障磁盘数/故障判为故障磁盘数:11/11
故障样本预测率为:0.9941176470588236
故障盘预测率为:1.0

R语言决策树分类模型的更多相关文章

  1. R语言︱LDA主题模型——最优主题...

    R语言︱LDA主题模型——最优主题...:https://blog.csdn.net/sinat_26917383/article/details/51547298#comments

  2. 基于R语言的ARIMA模型

    A IMA模型是一种著名的时间序列预测方法,主要是指将非平稳时间序列转化为平稳时间序列,然后将因变量仅对它的滞后值以及随机误差项的现值和滞后值进行回归所建立的模型.ARIMA模型根据原序列是否平稳以及 ...

  3. R语言︱决策树族——随机森林算法

    每每以为攀得众山小,可.每每又切实来到起点,大牛们,缓缓脚步来俺笔记葩分享一下吧,please~ --------------------------- 笔者寄语:有一篇<有监督学习选择深度学习 ...

  4. R语言与分类算法的绩效评估(转)

    关于分类算法我们之前也讨论过了KNN.决策树.naivebayes.SVM.ANN.logistic回归.关于这么多的分类算法,我们自然需要考虑谁的表现更加的优秀. 既然要对分类算法进行评价,那么我们 ...

  5. R语言︱LDA主题模型——最优主题数选取(topicmodels)+LDAvis可视化(lda+LDAvis)

    每每以为攀得众山小,可.每每又切实来到起点,大牛们,缓缓脚步来俺笔记葩分享一下吧,please~ --------------------------- 笔者寄语:在自己学LDA主题模型时候,发现该模 ...

  6. Spark 决策树--分类模型

    package Spark_MLlib import org.apache.spark.ml.Pipeline import org.apache.spark.ml.classification.{D ...

  7. R语言的ARIMA模型预测

    R通过RODBC连接数据库 stats包中的st函数建立时间序列 funitRoot包中的unitrootTest函数检验单位根 forecast包中的函数进行预测 差分用timeSeries包中di ...

  8. Redhat 5.8系统安装R语言作Arima模型预测

    请见Github博客:http://wuxichen.github.io/Myblog/timeseries/2014/09/02/RJavaonLinux.html

  9. 不知道怎么改的尴尬R语言的ARIMA模型预测

    数据还有很多没弄好,程序还没弄完全好. > read.xlsx("H:/ProjectPaper/论文/1.xlsx","Sheet1") > it ...

随机推荐

  1. Linux学习之CentOS--CentOS6.4下Mysql数据库的安装与配置【转】

      如果要在Linux上做j2ee开发,首先得搭建好j2ee的开发环境,包括了jdk.tomcat.eclipse的安装(这个在之前的一篇随笔中已经有详细讲解了Linux学习之CentOS(七)--C ...

  2. Linux下配置用msmtp和mutt发邮件

    Linux下可以直接用mail命令发送邮件,但是发件人是user@servername,如果机器没有外网的dns,其他人就无法回复.此时,有一个可以使用网络免费邮箱服务的邮件发送程序就比较重要了.ms ...

  3. 用户上网的基本流程图与DNS解析原理

    1.用户上网发送请求,首先确认本地的hosts中是否含有域名,有则进行ip访问,如果没有呢?看本机的display缓存中有没有访问网站的ip,有就直接去访问 那么如果本地的hosts和缓存都没有呢?这 ...

  4. mrg_myIsam分表引擎用法

    CREATE TABLE `test`.`article_0` ( `id` BIGINT( 20 ) NOT NULL , `subject` VARCHAR( 200 ) NOT NULL , ` ...

  5. ASP.NET伪静态 UrlRewrite(Url重写) 实现和配置

    核心提示:大家一定经常在网络上看到很多网站的地址后缀都是用XX.HTML或者XX.ASPX等类似静态文件的标示来操作的吧,那么大家有怀疑过他真的是一个一个的静态生成的文件么,静态文件的生成的优缺有好有 ...

  6. Admob(6.12.x)符号未定义错误的解决方法(IOS)

    在升级Admob的SDK版本到6.12.x时, 按照官方文档操作(https://developers.google.com/mobile-ads-sdk/docs/#ios), 添加如下framew ...

  7. setsockopt的作用

    功能描述:        获取或者设置与某个套接字关联的选 项.选项可能存在于多层协议中,它们总会出现在最上面的套接字层.当操作套接字选项时,选项位于的层和选项的名称必须给出.为了操作套接字层的选项, ...

  8. PowerMock.expectNew(Class<T> type, Class<?>[] parameterTypes, Object... arguments)

    1:PowerMock.expectNew(Class<T> type, Class<?>[] parameterTypes, Object... arguments) 如果你 ...

  9. 给出一个长度为n的数列,请对于每一个数,输出他右边第一个比他大的数。n<=100000.

    RT,一个ppt里看到的题,不过没讲做法.百度上基本搜不到.自己想了个做法,理论上可行,复杂度也是O(nlogn). 首先,做一次RMQ,求区间最大值. 对于任意一个数s[i],可以用logn的时间求 ...

  10. Noip2014 提高组 T2 联合权值 连通图+技巧

    联合权值 描述 无向连通图 G 有 n 个点,n-1 条边.点从 1 到 n 依次编号,编号为 i 的点的权值为 WiWi, 每条边的长度均为 1.图上两点(u, v)的距离定义为 u 点到 v 点的 ...