# Decision-tree (rpart) classifier that separates healthy disks from failing
# disks using SMART attribute samples.
#
# Inputs (tab-separated, no header row, the string "None" marks a missing
# value), all read from `data_dir`:
#   smart_data_section_good_15.txt               healthy samples
#   smart_data_section_failTrainSet_last24h.txt  fault samples, training set
#   smart_data_section_failTestSet_last24h.txt   fault samples, test set
# Outputs, written to `data_dir`:
#   smartDT_last24h.pdf                 plot of the fitted tree
#   smartTestSetWithSerNO_last24h.txt   predicted label, true label,
#                                       update_time, serial_number per test row

rm(list = ls())
gc()
# memory.limit() exists only on Windows and is a warning-only stub from
# R 4.2.0 onwards, so call it only where it still has an effect.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  memory.limit(4000)
}
library(corrplot)
library(rpart)

data_dir <- "D:/smart_data0608"

# Column names shared by all three input files. Using a single vector keeps
# the three data frames identical in layout, which rbind() below relies on.
# NOTE(review): "load_unload_count" appears twice and
# "num_commands_size_not_larger_than_segment_size " ends with a space; both
# are kept as in the original script because the true names cannot be
# determined from here. Neither column is used by the model.
smart_columns <- c(
  "id", "serial_number", "update_time", "smart_health_status",
  "current_drive_temperature", "drive_trip_temperature",
  "elements_in_grown_defect_list", "manufactured_time", "cycle_count",
  "load_unload_count", "load_unload_count", "load_unload_cycles",
  "blocks_sent_to_initiator", "blocks_received_from_initiator",
  "blocks_read_from_cache",
  "num_commands_size_not_larger_than_segment_size ",
  "num_commands_size_larger_than_segment_size",
  "num_hours_powered_up", "num_minutes_next_test",
  "read_corrected_ecc_fast", "read_corrected_ecc_delayed",
  "read_corrected_re", "read_total_errors_corrected",
  "read_correction_algo_invocations", "read_gigabytes_processed",
  "read_total_uncorrected_errors",
  "write_corrected_ecc_fast", "write_corrected_ecc_delayed",
  "write_corrected_re", "write_total_errors_corrected",
  "write_correction_algo_invocations", "write_gigabytes_processed",
  "write_total_uncorrected_errors",
  "verify_corrected_ecc_fast", "verify_corrected_ecc_delayed",
  "verify_corrected_re", "verify_total_errors_corrected",
  "verify_correction_algo_invocations", "verify_gigabytes_processed",
  "verify_total_uncorrected_errors",
  "non_medium_error_count"
)

# Read one SMART dump from `data_dir` and apply the shared column names.
read_smart <- function(file_name) {
  smart <- read.csv(
    file.path(data_dir, file_name),
    header = FALSE, sep = "\t", na.strings = "None"
  )
  colnames(smart) <- smart_columns
  smart
}

# Healthy data
data_health <- read_smart("smart_data_section_good_15.txt")
# Fault data - training set
data_fault <- read_smart("smart_data_section_failTrainSet_last24h.txt")
# Fault data - test set
data_fault_test <- read_smart("smart_data_section_failTestSet_last24h.txt")

# Class label: 0 = healthy, 1 = fault.
data_health$label <- 0
data_fault$label <- 1
data_fault_test$label <- 1

# ---- Decision tree ----
n <- nrow(data_fault)

# The first 70% of the healthy rows form the pool that training rows are
# drawn from; the remaining 30% are held out for testing.
n_health_pool <- floor(nrow(data_health) * 0.7)
in_health_pool <- seq_len(nrow(data_health)) <= n_health_pool

# Draw up to 20 healthy rows per fault row. The draw is capped at the pool
# size because sample() without replacement fails when asked for more rows
# than exist.
# NOTE(review): the draw is random; call set.seed() before this point if a
# reproducible split is needed.
health_train_rows <- sample(
  seq_len(n_health_pool),
  min(n * 20, n_health_pool)
)

dataNewTraining <- rbind(data_fault, data_health[health_train_rows, ])
# A logical mask (rather than negative indices) stays correct even when the
# pool is empty.
dataNewTest <- rbind(data_fault_test, data_health[!in_health_pool, ])

dt <- rpart(
  label ~ current_drive_temperature + elements_in_grown_defect_list +
    read_corrected_ecc_fast + read_corrected_ecc_delayed +
    read_corrected_re + read_total_errors_corrected +
    read_correction_algo_invocations + read_gigabytes_processed +
    read_total_uncorrected_errors +
    write_corrected_ecc_fast + write_corrected_ecc_delayed +
    write_corrected_re + write_total_errors_corrected +
    write_correction_algo_invocations + write_gigabytes_processed +
    write_total_uncorrected_errors,
  data = dataNewTraining,
  method = "class"
)

# Plot the tree to PDF; the device is closed even if plotting fails so no
# graphics device is left open.
pdf(file = file.path(data_dir, "smartDT_last24h.pdf"), family = "GB1")
tryCatch(
  {
    plot(dt, main = "smartDT")
    text(dt)
  },
  finally = dev.off()
)

# predict() on a "class" tree returns one probability column per class;
# data.frame() turns the column names "0" and "1" into X0 and X1.
rawPredictScore <- predict(dt, dataNewTest)
predictScore <- data.frame(rawPredictScore)
# Predicted label: 0 when the healthy probability is strictly larger,
# otherwise 1 (ties count as fault). Vectorised so it also works when one of
# the two classes is never predicted.
predictScore$label <- as.numeric(predictScore$X0 <= predictScore$X1)

write.table(
  data.frame(
    predictScore$label,
    dataNewTest$label,
    dataNewTest$update_time,
    dataNewTest$serial_number
  ),
  file = file.path(data_dir, "smartTestSetWithSerNO_last24h.txt"),
  row.names = FALSE, col.names = FALSE, sep = "\t"
)

  

分类结果:

//smartTestSetWithSerNO_last24h
健康样本数/健康判为故障样本数:583670/978
健康磁盘数/健康判为故障磁盘数:4150/12
健康样本预测率为:0.9983243956345195
健康盘预测率为:0.9971084337349397
--------------------------------
故障样本数/故障判为故障样本数:170/169
故障磁盘数/故障判为故障磁盘数:11/11
故障样本预测率为:0.9941176470588236
故障盘预测率为:1.0

R语言决策树分类模型的更多相关文章

  1. R语言︱LDA主题模型——最优主题...

    R语言︱LDA主题模型——最优主题...:https://blog.csdn.net/sinat_26917383/article/details/51547298#comments

  2. 基于R语言的ARIMA模型

    ARIMA模型是一种著名的时间序列预测方法,主要是指将非平稳时间序列转化为平稳时间序列,然后将因变量仅对它的滞后值以及随机误差项的现值和滞后值进行回归所建立的模型.ARIMA模型根据原序列是否平稳以及 ...

  3. R语言︱决策树族——随机森林算法

    每每以为攀得众山小,可.每每又切实来到起点,大牛们,缓缓脚步来俺笔记葩分享一下吧,please~ --------------------------- 笔者寄语:有一篇<有监督学习选择深度学习 ...

  4. R语言与分类算法的绩效评估(转)

    关于分类算法我们之前也讨论过了KNN.决策树.naivebayes.SVM.ANN.logistic回归.关于这么多的分类算法,我们自然需要考虑谁的表现更加的优秀. 既然要对分类算法进行评价,那么我们 ...

  5. R语言︱LDA主题模型——最优主题数选取(topicmodels)+LDAvis可视化(lda+LDAvis)

    每每以为攀得众山小,可.每每又切实来到起点,大牛们,缓缓脚步来俺笔记葩分享一下吧,please~ --------------------------- 笔者寄语:在自己学LDA主题模型时候,发现该模 ...

  6. Spark 决策树--分类模型

    package Spark_MLlib import org.apache.spark.ml.Pipeline import org.apache.spark.ml.classification.{D ...

  7. R语言的ARIMA模型预测

    R通过RODBC连接数据库 stats包中的st函数建立时间序列 funitRoot包中的unitrootTest函数检验单位根 forecast包中的函数进行预测 差分用timeSeries包中di ...

  8. Redhat 5.8系统安装R语言作Arima模型预测

    请见Github博客:http://wuxichen.github.io/Myblog/timeseries/2014/09/02/RJavaonLinux.html

  9. 不知道怎么改的尴尬R语言的ARIMA模型预测

    数据还有很多没弄好,程序还没弄完全好. > read.xlsx("H:/ProjectPaper/论文/1.xlsx","Sheet1") > it ...

随机推荐

  1. spring mvc如何获取问号后的url参数

    @RequestMapping(method=RequestMethod.GET) public ModelAndView allUsers(@RequestParam int page){ Mode ...

  2. git 安装或者更新

    1. 安装编译git时需要的包 # yum install curl-devel expat-devel gettext-devel openssl-devel zlib-devel # yum in ...

  3. getWritableDatabase()与getReadableDatabase()方法

    一旦在程序中得到了SQLiteOpenHelper对象之后,程序无须使用SQLiteDatabase的静态方法创建SQLiteDatabase实例,而且可以使用getWritableDatabase( ...

  4. 一个QQ木马的逆向分析浅谈(附带源码)

    程序流程:首先注册自己程序的窗口以及类等一系列窗口操作,安装了一个定时器,间隔为100ms,功能搜索QQ的类名,如果找到就利用FindWindow("5B3838F5-0C81-46D9-A ...

  5. NOIP 2001解题报告

    第一题:  有形如:ax3+bx2+cx+d=0  这样的一个一元三次方程.给出该方程中各项的系数(a,b,c,d  均为实数),并约定该方程存在三个不同实根(根的范围在-100至100之间),且根与 ...

  6. BindingNavigator操作DatagridView的数据

    参考 http://wenku.baidu.com/link?url=NWfEfArPZvDO_aI-xEKBHVGoZY9wQO_Oty_GCsGLiPspheCzFYLf_dytuWAqN2_0A ...

  7. ubuntu 14.04 上安装有道词典

    Ubuntu 14.04用户在安装前要更新系统,即update&dist-upgrade. 下载地址:32/64bits http://codown.youdao.com/cidian/lin ...

  8. centos=>gsutil,iptables

    sudo apt-get remove --purge gsutil sudo easy_install -U pip  sudo pip2 install gsutil gsutil ls gs:/ ...

  9. struts2 类型转换

    概述 从一个 HTML 表单到一个 Action 对象, 类型转换是从字符串到非字符串. 在 struts2 中, 把请求参数映射到 action  属性的工作由 Parameters 拦截器负责, ...

  10. 面试题目-atof与ftoa

    /////////////////////////////////////////////////////////////////////////////// // // FileName : ato ...