# coding: utf-8

# In[2]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
# Load the reference table and create the Normalizer that later cells reuse as `nz`.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
nz = Normalizer()
# Normalise the two column groups in the original order: positions 8-9 first, then 0-2.
for col_span in (slice(8, 10), slice(0, 3)):
    span_frame = data.iloc[:, col_span]
    data.iloc[:, col_span] = pd.DataFrame(
        nz.fit_transform(span_frame), columns=span_frame.columns
    )

# In[1]:

import pandas as pd
import numpy as np

# In[151]:

data = pd.read_csv(r"D:\Users\sgg91044\Desktop\auto_data\5mins_data_2.csv")
data.head()

# In[152]:

# Drop the first column and the flag/bookkeeping columns, then give the seven
# remaining columns the names the pivot below expects.
data = data.iloc[:, 1:]
data = data.drop(columns=['ooc', 'oos', "waferid", "Step", "finishtime", "parametername"])
data.columns = ["eqpid", "chamber", "lotid", "wafer", "param_name", "recipe", "data"]

# One row per (eqpid, chamber, lotid, wafer, recipe), one column per parameter;
# duplicate readings for the same key are summed.
pivoted = data.pivot_table(
    index=['eqpid', 'chamber', 'lotid', 'wafer', 'recipe'],
    columns="param_name",
    values="data",
    aggfunc=np.sum,
)
pivoted = pivoted.reset_index()

columns = ["eqpid", "chamber", "lotid", "wafer", "recipe", "ETCM_PHA4", "ETCM_PHB4", "ETCM_PHC4", "HELK_MEAN", "LOWERCHM_PRESS", "PBK4", "RR13_MAX.", "RR13_MEAN", "RR23_MAX.", "RR23_MEAN", "THR3_MAX.", "THR3_MAX._DIFF", "THR3_MEAN", "THR3_MEAN_DIFF", "THR3_MEAN_SLOPE"]
# `reindex` keeps exactly these columns in this order and fills any parameter
# missing from the pivot with NaN. It replaces the former empty-frame merge
# followed by `reindex_axis`, which no longer exists in current pandas.
final = pivoted.reindex(columns=columns)

# In[153]:

final

# In[154]:

# Keep only the measurement columns, then discard every row with a missing value.
final = (
    final
    .drop(columns=["eqpid", "chamber", "lotid", "wafer", "recipe"])
    .dropna(axis=0, how='any')
)

# In[155]:

final

# In[156]:

# `nz` is the Normalizer created in the cell that reads normalizing_example.csv.
# With the five id columns dropped, positions 8:10 are RR23_MAX. and RR23_MEAN.
# NOTE(review): sklearn's Normalizer rescales each row to unit norm and learns
# nothing in fit -- confirm a column-wise scaler was not intended.
final.iloc[:,8:10]=nz.transform(final.iloc[:,8:10])
final

# In[157]:

# Positions 0:3 are ETCM_PHA4, ETCM_PHB4 and ETCM_PHC4.
final.iloc[:,0:3]=nz.transform(final.iloc[:,0:3])

# In[158]:

final

# In[20]:

# SUM_ETCM: element-wise sum of the three (already normalised) ETCM phase columns.
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)
final

# In[166]:

data = pd.read_csv(r"D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03.csv")
data.head()

# In[167]:

# Discard the flag/rank columns, then pivot to one row per wafer run with one
# column per parameter (duplicate readings for the same key are summed).
data = data.drop(columns=['ooc', 'oos', 'RNK'])
pivoted = data.pivot_table(
    index=['eqpid', 'Chamber', 'lotid', 'slotid', 'stage', 'Recipie_Name', 'finishtime'],
    columns="Param_Name",
    values="data1",
    aggfunc=np.sum,
)

# In[171]:

pivoted

# In[173]:

# In[177]:

import pandas as pd
import numpy as np
pivoted = pivoted.reset_index()
columns = ["eqpid", "Chamber", "lotid", "slotid", "Recipie_Name", "ETCM_PHA4", "ETCM_PHB4", "ETCM_PHC4", "HELK_MEAN", "LOWERCHM_PRESS", "PBK4", "RR13_MAX.", "RR13_MEAN", "RR23_MAX.", "RR23_MEAN", "THR3_MAX.", "THR3_MAX._DIFF", "THR3_MEAN", "THR3_MEAN_DIFF", "THR3_MEAN_SLOPE"]
# `reindex` selects these columns in this order and fills absent parameters with
# NaN. It replaces the former empty-frame merge followed by `reindex_axis`,
# which no longer exists in current pandas.
final = pivoted.reindex(columns=columns)
# Rows with any missing value are not scored.
final = final.dropna(axis=0, how='any')

# In[180]:

# Split the frame into identifier columns (`index`) and model features (`final`).
feature_cols = [
    "ETCM_PHA4", "ETCM_PHB4", "ETCM_PHC4", "HELK_MEAN", "LOWERCHM_PRESS",
    "PBK4", "RR13_MAX.", "RR13_MEAN", "RR23_MAX.", "RR23_MEAN",
    "THR3_MAX.", "THR3_MAX._DIFF", "THR3_MEAN", "THR3_MEAN_DIFF", "THR3_MEAN_SLOPE",
]
id_cols = ["eqpid", "Chamber", "lotid", "slotid", "Recipie_Name"]
index = final.drop(columns=feature_cols)
final = final.drop(columns=id_cols)
final.head()

# In[181]:

# Row-normalise positions 8-9 first, then 0-2, with the shared Normalizer `nz`.
for col_span in (slice(8, 10), slice(0, 3)):
    final.iloc[:, col_span] = nz.transform(final.iloc[:, col_span])
# SUM_ETCM
final["SUM_ETCM"] = (
    np.array(final["ETCM_PHA4"])
    + np.array(final["ETCM_PHB4"])
    + np.array(final["ETCM_PHC4"])
)

# In[182]:

final.head()

# In[185]:

# `sklearn.externals.joblib` was removed from newer scikit-learn releases; prefer
# the standalone package and fall back to the bundled copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# NOTE: joblib.load unpickles arbitrary objects -- only load model files from a
# trusted location.
random_forest = joblib.load(r'D:\Users\sgg91044\Desktop\deployment\model_RF.pkl')

# In[186]:

# Class probabilities for every row of the prepared feature frame.
final_pred = random_forest.predict_proba(final)

# In[194]:

final_pred = pd.DataFrame(final_pred)

# In[195]:

# Identifiers, features and predictions go to three separate files; all three
# keep the same row index so they can be joined again.
index.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03index.csv')
final.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03data.csv')
final_pred.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03result.csv')

# In[196]:

index.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03index1.csv')

# In[217]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov.csv')

# In[218]:

data.head()

# In[219]:

# Pivot the long table to one row per wafer run, summing duplicate readings.
data = data.drop(columns=["layer_id", "ooc", "oos"])
pivoted = data.pivot_table(
    index=[
        'eqpid', 'Chamber', 'lotid', 'slotid',
        'defect_count', 'stage', 'Recipie_Name', 'finishtime',
    ],
    columns="Param_Name",
    values="data1",
    aggfunc=np.sum,
)

# In[220]:

pivoted

# In[226]:

pivoted.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_pivoted.csv')

# In[227]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_pivoted.csv')
data.head()

# In[228]:

# `Index` keeps whatever remains after the measurement columns and Target are removed.
Index = data.drop(
    columns=[
        "ETCM_PHA4", "ETCM_PHB4", "ETCM_PHC4", "HELK_MEAN", "LOWERCHM_PRESS",
        "PBK4", "RR13_MAX.", "RR13_MEAN", "RR23_MAX.", "RR23_MEAN",
        "THR3_MAX.", "THR3_MAX._DIFF", "THR3_MEAN", "THR3_MEAN_DIFF",
        "THR3_MEAN_SLOPE", "Target",
    ]
)
Index.head()

# In[229]:

# `data` keeps the complement: everything except the identifier columns.
data = data.drop(
    columns=[
        "eqpid", "Chamber", "lotid", "slotid",
        "defect_count", "stage", "Recipie_Name", "finishtime",
    ]
)
data.head()

# In[232]:

# Rows labelled Target == 0.
good_wafer = data[data.Target == 0]
good_wafer

# In[233]:

good_wafer.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_good.csv')

# In[234]:

good_wafer = pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_good.csv')
good_wafer.head()
# The identifier columns were already dropped from `data` before `good_wafer`
# was written, so a strict drop raises KeyError when the cells run in order.
# errors="ignore" still removes them whenever the file does contain them.
good_wafer = good_wafer.drop(
    columns=["eqpid", "Chamber", "lotid", "slotid", "defect_count", "stage", "Recipie_Name", "finishtime"],
    errors="ignore",
)

# In[240]:

# Fill missing values in the first 16 columns with each column's median of the
# non-missing values.
for i in range(0, 16):
    med = np.median(good_wafer.iloc[:, i].dropna())
    good_wafer.iloc[:, i] = good_wafer.iloc[:, i].fillna(med)

# In[242]:

good_wafer.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_good_imputed.csv')

# In[236]:

# Rows labelled Target == 1.
bad_wafer = data[data.Target == 1]
bad_wafer
bad_wafer.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_badd.csv')

# In[213]:

# NOTE(review): this cell carries an older execution count and replaces the
# November selection above with the Sep/Oct file, yet the result is saved below
# as Nov_bad_imputed.csv -- confirm which input is intended.
bad_wafer = pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_bad.csv')
bad_wafer.head()
bad_wafer = bad_wafer.drop(columns=["eqpid", "Chamber", "lotid", "slotid", "defect_count", "stage", "Recipie_Name", "finishtime"])

# In[237]:

# Fill missing values in the first 15 columns with each column's median of the
# non-missing values.
for i in range(0, 15):
    med = np.median(bad_wafer.iloc[:, i].dropna())
    bad_wafer.iloc[:, i] = bad_wafer.iloc[:, i].fillna(med)

# In[238]:

bad_wafer.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_bad_imputed.csv')

# In[259]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_good_imputed.csv')

# In[260]:

data.head()

# In[261]:

data = data.drop(columns=['lotid', 'defect_count', 'stage', 'Recipie_Name', 'finishtime'])
data.head()

# In[262]:

data.eqpid = data.eqpid.astype("category")
data.Chamber = data.Chamber.astype("category")
# One encoder per column. A single LabelEncoder refitted three times only
# remembers its last fit, so the earlier mappings could never be reapplied to
# new data.
le_eqp = LabelEncoder()
le_slot = LabelEncoder()
le_chamber = LabelEncoder()
data["eqp_encoded"] = le_eqp.fit_transform(data.iloc[:, 0])
data["slot_encoded"] = le_slot.fit_transform(data.iloc[:, 2])
data['chamber_encoded'] = le_chamber.fit_transform(data.iloc[:, 1])
# Backward compatibility: `le` used to end up fitted on column 1 (its final
# fit) and is still referenced by later cells.
le = le_chamber

# In[263]:

# Raw values next to their codes, so predictions can be traced back.
Trace_back = pd.concat([data[["eqpid", "Chamber", "slotid"]], data[["eqp_encoded", "chamber_encoded", "slot_encoded"]]], axis=1)
data.head(20)

# In[264]:

data = data.drop(columns=['eqpid', 'Chamber', 'slotid'])
data.head()

# In[265]:

data.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_good_imputed_edcoded.csv')

# In[266]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_good_imputed_edcoded.csv')

# In[267]:

data.head()

# In[268]:

# Re-create the shared Normalizer `nz`. This also overwrites `data` with the
# reference table.
# NOTE(review): sklearn's Normalizer rescales each row to unit norm and learns
# nothing in fit, so fitting on this file does not influence later transforms
# -- confirm a column-wise scaler was not intended.
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#le = LabelEncoder()
#data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
#data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
#data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
nz = Normalizer()
data.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data.iloc[:,8:10]),columns=data.iloc[:,8:10].columns)
data.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data.iloc[:,0:3]),columns=data.iloc[:,0:3].columns)

# In[277]:

data1=pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_good_imputed.csv')

# In[278]:

data1.head(20)

# In[279]:

# Same two normalisation groups as elsewhere, addressed three positions to the
# right (11:13 and 3:6 instead of 8:10 and 0:3).
# presumably the extra leading columns come from the saved file -- verify
data1.iloc[:,11:13]=nz.transform(data1.iloc[:,11:13])
data1.iloc[:,3:6]=nz.transform(data1.iloc[:,3:6])

# In[280]:

# Element-wise sum of the three ETCM phase columns.
data1["SUM_ETCM"]=np.array(data1.ETCM_PHA4)+np.array(data1.ETCM_PHB4)+np.array(data1.ETCM_PHC4)

# In[281]:

# `le` is the single LabelEncoder from the In[262] cell. It was fitted three
# times there, last on column 1, so all three calls below use that one mapping.
# NOTE(review): columns 0 and 2 are therefore encoded with a mapping learned
# from a different column (or raise on unseen labels) -- one encoder per column
# looks intended; confirm.
data1["eqp_encoded"] = le.transform(data1.iloc[:,0])
data1["slot_encoded"] = le.transform(data1.iloc[:,2])
data1['chamber_encoded'] = le.transform(data1.iloc[:,1])

# In[276]:

# NOTE(review): `data1` was loaded from Nov_good_imputed.csv above but is saved
# under a sep_oct_* name; this cell's execution count predates the load --
# confirm the intended output file.
data1.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_good_imputed_encoded.csv')

# In[3]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data1.csv')
data.head()

# In[4]:

data = data.drop(columns=["step", 'parametername'])
data.head()

# In[5]:

# Long -> wide: one row per wafer run, one column per parameter, duplicates summed.
pivoted = data.pivot_table(
    index=['eqpid', 'chamber', 'lotid', 'slotid', 'stage', 'recipe', 'finishtime'],
    columns="Param_Name",
    values="data1",
    aggfunc=np.sum,
)

# In[6]:

pivoted.to_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data1_pivoted.csv')

# In[30]:

# Reading the file back turns the pivot's index levels into ordinary columns.
data = pd.read_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data1_pivoted.csv')
data

# In[31]:

data = data.drop(
    columns=["stage", "finishtime", "recipe", 'HELK_MAX.']
)
data.head()

# In[32]:

# Fill missing values in columns 4..18 with each column's median of the
# non-missing values.
for i in range(4, 19):
    med = np.median(data.iloc[:, i].dropna())
    data.iloc[:, i] = data.iloc[:, i].fillna(med)

# In[33]:

# Re-create the shared Normalizer from the reference table, then apply it to
# the two column groups of `data`.
# NOTE(review): sklearn's Normalizer rescales each row to unit norm and learns
# nothing in fit -- confirm a column-wise scaler was not intended.
data1 = pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
nz = Normalizer()
data1.iloc[:, 8:10] = pd.DataFrame(nz.fit_transform(data1.iloc[:, 8:10]), columns=data1.iloc[:, 8:10].columns)
data1.iloc[:, 0:3] = pd.DataFrame(nz.fit_transform(data1.iloc[:, 0:3]), columns=data1.iloc[:, 0:3].columns)
data.iloc[:, 12:14] = nz.transform(data.iloc[:, 12:14])
data.iloc[:, 4:7] = nz.transform(data.iloc[:, 4:7])

# In[34]:

def encode_eqpid(eqpid):
    """Map an equipment id to a zero-based integer code.

    The last two characters of ``eqpid`` are parsed as an integer and
    decremented by one, e.g. ``"AEM201"`` -> ``0``.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid[-2:]) - 1

def encode_chamber(chamber):
    """Return 0 for chamber ``'A'`` and 1 for every other value."""
    if chamber == 'A':
        return 0
    return 1

def encode_slotid(slotid):
    """Return the zero-based slot index ``slotid - 1`` for positive slots.

    Any input that is not greater than zero (including NaN) yields ``None``.
    """
    if slotid > 0:
        return slotid - 1
    return None

# Replace the three identifier columns by their integer codes, stored as categoricals.
data["eqpid"] = data["eqpid"].apply(encode_eqpid).astype("category")
data["chamber"] = data["chamber"].apply(encode_chamber).astype("category")
data["slotid"] = data["slotid"].apply(encode_slotid).astype("category")

# In[35]:

data = data.drop(columns=["lotid"])

# In[36]:

data

# In[107]:

import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc

# In[167]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\model_data_1.csv')

# In[168]:

# The three identifier columns are treated as categoricals.
for cat_col in ("eqpid1", "chamber1", "wafer1"):
    data[cat_col] = data[cat_col].astype("category")

# In[169]:

from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier

# 80/20 split with a fixed seed; `Target` is the label.
data["Target"] = data["Target"].astype("category")
y = data["Target"]
X = data.drop(columns='Target')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=8)

# Oversample the training part only.
sm = SMOTE(random_state=12, ratio=1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)

# Fit the random forest on the resampled data and score the untouched test part.
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=50,
    oob_score=True,
    verbose=1,
    n_jobs=-1,
)
random_forest.fit(x_train_smote, y_train_smote)
y_pred = random_forest.predict(X_test)

# In[170]:

print(classification_report(y_test, y_pred))

# In[135]:

# `sklearn.externals.joblib` was removed from newer scikit-learn releases; prefer
# the standalone package and fall back to the bundled copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

joblib.dump(random_forest, r'D:\Users\sgg91044\Desktop\deployment\model_RF_test.pkl')

# In[136]:

# The import is repeated so this cell also runs on its own.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted model files.
random_forest = joblib.load(r'D:\Users\sgg91044\Desktop\deployment\model_RF_test.pkl')

# In[221]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\Some wafer Examples from Engg team\OK\test_AD_ok_pivoted_15.csv')

# In[222]:

data.head()

# In[223]:

# Keep the lot ids aside so predictions can be matched back to lots.
lotid = data.lotid
data = data.drop(columns=['lotid', 'recipe', 'finishtime'])

# In[224]:

# Fill missing values in columns 3..17 with each column's median of the
# non-missing values.
# NOTE(review): the medians come from this scoring file itself, not from the
# training data -- confirm that is intended.
for i in range(3, 18):
    med = np.median(data.iloc[:, i].dropna())
    data.iloc[:, i] = data.iloc[:, i].fillna(med)

# In[225]:

# Re-create the shared Normalizer from the reference table, then apply it to
# the two column groups of `data`.
# NOTE(review): sklearn's Normalizer rescales each row to unit norm and learns
# nothing in fit -- confirm a column-wise scaler was not intended.
data1 = pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
nz = Normalizer()
data1.iloc[:, 8:10] = pd.DataFrame(nz.fit_transform(data1.iloc[:, 8:10]), columns=data1.iloc[:, 8:10].columns)
data1.iloc[:, 0:3] = pd.DataFrame(nz.fit_transform(data1.iloc[:, 0:3]), columns=data1.iloc[:, 0:3].columns)
data.iloc[:, 11:13] = nz.transform(data.iloc[:, 11:13])
data.iloc[:, 3:6] = nz.transform(data.iloc[:, 3:6])

# In[226]:

data.head()

# In[227]:

# Element-wise sum of the three ETCM phase columns.
data["SUM_ETCM"] = np.array(data.ETCM_PHA4) + np.array(data.ETCM_PHB4) + np.array(data.ETCM_PHC4)

# In[228]:

def encode_eqpid(eqpid1):
    """Map an equipment id to a zero-based integer code.

    The last two characters of ``eqpid1`` are parsed as an integer and
    decremented by one.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid1[-2:]) - 1

def encode_chamber(chamber1):
    """Return 0 for chamber ``'A'`` and 1 for every other value."""
    if chamber1 == 'A':
        return 0
    return 1

def encode_slotid(wafer1):
    """Return the zero-based index ``wafer1 - 1`` for positive wafer numbers.

    Any input that is not greater than zero (including NaN) yields ``None``.
    """
    if wafer1 > 0:
        return wafer1 - 1
    return None

# Replace the three identifier columns by their integer codes, then mark them
# as categoricals.
data.eqpid1 = data.eqpid1.apply(encode_eqpid)
data.chamber1 = data.chamber1.apply(encode_chamber)
data.wafer1 = data.wafer1.apply(encode_slotid)
data.eqpid1 = data.eqpid1.astype("category")
data.chamber1 = data.chamber1.astype("category")
data.wafer1 = data.wafer1.astype("category")

# In[ ]:

# NOTE(review): no path is passed, so nothing is written to disk; the CSV text
# is only returned. This cell has no execution count -- probably a leftover.
data.to_csv()

# In[229]:

# Predicted class for every prepared row, using the model currently bound to
# `random_forest`.
y_pred = random_forest.predict(data)

# In[230]:

y_pred=pd.DataFrame(y_pred)

# In[219]:

# NOTE(review): the two export cells below carry execution counts older than
# the prediction cell above -- re-run them to be sure the files match `y_pred`.
y_pred.to_csv(r'D:\Users\sgg91044\Desktop\y_pred_ok.csv')

# In[220]:

lotid.to_csv(r'D:\Users\sgg91044\Desktop\lotid_ok.csv')

# In[37]:

# Element-wise sum of the three ETCM phase columns.
# NOTE(review): execution counts In[37]-In[43] suggest these cells originally
# followed the data_01-10 preparation (In[30]-In[36]); run top to bottom they
# act on whatever `data` currently holds -- confirm before relying on the file
# written below.
data["SUM_ETCM"]=np.array(data.ETCM_PHA4)+np.array(data.ETCM_PHB4)+np.array(data.ETCM_PHC4)

# In[38]:

data.head()

# In[43]:

data.to_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data1_test.csv')

# In[137]:

data1 = pd.read_csv(r'D:\Users\sgg91044\Desktop\AEM2_pivotdata_12-13-10-29.csv')
data1.head()
# The three identifier columns are treated as categoricals.
for cat_col in ("eqpid1", "chamber1", "wafer1"):
    data1[cat_col] = data1[cat_col].astype("category")

# In[138]:

y_pred = random_forest.predict(data1)

# In[139]:

print(y_pred)

# In[204]:

data1 = pd.read_csv(r'D:\Users\sgg91044\Desktop\Some wafer Examples from Engg team\OK\test_AD_ok.csv')
data1.head()

# In[205]:

# Long -> wide for the OK example wafers; duplicate readings are summed.
data1 = data1.drop(columns=['waferid', 'Step'])
pivoted = data1.pivot_table(
    index=['eqpid', 'chamber', 'lotid', 'wafer', 'recipe', 'finishtime'],
    columns="param_name",
    values="data",
    aggfunc=np.sum,
)

# In[206]:

pivoted.to_csv(r'D:\Users\sgg91044\Desktop\Some wafer Examples from Engg team\OK\test_AD_ok_pivoted.csv')

# In[152]:

data2 = pd.read_csv(r'D:\Users\sgg91044\Desktop\all_parameters.csv')
data2.head()

# In[153]:

# Long -> wide for the full parameter set; duplicate readings are summed.
data2 = data2.drop(
    columns=['layerid', 'waferid', 'stg', 'stage', 'step', 'parametername', 'ooc', 'oos', 'RNK']
)
pivoted = data2.pivot_table(
    index=['eqpid', 'chamber', 'lotid', 'sloitid', 'defect', 'recipe', 'finishtime'],
    columns="param_name",
    values="data",
    aggfunc=np.sum,
)

# In[154]:

pivoted.to_csv(r'D:\Users\sgg91044\Desktop\all_parameters_pivoted.csv')

# In[159]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\all_parameters_deleted.csv')

# In[160]:

data.head()

# In[161]:

# Fill missing values in columns 7..60 with each column's median of the
# non-missing values.
for i in range(7, 61):
    med = np.median(data.iloc[:, i].dropna())
    data.iloc[:, i] = data.iloc[:, i].fillna(med)
data

# In[162]:

data = data.drop(columns=['lotid', 'defect', 'recipe', 'finishtime'])
data.head()

# In[163]:

def encode_eqpid(eqpid):
    """Map an equipment id to a zero-based integer code.

    The last two characters of ``eqpid`` are parsed as an integer and
    decremented by one.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid[-2:]) - 1

def encode_chamber(chamber):
    """Return 0 for chamber ``'A'`` and 1 for every other value."""
    if chamber == 'A':
        return 0
    return 1

def encode_slotid(slotid):
    """Return the zero-based slot index ``slotid - 1`` for positive slots.

    Any input that is not greater than zero (including NaN) yields ``None``.
    """
    if slotid > 0:
        return slotid - 1
    return None

# Replace the three identifier columns by their integer codes, stored as categoricals.
data["eqpid"] = data["eqpid"].apply(encode_eqpid).astype("category")
data["chamber"] = data["chamber"].apply(encode_chamber).astype("category")
data["slotid"] = data["slotid"].apply(encode_slotid).astype("category")

# In[164]:

data.head()

# In[165]:

from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier

# 80/20 split with a fixed seed; `Target` is the label.
data["Target"] = data["Target"].astype("category")
y = data["Target"]
X = data.drop(columns='Target')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=8)

# Oversample the training part only.
sm = SMOTE(random_state=12, ratio=1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)

# Fit the random forest on the resampled data and score the untouched test part.
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=50,
    oob_score=True,
    verbose=1,
    n_jobs=-1,
)
random_forest.fit(x_train_smote, y_train_smote)
y_pred = random_forest.predict(X_test)

# In[166]:

print(classification_report(y_test, y_pred))

# In[234]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data_pivoted.csv')
data

# In[236]:

data = data.drop(columns=['layer_id', 'stage', 'Recipie_Name', 'finishtime', 'defect_count', 'lotid'])
data.head()

# In[238]:

# Fill missing values in columns 3..17 with each column's median of the
# non-missing values.
for i in range(3, 18):
    med = np.median(data.iloc[:, i].dropna())
    data.iloc[:, i] = data.iloc[:, i].fillna(med)

# Re-create the shared Normalizer from the reference table, then apply it to
# the two column groups of `data`.
# NOTE(review): sklearn's Normalizer rescales each row to unit norm and learns
# nothing in fit -- confirm a column-wise scaler was not intended.
data1 = pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
nz = Normalizer()
data1.iloc[:, 8:10] = pd.DataFrame(nz.fit_transform(data1.iloc[:, 8:10]), columns=data1.iloc[:, 8:10].columns)
data1.iloc[:, 0:3] = pd.DataFrame(nz.fit_transform(data1.iloc[:, 0:3]), columns=data1.iloc[:, 0:3].columns)
data.iloc[:, 11:13] = nz.transform(data.iloc[:, 11:13])
data.iloc[:, 3:6] = nz.transform(data.iloc[:, 3:6])

# In[239]:

def encode_eqpid(eqpid):
    """Map an equipment id to a zero-based integer code.

    The last two characters of ``eqpid`` are parsed as an integer and
    decremented by one.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid[-2:]) - 1

def encode_chamber(Chamber):
    """Return 0 for chamber ``'A'`` and 1 for every other value."""
    if Chamber == 'A':
        return 0
    return 1

def encode_slotid(slotid):
    """Return the zero-based slot index ``slotid - 1`` for positive slots.

    Any input that is not greater than zero (including NaN) yields ``None``.
    """
    if slotid > 0:
        return slotid - 1
    return None

# Replace the three identifier columns by their integer codes, stored as categoricals.
data["eqpid"] = data["eqpid"].apply(encode_eqpid).astype("category")
data["Chamber"] = data["Chamber"].apply(encode_chamber).astype("category")
data["slotid"] = data["slotid"].apply(encode_slotid).astype("category")
data.head()

# In[240]:

from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier

# Extra feature: element-wise sum of the three ETCM phase columns.
data["SUM_ETCM"] = (
    np.array(data["ETCM_PHA4"])
    + np.array(data["ETCM_PHB4"])
    + np.array(data["ETCM_PHC4"])
)

# 80/20 split with a fixed seed; `Target` is the label.
data["Target"] = data["Target"].astype("category")
y = data["Target"]
X = data.drop(columns='Target')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=8)

# Oversample the training part only.
sm = SMOTE(random_state=12, ratio=1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)

# Fit the random forest on the resampled data and score the untouched test part.
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=50,
    oob_score=True,
    verbose=1,
    n_jobs=-1,
)
random_forest.fit(x_train_smote, y_train_smote)
y_pred = random_forest.predict(X_test)

# In[241]:

print(classification_report(y_test, y_pred))

# In[242]:

# `sklearn.externals.joblib` was removed from newer scikit-learn releases; prefer
# the standalone package and fall back to the bundled copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Persist the model currently bound to `random_forest`.
joblib.dump(random_forest, r'D:\Users\sgg91044\Desktop\deployment\model_RF_Mclass.pkl')

# In[250]:

data = pd.read_csv(r'D:\Users\sgg91044\Desktop\Some wafer Examples from Engg team\Defective\test_AD_15.csv')
# Keep the lot ids aside so predictions can be matched back to lots.
lotid = data.lotid
data = data.drop(columns=['lotid', 'recipe', 'finishtime'])

# Fill missing values in columns 3..17 with each column's median of the
# non-missing values.
# NOTE(review): the medians come from this scoring file itself, not from the
# training data -- confirm that is intended.
for i in range(3, 18):
    med = np.median(data.iloc[:, i].dropna())
    data.iloc[:, i] = data.iloc[:, i].fillna(med)

# Re-create the shared Normalizer from the reference table, then apply it to
# the two column groups of `data`.
# NOTE(review): sklearn's Normalizer rescales each row to unit norm and learns
# nothing in fit -- confirm a column-wise scaler was not intended.
data1 = pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
nz = Normalizer()
data1.iloc[:, 8:10] = pd.DataFrame(nz.fit_transform(data1.iloc[:, 8:10]), columns=data1.iloc[:, 8:10].columns)
data1.iloc[:, 0:3] = pd.DataFrame(nz.fit_transform(data1.iloc[:, 0:3]), columns=data1.iloc[:, 0:3].columns)
data.iloc[:, 11:13] = nz.transform(data.iloc[:, 11:13])
data.iloc[:, 3:6] = nz.transform(data.iloc[:, 3:6])
data.head()

# In[251]:

def encode_eqpid(eqpid):
    """Map an equipment id to a zero-based integer code.

    The last two characters of ``eqpid`` are parsed as an integer and
    decremented by one.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid[-2:]) - 1

def encode_chamber(Chamber):
    """Return 0 for chamber ``'A'`` and 1 for every other value."""
    if Chamber == 'A':
        return 0
    return 1

def encode_slotid(slotid):
    """Return the zero-based slot index ``slotid - 1`` for positive slots.

    Any input that is not greater than zero (including NaN) yields ``None``.
    """
    if slotid > 0:
        return slotid - 1
    return None

# Replace the three identifier columns by their integer codes, then mark them
# as categoricals.
data.eqpid = data.eqpid.apply(encode_eqpid)
data.Chamber = data.Chamber.apply(encode_chamber)
data.slotid = data.slotid.apply(encode_slotid)
data.eqpid = data.eqpid.astype("category")
data.Chamber = data.Chamber.astype("category")
data.slotid = data.slotid.astype("category")
data.head()

# In[252]:

# Extra feature: element-wise sum of the three ETCM phase columns.
data["SUM_ETCM"] = np.array(data.ETCM_PHA4) + np.array(data.ETCM_PHB4) + np.array(data.ETCM_PHC4)
# `sklearn.externals.joblib` was removed from newer scikit-learn releases; prefer
# the standalone package and fall back to the bundled copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted model files.
random_forest = joblib.load(r'D:\Users\sgg91044\Desktop\deployment\model_RF_Mclass.pkl')

# In[253]:

y_pred = random_forest.predict(data)

# In[254]:

y_pred = pd.DataFrame(y_pred)
y_pred.to_csv(r'D:\Users\sgg91044\Desktop\y_pred_defect.csv')

我的代码-test models的更多相关文章

  1. [Django]models定义choices 字典中的页面显示值

    问题: 在django的models.py 中,我们定义了一些choices的元组,类似一些字典值,一般都是下拉框或者单多选框,例如 0对应男 1对应女等等 class Area(models.Mod ...

  2. 在线教育平台搭建 预览和models

    一.前言 1.1.项目介绍 在线演示地址:mxonline.mtianyan.cn 开发环境: python:3.6.4 Django:2.0.2 后台管理:xadmin 系统概括: 系统具有完整的用 ...

  3. Django框架----权限组件(具体代码实现)

    1.settings """ Django settings for day80 project. Generated by 'django-admin startpro ...

  4. python测试开发django-11.模型models详解

    前言 Django 模型是与数据库相关的,与数据库相关的代码一般写在 models.py 中,Django 支持 sqlite3, MySQL, PostgreSQL等数据库 只需要在settings ...

  5. 6 Django系列之关于models的sql语句日常用法总结

    preface Django提供了强大的ORM,我们可以通过ORM快速的写出我们想要对数据做什么样操作的代码.下面就说说我在日常工作中的用法: 外键关联精确查询 应用场景:表A host字段关联到了表 ...

  6. Django中models定义的choices字典使用get_FooName_display()在页面中显示值

    问题 在django的models.py 中,我们定义了一些choices的元组,类似一些字典值,一般都是下拉框或者单多选框,例如 0对应男 1对应女等等 看下例子: class Area(model ...

  7. 七:mvc使用CodeFirst(代码优先)创建数据库

    1. 理解EF CodeFirst模式特点 2. 使用CodeFirst模式生成数据库 1. CodeFirst模式(代码优先) Code First是Entity Framework提供的一种新的编 ...

  8. [Django]models中定义的choices 字典在页面中显示值

    在django的models.py 中,我们定义了一些choices的元组,类似一些字典值,一般都是下拉框或者单多选框,例如 0对应男 1对应女等 class Area(models.Model): ...

  9. 网站开发学习Python实现-Django的models学习-生鲜项目(6.3.2)

    @ 目录 1.说明 2.模型类的设计 3.代码的具体实现 4.详情地址 关于作者 1.说明 models是django的很重要的部分,所以深入研究. 本文章的所研究项目为黑马教育python课程中的项 ...

随机推荐

  1. oracle 字符串 正则表达式 拆分,排序,合并

    需求,表数据如:要求圈中的数据,必须根据线芯有序排列. 思路: 1.首先根据分号分隔元素.oracle 很蛋疼,没有提供字符串分隔函数,网上倒是多觉得有点麻烦,耐着性子继续网上找了下,还真让我找到一篇 ...

  2. Mysql 了解changeBuffer 与 purge 调优

    需要删除.新增记录或更新一个数据页时,如果数据页在内存中就直接更新,而如果这个数据页还没有在内存中的话,在不影响数据一致性的前提下,InooDB 会将这些更新操作缓存在 change buffer中, ...

  3. xpath 在firefox,chrome中正常,在requests中不正常的解决。

    经多次测试发现: lxml中的etree格式化以后,直接使用firefox或chrome提取的xpath检索不到内容. 主要是因为tbody # 车种xpathczxx = '/html/body/t ...

  4. throw与throws

    throws可以单独使用(一直上抛) throw要么和try-catch-finally语句配套使用,要么与throws配套使用 /** * 总结: *    1.throws是方法抛出异常.如: p ...

  5. LeetCode 15. 3Sum 16. 3Sum Closest 18. 4Sum

    n数求和,固定n-2个数,最后两个数在连续区间内一左一右根据当前求和与目标值比较移动,如果sum<target,移动较小数,否则,移动较大数 重复数处理: 使i为左至右第一个不重复数:while ...

  6. 【js高程学习笔记】Object类型

    创建一组Object的实例的方式有两种: 方法一: var person = new Object(); person.name = '团子'; person.race = '猫'; person.s ...

  7. C#中异步使用及回调

    1. 一句话理解异步 我叫你去吃饭,叫完你不去,那我就会一直等你,直到你和我一起去吃饭.这叫同步! 我叫你去吃饭,叫完不管你去不去,我都不会等你,我自己去吃饭.这叫异步! 2. 异步使用 static ...

  8. windows mysql 和linux mysql解决乱码问题

    windows : 1找到mysql安装目录的my.ini文件 2修改的里面的内容为 character-set-server=utf8 default-character-set=UTF-8 然后打 ...

  9. ubuntu 下redis的安装简介

    Linux公社:https://www.linuxidc.com/topicnews.aspx?page=2&tid=2 简单介绍下ubuntu下redis的安装方式: 第一种: 1:进入re ...

  10. readfile() file_get_content f

    php.ini:memory_limit memory_limit是设置内存限制的,如果使用readfile()读取文件就会和这个有关, 调用readfile()函数将打开这个文件,并且将文件内容输出 ...