# coding: utf-8

# In[6]:

import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc
from imblearn.over_sampling import SMOTE

# In[7]:

# Load the sampled dataset from a fixed local CSV path.
data = pd.read_csv(
    r"D:\Users\sgg91044\Desktop\Copy of sampling.csv"
)

# In[8]:

# Coerce the columns at positions 5..21 to numeric values; entries that
# cannot be parsed become NaN (errors='coerce').
data.iloc[:, 5:22] = data.iloc[:, 5:22].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)

# In[9]:


# In[10]:

# Store the label column with a categorical dtype.
data["Target"] = data["Target"].astype("category")

# In[11]:

# Separate the features (every column except 'Target') from the label.
X = data.drop(columns=['Target'])
Y = data["Target"]

# In[13]:


# In[14]:


# In[64]:


# In[8]:

# Fill missing values in the first 18 columns of X with that column's
# median, computed over its non-missing entries only.
# NOTE(review): the medians are computed on the full dataset, i.e. before
# the train/test split further down - confirm that this is intended.
for i in range(18):
    column = X.iloc[:, i]
    med = np.median(column[column.notna()])
    X.iloc[:, i] = column.fillna(med)

# In[9]:

# Normalizer instance; it is only constructed here and is not applied to
# the data anywhere in the visible code.
nz = Normalizer()

# In[15]:

# Hold out 20% of the rows as a test set; the seed is fixed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=0,
    test_size=0.2,
)

# In[16]:

# Oversample the training split with SMOTE (seeded); the test split is
# left untouched.
# NOTE(review): `ratio` and `fit_sample` are the older imbalanced-learn
# names (later versions use `sampling_strategy` / `fit_resample`) -
# confirm against the installed version.
sm = SMOTE(ratio=1.0, random_state=12)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)

# In[17]:

# Show the class counts before and after oversampling.
print(
    y_train.value_counts(),
    np.bincount(y_train_smote),
)

# In[18]:

from sklearn.ensemble import RandomForestClassifier

# Make the random forest classifier: 100 trees, fixed seed, out-of-bag
# scoring enabled, progress output on, all available cores (n_jobs=-1).
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=50,
    verbose=1,
    oob_score=True,
    n_jobs=-1,
)

# In[19]:

# Train on the training data.
# The estimator has to be fitted before predict() can be called; without
# this step predict() raises NotFittedError.
# NOTE(review): fitting on the SMOTE-resampled split is assumed, since that
# is the balanced training set prepared earlier - confirm this matches the
# original notebook cell.
random_forest.fit(x_train_smote, y_train_smote)

# In[20]:


# In[21]:

# Make predictions on the test data
y_pred = random_forest.predict(X_test)

# In[22]:


# In[23]:


# In[24]:


# In[25]:

# Share of held-out samples predicted correctly, as a percentage rounded
# to two decimals.
print(
    "Accuracy of Random_forest:",
    round(accuracy_score(y_test, y_pred) * 100, 2),
    "%",
)

# In[26]:

# Recall (reported as sensitivity) on the held-out samples, as a
# percentage rounded to two decimals.
print(
    "Sensitivity of Random_forest:",
    round(recall_score(y_test, y_pred) * 100, 2),
    "%",
)

