# Classify whether a bank customer should be granted a loan.
#
# Pipeline: load the CSV, drop incomplete rows, shuffle, split into a
# training and a test partition, fit a logistic regression, then report
# accuracy, AUC and ROC curves for both partitions.
import pandas
import numpy
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score

# Columns used as model inputs, and the column holding the class label.
# Defined once so that fit / predict / predict_proba always see the same
# features in the same order.
FEATURES = ['Age', 'Gender', 'AppAmount', 'Occupation',
            'LoanUse', 'Company', 'Salary']
TARGET = 'Label'

data = pandas.read_csv("datas.csv")
# Drop every row that has at least one missing value.
data = data.dropna()

# Randomly shuffle our data for the training and test set
admissions = data.loc[numpy.random.permutation(data.index)]

# Split dataset: the first num_train shuffled rows are used for training,
# all remaining rows for testing.
# NOTE(review): assumes datas.csv keeps more than num_train rows after
# dropna(); otherwise the test partition is empty -- confirm.
num_train = 14968
data_train = admissions[:num_train]
data_test = admissions[num_train:]

# Fit logistic regression to the label with the features, using the
# training set only.
logistic_model = LogisticRegression()
logistic_model.fit(data_train[FEATURES], data_train[TARGET])

# Print the model's coefficients
print(logistic_model.coef_)

# .predict() returns hard class labels (threshold of 0.50 for a binary label)
predicted = logistic_model.predict(data_train[FEATURES])

# The mean of the element-wise "prediction == truth" array is the accuracy
accuracy_train = (predicted == data_train[TARGET]).mean()

# Print the accuracy
print("Accuracy in Training Set = {s}".format(s=accuracy_train))

# Predicted labels for the held-out test set
predicted = logistic_model.predict(data_test[FEATURES])

# What proportion of our predictions were true
accuracy_test = (predicted == data_test[TARGET]).mean()
print("Accuracy in Test Set = {s}".format(s=accuracy_test))

# Predicted probabilities; column 1 is the probability of the second entry
# of logistic_model.classes_.
# NOTE(review): presumably Label is 0/1 so this is P(Label == 1) -- confirm.
train_probs = logistic_model.predict_proba(data_train[FEATURES])[:, 1]
test_probs = logistic_model.predict_proba(data_test[FEATURES])[:, 1]

# Compute auc for training set
auc_train = roc_auc_score(data_train[TARGET], train_probs)

# Compute auc for test set
auc_test = roc_auc_score(data_test[TARGET], test_probs)

# Difference in auc values (train minus test)
auc_diff = auc_train - auc_test

# Compute ROC curves; roc_curve returns (fpr, tpr, thresholds)
roc_train = roc_curve(data_train[TARGET], train_probs)
roc_test = roc_curve(data_test[TARGET], test_probs)

# Plot false positive rate (x) against true positive rate (y)
plt.plot(roc_train[0], roc_train[1])
plt.plot(roc_test[0], roc_test[1])

