
import numpy as np
import tflearn
from tflearn.layers.core import dropout
from tflearn.layers.normalization import batch_normalization
from tflearn.data_utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import sys


class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """tflearn callback that aborts training once accuracy is good enough.

    Training is stopped by raising ``StopIteration`` from ``on_epoch_end``
    when both the training accuracy and the validation accuracy have
    reached ``val_acc_thresh``. The caller of ``model.fit`` is expected to
    catch that exception.
    """

    def __init__(self, val_acc_thresh):
        """ Note: We are free to define our init function however we please.

        Args:
            val_acc_thresh: accuracy value that both ``training_state.val_acc``
                and ``training_state.acc_value`` must reach before training
                is stopped.
        """
        # Store a validation accuracy threshold, which we can compare against
        # the current validation accuracy at, say, each epoch, each batch step, etc.
        self.val_acc_thresh = val_acc_thresh

    def on_epoch_end(self, training_state):
        """
        This is the final method called in trainer.py in the epoch loop.
        We can stop training and leave without losing any information with a simple exception.

        Raises:
            StopIteration: when both accuracies have reached the threshold.
        """
        #print dir(training_state)
        val_acc = training_state.val_acc
        acc = training_state.acc_value
        # Either metric may still be unset (presumably when no validation set
        # is supplied -- confirm against tflearn); comparing None with a float
        # raises TypeError on Python 3, so skip the check in that case.
        if val_acc is None or acc is None:
            return
        if val_acc >= self.val_acc_thresh and acc >= self.val_acc_thresh:
            # Only announce termination when we really terminate; printing
            # this unconditionally at every epoch end was misleading.
            print("Terminating training at the end of epoch", training_state.epoch)
            raise StopIteration

    def on_train_end(self, training_state):
        """
        Furthermore, tflearn will then immediately call this method after we terminate training,
        (or when training ends regardless). This would be a good time to store any additional
        information that tflearn doesn't store already.
        """
        print("Successfully left training! Final model accuracy:", training_state.acc_value)

# Name of every comma-separated field of a record in the feature file,
# in file order. Field 0 is the class label.
cols = [
    "label",
    "flow_cnt",
    "len(srcip_arr)",
    "len(dstip_arr)",
    "subdomain_num",
    "uniq_subdomain_ratio",
    "np.average(dns_request_len_arr)",
    "np.average(dns_reply_len_arr)",
    "np.average(subdomain_tag_num_arr)",
    "np.average(subdomain_len_arr)",
    "np.average(subdomain_weird_len_arr)",
    "np.average(subdomain_entropy_arr)",
    "A_rr_type_ratio",
    "incommon_rr_type_rato",
    "valid_ipv4_ratio",
    "uniq_valid_ipv4_ratio",
    "request_reply_ratio",
    "np.max(dns_request_len_arr)",
    "np.max(dns_reply_len_arr)",
    "np.max(subdomain_tag_num_arr)",
    "np.max(subdomain_len_arr)",
    "np.max(subdomain_weird_len_arr)",
    "np.max(subdomain_entropy_arr)",
    "avg_distance",
    "std_distance",
]
#unwanted_cols = set(["uniq_subdomain_ratio", "incommon_rr_type_rato"])
# Alternative (exclusion-based) feature selection; parse_line currently
# selects by wanted_cols instead, so this set is not consulted.
unwanted_cols = set(["uniq_subdomain_ratio", "incommon_rr_type_rato", "np.max(dns_reply_len_arr)", "request_reply_ratio", "uniq_valid_ipv4_ratio", "A_rr_type_ratio"])
# Fields that parse_line keeps (label first, then the model's input features).
wanted_cols = set(['label', 'flow_cnt', 'len(srcip_arr)', 'len(dstip_arr)',
                   'np.average(dns_request_len_arr)', 'np.average(dns_reply_len_arr)',
                   'request_reply_ratio', 'np.max(dns_request_len_arr)',
                   'np.max(dns_reply_len_arr)'])


def parse_line(s):
    """Parse one record of the feature file into a list of floats.

    Parentheses and square brackets are stripped, the record is split on
    commas, and only the fields whose column name (by position in ``cols``)
    is in ``wanted_cols`` are converted and returned, in file order.

    Args:
        s: one text line of the feature file.

    Returns:
        list of float: the label followed by the selected feature values.

    Raises:
        ValueError: if a selected field is not a valid float.
        IndexError: if the record has more fields than ``cols`` has names.
    """
    s = s.replace("(", "").replace(")", "").replace("[", "").replace("]", "")
    #dat = [float(_) for i,_ in enumerate(s.split(",")) if cols[i] not in unwanted_cols]
    dat = [float(field) for i, field in enumerate(s.split(",")) if cols[i] in wanted_cols]
    return dat


def _labels_from_probabilities(probabilities):
    """Turn rows of two-class probabilities into class ids (0 or 1).

    Column 0 holds the probability of class 0, so a row is labelled 0 when
    that probability is at least 0.5 and 1 otherwise. Every row yields
    exactly one label, keeping the result aligned with the true labels.
    """
    return [0 if row[0] >= 0.5 else 1 for row in probabilities]


def main():
    """Load the features, train the network and print evaluation metrics."""
    with open("feature_with_dnn_todo.dat") as f:
        # Blank lines (e.g. a trailing newline) cannot be parsed as floats.
        training_data = [parse_line(line) for line in f if line.strip()]
    #sys.exit(0)
    if not training_data:
        sys.exit("feature_with_dnn_todo.dat contains no samples")

    X = training_data
    # A raw label of 2 marks the positive class; everything else is negative.
    org_labels = [1 if int(x[0]) == 2 else 0 for x in X]
    labels = to_categorical(org_labels, nb_classes=2)
    data = [x[1:] for x in X]
    input_dim = len(data[0])

    X = data
    Y = labels

    print("X len: %d Y len: %d" % (len(X), len(Y)))
    trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.2, random_state=42)
    print(trainX[0])
    print(trainY[0])
    print(testX[-1])
    print(testY[-1])

    # Build neural network
    net = tflearn.input_data(shape=[None, input_dim])
    net = batch_normalization(net)
    net = tflearn.fully_connected(net, input_dim)
    net = tflearn.fully_connected(net, 128, activation='tanh')
    net = dropout(net, 0.5)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy', name='target')
    # Define model
    model = tflearn.DNN(net)
    # Start training (apply gradient descent algorithm)
    # Initialize our callback with desired accuracy threshold.
    early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.998)
    try:
        model.fit(trainX, trainY, validation_set=(testX, testY), n_epoch=500,
                  batch_size=8, show_metric=True, callbacks=early_stopping_cb)
    except StopIteration:
        # Raised by the early-stopping callback; training ended on purpose.
        print("pass")

    filename = 'tf_model/dns_tunnel2_998.tflearn'
    # NOTE(review): loading here replaces the weights that were just trained
    # with whatever checkpoint is on disk; presumably a model.save(filename)
    # was intended before this -- confirm.
    model.load(filename)

    y_predict_list = model.predict(X)
    y_predict = _labels_from_probabilities(y_predict_list)

    print(classification_report(org_labels, y_predict))
    print(confusion_matrix(org_labels, y_predict))


if __name__ == "__main__":
    main()


('Terminating training at the end of epoch', 175)
Training Step: 309936  | total loss: 0.00695 | time: 4.371s
| Adam | epoch: 176 | loss: 0.00695 - acc: 0.9988 | val_loss: 0.00661 - val_acc: 0.9991 -- iter: 14084/14084
('Terminating training at the end of epoch', 176)
('Successfully left training! Final model accuracy:', 0.9987633228302002)
             precision    recall  f1-score   support

          0       1.00      1.00      1.00     16529
          1       0.97      0.99      0.98      1076

avg / total       1.00      1.00      1.00     17605

[[16497    32]
 [    8  1068]]



深度学习 dns tunnel检测 使用统计特征 全连接网络——精度99.8%的更多相关文章

