
  1. 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 4.0900 1 296.0 15.30 396.90 4.98 24.00
  2. 0.02731 0.00 7.070 0 0.4690 6.4210 78.90 4.9671 2 242.0 17.80 396.90 9.14 21.60
  3. 0.02729 0.00 7.070 0 0.4690 7.1850 61.10 4.9671 2 242.0 17.80 392.83 4.03 34.70
  4. 0.03237 0.00 2.180 0 0.4580 6.9980 45.80 6.0622 3 222.0 18.70 394.63 2.94 33.40
  5. 0.06905 0.00 2.180 0 0.4580 7.1470 54.20 6.0622 3 222.0 18.70 396.90 5.33 36.20
  6. 0.02985 0.00 2.180 0 0.4580 6.4300 58.70 6.0622 3 222.0 18.70 394.12 5.21 28.70
  7. 0.08829 12.50 7.870 0 0.5240 6.0120 66.60 5.5605 5 311.0 15.20 395.60 12.43 22.90
  8. 0.14455 12.50 7.870 0 0.5240 6.1720 96.10 5.9505 5 311.0 15.20 396.90 19.15 27.10
  9. 0.21124 12.50 7.870 0 0.5240 5.6310 100.00 6.0821 5 311.0 15.20 386.63 29.93 16.50
  10. 0.17004 12.50 7.870 0 0.5240 6.0040 85.90 6.5921 5 311.0 15.20 386.71 17.10 18.90
  11. 0.22489 12.50 7.870 0 0.5240 6.3770 94.30 6.3467 5 311.0 15.20 392.52 20.45 15.00
  12. 0.11747 12.50 7.870 0 0.5240 6.0090 82.90 6.2267 5 311.0 15.20 396.90 13.27 18.90
  13. 0.09378 12.50 7.870 0 0.5240 5.8890 39.00 5.4509 5 311.0 15.20 390.50 15.71 21.70
  14. 0.62976 0.00 8.140 0 0.5380 5.9490 61.80 4.7075 4 307.0 21.00 396.90 8.26 20.40
  15. 0.63796 0.00 8.140 0 0.5380 6.0960 84.50 4.4619 4 307.0 21.00 380.02 10.26 18.20
  16. 0.62739 0.00 8.140 0 0.5380 5.8340 56.50 4.4986 4 307.0 21.00 395.62 8.47 19.90
  17. 1.05393 0.00 8.140 0 0.5380 5.9350 29.30 4.4986 4 307.0 21.00 386.85 6.58 23.10


  1. # k-Nearest Neighbor
  2. #----------------------------------
  3. #
  4. # This script illustrates how to use
  5. # k-nearest neighbors in TensorFlow
  6. #
  7. # We will use the 1970s Boston housing dataset
  8. # which is available through the UCI
  9. # ML data repository.
  10. #
  11. # Data:
  12. #----------x-values-----------
  13. # CRIM : per capita crime rate by town
  14. # ZN : prop. of res. land zones
  15. # INDUS : prop. of non-retail business acres
  16. # CHAS : Charles river dummy variable
  17. # NOX : nitric oxides concentration (parts per 10 million)
  18. # RM : Avg. # of rooms per building
  19. # AGE : prop. of buildings built prior to 1940
  20. # DIS : Weighted distances to employment centers
  21. # RAD : Index of accessibility to radial highways
  22. # TAX : Full tax rate value per $10k
  23. # PTRATIO: Pupil/Teacher ratio by town
  24. # B : 1000*(Bk-0.63)^2, Bk=prop. of blacks
  25. # LSTAT : % lower status of pop
  26. #------------y-value-----------
  27. # MEDV : Median Value of homes in $1,000's
  29. import matplotlib.pyplot as plt
  30. import numpy as np
  31. import tensorflow as tf
  32. import requests
  33. from tensorflow.python.framework import ops
  34. ops.reset_default_graph()
  36. # Create graph
  37. sess = tf.Session()
  39. # Load the data
  40. housing_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
  41. housing_header = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
  42. cols_used = ['CRIM', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
  43. num_features = len(cols_used)
  44. housing_file = requests.get(housing_url)
  45. housing_data = [[float(x) for x in y.split(' ') if len(x)>=1] for y in housing_file.text.split('\n') if len(y)>=1]
  47. y_vals = np.transpose([np.array([y[13] for y in housing_data])])
  48. x_vals = np.array([[x for i,x in enumerate(y) if housing_header[i] in cols_used] for y in housing_data])
  50. ## Min-Max Scaling
  51. x_vals = (x_vals - x_vals.min(0)) / x_vals.ptp(0)
  53. # Split the data into train and test sets
  54. np.random.seed(13) #make results reproducible
  55. train_indices = np.random.choice(len(x_vals), round(len(x_vals)*0.8), replace=False)
  56. test_indices = np.array(list(set(range(len(x_vals))) - set(train_indices)))
  57. x_vals_train = x_vals[train_indices]
  58. x_vals_test = x_vals[test_indices]
  59. y_vals_train = y_vals[train_indices]
  60. y_vals_test = y_vals[test_indices]
  62. # Declare k-value and batch size
  63. k = 4
  64. batch_size=len(x_vals_test)
  66. # Placeholders
  67. x_data_train = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
  68. x_data_test = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
  69. y_target_train = tf.placeholder(shape=[None, 1], dtype=tf.float32)
  70. y_target_test = tf.placeholder(shape=[None, 1], dtype=tf.float32)
  72. # Declare distance metric
  73. # L1
  74. distance = tf.reduce_sum(tf.abs(tf.subtract(x_data_train, tf.expand_dims(x_data_test,1))), axis=2)
  76. # L2
  77. #distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(x_data_train, tf.expand_dims(x_data_test,1))), reduction_indices=1))
  79. # Predict: Get min distance index (Nearest neighbor)
  80. #prediction = tf.arg_min(distance, 0)
  81. top_k_xvals, top_k_indices = tf.nn.top_k(tf.negative(distance), k=k)
  82. x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1),1)
  83. x_sums_repeated = tf.matmul(x_sums,tf.ones([1, k], tf.float32))
  84. x_val_weights = tf.expand_dims(tf.div(top_k_xvals,x_sums_repeated), 1)
  86. top_k_yvals = tf.gather(y_target_train, top_k_indices)
  87. prediction = tf.squeeze(tf.matmul(x_val_weights,top_k_yvals), axis=[1])
  89. # Calculate MSE
  90. mse = tf.div(tf.reduce_sum(tf.square(tf.subtract(prediction, y_target_test))), batch_size)
  92. # Calculate how many loops over training data
  93. num_loops = int(np.ceil(len(x_vals_test)/batch_size))
  95. for i in range(num_loops):
  96. min_index = i*batch_size
  97. max_index = min((i+1)*batch_size,len(x_vals_train))
  98. x_batch = x_vals_test[min_index:max_index]
  99. y_batch = y_vals_test[min_index:max_index]
  100. predictions = sess.run(prediction, feed_dict={x_data_train: x_vals_train, x_data_test: x_batch,
  101. y_target_train: y_vals_train, y_target_test: y_batch})
  102. batch_mse = sess.run(mse, feed_dict={x_data_train: x_vals_train, x_data_test: x_batch,
  103. y_target_train: y_vals_train, y_target_test: y_batch})
  105. print('Batch #' + str(i+1) + ' MSE: ' + str(np.round(batch_mse,3)))
  107. # Plot prediction and actual distribution
  108. bins = np.linspace(5, 50, 45)
  110. plt.hist(predictions, bins, alpha=0.5, label='Prediction')
  111. plt.hist(y_batch, bins, alpha=0.5, label='Actual')
  112. plt.title('Histogram of Predicted and Actual Values')
  113. plt.xlabel('Med Home Value in $1,000s')
  114. plt.ylabel('Frequency')
  115. plt.legend(loc='upper right')
  116. plt.show()

