Logistic Regression

Pay particular attention to how exp and log are used.

The raw sigmoid expression is 1 / (1 + exp(-z)), but evaluating it directly at z = -710 overflows, because exp(710) exceeds the largest double. For z < 0 we therefore use the equivalent form exp(z) / (1 + exp(z)); exp(-710) then merely underflows to 0, which is harmless. This case split is exactly what scipy's expit function does.

log_logistic = log(sigmoid). Note that, just like expit, it is computed with the same case split on the sign of z.
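
As a minimal sketch of that case split (my own illustration of the idea, not scipy's actual implementation; z is assumed to be a NumPy array):

import numpy as np

def stable_sigmoid(z):
    # Split on the sign of z so exp() is only called on non-positive values.
    z = np.asarray(z, dtype=float)
    out = np.empty_like(z)
    pos = z >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))       # z >= 0: exp(-z) <= 1, safe
    ez = np.exp(z[~pos])                           # z < 0: exp(z) <= 1, safe
    out[~pos] = ez / (1.0 + ez)
    return out

def stable_log_sigmoid(z):
    # log(sigmoid(z)) with the same split.
    z = np.asarray(z, dtype=float)
    out = np.empty_like(z)
    pos = z >= 0
    out[pos] = -np.log1p(np.exp(-z[pos]))          # z >= 0: -log(1 + exp(-z))
    out[~pos] = z[~pos] - np.log1p(np.exp(z[~pos]))  # z < 0: z - log(1 + exp(z))
    return out

With this version, stable_sigmoid(np.array([-710., 0., 710.])) returns [0., 0.5, 1.] without any overflow warning.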

import numpy as np
from scipy.special import expit
from sklearn.utils.extmath import log_logistic


def predict(theta, X):
    # sigmoid(X @ theta), computed stably by expit
    return expit(X.dot(theta))


def compute_loss(y, yz):
    # negative log-likelihood: -sum(log(sigmoid(y * z))) with labels in {-1, +1}
    return -np.sum(log_logistic(yz))


def gradient_descent(X, y, theta, iterations=2000, lr=0.01):
    # X: (m, n) design matrix, y: (m, 1) labels in {0, 1}
    m = X.shape[0]
    loss_list = []
    for i in range(iterations):
        yhat = predict(theta, X)
        delta = X.T.dot(yhat - y) / m
        # map labels {0, 1} -> {-1, +1} so the loss matches the gradient above
        loss = compute_loss(y, (2 * y - 1) * X.dot(theta))
        loss_list.append(loss)
        theta = theta - lr * delta
    return theta, loss_list


theta, loss_list = gradient_descent(X, y, np.zeros((n, 1)))
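
The last call above assumes X, y and n are already defined. A hypothetical setup with synthetic data (names of my own choosing) might look like this:

np.random.seed(0)
m, n = 200, 3
X = np.hstack([np.ones((m, 1)), np.random.randn(m, n - 1)])   # bias column plus 2 features
true_theta = np.array([[0.5], [2.0], [-1.0]])
y = (expit(X.dot(true_theta)) > np.random.rand(m, 1)).astype(float)  # labels in {0, 1}

theta, loss_list = gradient_descent(X, y, np.zeros((n, 1)))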

Kmeans

K-means is essentially the EM algorithm, only with hard assignments instead of soft ones. First initialize K centers; in the E step, assign each sample to its nearest center; in the M step, recompute each center so as to minimize the within-cluster distance.

import numpy as np


def calc_dist(x1, x2):
    # squared Euclidean distance between two points
    return sum([(x1[i] - x2[i]) ** 2 for i in range(len(x1))])


# E step: assign samples to the given centers
def E_step(X, cents):
    cent_dict = dict(zip(cents, [[] for _ in range(len(cents))]))
    for row in X:
        min_dist, best_cent = 1e10, None
        for cent in cent_dict:
            dist = calc_dist(row, cent)
            if dist < min_dist:
                min_dist = dist
                best_cent = cent
        cent_dict[best_cent] += [row.tolist()]
    return cent_dict


# M step: compute new centers as the mean of each cluster
def M_step(cent_dict):
    new_cents = []
    for cent in cent_dict:
        new_cent = np.mean(np.array(cent_dict[cent]), axis=0)
        new_cents.append(tuple(new_cent))
    return new_cents


def Kmeans(X, K=3, max_iter=10):
    np.random.seed(1)
    # pick K distinct samples as the initial centers
    inds = np.random.choice(len(X), K, replace=False)
    init_cents = [tuple(X[i]) for i in inds]
    cents = init_cents
    for k in range(max_iter):
        cent_dict = E_step(X, cents)
        new_cents = M_step(cent_dict)
        move = sum([calc_dist(c1, c2) for c1, c2 in zip(cents, new_cents)])
        if move < 0.1:
            print('Converged in %s steps' % k)
            break
        cents = new_cents
    return cent_dict
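
A quick, hypothetical usage sketch on synthetic 2-D blobs (the data and variable names are my own):

np.random.seed(0)
blobs = [np.random.randn(50, 2) + offset for offset in ([0, 0], [5, 5], [0, 5])]
X = np.vstack(blobs)

cent_dict = Kmeans(X, K=3, max_iter=10)
for cent, members in cent_dict.items():
    print(np.round(cent, 2), len(members))   # center coordinates and cluster size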

Neural Network

Note the softmax computation: exp can overflow. The usual trick is to multiply the numerator and denominator of the softmax by the same constant C, with log(C) = -max(z); this gives the shift_scores below.

The logsumexp function from the scipy package is used here for the same reason as in LR: logsumexp(z) = log(sum(exp(z))), computed with the same max-shift for numerical stability.
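
A tiny numeric sketch of why the shift matters (the values are made up purely for illustration):

import numpy as np
from scipy.special import logsumexp

z = np.array([1000.0, 1001.0, 1002.0])
naive = np.exp(z) / np.sum(np.exp(z))             # exp(1000) overflows to inf, result is nan
shift = z - np.max(z)                             # [-2, -1, 0]
stable = np.exp(shift) / np.sum(np.exp(shift))    # finite: about [0.09, 0.24, 0.67]
log_probs = shift - logsumexp(shift)              # log-softmax, matches np.log(stable)

Subtracting max(z) does not change the result, because the factor exp(-max(z)) cancels between numerator and denominator.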

from scipy.special import logsumexp
import numpy as np


class Neural_Network:

    def __init__(self, n, h, c, std=1e-4):
        # n: input dim, h: hidden dim, c: number of classes
        W1 = np.random.randn(n, h) * std
        b1 = np.zeros(h)
        W2 = np.random.randn(h, c) * std
        b2 = np.zeros(c)
        self.params = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    def forward_backward_prop(self, X, y):
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # forward prop
        hidden = X.dot(W1) + b1
        relu = np.maximum(0, hidden)
        scores = relu.dot(W2) + b2
        shift_scores = scores - np.max(scores, axis=1, keepdims=True)
        softmax = np.exp(shift_scores) / np.sum(np.exp(shift_scores), axis=1, keepdims=True)
        # cross-entropy via log-softmax: shift_scores - logsumexp(shift_scores)
        loss = -np.sum(y * (shift_scores - logsumexp(shift_scores, axis=1, keepdims=True))) / X.shape[0]

        # backward prop
        dscores = (softmax - y) / X.shape[0]
        drelu = dscores.dot(W2.T)
        dW2 = relu.T.dot(dscores)
        db2 = np.sum(dscores, axis=0)
        dhidden = (hidden > 0) * drelu
        dW1 = X.T.dot(dhidden)
        db1 = np.sum(dhidden, axis=0)

        grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}
        return loss, grads

    def train(self, X, y, lr=0.01, decay=0.95, iters=5000):
        loss_list, acc_list = [], []
        for it in range(iters):
            loss, grads = self.forward_backward_prop(X, y)
            loss_list.append(loss)
            self.params['W1'] -= lr * grads['dW1']
            self.params['b1'] -= lr * grads['db1']
            self.params['W2'] -= lr * grads['dW2']
            self.params['b2'] -= lr * grads['db2']
            if it % 100 == 0:
                yhat = self.predict(X)
                acc = np.sum(np.argmax(y, axis=1) == yhat) / X.shape[0]
                acc_list.append(acc)
                lr *= decay
        return loss_list, acc_list

    def predict(self, X):
        hidden = X.dot(self.params['W1']) + self.params['b1']
        relu = np.maximum(0, hidden)
        scores = relu.dot(self.params['W2']) + self.params['b2']
        yhat = np.argmax(scores, axis=1)
        return yhat
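
A short, hypothetical usage sketch with made-up dimensions, random inputs and random one-hot labels; it only exercises the shapes and the API, so no meaningful accuracy is expected:

np.random.seed(0)
N, n, h, c = 300, 4, 16, 3
X = np.random.randn(N, n)
labels = np.random.randint(0, c, size=N)
y = np.eye(c)[labels]                      # one-hot targets, shape (N, c)

net = Neural_Network(n, h, c)
loss_list, acc_list = net.train(X, y, lr=0.1, iters=1000)
print(loss_list[0], loss_list[-1], acc_list[-1])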

Recurrent Neural Network

import numpy as np


def tanh(x):
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))


def softmax(x):
    ex = np.exp(x - np.max(x))
    return ex / ex.sum(axis=0)


class RNN:

    def __init__(self, na, nx, ny, m, seed=1):
        np.random.seed(seed)
        Waa = np.random.randn(na, na)
        Wax = np.random.randn(na, nx)
        Wya = np.random.randn(ny, na)
        ba = np.random.randn(na, 1)
        by = np.random.randn(ny, 1)
        self.a0 = np.random.randn(na, m)
        self.params = {'Waa': Waa, 'Wax': Wax, 'Wya': Wya, 'ba': ba, 'by': by}

    def RNN_cell_forward(self, xt, a_prev):
        """
        Inputs:
        xt -- Current input data, of shape (nx, m)
        a_prev -- Previous hidden state, of shape (na, m)

        Outputs:
        at -- Current hidden state, of shape (na, m)
        yt -- Current prediction, of shape (ny, m)
        """
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']
        at = tanh(Waa.dot(a_prev) + Wax.dot(xt) + ba)
        score = Wya.dot(at) + by
        yt = softmax(score)
        return at, yt

    def RNN_forward(self, X, y):
        """
        Inputs:
        X -- Input data for every time step, of shape (nx, m, Tx)
        y -- Target for every time step, of shape (ny, m, Tx)

        Outputs:
        a -- Hidden states for every time step, of shape (na, m, Tx)
        yhat -- Predictions for every time step, of shape (ny, m, Tx)
        """
        a_prev = self.a0
        na, m = a_prev.shape
        ny = y.shape[0]
        Tx = X.shape[2]

        a = np.zeros((na, m, Tx))
        yhat = np.zeros((ny, m, Tx))
        loss = 0
        for t in range(Tx):
            a_next, yt = self.RNN_cell_forward(X[:, :, t], a_prev)
            yhat[:, :, t] = yt
            a[:, :, t] = a_next
            # cross-entropy with one-hot targets: only the true-class probabilities contribute
            loss -= np.sum(y[:, :, t] * np.log(yt))
            a_prev = a_next
        cache = (a, yhat)
        return loss, cache

    def RNN_cell_backward(self, dz, grads, cache):
        """
        Inputs:
        dz -- Gradient of loss with respect to the score
        grads -- Dictionary containing all gradients
        cache -- Tuple containing xt, a_next, a_prev

        Outputs:
        grads -- Dictionary containing all gradients
        """
        xt, a_next, a_prev = cache
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']

        grads['dWya'] += dz.dot(a_next.T)
        grads['dby'] += np.sum(dz, axis=1, keepdims=True)
        da_y = Wya.T.dot(dz)
        da_a = grads['da_prev']
        da_next = da_y + da_a  # da flows in along two paths: the output at t and the hidden state at t+1
        dtanh = (1 - a_next**2) * da_next
        grads['dWaa'] += dtanh.dot(a_prev.T)
        grads['da_prev'] = Waa.T.dot(dtanh)
        grads['dWax'] += dtanh.dot(xt.T)
        grads['dba'] += np.sum(dtanh, axis=1, keepdims=True)
        return grads

    def RNN_backward(self, X, y, cache):
        """
        Inputs:
        X -- Input data for every time step, of shape (nx, m, Tx)
        y -- Target for every time step, of shape (ny, m, Tx)
        cache -- Tuple from RNN_forward, containing a, yhat

        Outputs:
        grads -- Dictionary containing all gradients
        a -- Hidden states for every time step, of shape (na, m, Tx)
        """
        a, yhat = cache
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']
        Tx = X.shape[2]

        grads = {}
        grads['dWya'], grads['dby'] = np.zeros_like(Wya), np.zeros_like(by)
        grads['dWaa'], grads['da_prev'] = np.zeros_like(Waa), np.zeros_like(self.a0)
        grads['dWax'], grads['dba'] = np.zeros_like(Wax), np.zeros_like(ba)

        for t in reversed(range(Tx)):
            # gradient of the loss w.r.t. the score: softmax output minus one-hot target
            dz = yhat[:, :, t] - y[:, :, t]
            # at t = 0 the previous hidden state is a0, not the last time step
            a_prev_t = a[:, :, t - 1] if t > 0 else self.a0
            cell_cache = X[:, :, t], a[:, :, t], a_prev_t
            grads = self.RNN_cell_backward(dz, grads, cell_cache)
        return grads, a

    def update_parameters(self, grads, lr):
        self.params['Wax'] -= lr * grads['dWax']
        self.params['Waa'] -= lr * grads['dWaa']
        self.params['Wya'] -= lr * grads['dWya']
        self.params['ba'] -= lr * grads['dba']
        self.params['by'] -= lr * grads['dby']

    def clip(self, grads, maxValue):
        for key in ['dWax', 'dWaa', 'dWya', 'dba', 'dby']:
            gradient = grads[key]
            grads[key] = np.clip(gradient, -maxValue, maxValue, out=gradient)
        return grads

    def train(self, X, y, lr, iters=1):
        loss_list = []
        for it in range(iters):
            loss, cache = self.RNN_forward(X, y)
            grads, a = self.RNN_backward(X, y, cache)
            # Clip gradients between -5 (min) and 5 (max)
            grads = self.clip(grads, 5)
            self.update_parameters(grads, lr)
            loss_list.append(loss)
        return loss, grads, a
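
A small, hypothetical smoke test with arbitrary dimensions and random one-hot targets (again, it only exercises the shapes and the API):

np.random.seed(0)
na, nx, ny, m, Tx = 8, 5, 5, 4, 6
rnn = RNN(na, nx, ny, m)

X = np.random.randn(nx, m, Tx)
labels = np.random.randint(0, ny, size=(m, Tx))
y = np.zeros((ny, m, Tx))
for i in range(m):
    for t in range(Tx):
        y[labels[i, t], i, t] = 1.0          # one-hot targets

loss, grads, a = rnn.train(X, y, lr=0.01, iters=5)
print(loss)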
