Thanks for the invite. For a two-layer fully connected network in numpy, Justin Johnson has already written a tutorial, Learning PyTorch with Examples, just without a residual block. The residual part is not hard to add, since the backward pass of the identity shortcut is itself the identity, so it is easy to implement; I may write one when I have time on the weekend (a rough sketch of the idea is included after the numpy code below).

Here is Justin's version:

# -*- coding: utf-8 -*-
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
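
As for the residual part mentioned above, here is a minimal sketch (mine, not from the tutorial) of how an extra H-by-H residual layer could be dropped into the same numpy loop; the weight name w_res and its small-scale initialization are assumptions for illustration. The point shows up in the backward pass: the shortcut passes its upstream gradient through unchanged, on top of the gradient routed through the branch.

# -*- coding: utf-8 -*-
# Sketch: two-layer net with one residual block h2 = relu(h1_relu @ w_res) + h1_relu
import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

w1 = np.random.randn(D_in, H)
w_res = np.random.randn(H, H) * 0.01   # residual branch weight (assumed name/init)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass
    h1 = x.dot(w1)
    h1_relu = np.maximum(h1, 0)
    r = h1_relu.dot(w_res)
    r_relu = np.maximum(r, 0)
    h2 = r_relu + h1_relu            # residual block: F(h1_relu) + h1_relu
    y_pred = h2.dot(w2)

    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backward pass
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h2.T.dot(grad_y_pred)
    grad_h2 = grad_y_pred.dot(w2.T)

    # Shortcut backward is the identity: grad_h2 reaches h1_relu unchanged,
    # and also flows through the residual branch.
    grad_r = grad_h2.copy()
    grad_r[r < 0] = 0                           # ReLU gate on the branch
    grad_w_res = h1_relu.T.dot(grad_r)
    grad_h1_relu = grad_h2 + grad_r.dot(w_res.T)  # identity path + branch path

    grad_h1 = grad_h1_relu.copy()
    grad_h1[h1 < 0] = 0
    grad_w1 = x.T.dot(grad_h1)

    # Update weights
    w1 -= learning_rate * grad_w1
    w_res -= learning_rate * grad_w_res
    w2 -= learning_rate * grad_w2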

And here is a MATLAB port of the same two-layer network:

N = 64;
D_in = 1000;
H = 100;
D_out = 10;

% init input and output
x = rand(N, D_in) - 0.5;
y = rand(N, D_out) - 0.5;

% init weights
w1 = rand(D_in, H) - 0.5;
w2 = rand(H, D_out) - 0.5;

lr = 1e-4;
total_loss = {};

for i = 1:500
    % forward
    h = x * w1;
    h_relu = max(h, 0);
    y_pred = h_relu * w2;

    % sum-of-squares loss, matching the gradient below
    % (norm(y_pred - y, 2) would give the spectral norm, not this loss)
    loss = sum(sum((y_pred - y).^2));
    disp(loss);
    total_loss{i} = loss;

    % backward
    grad_y_pred = 2 * (y_pred - y);
    grad_w2 = h_relu' * grad_y_pred;
    grad_h_relu = grad_y_pred * w2';
    grad_h = grad_h_relu .* (h >= 0);
    grad_w1 = x' * grad_h;

    % gradient descent update
    w1 = w1 - lr * grad_w1;
    w2 = w2 - lr * grad_w2;
end

total_loss = cell2mat(total_loss);
plot(total_loss)
