import numpy as np

# Assumed imports: this example targets a dlsys-style autodiff framework in
# which `autodiff` and `ndarray` are importable modules, and the helpers
# load_mnist_data, convert_to_one_hot, and sgd_update_gpu are defined
# elsewhere in this file.
import autodiff as ad
import ndarray

# Fixed seed so the weight initialization below is reproducible.
rand = np.random.RandomState(seed=123)


def mnist_mlp(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
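    """Train a three-layer MLP (784 -> 256 -> 100 -> 10) on MNIST.

    The forward graph is declared symbolically, gradients come from
    reverse-mode automatic differentiation, and parameters are trained
    with minibatch SGD on CPU (numpy) or GPU (ndarray), then the model
    is evaluated once on the validation set.
    """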
W1 = ad.Variable(name="W1")
W2 = ad.Variable(name="W2")
W3 = ad.Variable(name="W3")
b1 = ad.Variable(name="b1")
b2 = ad.Variable(name="b2")
b3 = ad.Variable(name="b3")
X = ad.Variable(name="X")
y_ = ad.Variable(name="y_")
# relu(X W1+b1)
z1 = ad.matmul_op(X, W1)
z2 = z1 + ad.broadcastto_op(b1, z1)
z3 = ad.relu_op(z2)
# relu(z3 W2+b2)
z4 = ad.matmul_op(z3, W2)
z5 = z4 + ad.broadcastto_op(b2, z4)
z6 = ad.relu_op(z5)
    # softmax(z6 W3+b3)
z7 = ad.matmul_op(z6, W3)
y = z7 + ad.broadcastto_op(b3, z7)
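    # softmaxcrossentropy_op fuses softmax with cross-entropy; the fused op
    # is the numerically stable way to train a softmax classifier.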
loss = ad.softmaxcrossentropy_op(y, y_)
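    # ad.gradients builds the backward graph, returning one symbolic gradient
    # node per requested parameter (reverse-mode automatic differentiation).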
grad_W1, grad_W2, grad_W3, grad_b1, grad_b2, grad_b3 = ad.gradients(
loss, [W1, W2, W3, b1, b2, b3])
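    # A single executor computes the loss, all six gradients, and the
    # prediction y in one forward/backward evaluation of the graph.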
executor = ad.Executor(
[loss, grad_W1, grad_W2, grad_W3, grad_b1, grad_b2, grad_b3, y],
ctx=executor_ctx)
datasets = load_mnist_data("mnist.pkl.gz")
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
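    # The standard mnist.pkl.gz split is 50,000/10,000/10,000 examples, so a
    # batch size of 1000 divides each split evenly (no ragged final batch).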
batch_size = 1000
n_train_batches = train_set_x.shape[0] // batch_size
n_valid_batches = valid_set_x.shape[0] // batch_size
print("Start training loop...")
    W1_val = rand.normal(scale=0.1, size=(784, 256))
    W2_val = rand.normal(scale=0.1, size=(256, 100))
W3_val = rand.normal(scale=0.1, size=(100, 10))
    b1_val = rand.normal(scale=0.1, size=(256,))
    b2_val = rand.normal(scale=0.1, size=(100,))
    b3_val = rand.normal(scale=0.1, size=(10,))
X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
valid_X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
valid_y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
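    # On a GPU context, copy the parameter initializers and input buffers
    # into device memory; on CPU they stay as plain numpy arrays.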
if ndarray.is_gpu_ctx(executor_ctx):
W1_val = ndarray.array(W1_val, ctx=executor_ctx)
W2_val = ndarray.array(W2_val, ctx=executor_ctx)
W3_val = ndarray.array(W3_val, ctx=executor_ctx)
b1_val = ndarray.array(b1_val, ctx=executor_ctx)
b2_val = ndarray.array(b2_val, ctx=executor_ctx)
b3_val = ndarray.array(b3_val, ctx=executor_ctx)
X_val = ndarray.array(X_val, ctx=executor_ctx)
y_val = ndarray.array(y_val, ctx=executor_ctx)
lr = 1.0e-3
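    # Plain minibatch SGD with a fixed learning rate (no momentum or decay).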
for i in range(num_epochs):
print("epoch %d" % i)
for minibatch_index in range(n_train_batches):
minibatch_start = minibatch_index * batch_size
minibatch_end = (minibatch_index + 1) * batch_size
X_val[:] = train_set_x[minibatch_start:minibatch_end]
y_val[:] = convert_to_one_hot(
train_set_y[minibatch_start:minibatch_end])
loss_val, grad_W1_val, grad_W2_val, grad_W3_val, \
grad_b1_val, grad_b2_val, grad_b3_val, _ = executor.run(
feed_dict={
X: X_val,
y_: y_val,
W1: W1_val,
W2: W2_val,
W3: W3_val,
b1: b1_val,
b2: b2_val,
b3: b3_val})
            # SGD update: w <- w - lr * grad(w). The CPU path rebinds the
            # numpy arrays; the GPU path updates the device buffers in place.
            if executor_ctx is None:
W1_val = W1_val - lr * grad_W1_val
W2_val = W2_val - lr * grad_W2_val
W3_val = W3_val - lr * grad_W3_val
b1_val = b1_val - lr * grad_b1_val
b2_val = b2_val - lr * grad_b2_val
b3_val = b3_val - lr * grad_b3_val
else:
sgd_update_gpu(W1_val, grad_W1_val, lr)
sgd_update_gpu(W2_val, grad_W2_val, lr)
sgd_update_gpu(W3_val, grad_W3_val, lr)
sgd_update_gpu(b1_val, grad_b1_val, lr)
sgd_update_gpu(b2_val, grad_b2_val, lr)
sgd_update_gpu(b3_val, grad_b3_val, lr)
if print_loss_val_each_epoch:
if isinstance(loss_val, ndarray.NDArray):
print(loss_val.asnumpy())
else:
print(loss_val)
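    # After training, measure accuracy over the validation set with the same
    # executor, feeding in the trained parameter values.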
correct_predictions = []
for minibatch_index in range(n_valid_batches):
minibatch_start = minibatch_index * batch_size
minibatch_end = (minibatch_index + 1) * batch_size
valid_X_val[:] = valid_set_x[minibatch_start:minibatch_end]
valid_y_val[:] = convert_to_one_hot(
valid_set_y[minibatch_start:minibatch_end])
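        # Only the prediction output y is used here; convert_to_numpy_ret_vals
        # returns host-side numpy arrays so the argmax below runs on the CPU.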
_, _, _, _, _, _, _, valid_y_predicted = executor.run(
feed_dict={
X: valid_X_val,
y_: valid_y_val,
W1: W1_val,
W2: W2_val,
W3: W3_val,
b1: b1_val,
b2: b2_val,
b3: b3_val},
convert_to_numpy_ret_vals=True)
        correct_prediction = np.equal(
            np.argmax(valid_y_val, 1),
            np.argmax(valid_y_predicted, 1)).astype(float)
correct_predictions.extend(correct_prediction)
accuracy = np.mean(correct_predictions)
print("validation set accuracy=%f" % accuracy)