이 포스트는 Github 접속 제약이 있을 경우를 위한 것이며, 아래와 동일 내용을 실행 결과와 함께 Jupyter notebook으로도 보실 수 있습니다.
You can also see the following as Jupyter notebook along with execution result screens if you have no trouble connecting to the Github.
2-1. Simple Example (2 variables)
import tensorflow as tf
import numpy as np
tf.random.set_seed(1234) # for reproducibility
x1_data = [1, 0, 3, 0, 5]
x2_data = [0, 2, 0, 4, 0]
y_data = [1, 2, 3, 4, 5]
# tf.random.uniform(shape=[1], minval=-10.0, maxval=10.0)
W1 = tf.Variable(tf.random.uniform([1], -10.0, 10.0))
W2 = tf.Variable(tf.random.uniform([1], -10.0, 10.0))
b = tf.Variable(tf.random.uniform([1], -10.0, 10.0))
learning_rate = tf.Variable(0.001)
for i in range(1000+1):
with tf.GradientTape() as tape:
hypothesis = W1 * x1_data + W2 * x2_data + b
cost = tf.reduce_mean(tf.square(hypothesis - y_data))
W1_grad, W2_grad, b_grad = tape.gradient(cost, [W1, W2, b])
W1.assign_sub(learning_rate * W1_grad)
W2.assign_sub(learning_rate * W2_grad)
b.assign_sub(learning_rate * b_grad)
if i % 100 == 0:
print("{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.6f}".format(
i, cost.numpy(), W1.numpy()[0], W2.numpy()[0], b.numpy()[0]))
2-2. Simple Example (2 variables with Matrix)
x_data = [[1., 0., 3., 0., 5.],
[0., 2., 0., 4., 0.]]
y_data = [1, 2, 3, 4, 5]
print('x_data dim.:', tf.shape(x_data).numpy())
print('y_data dim.:', tf.shape(y_data).numpy())
- 가중치 설정
# W 차원 (1, 2) : W * (2, 5) = (1, 5)
W = tf.Variable(tf.random.uniform([1, 2], -1.0, 1.0))
b = tf.Variable(tf.random.uniform([1], -1.0, 1.0))
print(W.numpy())
print(b.numpy())
- 가중치 학습
learning_rate = tf.Variable(0.001)
for i in range(1000+1):
with tf.GradientTape() as tape:
hypothesis = tf.matmul(W, x_data) + b # (1, 2) * (2, 5) = (1, 5)
cost = tf.reduce_mean(tf.square(hypothesis - y_data))
W_grad, b_grad = tape.gradient(cost, [W, b])
W.assign_sub(learning_rate * W_grad)
b.assign_sub(learning_rate * b_grad)
if i % 100 == 0:
print("{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.6f}".format(
i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], b.numpy()[0]))
2-3. Simple Example (Hypothesis without b)
- 별도의 bias 변수를 설정하지 않고, W 가중치 변수에 반영.
- tf1.x 버전에서 optimizer로 사용된 tf.train.GradientDescentOptimizer는 tf.keras.optimizers.SGD로 변경됨
# 앞의 코드에서 bias(b)를 행렬에 추가
x_data = [[1., 1., 1., 1., 1.], # bias를 '1'로 설정했으나, W가 곱해지고 열 원소가 더해지면
[1., 0., 3., 0., 5.], # 결과적으로 변수 b 원소가 더해진 것과 동일한 상황.
[0., 2., 0., 4., 0.]]
y_data = [1, 2, 3, 4, 5]
W = tf.Variable(tf.random.uniform([1, 3], -1.0, 1.0)) # [1, 3]으로 변경하고 X_data와 연산을 하면, bias가 반영됨.
learning_rate = 0.001
optimizer = tf.keras.optimizers.SGD(learning_rate) #tf.1x버전의 tf.train.GradientDescentOptimizer를 대체
for i in range(1000+1):
with tf.GradientTape() as tape:
hypothesis = tf.matmul(W, x_data) # bias 없음
cost = tf.reduce_mean(tf.square(hypothesis - y_data))
grads = tape.gradient(cost, [W])
optimizer.apply_gradients(grads_and_vars=zip(grads,[W]))
if i % 100 == 0:
print("{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.4f}".format(
i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], W.numpy()[0][2]))
2.4 Simple Example (Custom Gradient)
- tf1.x 버전에서 optimizer로 사용된 tf.train.GradientDescentOptimizer는 tf.keras.optimizers.SGD로 변경됨
- optimizer.apply_gradients(): update
# Multi-variable linear regression (1)
X = tf.constant([[1., 2.],
[3., 4.]])
y = tf.constant([[1.5], [3.5]])
W = tf.Variable(tf.random.normal([2, 1]))
b = tf.Variable(tf.random.normal([1]))
# Create an optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01) #tf.1x버전의 tf.train.GradientDescentOptimizer를 대체
n_epoch = 1000+1
print("epoch | cost")
for i in range(n_epoch):
# Use tf.GradientTape() to record the gradient of the cost function
with tf.GradientTape() as tape:
y_pred = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(y_pred - y))
# calculates the gradients of the loss
grads = tape.gradient(cost, [W, b])
# updates parameters (W and b)
optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))
if i % 100 == 0:
print("{:5} | {:10.6f}".format(i, cost.numpy()))
2-5. More Examples (Predicting exam score)
- hypothesis = w1 x1 + w2 x2 + w3 * x3 + b
# data and label
x1 = [ 73., 93., 89., 96., 73.]
x2 = [ 80., 88., 91., 98., 66.]
x3 = [ 75., 93., 90., 100., 70.]
Y = [152., 185., 180., 196., 142.]
# weights
w1 = tf.Variable(10.)
w2 = tf.Variable(10.)
w3 = tf.Variable(10.)
b = tf.Variable(10.)
learning_rate = 0.000001
for i in range(1000+1):
# tf.GradientTape() to record the gradient of the cost function
with tf.GradientTape() as tape:
hypothesis = w1 * x1 + w2 * x2 + w3 * x3 + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# calculates the gradients of the cost
w1_grad, w2_grad, w3_grad, b_grad = tape.gradient(cost, [w1, w2, w3, b])
# update w1,w2,w3 and b
w1.assign_sub(learning_rate * w1_grad)
w2.assign_sub(learning_rate * w2_grad)
w3.assign_sub(learning_rate * w3_grad)
b.assign_sub(learning_rate * b_grad)
if i % 100 == 0:
print("{:5} | {:12.4f}".format(i, cost.numpy()))
- Matrix 및 hypothesis 함수 사용
data = np.array([
# X1, X2, X3, y
[ 73., 80., 75., 152. ],
[ 93., 88., 93., 185. ],
[ 89., 91., 90., 180. ],
[ 96., 98., 100., 196. ],
[ 73., 66., 70., 142. ]
], dtype=np.float32)
# slice data
X = data[:, :-1]
y = data[:, [-1]]
W = tf.Variable(tf.random.normal([3, 1]))
b = tf.Variable(tf.random.normal([1]))
learning_rate = 0.000001
# hypothesis, prediction function
def predict(X):
return tf.matmul(X, W) + b
print("epoch | cost")
n_epochs = 2000
for i in range(n_epochs+1):
# tf.GradientTape() to record the gradient of the cost function
with tf.GradientTape() as tape:
cost = tf.reduce_mean((tf.square(predict(X) - y)))
# calculates the gradients of the loss
W_grad, b_grad = tape.gradient(cost, [W, b])
# updates parameters (W and b)
W.assign_sub(learning_rate * W_grad)
b.assign_sub(learning_rate * b_grad)
if i % 100 == 0:
print("{:5} | {:10.4f}".format(i, cost.numpy()))
predict
Y # labels, 실제값
predict(X).numpy() # prediction, 예측값
# 새로운 데이터에 대한 예측
predict([[ 89., 95., 92.],[ 84., 92., 85.]]).numpy()