๋น์ฉ ์ต์ํ ์ฝ๋
์ ๋ฒ๊ธ์์ X, Y์ ์ต์ ํ๋ W, b๋ฅผ ์ฐพ์๋ค๋ฉด, ์ด๋ฒ์๋ ๊ณ ์ ๋ X, Y ์ ๋ํด ๋ค๋ฅธ W๊ฐ์ ๋ฃ์ด์ฃผ๋ฉด์ Cost์ ๋ณํ๋ฅผ ์ดํด๋ณด์.
# Lab 3 Minimizing Cost
# Sweep the weight W over a range of values for the fixed data set X, Y
# and plot how the cost (MSE) changes. (TensorFlow 1.x API)
import tensorflow as tf
import matplotlib.pyplot as plt

# Fixed training data.
X = [1, 2, 3]
Y = [1, 2, 3]

# W is fed a different value on every run, so declare it as a placeholder.
W = tf.placeholder(tf.float32)

# Our hypothesis for the linear model: X * W (a simple linear function,
# bias omitted).
hypothesis = X * W

# cost/loss function: mean squared error.
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Empty lists to record (W, cost) pairs while running the graph,
# for plotting the cost curve afterwards.
W_history = []
cost_history = []

# Launch the graph in a session.
with tf.Session() as sess:
    # Sweep W from -3.0 to 4.9 in steps of 0.1.
    for i in range(-30, 50):
        curr_W = i * 0.1
        curr_cost = sess.run(cost, feed_dict={W: curr_W})
        W_history.append(curr_W)
        cost_history.append(curr_cost)

# Show the cost function.
plt.plot(W_history, cost_history)
plt.show()
W = 1 ์ผ๋, cost๊ฐ ์ต์๊ฐ ๋๋ค.
Gradient Descent
W๋ฅผ ๋ค์๊ณผ ๊ฐ์ ๋ฐฉ์์ผ๋ก ์ ๋ฐ์ดํธํ๋ ๊ฒ์ด W์ ์ต์๋ฅผ ์ฐพ๋๋ก ํ๋ค.
์๊น minimize ํ ๋ ์ฌ์ฉํ๋ ์ฝ๋๋ฅผ ์ฐ๋ฆฌ๊ฐ ๋ค์์ ์ฝ๋๋ก ๋ฐ๊ฟ ์ ์๋ค.
# Minimize: gradient descent using the hand-derived derivative:
#   W -= learning_rate * d(cost)/dW
# NOTE(review): for W.assign() to work, W must be a tf.Variable — the W
# declared above for plotting is a placeholder, which has no assign op.
# Likewise X and Y must be placeholders so data can be fed at run time.
# Redeclare them as in the original lab script:
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(tf.random_normal([1]), name="weight")
hypothesis = X * W
cost = tf.reduce_mean(tf.square(hypothesis - Y))

learning_rate = 0.1
# Analytic gradient of mean((W*X - Y)^2), up to the constant factor 2.
gradient = tf.reduce_mean((W * X - Y) * X)
descent = W - learning_rate * gradient
# update is the op that writes the descended value back into W.
update = W.assign(descent)
๊ทธ๋ฆฌ๊ณ ๋์ ์ธ์ ์ ์คํํด์ค์ผ ํ๋ฏ๋ก,
# Launch the graph in a session.
# NOTE(review): x_data / y_data were never defined in the post; define the
# same fixed data set used throughout the lab.
x_data = [1, 2, 3]
y_data = [1, 2, 3]
with tf.Session() as sess:
    # Initializes global variables in the graph.
    sess.run(tf.global_variables_initializer())
    for step in range(21):
        # Run one manual gradient-descent step; fetch cost and W to inspect.
        _, cost_val, W_val = sess.run(
            [update, cost, W], feed_dict={X: x_data, Y: y_data}
        )
        print(step, cost_val, W_val)
"""
0 6.8174477 [1.6446238]
1 1.9391857 [1.3437994]
2 0.5515905 [1.1833596]
3 0.15689684 [1.0977918]
4 0.044628453 [1.0521556]
5 0.012694317 [1.0278163]
6 0.003610816 [1.0148354]
7 0.0010270766 [1.0079122]
8 0.00029214387 [1.0042198]
9 8.309683e-05 [1.0022506]
10 2.363606e-05 [1.0012003]
11 6.723852e-06 [1.0006402]
12 1.912386e-06 [1.0003414]
13 5.439676e-07 [1.000182]
14 1.5459062e-07 [1.000097]
15 4.3941593e-08 [1.0000517]
16 1.2491266e-08 [1.0000275]
17 3.5321979e-09 [1.0000147]
18 9.998237e-10 [1.0000079]
19 2.8887825e-10 [1.0000042]
20 8.02487e-11 [1.0000023]
"""
์ด๊ธฐ Weight๋ฅผ ๋ค๋ฅด๊ฒ ์ค๋ณด์.
# Lab 3 Minimizing Cost
# Start from a deliberately wrong initial weight (W = 5.0) and let
# TensorFlow's GradientDescentOptimizer recover the optimum (W = 1.0).
import tensorflow as tf

# tf Graph Input: fixed training data.
X = [1, 2, 3]
Y = [1, 2, 3]

# Set wrong model weight on purpose.
W = tf.Variable(5.0)

# Linear model.
hypothesis = X * W

# cost/loss function: mean squared error.
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize: built-in gradient-descent optimizer replaces the manual
# gradient/descent/update ops from the previous snippet.
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Launch the graph in a session.
with tf.Session() as sess:
    # Initializes global variables in the graph.
    sess.run(tf.global_variables_initializer())
    for step in range(101):
        _, W_val = sess.run([train, W])
        print(step, W_val)
"""
0 5.0
1 1.2666664
2 1.0177778
3 1.0011852
4 1.000079
...
97 1.0
98 1.0
99 1.0
100 1.0
"""
Tensorflow์๊ฒ Gradient ๊ตฌํ๊ฒ ํด๋ณด๊ธฐ
์์ ์์ ์์ ์ฐ๋ฆฌ๋
MSE ํจ์์ Gradient ๋ฅผ ๊ตฌํด, gradient๋ผ๋ ๋ณ์์ ๋ฃ์ด์ฃผ์๋ค.
gradient = tf.reduce_mean((W * X - Y) * X)
๊ทธ๋ฐ๋ฐ, MSE๋ง ์ ์ํด์ฃผ๊ณ , tensorflowํํ ์ด๊ฑธ ์ํฌ ์ ์๋ค.
๋ํ ๋ฏธ๋ถ๊น์ง๋ง ์ ์ํ๊ณ , ๊ทธ ๊ฐ์์ ๋ด๊ฐ ์ถ๊ฐ์ ์ธ ํ๋ ์ญ์ ํ ์ ์๋ค.
# Lab 3 Minimizing Cost
# This is optional: have TensorFlow compute the gradients, optionally
# modify them by hand, then apply them via compute_gradients /
# apply_gradients instead of calling minimize() directly.
import tensorflow as tf

# tf Graph Input: fixed training data.
X = [1, 2, 3]
Y = [1, 2, 3]

# Set wrong model weight on purpose.
W = tf.Variable(5.)

# Linear model.
hypothesis = X * W

# Manual gradient of the MSE cost, kept for comparison with TensorFlow's
# computed value; the analytic formula carries a factor of 2.
gradient = tf.reduce_mean((W * X - Y) * X) * 2

# cost/loss function: mean squared error.
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Gradient Descent Optimizer.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

# Calling .minimize(cost) on the optimizer would run plain gradient
# descent in one step. Calling .compute_gradients(cost) instead returns
# the (gradient, variable) pairs — i.e. the differentiated values —
# without applying them.
# Get gradients
gvs = optimizer.compute_gradients(cost)

# Optional: the gradients can be modified here before being applied,
# e.g. clipped to stabilize training:
# gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]

# Apply gradients: feed the (possibly modified) pairs back to the optimizer.
apply_gradients = optimizer.apply_gradients(gvs)

# Launch the graph in a session.
with tf.Session() as sess:
    # Initializes global variables in the graph.
    sess.run(tf.global_variables_initializer())
    for step in range(101):
        gradient_val, gvs_val, _ = sess.run([gradient, gvs, apply_gradients])
        print(step, gradient_val, gvs_val)
'''
0 37.333332 [(37.333336, 5.0)]
1 33.84889 [(33.84889, 4.6266665)]
2 30.689657 [(30.689657, 4.2881775)]
3 27.825289 [(27.825289, 3.981281)]
...
97 0.0027837753 [(0.0027837753, 1.0002983)]
98 0.0025234222 [(0.0025234222, 1.0002704)]
99 0.0022875469 [(0.0022875469, 1.0002451)]
100 0.0020739238 [(0.0020739238, 1.0002222)]
'''