Train/Test Dataset
A naive way to measure accuracy would be to evaluate the model with the very data it was trained on.
In practice, of course, that tells us almost nothing.
From the model's point of view it is being asked about examples it has already seen, so it is bound to produce good results. What we really want to know is how well the trained model
predicts when it is given new, unseen examples. For that reason we have to split the whole dataset into two parts and manage them separately:
a training set and a test set.
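As a minimal sketch of such a split (not part of the lab code; the 70/30 ratio and the toy arrays are assumptions made only for illustration):

import numpy as np

# Hypothetical toy data: 10 samples with 3 features each, plus labels.
data = np.arange(30).reshape(10, 3)
labels = np.arange(10)

# Shuffle, then hold out the last 30% as the test set.
indices = np.random.permutation(len(data))
split = int(0.7 * len(data))
train_idx, test_idx = indices[:split], indices[split:]

x_train, y_train = data[train_idx], labels[train_idx]  # used to fit the model
x_test, y_test = data[test_idx], labels[test_idx]      # used only for evaluation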
Learning rate
Looking at gradient descent, a constant called alpha is multiplied into the update; it determines how large a step is taken toward the optimum at each iteration.
On the left, where the learning rate is large, the value overshoots, moves far away from the minimum, and diverges;
on the right, where the learning rate is too small, the parameters barely move and training can get stuck in a local minimum.
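The update itself is the standard rule W := W - alpha * d(cost)/dW. The following one-dimensional sketch (my own toy example, with cost(w) = w**2 and an arbitrary starting point) makes the effect of alpha visible:

def run_gradient_descent(alpha, steps=20, w=5.0):
    # w := w - alpha * d(cost)/dw for the toy cost(w) = w**2
    for _ in range(steps):
        grad = 2 * w           # derivative of w**2
        w = w - alpha * grad   # the step size is controlled by alpha
    return w

print(run_gradient_descent(alpha=1.5))    # overshoots and blows up (diverges)
print(run_gradient_descent(alpha=1e-10))  # barely moves away from the starting point 5.0
print(run_gradient_descent(alpha=0.1))    # converges toward the minimum at w = 0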
Large learning rate
# Lab 7 Learning rate and Evaluation
import tensorflow as tf
tf.set_random_seed(777) # for reproducibility
x_data = [[1, 2, 1],
[1, 3, 2],
[1, 3, 4],
[1, 5, 5],
[1, 7, 5],
[1, 2, 5],
[1, 6, 6],
[1, 7, 7]]
y_data = [[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 1, 0],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0],
[1, 0, 0]]
# Evaluate our model using this test dataset
x_test = [[2, 1, 1],
[3, 1, 2],
[3, 3, 4]]
y_test = [[0, 0, 1],
[0, 0, 1],
[0, 0, 1]]
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
W = tf.Variable(tf.random_normal([3, 3]))
b = tf.Variable(tf.random_normal([3]))
# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
# Cross entropy cost/loss
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Try to change learning_rate to small numbers
# Large learning rate
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.5).minimize(cost)
# Correct prediction Test model
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(201):
        cost_val, W_val, _ = sess.run([cost, W, optimizer], feed_dict={X: x_data, Y: y_data})
        print(step, cost_val, W_val)

    # predict
    print("Prediction:", sess.run(prediction, feed_dict={X: x_test}))
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))
'''
when lr = 1.5
0 5.73203 [[-0.30548954 1.22985029 -0.66033536]
[-4.39069986 2.29670858 2.99386835]
[-3.34510708 2.09743214 -0.80419564]]
1 23.1494 [[ 0.06951046 0.29449689 -0.0999819 ]
[-1.95319986 -1.63627958 4.48935604]
[-0.90760708 -1.65020132 0.50593793]]
2 27.2798 [[ 0.44451016 0.85699677 -1.03748143]
[ 0.48429942 0.98872018 -0.57314301]
[ 1.52989244 1.16229868 -4.74406147]]
3 8.668 [[ 0.12396193 0.61504567 -0.47498202]
[ 0.22003263 -0.2470119 0.9268558 ]
[ 0.96035379 0.41933775 -3.43156195]]
4 5.77111 [[-0.9524312 1.13037777 0.08607888]
[-3.78651619 2.26245379 2.42393875]
[-3.07170963 3.14037919 -2.12054014]]
5 inf [[ nan nan nan]
[ nan nan nan]
[ nan nan nan]]
6 nan [[ nan nan nan]
[ nan nan nan]
[ nan nan nan]]
...
Prediction: [0 0 0]
Accuracy: 0.0
'''
It never reaches the minimum; the cost diverges instead.
Small learning rate
# Lab 7 Learning rate and Evaluation
import tensorflow as tf
tf.set_random_seed(777) # for reproducibility
x_data = [[1, 2, 1],
[1, 3, 2],
[1, 3, 4],
[1, 5, 5],
[1, 7, 5],
[1, 2, 5],
[1, 6, 6],
[1, 7, 7]]
y_data = [[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 1, 0],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0],
[1, 0, 0]]
# Evaluate our model using this test dataset
x_test = [[2, 1, 1],
[3, 1, 2],
[3, 3, 4]]
y_test = [[0, 0, 1],
[0, 0, 1],
[0, 0, 1]]
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
W = tf.Variable(tf.random_normal([3, 3]))
b = tf.Variable(tf.random_normal([3]))
# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
# Cross entropy cost/loss
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Try to change learning_rate to small numbers
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-10).minimize(cost)
# Correct prediction Test model
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(201):
        cost_val, W_val, _ = sess.run([cost, W, optimizer], feed_dict={X: x_data, Y: y_data})
        print(step, cost_val, W_val)

    # predict
    print("Prediction:", sess.run(prediction, feed_dict={X: x_test}))
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))
'''
When lr = 1e-10
0 5.73203 [[ 0.80269563 0.67861295 -1.21728313]
[-0.3051686 -0.3032113 1.50825703]
[ 0.75722361 -0.7008909 -2.10820389]]
1 5.73203 [[ 0.80269563 0.67861295 -1.21728313]
[-0.3051686 -0.3032113 1.50825703]
[ 0.75722361 -0.7008909 -2.10820389]]
...
199 5.73203 [[ 0.80269563 0.67861295 -1.21728313]
[-0.3051686 -0.3032113 1.50825703]
[ 0.75722361 -0.7008909 -2.10820389]]
200 5.73203 [[ 0.80269563 0.67861295 -1.21728313]
[-0.3051686 -0.3032113 1.50825703]
[ 0.75722361 -0.7008909 -2.10820389]]
Prediction: [0 0 0]
Accuracy: 0.0
'''
Observing the output, we can see that the cost does not decrease at all.
The model is either trapped in a local minimum or simply not moving, because each update step is negligibly small.
When the input data contains very large values
import tensorflow as tf
import numpy as np
tf.set_random_seed(777) # for reproducibility
xy = np.array([[828.659973, 833.450012, 908100, 828.349976, 831.659973],
[823.02002, 828.070007, 1828100, 821.655029, 828.070007],
[819.929993, 824.400024, 1438100, 818.97998, 824.159973],
[816, 820.958984, 1008100, 815.48999, 819.23999],
[819.359985, 823, 1188100, 818.469971, 818.97998],
[819, 823, 1198100, 816, 820.450012],
[811.700012, 815.25, 1098100, 809.780029, 813.669983],
[809.51001, 816.659973, 1398100, 804.539978, 809.559998]])
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
# Hypothesis
hypothesis = tf.matmul(X, W) + b
# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)
# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())
for step in range(101):
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)
'''
0 Cost: 2.45533e+12
Prediction:
[[-1104436.375]
[-2224342.75 ]
[-1749606.75 ]
[-1226179.375]
[-1445287.125]
[-1457459.5 ]
[-1335740.5 ]
[-1700924.625]]
1 Cost: 2.69762e+27
Prediction:
[[ 3.66371490e+13]
[ 7.37543360e+13]
[ 5.80198785e+13]
[ 4.06716290e+13]
[ 4.79336847e+13]
[ 4.83371348e+13]
[ 4.43026590e+13]
[ 5.64060907e+13]]
2 Cost: inf
Prediction:
[[ -1.21438790e+21]
[ -2.44468702e+21]
[ -1.92314724e+21]
[ -1.34811610e+21]
[ -1.58882674e+21]
[ -1.60219962e+21]
[ -1.46847142e+21]
[ -1.86965602e+21]]
3 Cost: inf
Prediction:
[[ 4.02525216e+28]
[ 8.10324465e+28]
[ 6.37453079e+28]
[ 4.46851237e+28]
[ 5.26638074e+28]
[ 5.31070676e+28]
[ 4.86744608e+28]
[ 6.19722623e+28]]
4 Cost: inf
Prediction:
[[ -1.33422428e+36]
[ -2.68593010e+36]
[ -2.11292430e+36]
[ -1.48114879e+36]
[ -1.74561303e+36]
[ -1.76030542e+36]
[ -1.61338091e+36]
[ -2.05415459e+36]]
5 Cost: inf
Prediction:
[[ inf]
[ inf]
[ inf]
[ inf]
[ inf]
[ inf]
[ inf]
[ inf]]
6 Cost: nan
Prediction:
[[ nan]
[ nan]
[ nan]
[ nan]
[ nan]
[ nan]
[ nan]
[ nan]]
'''
??? Looking at the code above, it is an extremely simple piece of code, yet the cost blows up. Why?
Regression ultimately comes down to matrix arithmetic, and when the elements of those matrices differ wildly in magnitude, the computation goes wrong.
We compute these matrices numerically, and because a computer can only represent real values approximately, round-off error is unavoidable.
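As a quick illustration of round-off (my own example, not from the lab): in 32-bit floats, adding a small number to a very large one can have no effect at all, which is exactly the kind of scale mismatch we have when one column is around 800 and another is around 1,000,000.

import numpy as np

# float32 keeps only about 7 significant decimal digits,
# so 1e8 + 1 rounds straight back to 1e8.
a = np.float32(1e8)
b = np.float32(1.0)
print(a + b == a)  # True: the small value is lost completely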
More on this topic is covered in 08. Pivoting. In addition, when input data like the above is used, the values associated with each weight are spread very unevenly.
In that case every weight is scaled by the same learning rate alpha as the optimizer searches for the optimum, but w1 and w2 are affected very differently, and the chance of divergence becomes extremely high.
Therefore we need to normalize the inputs so that w1 and w2 are influenced equally.
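A quick way to see why (a made-up two-feature example, not part of the lab): in linear regression the gradient for each weight is proportional to its input column, so a column whose values are around 1,000,000 produces gradients roughly a thousand times larger than a column whose values are around 800, and no single alpha suits both weights.

import numpy as np

# Hypothetical two-feature example: x1 ~ 800, x2 ~ 1,000,000 (like the stock data above).
X = np.array([[820.0, 1100000.0],
              [815.0,  950000.0]])
y = np.array([[818.0], [812.0]])
W = np.zeros((2, 1))

# Gradient of the mean squared error with respect to W: (2/N) * X^T (XW - y)
grad = 2.0 / len(X) * X.T @ (X @ W - y)
print(grad.ravel())  # the second component is about 1000x larger than the first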
Normalized inputs (min-max scale)
Therefore we scale these values before feeding them in.
Here we use min-max scaling, which normalizes each column based on its own minimum and maximum.
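Written out, min-max scaling maps every column into the [0, 1] range (the small epsilon in the code below only guards against division by zero):

$$x' = \frac{x - \min(x)}{\max(x) - \min(x)}$$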
import tensorflow as tf
import numpy as np
tf.set_random_seed(777) # for reproducibility
def min_max_scaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)
xy = np.array(
[
[828.659973, 833.450012, 908100, 828.349976, 831.659973],
[823.02002, 828.070007, 1828100, 821.655029, 828.070007],
[819.929993, 824.400024, 1438100, 818.97998, 824.159973],
[816, 820.958984, 1008100, 815.48999, 819.23999],
[819.359985, 823, 1188100, 818.469971, 818.97998],
[819, 823, 1198100, 816, 820.450012],
[811.700012, 815.25, 1098100, 809.780029, 813.669983],
[809.51001, 816.659973, 1398100, 804.539978, 809.559998],
]
)
# very important. It does not work without it.
# or: xy = MinMaxScaler(xy)
xy = min_max_scaler(xy)
print(xy)
'''
[[0.99999999 0.99999999 0. 1. 1. ]
[0.70548491 0.70439552 1. 0.71881782 0.83755791]
[0.54412549 0.50274824 0.57608696 0.606468 0.6606331 ]
[0.33890353 0.31368023 0.10869565 0.45989134 0.43800918]
[0.51436 0.42582389 0.30434783 0.58504805 0.42624401]
[0.49556179 0.42582389 0.31521739 0.48131134 0.49276137]
[0.11436064 0. 0.20652174 0.22007776 0.18597238]
[0. 0.07747099 0.5326087 0. 0. ]]
'''
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
# Hypothesis
hypothesis = tf.matmul(X, W) + b
# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Minimize
train = tf.train.GradientDescentOptimizer(learning_rate=1e-5).minimize(cost)
# Launch the graph in a session.
with tf.Session() as sess:
    # Initializes global variables in the graph.
    sess.run(tf.global_variables_initializer())

    for step in range(101):
        _, cost_val, hy_val = sess.run(
            [train, cost, hypothesis], feed_dict={X: x_data, Y: y_data}
        )
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)
'''
0 Cost: 0.15230925
Prediction:
[[ 1.6346191 ]
[ 0.06613699]
[ 0.3500818 ]
[ 0.6707252 ]
[ 0.61130744]
[ 0.61464405]
[ 0.23171967]
[-0.1372836 ]]
1 Cost: 0.15230872
Prediction:
[[ 1.634618 ]
[ 0.06613836]
[ 0.35008252]
[ 0.670725 ]
[ 0.6113076 ]
[ 0.6146443 ]
[ 0.23172 ]
[-0.13728246]]
...
99 Cost: 0.1522546
Prediction:
[[ 1.6345041 ]
[ 0.06627947]
[ 0.35014683]
[ 0.670706 ]
[ 0.6113161 ]
[ 0.61466044]
[ 0.23175153]
[-0.13716647]]
100 Cost: 0.15225402
Prediction:
[[ 1.6345029 ]
[ 0.06628093]
[ 0.35014752]
[ 0.67070574]
[ 0.61131614]
[ 0.6146606 ]
[ 0.23175186]
[-0.13716528]]
'''
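With the inputs scaled into the [0, 1] range, the cost stays finite from the very first step and keeps decreasing instead of blowing up to inf and nan as before.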