Train/Test Dataset

์ •ํ™•๋„๋ฅผ ์ธก์ •ํ•˜๋Š”๋ฐ ์žˆ์–ด์„œ ์•ž์—์„œ๋Š”, ๋ชจ๋ธ์„ ํ›ˆ๋ จํ•˜๋Š” ๋ฐ ์‚ฌ์šฉํ–ˆ๋˜ ๋ฐ์ดํ„ฐ๋ฅผ ๊ฐ€์ง€๊ณ ์„œ ํ‰๊ฐ€ํ–ˆ๋‹ค.

๊ทธ๋Ÿฐ๋ฐ ์‚ฌ์‹ค์€ ๊ทธ๋ ‡๊ฒŒ ์ˆ˜ํ–‰ํ•˜๋ฉด ์•ˆ๋˜๋Š” ๊ฒƒ์ด ๋‹น์—ฐํ•˜๋‹ค.

๋ชจ๋ธ์— ์ž…์žฅ์—์„œ๋Š” ๋‚ด๊ฐ€ ์ด๋ฏธ ๋จน์–ด๋ณธ ์Œ์‹์— ๋Œ€ํ•ด ๊ฒฐ๊ณผ๋ฅผ ๋‚ด๋†“์œผ๋ผ ํ•˜๋‹ˆ ๋‹น์—ฐํžˆ ์ข‹์€ ๊ฒฐ๊ณผ๊ฐ€ ๋‚˜์˜ค๊ธฐ ๋•Œ๋ฌธ์ด๋‹ค. ์šฐ๋ฆฌ๋Š” ๋‚ด๊ฐ€ ํ›ˆ๋ จํ•œ ๋ชจ๋ธ์ด ์ƒˆ๋กœ์šด ์Œ์‹์„ ๋„ฃ์—ˆ์„ ๋•Œ๋„

์ข‹๊ฒŒ ์˜ˆ์ธกํ•˜๋Š”์ง€๊ฐ€ ๊ถ๊ธˆํ•˜๋‹ค. ๊ทธ๋ ‡๊ธฐ ๋•Œ๋ฌธ์— ์šฐ๋ฆฌ๋Š” ์ „์ฒด ๋ฐ์ดํ„ฐ ์…‹์„ ๋‚˜๋ˆ ์„œ ๊ด€๋ฆฌํ•ด์•ผ ํ•œ๋‹ค.

Into Train and Test.
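
As a minimal sketch of such a split (the 80/20 ratio and every name here are my own choices, not from the lab code), plain NumPy indexing is enough:

import numpy as np

np.random.seed(777)  # for reproducibility, echoing the lab's seed

# Hypothetical toy dataset: 10 samples, 3 features, 3 one-hot classes
data = np.arange(30, dtype=np.float32).reshape(10, 3)
labels = np.eye(3)[np.random.randint(0, 3, size=10)]

# Shuffle, then hold out the last 20% as the test set
idx = np.random.permutation(len(data))
split = int(len(data) * 0.8)
x_train, y_train = data[idx[:split]], labels[idx[:split]]
x_test, y_test = data[idx[split:]], labels[idx[split:]]

print(x_train.shape, x_test.shape)  # (8, 3) (2, 3)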

Learning rate

Looking at gradient descent, the gradient is multiplied by a constant alpha, which determines how big a jump to take toward the optimum at each step.

์™ผ์ชฝ์€, ํ•™์Šต๋ฅ ์ด ํฐ ๊ฒฝ์šฐ, ๊ตญ์†Œ์ ์œผ๋กœ๋ถ€ํ„ฐ ๋ฉ€๋ฆฌ ๋–จ์–ด์ ธ ๊ฐ’์ด ๋ฐœ์‚ฐํ•˜๋Š” ๊ฒฝ์šฐ์ด๊ณ ,

์˜ค๋ฅธ์ชฝ์€ ๋„ˆ๋ฌด ํ•™์Šต๋ฅ ์ด ์ž‘์•„ local minima์— ๋น ์ง€๋Š” ๊ฒฝ์šฐ๋ฅผ ๋ณด์—ฌ์ค€ ๊ฒƒ์ด๋‹ค.

Large Learning rate

# Lab 7 Learning rate and Evaluation
import tensorflow as tf
tf.set_random_seed(777)  # for reproducibility
 
x_data = [[1, 2, 1],
          [1, 3, 2],
          [1, 3, 4],
          [1, 5, 5],
          [1, 7, 5],
          [1, 2, 5],
          [1, 6, 6],
          [1, 7, 7]]
y_data = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1],
          [0, 1, 0],
          [0, 1, 0],
          [0, 1, 0],
          [1, 0, 0],
          [1, 0, 0]]
 
# Evaluate our model using this test dataset
x_test = [[2, 1, 1],
          [3, 1, 2],
          [3, 3, 4]]
y_test = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1]]
 
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
 
W = tf.Variable(tf.random_normal([3, 3]))
b = tf.Variable(tf.random_normal([3]))
 
# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
 
# Cross entropy cost/loss
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Try to change learning_rate to small numbers
# large learning rate
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.5).minimize(cost)
 
# Correct prediction Test model
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
 
# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
 
    for step in range(201):
        cost_val, W_val, _ = sess.run([cost, W, optimizer], feed_dict={X: x_data, Y: y_data})
        print(step, cost_val, W_val)
 
    # predict
    print("Prediction:", sess.run(prediction, feed_dict={X: x_test}))
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))
 
'''
when lr = 1.5
 
0 5.73203 [[-0.30548954  1.22985029 -0.66033536]
 [-4.39069986  2.29670858  2.99386835]
 [-3.34510708  2.09743214 -0.80419564]]
1 23.1494 [[ 0.06951046  0.29449689 -0.0999819 ]
 [-1.95319986 -1.63627958  4.48935604]
 [-0.90760708 -1.65020132  0.50593793]]
2 27.2798 [[ 0.44451016  0.85699677 -1.03748143]
 [ 0.48429942  0.98872018 -0.57314301]
 [ 1.52989244  1.16229868 -4.74406147]]
3 8.668 [[ 0.12396193  0.61504567 -0.47498202]
 [ 0.22003263 -0.2470119   0.9268558 ]
 [ 0.96035379  0.41933775 -3.43156195]]
4 5.77111 [[-0.9524312   1.13037777  0.08607888]
 [-3.78651619  2.26245379  2.42393875]
 [-3.07170963  3.14037919 -2.12054014]]
5 inf [[ nan  nan  nan]
 [ nan  nan  nan]
 [ nan  nan  nan]]
6 nan [[ nan  nan  nan]
 [ nan  nan  nan]
 [ nan  nan  nan]]
 ...
Prediction: [0 0 0]
Accuracy:  0.0
'''

์ตœ์†Œ์ ์— ๋“ค์–ด๊ฐ€์ง€ ๋ชปํ•˜๊ณ , ๋ฐœ์‚ฐํ•ด ๋ฒ„๋ฆฐ๋‹ค.

์ž‘์€ Learning rate

# Lab 7 Learning rate and Evaluation
import tensorflow as tf
tf.set_random_seed(777)  # for reproducibility
 
x_data = [[1, 2, 1],
          [1, 3, 2],
          [1, 3, 4],
          [1, 5, 5],
          [1, 7, 5],
          [1, 2, 5],
          [1, 6, 6],
          [1, 7, 7]]
y_data = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1],
          [0, 1, 0],
          [0, 1, 0],
          [0, 1, 0],
          [1, 0, 0],
          [1, 0, 0]]
 
# Evaluate our model using this test dataset
x_test = [[2, 1, 1],
          [3, 1, 2],
          [3, 3, 4]]
y_test = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1]]
 
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
 
W = tf.Variable(tf.random_normal([3, 3]))
b = tf.Variable(tf.random_normal([3]))
 
# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
 
# Cross entropy cost/loss
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Try to change learning_rate to small numbers
# small learning rate
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-10).minimize(cost)
 
# Correct prediction Test model
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
 
# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
 
    for step in range(201):
        cost_val, W_val, _ = sess.run([cost, W, optimizer], feed_dict={X: x_data, Y: y_data})
        print(step, cost_val, W_val)
 
    # predict
    print("Prediction:", sess.run(prediction, feed_dict={X: x_test}))
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))
 
'''
When lr = 1e-10
 
0 5.73203 [[ 0.80269563  0.67861295 -1.21728313]
 [-0.3051686  -0.3032113   1.50825703]
 [ 0.75722361 -0.7008909  -2.10820389]]
1 5.73203 [[ 0.80269563  0.67861295 -1.21728313]
 [-0.3051686  -0.3032113   1.50825703]
 [ 0.75722361 -0.7008909  -2.10820389]]
...
199 5.73203 [[ 0.80269563  0.67861295 -1.21728313]
 [-0.3051686  -0.3032113   1.50825703]
 [ 0.75722361 -0.7008909  -2.10820389]]
200 5.73203 [[ 0.80269563  0.67861295 -1.21728313]
 [-0.3051686  -0.3032113   1.50825703]
 [ 0.75722361 -0.7008909  -2.10820389]]
Prediction: [0 0 0]
Accuracy:  0.0
'''

Looking at the output, the cost is not decreasing at all.

The model is either trapped in a local minimum or simply unable to move.

When the input data contains very large values

import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility
 
xy = np.array([[828.659973, 833.450012, 908100, 828.349976, 831.659973],
               [823.02002, 828.070007, 1828100, 821.655029, 828.070007],
               [819.929993, 824.400024, 1438100, 818.97998, 824.159973],
               [816, 820.958984, 1008100, 815.48999, 819.23999],
               [819.359985, 823, 1188100, 818.469971, 818.97998],
               [819, 823, 1198100, 816, 820.450012],
               [811.700012, 815.25, 1098100, 809.780029, 813.669983],
               [809.51001, 816.659973, 1398100, 804.539978, 809.559998]])
 
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
 
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])
 
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
 
# Hypothesis
hypothesis = tf.matmul(X, W) + b
 
# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
 
# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)
 
# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())
 
for step in range(101):
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)
 
'''
0 Cost:  2.45533e+12
Prediction:
 [[-1104436.375]
 [-2224342.75 ]
 [-1749606.75 ]
 [-1226179.375]
 [-1445287.125]
 [-1457459.5  ]
 [-1335740.5  ]
 [-1700924.625]]
1 Cost:  2.69762e+27
Prediction:
 [[  3.66371490e+13]
 [  7.37543360e+13]
 [  5.80198785e+13]
 [  4.06716290e+13]
 [  4.79336847e+13]
 [  4.83371348e+13]
 [  4.43026590e+13]
 [  5.64060907e+13]]
2 Cost:  inf
Prediction:
 [[ -1.21438790e+21]
 [ -2.44468702e+21]
 [ -1.92314724e+21]
 [ -1.34811610e+21]
 [ -1.58882674e+21]
 [ -1.60219962e+21]
 [ -1.46847142e+21]
 [ -1.86965602e+21]]
3 Cost:  inf
Prediction:
 [[  4.02525216e+28]
 [  8.10324465e+28]
 [  6.37453079e+28]
 [  4.46851237e+28]
 [  5.26638074e+28]
 [  5.31070676e+28]
 [  4.86744608e+28]
 [  6.19722623e+28]]
4 Cost:  inf
Prediction:
 [[ -1.33422428e+36]
 [ -2.68593010e+36]
 [ -2.11292430e+36]
 [ -1.48114879e+36]
 [ -1.74561303e+36]
 [ -1.76030542e+36]
 [ -1.61338091e+36]
 [ -2.05415459e+36]]
5 Cost:  inf
Prediction:
 [[ inf]
 [ inf]
 [ inf]
 [ inf]
 [ inf]
 [ inf]
 [ inf]
 [ inf]]
6 Cost:  nan
Prediction:
 [[ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]]
'''

??? ์œ„์˜ ์ฝ”๋“œ๋ฅผ ๋ณด๋ฉด, ๋„ˆ๋ฌด๋‚˜๋„ ์‹ฌํ”Œํ•œ ์ฝ”๋“œ์ธ๋ฐ cost๊ฐ€ ๋ฐœ์‚ฐํ•˜๋Š” ๋ฌธ์ œ๊ฐ€ ์ƒ๊ฒจ๋ฒ„๋ ธ๋‹ค. ์™œ ์ผ๊นŒ?

In fact, regression boils down to matrix arithmetic. But when a computer carries out those operations, errors creep in whenever the matrix entries are not of similar magnitude.

์šฐ๋ฆฌ๋Š” ํ–‰๋ ฌ์„ ๊ณ„์‚ฐํ•˜๋Š”๋ฐ ์žˆ์–ด Computing Method๋ฅผ ์‚ฌ์šฉํ•˜๋Š”๋ฐ, ํ˜„์‹ค์˜ ๊ฐ’์„ ๊ทผ์‚ฌํ•ด์„œ ๋งคํ•‘ํ•˜๋Š” ์ปดํ“จํ„ฐ์˜ ํ•œ๊ณ„ ๋•Œ๋ฌธ์—, ์šฐ๋ฆฌ๋Š” Round Off Error ๋ฅผ ํ•„์—ฐ์ ์œผ๋กœ ๊ฐ€์งˆ ์ˆ˜ ๋ฐ–์— ์—†๋‹ค.

์ด๊ฒƒ์— ๊ด€ํ•œ ๋‚ด์šฉ์€, 08. Pivoting ๋˜ํ•œ, ์œ„์™€ ๊ฐ™์€ input data๋ฅผ ๋„ฃ์—ˆ์„ ๊ฒฝ์šฐ, ๊ฐ weight์˜ ๊ฐ’์˜ ๋ถ„ํฌ๊ฐ€ ๋ถˆ๊ท ์ผ ํ•˜๋‹ค.

์ด๋ ฌ ๊ฒฝ์šฐ, ํ•™์Šต๋ฅ  alpha์— ์˜ํ•ด ์˜ํ–ฅ์„ ๋ฐ›์œผ๋ฉฐ ์ตœ์ €์ ์„ ์ฐพ์•„๊ฐ€๋Š”๋ฐ, w1, w2๊ฐ€ ๋‹ค๋ฅด๊ฒŒ ์˜ํ–ฅ์„ ๋ฐ›๊ฒŒ๋˜์–ด ๋ฐœ์‚ฐํ•  ๊ฐ€๋Šฅ์„ฑ์ด ๊ต‰์žฅํžˆ ๋†’์•„์ง„๋‹ค.

๋”ฐ๋ผ์„œ ์šฐ๋ฆฌ๋Š” ์ •๊ทœํ™”๋ฅผ ํ•˜์—ฌ w1, w2๊ฐ€ ๊ฐ™์€ ์˜ํ–ฅ๋ ฅ์„ ๋ฐ›๋„๋ก ํ•ด์•ผํ•œ๋‹ค.

Normalized inputs (min-max scale)

๋”ฐ๋ผ์„œ ์šฐ๋ฆฌ๋Š” ์ด ๊ฐ’๋“ค์„ scalingํ•ด์„œ ์ง‘์–ด๋„ฃ์–ด์ค€๋‹ค.

์ด ๋•Œ, ๊ฐ ์—ด ๋ณ„๋กœ ์ตœ์†Ÿ๊ฐ’๊ณผ ์ตœ๋Œ“๊ฐ’์„ ๊ธฐ์ค€์œผ๋กœ ์ •๊ทœํ™”ํ•ด์ฃผ๋Š” min-max scale์„ ์‚ฌ์šฉํ•œ๋‹ค.

import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility
 
def min_max_scaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents division by zero
    return numerator / (denominator + 1e-7)
 
xy = np.array(
    [
        [828.659973, 833.450012, 908100, 828.349976, 831.659973],
        [823.02002, 828.070007, 1828100, 821.655029, 828.070007],
        [819.929993, 824.400024, 1438100, 818.97998, 824.159973],
        [816, 820.958984, 1008100, 815.48999, 819.23999],
        [819.359985, 823, 1188100, 818.469971, 818.97998],
        [819, 823, 1198100, 816, 820.450012],
        [811.700012, 815.25, 1098100, 809.780029, 813.669983],
        [809.51001, 816.659973, 1398100, 804.539978, 809.559998],
    ]
)
 
# very important. It does not work without it.
# or: xy = MinMaxScaler(xy)
xy = min_max_scaler(xy)
print(xy)
 
'''
[[0.99999999 0.99999999 0.         1.         1.        ]
 [0.70548491 0.70439552 1.         0.71881782 0.83755791]
 [0.54412549 0.50274824 0.57608696 0.606468   0.6606331 ]
 [0.33890353 0.31368023 0.10869565 0.45989134 0.43800918]
 [0.51436    0.42582389 0.30434783 0.58504805 0.42624401]
 [0.49556179 0.42582389 0.31521739 0.48131134 0.49276137]
 [0.11436064 0.         0.20652174 0.22007776 0.18597238]
 [0.         0.07747099 0.5326087  0.         0.        ]]
'''
 
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
 
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])
 
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
 
# Hypothesis
hypothesis = tf.matmul(X, W) + b
 
# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
 
# Minimize
train = tf.train.GradientDescentOptimizer(learning_rate=1e-5).minimize(cost)
 
# Launch the graph in a session.
with tf.Session() as sess:
    # Initializes global variables in the graph.
    sess.run(tf.global_variables_initializer())
 
    for step in range(101):
        _, cost_val, hy_val = sess.run(
            [train, cost, hypothesis], feed_dict={X: x_data, Y: y_data}
        )
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)
 
'''
0 Cost: 0.15230925 
Prediction:
 [[ 1.6346191 ]
 [ 0.06613699]
 [ 0.3500818 ]
 [ 0.6707252 ]
 [ 0.61130744]
 [ 0.61464405]
 [ 0.23171967]
 [-0.1372836 ]]
1 Cost: 0.15230872 
Prediction:
 [[ 1.634618  ]
 [ 0.06613836]
 [ 0.35008252]
 [ 0.670725  ]
 [ 0.6113076 ]
 [ 0.6146443 ]
 [ 0.23172   ]
 [-0.13728246]]
...
99 Cost: 0.1522546 
Prediction:
 [[ 1.6345041 ]
 [ 0.06627947]
 [ 0.35014683]
 [ 0.670706  ]
 [ 0.6113161 ]
 [ 0.61466044]
 [ 0.23175153]
 [-0.13716647]]
100 Cost: 0.15225402 
Prediction:
 [[ 1.6345029 ]
 [ 0.06628093]
 [ 0.35014752]
 [ 0.67070574]
 [ 0.61131614]
 [ 0.6146606 ]
 [ 0.23175186]
 [-0.13716528]]
'''