Numpy.loadtxt()
Note: np.loadtxt requires every value in the csv file to share the same data type.
# Lab 4 Multi-variable linear regression
# Load the whole training set with NumPy; loadtxt parses the csv into a
# single float32 matrix (so every column must be numeric).
import tensorflow as tf
import numpy as np
tf.set_random_seed(777) # for reproducibility
# One row per student; NOTE(review): later output suggests 4 columns
# (3 exam scores + final score) — confirm against the csv.
xy = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)
# Features: all rows, every column except the last.
x_data = xy[:, 0:-1]
# Target: all rows, last column; the list index [-1] keeps it 2-D, shape (N, 1).
y_data = xy[:, [-1]]
-
Help, I don't understand slicing!
[3:5] => from index 3 up to (but not including) 5 -> (3, 4); [:] => the whole list -> (0, 1, 2, 3, 4, 5); [:-1] => everything except the last element -> (0, 1, 2, 3, 4); [2:4] = [8,9] => replace the slots from index 2 up to 4 with 8, 9 -> (1, 8, 9, 4, 5)
Code for loading the data
# Lab 4 Multi-variable linear regression
# Same loading code as above, plus sanity-check prints of the array
# contents and shapes before building the model.
import tensorflow as tf
import numpy as np
tf.set_random_seed(777) # for reproducibility
# Parse the csv into a single float32 matrix (all columns must be numeric).
xy = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)
# Features: all rows, every column except the last.
x_data = xy[:, 0:-1]
# Target: all rows, last column kept 2-D via the list index -> shape (N, 1).
y_data = xy[:, [-1]]
# Make sure the shape and data are OK
print(x_data, "\nx_data shape:", x_data.shape)
print(y_data, "\ny_data shape:", y_data.shape)
[[ 73. 80. 75.]
[ 93. 88. 93.]
...
[ 76. 83. 71.]
[ 96. 93. 95.]]
x_data shape: (25, 3)
[[152.]
[185.]
...
[149.]
[192.]]
y_data shape: (25, 1)
Feeding values into the placeholders and printing the results
# Lab 4 Multi-variable linear regression
# Full pipeline: load the csv, build a linear model y = XW + b, train it
# with gradient descent, then predict scores for new inputs.
import tensorflow as tf
import numpy as np

tf.set_random_seed(777)  # for reproducibility

# Parse the csv into one float32 matrix (all columns must be numeric).
xy = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)
# Features: all rows, every column except the last.
x_data = xy[:, 0:-1]
# Target: all rows, last column; the list index keeps it 2-D, shape (N, 1).
y_data = xy[:, [-1]]

# Make sure the shape and data are OK
print(x_data, "\nx_data shape:", x_data.shape)
print(y_data, "\ny_data shape:", y_data.shape)

# data output
'''
[[ 73. 80. 75.]
[ 93. 88. 93.]
...
[ 76. 83. 71.]
[ 96. 93. 95.]]
x_data shape: (25, 3)
[[152.]
[185.]
...
[149.]
[192.]]
y_data shape: (25, 1)
'''

# placeholders for a tensor that will be always fed.
# None in the first dimension lets any batch size be fed at run time.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters: one weight per feature, plus a scalar bias.
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis: linear model over all features at once via matrix multiply.
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function: mean squared error.
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize. The small learning rate keeps the un-normalized inputs
# (scores up to ~100) from making the updates diverge.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

for step in range(2001):
    cost_val, hy_val, _ = sess.run([cost, hypothesis, train],
                                   feed_dict={X: x_data, Y: y_data})
    if step % 10 == 0:
        print(step, "Cost:", cost_val, "\nPrediction:\n", hy_val)

# train output
'''
0 Cost: 21027.0
Prediction:
[[22.048063 ]
[21.619772 ]
...
[31.36112 ]
[24.986364 ]]
10 Cost: 95.976326
Prediction:
[[157.11063 ]
[183.99283 ]
...
[167.48862 ]
[193.25117 ]]
1990 Cost: 24.863274
Prediction:
[[154.4393 ]
[185.5584 ]
...
[158.27443 ]
[192.79778 ]]
2000 Cost: 24.722485
Prediction:
[[154.42894 ]
[185.5586 ]
...
[158.24257 ]
[192.79166 ]]
'''

# Ask my score: run the trained hypothesis on unseen inputs.
print("Your score will be ", sess.run(hypothesis,
                                      feed_dict={X: [[100, 70, 101]]}))
print("Other scores will be ", sess.run(hypothesis,
                                        feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))
Your score will be [[ 181.73277283]]
Other scores will be [[ 145.86265564]
[ 187.23129272]]
Queue Runners
When there is a lot of training data, keeping all of it in memory while working is too heavy a load.
For that reason TensorFlow provides something called Queue Runners. The basic idea is:
- Keep a queue of all the files that need to be loaded.
- Decide how each file should be read (Reader1, Reader2, ...).
- Decide how the data that was read should be decoded (Decoder).
- Run the training in batch units, a few examples at a time.
data-01-test-score.csv
73,80,75,152
93,88,93,185
89,91,90,180
96,98,100,196
73,66,70,142
53,46,55,101
69,74,77,149
47,56,60,115
87,79,90,175
79,70,88,164
69,70,73,141
70,65,74,141
93,95,91,184
79,80,73,152
70,73,78,148
93,89,96,192
78,75,68,147
81,90,93,183
88,92,86,177
78,83,77,159
82,86,90,177
86,82,89,175
78,83,85,175
76,83,71,149
96,93,95,192
# Lab 4 Multi-variable linear regression
# https://www.tensorflow.org/programmers_guide/reading_data
# Same model as above, but the csv is streamed through TensorFlow's
# Queue Runners instead of being loaded into memory all at once.
import tensorflow as tf

tf.set_random_seed(777)  # for reproducibility

# Queue of input files. string_input_producer takes a list, so several
# filenames could be queued; here there is just one.
filename_queue = tf.train.string_input_producer(
    ['data-01-test-score.csv'], shuffle=False, name='filename_queue')

# Decide how the queued files are read: line by line, as text.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the
# decoded result: each line read above (value) is interpreted as a csv
# record of four float columns.
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

# collect batches of csv in
# Training happens batch by batch, so decoded rows are grouped 10 at a
# time: the first three columns become train_x_batch, the last column
# train_y_batch. These are graph nodes — they only produce data when run
# inside the session below.
train_x_batch, train_y_batch = \
    tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters: one weight per feature, plus a scalar bias.
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis: linear model over all features via matrix multiply.
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function: mean squared error.
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

# Start populating the filename queue. This must run before the batch
# nodes are evaluated, or sess.run on them would block forever.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for step in range(2001):
    # Run the batch nodes to actually pull the next 10 rows off the queue.
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_batch, Y: y_batch})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Stop the queue threads and wait for them to finish.
coord.request_stop()
coord.join(threads)

# Ask my score: run the trained hypothesis on unseen inputs.
print("Your score will be ",
      sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
print("Other scores will be ",
      sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))
...
[172.44307]
[173.06042]
[164.73372]
[158.24258]
[192.79166]]
Your score will be [[181.73277]]
Other scores will be [[145.86266]
[187.2313 ]]