Softmax classification

# Lab 6 Softmax Classifier
import tensorflow as tf
tf.set_random_seed(777)  # for reproducibility
 
x_data = [[1, 2, 1, 1],
          [2, 1, 3, 2],
          [3, 1, 3, 4],
          [4, 1, 5, 5],
          [1, 7, 5, 5],
          [1, 2, 5, 6],
          [1, 6, 6, 6],
          [1, 7, 7, 7]]
# This format is called one-hot encoding.
y_data = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1],
          [0, 1, 0],
          [0, 1, 0],
          [0, 1, 0],
          [1, 0, 0],
          [1, 0, 0]]
 
X = tf.placeholder("float", [None, 4])
Y = tf.placeholder("float", [None, 3])
nb_classes = 3
 
W = tf.Variable(tf.random_normal([4, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')
 
# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
 
# Cross entropy cost/loss
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
 
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
 
# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
 
    for step in range(2001):
        _, cost_val = sess.run([optimizer, cost], feed_dict={X: x_data, Y: y_data})

        if step % 200 == 0:
            print(step, cost_val)
 
    print('--------------')
    # Testing & One-hot encoding
    a = sess.run(hypothesis, feed_dict={X: [[1, 11, 7, 9]]})
		# ์ตœ๋Œ“๊ฐ’์˜ argument๋ฅผ ๋ฆฌํ„ดํ•ด์คŒ
    print(a, sess.run(tf.argmax(a, 1)))
 
    print('--------------')
    b = sess.run(hypothesis, feed_dict={X: [[1, 3, 4, 3]]})
    print(b, sess.run(tf.argmax(b, 1)))
 
    print('--------------')
    c = sess.run(hypothesis, feed_dict={X: [[1, 1, 0, 1]]})
    print(c, sess.run(tf.argmax(c, 1)))
 
    print('--------------')
    all = sess.run(hypothesis, feed_dict={X: [[1, 11, 7, 9], [1, 3, 4, 3], [1, 1, 0, 1]]})
    print(all, sess.run(tf.argmax(all, 1)))
 
'''
0 6.926112
200 0.6005015
400 0.47295815
600 0.37342924
800 0.28018373
1000 0.23280522
1200 0.21065344
1400 0.19229904
1600 0.17682323
1800 0.16359556
2000 0.15216158
-------------
[[1.3890490e-03 9.9860185e-01 9.0613084e-06]] [1]
-------------
[[0.9311919  0.06290216 0.00590591]] [0]
-------------
[[1.2732815e-08 3.3411323e-04 9.9966586e-01]] [2]
-------------
[[1.3890490e-03 9.9860185e-01 9.0613084e-06]
 [9.3119192e-01 6.2902197e-02 5.9059085e-03]
 [1.2732815e-08 3.3411323e-04 9.9966586e-01]] [1 0 2]
'''
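
To see concretely what the graph above computes, here is a minimal NumPy sketch (the numbers are toy values of my own, not taken from the run above): the softmax turns a row of logits XW + b into probabilities via exp(logits) / sum(exp(logits)), and the cross-entropy cost for that row is -sum(Y * log(softmax)).

import numpy as np

logits = np.array([2.0, 1.0, 0.1])   # toy scores, standing in for one row of XW + b
y = np.array([1.0, 0.0, 0.0])        # toy one-hot label

probs = np.exp(logits) / np.sum(np.exp(logits))  # softmax activations
cross_entropy = -np.sum(y * np.log(probs))       # cost for this one example

print(probs)             # roughly [0.659 0.242 0.099]
print(cross_entropy)     # roughly 0.417
print(np.argmax(probs))  # 0, the predicted class, same idea as tf.argmax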

Fancy Ver. Softmax Classification

์œ„์˜ ์ฝ”๋“œ์—์„œ cost๋ฅผ ์ •์˜ํ•˜๋Š”๋ฐ ์žˆ์–ด ์‹์ด ๋ณต์žกํ•˜๋‹ˆ,

๊ธฐ๋ณธ์ ์œผ๋กœ ์ œ๊ณตํ•ด์ฃผ๋Š” ํ•จ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•ด ๋ณด์ž!

# From
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)
cost = tf.reduce_mean(-tf.reduce_sum(Y*tf.log(hypothesis), axis=1))
# To
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_one_hot)
cost = tf.reduce_mean(cost_i)

label์€ one-hot ์ฝ”๋”ฉ์œผ๋กœ ๋˜์–ด ์žˆ๋Š” Y ๋ฅผ ๋„ฃ์–ด์•ผ ํ•œ๋‹ค๋Š” ์˜๋ฏธ์—์„œ Y_one_hot์œผ๋กœ ์ •ํ–ˆ๋‹ค.

์ด๋ ‡๊ฒŒ ํ•ด์ฃผ๋ฉด, cost_i ๋Š” ๊ฐ ํ–‰์— ๋Œ€ํ•œ cost๊ฐ’๋“ค์„ ๋‹ค ๊ฐ–๊ณ  ์žˆ๋‹ค.

๋งˆ์ง€๋ง‰์œผ๋กœ ์ด ํ–‰์— ๊ด€ํ•œ ๊ฐ’๋“ค์„ ํ‰๊ท ๋‚ด์ฃผ๋ฉด ๊ทธ๊ฒŒ cost์ด๋‹ค.
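
To convince yourself that the two formulations agree, a small sanity check like the one below can be run (toy logits and labels of my own choosing; I use the non-deprecated _v2 variant that also appears in the full code further down):

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1],
                      [0.5, 2.5, 0.3]])
labels = tf.constant([[1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0]])

# Manual cross entropy per row vs. the built-in op
manual_i = -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits)), axis=1)
builtin_i = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)

with tf.Session() as sess:
    print(sess.run(manual_i))                   # per-row costs
    print(sess.run(builtin_i))                  # should match up to numerical precision
    print(sess.run(tf.reduce_mean(builtin_i)))  # the final cost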

Classifying animal species by their features

Given the data, if Y is a class label but is not yet one-hot encoded,

the right first step is to build the one-hot encoding ourselves.

Y = tf.placeholder(tf.int32, [None, 1]) # shape = [?, 1]
# ๋‚ด Y ๋ฐ์ดํ„ฐ, class ๊ฐฏ์ˆ˜๋ฅผ ์ค„ํ…Œ๋‹ˆ ๋งŒ๋“ค์–ด์ค˜ 
# ์—ฌ๊ธฐ์„œ ์กฐ์‹ฌํ•ด์•ผ ํ•˜๋Š”๊ฒƒ, ๋‚ด๊ฐ€ one_hot์„ ์‹คํ–‰ํ•˜๊ณ ๋‚œ ๋’ค ๊ฒฐ๊ณผ ๋žญํฌ๋Š” ํ•œ์ฐจ์› ํฐ ๊ฐ’์„ ๋Œ๋ ค์ค€๋‹ค.
# ์ฆ‰, ์—ฌ๊ธฐ์„œ ๋‚ด๊ฐ€ Y๋Š” rank = 2์ด๋‹ค.
# one_hot์„ ๋Œ๋ฆฌ๊ณ  ๋‚œ ๋’ค rank๋Š” 3์„ ๋ฆฌํ„ดํ•œ๋‹ค๋Š” ์–˜๊ธฐ
Y_one_hot = tf.one_hot(Y, nb_classes) # shape = [?, 1, 7] 
 
# Example
# Y = [[0], [2]] -> shape = (?(2), 1)
# Y_one_hot = [[[1 0 0]], [[0 0 1]]] -> shape = (?(2), 1, 3)
# But we want the shape to come out as (?, nb_classes), e.g. (?, 7).
# That is what the reshape function is for:
# pass the input and the shape you want.
# Here, -1 means "figure this dimension out automatically".
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])
 
# Y_one_hot = [[1 0 0], [0 0 1]] -> shape = (?(2), 3)
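
As a quick check of those shapes, a toy snippet like this can be run (my own values, with 3 classes here instead of the 7 used below):

import tensorflow as tf

nb_classes = 3
Y = tf.constant([[0], [2]], dtype=tf.int32)       # rank 2, shape (2, 1)
Y_one_hot = tf.one_hot(Y, nb_classes)             # rank 3, shape (2, 1, 3)
Y_flat = tf.reshape(Y_one_hot, [-1, nb_classes])  # back to rank 2, shape (2, 3)

with tf.Session() as sess:
    print(sess.run(Y_one_hot))  # [[[1. 0. 0.]]  [[0. 0. 1.]]]
    print(sess.run(Y_flat))     # [[1. 0. 0.]  [0. 0. 1.]]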

The full code

# Lab 6 Softmax Classifier
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility
 
# Predicting animal type based on various features
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
 
print(x_data.shape, y_data.shape)
 
'''
(101, 16) (101, 1)
'''
 
nb_classes = 7  # 0 ~ 6
 
X = tf.placeholder(tf.float32, [None, 16])
Y = tf.placeholder(tf.int32, [None, 1])  # 0 ~ 6
 
Y_one_hot = tf.one_hot(Y, nb_classes)  # one hot
print("one_hot:", Y_one_hot)
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])
print("reshape one_hot:", Y_one_hot)
 
'''
one_hot: Tensor("one_hot:0", shape=(?, 1, 7), dtype=float32)
reshape one_hot: Tensor("Reshape:0", shape=(?, 7), dtype=float32)
'''
 
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')
 
# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)
 
# Cross entropy cost/loss
# tf.stop_gradient() marks a tensor so that it is treated as a constant
# when gradients are computed, i.e. no gradient flows through the labels.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                                 labels=tf.stop_gradient([Y_one_hot])))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
 
# Returns the index of the maximum entry of hypothesis, which is a row of class probabilities per example.
prediction = tf.argmax(hypothesis, 1)
# Taking argmax of the one-hot encoded Y returns the index of the true class.
# If that index matches the predicted index, the comparison yields True.
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
# tf.cast converts the boolean tensor correct_prediction into a tensor of the
# dtype passed as the second argument (here float32, so True/False become 1.0/0.0).
# Note that everything in the graph is a tensor: vectors and matrices alike,
# so the value returned by cast is also a tensor.
# reduce_mean averages that tensor; depending on the axis argument it can
# average over rows, over columns, or over all elements.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 
# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
 
    for step in range(2001):
        _, cost_val, acc_val = sess.run([optimizer, cost, accuracy], feed_dict={X: x_data, Y: y_data})
        if step % 100 == 0:
            print("Step: {:5}\tCost: {:.3f}\tAcc: {:.2%}".format(step, cost_val, acc_val))
 
    # Let's see if we can predict
    pred = sess.run(prediction, feed_dict={X: x_data})
    # y_data: (N,1) = flatten => (N, ) matches pred.shape
    # e.g. [[1], [2]] -> [1, 2]
    for p, y in zip(pred, y_data.flatten()):
        print("[{}] Prediction: {} True Y: {}".format(p == int(y), p, int(y)))
 
'''
Step:     0 Loss: 5.106 Acc: 37.62%
Step:   100 Loss: 0.800 Acc: 79.21%
Step:   200 Loss: 0.486 Acc: 88.12%
...
Step:  1800	Loss: 0.060	Acc: 100.00%
Step:  1900	Loss: 0.057	Acc: 100.00%
Step:  2000	Loss: 0.054	Acc: 100.00%
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Prediction: 3 True Y: 3
...
[True] Prediction: 0 True Y: 0
[True] Prediction: 6 True Y: 6
[True] Prediction: 1 True Y: 1
'''
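
For reference, the prediction/accuracy part at the end behaves like this toy example (probabilities and labels made up purely for illustration):

import tensorflow as tf

hypothesis = tf.constant([[0.1, 0.7, 0.2],   # predicted class 1
                          [0.8, 0.1, 0.1],   # predicted class 0
                          [0.3, 0.3, 0.4]])  # predicted class 2
Y_one_hot = tf.constant([[0., 1., 0.],       # true class 1
                         [1., 0., 0.],       # true class 0
                         [0., 1., 0.]])      # true class 1

prediction = tf.argmax(hypothesis, 1)                    # [1 0 2]
correct = tf.equal(prediction, tf.argmax(Y_one_hot, 1))  # [True True False]
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))  # 2/3

with tf.Session() as sess:
    print(sess.run([prediction, correct, accuracy]))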