recognizer.py
# -*- coding: utf-8 -*-
# author: K
# desc: 1. basic cnn nets
#       2. vgg19
# note: written against the TensorFlow 1.x graph API (tf.placeholder, tf.train)
import tensorflow as tf
# number of 2x2 poolings applied so far; used to size the first fully connected layer
pooling_times = 0
# keep probability fed at run time; tf.nn.dropout in TF 1.x keeps units with this probability
dropout_prob = tf.placeholder(tf.float32)
def weights_initializer(shape):
values = tf.truncated_normal(shape, stddev = 0.1)
return tf.Variable(values)
def bias_initializer(shape):
values = tf.constant(0.1, shape = shape)
return tf.Variable(values)
def conv_layer(x, W):
# convolutional layer
return tf.nn.conv2d(x, W, strides = [1, 1, 1, 1], padding = "SAME")
def pooling_layer(x, mode = "MAX"):
    global pooling_times
    # 2x2 pooling layer; every call halves the spatial dimensions
    if mode == "MAX":
        pooling_times += 1
        return tf.nn.max_pool(x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")
    raise ValueError("unsupported pooling mode: %s" % mode)
def dropout(h, drop_prob):
    # TF 1.x: the second argument of tf.nn.dropout is the keep probability
    return tf.nn.dropout(h, drop_prob)
def inference(images, num_classes, image_size):
    global pooling_times
    global dropout_prob
    # reset so repeated calls to inference() do not keep accumulating the pooling count
    pooling_times = 0
    # first layer
W_conv1 = weights_initializer([5, 5, 3, 64])
b_conv1 = bias_initializer([64])
h_conv1 = tf.nn.relu(conv_layer(images, W_conv1) + b_conv1)
h_pool1 = pooling_layer(h_conv1)
# second layer
W_conv2 = weights_initializer([5, 5, 64, 64])
b_conv2 = bias_initializer([64])
h_conv2 = tf.nn.relu(conv_layer(h_pool1, W_conv2) + b_conv2)
h_pool2 = pooling_layer(h_conv2)
# third layer
W_conv3 = weights_initializer([5, 5, 64, 128])
b_conv3 = bias_initializer([128])
h_conv3 = tf.nn.relu(conv_layer(h_pool2, W_conv3) + b_conv3)
    # fourth layer
W_conv4 = weights_initializer([5, 5, 128, 64])
b_conv4 = bias_initializer([64])
h_conv4 = tf.nn.relu(conv_layer(h_conv3, W_conv4) + b_conv4)
h_pool4 = pooling_layer(h_conv4)
    # fully connected layer
    # The shape of W_fc1 depends on how many times max pooling has run; normally you can compute the shape dynamically.
    flat_height = image_size[0] // 2**pooling_times
    flat_width = image_size[1] // 2**pooling_times
    W_fc1 = weights_initializer([flat_height * flat_width * 64, 2048])
    b_fc1 = bias_initializer([2048])
    h_flat = tf.reshape(h_pool4, [-1, flat_height * flat_width * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_flat, W_fc1) + b_fc1)
h_fc_drop1 = tf.nn.dropout(h_fc1, dropout_prob)
# second fully connected
W_fc2 = weights_initializer([2048, 256])
b_fc2 = bias_initializer([256])
h_fc2 = tf.nn.relu(tf.matmul(h_fc_drop1, W_fc2) + b_fc2)
# output
W_out = weights_initializer([256, num_classes])
b_out = bias_initializer([num_classes])
h_out = tf.matmul(h_fc2, W_out) + b_out
return h_out
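# A minimal sketch of the "compute the shape dynamically" remark above, not part of
# the original model: read the flattened size off the pooled tensor itself instead of
# tracking pooling_times by hand. `pooled` is assumed to be any [batch, height, width,
# channels] activation such as h_pool4 with a statically known spatial size.
def flatten_dynamic(pooled):
    shape = pooled.get_shape().as_list()       # [batch, height, width, channels]
    flat_dim = shape[1] * shape[2] * shape[3]  # height * width * channels
    return tf.reshape(pooled, [-1, flat_dim]), flat_dim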
def loss(out, labels):
    # TF 1.x requires named arguments here; logits are the raw network outputs
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = labels, logits = out))
    return loss
def optimizer(loss, learning_rate = 0.0001):
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
return optimizer
def predictions(out):
preds = tf.argmax(out, 1)
return preds
def accuracy(out, labels):
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(labels, 1)), tf.float32))
return accuracy
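# A minimal usage sketch showing how the functions above compose in a TF 1.x session.
# The batch size, image size, class count and random NumPy batch below are illustrative
# assumptions, not part of the original training pipeline.
if __name__ == "__main__":
    import numpy as np
    image_size = (64, 64)
    num_classes = 10
    batch_size = 8
    images = tf.placeholder(tf.float32, [None, image_size[0], image_size[1], 3])
    labels = tf.placeholder(tf.float32, [None, num_classes])
    logits = inference(images, num_classes, image_size)
    total_loss = loss(logits, labels)
    train_op = optimizer(total_loss)
    acc = accuracy(logits, labels)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # one training step on a random batch, purely to show the feed pattern
        batch_images = np.random.rand(batch_size, image_size[0], image_size[1], 3).astype(np.float32)
        batch_labels = np.eye(num_classes)[np.random.randint(0, num_classes, batch_size)].astype(np.float32)
        _, step_loss, step_acc = sess.run([train_op, total_loss, acc],
                                          feed_dict = {images: batch_images,
                                                       labels: batch_labels,
                                                       dropout_prob: 0.5})
        print("loss: %.4f, accuracy: %.4f" % (step_loss, step_acc))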