-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsimple_network.py
More file actions
135 lines (102 loc) · 4.25 KB
/
simple_network.py
File metadata and controls
135 lines (102 loc) · 4.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from __future__ import print_function
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import sys
import tensorflow as tf
import tensorflow.contrib.layers as tf_layers
import traceback_utils
import datetime
from tensorflow.contrib.tensorboard.plugins import projector
from data_utils import DataParser
import tree_utils as tree
from tf_utils import partitioned_avg, predict_loss_acc
from tf_tree_utils import TreePlaceholder, InterfaceTF
from cells import *
from layers import *
"""
The main code in main.py and network.py has grown fairly complex, which makes it
difficult to distinguish the core logic from the extra features. The aim of this file
is therefore to provide a basic, minimal example of the network so that it can be
understood more easily.
"""
class Network:
    """Minimal recursive (tree) neural network for binary step classification.

    Tokens are embedded, an UpLayer folds each step's tree bottom-up into a
    root vector, and a small feed-forward head classifies the root into one
    of two classes.
    """
    def __init__(self, vocab_size, dim=128, threads=4):
        """Build the TF graph and session.

        vocab_size -- number of distinct tokens (the embedding table has one
                      extra row, hence vocab_size+1)
        dim        -- embedding / hidden-state dimension
        threads    -- TF inter-/intra-op parallelism thread count
        """
        # Create an empty graph and a session; fixed seed for reproducibility
        graph = tf.Graph()
        graph.seed = 42
        self.session = tf.Session(graph=graph, config=tf.ConfigProto(
            inter_op_parallelism_threads=threads,
            intra_op_parallelism_threads=threads))
        with self.session.graph.as_default():
            # tanh keeps the trainable embeddings bounded in (-1, 1)
            self.embeddings = tf.tanh(tf.get_variable(name="raw_embeddings", shape=[vocab_size+1, dim]))
            interface = InterfaceTF(dim)
            up_layer = tf.make_template('up_layer', UpLayer(dim, self.embeddings))
            self.steps = TreePlaceholder()
            _, steps_roots1 = up_layer(self.steps)  # Main line, computation through tree
            hidden = tf_layers.fully_connected(steps_roots1, num_outputs=dim, activation_fn=tf.nn.relu)
            self.logits = tf_layers.linear(hidden, num_outputs=2)
            self.labels = tf.placeholder(tf.int32, [None])
            self.predictions, self.loss, self.accuracy = predict_loss_acc(self.logits, self.labels)
            self.training = tf.train.AdamOptimizer().minimize(self.loss)
            # Initialize variables
            self.session.run(tf.global_variables_initializer())
            # Finalize graph and log it if requested
            self.session.graph.finalize()
    def train(self, steps, labels):
        """Run one optimization step on a batch; return the batch accuracy."""
        data = self.steps.feed(steps)
        #data.update(self.conjectures.feed(conjectures))
        data.update({ self.labels: labels })
        _, accuracy = self.session.run([self.training, self.accuracy], data)
        return accuracy
    def evaluate(self, steps, labels):
        """Return (accuracy, loss) on a batch without updating weights."""
        data = self.steps.feed(steps)
        #data.update(self.conjectures.feed(conjectures))
        data.update({ self.labels: labels })
        return self.session.run([self.accuracy, self.loss], data)
    def predict(self, steps):
        """Return the predicted class labels for the given steps.

        BUGFIX: the original returned the bare name `predictions`, which is
        undefined here and raised a NameError; it now runs the predictions
        op through the session, mirroring train()/evaluate().
        """
        data = self.steps.feed(steps)
        #data.update(self.conjectures.feed(conjectures))
        return self.session.run(self.predictions, data)
# Build the token encoder and dataset parser, then size the network's
# vocabulary from the parsed data.
encoder = tree.TokenEncoder(('*', '/'))
parser_options = dict(
    encoder=encoder,
    ignore_deps=True,
    truncate_test=0.05,
    truncate_train=0.01,
    complete_vocab=True,
)
data_parser = DataParser("./e-hol-ml-dataset/", **parser_options)
network = Network(len(data_parser.vocabulary_index))
# training: run 1000 minibatch steps, tracking an exponential moving
# average of the training accuracy (decay 0.99).
batch_size = 64
running_acc = 0.5  # EMA of accuracy, initialised at chance level
for step in range(1, 1001):
    batch = data_parser.draw_batch(
        split='train',
        batch_size=batch_size,
        use_preselection=False,
        get_conjectures=False,
    )
    acc = network.train(batch['steps'], batch['labels'])
    running_acc = 0.99 * running_acc + 0.01 * acc
    if step % 100 == 0:
        print("{}: {}".format(step, running_acc))
# testing: sweep the validation split once, in order, accumulating
# sample-weighted accuracy and loss.
batch_size = 128
cursor = (0, 0)   # position within the validation data
total_acc = 0
total_loss = 0
seen = 0          # number of validation samples processed so far
while True:
    batch, cursor = data_parser.draw_batch(
        split='val',
        batch_size=batch_size,
        use_preselection=False,
        get_conjectures=False,
        begin_index=cursor,  # this causes the "in order" mode
    )
    n = len(batch['labels'])
    if n == 0:
        break  # nothing left to evaluate
    accuracy, loss = network.evaluate(batch['steps'], batch['labels'])
    total_acc += accuracy * n
    total_loss += loss * n
    seen += n
    if n < batch_size:
        break  # a short batch means we reached the end of the testing dataset
print("Development accuracy: {}, avg. loss: {}".format(total_acc / seen, total_loss / seen))