-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmodels.py
More file actions
103 lines (80 loc) · 3.11 KB
/
models.py
File metadata and controls
103 lines (80 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from scipy.special import softmax
import time
import torch
import torch.utils.data
import torch.utils.data.dataset
import torch.autograd
import numpy as np
from gameEngine import TicTacToeMoves
# Short module-level alias for TicTacToeMoves (imported from gameEngine).
Ms = TicTacToeMoves
class BaseNN(torch.nn.Module):
    """Fully connected network with a scalar evaluation head and a 9-way policy head.

    The trunk is ``n_layers`` linear layers (9 -> hidden_size, then
    hidden_size -> hidden_size) with leaky-ReLU activations.  Two linear heads
    sit on top of the trunk: ``evaluation_output`` (1 value) and
    ``policy_output`` (9 logits).

    The training hyper-parameters (``n_iter``, ``batch_size``, ``lr``) are kept
    on the instance so that ``fit`` can reuse them and ``id`` can report them.
    """

    def __init__(self, n_layers=3, hidden_size=32, n_iter=None, batch_size=None, lr=None):
        """Build the layers and store the training defaults.

        Args:
            n_layers: Number of trunk layers (the first maps 9 -> hidden_size).
            hidden_size: Width of every trunk layer.
            n_iter: Number of passes over the data in ``fit``; 100 if None.
            batch_size: Mini-batch size used by ``fit``; 1024 if None.
            lr: Adam learning rate used by ``fit``; 0.001 if None.
        """
        super().__init__()
        # Creation timestamp (minute resolution); only used as a suffix in id().
        self.uid = time.strftime("%Y-%m-%d:%Hh%M")
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.n_iter = 100 if n_iter is None else n_iter
        self.batch_size = 1024 if batch_size is None else batch_size
        self.lr = 0.001 if lr is None else lr
        self.layers = torch.nn.ModuleList(
            [torch.nn.Linear(9, hidden_size)]
            + [torch.nn.Linear(hidden_size, hidden_size) for _ in range(n_layers - 1)]
        )
        self.evaluation_output = torch.nn.Linear(hidden_size, 1)
        self.policy_output = torch.nn.Linear(hidden_size, 9)

    def forward(self, x):
        """Return ``(evaluation, policy_logits)`` for ``x``.

        The last two dimensions of ``x`` are flattened into one, which must
        yield 9 features for the first layer (presumably a 3x3 board — confirm
        against the caller).  Any leading dimensions are kept as batch
        dimensions.  The evaluation has its trailing size-1 dimension removed;
        the policy output is the raw 9 logits (no softmax applied here).
        """
        x = x.float()
        # Merge the two trailing dimensions into a single feature axis.
        x = x.reshape(*x.shape[:-2], -1)
        for layer in self.layers:
            x = torch.nn.functional.leaky_relu(layer(x))
        return self.evaluation_output(x).squeeze(dim=-1), self.policy_output(x)

    def predict(self, x):
        """Evaluate a single input and return plain Python/NumPy results.

        Args:
            x: One input accepted by ``forward``; the evaluation output must
                contain exactly one element because ``.item()`` is called on it.

        Returns:
            ``(v, P)`` where ``v`` is a Python float and ``P`` is a NumPy array
            holding the softmax of the policy logits.
        """
        # Inference only: skip autograd bookkeeping instead of detaching later.
        with torch.no_grad():
            v, P = self(x)
        v = v.item()
        P = softmax(P.squeeze().numpy())
        return v, P

    def fit(self, data, n_iter=None, batch_size=None, lr=None, progress=None):
        """Train the network with Adam on ``data``.

        Args:
            data: Iterable of ``(input, policy_target, value_target)`` tensor
                triples.  Inputs are fed to ``forward``; policy targets are
                passed to ``cross_entropy`` against the policy logits and value
                targets to ``mse_loss`` against the evaluation output.
                NOTE(review): whether policy targets are class indices or
                probability vectors is decided by the caller — confirm.
            n_iter: If given, replaces the stored number of passes.
            batch_size: If given, replaces the stored batch size.
            lr: If given, replaces the stored learning rate.
            progress: Optional sequence whose first element provides
                ``add_task(...)`` and ``update(...)`` (looks like a
                ``rich.progress.Progress`` with custom ``arg1``/``arg1_n``
                fields — confirm against the caller).

        Returns:
            A list with one NumPy array per pass: ``[value_loss, policy_loss]``,
            each averaged over all samples.

        Raises:
            ValueError: If ``data`` contains no samples.
        """
        if n_iter is not None:
            self.n_iter = n_iter
        if batch_size is not None:
            self.batch_size = batch_size
        if lr is not None:
            self.lr = lr

        # Materialise once so that generators work and emptiness can be checked
        # before the module is switched into training mode.
        samples = list(data)
        if not samples:
            raise ValueError("fit() requires at least one training sample")

        self.train()

        V, TP, TV = zip(*samples)
        V = torch.stack(V)
        TP = torch.stack(TP)
        TV = torch.stack(TV)

        dataset = torch.utils.data.dataset.TensorDataset(V, TP, TV)
        loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        n_samples = len(dataset)

        def criterion(v, p, tv, tp):
            # Returned separately so each term can be reported on its own.
            return (
                torch.nn.functional.mse_loss(v, tv),
                torch.nn.functional.cross_entropy(p, tp),
            )

        task = None
        if progress is not None:
            # Use the effective pass count: the ``n_iter`` argument is None
            # whenever the caller relies on the stored setting.
            task = progress[0].add_task("Training", total=self.n_iter, arg1_n="loss", arg1="N/A")

        losses = []
        for _ in range(self.n_iter):
            # Running [value, policy] loss sums, weighted by batch size.
            cl = np.zeros(2)
            for s, tp, tv in loader:
                optimizer.zero_grad()
                v, p = self(s)
                loss = criterion(v, p, tv, tp)
                for i, term in enumerate(loss):
                    cl[i] += term.item() * len(s)
                loss = loss[0] + loss[1]
                loss.backward()
                optimizer.step()
            # Turn the weighted sums into per-sample means for this pass.
            cl /= n_samples
            losses.append(cl)
            if progress is not None:
                progress[0].update(task, advance=1, arg1=f'{cl.sum():.5f} = {cl[0]:.5f} + {cl[1]:.5f}')

        self.eval()
        return losses

    def id(self):
        """Return a string identifying the architecture, training settings and creation time."""
        return f'BaseNN-{self.n_layers}-{self.hidden_size}-{self.n_iter}-{self.batch_size}-{self.lr}_{self.uid}'