Skip to content

Commit 1eb5e4b

Browse files
committed
update
1 parent c356478 commit 1eb5e4b

File tree

8 files changed

+469
-203
lines changed

8 files changed

+469
-203
lines changed
0 Bytes
Binary file not shown.

doc/pub/week1/ipynb/week1.ipynb

Lines changed: 217 additions & 203 deletions
Large diffs are not rendered by default.

doc/pub/week1/pdf/week1.pdf

1.03 KB
Binary file not shown.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
\begin{MintedVerbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}]
2+
3+
\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{tensorflow}\PYG{+w}{ }\PYG{k}{as}\PYG{+w}{ }\PYG{n+nn}{tf}
4+
\PYG{k+kn}{from}\PYG{+w}{ }\PYG{n+nn}{tensorflow}\PYG{+w}{ }\PYG{k+kn}{import} \PYG{n}{keras}
5+
\PYG{k+kn}{from}\PYG{+w}{ }\PYG{n+nn}{tensorflow}\PYG{n+nn}{.}\PYG{n+nn}{keras}\PYG{+w}{ }\PYG{k+kn}{import} \PYG{n}{layers}\PYG{p}{,} \PYG{n}{regularizers}
6+
7+
\PYG{c+c1}{\PYGZsh{} Check for GPU (TensorFlow will use it automatically if available)}
8+
\PYG{n}{gpus} \PYG{o}{=} \PYG{n}{tf}\PYG{o}{.}\PYG{n}{config}\PYG{o}{.}\PYG{n}{list\PYGZus{}physical\PYGZus{}devices}\PYG{p}{(}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{GPU}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{)}
9+
\PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+sa}{f}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{GPUs available: }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{gpus}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)}
10+
11+
\PYG{c+c1}{\PYGZsh{} 1) Load and preprocess MNIST}
12+
\PYG{p}{(}\PYG{n}{x\PYGZus{}train}\PYG{p}{,} \PYG{n}{y\PYGZus{}train}\PYG{p}{)}\PYG{p}{,} \PYG{p}{(}\PYG{n}{x\PYGZus{}test}\PYG{p}{,} \PYG{n}{y\PYGZus{}test}\PYG{p}{)} \PYG{o}{=} \PYG{n}{keras}\PYG{o}{.}\PYG{n}{datasets}\PYG{o}{.}\PYG{n}{mnist}\PYG{o}{.}\PYG{n}{load\PYGZus{}data}\PYG{p}{(}\PYG{p}{)}
13+
\PYG{c+c1}{\PYGZsh{} Normalize to [0, 1]}
14+
\PYG{n}{x\PYGZus{}train} \PYG{o}{=} \PYG{p}{(}\PYG{n}{x\PYGZus{}train}\PYG{o}{.}\PYG{n}{astype}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{float32}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)} \PYG{o}{/} \PYG{l+m+mf}{255.0}\PYG{p}{)}
15+
\PYG{n}{x\PYGZus{}test} \PYG{o}{=} \PYG{p}{(}\PYG{n}{x\PYGZus{}test}\PYG{o}{.}\PYG{n}{astype}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{float32}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)} \PYG{o}{/} \PYG{l+m+mf}{255.0}\PYG{p}{)}
16+
17+
\PYG{c+c1}{\PYGZsh{} 2) Build the model: 784 \PYGZhy{}\PYGZgt{} 100 \PYGZhy{}\PYGZgt{} 100 \PYGZhy{}\PYGZgt{} 10}
18+
\PYG{n}{l2\PYGZus{}reg} \PYG{o}{=} \PYG{l+m+mf}{1e\PYGZhy{}4} \PYG{c+c1}{\PYGZsh{} L2 regularization strength}
19+
20+
\PYG{n}{model} \PYG{o}{=} \PYG{n}{keras}\PYG{o}{.}\PYG{n}{Sequential}\PYG{p}{(}\PYG{p}{[}
21+
\PYG{n}{layers}\PYG{o}{.}\PYG{n}{Input}\PYG{p}{(}\PYG{n}{shape}\PYG{o}{=}\PYG{p}{(}\PYG{l+m+mi}{28}\PYG{p}{,} \PYG{l+m+mi}{28}\PYG{p}{)}\PYG{p}{)}\PYG{p}{,}
22+
\PYG{n}{layers}\PYG{o}{.}\PYG{n}{Flatten}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
23+
\PYG{n}{layers}\PYG{o}{.}\PYG{n}{Dense}\PYG{p}{(}\PYG{l+m+mi}{100}\PYG{p}{,} \PYG{n}{activation}\PYG{o}{=}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{relu}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{,}
24+
\PYG{n}{kernel\PYGZus{}regularizer}\PYG{o}{=}\PYG{n}{regularizers}\PYG{o}{.}\PYG{n}{l2}\PYG{p}{(}\PYG{n}{l2\PYGZus{}reg}\PYG{p}{)}\PYG{p}{)}\PYG{p}{,}
25+
\PYG{n}{layers}\PYG{o}{.}\PYG{n}{Dense}\PYG{p}{(}\PYG{l+m+mi}{100}\PYG{p}{,} \PYG{n}{activation}\PYG{o}{=}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{relu}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{,}
26+
\PYG{n}{kernel\PYGZus{}regularizer}\PYG{o}{=}\PYG{n}{regularizers}\PYG{o}{.}\PYG{n}{l2}\PYG{p}{(}\PYG{n}{l2\PYGZus{}reg}\PYG{p}{)}\PYG{p}{)}\PYG{p}{,}
27+
\PYG{n}{layers}\PYG{o}{.}\PYG{n}{Dense}\PYG{p}{(}\PYG{l+m+mi}{10}\PYG{p}{,} \PYG{n}{activation}\PYG{o}{=}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{softmax}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} output probabilities for 10 classes}
28+
\PYG{p}{]}\PYG{p}{)}
29+
30+
\PYG{c+c1}{\PYGZsh{} 3) Compile with SGD + weight decay via L2 regularizers}
31+
\PYG{n}{model}\PYG{o}{.}\PYG{n}{compile}\PYG{p}{(}
32+
\PYG{n}{optimizer}\PYG{o}{=}\PYG{n}{keras}\PYG{o}{.}\PYG{n}{optimizers}\PYG{o}{.}\PYG{n}{SGD}\PYG{p}{(}\PYG{n}{learning\PYGZus{}rate}\PYG{o}{=}\PYG{l+m+mf}{0.01}\PYG{p}{)}\PYG{p}{,}
33+
\PYG{n}{loss}\PYG{o}{=}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{sparse\PYGZus{}categorical\PYGZus{}crossentropy}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{,}
34+
\PYG{n}{metrics}\PYG{o}{=}\PYG{p}{[}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{accuracy}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{]}\PYG{p}{,}
35+
\PYG{p}{)}
36+
37+
\PYG{n}{model}\PYG{o}{.}\PYG{n}{summary}\PYG{p}{(}\PYG{p}{)}
38+
39+
\PYG{c+c1}{\PYGZsh{} 4) Train}
40+
\PYG{n}{history} \PYG{o}{=} \PYG{n}{model}\PYG{o}{.}\PYG{n}{fit}\PYG{p}{(}
41+
\PYG{n}{x\PYGZus{}train}\PYG{p}{,} \PYG{n}{y\PYGZus{}train}\PYG{p}{,}
42+
\PYG{n}{epochs}\PYG{o}{=}\PYG{l+m+mi}{10}\PYG{p}{,}
43+
\PYG{n}{batch\PYGZus{}size}\PYG{o}{=}\PYG{l+m+mi}{64}\PYG{p}{,}
44+
\PYG{n}{validation\PYGZus{}split}\PYG{o}{=}\PYG{l+m+mf}{0.1}\PYG{p}{,} \PYG{c+c1}{\PYGZsh{} optional: monitor validation during training}
45+
\PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{1}
46+
\PYG{p}{)}
47+
48+
\PYG{c+c1}{\PYGZsh{} 5) Evaluate on test set}
49+
\PYG{n}{test\PYGZus{}loss}\PYG{p}{,} \PYG{n}{test\PYGZus{}acc} \PYG{o}{=} \PYG{n}{model}\PYG{o}{.}\PYG{n}{evaluate}\PYG{p}{(}\PYG{n}{x\PYGZus{}test}\PYG{p}{,} \PYG{n}{y\PYGZus{}test}\PYG{p}{,} \PYG{n}{verbose}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)}
50+
\PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+sa}{f}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{Test accuracy: }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{test\PYGZus{}acc}\PYG{l+s+si}{:}\PYG{l+s+s2}{.4f}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{, Test loss: }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{test\PYGZus{}loss}\PYG{l+s+si}{:}\PYG{l+s+s2}{.4f}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)}
51+
52+
53+
\end{MintedVerbatim}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
\begin{MintedVerbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}]
2+
\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torch}
3+
\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torch}\PYG{n+nn}{.}\PYG{n+nn}{nn}\PYG{+w}{ }\PYG{k}{as}\PYG{+w}{ }\PYG{n+nn}{nn}
4+
\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torch}\PYG{n+nn}{.}\PYG{n+nn}{optim}\PYG{+w}{ }\PYG{k}{as}\PYG{+w}{ }\PYG{n+nn}{optim}
5+
\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torchvision}
6+
\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torchvision}\PYG{n+nn}{.}\PYG{n+nn}{transforms}\PYG{+w}{ }\PYG{k}{as}\PYG{+w}{ }\PYG{n+nn}{transforms}
7+
8+
\PYG{c+c1}{\PYGZsh{} Device configuration: use GPU if available}
9+
\PYG{n}{device} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{device}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{cuda}\PYG{l+s+s2}{\PYGZdq{}} \PYG{k}{if} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{cuda}\PYG{o}{.}\PYG{n}{is\PYGZus{}available}\PYG{p}{(}\PYG{p}{)} \PYG{k}{else} \PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{cpu}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)}
10+
11+
\PYG{c+c1}{\PYGZsh{} MNIST dataset (downloads if not already present)}
12+
\PYG{n}{transform} \PYG{o}{=} \PYG{n}{transforms}\PYG{o}{.}\PYG{n}{Compose}\PYG{p}{(}\PYG{p}{[}
13+
\PYG{n}{transforms}\PYG{o}{.}\PYG{n}{ToTensor}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,}
14+
\PYG{n}{transforms}\PYG{o}{.}\PYG{n}{Normalize}\PYG{p}{(}\PYG{p}{(}\PYG{l+m+mf}{0.5}\PYG{p}{,}\PYG{p}{)}\PYG{p}{,} \PYG{p}{(}\PYG{l+m+mf}{0.5}\PYG{p}{,}\PYG{p}{)}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} normalize with mean=0.5, std=0.5: (x \PYGZhy{} 0.5) / 0.5 maps [0,1] pixels to [\PYGZhy{}1,1]}
15+
\PYG{p}{]}\PYG{p}{)}
16+
\PYG{n}{train\PYGZus{}dataset} \PYG{o}{=} \PYG{n}{torchvision}\PYG{o}{.}\PYG{n}{datasets}\PYG{o}{.}\PYG{n}{MNIST}\PYG{p}{(}\PYG{n}{root}\PYG{o}{=}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{./data}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{train}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{,} \PYG{n}{download}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{,} \PYG{n}{transform}\PYG{o}{=}\PYG{n}{transform}\PYG{p}{)}
17+
\PYG{n}{test\PYGZus{}dataset} \PYG{o}{=} \PYG{n}{torchvision}\PYG{o}{.}\PYG{n}{datasets}\PYG{o}{.}\PYG{n}{MNIST}\PYG{p}{(}\PYG{n}{root}\PYG{o}{=}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{./data}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{train}\PYG{o}{=}\PYG{k+kc}{False}\PYG{p}{,} \PYG{n}{download}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{,} \PYG{n}{transform}\PYG{o}{=}\PYG{n}{transform}\PYG{p}{)}
18+
19+
\PYG{n}{train\PYGZus{}loader} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{utils}\PYG{o}{.}\PYG{n}{data}\PYG{o}{.}\PYG{n}{DataLoader}\PYG{p}{(}\PYG{n}{train\PYGZus{}dataset}\PYG{p}{,} \PYG{n}{batch\PYGZus{}size}\PYG{o}{=}\PYG{l+m+mi}{64}\PYG{p}{,} \PYG{n}{shuffle}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{)}
20+
\PYG{n}{test\PYGZus{}loader} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{utils}\PYG{o}{.}\PYG{n}{data}\PYG{o}{.}\PYG{n}{DataLoader}\PYG{p}{(}\PYG{n}{test\PYGZus{}dataset}\PYG{p}{,} \PYG{n}{batch\PYGZus{}size}\PYG{o}{=}\PYG{l+m+mi}{64}\PYG{p}{,} \PYG{n}{shuffle}\PYG{o}{=}\PYG{k+kc}{False}\PYG{p}{)}
21+
22+
23+
\PYG{k}{class}\PYG{+w}{ }\PYG{n+nc}{NeuralNet}\PYG{p}{(}\PYG{n}{nn}\PYG{o}{.}\PYG{n}{Module}\PYG{p}{)}\PYG{p}{:}
24+
\PYG{k}{def}\PYG{+w}{ }\PYG{n+nf+fm}{\PYGZus{}\PYGZus{}init\PYGZus{}\PYGZus{}}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{)}\PYG{p}{:}
25+
\PYG{n+nb}{super}\PYG{p}{(}\PYG{n}{NeuralNet}\PYG{p}{,} \PYG{n+nb+bp}{self}\PYG{p}{)}\PYG{o}{.}\PYG{n+nf+fm}{\PYGZus{}\PYGZus{}init\PYGZus{}\PYGZus{}}\PYG{p}{(}\PYG{p}{)}
26+
\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc1} \PYG{o}{=} \PYG{n}{nn}\PYG{o}{.}\PYG{n}{Linear}\PYG{p}{(}\PYG{l+m+mi}{28}\PYG{o}{*}\PYG{l+m+mi}{28}\PYG{p}{,} \PYG{l+m+mi}{100}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} first hidden layer (784 \PYGZhy{}\PYGZgt{} 100)}
27+
\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc2} \PYG{o}{=} \PYG{n}{nn}\PYG{o}{.}\PYG{n}{Linear}\PYG{p}{(}\PYG{l+m+mi}{100}\PYG{p}{,} \PYG{l+m+mi}{100}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} second hidden layer (100 \PYGZhy{}\PYGZgt{} 100)}
28+
\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc3} \PYG{o}{=} \PYG{n}{nn}\PYG{o}{.}\PYG{n}{Linear}\PYG{p}{(}\PYG{l+m+mi}{100}\PYG{p}{,} \PYG{l+m+mi}{10}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} output layer (100 \PYGZhy{}\PYGZgt{} 10 classes)}
29+
\PYG{k}{def}\PYG{+w}{ }\PYG{n+nf}{forward}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{,} \PYG{n}{x}\PYG{p}{)}\PYG{p}{:}
30+
\PYG{n}{x} \PYG{o}{=} \PYG{n}{x}\PYG{o}{.}\PYG{n}{view}\PYG{p}{(}\PYG{n}{x}\PYG{o}{.}\PYG{n}{size}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)}\PYG{p}{,} \PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} flatten images into vectors of size 784}
31+
\PYG{n}{x} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{relu}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc1}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} hidden layer 1 + ReLU activation}
32+
\PYG{n}{x} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{relu}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc2}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} hidden layer 2 + ReLU activation}
33+
\PYG{n}{x} \PYG{o}{=} \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc3}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} output layer (logits for 10 classes)}
34+
\PYG{k}{return} \PYG{n}{x}
35+
36+
\PYG{n}{model} \PYG{o}{=} \PYG{n}{NeuralNet}\PYG{p}{(}\PYG{p}{)}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)}
37+
38+
39+
\PYG{n}{criterion} \PYG{o}{=} \PYG{n}{nn}\PYG{o}{.}\PYG{n}{CrossEntropyLoss}\PYG{p}{(}\PYG{p}{)}
40+
\PYG{n}{optimizer} \PYG{o}{=} \PYG{n}{optim}\PYG{o}{.}\PYG{n}{SGD}\PYG{p}{(}\PYG{n}{model}\PYG{o}{.}\PYG{n}{parameters}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{n}{lr}\PYG{o}{=}\PYG{l+m+mf}{0.01}\PYG{p}{,} \PYG{n}{weight\PYGZus{}decay}\PYG{o}{=}\PYG{l+m+mf}{1e\PYGZhy{}4}\PYG{p}{)}
41+
42+
\PYG{n}{num\PYGZus{}epochs} \PYG{o}{=} \PYG{l+m+mi}{10}
43+
\PYG{k}{for} \PYG{n}{epoch} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{num\PYGZus{}epochs}\PYG{p}{)}\PYG{p}{:}
44+
\PYG{n}{model}\PYG{o}{.}\PYG{n}{train}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} set model to training mode}
45+
\PYG{n}{running\PYGZus{}loss} \PYG{o}{=} \PYG{l+m+mf}{0.0}
46+
\PYG{k}{for} \PYG{n}{images}\PYG{p}{,} \PYG{n}{labels} \PYG{o+ow}{in} \PYG{n}{train\PYGZus{}loader}\PYG{p}{:}
47+
\PYG{c+c1}{\PYGZsh{} Move data to device (GPU if available, else CPU)}
48+
\PYG{n}{images}\PYG{p}{,} \PYG{n}{labels} \PYG{o}{=} \PYG{n}{images}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)}\PYG{p}{,} \PYG{n}{labels}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)}
49+
50+
\PYG{n}{optimizer}\PYG{o}{.}\PYG{n}{zero\PYGZus{}grad}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} reset gradients to zero}
51+
\PYG{n}{outputs} \PYG{o}{=} \PYG{n}{model}\PYG{p}{(}\PYG{n}{images}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} forward pass: compute predictions}
52+
\PYG{n}{loss} \PYG{o}{=} \PYG{n}{criterion}\PYG{p}{(}\PYG{n}{outputs}\PYG{p}{,} \PYG{n}{labels}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} compute cross\PYGZhy{}entropy loss}
53+
\PYG{n}{loss}\PYG{o}{.}\PYG{n}{backward}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} backpropagate to compute gradients}
54+
\PYG{n}{optimizer}\PYG{o}{.}\PYG{n}{step}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} update weights using SGD step}
55+
56+
\PYG{n}{running\PYGZus{}loss} \PYG{o}{+}\PYG{o}{=} \PYG{n}{loss}\PYG{o}{.}\PYG{n}{item}\PYG{p}{(}\PYG{p}{)}
57+
\PYG{c+c1}{\PYGZsh{} Compute average loss over all batches in this epoch}
58+
\PYG{n}{avg\PYGZus{}loss} \PYG{o}{=} \PYG{n}{running\PYGZus{}loss} \PYG{o}{/} \PYG{n+nb}{len}\PYG{p}{(}\PYG{n}{train\PYGZus{}loader}\PYG{p}{)}
59+
\PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+sa}{f}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{Epoch }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{epoch}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{/}\PYG{l+s+si}{\PYGZob{}}\PYG{n}{num\PYGZus{}epochs}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{, Loss: }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{avg\PYGZus{}loss}\PYG{l+s+si}{:}\PYG{l+s+s2}{.4f}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)}
60+
61+
\PYG{c+c1}{\PYGZsh{} Evaluation on the Test Set}
62+
63+
64+
65+
\PYG{n}{model}\PYG{o}{.}\PYG{n}{eval}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} set model to evaluation mode}
66+
\PYG{n}{correct} \PYG{o}{=} \PYG{l+m+mi}{0}
67+
\PYG{n}{total} \PYG{o}{=} \PYG{l+m+mi}{0}
68+
\PYG{k}{with} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{no\PYGZus{}grad}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:} \PYG{c+c1}{\PYGZsh{} disable gradient calculation for evaluation}
69+
\PYG{k}{for} \PYG{n}{images}\PYG{p}{,} \PYG{n}{labels} \PYG{o+ow}{in} \PYG{n}{test\PYGZus{}loader}\PYG{p}{:}
70+
\PYG{n}{images}\PYG{p}{,} \PYG{n}{labels} \PYG{o}{=} \PYG{n}{images}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)}\PYG{p}{,} \PYG{n}{labels}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)}
71+
\PYG{n}{outputs} \PYG{o}{=} \PYG{n}{model}\PYG{p}{(}\PYG{n}{images}\PYG{p}{)}
72+
\PYG{n}{\PYGZus{}}\PYG{p}{,} \PYG{n}{predicted} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{max}\PYG{p}{(}\PYG{n}{outputs}\PYG{p}{,} \PYG{n}{dim}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} class with highest score}
73+
\PYG{n}{total} \PYG{o}{+}\PYG{o}{=} \PYG{n}{labels}\PYG{o}{.}\PYG{n}{size}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)}
74+
\PYG{n}{correct} \PYG{o}{+}\PYG{o}{=} \PYG{p}{(}\PYG{n}{predicted} \PYG{o}{==} \PYG{n}{labels}\PYG{p}{)}\PYG{o}{.}\PYG{n}{sum}\PYG{p}{(}\PYG{p}{)}\PYG{o}{.}\PYG{n}{item}\PYG{p}{(}\PYG{p}{)}
75+
76+
\PYG{n}{accuracy} \PYG{o}{=} \PYG{l+m+mi}{100} \PYG{o}{*} \PYG{n}{correct} \PYG{o}{/} \PYG{n}{total}
77+
\PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+sa}{f}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{Test Accuracy: }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{accuracy}\PYG{l+s+si}{:}\PYG{l+s+s2}{.2f}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{\PYGZpc{}}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)}
78+
79+
80+
\end{MintedVerbatim}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"jobname": "week1",
3+
"md5": "0191B8CD171F8856F257DAC490DB099A",
4+
"timestamp": "20260122083710",
5+
"cachefiles": [
6+
"8E507AC87F69F186AC3E97F040235C83.highlight.minted",
7+
"FF29CA3E4F7B0F1F907BAD8941CED3A0.highlight.minted",
8+
"_0191B8CD171F8856F257DAC490DB099A.index.minted",
9+
"default.style.minted"
10+
]
11+
}

0 commit comments

Comments
 (0)