|
| 1 | +\begin{MintedVerbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] |
| 2 | +\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torch} |
| 3 | +\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torch}\PYG{n+nn}{.}\PYG{n+nn}{nn}\PYG{+w}{ }\PYG{k}{as}\PYG{+w}{ }\PYG{n+nn}{nn} |
| 4 | +\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torch}\PYG{n+nn}{.}\PYG{n+nn}{optim}\PYG{+w}{ }\PYG{k}{as}\PYG{+w}{ }\PYG{n+nn}{optim} |
| 5 | +\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torchvision} |
| 6 | +\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{torchvision}\PYG{n+nn}{.}\PYG{n+nn}{transforms}\PYG{+w}{ }\PYG{k}{as}\PYG{+w}{ }\PYG{n+nn}{transforms} |
| 7 | + |
| 8 | +\PYG{c+c1}{\PYGZsh{} Device configuration: use GPU if available} |
| 9 | +\PYG{n}{device} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{device}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{cuda}\PYG{l+s+s2}{\PYGZdq{}} \PYG{k}{if} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{cuda}\PYG{o}{.}\PYG{n}{is\PYGZus{}available}\PYG{p}{(}\PYG{p}{)} \PYG{k}{else} \PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{cpu}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)} |
| 10 | + |
| 11 | +\PYG{c+c1}{\PYGZsh{} MNIST dataset (downloads if not already present)} |
| 12 | +\PYG{n}{transform} \PYG{o}{=} \PYG{n}{transforms}\PYG{o}{.}\PYG{n}{Compose}\PYG{p}{(}\PYG{p}{[} |
| 13 | + \PYG{n}{transforms}\PYG{o}{.}\PYG{n}{ToTensor}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} |
| 14 | + \PYG{n}{transforms}\PYG{o}{.}\PYG{n}{Normalize}\PYG{p}{(}\PYG{p}{(}\PYG{l+m+mf}{0.5}\PYG{p}{,}\PYG{p}{)}\PYG{p}{,} \PYG{p}{(}\PYG{l+m+mf}{0.5}\PYG{p}{,}\PYG{p}{)}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} normalize with mean=0.5, std=0.5 (maps [0,1] pixels to [\PYGZhy{}1,1])} |
| 15 | +\PYG{p}{]}\PYG{p}{)} |
| 16 | +\PYG{n}{train\PYGZus{}dataset} \PYG{o}{=} \PYG{n}{torchvision}\PYG{o}{.}\PYG{n}{datasets}\PYG{o}{.}\PYG{n}{MNIST}\PYG{p}{(}\PYG{n}{root}\PYG{o}{=}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{./data}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{train}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{,} \PYG{n}{download}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{,} \PYG{n}{transform}\PYG{o}{=}\PYG{n}{transform}\PYG{p}{)} |
| 17 | +\PYG{n}{test\PYGZus{}dataset} \PYG{o}{=} \PYG{n}{torchvision}\PYG{o}{.}\PYG{n}{datasets}\PYG{o}{.}\PYG{n}{MNIST}\PYG{p}{(}\PYG{n}{root}\PYG{o}{=}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{./data}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{,} \PYG{n}{train}\PYG{o}{=}\PYG{k+kc}{False}\PYG{p}{,} \PYG{n}{download}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{,} \PYG{n}{transform}\PYG{o}{=}\PYG{n}{transform}\PYG{p}{)} |
| 18 | + |
| 19 | +\PYG{n}{train\PYGZus{}loader} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{utils}\PYG{o}{.}\PYG{n}{data}\PYG{o}{.}\PYG{n}{DataLoader}\PYG{p}{(}\PYG{n}{train\PYGZus{}dataset}\PYG{p}{,} \PYG{n}{batch\PYGZus{}size}\PYG{o}{=}\PYG{l+m+mi}{64}\PYG{p}{,} \PYG{n}{shuffle}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{)} |
| 20 | +\PYG{n}{test\PYGZus{}loader} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{utils}\PYG{o}{.}\PYG{n}{data}\PYG{o}{.}\PYG{n}{DataLoader}\PYG{p}{(}\PYG{n}{test\PYGZus{}dataset}\PYG{p}{,} \PYG{n}{batch\PYGZus{}size}\PYG{o}{=}\PYG{l+m+mi}{64}\PYG{p}{,} \PYG{n}{shuffle}\PYG{o}{=}\PYG{k+kc}{False}\PYG{p}{)} |
| 21 | + |
| 22 | + |
| 23 | +\PYG{k}{class}\PYG{+w}{ }\PYG{n+nc}{NeuralNet}\PYG{p}{(}\PYG{n}{nn}\PYG{o}{.}\PYG{n}{Module}\PYG{p}{)}\PYG{p}{:} |
| 24 | + \PYG{k}{def}\PYG{+w}{ }\PYG{n+nf+fm}{\PYGZus{}\PYGZus{}init\PYGZus{}\PYGZus{}}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{)}\PYG{p}{:} |
| 25 | + \PYG{n+nb}{super}\PYG{p}{(}\PYG{n}{NeuralNet}\PYG{p}{,} \PYG{n+nb+bp}{self}\PYG{p}{)}\PYG{o}{.}\PYG{n+nf+fm}{\PYGZus{}\PYGZus{}init\PYGZus{}\PYGZus{}}\PYG{p}{(}\PYG{p}{)} |
| 26 | + \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc1} \PYG{o}{=} \PYG{n}{nn}\PYG{o}{.}\PYG{n}{Linear}\PYG{p}{(}\PYG{l+m+mi}{28}\PYG{o}{*}\PYG{l+m+mi}{28}\PYG{p}{,} \PYG{l+m+mi}{100}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} first hidden layer (784 \PYGZhy{}\PYGZgt{} 100)} |
| 27 | + \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc2} \PYG{o}{=} \PYG{n}{nn}\PYG{o}{.}\PYG{n}{Linear}\PYG{p}{(}\PYG{l+m+mi}{100}\PYG{p}{,} \PYG{l+m+mi}{100}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} second hidden layer (100 \PYGZhy{}\PYGZgt{} 100)} |
| 28 | + \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc3} \PYG{o}{=} \PYG{n}{nn}\PYG{o}{.}\PYG{n}{Linear}\PYG{p}{(}\PYG{l+m+mi}{100}\PYG{p}{,} \PYG{l+m+mi}{10}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} output layer (100 \PYGZhy{}\PYGZgt{} 10 classes)} |
| 29 | + \PYG{k}{def}\PYG{+w}{ }\PYG{n+nf}{forward}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{p}{,} \PYG{n}{x}\PYG{p}{)}\PYG{p}{:} |
| 30 | + \PYG{n}{x} \PYG{o}{=} \PYG{n}{x}\PYG{o}{.}\PYG{n}{view}\PYG{p}{(}\PYG{n}{x}\PYG{o}{.}\PYG{n}{size}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)}\PYG{p}{,} \PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} flatten images into vectors of size 784} |
| 31 | + \PYG{n}{x} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{relu}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc1}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} hidden layer 1 + ReLU activation} |
| 32 | + \PYG{n}{x} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{relu}\PYG{p}{(}\PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc2}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} hidden layer 2 + ReLU activation} |
| 33 | + \PYG{n}{x} \PYG{o}{=} \PYG{n+nb+bp}{self}\PYG{o}{.}\PYG{n}{fc3}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} output layer (logits for 10 classes)} |
| 34 | + \PYG{k}{return} \PYG{n}{x} |
| 35 | + |
| 36 | +\PYG{n}{model} \PYG{o}{=} \PYG{n}{NeuralNet}\PYG{p}{(}\PYG{p}{)}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)} |
| 37 | + |
| 38 | + |
| 39 | +\PYG{n}{criterion} \PYG{o}{=} \PYG{n}{nn}\PYG{o}{.}\PYG{n}{CrossEntropyLoss}\PYG{p}{(}\PYG{p}{)} |
| 40 | +\PYG{n}{optimizer} \PYG{o}{=} \PYG{n}{optim}\PYG{o}{.}\PYG{n}{SGD}\PYG{p}{(}\PYG{n}{model}\PYG{o}{.}\PYG{n}{parameters}\PYG{p}{(}\PYG{p}{)}\PYG{p}{,} \PYG{n}{lr}\PYG{o}{=}\PYG{l+m+mf}{0.01}\PYG{p}{,} \PYG{n}{weight\PYGZus{}decay}\PYG{o}{=}\PYG{l+m+mf}{1e\PYGZhy{}4}\PYG{p}{)} |
| 41 | + |
| 42 | +\PYG{n}{num\PYGZus{}epochs} \PYG{o}{=} \PYG{l+m+mi}{10} |
| 43 | +\PYG{k}{for} \PYG{n}{epoch} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{num\PYGZus{}epochs}\PYG{p}{)}\PYG{p}{:} |
| 44 | + \PYG{n}{model}\PYG{o}{.}\PYG{n}{train}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} set model to training mode} |
| 45 | + \PYG{n}{running\PYGZus{}loss} \PYG{o}{=} \PYG{l+m+mf}{0.0} |
| 46 | + \PYG{k}{for} \PYG{n}{images}\PYG{p}{,} \PYG{n}{labels} \PYG{o+ow}{in} \PYG{n}{train\PYGZus{}loader}\PYG{p}{:} |
| 47 | + \PYG{c+c1}{\PYGZsh{} Move data to device (GPU if available, else CPU)} |
| 48 | + \PYG{n}{images}\PYG{p}{,} \PYG{n}{labels} \PYG{o}{=} \PYG{n}{images}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)}\PYG{p}{,} \PYG{n}{labels}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)} |
| 49 | + |
| 50 | + \PYG{n}{optimizer}\PYG{o}{.}\PYG{n}{zero\PYGZus{}grad}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} reset gradients to zero} |
| 51 | + \PYG{n}{outputs} \PYG{o}{=} \PYG{n}{model}\PYG{p}{(}\PYG{n}{images}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} forward pass: compute predictions} |
| 52 | + \PYG{n}{loss} \PYG{o}{=} \PYG{n}{criterion}\PYG{p}{(}\PYG{n}{outputs}\PYG{p}{,} \PYG{n}{labels}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} compute cross\PYGZhy{}entropy loss} |
| 53 | + \PYG{n}{loss}\PYG{o}{.}\PYG{n}{backward}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} backpropagate to compute gradients} |
| 54 | + \PYG{n}{optimizer}\PYG{o}{.}\PYG{n}{step}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} update weights using SGD step} |
| 55 | + |
| 56 | + \PYG{n}{running\PYGZus{}loss} \PYG{o}{+}\PYG{o}{=} \PYG{n}{loss}\PYG{o}{.}\PYG{n}{item}\PYG{p}{(}\PYG{p}{)} |
| 57 | + \PYG{c+c1}{\PYGZsh{} Compute average loss over all batches in this epoch} |
| 58 | + \PYG{n}{avg\PYGZus{}loss} \PYG{o}{=} \PYG{n}{running\PYGZus{}loss} \PYG{o}{/} \PYG{n+nb}{len}\PYG{p}{(}\PYG{n}{train\PYGZus{}loader}\PYG{p}{)} |
| 59 | + \PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+sa}{f}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{Epoch }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{epoch}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{/}\PYG{l+s+si}{\PYGZob{}}\PYG{n}{num\PYGZus{}epochs}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{, Loss: }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{avg\PYGZus{}loss}\PYG{l+s+si}{:}\PYG{l+s+s2}{.4f}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)} |
| 60 | + |
| 61 | +\PYG{c+c1}{\PYGZsh{} Evaluation on the test set} |
| 62 | + |
| 63 | + |
| 64 | + |
| 65 | +\PYG{n}{model}\PYG{o}{.}\PYG{n}{eval}\PYG{p}{(}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} set model to evaluation mode} |
| 66 | +\PYG{n}{correct} \PYG{o}{=} \PYG{l+m+mi}{0} |
| 67 | +\PYG{n}{total} \PYG{o}{=} \PYG{l+m+mi}{0} |
| 68 | +\PYG{k}{with} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{no\PYGZus{}grad}\PYG{p}{(}\PYG{p}{)}\PYG{p}{:} \PYG{c+c1}{\PYGZsh{} disable gradient calculation for evaluation} |
| 69 | + \PYG{k}{for} \PYG{n}{images}\PYG{p}{,} \PYG{n}{labels} \PYG{o+ow}{in} \PYG{n}{test\PYGZus{}loader}\PYG{p}{:} |
| 70 | + \PYG{n}{images}\PYG{p}{,} \PYG{n}{labels} \PYG{o}{=} \PYG{n}{images}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)}\PYG{p}{,} \PYG{n}{labels}\PYG{o}{.}\PYG{n}{to}\PYG{p}{(}\PYG{n}{device}\PYG{p}{)} |
| 71 | + \PYG{n}{outputs} \PYG{o}{=} \PYG{n}{model}\PYG{p}{(}\PYG{n}{images}\PYG{p}{)} |
| 72 | + \PYG{n}{\PYGZus{}}\PYG{p}{,} \PYG{n}{predicted} \PYG{o}{=} \PYG{n}{torch}\PYG{o}{.}\PYG{n}{max}\PYG{p}{(}\PYG{n}{outputs}\PYG{p}{,} \PYG{n}{dim}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{)} \PYG{c+c1}{\PYGZsh{} class with highest score} |
| 73 | + \PYG{n}{total} \PYG{o}{+}\PYG{o}{=} \PYG{n}{labels}\PYG{o}{.}\PYG{n}{size}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)} |
| 74 | + \PYG{n}{correct} \PYG{o}{+}\PYG{o}{=} \PYG{p}{(}\PYG{n}{predicted} \PYG{o}{==} \PYG{n}{labels}\PYG{p}{)}\PYG{o}{.}\PYG{n}{sum}\PYG{p}{(}\PYG{p}{)}\PYG{o}{.}\PYG{n}{item}\PYG{p}{(}\PYG{p}{)} |
| 75 | + |
| 76 | +\PYG{n}{accuracy} \PYG{o}{=} \PYG{l+m+mi}{100} \PYG{o}{*} \PYG{n}{correct} \PYG{o}{/} \PYG{n}{total} |
| 77 | +\PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+sa}{f}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{Test Accuracy: }\PYG{l+s+si}{\PYGZob{}}\PYG{n}{accuracy}\PYG{l+s+si}{:}\PYG{l+s+s2}{.2f}\PYG{l+s+si}{\PYGZcb{}}\PYG{l+s+s2}{\PYGZpc{}}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{)} |
| 78 | + |
| 79 | + |
| 80 | +\end{MintedVerbatim} |
0 commit comments