# models.py -- from the MariaSimon-AI/Facial-keypoint-Detection-using-Pytorch repository (main branch)
## Convolutional neural network architecture for facial keypoint detection

import torch
import torch.nn as nn
import torch.nn.functional as F
# can use the below import should you choose to initialize the weights of your Net
import torch.nn.init as I


class Net(nn.Module):
    """Convolutional network mapping a grayscale image to 136 output values.

    Five convolution stages, each followed by ReLU and 2x2 max pooling (the
    first four additionally by batch normalization), feed two fully-connected
    layers. Layer sizes are worked out for a 1 x 224 x 224 input, for which
    the last pooled feature map is 512 x 5 x 5. The 136 outputs are intended
    as 2 values (x, y) for each of 68 keypoints.
    """

    def __init__(self) -> None:
        """Create all layers; the sizes in comments assume a 224 x 224 input."""
        super().__init__()

        # Spatial size after a convolution: (W - F) / S + 1.

        # Stage 1: 1 -> 32 channels, 5x5 kernel.
        # 224 -> (224 - 5) / 1 + 1 = 220, pooled to (32, 110, 110).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.bn1_1 = nn.BatchNorm2d(num_features=32)

        # Stage 2: 32 -> 64 channels, 3x3 kernel.
        # 110 -> (110 - 3) / 1 + 1 = 108, pooled to (64, 54, 54).
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.bn1_2 = nn.BatchNorm2d(num_features=64)

        # Stage 3: 64 -> 128 channels, 3x3 kernel.
        # 54 -> (54 - 3) / 1 + 1 = 52, pooled to (128, 26, 26).
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.bn1_3 = nn.BatchNorm2d(num_features=128)

        # Stage 4: 128 -> 256 channels, 3x3 kernel.
        # 26 -> (26 - 3) / 1 + 1 = 24, pooled to (256, 12, 12).
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3)
        self.bn1_4 = nn.BatchNorm2d(num_features=256)

        # Stage 5: 256 -> 512 channels, 3x3 kernel (no batch norm afterwards).
        # 12 -> (12 - 3) / 1 + 1 = 10, pooled to (512, 5, 5).
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3)

        # One max-pool module (2x2 window, stride 2) is reused by every stage.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Head: the flattened 512 * 5 * 5 feature map -> 1024 hidden units.
        self.fc1 = nn.Linear(in_features=512 * 5 * 5, out_features=1024)

        # Dropout on the hidden units to reduce overfitting.
        self.fc1_drop = nn.Dropout(p=0.4)

        # Final layer: 1024 hidden units -> 136 outputs.
        self.fc2 = nn.Linear(in_features=1024, out_features=136)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: Batched input with one channel, i.e. (N, 1, H, W). H and W
                must leave a 5 x 5 map after the fifth pooling step for
                ``fc1`` to accept the features (224 x 224 does).

        Returns:
            Tensor of shape (N, 136).

        NOTE(review): ``fc1`` feeds ``fc2`` through dropout only, with no
        activation in between. Adding one would change the outputs of
        already-trained weights, so it is left as is here.
        """
        # Stages 1-4: convolution -> ReLU -> max pool -> batch norm.
        x = self.bn1_1(self.pool(F.relu(self.conv1(x))))
        x = self.bn1_2(self.pool(F.relu(self.conv2(x))))
        x = self.bn1_3(self.pool(F.relu(self.conv3(x))))
        x = self.bn1_4(self.pool(F.relu(self.conv4(x))))

        # Stage 5: convolution -> ReLU -> max pool.
        x = self.pool(F.relu(self.conv5(x)))

        # Collapse everything but the batch dimension for the linear layers.
        features = x.view(x.size(0), -1)

        hidden = self.fc1_drop(self.fc1(features))
        return self.fc2(hidden)