CS236G_Final_Project/model.py at main · ncomly/CS236G_Final_Project · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
# Inspired by and partially taken from CS 236G Coursera Course Content
from utils import *

################################################################################
# Generator
################################################################################
##### Residual Block #####
class ResidualBlock(nn.Module):
    '''
    ResidualBlock Class:
    Applies conv -> instance norm -> ReLU -> conv -> instance norm and adds the
    block input to that result (skip connection). Both convolutions are 3x3 with
    reflect padding of 1 and keep the channel count, so the output has the same
    shape as the input.
    Values:
        input_channels: the number of channels to expect from a given input
    '''
    def __init__(self, input_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, input_channels, kernel_size=3, padding=1, padding_mode='reflect')
        self.conv2 = nn.Conv2d(input_channels, input_channels, kernel_size=3, padding=1, padding_mode='reflect')
        # One norm layer, built with default arguments, is applied after both convolutions.
        self.instancenorm = nn.InstanceNorm2d(input_channels)
        self.activation = nn.ReLU()

    def forward(self, x):
        '''
        Function for completing a forward pass of ResidualBlock:
        Given an image tensor, completes a residual block and returns the transformed tensor.
        Parameters:
            x: image tensor of shape (batch size, channels, height, width)
        Returns:
            x plus the output of the two-convolution branch (same shape as x)
        '''
        # Every layer below returns a new tensor (nn.ReLU() is not in-place), so x
        # is still intact for the skip connection and does not need to be copied.
        residual = self.conv1(x)
        residual = self.instancenorm(residual)
        residual = self.activation(residual)
        residual = self.conv2(residual)
        residual = self.instancenorm(residual)
        return x + residual

##### Contracting & Expanding #####
class ContractingBlock(nn.Module):
    '''
    ContractingBlock Class
    Downsamples with a single stride-2 convolution (reflect padding of 1) that
    doubles the number of channels, followed by an optional instance norm and
    an activation. No pooling layer is involved.
    Values:
        input_channels: the number of channels to expect from a given input
        use_bn: whether to apply normalization after the convolution; despite
            the name, the layer used is nn.InstanceNorm2d
        kernel_size: kernel size of the convolution
        activation: 'relu' selects nn.ReLU; any other value selects nn.LeakyReLU(0.2)
    '''
    def __init__(self, input_channels, use_bn=True, kernel_size=3, activation='relu'):
        super().__init__()
        doubled_channels = input_channels * 2
        self.conv1 = nn.Conv2d(
            input_channels,
            doubled_channels,
            kernel_size=kernel_size,
            padding=1,
            stride=2,
            padding_mode='reflect',
        )
        if activation == 'relu':
            self.activation = nn.ReLU()
        else:
            self.activation = nn.LeakyReLU(0.2)
        # The norm layer only exists as an attribute when it is actually used.
        if use_bn:
            self.instancenorm = nn.InstanceNorm2d(doubled_channels)
        self.use_bn = use_bn

    def forward(self, x):
        '''
        Function for completing a forward pass of ContractingBlock:
        Given an image tensor, completes a contracting block and returns the transformed tensor.
        Parameters:
            x: image tensor of shape (batch size, channels, height, width)
        '''
        features = self.conv1(x)
        if self.use_bn:
            features = self.instancenorm(features)
        return self.activation(features)

class ExpandingBlock(nn.Module):
    '''
    ExpandingBlock Class:
    Upsamples with a stride-2 transposed convolution that halves the number of
    channels, followed by an optional instance norm and a ReLU.
    Values:
        input_channels: the number of channels to expect from a given input
        use_bn: whether to apply normalization after the transposed convolution;
            despite the name, the layer used is nn.InstanceNorm2d
    '''
    def __init__(self, input_channels, use_bn=True):
        super().__init__()
        halved_channels = input_channels // 2
        self.conv1 = nn.ConvTranspose2d(
            input_channels,
            halved_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
        )
        # The norm layer only exists as an attribute when it is actually used.
        if use_bn:
            self.instancenorm = nn.InstanceNorm2d(halved_channels)
        self.use_bn = use_bn
        self.activation = nn.ReLU()

    def forward(self, x):
        '''
        Function for completing a forward pass of ExpandingBlock:
        Given an image tensor, completes an expanding block and returns the transformed tensor.
        Parameters:
            x: image tensor of shape (batch size, channels, height, width)
        '''
        upsampled = self.conv1(x)
        if self.use_bn:
            upsampled = self.instancenorm(upsampled)
        return self.activation(upsampled)

class FeatureMapBlock(nn.Module):
    '''
    FeatureMapBlock Class
    A single 7x7 convolution (reflect padding of 3) that maps its input to the
    desired number of channels without changing the spatial size.
    Values:
        input_channels: the number of channels to expect from a given input
        output_channels: the number of channels to expect for a given output
    '''
    def __init__(self, input_channels, output_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=7,
            padding=3,
            padding_mode='reflect',
        )

    def forward(self, x):
        '''
        Function for completing a forward pass of FeatureMapBlock:
        Given an image tensor, returns it mapped to the desired number of channels.
        Parameters:
            x: image tensor of shape (batch size, channels, height, width)
        '''
        return self.conv(x)

##### CycleGAN Generator #####
class Generator(nn.Module):
    '''
    Generator Class
    An upfeature layer, 2 contracting blocks, num_res residual blocks (8 by
    default), 2 expanding blocks and a downfeature layer followed by tanh, used
    to transform an input image into an image from the other class.
    Values:
        input_channels: the number of channels to expect from a given input
        output_channels: the number of channels to expect for a given output
        hidden_channels: the number of channels produced by the upfeature layer
        num_res: the number of residual blocks between contraction and expansion
    '''
    def __init__(self, input_channels, output_channels, hidden_channels=64, num_res=8):
        super().__init__()
        doubled_channels = hidden_channels * 2
        # Two contracting blocks each double the channels, so the residual stack
        # operates on 4x the hidden width.
        bottleneck_channels = hidden_channels * 4

        self.upfeature = FeatureMapBlock(input_channels, hidden_channels)
        self.contract1 = ContractingBlock(hidden_channels)
        self.contract2 = ContractingBlock(doubled_channels)

        residual_stack = []
        for _ in range(num_res):
            residual_stack.append(ResidualBlock(bottleneck_channels))
        self.res_blocks = nn.ModuleList(residual_stack)

        self.expand2 = ExpandingBlock(bottleneck_channels)
        self.expand3 = ExpandingBlock(doubled_channels)
        self.downfeature = FeatureMapBlock(hidden_channels, output_channels)
        self.tanh = torch.nn.Tanh()

    def forward(self, x):
        '''
        Function for completing a forward pass of Generator:
        Given an image tensor, passes it through the contracting path, the
        residual blocks and the expanding path (no skip connections between
        the two paths) and returns the tanh of the result.
        Parameters:
            x: image tensor of shape (batch size, channels, height, width)
        '''
        features = self.contract2(self.contract1(self.upfeature(x)))
        for block in self.res_blocks:
            features = block(features)
        expanded = self.expand3(self.expand2(features))
        return self.tanh(self.downfeature(expanded))


################################################################################
# Discriminator
################################################################################
##### PatchGAN Discriminator #####
class Discriminator(nn.Module):
    '''
    Discriminator Class
    An upfeature layer followed by two contracting blocks (4x4 kernels, leaky
    ReLU) and a final 1x1 convolution down to one channel. The output is a
    matrix of values classifying corresponding portions of the image as real or fake.
    Parameters:
        input_channels: the number of image input channels
        hidden_channels: the initial number of discriminator convolutional filters
    '''
    def __init__(self, input_channels, hidden_channels=64):
        super().__init__()
        contract_kwargs = dict(kernel_size=4, activation='lrelu')
        self.upfeature = FeatureMapBlock(input_channels, hidden_channels)
        # The first contracting block skips normalization.
        self.contract1 = ContractingBlock(hidden_channels, use_bn=False, **contract_kwargs)
        self.contract2 = ContractingBlock(hidden_channels * 2, **contract_kwargs)
        # A third contracting block (taking hidden_channels * 4) was left disabled
        # in the original source, so the final layer reads hidden_channels * 4 channels.
        self.final = nn.Conv2d(hidden_channels * 4, 1, kernel_size=1)

    def forward(self, x):
        '''
        Function for completing a forward pass of Discriminator:
        Given an image tensor, returns the single-channel prediction matrix.
        Parameters:
            x: image tensor of shape (batch size, channels, height, width)
        '''
        features = self.upfeature(x)
        for stage in (self.contract1, self.contract2):
            features = stage(features)
        return self.final(features)


################################################################################
# Loss Functions
################################################################################
##### Generator Loss #####
## Adversarial Loss ##
def get_gen_adversarial_loss(real_X, disc_Y, gen_XY, adv_criterion):
    '''
    Return the adversarial loss of the generator given inputs
    (and the generated images for testing purposes).
    Parameters:
        real_X: the real images from pile X
        disc_Y: the discriminator for class Y; takes images and returns real/fake class Y
            prediction matrices
        gen_XY: the generator for class X to Y; takes images and returns the images
            transformed to class Y
        adv_criterion: the adversarial loss function; takes the discriminator
                  predictions and the target labels and returns a adversarial
                  loss (which you aim to minimize)
    Returns:
        (adversarial loss, generated images of class Y)
    '''
    generated_Y = gen_XY(real_X)
    prediction = disc_Y(generated_Y)
    # The generator wants its fakes to be scored as real, hence the all-ones target.
    real_target = torch.ones_like(prediction)
    return adv_criterion(prediction, real_target), generated_Y

## Identity Loss ##
def get_identity_loss(real_X, gen_YX, identity_criterion):
    '''
    Return the identity loss of the generator given inputs
    (and the generated images for testing purposes).
    Parameters:
        real_X: the real images from pile X
        gen_YX: the generator for class Y to X; takes images and returns the images
            transformed to class X
        identity_criterion: the identity loss function; called as
            identity_criterion(gen_YX(real_X), real_X) and returns the identity
            loss (which you aim to minimize)
    Returns:
        (identity loss, real_X after being passed through gen_YX)
    '''
    mapped_X = gen_YX(real_X)
    return identity_criterion(mapped_X, real_X), mapped_X

## Cycle Consistency Loss ##
def get_cycle_consistency_loss(real_X, fake_Y, gen_YX, cycle_criterion):
    '''
    Return the cycle consistency loss of the generator given inputs
    (and the generated images for testing purposes).
    Parameters:
        real_X: the real images from pile X
        fake_Y: the generated images of class Y
        gen_YX: the generator for class Y to X; takes images and returns the images
            transformed to class X
        cycle_criterion: the cycle consistency loss function; called as
            cycle_criterion(real_X, gen_YX(fake_Y)) and returns the cycle
            consistency loss (which you aim to minimize)
    Returns:
        (cycle consistency loss, fake_Y mapped back to class X)
    '''
    recovered_X = gen_YX(fake_Y)
    return cycle_criterion(real_X, recovered_X), recovered_X


## Reconstruction Loss ##
def get_reconstruction_adversarial_loss(real_X, fake_Y, landmarks_X, disc_L, gen_YX, adv_criterion):
    '''
    Return the adversarial loss of the reconstructed inputs
    (and the reconstructed images for testing purposes).
    Parameters:
        real_X: the real images from pile X (not used by this function)
        fake_Y: the fake images generated in pile Y
        landmarks_X: the landmarks for images from pile X; concatenated with the
            reconstruction along dimension 1 before being scored
        disc_L: the landmark-conditioned discriminator; takes reconstructed images
            concatenated with landmarks and returns real/fake prediction matrices.
            May be None, in which case nothing is computed.
        gen_YX: the generator for class Y to X; takes images and returns the images
            transformed to class X
        adv_criterion: the adversarial loss function; takes the discriminator
                  predictions and the target labels and returns a adversarial
                  loss (which you aim to minimize)
    Returns:
        (reconstruction adversarial loss, reconstructed images), or None when
        disc_L is None
    '''
    # Identity check rather than `!= None`, so an object with custom comparison
    # operators cannot change which branch is taken.
    if disc_L is None:
        return None

    rec_X = gen_YX(fake_Y)
    disc_rec = disc_L(torch.cat((rec_X, landmarks_X), 1))
    # The generator wants the reconstruction to be scored as real.
    reconstruction_loss = adv_criterion(disc_rec, torch.ones_like(disc_rec))
    return reconstruction_loss, rec_X


## Total Loss ##
def get_gen_loss(real_A, real_B, landmarks_B, gen_AB, gen_BA, disc_A, disc_B, disc_L, adv_criterion, identity_criterion, cycle_criterion, lambda_identity=0.1, lambda_cycle=10, lambda_rec=10):
    '''
    Return the loss of the generator given inputs.
    Parameters:
        real_A: the real images from pile A
        real_B: the real images from pile B
        landmarks_B: the landmarks for images from pile B
        gen_AB: the generator for class A to B; takes images and returns the images
            transformed to class B
        gen_BA: the generator for class B to A; takes images and returns the images
            transformed to class A
        disc_A: the discriminator for class A; takes images and returns real/fake class A
            prediction matrices
        disc_B: the discriminator for class B; takes images and returns real/fake class B
            prediction matrices
        disc_L: the reconstruction discriminator for class B, conditioned on landmarks;
            takes images concatenated by channel with landmarks and returns real/fake class B
            prediction matrices. Pass None to leave the reconstruction term out.
        adv_criterion: the adversarial loss function; takes the discriminator
            predictions and the true labels and returns a adversarial
            loss (which you aim to minimize)
        identity_criterion: the reconstruction loss function used for identity loss;
            takes two sets of images and returns their pixel differences
            (which you aim to minimize)
        cycle_criterion: the cycle consistency loss function; takes the real images from X and
            those images put through a X->Y generator and then Y->X generator
            and returns the cycle consistency loss (which you aim to minimize).
            Note that in practice, cycle_criterion == identity_criterion == L1 loss
        lambda_identity: the weight of the identity loss
        lambda_cycle: the weight of the cycle-consistency loss
        lambda_rec: the weight of the reconstruction-adversarial loss
    Returns:
        (total generator loss, fake_A, fake_B)
    '''
    # Adversarial Loss -- get_gen_adversarial_loss(real_X, disc_Y, gen_XY, adv_criterion)
    adv_loss_AB, fake_B = get_gen_adversarial_loss(real_A, disc_B, gen_AB, adv_criterion)
    adv_loss_BA, fake_A = get_gen_adversarial_loss(real_B, disc_A, gen_BA, adv_criterion)

    # Identity Loss -- get_identity_loss(real_X, gen_YX, identity_criterion)
    idn_loss_AB, _ = get_identity_loss(real_A, gen_BA, identity_criterion)
    idn_loss_BA, _ = get_identity_loss(real_B, gen_AB, identity_criterion)

    # Cycle-consistency Loss -- get_cycle_consistency_loss(real_X, fake_Y, gen_YX, cycle_criterion)
    cyc_loss_BA, _ = get_cycle_consistency_loss(real_A, fake_B, gen_BA, cycle_criterion)
    cyc_loss_AB, _ = get_cycle_consistency_loss(real_B, fake_A, gen_AB, cycle_criterion)

    # Total loss of the always-present terms
    gen_loss = adv_loss_AB + adv_loss_BA \
                + lambda_identity*(idn_loss_AB + idn_loss_BA) \
                + lambda_cycle*(cyc_loss_AB + cyc_loss_BA)

    # Reconstruction Adversarial Loss -- only when a landmark discriminator is supplied
    # get_reconstruction_adversarial_loss(real_X, fake_Y, landmarks_X, disc_L, gen_YX, adv_criterion)
    if disc_L is not None:
        rec_loss_B, _ = get_reconstruction_adversarial_loss(real_B, fake_A, landmarks_B,
                                                            disc_L, gen_AB, adv_criterion)
        gen_loss = gen_loss + lambda_rec * rec_loss_B

    return gen_loss, fake_A, fake_B

## Individual Losses ##
def get_gen_losses(real_A, real_B, landmarks_B,
                    gen_AB, gen_BA, disc_A, disc_B, disc_L,
                    adv_criterion, identity_criterion, cycle_criterion):
    '''
    Return the individual (unweighted) generator loss terms.
    Parameters:
        real_A: the real images from pile A
        real_B: the real images from pile B
        landmarks_B: the landmarks for images from pile B
        gen_AB: the generator for class A to B
        gen_BA: the generator for class B to A
        disc_A: the discriminator for class A
        disc_B: the discriminator for class B
        disc_L: the landmark-conditioned reconstruction discriminator, or None
        adv_criterion: the adversarial loss function
        identity_criterion: the identity loss function
        cycle_criterion: the cycle consistency loss function
    Returns:
        (adv_loss, idn_loss, cyc_loss, rec_loss) when disc_L is given, otherwise
        (adv_loss, idn_loss, cyc_loss). The first three are each the sum of the
        A->B and B->A terms. Note the tuple length depends on disc_L.
    '''
    # Adversarial Loss
    adv_loss_AB, fake_B = get_gen_adversarial_loss(real_A, disc_B, gen_AB, adv_criterion)
    adv_loss_BA, fake_A = get_gen_adversarial_loss(real_B, disc_A, gen_BA, adv_criterion)
    adv_loss = adv_loss_AB + adv_loss_BA

    # Identity Loss
    idn_loss_AB, _ = get_identity_loss(real_A, gen_BA, identity_criterion)
    idn_loss_BA, _ = get_identity_loss(real_B, gen_AB, identity_criterion)
    idn_loss = idn_loss_AB + idn_loss_BA

    # Cycle Loss
    cyc_loss_BA, _ = get_cycle_consistency_loss(real_A, fake_B, gen_BA, cycle_criterion)
    cyc_loss_AB, _ = get_cycle_consistency_loss(real_B, fake_A, gen_AB, cycle_criterion)
    cyc_loss = cyc_loss_BA + cyc_loss_AB

    if disc_L is None:
        return adv_loss, idn_loss, cyc_loss

    # Reconstruction Loss
    rec_loss, _ = get_reconstruction_adversarial_loss(real_B, fake_A, landmarks_B,
                                                      disc_L, gen_AB, adv_criterion)
    return adv_loss, idn_loss, cyc_loss, rec_loss


##### Discriminator Loss #####
def get_disc_loss(real_X, fake_X, disc_X, adv_criterion):
    '''
    Return the loss of the discriminator given inputs.
    Parameters:
        real_X: the real images from pile X
        fake_X: the generated images of class X; passed to the discriminator
            as-is (no detach happens here -- presumably the caller handles
            that; verify against the training loop)
        disc_X: the discriminator for class X; takes images and returns real/fake class X
            prediction matrices
        adv_criterion: the adversarial loss function; takes the discriminator
            predictions and the target labels and returns a adversarial
            loss (which you aim to minimize)
    Returns:
        the mean of the fake-image loss (target zeros) and the real-image loss (target ones)
    '''
    # Fakes are scored first, then reals.
    fake_pred = disc_X(fake_X)
    fake_term = adv_criterion(fake_pred, torch.zeros_like(fake_pred))

    real_pred = disc_X(real_X)
    real_term = adv_criterion(real_pred, torch.ones_like(real_pred))

    return (fake_term + real_term) / 2


##### Landmark-Conditioned Discriminator Loss #####
def get_disc_loss_L(real_X, rec_X, landmarks_X, disc_LX, adv_criterion):
    '''
    Return the loss of the landmark-conditioned discriminator given inputs.
    Parameters:
        real_X: the real images from pile X
        rec_X : the reconstructed images of class X
        landmarks_X: the landmarks for images from pile X; concatenated with both
            the real and the reconstructed images along dimension 1
        disc_LX: the discriminator that scores image+landmark tensors and returns
            real/fake prediction matrices
        adv_criterion: the adversarial loss function; takes the discriminator
            predictions and the target labels and returns a adversarial
            loss (which you aim to minimize)
    Returns:
        the mean of the reconstruction loss (target zeros) and the real loss (target ones)
    '''
    # Condition both inputs on the landmarks by stacking along the channel dimension.
    real_pair = torch.cat((real_X, landmarks_X), 1)
    rec_pair = torch.cat((rec_X, landmarks_X), 1)

    # Reconstructions are scored first, then reals.
    rec_pred = disc_LX(rec_pair)
    rec_term = adv_criterion(rec_pred, torch.zeros_like(rec_pred))

    real_pred = disc_LX(real_pair)
    real_term = adv_criterion(real_pred, torch.ones_like(real_pred))

    return (rec_term + real_term) / 2


################################################################################
# Inception-V3
################################################################################
## Creation ##
def get_inception_v3(device='cuda', pretrained=True):
    '''
    Create an Inception-V3 model set up as a feature extractor.
    Parameters:
        device: the device the model is moved to
        pretrained: forwarded to inception_v3 (previously this argument was
            ignored and pretrained=True was always used)
    Returns:
        the model in eval mode with its final `fc` layer replaced by an identity,
        so a forward pass returns features instead of class scores
    '''
    # inception_v3 is expected to come from the star import of utils
    # (presumably torchvision.models.inception_v3 -- confirm).
    inception_model = inception_v3(pretrained=pretrained)
    #inception_model.load_state_dict(torch.load("inception_v3_google-1a9a5a14.pth"))
    # put it on the device and set to EVAL mode
    inception_model.to(device)
    inception_model = inception_model.eval()
    # replace last layer with identity to avoid classification
    inception_model.fc = torch.nn.Identity()

    return inception_model

## Extraction ##
def inception_extraction(model, samples, device='cuda'):
    '''
    Run samples through model and return the result detached and on the CPU.
    Parameters:
        model: the callable used to extract features
        samples: the input tensor; moved to device before the call
        device: the device the samples are moved to
    '''
    on_device = samples.to(device)
    features = model(on_device)
    return features.detach().to('cpu')


## Loss ##
def inception_loss(model, loss, X, Y, device='cuda'):
    '''
    Return loss evaluated on the features that model extracts from X and Y.
    Parameters:
        model: the feature extractor passed to inception_extraction
        loss: the loss function; called as loss(X_features, Y_features)
        X: the first batch of samples
        Y: the second batch of samples
        device: the device the samples are moved to for extraction; defaults to
            'cuda', which matches the previous implicit behavior
    Note:
        inception_extraction detaches the features, so the returned value does
        not carry gradients back to model, X or Y.
    '''
    # extract features
    X_features = inception_extraction(model, X, device)
    Y_features = inception_extraction(model, Y, device)

    # calculate loss on features
    return loss(X_features, Y_features)