AlexNet-Source-Code/multisoft.py at main · Huliangyi/AlexNet-Source-Code · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import sys
import numpy as np
import numpy.random as nr
from math import exp, log

def sumprod_brute(elts, size, fixed = -1):
    if size > len(elts):
        return 0
    if fixed >= 0 and fixed < len(elts):
        if size == 0:
            return 0
        z = 0
        for s in xrange(size):
            z += sumprod_brute(elts[:fixed], s) * sumprod_brute(elts[fixed+1:], size - 1 - s)
        return z * exp(elts[fixed])
    if size == 0:
        return 1
    return exp(elts[0]) * sumprod_brute(elts[1:], size - 1) + sumprod_brute(elts[1:], size)

# Returns sum over all subsets of given size of product
# of exp of elements.
# Also returns, for each index, the said sum given that the element
# at that index is in the subset.
def sumprod(elts, size, fixed = -1):
    N = len(elts)
    B = np.zeros((size + 1, N + 1)) # Backward lattice
    B[0, :] = 1
    if fixed >= 0:
        B[0, :fixed+1] = 0
    logBNorms = np.zeros(N+2)
    # Backward pass
    for i in xrange(N - 1, -1, -1):
        B[0,i] = B[0,i+1]
        #B[1, i] = exp(LogBNorms[i+2] + elts[i]) + B[1, i + 1] * (fixed != i)
        for s in xrange(1, size + 1):
            B[s, i] = B[s - 1, i + 1] * exp(elts[i]) + B[s, i + 1] * (fixed != i)
        norm = B[:,i].sum()
        B[:,i] /= norm
        logBNorms[i] = log(norm) + logBNorms[i+1]

    #print LogBNorms
    # Log partition function
    #print B
    #print B * np.exp(LogBNorms)
    logZ = log(B[size, 0]) + logBNorms[0]
    #print logZ; sys.exit()
    F = np.zeros((size + 1,)) # Forward column
    F[0] = 1
    # Forward pass
    # Compute z_j for each j (unnormalized prob)
    z = np.zeros(N)
    logFNorm = 0
    for i in xrange(1, N + 1):
        for s in xrange(size, -1, -1):
            if s < size:
                z[i - 1] += F[s] * B[size - 1 - s, i]
            if s > 0:
                F[s] = F[s - 1] * exp(elts[i - 1]) + F[s] * (fixed != i - 1)
            elif fixed == i - 1:
                F[0] = 0
        norm = F.sum()
        F /= norm
        z[i - 1] *= exp(elts[i - 1] + logBNorms[i] + logFNorm - logZ)
        logFNorm += log(norm)
    return z, 1

# Checks the gradient with respect to the objective
# E = log(y_i)
# where y_i = z_i/Z and i = the index of the correct label
def check_grad(elts, size, correct=0):
    eps = 0.01
    N = len(elts)
    z, Z = sumprod(elts, size)
    cz, CZ = sumprod(elts, size, fixed=correct)

    y = z / Z
    Cy = cz / CZ
    grad = Cy - y
    print "Analytic gradient: "
    print grad

    grad_num = np.zeros_like(grad)
    for i in xrange(N):
        tmp = elts[i]
        elts[i] += eps
        z, Z = sumprod(elts, size)
        y_n = z / Z
        grad_num[i] = (log(y_n[correct]) - log(y[correct])) / eps
        elts[i] = tmp
    print "Numeric gradient: "
    print grad_num

if __name__ == "__main__":
    nr.seed(2)
    N = 5 # The number of outputs in the softmax
    size = 2 # The size of the multisoft set
    fixed = 2 # Force this index to be on (negative = don't)
    elts = nr.randn(N)
    elts -= elts.max()
    print elts

    dp_z, dp_Z = sumprod(elts, size, fixed=fixed)
    bf_Z = sumprod_brute(elts, size, fixed=fixed)
    print "Brute force Z: %f" % bf_Z
    print "DP Z: %f" % dp_Z

    print "Brute force z/Z:"
    bf_z = np.zeros(N)
    for i in xrange(N):
        for s in xrange(size):
            bf_z[i] += sumprod_brute(elts[:i], s, fixed=fixed) * sumprod_brute(elts[i+1:], size - 1 - s, fixed=fixed-i-1)
        bf_z[i] *= exp(elts[i])

    print bf_z / bf_Z

    print "DP z/Z:"
    print dp_z / dp_Z

    check_grad(elts, size, correct=1)