module layers
using Flux
using Flux: glorot_uniform
# Flux needs these methods to convert TrackedReals to Floats
# for faster matrix multiplies
Base.Float64(x::Flux.Tracker.TrackedReal{T}) where T <: Number = Float64(x.data)
Base.Float32(x::Flux.Tracker.TrackedReal{T}) where T <: Number = Float32(x.data)
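# Illustrative (assumes old Flux with Tracker): with the methods above,
# Float32(param([1.0])[1]) returns a plain 1.0f0 rather than erroring on a TrackedReal.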
"""A fully connected NN layer."""
struct Connected{F,S,T}
    W::S # These types need to remain fairly dynamic because
    b::T # it looks like Flux might change them around a little.
    σ::F
end
Connected(W, b) = Connected(W, b, identity)
# We call param() on everything we train so that Flux's Tracker records the
# computations involving those arrays and can backpropagate through them.
function Connected(inDim::Int, outDim::Int, σ::Function = identity; initW::Function = glorot_uniform, initb::Function = zeros)
    return Connected(param(initW(outDim, inDim)), param(initb(outDim)), σ)
end
# @treelike registers the layer's fields with Flux, so everything param() has
# been called on inside the layer is collected automatically by calling
# params() on the layer (or on a model containing it), rather than having to
# pass each tracked array to params() explicitly.
Flux.@treelike Connected
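# Illustrative sketch (assumes old Flux with Tracker) of parameter collection
# after @treelike:
#   l = Connected(3, 2, σ)
#   ps = Flux.params(l)   # contains l.W (2×3) and l.b (length 2)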
"""
Allows an instantiated `Connected` layer to be called as a function, e.g.
```julia
myLayer = Connected(5, 10, σ)
x = [1, 2, 3, 4, 5]
h = myLayer(x) # computes σ.(W*x .+ b)
```
Performs forward propagation on the input array `x`.
"""
function (l::Connected)(x::AbstractArray)
    W, b, σ = l.W, l.b, l.σ
    return σ.(W*x .+ b)
end
# Try to avoid hitting generic matmul in some simple cases
# Base's matmul is so slow that it's worth the extra conversion to hit BLAS
(a::Connected{<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
    invoke(a, Tuple{AbstractArray}, x)
(a::Connected{<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
    a(T.(x))
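# Illustrative: an integer input hits the second method above and is converted
# to the weights' float eltype first (e.g. Connected(3, 2)([1, 2, 3])), so the
# matrix multiply can dispatch to BLAS instead of the generic fallback.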
"""Helper function used by Convolution constructor(s)."""
function makeConvMat(filter::Int, xWidth::Int, xHeight::Int, init::Function)
W = init(filter, filter)
numColSteps = xWidth - filter + 1
numRowSteps = xHeight - filter + 1
convMat = zeros(Float32, numColSteps*numRowSteps, xWidth*xHeight)
convMatⱼ = 0
for convMatᵢ in 1:size(convMat)[1]
# Add a row to (convMat), the convolution transpose matrix
# which will contain the tracked weights (W) within it.
convMatⱼ = Int(floor((convMatᵢ-1) / numColSteps) * xWidth) + (convMatᵢ-1) % numRowSteps + 1
for Wᵢ in 1:filter
# Insert row Wᵢ of W into row convMatᵢ of convMat at the proper place.
convMat[convMatᵢ, convMatⱼ:(convMatⱼ+filter-1)] = W[Wᵢ, :]
convMatⱼ += xWidth
end
end
return param(convMat)
end
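# Worked example (hypothetical sizes): for filter = 2 on a 3×3 image,
# makeConvMat returns a 4×9 matrix. Row 1 holds the filter's first row in
# columns 1:2 and its second row in columns 4:5 — the pixels a 2×2 window
# covers in the flattened image — and the remaining three rows shift that
# pattern to the other window positions.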
"""A convolutional NN layer"""
struct Convolution{F,A,V}
    W::A
    b::V
    σ::F
    filterDim::Int
    xWidth::Int
    xHeight::Int
end
function Convolution(filter::Int, xWidth::Int, xHeight::Int, σ = identity; init = glorot_uniform)
    # The filter weights live inside the convolution matrix W, which has
    # dimensions (numRowSteps*numColSteps) x (xWidth*xHeight). This layer has a
    # single input channel and a single feature map, with one bias weight.
    W = makeConvMat(filter, xWidth, xHeight, init)
    b = param(0.)
    return Convolution(W, b, σ, filter, xWidth, xHeight)
end
Flux.@treelike Convolution
"""
Performs forward convolution on the input array `x`. `x` should be
in the HWCN (height-width-channels-batchsize) format.
"""
function (c::Convolution)(x::AbstractArray)
    if size(x)[1] != c.xHeight
        throw(ArgumentError("Incoming array `x` must have $(c.xHeight) rows, not $(size(x)[1])"))
    elseif size(x)[2] != c.xWidth
        throw(ArgumentError("Incoming array `x` must have $(c.xWidth) columns, not $(size(x)[2])"))
    end
    # Flatten the image into a vector
    flatX = reshape(x, size(x)[1] * size(x)[2])
    # Compute the net and activate
    return reshape(c.σ.(c.W * flatX .+ c.b), c.xHeight - c.filterDim + 1, c.xWidth - c.filterDim + 1)
end
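# Illustrative usage (hypothetical sizes, not from the original file):
#   c = Convolution(3, 28, 28, relu)      # 3×3 filter over a 28×28 image
#   y = c(rand(Float32, 28, 28))          # 26×26 activated feature map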
"""A convolutional transpose NN layer"""
struct ConvolutionTranspose{F,A,V}
    W::A # The convolution transpose matrices, one per (input channel, feature map) pair
    b::V
    σ::F
    filterDim::Int
    inCh::Int
    outCh::Int
    xWidth::Int # dim 2 (columns) of the output image
    xHeight::Int # dim 1 (rows) of the output image
end
function ConvolutionTranspose(filter::Int, inCh::Int, outCh::Int, xWidth::Int, xHeight::Int, σ = identity; init = glorot_uniform)
    # Build one convolution matrix per (input channel, feature map) pair and
    # permute its dims to get the convolution transpose.
    numSteps = (xWidth - filter + 1) * (xHeight - filter + 1)
    W = zeros(Float32, xWidth*xHeight, numSteps, inCh, outCh)
    for ch in 1:inCh, featMap in 1:outCh
        W[:, :, ch, featMap] = permutedims(makeConvMat(filter, xWidth, xHeight, init).data)
    end
    # Per norm, each feature map only has one bias weight.
    b = param(zeros(outCh))
    return ConvolutionTranspose(param(W), b, σ, filter, inCh, outCh, xWidth, xHeight)
end
Flux.children(l::ConvolutionTranspose) = (l.W, l.b)
Flux.mapchildren(f, l::ConvolutionTranspose) = map(f, (l.W, l.b))
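# children/mapchildren are defined by hand here (rather than via @treelike),
# presumably so that only W and b are exposed as trainable leaves and the
# integer size fields stay out of parameter mapping.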
"""
Performs forward convolution transpose on the input array
`x`. `x` should be in the HWCN (height-width-channels-batchsize) format.
"""
function (c::ConvolutionTranspose)(x::AbstractArray)
    if size(x)[1] != c.xHeight - c.filterDim + 1
        throw(ArgumentError("Incoming array `x` must have $(c.xHeight - c.filterDim + 1) rows, not $(size(x)[1])"))
    elseif size(x)[2] != c.xWidth - c.filterDim + 1
        throw(ArgumentError("Incoming array `x` must have $(c.xWidth - c.filterDim + 1) columns, not $(size(x)[2])"))
    end
    batchSize = size(x)[4]
    # Flatten each channel into a vector
    flatX = reshape(x, size(x)[1] * size(x)[2], size(x)[3], batchSize)
    # Initialize the pre-activated feature map values
    net = Array{Any}(undef, c.xHeight, c.xWidth, c.outCh, batchSize)
    # Compute the nets
    numRowSteps, numColSteps = size(net)[1:2]
    for batchᵢ in 1:batchSize, featMap in 1:c.outCh
        # Calculate and sum the nets across all input channels
        chNets = sum(c.W[:, :, ch, featMap] * flatX[:, ch, batchᵢ] for ch in 1:c.inCh)
        # Add the bias and store in net
        net[:, :, featMap, batchᵢ] = reshape(chNets, size(net)[1], size(net)[2]) .+ c.b[featMap]
    end
    # Activate
    return c.σ.(net)
end
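# Illustrative usage (hypothetical sizes, not from the original file):
#   ct = ConvolutionTranspose(3, 1, 4, 28, 28)     # 3×3 filter, 1 -> 4 channels
#   y = ct(rand(Float32, 26, 26, 1, 5))            # size(y) == (28, 28, 4, 5)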
export Connected, Convolution, ConvolutionTranspose
end # module layers