FastVision
# Image statistics computed on ImageNet. These are used as the default for
# normalizing RGB images during `ImagePreprocessing`.
# Per-channel means used as the default `means` of the `ImagePreprocessing`
# keyword constructor. Three `Float32` entries, one per color channel
# (presumably in R, G, B order -- confirm against the source of the values).
const IMAGENET_MEANS = SVector{3, Float32}(0.485, 0.456, 0.406)
# Per-channel standard deviations used as the default `stds` of the
# `ImagePreprocessing` keyword constructor; same layout as `IMAGENET_MEANS`.
const IMAGENET_STDS = SVector{3, Float32}(0.229, 0.224, 0.225)
"""
ImageTensor{N} <: Block
Block for N+1-dimensional arrays representing an N-dimensional
image with the color channels expanded.
"""
struct
ImageTensor
{
N
}
<:
Block
nchannels
::
Int
end
# Validate that `a` is a well-formed observation for `block`: it needs exactly one
# dimension more than the image (`N + 1`), and that trailing dimension must have
# `block.nchannels` entries.
function checkblock(block::ImageTensor{N}, a::AbstractArray{T, M}) where {M, N, T}
    # Wrong dimensionality: no need to look at the channel dimension at all.
    M == N + 1 || return false
    # The last dimension holds the expanded color channels.
    return size(a, M) == block.nchannels
end
# Write a short description of the block, including its dimensionality `N`, to `io`.
function Base.summary(io::IO, ::ImageTensor{N}) where {N}
    return print(io, "ImageTensor{$N}")
end
"""
ImagePreprocessing([; kwargs...]) <: Encoding
Encodes `Image`s by converting them to a common color type `C`,
expanding the color channels and normalizing the channel values.
Additionally, apply pixel-level augmentations passed in as `augmentations`
during `Training`.
Encodes
- `Image{N}` -> `ImageTensor{N}`
## Keyword arguments
- `augmentations::`[`DataAugmentation.Transform`](#): Augmentation to apply to every image
before preprocessing. See [`augs_lighting`](#)
- `buffered = true`: Whether to use inplace transformations. Reduces memory usage.
- `means::SVector = IMAGENET_MEANS`: mean value of each color channel.
- `stds::SVector = IMAGENET_STDS`: standard deviation of each color channel.
- `C::Type{<:Colorant} = RGB{N0f8}`: color type to convert images to.
- `T::Type{<:Real} = Float32`: element type of output
"""
struct
ImagePreprocessing
{
P
,
N
,
C
<:
Color
{
P
,
N
}
,
T
<:
Number
}
<:
Encoding
buffered
::
Bool
augmentations
::
DataAugmentation
.
Transform
stats
::
Tuple
{
SVector
{
N
}
,
SVector
{
N
}
}
tfms
::
Dict
{
Context
,
DataAugmentation
.
Transform
}
end
# Keyword constructor. `N` ties the length of `means`/`stds` to the number of color
# channels of `C`, so mismatched statistics and color types fail to dispatch.
function ImagePreprocessing(;
        means::SVector{N} = IMAGENET_MEANS,
        stds::SVector{N} = IMAGENET_STDS,
        augmentations = Identity(),
        C::Type{<:Color{U, N}} = RGB{N0f8},
        T = Float32,
        buffered = true) where {N, U}
    # TODO: tensor of type T
    # Pipeline shared by every context: convert to `C`, expand the color
    # channels into a tensor, then normalize with the channel statistics.
    pipeline = ToEltype(C) |> ImageToTensor{T}() |> Normalize(means, stds)

    # Only the training and validation transforms are wrapped when buffering is on.
    maybebuffer = buffered ? BufferedThreadsafe : identity
    tfms = Dict(
        Training() => maybebuffer(augmentations |> pipeline),
        Validation() => maybebuffer(pipeline),
        # Inference transform is not buffered since it can have
        # varying sizes
        Inference() => pipeline,
    )

    return ImagePreprocessing{U, N, C, T}(buffered, augmentations, (means, stds), tfms)
end
# Number of color channels of a color type, read off the second type parameter
# of `Color`.
function colorchannels(C::Type{<:Color{T, N}}) where {T, N}
    return N
end
# An `Image{N}` is encoded into an `ImageTensor{N}` with as many channels as
# the encoding's color type `C` has.
function encodedblock(ip::ImagePreprocessing{P, M, C}, ::Image{N}) where {P, M, C, N}
    nchannels = colorchannels(C)
    return ImageTensor{N}(nchannels)
end
# Run the transform registered for `context` on the image `obs` and return the
# resulting array.
function encode(ip::ImagePreprocessing, context, block::Image, obs)
    tfm = ip.tfms[context]
    item = apply(tfm, DataAugmentation.Image(obs))
    # Copy the data out of the item; presumably needed because buffered
    # transforms reuse their output storage -- confirm.
    return copy(itemdata(item))
end
# Decoding an `ImageTensor{N}` gives back an `Image{N}` block.
function decodedblock(::ImagePreprocessing, ::ImageTensor{N}) where {N}
    return Image{N}()
end
# Invert the encoding: undo the normalization using the stored channel
# statistics, then turn the tensor back into an image.
function decode(ip::ImagePreprocessing, context, block::ImageTensor, obs)
    means, stds = ip.stats
    denormalized = DataAugmentation.denormalize(obs, means, stds)
    image = DataAugmentation.tensortoimage(denormalized)
    return copy(image)
end
"""
imagestats(image, C)
Compute the color channel-wise means and standard deviations of all pixels.
`image` is converted to color type `C` (e.g. `RGB{N0f8}`, `Gray{N0f8}`)
before statistics are calculated.
"""
function
imagestats
(
img
::
AbstractArray
{
T
,
N
}
,
C
)
where
{
T
,
N
}
imt
=
DataAugmentation
.
imagetotensor
(
map
(
x
->
convert
(
C
,
x
)
,
img
)
)
means
=
reshape
(
mean
(
imt
;
dims
=
1
:
N
)
,
:
)
stds
=
reshape
(
std
(
imt
;
dims
=
1
:
N
)
,
:
)
return
means
,
stds
end
"""
imagedatasetstats(data, C[; parallel, progress])
Given a data container of images `data`, compute the color channel-wise means
and standard deviations across all observations. Images are converted to color type
`C` (e.g. `RGB{N0f8}`, `Gray{N0f8}`) before statistics are calculated.
If `progress = true`, show a progress bar.
"""
function
imagedatasetstats
(
data
,
C
;
progress
=
true
)
means
,
stds
=
imagestats
(
getobs
(
data
,
1
)
,
C
)
loaderfn
=
d
->
eachobs
(
d
,
parallel
=
true
,
buffer
=
false
)
p
=
Progress
(
numobs
(
data
)
,
enabled
=
progress
)
for
(
means_
,
stds_
)
in
mapobs
(
img
->
imagestats
(
img
,
C
)
,
data
)
|>
loaderfn
means
.+=
means_
stds
.+=
stds_
next!
(
p
)
end
return
means
./
numobs
(
data
)
,
stds
./
numobs
(
data
)
end
# Build an `ImagePreprocessing` whose normalization statistics are computed from
# `data` instead of using the ImageNet defaults. Remaining keyword arguments are
# passed through to the `ImagePreprocessing` constructor.
function setup(::Type{ImagePreprocessing}, ::Image, data;
               C = RGB{N0f8}, progress = false, kwargs...)
    means, stds = imagedatasetstats(data, C; progress = progress)
    # The constructor expects static vectors; both are sized by the number of
    # channel means.
    nchannels = length(means)
    tostatic(v) = SVector{nchannels}(v)
    return ImagePreprocessing(;
                              means = tostatic(means),
                              stds = tostatic(stds),
                              C = C,
                              kwargs...)
end
Augmentation helper
"""
augs_lighting([; intensity = 0.2, p = 0.75])
Helper to create a set of lighting transformations for image data. With
probability `p`, applies [`AdjustBrightness`](#)`(intensity)` and
[`AdjustContrast`](#)`(intensity)`.
"""
function
augs_lighting
(
;
intensity
=
0.2
,
p
=
0.75
)
return
Maybe
(
AdjustBrightness
(
intensity
)
,
p
)
|>
Maybe
(
AdjustContrast
(
intensity
)
,
p
)
end
Pretty-printing
# Print the encoding's type followed by its `buffered` and `augmentations`
# settings, one per line, each limited to 80 characters.
function Base.show(io::IO, ip::ImagePreprocessing{P, N, C, T}) where {P, N, C, T}
    show(io, ShowTypeOf(ip))
    limited(value) = ShowLimit(value, limit = 80)
    props = (
        buffered = limited(ip.buffered),
        augmentations = limited(ip.augmentations),
    )
    return show(io, ShowProps(props, new_lines = true))
end
@testset "ImagePreprocessing [encoding]" begin
    # One zero-argument constructor per configuration under test.
    encfns = [
        () -> ImagePreprocessing(),
        () -> ImagePreprocessing(buffered = false),
        () -> ImagePreprocessing(T = Float64),
        () -> ImagePreprocessing(augmentations = augs_lighting()),
        () -> ImagePreprocessing(C = Gray{N0f8},
                                 means = SVector(0.0),
                                 stds = SVector(1.0)),
    ]

    for makeenc in encfns
        enc = makeenc()
        block = Image{2}()
        img = rand(RGB{N0f8}, 10, 10)
        testencoding(enc, block, img)

        # Encode/decode round trip in the validation context; it is only
        # compared for encodings that decode back to RGB.
        ctx = Validation()
        outblock = encodedblock(enc, block)
        encoded = encode(enc, ctx, block, img)
        rimg = decode(enc, ctx, outblock, encoded)
        if eltype(rimg) <: RGB
            @test img ≈ rimg
        end
    end

    @testset "3D" begin
        # Default (buffered) first, then the unbuffered encoding.
        for kwargs in (NamedTuple(), (buffered = false,))
            enc = ImagePreprocessing(; kwargs...)
            block = Image{3}()
            img = rand(RGB{N0f8}, 10, 10, 10)
            FastAI.testencoding(enc, block, img)
        end
    end

    @testset "imagedatasetstats" begin
        @testset "RGB" begin
            black = zeros(RGB{Float32}, 10, 10)
            white = ones(RGB{Float32}, 10, 10)
            means, stds = imagedatasetstats([black, white], RGB{N0f8}; progress = false)
            @test means ≈ fill(0.5, 3)
            @test stds ≈ zeros(3)
        end

        @testset "Gray" begin
            black = zeros(Gray{Float32}, 10, 10)
            white = ones(Gray{Float32}, 10, 10)
            means, stds = imagedatasetstats([black, white], Gray{N0f8}; progress = false)
            @test means ≈ [0.5]
            @test stds ≈ [0.0]
        end
    end

    @testset "setup" begin
        data = [zeros(10, 10), ones(10, 10)]
        enc = setup(ImagePreprocessing, Image{2}(), data, C = Gray{N0f8})
        channelmeans, channelstds = enc.stats[1], enc.stats[2]
        @test channelmeans ≈ [0.5]
        @test channelstds ≈ [0.0]
    end
end