4.4 - CNN Architectures
Contents
4.4 - CNN Architectures¶
!wget -nc --no-cache -O init.py -q https://raw.githubusercontent.com/rramosp/2021.deeplearning/main/content/init.py
import init; init.init(force_download=False);
enabling encryption...
encryption enabled
replicating local resources
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
%load_ext tensorboard
from sklearn.datasets import *
from local.lib import mlutils
tf.__version__
The tensorboard extension is already loaded. To reload it, use:
%reload_ext tensorboard
'2.4.1'
See the following references:
A Survey of the Recent Architectures of Deep Convolutional Neural Networks
Benchmark Analysis of Representative Deep Neural Network Architectures
vgg16 = tf.keras.applications.VGG16()
vgg16.summary()
Model: "vgg16"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
block1_conv1 (Conv2D) (None, 224, 224, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 224, 224, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 112, 112, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 112, 112, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 112, 112, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 56, 56, 128) 0
_________________________________________________________________
block3_conv1 (Conv2D) (None, 56, 56, 256) 295168
_________________________________________________________________
block3_conv2 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_conv3 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_pool (MaxPooling2D) (None, 28, 28, 256) 0
_________________________________________________________________
block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160
_________________________________________________________________
block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_pool (MaxPooling2D) (None, 14, 14, 512) 0
_________________________________________________________________
block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_pool (MaxPooling2D) (None, 7, 7, 512) 0
_________________________________________________________________
flatten (Flatten) (None, 25088) 0
_________________________________________________________________
fc1 (Dense) (None, 4096) 102764544
_________________________________________________________________
fc2 (Dense) (None, 4096) 16781312
_________________________________________________________________
predictions (Dense) (None, 1000) 4097000
=================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
_________________________________________________________________
Get a random image and transform it: the network needs an input of shape (1, 224, 224, 3), and here the pixel values are rescaled to the 0–255 range.
img_url = 'https://www.autocar.co.uk/sites/autocar.co.uk/files/styles/gallery_slide/public/images/car-reviews/first-drives/legacy/rrswbphev006.jpg?itok=9I7wPblq'
from skimage.io import imread
from skimage.transform import resize
def get_img(img_url):
img_fname = img_url.split("/")[-1]
!wget -nc $img_url
img = imread(img_fname)
simg = resize(img, output_shape=(224,224,3))
simg = ((simg-np.min(simg))/(np.max(simg)-np.min(simg))*255).astype(int)
simg = simg.reshape([1,*simg.shape])
return simg
simg = get_img(img_url)
print(simg.shape, np.min(simg), np.max(simg))
plt.imshow(simg[0])
--2021-02-14 19:57:46-- https://www.autocar.co.uk/sites/autocar.co.uk/files/styles/gallery_slide/public/images/car-reviews/first-drives/legacy/rrswbphev006.jpg?itok=9I7wPblq
Resolving www.autocar.co.uk (www.autocar.co.uk)... 13.226.49.122, 13.226.49.37, 13.226.49.22, ...
Connecting to www.autocar.co.uk (www.autocar.co.uk)|13.226.49.122|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 116781 (114K) [image/jpeg]
Saving to: ‘rrswbphev006.jpg?itok=9I7wPblq’
rrswbphev006.jpg?it 100%[===================>] 114.04K --.-KB/s in 0.05s
2021-02-14 19:57:46 (2.05 MB/s) - ‘rrswbphev006.jpg?itok=9I7wPblq’ saved [116781/116781]
(1, 224, 224, 3) 0 255
<matplotlib.image.AxesImage at 0x7f3bb93ae240>
Make a prediction and decode it.
# Run the full VGG16 classifier on the single-image batch.
# NOTE(review): simg holds raw 0..255 RGB values; Keras documents
# tf.keras.applications.vgg16.preprocess_input as the expected input
# preprocessing for this model -- confirm whether it should be applied here.
output = vgg16.predict(simg)
output.shape
(1, 1000)
plt.plot(output[0])
[<matplotlib.lines.Line2D at 0x7f3bb932b518>]
np.argsort(output[0])[::-1][:5]
array([717, 609, 436, 656, 511])
from tensorflow.keras.applications.imagenet_utils import decode_predictions
decode_predictions(output)
[[('n03930630', 'pickup', 0.2946707),
('n03594945', 'jeep', 0.2187905),
('n02814533', 'beach_wagon', 0.14391266),
('n03770679', 'minivan', 0.105930716),
('n03100240', 'convertible', 0.078578584)]]
We can also use the network as a feature extractor.
# Build a sub-model going from the first VGG16 layer's input to the output
# of the 'fc1' layer, and evaluate it on the image batch.
inputs = vgg16.layers[0]
layer = vgg16.get_layer('fc1')
layer_output_fn = tf.keras.Model(inputs=inputs.input, outputs=layer.output)
layer_output_fn(simg)
<tf.Tensor: shape=(1, 4096), dtype=float32, numpy=
array([[0. , 0. , 0. , ..., 1.44261, 0. , 0. ]],
dtype=float32)>
The extracted features can then be used for other tasks (training other models, comparing images, etc.).
# Download three more images through the same get_img pipeline
simg2 = get_img('https://specials-images.forbesimg.com/imageserve/5d61562368cb0a0008c04988/960x0.jpg?fit=scale')
simg3 = get_img('https://static01.nyt.com/images/2020/08/13/fashion/12VIRUS-CARS-eli/12VIRUS-CARS-eli-mobileMasterAt3x-v2.jpg')
simg4 = get_img('https://media-cdn.tripadvisor.com/media/photo-s/12/99/82/95/bellaire-house.jpg')
# Show the four images side by side (1 row x 4 columns), with axes hidden
plt.figure(figsize=(15,6))
plt.subplot(141); plt.imshow(simg[0]); plt.axis("off")
plt.subplot(142); plt.imshow(simg2[0]); plt.axis("off")
plt.subplot(143); plt.imshow(simg3[0]); plt.axis("off")
plt.subplot(144); plt.imshow(simg4[0]); plt.axis("off")
File ‘960x0.jpg?fit=scale’ already there; not retrieving.
File ‘12VIRUS-CARS-eli-mobileMasterAt3x-v2.jpg’ already there; not retrieving.
--2021-02-14 19:59:26-- https://media-cdn.tripadvisor.com/media/photo-s/12/99/82/95/bellaire-house.jpg
Resolving media-cdn.tripadvisor.com (media-cdn.tripadvisor.com)... 151.101.2.38, 151.101.66.38, 151.101.130.38, ...
Connecting to media-cdn.tripadvisor.com (media-cdn.tripadvisor.com)|151.101.2.38|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 58530 (57K) [image/jpeg]
Saving to: ‘bellaire-house.jpg’
bellaire-house.jpg 100%[===================>] 57.16K --.-KB/s in 0.01s
2021-02-14 19:59:26 (4.44 MB/s) - ‘bellaire-house.jpg’ saved [58530/58530]
(-0.5, 223.5, 223.5, -0.5)
feats = layer_output_fn(np.vstack((simg,simg2,simg3,simg4))).numpy()
feats.shape
(4, 4096)
import itertools

# Pairwise mean absolute difference between the feature vectors in feats;
# r[i, j] compares row i with row j.
n_imgs = len(feats)
r = np.zeros((n_imgs, n_imgs))
for i, j in itertools.product(range(n_imgs), repeat=2):
    r[i, j] = np.abs(feats[i] - feats[j]).mean()
r
array([[0. , 2.57414365, 2.57649517, 2.30459738],
[2.57414365, 0. , 3.01039696, 2.43500757],
[2.57649517, 3.01039696, 0. , 2.78080606],
[2.30459738, 2.43500757, 2.78080606, 0. ]])
And, of course, we can obtain the ImageNet classification for all four images.
preds = vgg16.predict(np.vstack((simg,simg2,simg3,simg4)))
decode_predictions(preds)
[[('n03930630', 'pickup', 0.29467037),
('n03594945', 'jeep', 0.21879068),
('n02814533', 'beach_wagon', 0.14391305),
('n03770679', 'minivan', 0.1059308),
('n03100240', 'convertible', 0.078578345)],
[('n04482393', 'tricycle', 0.4868581),
('n03791053', 'motor_scooter', 0.122519016),
('n03785016', 'moped', 0.06780141),
('n03534580', 'hoopskirt', 0.0479511),
('n02769748', 'backpack', 0.04404157)],
[('n02930766', 'cab', 0.64891493),
('n04252077', 'snowmobile', 0.048013847),
('n03459775', 'grille', 0.043748885),
('n03445924', 'golfcart', 0.0394534),
('n02704792', 'amphibian', 0.032926943)],
[('n02859443', 'boathouse', 0.41583353),
('n02825657', 'bell_cote', 0.17513582),
('n03028079', 'church', 0.08576325),
('n03930313', 'picket_fence', 0.06905515),
('n02793495', 'barn', 0.042880956)]]