class: center, middle, inverse, title-slide # Lec 23 - pytorch - optim & nn ##
Statistical Computing and Computation ### Sta 663 | Spring 2022 ###
Dr. Colin Rundel --- exclude: true ```python import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import pandas as pd import seaborn as sns import scipy import statsmodels.api as sm import statsmodels.formula.api as smf import torch import os import math plt.rcParams['figure.dpi'] = 200 np.set_printoptions( edgeitems=30, linewidth=200, precision = 5, suppress=True #formatter=dict(float=lambda x: "%.5g" % x) ) pd.set_option("display.width", 130) pd.set_option("display.max_columns", 10) pd.set_option("display.precision", 6) ``` ```r knitr::opts_chunk$set( fig.align="center", cache=FALSE ) library(lme4) ``` ``` ## Loading required package: Matrix ``` ```r local({ hook_err_old <- knitr::knit_hooks$get("error") # save the old hook knitr::knit_hooks$set(error = function(x, options) { # now do whatever you want to do with x, and pass # the new x to the old hook x = sub("## \n## Detailed traceback:\n.*$", "", x) x = sub("Error in py_call_impl\\(.*?\\)\\: ", "", x) #x = stringr::str_wrap(x, width = 100) hook_err_old(x, options) }) hook_warn_old <- knitr::knit_hooks$get("warning") # save the old hook knitr::knit_hooks$set(warning = function(x, options) { x = sub("<string>:1: ", "", x) #x = stringr::str_wrap(x, width = 100) hook_warn_old(x, options) }) hook_msg_old <- knitr::knit_hooks$get("output") # save the old hook knitr::knit_hooks$set(output = function(x, options) { x = stringr::str_replace(x, "(## ).* ([A-Za-z]+Warning:)", "\\1\\2") x = stringr::str_split(x, "\n")[[1]] #x = stringr::str_wrap(x, width = 120, exdent = 3) x = stringr::str_remove_all(x, "\r") x = stringi::stri_wrap(x, width=120, exdent = 3, normalize=FALSE) x = paste(x, collapse="\n") #x = stringr::str_wrap(x, width = 100) hook_msg_old(x, options) }) }) ``` --- class: center, middle # Demo 2 - Using a model --- ## A sample model ```python class Model(torch.nn.Module): def __init__(self, X, y, beta=None): super().__init__() self.X = X self.y = y if beta is None: beta = 
torch.zeros(X.shape[1]) beta.requires_grad = True self.beta = torch.nn.Parameter(beta) def forward(self, X): return X @ self.beta def fit(self, opt, n=1000, loss_fn = torch.nn.MSELoss()): losses = [] for i in range(n): loss = loss_fn(self(self.X).squeeze(), self.y.squeeze()) loss.backward() opt.step() opt.zero_grad() losses.append(loss.item()) return losses ``` --- ## Fitting .pull-left[ ```python x = torch.linspace(-math.pi, math.pi, 200) y = torch.sin(x) X = torch.vstack(( torch.ones_like(x), x, x**2, x**3 )).T m = Model(X, y) opt = torch.optim.SGD(m.parameters(), lr=1e-3) losses = m.fit(opt, n=10000) m.beta.detach() ``` ``` ## tensor([-1.0707e-08, 8.5455e-01, 3.9629e-09, -9.2817e-02]) ``` ] .pull-right[ ```python plt.figure(figsize=(8,6), layout="constrained") plt.plot(losses) ``` <img src="Lec23_files/figure-html/unnamed-chunk-3-1.png" width="768" style="display: block; margin: auto;" /> ] --- ## Learning rate and convergence .pull-left[ ```python plt.figure(figsize=(8,6), layout="constrained") for lr in [1e-3, 1e-4, 1e-5, 1e-6]: m = Model(X, y) opt = torch.optim.SGD(m.parameters(), lr=lr) losses = m.fit(opt, n=10000) plt.plot(losses, label=f"lr = {lr}") plt.legend() ``` ] .pull-right[ <img src="Lec23_files/figure-html/unnamed-chunk-4-3.png" width="768" style="display: block; margin: auto;" /> ] --- ## Momentum and convergence .pull-left[ ```python plt.figure(figsize=(8,6), layout="constrained") for mt in [0, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]: m = Model(X, y) opt = torch.optim.SGD(m.parameters(), lr=1e-4, momentum=mt) losses = m.fit(opt, n=10000) plt.plot(losses, label=f"momentum = {mt}") plt.legend() ``` ] .pull-right[ <img src="Lec23_files/figure-html/unnamed-chunk-5-5.png" width="768" style="display: block; margin: auto;" /> ] --- ## Optimizers and convergence .pull-left[ ```python plt.figure(figsize=(8,6), layout="constrained") opts = (torch.optim.SGD, torch.optim.Adam, torch.optim.Adagrad) for opt_fn in opts: m = Model(X, y) opt = opt_fn(m.parameters(), lr=1e-4) losses = m.fit(opt, n=10000)
plt.plot(losses, label=f"opt = {opt_fn}") plt.legend() ``` ] .pull-right[ <img src="Lec23_files/figure-html/unnamed-chunk-6-7.png" width="768" style="display: block; margin: auto;" /> ] --- class: center, middle ## MNIST & Logistic models --- ## MNIST handwritten digits - simplified ```python from sklearn.datasets import load_digits digits = load_digits() ``` .pull-left[ .small[ ```python X = digits.data X.shape ``` ``` ## (1797, 64) ``` ```python X[0:3] ``` ``` ## array([[ 0., 0., 5., 13., 9., 1., 0., 0., 0., 0., 13., 15., 10., 15., 5., 0., 0., 3., 15., 2., 0., 11., 8., 0., 0., 4., 12., 0., 0., 8., 8., 0., 0., 5., 8., 0., 0., 9., ## 8., 0., 0., 4., 11., 0., 1., 12., 7., 0., 0., 2., 14., 5., 10., 12., 0., 0., 0., 0., 6., 13., 10., 0., 0., 0.], ## [ 0., 0., 0., 12., 13., 5., 0., 0., 0., 0., 0., 11., 16., 9., 0., 0., 0., 0., 3., 15., 16., 6., 0., 0., 0., 7., 15., 16., 16., 2., 0., 0., 0., 0., 1., 16., 16., 3., ## 0., 0., 0., 0., 1., 16., 16., 6., 0., 0., 0., 0., 1., 16., 16., 6., 0., 0., 0., 0., 0., 11., 16., 10., 0., 0.], ## [ 0., 0., 0., 4., 15., 12., 0., 0., 0., 0., 3., 16., 15., 14., 0., 0., 0., 0., 8., 13., 8., 16., 0., 0., 0., 0., 1., 6., 15., 11., 0., 0., 0., 1., 8., 13., 15., 1., ## 0., 0., 0., 9., 16., 16., 5., 0., 0., 0., 0., 3., 13., 16., 16., 11., 5., 0., 0., 0., 0., 3., 11., 16., 9., 0.]]) ``` ] ] .pull-right[ .small[ ```python y = digits.target y.shape ``` ``` ## (1797,) ``` ```python y[0:10] ``` ``` ## array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) ``` ] ] --- ## Example digits <img src="Lec23_files/figure-html/unnamed-chunk-10-9.png" width="85%" style="display: block; margin: auto;" /> --- ## Test train split ```python from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.20, shuffle=True, random_state=1234 ) ``` .pull-left[ ```python X_train.shape ``` ``` ## (1437, 64) ``` ```python y_train.shape ``` ``` ## (1437,) ``` ```python X_test.shape ``` ``` ## (360, 64) ``` ```python y_test.shape ``` ``` ## (360,) ``` ] --
.pull-right[ ```python from sklearn.linear_model import LogisticRegression from sklearn.metrics import accuracy_score lr = LogisticRegression(penalty='none').fit(X_train, y_train) accuracy_score(y_train, lr.predict(X_train)) ``` ``` ## 1.0 ``` ```python accuracy_score(y_test, lr.predict(X_test)) ``` ``` ## 0.9583333333333334 ``` ] --- ## As Tensors .pull-left[ ```python X_train = torch.from_numpy(X_train).float() y_train = torch.from_numpy(y_train) X_test = torch.from_numpy(X_test).float() y_test = torch.from_numpy(y_test) ``` ] .pull-right[ ```python X_train.shape ``` ``` ## torch.Size([1437, 64]) ``` ```python y_train.shape ``` ``` ## torch.Size([1437]) ``` ```python X_test.shape ``` ``` ## torch.Size([360, 64]) ``` ```python y_test.shape ``` ``` ## torch.Size([360]) ``` ] --- ## PyTorch Model ```python class mnist_model(torch.nn.Module): def __init__(self, input_dim, output_dim): super().__init__() self.beta = torch.nn.Parameter( torch.randn(input_dim, output_dim, requires_grad=True) ) self.intercept = torch.nn.Parameter( torch.randn(output_dim, requires_grad=True) ) def forward(self, X): return (X @ self.beta + self.intercept).squeeze() def fit(self, X_train, y_train, X_test, y_test, lr=0.001, n=1000): opt = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9) losses = [] for i in range(n): opt.zero_grad() loss = torch.nn.CrossEntropyLoss()(self(X_train), y_train) loss.backward() opt.step() losses.append(loss.item()) return losses ``` --- ## Cross entropy loss ```python m = mnist_model(64, 10) l = m.fit(X_train, y_train, X_test, y_test) ``` ```python plt.figure(figsize=(8,6), layout="constrained") plt.plot(l) ``` <img src="Lec23_files/figure-html/unnamed-chunk-18-11.png" width="45%" style="display: block; margin: auto;" /> --- ## Out of sample accuracy .pull-left[ ```python m(X_test) ``` ``` ## tensor([[-60.8759, -17.2256, -9.1371, ..., 34.9462, -32.5647, -27.0246], ## [ -1.5938, 72.1707, -43.8905, ..., -12.6269, 41.7249, 62.3089], ## [-68.0310, -44.0990, -16.7110, ...,
59.5446, -23.9124, -29.6518], ## ..., ## [ 53.7474, 35.9603, -1.8724, ..., -59.9388, 33.7026, 30.7364], ## [-63.3765, 11.4854, -83.6133, ..., 37.6233, -4.3290, 12.5134], ## [-34.7887, -6.8412, 0.2186, ..., -16.7075, 24.0493, 2.7638]], ## grad_fn=<SqueezeBackward0>) ``` ```python val, index = torch.max(m(X_test), dim=1) ``` ```python index ``` ``` ## tensor([7, 1, 7, 6, 0, 2, 6, 3, 6, 3, 7, 8, 7, 9, 4, 3, 1, 7, 8, 4, 0, 3, 9, 2, ## 3, 6, 6, 0, 5, 4, 1, 8, 1, 2, 3, 2, 7, 6, 4, 8, 6, 4, 4, 0, 9, 1, 8, 5, ## 4, 1, 4, 1, 7, 6, 8, 2, 9, 9, 8, 0, 8, 3, 1, 8, 8, 1, 3, 9, 1, 3, 9, 6, ## 9, 5, 2, 1, 9, 2, 1, 3, 8, 7, 3, 3, 1, 8, 7, 5, 1, 2, 6, 1, 9, 1, 6, 4, ## 5, 2, 2, 4, 5, 4, 7, 6, 5, 7, 2, 4, 1, 0, 7, 6, 1, 2, 9, 5, 2, 5, 0, 3, ## 2, 7, 6, 4, 3, 2, 1, 1, 6, 4, 6, 2, 7, 4, 7, 5, 0, 9, 1, 0, 5, 6, 7, 6, ## 3, 8, 3, 2, 0, 4, 0, 8, 5, 4, 6, 1, 1, 1, 6, 8, 7, 9, 0, 7, 9, 5, 4, 1, ## 3, 8, 6, 4, 7, 1, 5, 7, 4, 7, 4, 3, 2, 2, 1, 1, 4, 4, 3, 5, 5, 9, 4, 5, ## 5, 9, 3, 9, 0, 1, 2, 0, 8, 2, 8, 9, 2, 4, 6, 8, 3, 3, 1, 0, 8, 1, 8, 3, ## 6, 8, 7, 1, 8, 0, 4, 9, 7, 0, 5, 5, 6, 1, 3, 0, 5, 8, 2, 0, 9, 5, 6, 7, ## 8, 4, 1, 0, 5, 2, 5, 1, 6, 1, 7, 1, 2, 6, 4, 4, 6, 3, 3, 3, 2, 6, 5, 2, ## 9, 6, 7, 0, 1, 0, 4, 3, 1, 2, 7, 9, 8, 5, 9, 5, 7, 0, 0, 8, 4, 9, 4, 0, ## 7, 7, 3, 5, 3, 5, 3, 9, 7, 9, 6, 2, 7, 4, 8, 9, 1, 7, 9, 8, 5, 0, 8, 0, ## 1, 7, 0, 9, 5, 5, 9, 6, 1, 2, 3, 3, 6, 3, 2, 9, 3, 0, 3, 4, 1, 6, 1, 8, ## 5, 0, 9, 2, 7, 2, 3, 5, 2, 6, 3, 4, 1, 5, 0, 5, 4, 6, 3, 2, 5, 0, 7, 3]) ``` ] .pull-right[ ```python (index == y_test).sum() ``` ``` ## tensor(324) ``` ```python (index == y_test).sum() / len(y_test) ``` ``` ## tensor(0.9000) ``` ] --- ## Calculating Accuracy .small[ ```python class mnist_model(torch.nn.Module): def __init__(self, input_dim, output_dim): super().__init__() self.beta = torch.nn.Parameter( torch.randn(input_dim, output_dim, requires_grad=True) ) self.intercept = torch.nn.Parameter( torch.randn(output_dim, requires_grad=True) ) def forward(self, X): return (X @ 
self.beta + self.intercept).squeeze() def fit(self, X_train, y_train, X_test, y_test, lr=0.001, n=1000, acc_step=10): opt = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9) losses, train_acc, test_acc = [], [], [] for i in range(n): opt.zero_grad() loss = torch.nn.CrossEntropyLoss()(self(X_train), y_train) loss.backward() opt.step() losses.append(loss.item()) if (i+1) % acc_step == 0: val, train_pred = torch.max(self(X_train), dim=1) val, test_pred = torch.max(self(X_test), dim=1) train_acc.append( (train_pred == y_train).sum() / len(y_train) ) test_acc.append( (test_pred == y_test).sum() / len(y_test) ) return (losses, train_acc, test_acc) ``` ] --- ## Performance ```python loss, train_acc, test_acc = mnist_model(64, 10).fit(X_train, y_train, X_test, y_test,acc_step=10, n=3000) ``` ```python plt.figure(figsize=(8,6), layout="constrained") plt.plot(train_acc, label="train accuracy") plt.plot(test_acc, label="test accuracy") plt.legend() ``` <img src="Lec23_files/figure-html/unnamed-chunk-24-13.png" width="45%" style="display: block; margin: auto;" /> --- ## NN Layers ```python class mnist_nn_model(torch.nn.Module): def __init__(self, input_dim, output_dim): super().__init__() self.linear = torch.nn.Linear(input_dim, output_dim) def forward(self, X): return self.linear(X) def fit(self, X_train, y_train, X_test, y_test, lr=0.001, n=1000, acc_step=10): opt = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9) losses, train_acc, test_acc = [], [], [] for i in range(n): opt.zero_grad() loss = torch.nn.CrossEntropyLoss()(self(X_train), y_train) loss.backward() opt.step() losses.append(loss.item()) if (i+1) % acc_step == 0: val, train_pred = torch.max(self(X_train), dim=1) val, test_pred = torch.max(self(X_test), dim=1) train_acc.append( (train_pred == y_train).sum() / len(y_train) ) test_acc.append( (test_pred == y_test).sum() / len(y_test) ) return (losses, train_acc, test_acc) ``` --- ## Linear layer parameters ```python m = mnist_nn_model(64, 10) 
m.parameters() ``` ``` ## <generator object Module.parameters at 0x2a5a1cba0> ``` ```python len(list(m.parameters())) ``` ``` ## 2 ``` ```python list(m.parameters())[0].shape ``` ``` ## torch.Size([10, 64]) ``` ```python list(m.parameters())[1].shape ``` ``` ## torch.Size([10]) ``` -- Applies a linear transform to the incoming data: `$$y = x A^T+b$$` --- ## Performance ```python loss, train_acc, test_acc = m.fit(X_train, y_train, X_test, y_test, n=1500) ``` <img src="Lec23_files/figure-html/unnamed-chunk-28-15.png" width="1152" style="display: block; margin: auto;" /> --- class: center, middle ## Feedforward Neural Network --- ## FNN Model .small[ ```python class mnist_fnn_model(torch.nn.Module): def __init__(self, input_dim, hidden_dim, output_dim, nl_step = torch.nn.ReLU(), seed=1234): super().__init__() self.l1 = torch.nn.Linear(input_dim, hidden_dim) self.nl = nl_step self.l2 = torch.nn.Linear(hidden_dim, output_dim) def forward(self, X): out = self.l1(X) out = self.nl(out) out = self.l2(out) return out def fit(self, X_train, y_train, X_test, y_test, lr=0.001, n=1000, acc_step=10): opt = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9) losses, train_acc, test_acc = [], [], [] for i in range(n): opt.zero_grad() loss = torch.nn.CrossEntropyLoss()(self(X_train), y_train) loss.backward() opt.step() losses.append(loss.item()) if (i+1) % acc_step == 0: val, train_pred = torch.max(self(X_train), dim=1) val, test_pred = torch.max(self(X_test), dim=1) train_acc.append( (train_pred == y_train).sum() / len(y_train) ) test_acc.append( (test_pred == y_test).sum() / len(y_test) ) return (losses, train_acc, test_acc) ``` ] --- ## Model parameters ```python m = mnist_fnn_model(64,64,10) len(list(m.parameters())) ``` ``` ## 4 ``` ```python for i, p in enumerate(m.parameters()): print("Param", i, p.shape) ``` ``` ## Param 0 torch.Size([64, 64]) ## Param 1 torch.Size([64]) ## Param 2 torch.Size([10, 64]) ## Param 3 torch.Size([10]) ``` --- ## Performance - ReLU ```python loss, train_acc, test_acc =
mnist_fnn_model(64,64,10).fit( X_train, y_train, X_test, y_test, n=2000 ) test_acc[-5:] ``` ``` ## [tensor(0.9750), tensor(0.9750), tensor(0.9750), tensor(0.9750), tensor(0.9750)] ``` <img src="Lec23_files/figure-html/unnamed-chunk-32-17.png" width="75%" style="display: block; margin: auto;" /> --- ## Non-linear activation functions .pull-left[ .small[ `$$\text{Tanh}(x) = \frac{\exp(x)-\exp(-x)}{\exp(x) + \exp(-x)}$$` <img src="imgs/torch_Tanh.png" width="55%" style="display: block; margin: auto;" /> `$$\text{Sigmoid}(x) = \frac{1}{1+\exp(-x)}$$` <img src="imgs/torch_Sigmoid.png" width="55%" style="display: block; margin: auto;" /> ] ] .pull-right[ .small[ `$$\text{ReLU}(x) = \max(0,x)$$` <img src="imgs/torch_ReLU.png" width="55%" style="display: block; margin: auto;" /> ] ] --- ## Performance - tanh ```python loss, train_acc, test_acc = mnist_fnn_model(64,64,10, nl_step=torch.nn.Tanh()).fit( X_train, y_train, X_test, y_test, n=2000 ) test_acc[-5:] ``` ``` ## [tensor(0.9694), tensor(0.9694), tensor(0.9694), tensor(0.9694), tensor(0.9694)] ``` <img src="Lec23_files/figure-html/unnamed-chunk-37-1.png" width="75%" style="display: block; margin: auto;" /> --- ## Performance - Sigmoid ```python loss, train_acc, test_acc = mnist_fnn_model(64,64,10, nl_step=torch.nn.Sigmoid()).fit( X_train, y_train, X_test, y_test, n=2000 ) test_acc[-5:] ``` ``` ## [tensor(0.9556), tensor(0.9556), tensor(0.9556), tensor(0.9556), tensor(0.9556)] ``` <img src="Lec23_files/figure-html/unnamed-chunk-39-3.png" width="75%" style="display: block; margin: auto;" /> --- ## Multilayer FNN Model .small[ ```python class mnist_fnn2_model(torch.nn.Module): def __init__(self, input_dim, hidden_dim, output_dim, nl_step = torch.nn.ReLU(), seed=1234): super().__init__() self.l1 = torch.nn.Linear(input_dim, hidden_dim) self.nl = nl_step self.l2 = torch.nn.Linear(hidden_dim, hidden_dim) self.nl = nl_step self.l3 = torch.nn.Linear(hidden_dim, output_dim) def forward(self, X): out = self.l1(X) out = self.nl(out) out = self.l2(out) out = self.nl(out)
out = self.l3(out) return out def fit(self, X_train, y_train, X_test, y_test, lr=0.001, n=1000, acc_step=10): loss_fn = torch.nn.CrossEntropyLoss() opt = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9) losses, train_acc, test_acc = [], [], [] for i in range(n): opt.zero_grad() loss = loss_fn(self(X_train), y_train) loss.backward() opt.step() losses.append(loss.item()) if (i+1) % acc_step == 0: val, train_pred = torch.max(self(X_train), dim=1) val, test_pred = torch.max(self(X_test), dim=1) train_acc.append( (train_pred == y_train).sum() / len(y_train) ) test_acc.append( (test_pred == y_test).sum() / len(y_test) ) return (losses, train_acc, test_acc) ``` ] --- ## Performance ```python loss, train_acc, test_acc = mnist_fnn2_model(64,64,10, nl_step=torch.nn.ReLU()).fit( X_train, y_train, X_test, y_test, n=1000 ) test_acc[-5:] ``` ``` ## [tensor(0.9611), tensor(0.9611), tensor(0.9611), tensor(0.9611), tensor(0.9611)] ``` <img src="Lec23_files/figure-html/unnamed-chunk-42-5.png" width="75%" style="display: block; margin: auto;" /> --- class: center, middle ## Convolutional NN --- ## 2d convolutions .footnote[[Source](] .pull-left[ <img src="imgs/tds_2dconv.gif" style="display: block; margin: auto;" /> ] -- .pull-right[ <img src="imgs/tds_2dconv2.gif" style="display: block; margin: auto;" /> ] --- ## `nn.Conv2d()` ```python cv = torch.nn.Conv2d( in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=1 ) ``` -- ```python list(cv.parameters()) ``` ``` ## [Parameter containing: ## tensor([[[[ 0.3185, -0.1251, 0.0127], ## [ 0.2319, 0.1078, 0.0828], ## [-0.2651, 0.1484, -0.0063]]], ## ## ## [[[-0.3191, 0.1816, -0.0369], ## [ 0.0180, 0.0299, -0.0892], ## [ 0.2278, -0.2236, 0.2685]]], ## ## ## [[[ 0.0027, -0.3022, 0.0216], ## [-0.0097, -0.1375, 0.0558], ## [ 0.0894, 0.1148, -0.2137]]], ## ## ## [[[-0.0667, 0.0374, -0.1312], ## [ 0.1726, 0.1751, -0.2822], ## [-0.1157, -0.1920, -0.1563]]]], requires_grad=True), Parameter containing: ## tensor([ 0.1464, -0.3314, 
0.2575, 0.2789], requires_grad=True)] ``` --- ## Applying `Conv2d()` ```python X_train[[0]] ``` ``` ## tensor([[ 0., 0., 0., 10., 11., 0., 0., 0., 0., 0., 9., 16., 6., 0., ## 0., 0., 0., 0., 15., 13., 0., 0., 0., 0., 0., 0., 14., 10., ## 0., 0., 0., 0., 0., 1., 15., 12., 8., 2., 0., 0., 0., 0., ## 12., 16., 16., 16., 10., 1., 0., 0., 7., 16., 12., 12., 16., 4., ## 0., 0., 0., 9., 15., 12., 5., 0.]]) ``` ```python X_train[[0]].shape ``` ``` ## torch.Size([1, 64]) ``` -- ```python cv(X_train[[0]]) ``` ``` ## RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 64] ``` -- ```python cv(X_train[[0]].view(1,8,8)) ``` ``` ## tensor([[[ 1.4635e-01, 8.9731e-02, 2.2091e+00, 2.0856e+00, 3.0008e-01, ## 1.1069e+00, 1.4635e-01, 1.4635e-01], ## [ 1.4635e-01, 7.9740e-01, 4.7118e+00, 1.2956e+00, 2.8663e+00, ## 5.0414e+00, 1.4635e-01, 1.4635e-01], ## [ 1.4635e-01, 1.4148e+00, 3.9304e+00, 3.7392e+00, 4.8559e+00, ## 2.0574e+00, 1.4635e-01, 1.4635e-01], ## [ 1.4006e-01, 1.5501e+00, 2.6560e+00, 5.3757e+00, 4.5996e+00, ## -1.6774e+00, -3.8376e-01, 1.4635e-01], ## [ 2.2918e-01, 1.5985e+00, 3.0432e+00, 7.8808e+00, 5.1742e+00, ## 2.8690e-01, -2.1535e+00, -2.3558e+00], ## [ 1.5903e-01, 1.1613e+00, 2.2963e+00, 9.7990e+00, 7.2166e+00, ## 7.2061e+00, 4.8221e+00, -1.0744e+00], ## [ 1.4635e-01, 8.7829e-01, 8.7060e-01, 7.7516e+00, 9.2051e+00, ## 6.5414e+00, 6.4035e+00, 6.0225e+00], ## [ 1.4635e-01, 2.3512e-01, 2.1870e-01, 2.7383e+00, 8.5907e+00, ## 7.8556e+00, 5.3387e+00, 5.9015e+00]], ## ## [[-3.3139e-01, 2.0855e+00, 1.0611e+00, -9.2929e-01, 2.4813e+00, ## 1.2338e+00, -3.3139e-01, -3.3139e-01], ## [-3.3139e-01, 2.8943e+00, -1.7214e+00, 1.6945e+00, 1.9052e+00, ## -3.7329e+00, -3.3139e-01, -3.3139e-01], ## [-3.3139e-01, 1.7583e+00, -4.4349e-01, 1.0937e+00, -1.8343e+00, ## -2.2458e+00, -3.3139e-01, -3.3139e-01], ## [-6.2847e-02, 1.6707e+00, 1.5358e+00, 6.7726e-01, -2.8171e+00, ## 1.0439e+00, 1.2424e-01, -3.3139e-01], ## [-4.2056e-01, 1.0664e+00, 
2.8519e+00, 3.8648e-01, 1.1191e+00, ## 2.6252e+00, 1.3820e+00, 1.7231e+00], ## [-3.6833e-01, 1.0592e-01, 3.2942e+00, -2.7258e+00, 7.4348e-01, ## 1.7019e+00, -2.4103e-01, 2.6293e+00], ## [-3.3139e-01, -1.3988e+00, 2.4566e+00, -2.9491e-01, -1.6257e+00, ## -1.6749e+00, -1.7032e+00, -1.7933e+00], ## [-3.3139e-01, -5.8998e-01, -4.5368e-01, -1.1705e+00, -4.1593e+00, ## -2.3883e+00, -1.0363e+00, -4.6198e+00]], ## ## [[ 2.5754e-01, -1.6656e+00, -1.5697e+00, 8.5662e-01, 7.6729e-01, ## 6.8691e-01, 2.5754e-01, 2.5754e-01], ## [ 2.5754e-01, -2.4453e+00, -9.2579e-01, -1.6456e+00, -2.8589e+00, ## 2.2836e-01, 2.5754e-01, 2.5754e-01], ## [ 2.5754e-01, -1.7021e+00, -3.9824e+00, -3.9580e+00, -7.4595e-01, ## 2.7345e-01, 2.5754e-01, 2.5754e-01], ## [ 4.3848e-02, -1.7271e+00, -6.1138e+00, -4.1333e+00, 1.7588e+00, ## 1.2023e+00, 4.3632e-01, 2.5754e-01], ## [ 3.1336e-01, -1.3042e+00, -7.2006e+00, -4.5856e+00, -9.7218e-01, ## 1.0355e+00, 2.6030e+00, 1.2663e+00], ## [ 2.7915e-01, -5.4638e-01, -7.3856e+00, -4.6808e+00, -3.3033e+00, ## -3.0910e+00, 8.4359e-01, 1.9124e+00], ## [ 2.5754e-01, 9.0769e-01, -5.0158e+00, -7.9702e+00, -5.3626e+00, ## -3.5424e+00, -3.1469e+00, -2.7673e-01], ## [ 2.5754e-01, 4.0883e-01, -1.0099e+00, -4.7002e+00, -4.5473e+00, ## -4.5081e+00, -5.2640e+00, -9.5759e-01]], ## ## [[ 2.7892e-01, -1.1277e+00, -6.7712e+00, -6.1245e+00, 9.2750e-01, ## 1.4826e+00, 2.7892e-01, 2.7892e-01], ## [ 2.7892e-01, -4.6052e+00, -8.8830e+00, -2.3599e+00, 2.3306e+00, ## 5.8103e-01, 2.7892e-01, 2.7892e-01], ## [ 2.7892e-01, -7.3229e+00, -6.7756e+00, 8.1508e-01, 5.2251e-01, ## -1.2101e-01, 2.7892e-01, 2.7892e-01], ## [ 1.2263e-01, -8.1761e+00, -6.1066e+00, -1.3577e+00, -2.0990e+00, ## -1.0308e+00, 4.7451e-02, 2.7892e-01], ## [-3.2851e-03, -7.4911e+00, -5.9007e+00, -4.8088e+00, -4.9038e+00, ## -4.4762e+00, -3.3035e+00, -1.0704e+00], ## [ 1.4772e-01, -6.1321e+00, -7.0593e+00, -6.7212e+00, -5.4672e+00, ## -3.6314e+00, -7.0955e-01, -4.3996e-01], ## [ 2.7892e-01, -3.2708e+00, -6.0676e+00, 
-5.4703e+00, -6.6090e+00, ## -6.6654e+00, 8.5040e-01, 2.5325e+00], ## [ 2.7892e-01, -6.3944e-01, -4.0984e+00, -3.8209e+00, -1.1199e+00, ## 1.1073e+00, 2.4985e+00, 2.2468e-01]]], grad_fn=<SqueezeBackward1>) ``` --- ## Pooling ```python x = torch.tensor( [[[0,0,0,0], [0,1,2,0], [0,3,4,0], [0,0,0,0]]], dtype=torch.float ) x.shape ``` ``` ## torch.Size([1, 4, 4]) ``` .pull-left[ ```python p = torch.nn.MaxPool2d(kernel_size=2, stride=1) p(x) ``` ``` ## tensor([[[1., 2., 2.], ## [3., 4., 4.], ## [3., 4., 4.]]]) ``` ```python p = torch.nn.MaxPool2d(kernel_size=3, stride=1, padding=1) p(x) ``` ``` ## tensor([[[1., 2., 2., 2.], ## [3., 4., 4., 4.], ## [3., 4., 4., 4.], ## [3., 4., 4., 4.]]]) ``` ] .pull-right[ ```python p = torch.nn.AvgPool2d(kernel_size=2) p(x) ``` ``` ## tensor([[[0.2500, 0.5000], ## [0.7500, 1.0000]]]) ``` ```python p = torch.nn.AvgPool2d(kernel_size=2, padding=1) p(x) ``` ``` ## tensor([[[0.0000, 0.0000, 0.0000], ## [0.0000, 2.5000, 0.0000], ## [0.0000, 0.0000, 0.0000]]]) ``` ] --- ## Convolutional model ```python class mnist_conv_model(torch.nn.Module): def __init__(self): super().__init__() self.cnn = torch.nn.Conv2d( in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1 ) self.relu = torch.nn.ReLU() self.pool = torch.nn.MaxPool2d(kernel_size=2) self.lin = torch.nn.Linear(8 * 4 * 4, 10) def forward(self, X): out = self.cnn(X.view(-1, 1, 8, 8)) out = self.relu(out) out = self.pool(out) out = self.lin(out.view(-1, 8 * 4 * 4)) return out def fit(self, X_train, y_train, X_test, y_test, lr=0.001, n=1000, acc_step=10): opt = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9) losses, train_acc, test_acc = [], [], [] for i in range(n): opt.zero_grad() loss = torch.nn.CrossEntropyLoss()(self(X_train), y_train) loss.backward() opt.step() losses.append(loss.item()) if (i+1) % acc_step == 0: val, train_pred = torch.max(self(X_train), dim=1) val, test_pred = torch.max(self(X_test), dim=1) train_acc.append( (train_pred == y_train).sum() / 
len(y_train) ) test_acc.append( (test_pred == y_test).sum() / len(y_test) ) return (losses, train_acc, test_acc) ``` --- ## Performance ```python loss, train_acc, test_acc = mnist_conv_model().fit( X_train, y_train, X_test, y_test, n=1000 ) test_acc[-5:] ``` ``` ## [tensor(0.9667), tensor(0.9667), tensor(0.9667), tensor(0.9667), tensor(0.9667)] ``` <img src="Lec23_files/figure-html/unnamed-chunk-55-1.png" width="75%" style="display: block; margin: auto;" />