# standard imports
import matplotlib.pyplot as plt
import numpy as np
import random
# sklearn data
from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles
from sklearn.datasets import make_classification
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
# sklearn models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
# sklearn metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay
# pytorch imports
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
Torch Setup
# get cpu, gpu, or mps device for training
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device!")
Using cpu device!
If your machine does not support mps or cuda, consider using:
- Google Colaboratory
  - Be sure to change the runtime type to a GPU.
- Illinois Computes Research Notebooks
  - Be sure to select the PyTorch option to utilize a GPU.
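Once a device string is chosen, models and tensors have to be moved to it explicitly with .to(device). A quick sketch, with illustrative names:

# move a model and a batch of inputs to the selected device (illustrative names)
example_model = nn.Linear(2, 1).to(device)
example_batch = torch.randn(8, 2).to(device)
example_output = example_model(example_batch)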
Logistic Regression as a Neural Network: Linear Data
# generate "linear" data
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=2,
    n_classes=2,
)
# create a new figure and an axes
fig, ax = plt.subplots(figsize=(10, 6))

# plot the generated data
scatter = ax.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap=plt.cm.Set1,
)

# set labels and title
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Linear Data")

# add a grid
ax.grid(
    color="lightgrey",
    linestyle="--",
)

# display the plot
plt.show()
# define a logistic regression model class
class LogisticRegressionNN(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear = nn.Linear(input_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.linear(x)
        out = self.sigmoid(out)
        return out
# create the model instance
model = LogisticRegressionNN(X.shape[1])
print(model)
LogisticRegressionNN(
(linear): Linear(in_features=2, out_features=1, bias=True)
(sigmoid): Sigmoid()
)
# define the loss function
loss_fn = nn.BCELoss()
# loss_fn = nn.BCEWithLogitsLoss()
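The commented-out nn.BCEWithLogitsLoss() is a numerically more stable alternative that fuses the sigmoid into the loss, so the model would return raw logits rather than probabilities. A minimal sketch of that variant (LogisticRegressionLogits is an illustrative name; it is not used below):

# logits variant: drop the Sigmoid from the model, BCEWithLogitsLoss applies it internally
class LogisticRegressionLogits(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear = nn.Linear(input_size, 1)

    def forward(self, x):
        return self.linear(x)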
# define the optimizer
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
)
# convert the data to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)
# train the model
num_epochs = 1000
for epoch in range(num_epochs):
    # forward pass
    outputs = model(X_tensor)
    loss = loss_fn(outputs, y_tensor.view(-1, 1))

    # backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# print the trained model parameters
print("Trained model parameters:")
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)
Trained model parameters:
linear.weight tensor([[3.7912, 1.6506]])
linear.bias tensor([-0.6449])
# fit sklearn logistic regression
sk_model = LogisticRegression(penalty=None)
_ = sk_model.fit(X, y)
# print the sklearn model parameters
sk_model.intercept_, sk_model.coef_
(array([1.07315953]), array([[15.06284414, 6.71249139]]))
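Although the raw numbers differ, the two fits agree on direction: the PyTorch weight ratio 3.7912 / 1.6506 ≈ 2.30 is close to sklearn's 15.0628 / 6.7125 ≈ 2.24, so the two boundaries have nearly the same orientation. With penalty=None and well-separated classes, the likelihood keeps improving as the weights grow, so the magnitudes mostly reflect how far each optimizer ran rather than a real disagreement.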
# generate a grid of points
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.005),
    np.arange(y_min, y_max, 0.005),
)
grid_points = np.c_[xx.ravel(), yy.ravel()]

# convert the grid points to PyTorch tensor
grid_tensor = torch.tensor(grid_points, dtype=torch.float32)

# use the trained model to predict the class labels for the grid points
with torch.no_grad():
    predictions = model(grid_tensor)
    labels = (predictions >= 0.5).float().numpy().reshape(xx.shape)

# create a new figure and an axes
fig, ax = plt.subplots(figsize=(10, 6))

# plot the decision boundary
ax.contourf(xx, yy, labels, alpha=0.25, cmap=plt.cm.Set1)

# plot the generated data
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1)

# set labels and title
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Linear Data with Decision Boundary")

# add a legend
legend_elements = [
    plt.Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label="Class 0",
        markerfacecolor="grey",
        markersize=8,
    ),
    plt.Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label="Class 1",
        markerfacecolor="r",
        markersize=8,
    ),
]
ax.legend(handles=legend_elements)

# add a grid
ax.grid(color="lightgrey", linestyle="--")

# display the plot
plt.show()
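The boundary drawn above is the line where the model's pre-sigmoid score is zero, since sigmoid(0) = 0.5. With the trained parameters it is 3.7912 * x1 + 1.6506 * x2 - 0.6449 = 0, and a single linear layer followed by a sigmoid can only ever produce this kind of straight-line boundary.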
Logistic Regression as a Neural Network: Circle Data
# generate circles data
X, y = make_circles(
    n_samples=100,
    noise=0.05,
    random_state=42,
)
# plot the generated data
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap=plt.cm.Set1,
)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Circles Data")
ax.grid(color="lightgrey", linestyle="--")
plt.show()
# create the model instance
model = LogisticRegressionNN(X.shape[1])
print(model)
# define the loss function
criterion = nn.BCELoss()

# define the optimizer
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
)

# convert the data to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

# train the model
num_epochs = 1000
for epoch in range(num_epochs):
    # forward pass
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor.view(-1, 1))

    # backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
LogisticRegressionNN(
(linear): Linear(in_features=2, out_features=1, bias=True)
(sigmoid): Sigmoid()
)
# print the trained model parameters
print("Trained model parameters:")
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)
Trained model parameters:
linear.weight tensor([[0.0180, 0.0113]])
linear.bias tensor([-2.0658e-05])
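The weights and bias are all essentially zero, so the model outputs sigmoid(0) ≈ 0.5 for every point: no straight line separates a ring from its center. On this balanced dataset that should leave roughly chance-level accuracy, which can be checked with the accuracy_score imported earlier (a quick sketch; circle_preds is an illustrative name):

# with near-zero weights every probability is ~0.5, so accuracy should sit near 50%
with torch.no_grad():
    circle_preds = (model(X_tensor) >= 0.5).float().numpy().ravel()
print("Accuracy:", accuracy_score(y, circle_preds))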
# generate a grid of points
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.005),
    np.arange(y_min, y_max, 0.005),
)
grid_points = np.c_[xx.ravel(), yy.ravel()]

# convert the grid points to PyTorch tensor
grid_tensor = torch.tensor(grid_points, dtype=torch.float32)

# use the trained model to predict the class labels for the grid points
with torch.no_grad():
    predictions = model(grid_tensor)
    labels = (predictions >= 0.5).float().numpy().reshape(xx.shape)

# create a new figure and an axes
fig, ax = plt.subplots(figsize=(10, 6))

# plot the decision boundary
ax.contourf(xx, yy, labels, alpha=0.5, cmap=plt.cm.Set1)

# plot the generated data
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1)

# set labels and title
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Circles Data with Decision Boundary")

# add a legend
legend_elements = [
    plt.Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label="Class 0",
        markerfacecolor="grey",
        markersize=8,
    ),
    plt.Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label="Class 1",
        markerfacecolor="r",
        markersize=8,
    ),
]
ax.legend(handles=legend_elements)

# add a grid
ax.grid(True, color="lightgrey")

# display the plot
plt.show()
Multi-Layer Neural Network: Circle Data
# define multi-layer neural network class
class MLP(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear = nn.Sequential(
            nn.Linear(input_size, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, 1),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.linear(x)
        out = self.sigmoid(out)
        return out
# create the model instance
model = MLP(X.shape[1])
print(model)
# define the loss function
criterion = nn.BCELoss()

# define the optimizer
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.1,
)

# convert the data to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

# train the model
num_epochs = 1000
for epoch in range(num_epochs):
    # forward pass
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor.view(-1, 1))

    # backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
MLP(
(linear): Sequential(
(0): Linear(in_features=2, out_features=100, bias=True)
(1): ReLU()
(2): Linear(in_features=100, out_features=10, bias=True)
(3): ReLU()
(4): Linear(in_features=10, out_features=1, bias=True)
)
(sigmoid): Sigmoid()
)
# print the trained model parameters
# print("Trained model parameters:")
# for name, param in model.named_parameters():
# if param.requires_grad:
# print(name, param.data)
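One way to see how much larger this model is than the logistic regression is to count its trainable parameters: the layers above contribute 2·100 + 100, then 100·10 + 10, then 10·1 + 1 weights and biases, for 1,321 in total. The one-liner below should confirm this:

# count trainable parameters across all layers
print(sum(p.numel() for p in model.parameters() if p.requires_grad))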
# generate a grid of points
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.02),
    np.arange(y_min, y_max, 0.02),
)
grid_points = np.c_[xx.ravel(), yy.ravel()]

# convert the grid points to PyTorch tensor
grid_tensor = torch.tensor(grid_points, dtype=torch.float32)

# use the trained model to predict the class labels for the grid points
with torch.no_grad():
    predictions = model(grid_tensor)
    labels = (predictions >= 0.5).float().numpy().reshape(xx.shape)

# create a new figure and an axes
fig, ax = plt.subplots(figsize=(10, 6))

# plot the decision boundary
ax.contourf(xx, yy, labels, alpha=0.5, cmap=plt.cm.Set1)

# plot the generated data
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1)

# set labels and title
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Circle Data with Decision Boundary")

# add a legend
legend_elements = [
    plt.Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label="Class 0",
        markerfacecolor="grey",
        markersize=8,
    ),
    plt.Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label="Class 1",
        markerfacecolor="r",
        markersize=8,
    ),
]
ax.legend(handles=legend_elements)

# add a grid
ax.grid(True, color="lightgrey")

# display the plot
plt.show()
Multi-Layer Neural Network: Five Class Data
# generate the dataset
X, y = make_blobs(
    n_samples=1000,
    centers=5,
    random_state=42,
)
# print the shape of the dataset
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
Shape of X: (1000, 2)
Shape of y: (1000,)
# create a new figure and an axes
fig, ax = plt.subplots(figsize=(10, 6))

# plot the generated data
scatter = ax.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap=plt.cm.Set1,
)

# set labels and title
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Five Class Data")

# add a grid
ax.grid(color="lightgrey", linestyle="--")

# display the plot
plt.show()
# check target data
y[:15]
array([1, 1, 2, 1, 4, 2, 3, 4, 1, 4, 4, 2, 3, 2, 3])
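The targets are plain integer class indices 0 through 4, not one-hot vectors. That is exactly the format nn.CrossEntropyLoss expects, and it is why the target tensor below is created with dtype=torch.long.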
# define multi-layer neural network class
class MLP(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear = nn.Sequential(
            nn.Linear(input_size, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, 10),
            nn.ReLU(),
            nn.Linear(10, 5),
        )

    def forward(self, x):
        out = self.linear(x)
        return out
# create the model instance
input_size = X.shape[1]
model = MLP(input_size)
print(model)
# define the loss function
criterion = nn.CrossEntropyLoss()

# define the optimizer
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
)

# convert the data to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)

# train the model
num_epochs = 1000
for epoch in range(num_epochs):
    # forward pass
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor)

    # backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
MLP(
(linear): Sequential(
(0): Linear(in_features=2, out_features=100, bias=True)
(1): ReLU()
(2): Linear(in_features=100, out_features=10, bias=True)
(3): ReLU()
(4): Linear(in_features=10, out_features=10, bias=True)
(5): ReLU()
(6): Linear(in_features=10, out_features=5, bias=True)
)
)
# print the trained model parameters
# print("Trained model parameters:")
# for name, param in model.named_parameters():
# if param.requires_grad:
# print(name, param.data)
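The network ends in a bare Linear(10, 5) with no softmax because nn.CrossEntropyLoss applies log-softmax internally and must receive raw logits. To recover per-class probabilities at inference time, apply softmax explicitly; a minimal sketch:

# convert logits to per-class probabilities (each row sums to 1)
with torch.no_grad():
    probs = torch.softmax(model(X_tensor), dim=1)
print(probs[:3])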
# convert the model predictions to numpy array
with torch.no_grad():
    predictions = model(X_tensor)
    predicted_labels = torch.argmax(predictions, dim=1).numpy()

# calculate the accuracy
accuracy = accuracy_score(y, predicted_labels)
print("Accuracy:", accuracy)
Accuracy: 0.98
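This 0.98 is a training accuracy: it is measured on the same points the model was fit to, so it is an optimistic estimate of generalization. The train_test_split imported at the top makes a held-out evaluation easy; a sketch of the split (the loop above would then fit on X_train/y_train and accuracy would be computed on X_test/y_test):

# hold out 25% of the data before training for an unbiased accuracy estimate
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)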
# generate a grid of points
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.02),
    np.arange(y_min, y_max, 0.02),
)
grid_points = np.c_[
    xx.ravel(),
    yy.ravel(),
]

# convert the grid points to PyTorch tensor
grid_tensor = torch.tensor(grid_points, dtype=torch.float32)

# use the trained model to predict the class labels for the grid points
with torch.no_grad():
    predictions = model(grid_tensor)
    labels = torch.argmax(predictions, dim=1).numpy().reshape(xx.shape)

# create a new figure and an axes
fig, ax = plt.subplots(figsize=(10, 6))

# plot the decision boundary
ax.contourf(
    xx,
    yy,
    labels,
    alpha=0.5,
    cmap=plt.cm.Set1,
)

# plot the generated data
scatter = ax.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap=plt.cm.Set1,
)

# set labels and title
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Decision Boundary")

# add a grid
ax.grid(True, color="lightgrey")

# display the plot
plt.show()
Convolutional Neural Networks
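Convolutional layers learn small spatial filters that slide across an image, rather than treating each pixel as an independent input feature the way the fully connected models above do. As a minimal sketch (this architecture is illustrative, not the notebook's own), a small CNN for 28x28 grayscale images such as FashionMNIST could be built with the torchvision imports at the top:

# a small CNN for 28x28 grayscale images (illustrative architecture)
class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),   # 1x28x28 -> 16x28x28
            nn.ReLU(),
            nn.MaxPool2d(2),                              # 16x28x28 -> 16x14x14
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # 16x14x14 -> 32x14x14
            nn.ReLU(),
            nn.MaxPool2d(2),                              # 32x14x14 -> 32x7x7
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 7 * 7, 10),  # raw logits for 10 classes
        )

    def forward(self, x):
        return self.classifier(self.features(x))

# load FashionMNIST and batch it with the DataLoader imported above
train_data = datasets.FashionMNIST(
    root="data", train=True, download=True, transform=ToTensor()
)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)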
Additional Reading and Resources
- Deep Learning with PyTorch
- Learn PyTorch for Deep Learning: Zero to Mastery
- CNN Explainer
- CS231n: Deep Learning for Computer Vision
- Bishop: Deep Learning Foundations and Concepts
- Goodfellow: Deep Learning
- MIT 6.S191 Introduction to Deep Learning
- Prince: Understanding Deep Learning
- PyTorch Lightning
- Keras
- Wikipedia: Neural Networks