# Train with gradient descent; the inner loop's iteration count controls the flavor:
#   100 // 1   -> stochastic gradient descent
#   100 // 10  -> mini-batch gradient descent
#   100 // 100 -> batch gradient descent
for e in range(100):
    for b in range(100 // 1):
        # Draw a random batch of 20 sample indices from the training set
        batch_index = np.random.choice(range(len(train_x)), size=20)
        yhat = model(train_x[batch_index])
        loss = loss_fn(yhat, ytrue[batch_index])  # fixed typo: yture -> ytrue
        optimizer.zero_grad()  # clear stale gradients so they don't accumulate across steps
        loss.backward()
        print(loss)
        optimizer.step()  # fixed typo: optimer -> optimizer (matches the rest of the file)
""" tensor(5.0873, dtype=torch.float64, grad_fn=<MseLossBackward>) tensor(3.4337, dtype=torch.float64, grad_fn=<MseLossBackward>) show more (open the raw output data in a text editor) ...
# Inspect the prediction and target shapes, then compute the loss over the full set.
print(model(x).shape)
print(ytrue.shape)
loss = loss_fn(model(x), ytrue)

# Demo: three random integers drawn from [0, 5)
print(torch.randint(5, (3, )))

# Backpropagate the loss to populate parameter gradients
loss.backward()

# Show every parameter alongside its gradient
for param in model.parameters():
    print(param, param.grad)
Advanced deep learning
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# Basic computing library import numpy as np # Deep learning library import torch import torch.nn as nn import torch.optim as optim import torchvision import torch.nn.functional as F import torchvision.transforms as transforms # Auxiliary drawing gallery import matplotlib.pyplot as plt # Time operation library import time # Progress bar control library from tqdm import tqdm
Project
1: Forward propagation of simple neural network
Question
1: Define the initial parameters and activation function
You need to use numpy to implement the forward propagation process of
the neural network and calculate the final output result of the output
layer. In order to complete the above tasks, we need to make the
following assumptions: 1. The input value is [3, 5]. 2. The two
weights of the hidden layer h1 are [2, 4] and [4, -5]. 3. The two
weights of the hidden layer h2 are [-1, 1] and [2, 2]. 4. The weight
of the output layer is [-3, 7]. 5. No layer uses a bias. 6. All hidden
layers apply the tanh activation function.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Input to the neural network: a 2-element vector.
input_data = np.array([3, 5])

# Weight vectors keyed by node name:
#   h11/h12 -> first hidden layer, h21/h22 -> second hidden layer, out -> output node.
weights = {
    'h11': np.array([2, 4]),
    'h12': np.array([4, -5]),
    'h21': np.array([-1, 1]),
    'h22': np.array([2, 2]),
    'out': np.array([-3, 7]),
}
# Hyperbolic tangent activation: tanh(x) = (e^x - e^-x) / (e^x + e^-x).
# BUG FIX: the original read `deftanh(x)` (missing space), a syntax error.
def tanh(x):
    """Element-wise tanh of a scalar or numpy array."""
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
Question
2: Calculate the neural network output layer by layer
To compute each layer of the neural network, first multiply the
weights of that layer element-wise with its input data, then sum the
products, and finally pass the result through the activation function
to produce the output fed to the next layer.
Below we will use the layer as the unit to perform calculations:
The first is the first hidden layer. You need to multiply, sum, and
input the data of the input layer and the weight of the hidden layer
into the activation function.
# First hidden layer: weight the inputs, apply tanh, then sum per node.
# NOTE(review): tanh is applied element-wise BEFORE the summation; the
# conventional forward pass would be tanh((x * w).sum()) — this order is
# kept because it matches the transcript below.
hidden_11_value = np.sum(tanh(input_data * weights['h11']))
hidden_12_value = np.sum(tanh(input_data * weights['h12']))
hidden_1_output = np.array([hidden_11_value, hidden_12_value])
"""
1.9999877116507956
-7.550282621338056e-11
[ 1.99998771e+00 -7.55028262e-11]
"""
Next is the second hidden layer, the operation of this layer is
exactly the same as the previous layer.
1 2 3 4 5
# Second hidden layer: identical pattern, fed by the first layer's output.
hidden_21_value = np.sum(tanh(hidden_1_output * weights['h21']))
hidden_22_value = np.sum(tanh(hidden_1_output * weights['h22']))
hidden_2_output = np.array([hidden_21_value, hidden_22_value])
Finally, there is the output layer. At this time, there is only one
node that needs to be calculated, and there is no need to add an
activation function.
1 2
# Output layer: a plain weighted sum (dot product) — no activation on the final node.
output = np.dot(hidden_2_output, weights['out'])
At this point, you have completed all the calculations. Now let's
print out the output of these layers and have a look.
1 2 3 4
# Show the final forward-pass result.
print(output)
"""
9.887385002294863
"""
Project 2: CIFAR-10
Image Classification
Preparation
The data set used in this project can be directly exported from the
torchvision library. Here are some basic data operations (data download
may take a few minutes, please be patient).
# Image pre-processing pipeline: convert PIL images to tensors, then normalize
# each RGB channel with per-channel mean and std.
# transforms.Compose applies each listed transform to the image in order.
# BUG FIX: the original had unbalanced brackets — `Normalize(...))))` without
# closing the Compose list — which is a syntax error; corrected to `...))])`.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.48216, 0.44653),
                         (0.24703, 0.24349, 0.26159)),
])

# Load CIFAR-10 from torchvision. `root` is the storage directory, `train`
# selects the split, `download` fetches the archive if missing, and
# `transform` is applied to every image.
trainset = torchvision.datasets.CIFAR10(root='~/data/course_data/', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='~/data/course_data/', train=False,
                                       download=True, transform=transform)

# Wrap the datasets in loaders that yield one batch of 16 samples at a time;
# only the training data is shuffled.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=16, shuffle=False)
"""
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ~/data/course_data/cifar-10-python.tar.gz
170499072it [02:24, 1181561.38it/s]
Extracting ~/data/course_data/cifar-10-python.tar.gz to ~/data/course_data/
Files already downloaded and verified
"""
After the data download is complete, we can simply check the data
label to see if it is correct with the data set in the exercise
description.
# Display a tensor image visually.
# BUG FIX: the original read `defimshow(...)` (missing space), a syntax error.
def imshow(inp, title=None):
    """Imshow for Tensor.

    Args:
        inp: a (C, H, W) image tensor.
        title: not used in the body; kept for caller compatibility.
    """
    # Define the canvas for drawing
    fig = plt.figure(figsize=(30, 30))

    # Convert (C, H, W) -> (H, W, C), the layout matplotlib expects
    inp = inp.numpy().transpose((1, 2, 0))

    # NOTE(review): these look like the ImageNet mean/std, not the CIFAR-10
    # statistics used in the dataset transform above, so colors will be
    # slightly off — confirm which values are intended.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # Undo the normalization applied by the transform
    inp = std * inp + mean

    # Clip the array values to the displayable range [0, 1]
    inp = np.clip(inp, 0, 1)

    # Visual display of the picture
    plt.imshow(inp)
# Pull one batch from the training loader.
inputs, classes = next(iter(trainloader))

# Tile the batch of images into a single grid image.
out = torchvision.utils.make_grid(inputs)

# Render the grid, titled with the class name of each image in the batch.
imshow(out, title=[trainset.classes[idx] for idx in classes])
Question 1: Build a
simple neural network
After the data is ready, you need to build a simple neural
network.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# A 3-layer fully connected network for CIFAR-10: 32*32*3 -> 1000 -> 500 -> 10.
# BUG FIX: the original read `classNet`, `def__init__` and `defforward`
# (missing spaces) — all syntax errors.
class Net(nn.Module):
    def __init__(self):
        """Build the three linear layers."""
        super(Net, self).__init__()
        self.fc1 = nn.Linear(32 * 32 * 3, 1000)
        self.fc2 = nn.Linear(1000, 500)
        self.fc3 = nn.Linear(500, 10)

    def forward(self, x):
        """Forward pass: ReLU after the first two layers, raw logits out."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


# Instantiate the neural network class
net = Net()
After the model structure is defined, the loss function and optimizer
need to be determined.
1 2 3 4 5
# Loss function: cross entropy over the 10 class logits.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every network parameter, learning rate 3e-4.
optimizer = optim.Adam(net.parameters(), lr = 3e-4)
Question 2: Neural Network
Training
The main content of the model has been completed, and the training
can be carried out below. In the process of model training, the
following steps are generally followed:
Big for loop-epochs, used to manage a set of data loop training
several times
Small for loop-step, used to retrieve data from dataloader in
batchsize unit
Clear the gradient of the optimizer
Read in data and label, and perform shape transformation (can be
done or not)
Run the forward propagation process of the model
Generate the final result based on the model output
# Train the network for a fixed number of epochs.
num_epochs = 10
since = time.time()
net.train()

for epoch in range(num_epochs):
    print(f'Epoch {epoch + 1} / {num_epochs}')

    running_loss = 0.0
    running_corrects = 0

    # Take out each batch of data in a loop from the trainloader
    for data in tqdm(trainloader):
        inputs, labels = data
        # Flatten (N, 3, 32, 32) images into (N, 3072) vectors for the MLP
        inputs = inputs.view(-1, 32 * 32 * 3)

        # Clear stale gradients before the new backward pass
        optimizer.zero_grad()

        # Forward pass and hard class predictions
        outputs = net(inputs)
        _, preds = torch.max(outputs, 1)

        # Loss, backward pass, and parameter update
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss (weighted by batch size) and hit count
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    # Per-epoch summary over the whole training set (the acc the exercise
    # text refers to); the original never reported these accumulators.
    epoch_loss = running_loss / len(trainloader.dataset)
    epoch_acc = running_corrects.double() / len(trainloader.dataset)
    print(f'Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

# Report total wall-clock training time (`since` was previously unused).
print(f'Training complete in {time.time() - since:.0f}s')
After completing the model training, the model needs to be evaluated
to verify the accuracy of the model on the test set.
Tips: the model training log also prints an accuracy value (acc), but
that is the model's accuracy on the training set, not on the test set.
Compare the training-set accuracy with the test-set accuracy and see
whether there is a gap between them.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Evaluate the trained network on the test set.
correct, total = 0, 0
net.eval()

# Disable gradient tracking during inference — saves memory and time and
# makes it explicit that evaluation must not update the autograd graph.
with torch.no_grad():
    for data in tqdm(testloader):
        inputs, labels = data
        # Flatten images exactly as done during training
        inputs = inputs.view(-1, 32 * 32 * 3)
        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)
        # Count samples seen and correct predictions per batch
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))
"""
100%|██████████| 625/625 [00:03<00:00, 157.71it/s]The testing set accuracy of the network is: 53 %
"""