首先,我们回顾一下多层感知机
import torch
from torch import nn
from torch.nn import functional as F
# Baseline multilayer perceptron built from stock layers:
# 20 inputs -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Random input batch: 2 examples with 20 features each.
X = torch.rand(2, 20)
net(X)
tensor([[-0.0592, 0.0558, -0.2263, -0.1218, -0.1035, 0.0366, -0.0849, -0.0338, 0.1567, 0.0243], [-0.0824, 0.0313, -0.1744, -0.0658, -0.1209, 0.0315, -0.0919, -0.0731, 0.0743, -0.0659]], grad_fn=<AddmmBackward>)
nn.Sequential 定义了一种特殊的 Module:它按传入顺序保存各层,并在正向传播时依次调用这些层
自定义块
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer.

    Maps 20 input features through a 256-unit hidden layer with ReLU
    activation to 10 outputs.
    """

    def __init__(self) -> None:
        # nn.Module.__init__ has to run before any layer is assigned,
        # otherwise the assignments below cannot be registered as submodules.
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)  # output layer

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Compute the network output for the input batch ``X``.

        Args:
            X: Input tensor whose last dimension is 20.

        Returns:
            Tensor with the same leading dimensions as ``X`` and a last
            dimension of 10.
        """
        return self.out(F.relu(self.hidden(X)))
实例化多层感知机的层,然后在每次调用正向传播函数时调用这些层
# Build the custom MLP block and run it on the input batch X.
net = MLP()
net(X)
tensor([[ 0.0789, -0.4260, -0.0066, -0.1538, 0.1072, 0.0945, -0.1590, -0.1296, 0.0477, -0.1613], [ 0.1023, -0.3749, 0.0776, -0.0611, -0.0132, 0.0850, -0.0748, -0.0584, 0.0807, -0.0315]], grad_fn=<AddmmBackward>)
顺序块
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The blocks passed to the constructor are stored in order and applied
    one after another in ``forward``.
    """

    def __init__(self, *args: nn.Module) -> None:
        """Register every block in ``args`` under its positional index.

        Args:
            *args: Blocks to chain, in the order they should be applied.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Keys of _modules must be strings: nn.Module builds names such
            # as "0.weight" from them in repr(), state_dict() and
            # named_children(). Keying by position also keeps a block that
            # is passed more than once from collapsing into a single entry.
            self._modules[str(idx)] = block

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Feed ``X`` through every registered block in insertion order.

        Args:
            X: Input accepted by the first block.

        Returns:
            Output of the last block (``X`` itself if no block was given).
        """
        for block in self._modules.values():
            X = block(X)
        return X
# Same architecture as the baseline MLP, chained through MySequential.
net = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
net(X)
tensor([[-0.1840, 0.0114, 0.1589, 0.0729, -0.1335, 0.0517, 0.0956, -0.1545, 0.0713, 0.1012], [-0.1319, 0.0737, 0.1547, 0.1419, -0.1117, -0.0313, -0.0084, -0.0456, 0.0429, 0.1718]], grad_fn=<AddmmBackward>)
在正向传播函数中执行代码
class FixedHiddenMLP(nn.Module):
    """MLP that mixes a trainable layer with a fixed random weight.

    Shows that ``forward`` may run arbitrary code: the same linear layer is
    applied twice, a constant matrix is used in between, and Python control
    flow rescales the result before it is reduced to a scalar.
    """

    def __init__(self) -> None:
        super().__init__()
        # Constant random weight that is never trained. It is registered as
        # a buffer so that .to()/.cuda()/.double() convert it together with
        # the parameters; persistent=False keeps it out of state_dict, so
        # the set of saved keys is the same as with a plain attribute.
        self.register_buffer(
            "rand_weight",
            torch.rand((20, 20), requires_grad=False),
            persistent=False,
        )
        self.linear = nn.Linear(20, 20)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Return a scalar computed from the 2-D input batch ``X``.

        Args:
            X: 2-D tensor with 20 columns (``torch.mm`` requires 2-D input).

        Returns:
            0-dimensional tensor holding the sum of the rescaled activations.

        Note:
            The rescaling loop halves the activations until their absolute
            sum is at most 1; it does not terminate if that sum is infinite.
        """
        X = self.linear(X)
        # The fixed weight and the constant 1 take part in the computation
        # but are not parameters of the model.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same layer: both applications share one set of weights.
        X = self.linear(X)
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# Build the block with the fixed random weight and run it on the input batch X.
net = FixedHiddenMLP()
net(X)
tensor(0.2000, grad_fn=<SumBackward0>)
混合搭配各种组合块的方法
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` inside a custom module.

    A two-layer ReLU stack (20 -> 64 -> 32) is followed by a linear layer
    that produces 16 outputs.
    """

    def __init__(self) -> None:
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        )
        self.linear = nn.Linear(32, 16)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Apply the nested stack, then the final linear layer.

        Args:
            X: Input tensor whose last dimension is 20.

        Returns:
            Tensor with the same leading dimensions as ``X`` and a last
            dimension of 16.
        """
        return self.linear(self.net(X))
# Mix block kinds freely: a nested custom block, a built-in layer and the
# block with custom forward logic, chained by nn.Sequential.
chimera = nn.Sequential(
    NestMLP(),
    nn.Linear(16, 20),
    FixedHiddenMLP(),
)
chimera(X)
tensor(-0.1374, grad_fn=<SumBackward0>)