梯度消失
%matplotlib inline
import torch
from d2l import torch as d2l
# Sample the sigmoid on [-8, 8) in steps of 0.1, tracking gradients w.r.t. x.
x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)
y = torch.sigmoid(x)

# y is not a scalar, so backward() needs an explicit upstream gradient; an
# all-ones tensor makes x.grad hold the elementwise derivative of sigmoid.
y.backward(gradient=torch.ones_like(x))

# Plot the activation and its derivative on the same axes. The derivative
# is tiny at both ends of the range, which is the vanishing-gradient effect.
curves = [y.detach().numpy(), x.grad.numpy()]
d2l.plot(
    x.detach().numpy(),
    curves,
    legend=['sigmoid', 'gradient'],
    figsize=(4.5, 2.5),
)
梯度爆炸
# Start from a single 4x4 matrix with entries drawn from N(0, 1).
M = torch.normal(0, 1, size=(4, 4))
print('一个矩阵 \n', M)
# Right-multiply by 100 further independent N(0, 1) matrices. With no
# normalisation the entries of the running product typically grow to very
# large magnitudes, which is the exploding-gradient effect.
for i in range(100):
    # The loop body must be indented; without it the cell does not parse.
    M = torch.mm(M, torch.normal(0, 1, size=(4, 4)))
print('乘以100个矩阵后\n', M)
一个矩阵 tensor([[ 1.4482, 0.1321, -0.3654, 0.2553], [ 1.6057, -0.0775, 0.2758, -0.0886], [-0.9053, -0.9715, 1.3556, 0.0492], [-1.7316, -1.5284, -0.3882, -0.5320]]) 乘以100个矩阵后 tensor([[-7.5557e+22, 6.7367e+22, -1.3285e+23, -2.3105e+22], [-1.5998e+23, 1.4264e+23, -2.8128e+23, -4.8921e+22], [-1.4575e+23, 1.2995e+23, -2.5626e+23, -4.4569e+22], [-7.0795e+22, 6.3121e+22, -1.2448e+23, -2.1649e+22]])