Q&A:
def train_concise(wd=0):
    """Train a one-layer linear model with SGD and weight decay on the weight.

    The model is ``nn.Linear(num_inputs, 1)`` with parameters drawn from a
    standard normal distribution. Weight decay is applied to the weight
    only; the bias sits in its own parameter group with no decay. Every 5
    epochs the train and test losses are added to an animator, and the L2
    norm of the learned weight is printed at the end.

    Relies on names defined outside this function: ``torch``, ``nn``,
    ``d2l``, ``num_inputs``, ``train_iter`` and ``test_iter``.

    Args:
        wd: Weight-decay coefficient for the weight parameter group.
            Defaults to 0 (no decay) so the function can still be called
            without arguments.
    """
    net = nn.Sequential(nn.Linear(num_inputs, 1))
    for param in net.parameters():
        param.data.normal_()

    # reduction='none' keeps per-sample losses; they are averaged explicitly
    # before the backward pass below.
    loss = nn.MSELoss(reduction='none')
    num_epochs, lr = 100, 0.003

    # Two parameter groups: only the weight group carries weight decay.
    trainer = torch.optim.SGD([
        {"params": net[0].weight, 'weight_decay': wd},
        {"params": net[0].bias}], lr=lr)

    # NOTE(review): the original snippet used `animator` without creating
    # it; these axis settings follow common d2l.Animator usage -- confirm.
    animator = d2l.Animator(xlabel='epochs', ylabel='loss', yscale='log',
                            xlim=[5, num_epochs], legend=['train', 'test'])

    for epoch in range(num_epochs):
        # Assumes train_iter yields (features, labels) pairs -- TODO confirm.
        for X, y in train_iter:
            trainer.zero_grad()
            batch_loss = loss(net(X), y)
            batch_loss.mean().backward()
            trainer.step()
        if (epoch + 1) % 5 == 0:
            animator.add(epoch + 1,
                         (d2l.evaluate_loss(net, train_iter, loss),
                          d2l.evaluate_loss(net, test_iter, loss)))

    print('w的L2范数:', net[0].weight.norm().item())