Q&A:

Q: 权重衰减直接加在权重上（即在优化器更新步骤中按比例收缩权重，如 PyTorch 的 `weight_decay` 参数）与将 L2 惩罚项显式加在损失函数上，两种做法是否等价？

A: 对于标准 SGD 两者在数学上等价；但对于 Adam 等自适应优化器并不等价（参见 AdamW / decoupled weight decay）。


def train_concise():
    """Train a linear model with weight decay implemented via the optimizer.

    Weight decay is applied only to the weight matrix (not the bias) by
    passing per-parameter options to ``torch.optim.SGD``, instead of adding
    an explicit L2 penalty to the loss.

    NOTE(review): the original snippet was missing the training loop and the
    ``animator`` construction — ``epoch`` was referenced without ever being
    bound, and the indentation mixed tabs and spaces (a TabError). The loop
    and animator are restored below following the d2l reference
    implementation; the final ``print`` is moved back inside the function,
    where ``net`` is actually in scope.

    Relies on enclosing-scope names: ``wd`` (weight-decay coefficient),
    ``num_inputs``, ``train_iter``, ``test_iter``, and the ``d2l`` helpers.
    """
    net = nn.Sequential(nn.Linear(num_inputs, 1))
    # Random-normal initialization of all parameters.
    for param in net.parameters():
        param.data.normal_()
    # reduction='none' keeps the per-example losses; we average before backward.
    loss = nn.MSELoss(reduction='none')
    num_epochs, lr = 100, 0.003
    # Decay the weight only — decaying the bias is unnecessary in practice.
    trainer = torch.optim.SGD([
        {"params": net[0].weight, 'weight_decay': wd},
        {"params": net[0].bias}], lr=lr)
    animator = d2l.Animator(xlabel='epochs', ylabel='loss', yscale='log',
                            xlim=[5, num_epochs], legend=['train', 'test'])
    for epoch in range(num_epochs):
        for X, y in train_iter:
            trainer.zero_grad()
            l = loss(net(X), y)
            l.mean().backward()
            trainer.step()
        # Log train/test loss every 5 epochs.
        if (epoch + 1) % 5 == 0:
            animator.add(epoch + 1,
                         (d2l.evaluate_loss(net, train_iter, loss),
                          d2l.evaluate_loss(net, test_iter, loss)))
    print('w的L2范数:', net[0].weight.norm().item())