Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

模型参数无法更新 #44426

Open
aixuxian opened this issue Jul 19, 2022 · 12 comments
Open

模型参数无法更新 #44426

aixuxian opened this issue Jul 19, 2022 · 12 comments
Assignees
Labels

Comments

@aixuxian
Copy link

aixuxian commented Jul 19, 2022

请提出你的问题 Please ask your question

使用V100 [16G的环境,模型构建和运行代码如下:
模型构建
`class Representation_Net(nn.Layer):

def __init__(self, num_classes=1000):
    super(Representation_Net, self).__init__()
    self.num_classes = num_classes

    self.features1 = Sequential(
        Conv1D(64, 128, 1, stride=1, padding=0,),
        BatchNorm1D(128),
        ReLU(),
        Conv1D(128, 256, 3, stride=2, padding=1,),
        BatchNorm1D(256),
        ReLU(),
        Conv1D(256, 512, 3, stride=2, padding=1,),
        BatchNorm1D(512),
        ReLU(),
        Conv1D(512, 256, 3, stride=2, padding=1,),
        BatchNorm1D(256),
        ReLU(),
    )
    self.features2 = Sequential(
        Conv1D(64, 128, 1, stride=1, padding=0,),
        BatchNorm1D(128),
        ReLU(),
        Conv1D(128, 256, 3, stride=2, padding=1,),
        BatchNorm1D(256),
        ReLU(),
        Conv1D(256, 512, 3, stride=2, padding=1,),
        BatchNorm1D(512),
        ReLU(),
        Conv1D(512, 256, 3, stride=2, padding=1,),
        BatchNorm1D(256),
        ReLU(),
    )
    self.linear1 = Linear(
        in_features=32256,
        out_features=512,
        )
    self.linear2 = Linear(
        in_features=32256,
        out_features=512,
        )
    self.discriminator = paddle.nn.Sequential(
        Linear(in_features=1024,out_features=512,),
        ReLU(),
        Linear(in_features=512, out_features=2),
    )

def forward(self, inputs1, inputs2):
    x1 = paddle.to_tensor(pywt.dwt(inputs1.numpy(), 'haar'), dtype='float32', stop_gradient=False)
    x1_1,x1_2 = x1.split(2)
    x1_1 = x1_1.squeeze(axis=0)
    x1_2 = x1_2.squeeze(axis=0)
    x1_1 = self.features1(x1_1)
    x1_2 = self.features2(x1_2) 
    x1 = paddle.concat(x = [x1_1,x1_2], axis=2)
    x1 = paddle.flatten(x1, start_axis=1, stop_axis=-1)

    x2 = paddle.to_tensor(pywt.dwt(inputs2.numpy(), 'haar'), dtype='float32', stop_gradient=False)
    x2_1,x2_2 = x2.split(2)
    x2_1 = x2_1.squeeze(axis=0)
    x2_2 = x2_2.squeeze(axis=0)
    x2_1 = self.features1(x2_1)
    x2_2 = self.features2(x2_2) 
    x2 = paddle.concat(x=[x2_1,x2_2], axis=2)
    x2 = paddle.flatten(x2, start_axis=1, stop_axis=-1)

    x1 = self.linear1(x1)
    x1 = F.relu(x1)

    # self.linear2 = self.linear1
    x2 = self.linear2(x2)
    x2 = F.relu(x2)
    
    x0 = paddle.concat(x=[x1,x2], axis=1)
    x0 = self.discriminator(x0)
    x0 = F.sigmoid(x0)

    # x = self._fc8(x)
    return x0`

模型训练:
model = Representation_Net() llr = 0.1 optim = paddle.optimizer.Adam(learning_rate=llr, parameters=model.parameters()) model.train() loss_fun = paddle.nn.CrossEntropyLoss() t1 = time.time() for epoch in range(2): for num, (edt1, edt2, llb) in enumerate(train_loader): # print(llb) # t1 = time.time() # print(llb) x0 = model(edt1, edt2) # paddle.Model(model). loss = loss_fun(x0, llb) loss.backward() for para in model.linear1.parameters(): print(para.name, ":", para.grad.numpy()[0][:5]) print(para.name, ":", para.numpy()[0][:5]) break if num % 10 == 0: # acc, f1, pre, rec = metrics_print(x0, llb) print("{}---epoch: {}, batch_id: {}, loss is: {}".format(time.time()-t1, epoch, num, loss.numpy())) optim.step() optim.clear_grad()
下面是运行了几个step之后的打印信息:
linear_0.w_0 : [ 1.4867793e-05 -1.0415246e-05 1.3343290e-04 2.0477468e-04
-1.7027040e-04]
linear_0.w_0 : [ 0.01032601 -0.00485409 0.00241758 0.00228845 -0.0067165 ]
3.926401138305664---epoch: 0, batch_id: 0, loss is: [0.6963549]
linear_0.w_0 : [-2.3965185e-06 3.9633629e-05 9.2653732e-05 -6.8230751e-05
9.3980285e-05]
linear_0.w_0 : [ 0.01032601 -0.00485409 0.00241758 0.00228845 -0.0067165 ]
linear_0.w_0 : [ 1.3629116e-05 -6.8708534e-05 -1.8428715e-05 5.0045915e-06
-1.5834516e-05]
linear_0.w_0 : [ 0.01032601 -0.00485409 0.00241758 0.00228845 -0.0067165 ]
linear_0.w_0 : [-8.1593771e-06 7.6985467e-05 2.8302988e-05 -7.5435062e-05
-9.5136711e-05]
linear_0.w_0 : [ 0.01032601 -0.00485409 0.00241758 0.00228845 -0.0067165 ]
linear_0.w_0 : [ 1.1492494e-06 -1.5806514e-04 5.4077671e-05 1.1575944e-04
-8.0573343e-05]
linear_0.w_0 : [ 0.01032601 -0.00485409 0.00241758 0.00228845 -0.0067165 ]
linear_0.w_0 : [-1.4700053e-05 -1.4874062e-05 -1.4286905e-05 -1.0297184e-04
1.1741554e-04]
linear_0.w_0 : [ 0.01032601 -0.00485409 0.00241758 0.00228845 -0.0067165 ]
linear_0.w_0 : [-3.0369796e-05 5.5571427e-06 1.2763042e-05 4.1698524e-05
-5.2746847e-05]
linear_0.w_0 : [ 0.01032601 -0.00485409 0.00241758 0.00228845 -0.0067165 ]
linear_0.w_0 : [ 2.5099731e-05 -8.0320642e-05 1.3445060e-05 9.5332878e-05
-2.4350986e-04]
linear_0.w_0 : [ 0.01032601 -0.00485409 0.00241758 0.00228845 -0.0067165 ]
linear_0.w_0 : [-3.3261884e-05 -1.2341246e-04 -4.1538180e-05 -8.0625352e-05

这个问题我搜索了没找到什么好的解决办法,然后就使用Pytorch实现了,在自己电脑上运行,发现参数参数是可以正常更新的!!
希望大佬们给指个方向!!

@paddle-bot
Copy link

paddle-bot bot commented Jul 19, 2022

您好,我们已经收到了您的问题,会安排技术人员尽快解答您的问题,请耐心等待。请您再次检查是否提供了清晰的问题描述、复现代码、环境&版本、报错信息等。同时,您也可以通过查看官网API文档常见问题历史IssueAI社区来寻求解答。祝您生活愉快~

Hi! We've received your issue and please be patient to get responded. We will arrange technicians to answer your questions as soon as possible. Please make sure that you have posted enough message to demo your request. You may also check out the APIFAQGithub Issue and AI community to get the answer.Have a nice day!

@GuoxiaWang
Copy link
Contributor

@aixuxian 您好,我已经收到你的信息反馈,请整理一下排版?模型训练那里排版出问题了,我无法直接复制进行复现。

@aixuxian
Copy link
Author

aixuxian commented Jul 19, 2022

@GuoxiaWang 抱歉,刚才没找到方法,把导入的库也贴进来

import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import pywt
from paddle.nn import Linear, Dropout, ReLU, Sequential
from paddle.nn import Conv1D, MaxPool1D, CosineSimilarity, BatchNorm1D
from paddle.nn.initializer import Uniform
from paddle.fluid.param_attr import ParamAttr
from paddle.utils.download import get_weights_path_from_url


model = Representation_Net()
model.train()
llr = 0.1
optim = paddle.optimizer.Adam(learning_rate=llr, parameters=model.parameters())
loss_fun = paddle.nn.CrossEntropyLoss()
base_score = 0.5
t1 = time.time()
for epoch in range(2):
    for num, (edt1, edt2, llb) in enumerate(train_loader):
        # print(llb)
        # t1 = time.time()
        # print(llb)
        x0 = model(edt1, edt2)
        # paddle.Model(model).
        loss = loss_fun(x0, llb)
        loss.backward()
        for para in model.linear1.parameters():
            print(para.name, ":", para.grad.numpy()[0][:5])
            print(para.name, ":", para.numpy()[0][:5])
        if num % 10 == 0:
            # acc, f1, pre, rec = metrics_print(x0, llb)
            print("{}---epoch: {}, batch_id: {}, loss is: {}".format(time.time()-t1, epoch, num, loss.numpy()))
        optim.step()
        optim.clear_grad()```

@GuoxiaWang
Copy link
Contributor

如果可以是否能贴一个完整可以跑的代码?数据可以用random 数据。

另外是否也可以提供一份 torch 代码?好排查。

我直观上看这份代码写的是没有问题的,但是实际跑出来确实有问题,所以需要我运行起来才能排查。

@aixuxian
Copy link
Author

@GuoxiaWang
数据集构建应该不需要了,每一个输入数据是(64,1000)
Paddle API版本:

train_dataset = MyDataset(train_images, mode='train')
train_loader = paddle.io.DataLoader(train_dataset, places=paddle.CUDAPlace(0), batch_size=512, shuffle=True, num_workers=0)

#val_loader
val_dataset = MyDataset(val_images, mode='val')
val_loader = paddle.io.DataLoader(val_dataset, places=paddle.CUDAPlace(0), batch_size=512, shuffle=True, num_workers=0)

import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import pywt
from paddle.nn import Linear, Dropout, ReLU, Sequential
from paddle.nn import Conv1D, MaxPool1D, CosineSimilarity, BatchNorm1D
from paddle.nn.initializer import Uniform
from paddle.fluid.param_attr import ParamAttr
from paddle.utils.download import get_weights_path_from_url

class Representation_Net(nn.Layer):

    def __init__(self, num_classes=1000):
        super(Representation_Net, self).__init__()
        self.num_classes = num_classes

        self.features1 = Sequential(
            Conv1D(64, 128, 1, stride=1, padding=0,),
            BatchNorm1D(128),
            ReLU(),
            Conv1D(128, 256, 3, stride=2, padding=1,),
            BatchNorm1D(256),
            ReLU(),
            Conv1D(256, 512, 3, stride=2, padding=1,),
            BatchNorm1D(512),
            ReLU(),
            Conv1D(512, 256, 3, stride=2, padding=1,),
            BatchNorm1D(256),
            ReLU(),
        )
        self.features2 = Sequential(
            Conv1D(64, 128, 1, stride=1, padding=0,),
            BatchNorm1D(128),
            ReLU(),
            Conv1D(128, 256, 3, stride=2, padding=1,),
            BatchNorm1D(256),
            ReLU(),
            Conv1D(256, 512, 3, stride=2, padding=1,),
            BatchNorm1D(512),
            ReLU(),
            Conv1D(512, 256, 3, stride=2, padding=1,),
            BatchNorm1D(256),
            ReLU(),
        )
        self.linear1 = Linear(
            in_features=32256,
            out_features=512,
            )
        self.linear2 = Linear(
            in_features=32256,
            out_features=512,
            )
        self.discriminator = paddle.nn.Sequential(
            Linear(in_features=1024,out_features=512,),
            ReLU(),
            Linear(in_features=512, out_features=2),
        )

    def forward(self, inputs1, inputs2):
        x1 = paddle.to_tensor(pywt.dwt(inputs1.numpy(), 'haar'), dtype='float32', stop_gradient=False)
        x1_1,x1_2 = x1.split(2)
        x1_1 = x1_1.squeeze(axis=0)
        x1_2 = x1_2.squeeze(axis=0)
        x1_1 = self.features1(x1_1)
        x1_2 = self.features2(x1_2) 
        x1 = paddle.concat(x = [x1_1,x1_2], axis=2)
        x1 = paddle.flatten(x1, start_axis=1, stop_axis=-1)

        x2 = paddle.to_tensor(pywt.dwt(inputs2.numpy(), 'haar'), dtype='float32', stop_gradient=False)
        x2_1,x2_2 = x2.split(2)
        x2_1 = x2_1.squeeze(axis=0)
        x2_2 = x2_2.squeeze(axis=0)
        x2_1 = self.features1(x2_1)
        x2_2 = self.features2(x2_2) 
        x2 = paddle.concat(x=[x2_1,x2_2], axis=2)
        x2 = paddle.flatten(x2, start_axis=1, stop_axis=-1)

        x1 = self.linear1(x1)
        x1 = F.relu(x1)

        # self.linear2 = self.linear1
        x2 = self.linear2(x2)
        x2 = F.relu(x2)
        
        x0 = paddle.concat(x=[x1,x2], axis=1)
        x0 = self.discriminator(x0)
        x0 = F.sigmoid(x0)

        # x = self._fc8(x)
        return x0

model = Representation_Net()
llr = 0.1
optim = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters())

model.train()
loss_fun = paddle.nn.CrossEntropyLoss()
base_score = 0.5
t1 = time.time()
for epoch in range(2):
    for num, (edt1, edt2, llb) in enumerate(train_loader):
        # print(llb)
        # t1 = time.time()
        # print(llb)
        x0 = model(edt1, edt2)
        # paddle.Model(model).
        loss = loss_fun(x0, llb)
        loss.backward()
        for para in model.linear1.parameters():
            print(para.name, ":", para.grad.numpy()[0][:5])
            print(para.name, ":", para.numpy()[0][:5])
        if num % 10 == 0:
            # acc, f1, pre, rec = metrics_print(x0, llb)
            print("{}---epoch: {}, batch_id: {}, loss is: {}".format(time.time()-t1, epoch, num, loss.numpy()))
        optim.step()
        optim.clear_grad()```

Pytorch版本:

import scipy.io as scio
from sklearn.utils import shuffle
import os
import pandas as pd
import numpy as np
import time
import random

import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import warnings
warnings.filterwarnings("ignore")

train_dataset = MyDataset(train_images, mode='train')
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=False)

#val_loader
val_dataset = MyDataset(val_images, mode='val')
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False)

import math
import pywt

class Representation_Net(nn.Module):

    def __init__(self):
        super(Representation_Net, self).__init__()
        self.features1 = nn.Sequential(
            nn.Conv1d(64,128,1,stride=1,padding=0,),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Conv1d(128,256,3,stride=2,padding=1,),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Conv1d(256,512,3,stride=2,padding=1,),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Conv1d(512,256,3,stride=2,padding=1,),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.features2 = nn.Sequential(
            nn.Conv1d(64,128,1,stride=1,padding=0,),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Conv1d(128,256,3,stride=2,padding=1,),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Conv1d(256,512,3,stride=2,padding=1,),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Conv1d(512,256,3,stride=2,padding=1,),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )

        # self._fc8 = Linear(
        #     in_features=16384,
        #     out_features=num_classes,
        #     )

    def forward(self, inputs):
        # print("self._conv1", x.shape)
        x = torch.as_tensor(pywt.dwt(inputs.detach().numpy(), 'haar'), dtype=torch.float32,)
        # print("pywt.dwt", x.shape)
        (x1,x2) = x.split(1)
        # print("x.split", x1.shape, x2.shape)

        x1 = x1.squeeze(axis=0)
        x2 = x2.squeeze(axis=0)
        
        x1 = self.features1(x1)
        x2 = self.features2(x2)
        
        x = torch.concat((x1,x2), axis=2)
        x = torch.flatten(x, start_dim=1, end_dim=-1)
        # x = self._fc8(x)
        return x

class Discriminator_Net(nn.Module):
    def __init__(self):
        super(Discriminator_Net, self).__init__()
        self.repre_net1 = Representation_Net()
        # self.repre_net2 = self.repre_net1
        # weight_attr = paddle.framework.ParamAttr(name="linear_weight", initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
        # bias_attr = paddle.framework.ParamAttr(name="linear_bias", initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
        self.linear1 = nn.Linear(
            in_features=32256,
            out_features=512,
            )
        self.discriminator = nn.Linear(in_features=1024, out_features=2)
        # self.cosine_simi = CosineSimilarity(axis=1)
    def forward(self, x1, x2):
        x1 = self.repre_net1(x1)
        # print("self.repre_net1(x1):", x1.stop_gradient)
        # x1 = F.relu(x1)
        self.repre_net2 = self.repre_net1
        x2 = self.repre_net2(x2)
        # print("self.repre_net2(x2):", x2.stop_gradient)
        x1 = self.linear1(x1)
        x1 = F.relu(x1)
        self.linear2 = self.linear1
        x2 = self.linear2(x2)
        x2 = F.relu(x2)
        
        x = torch.concat((x1,x2), axis=1)
        x = self.discriminator(x)
        # print("self.cosine_simi(x1, x2):", x0.stop_gradient)
        return x

model = Discriminator_Net()

lrrr = 0.1
optim = torch.optim.Adam(model.parameters(), lr=lrrr)
criterion = nn.CrossEntropyLoss()
model.train()
base_score = 0.5
t1 = time.time()
train_loss = []
for epoch in range(1):
    for num, (edt1, edt2, llb) in enumerate(train_loader):
        # print(llb)
        # t1 = time.time()
        # print(llb)
        optim.zero_grad()
        x0 = model(edt1, edt2)
        # print("x0:", x0.stop_gradient)
        loss = criterion(x0, llb)
        
        loss.backward()
        optim.step()
        # print("loss:", loss.stop_gradient)
#         for name, para in model.named_parameters():
#             print(name, ":", para.requires_grad, ":", para.grad)
        train_loss.append(loss.detach().numpy())
        if num % 1 == 0:
            # acc, f1, pre, rec = metrics_print(x0, llb)
            for e in model.linear1.parameters():
                print(e.detach().numpy()[1][:5])
                break
            print("{}---epoch: {}, batch_id: {}, train_loss is: {}"
            .format(time.time()-t1, epoch, num, loss.detach().numpy()))```

这两个虽然稍微不一样,但是其实效果是一样的。Pytorch版本是我最初实现的,Paddle版本之前也是跟它一样的。我调试了好久发现不行,以为Paddle不支持pytorch的那种实现方式重改成了上面代码里的方式了。所以就是说Paddle里两种实现方式参数都是无法更新的。

@GuoxiaWang
Copy link
Contributor

@aixuxian

x1 = paddle.to_tensor(pywt.dwt(inputs1.numpy(), 'haar'), dtype='float32', stop_gradient=False)
x2 = paddle.to_tensor(pywt.dwt(inputs2.numpy(), 'haar'), dtype='float32', stop_gradient=False)

x1 的 shape 是什么?
x2 的 shape 是什么?

@aixuxian
Copy link
Author

都是[2, 512, 64, 500]

@GuoxiaWang
Copy link
Contributor

llb 的 shape 是什么? 里面的值是什么?是否可以提供一下?我需要跑起来代码

@aixuxian
Copy link
Author

Tensor(shape=[512], dtype=int64, place=Place(gpu_pinned), stop_gradient=True,
[0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1,
0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0,
1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1,
0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
0, 1, 1, 1, 0, 0, 1, 1])

@aixuxian
Copy link
Author

有没有什么方法我可以把我在Paddle AIStudio里的notebook发给您?

@GuoxiaWang
Copy link
Contributor

@aixuxian

应该是跟优化器相关,adam 走一个 opt.step 后梯度就为 0 了。这可能跟 adam 有关,我们再找到具体问题。

import time
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Linear, Dropout, ReLU, Sequential
from paddle.nn import Conv1D, MaxPool1D, CosineSimilarity, BatchNorm1D
from paddle.nn.initializer import Uniform
from paddle.fluid.param_attr import ParamAttr
from paddle.utils.download import get_weights_path_from_url

class Representation_Net(nn.Layer):

    def __init__(self, num_classes=1000):
        super(Representation_Net, self).__init__()
        self.num_classes = num_classes

        self.features1 = Sequential(
            Conv1D(64, 128, 1, stride=1, padding=0,),
            BatchNorm1D(128),
            ReLU(),
            Conv1D(128, 256, 3, stride=2, padding=1,),
            BatchNorm1D(256),
            ReLU(),
            Conv1D(256, 512, 3, stride=2, padding=1,),
            BatchNorm1D(512),
            ReLU(),
            Conv1D(512, 256, 3, stride=2, padding=1,),
            BatchNorm1D(256),
            ReLU(),
        )
        self.features2 = Sequential(
            Conv1D(64, 128, 1, stride=1, padding=0,),
            BatchNorm1D(128),
            ReLU(),
            Conv1D(128, 256, 3, stride=2, padding=1,),
            BatchNorm1D(256),
            ReLU(),
            Conv1D(256, 512, 3, stride=2, padding=1,),
            BatchNorm1D(512),
            ReLU(),
            Conv1D(512, 256, 3, stride=2, padding=1,),
            BatchNorm1D(256),
            ReLU(),
        )
        self.linear1 = Linear(
            in_features=32256,
            out_features=512,
            )
        self.linear2 = Linear(
            in_features=32256,
            out_features=512,
            )
        self.discriminator = paddle.nn.Sequential(
            Linear(in_features=1024,out_features=512,),
            ReLU(),
            Linear(in_features=512, out_features=2),
        )

    def forward(self, inputs1, inputs2):
        # x1 = paddle.to_tensor(pywt.dwt(inputs1.numpy(), 'haar'), dtype='float32', stop_gradient=False)
        x1 = inputs1
        x1_1,x1_2 = x1.split(2)
        x1_1 = x1_1.squeeze(axis=0)
        x1_2 = x1_2.squeeze(axis=0)
        x1_1 = self.features1(x1_1)
        x1_2 = self.features2(x1_2) 
        x1 = paddle.concat(x = [x1_1,x1_2], axis=2)
        x1 = paddle.flatten(x1, start_axis=1, stop_axis=-1)

        # x2 = paddle.to_tensor(pywt.dwt(inputs2.numpy(), 'haar'), dtype='float32', stop_gradient=False)
        x2 = inputs2
        x2_1,x2_2 = x2.split(2)
        x2_1 = x2_1.squeeze(axis=0)
        x2_2 = x2_2.squeeze(axis=0)
        x2_1 = self.features1(x2_1)
        x2_2 = self.features2(x2_2) 
        x2 = paddle.concat(x=[x2_1,x2_2], axis=2)
        x2 = paddle.flatten(x2, start_axis=1, stop_axis=-1)

        x1 = self.linear1(x1)
        x1 = F.relu(x1)

        # self.linear2 = self.linear1
        x2 = self.linear2(x2)
        x2 = F.relu(x2)
        
        x0 = paddle.concat(x=[x1,x2], axis=1)
        x0 = self.discriminator(x0)
        x0 = F.sigmoid(x0)

        # x = self._fc8(x)
        return x0

model = Representation_Net()
llr = 0.1
optim = paddle.optimizer.Momentum(learning_rate=0.1, parameters=model.parameters())
# optim = paddle.optimizer.Adam(learning_rate=0.1, parameters=model.parameters())

model.train()
loss_fun = paddle.nn.CrossEntropyLoss()
base_score = 0.5
t1 = time.time()
for epoch in range(1):
    #for num, (edt1, edt2, llb) in enumerate(train_loader):
    for num in range(10):
        edt1 = paddle.rand(shape=[2, 512, 64, 500])
        edt2 = paddle.rand(shape=[2, 512, 64, 500])
        llb  = paddle.randint(low=0, high=2, shape=[512])
        # print(llb)
        # t1 = time.time()
        # print(llb)
        x0 = model(edt1, edt2)
        # paddle.Model(model).
        loss = loss_fun(x0, llb)
        loss.backward()
        print("{}---epoch: {}, batch_id: {}, loss is: {}".format(time.time()-t1, epoch, num, loss.numpy()))
        for para in model.linear1.parameters():
            print(para.name, " grad: ", para.grad.numpy()[:5])
            print(para.name, ' stop_gradient', para.stop_gradient, " param: ", para.numpy()[:5])
        print('-'*10)  
        optim.step()
        optim.clear_grad()

@aixuxian
Copy link
Author

aixuxian commented Jul 19, 2022

@GuoxiaWang 是的,我改用SGD之后,参数确实更新了。感谢感谢!!!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

3 participants