Error when using conv2d in dygraph mode
Paddle Framework · Q&A · Deep Learning · 1494 views · 5 replies
  • Version and environment:
       1) PaddlePaddle version: 1.6.1
       2) CPU: Intel(R) Core(TM) i5-6300HQ CPU @ 2.30GHz
       3) GPU: not used
       4) System: Windows 7 64-bit, Python 3.7.4
  • Model information
        Mainly uses the fluid.layers.conv2d method
  • How to reproduce: I built a simple Conv+BN block with Paddle's conv2d (Program No.1). When conv2d is called repeatedly in dygraph mode, an error occurs. To track the problem down, I wrote a static-graph program (Program No.2) with the identical algorithm, and it passes without issue.
  • Problem description: my questions are annotated in the code; the main two are:
    1. The same algorithm works in static mode but fails in dygraph mode. Is my understanding (and hence my code) wrong, or is this a bug?
    2. The script path shown in the error message is wrong. Is that a bug?
    Thanks!

The code pasted below renders a bit oddly; please see the attachment of this issue.

No.1 Program: the script that fails

import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm
from paddle.fluid.regularizer import L2Decay
import numpy as np

class BN(fluid.dygraph.Layer):
    def __init__(self, name_scope,
                 num_filters,
                 filter_size,
                 stride,
                 padding):
        super(BN, self).__init__(name_scope)

        self._conv2d = Conv2D(
            self.full_name(),
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Normal(0., 0.02)),
            bias_attr=False,
            act=None)

        self._bn = BatchNorm(
            self.full_name(),
            num_channels=num_filters,
            act=None,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Normal(0., 0.02),
                regularizer=L2Decay(0.)),
            bias_attr=ParamAttr(
                initializer=fluid.initializer.Constant(0.0),
                regularizer=L2Decay(0.)))

    def forward(self, inputs):
        # print("forward from dbl_block,inputs:", inputs)
        x = self._conv2d(inputs)
        x = self._bn(x)
        return x

place = fluid.CPUPlace()
with fluid.dygraph.guard(place):

    x = fluid.layers.ones(shape=[1, 1024, 9, 9], dtype='float32')

    conv1 = BN("conv1", 256, 1, 1, 0)
    conv2 = BN("conv2", 512, 3, 1, 1)

    print("Step 0,x shape is:", x.shape)   # Step 0,x shape is: [1, 1024, 9, 9]
    x1 = conv1(x)
    print("Step 1,x shape is:", x1.shape)  # Step 1,x shape is: [1, 256, 9, 9]
    x2 = conv2(x1)
    print("Step 2,x shape is:", x2.shape)  # Step 2,x shape is: [1, 512, 9, 9]
    x3 = conv1(x2)
    # PaddleCheckError: Expected input_channels == filter_dims[1] * groups,
    # but received input_channels:512 != filter_dims[1] * groups:1024.
    # ShapeError: The number of input channels should be equal to
    # filter channels * groups. But received: the input channels is [512],
    # the shape of input is [1, 512, 9, 9], the filter channel is [1024],
    # the shape of filter is [256, 1024, 1, 1], the groups is [1] at
    # [D:\1.6.1\paddle\paddle\fluid\operators\conv_op.cc:90]

    # Interrupted here, and the script path in the message is wrong.
    # My script path is something like:
    # "C:\Users\Administrator\Anaconda3\envs\paddle\lib\site-packages\paddle\fluid\layers\nn.py"
    # I have no "D:\1.6.1\..." dir at all.

    print("Step 3,x shape is:", x3.shape)
    x4 = conv2(x3)
    print("Step 4,x shape is:", x4.shape)

No.2 Program: the script that runs without error

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.regularizer import L2Decay
import numpy as np

def BN(input,
       ch_out,
       filter_size,
       stride,
       padding,
       name=None):

    conv1 = fluid.layers.conv2d(
        input=input,
        num_filters=ch_out,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Normal(0., 0.02)),
        bias_attr=False,
        act=None)

    out = fluid.layers.batch_norm(
        input=conv1,
        act=None,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Normal(0., 0.02),
            regularizer=L2Decay(0.)),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(0.0),
            regularizer=L2Decay(0.)))
    return out

place = fluid.CPUPlace()
exe = fluid.Executor(place)

exe.run(fluid.default_startup_program())

x = fluid.layers.ones(shape=[1, 1024, 9, 9], dtype='float32')

x1 = BN(x, 256, 1, 1, 0)
x2 = BN(x1, 512, 3, 1, 1)
x3 = BN(x2, 256, 1, 1, 0)
x4 = BN(x3, 512, 3, 1, 1)

print("Step 0,x shape is:", x.shape) #Step 0,x shape is: (1, 1024, 9, 9)
print("Step 1,x shape is:", x1.shape) #Step 1,x shape is: (1, 256, 9, 9)
print("Step 2,x shape is:", x2.shape) #Step 2,x shape is: (1, 512, 9, 9)
print("Step 3,x shape is:", x3.shape) #Step 3,x shape is: (1, 256, 9, 9)
print("Step 4,x shape is:", x4.shape) #Step 4,x shape is: (1, 512, 9, 9)

All steps pass and the result is OK.

TheTwoPythonProgram.zip
The copied code above renders a bit oddly; please see the attachment. Thanks.

All comments (5)
AIStudio792042
#2 · replied 2020-01

Well, I just changed Program No.1 to this, i.e. split it into four separately defined layers, and now it works:
conv1 = BN("conv1", 256, 1, 1, 0)
conv2 = BN("conv2", 512, 3, 1, 1)
conv3 = BN("conv3", 256, 1, 1, 0)
conv4 = BN("conv4", 512, 3, 1, 1)

But! I did the same thing yesterday and it failed with the same error. Though yesterday all the names seem to have been self.full_name(), like:
conv1 = BN(self.full_name(), 256, 1, 1, 0)
conv2 = BN(self.full_name(), 512, 3, 1, 1)
conv3 = BN(self.full_name(), 256, 1, 1, 0)
conv4 = BN(self.full_name(), 512, 3, 1, 1)
x1 = conv1(x)
print("Step 1,x shape is:", x1.shape) #Step 1,x shape is: [1, 256, 9, 9]
x2 = conv2(x1)
print("Step 2,x shape is:", x2.shape) # Step 2,x shape is: [1, 512, 9, 9]
x3 = conv3(x2)
It fails at this step with exactly the same error as script No.1. Is using the same name not allowed? But then why do the first two steps run?

In any case, Program No.1 ought to be correct but fails, and the script path in the error message is also wrong, so please take a look: is it a misunderstanding in my design, or a bug? Thanks.

AIStudio784460
#3 · replied 2020-01

conv1 = BN("conv1", 256, 1, 1, 0)
conv2 = BN("conv2", 512, 3, 1, 1)
conv3 = BN("conv3", 256, 1, 1, 0)
conv4 = BN("conv4", 512, 3, 1, 1)
This is the way to do it: when you define a network, every layer must be defined. The name_scope argument may be identical without affecting the computation, e.g. they could all be called conv.
conv1 = BN(self.full_name(), 256, 1, 1, 0)
conv2 = BN(self.full_name(), 512, 3, 1, 1)
conv3 = BN(self.full_name(), 256, 1, 1, 0)
conv4 = BN(self.full_name(), 512, 3, 1, 1)
This obviously can't run, because where is self defined?
As for "yesterday all the names were self.full_name()" -- could you provide the exact code? I'd like to see how you wrote it.

AIStudio792042
#4 · replied 2020-01

> (quoting #3 above)

I deleted the original, so let me reproduce it; it was roughly like this:

import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm
from paddle.fluid.regularizer import L2Decay
import numpy as np

class BN(fluid.dygraph.Layer):
    def __init__(self, name_scope,
                 num_filters,
                 filter_size,
                 stride,
                 padding):
        super(BN, self).__init__(name_scope)

        self._conv2d = Conv2D(
            self.full_name(),
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Normal(0., 0.02)),
            bias_attr=False,
            act=None)

        self._bn = BatchNorm(
            self.full_name(),
            num_channels=num_filters,
            act=None,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Normal(0., 0.02),
                regularizer=L2Decay(0.)),
            bias_attr=ParamAttr(
                initializer=fluid.initializer.Constant(0.0),
                regularizer=L2Decay(0.)))

    def forward(self, inputs):
        # print("forward from dbl_block,inputs:", inputs)
        x = self._conv2d(inputs)
        x = self._bn(x)
        return x

class BlockTest(fluid.dygraph.Layer):
    def __init__(self, name_scope):
        super(BlockTest, self).__init__(name_scope)

        self._conv1 = BN(self.full_name(), 256, 1, 1, 0)
        self._conv2 = BN(self.full_name(), 512, 3, 1, 1)
        self._conv3 = BN(self.full_name(), 256, 1, 1, 0)
        self._conv4 = BN(self.full_name(), 512, 3, 1, 1)

    def forward(self, inputs):
        # Way No.1: four separate layers, this works
        # conv1 = self._conv1(inputs)
        # conv2 = self._conv2(conv1)
        # conv3 = self._conv3(conv2)
        # conv4 = self._conv4(conv3)
        # return conv4

        # Way No.2: two layers reused in a loop, this does NOT work
        conv = inputs
        for _ in range(2):
            conv = self._conv1(conv)  # will cause an error in the 2nd iteration
            conv = self._conv2(conv)
        return conv

place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
    x = fluid.layers.ones(shape=[1, 768, 9, 9], dtype='float32')
    block_test = BlockTest("block_test")
    x4 = block_test(x)
    print("Step 4,x shape is:......", x4.shape)

The source code above is attached here:
No.3.zip
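The difference between the two ways above can be reproduced with a toy stand-in for a dygraph layer (an illustration only, not Paddle's implementation): the layer creates its weight lazily on the first call and keeps that shape forever, so reusing one instance on an input with a different channel count fails, while one instance per use works.

```python
class TinyConv:
    """Toy stand-in for a dygraph Conv2D: the weight shape is fixed by
    the first input the layer sees (lazy creation), as in Paddle 1.x."""

    def __init__(self, num_filters):
        self.num_filters = num_filters
        self.in_channels = None  # unknown until the first call

    def __call__(self, shape):
        # shape is (N, C, H, W); a real layer would also convolve.
        c_in = shape[1]
        if self.in_channels is None:
            self.in_channels = c_in  # weight created here; shape now fixed
        if self.in_channels != c_in:
            raise ValueError("channel mismatch: layer was built for %d "
                             "channels, got %d" % (self.in_channels, c_in))
        return (shape[0], self.num_filters, shape[2], shape[3])

# Reusing two instances in a loop, as in Way No.2, fails on the second pass:
conv1, conv2 = TinyConv(256), TinyConv(512)
s = (1, 768, 9, 9)
s = conv2(conv1(s))   # first pass: conv1 is now built for 768 channels
try:
    s = conv1(s)      # second pass: 512-channel input -> mismatch
except ValueError as err:
    print(err)

# Four separate instances (Way No.1) work, since each fixes its own shape:
a, b, c, d = TinyConv(256), TinyConv(512), TinyConv(256), TinyConv(512)
out = d(c(b(a((1, 768, 9, 9)))))
print(out)  # (1, 512, 9, 9)
```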

I've now found that, as you said, the problem is indeed that every layer must be defined. My mistake was not understanding what a "layer" is; I treated it as an ordinary function, hence the error.
Actually I was learning from the yolov3 example below (line 109):
https://github.com/PaddlePaddle/models/blob/958b08cd82fa30d0baf0ee760378fe5008ea154f/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/yolo_head.py

But I imitated it without grasping the key point about naming, so the program failed; this has nothing to do with dygraph vs. static mode.
About the concept of a layer: does it refer to each OP plus its data? And since names can repeat, what is the point of name_scope? I've never quite understood layers and name_scope; could you elaborate a bit? Thanks!

AIStudio784460
#5 · replied 2020-01

You can think of a network layer like this: the BN you defined is a class, i.e. a kind of operation that can be applied to some layer of the network. But an object instantiated from BN is a concrete layer, with concrete parameters, and it has both a forward and a backward pass. So the object represents one specific layer, not the BN functionality itself, and it cannot be reused multiple times as a generic computation.

The purpose of name_scope is to give a layer a unique name, but even without it the framework names layers automatically; its main use is simply to make debugging easier. The difference shows up in models like GANs, which may train several sub-networks at once. Layers of the same type, e.g. FC, would be auto-named FC1, FC2, ... FCX, which makes it hard to tell which sub-network a given FC belongs to, so name_scope was added. It just prepends a prefix to the system-generated name, roughly name_scope + "/" + layer's name. Note that this feature has already been removed in the latest develop branch of Paddle, so don't worry too much about the concept.
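The prefixing described above can be sketched in a few lines (hypothetical helper names; this is not Paddle's actual naming code, just the idea of a per-type counter plus an optional name_scope prefix):

```python
_counters = {}

def auto_name(layer_type, name_scope=None):
    # The framework keeps a counter per layer type and appends it,
    # producing fc_0, fc_1, ...; name_scope merely adds a "scope/" prefix.
    idx = _counters.get(layer_type, 0)
    _counters[layer_type] = idx + 1
    base = "%s_%d" % (layer_type, idx)
    return name_scope + "/" + base if name_scope else base

print(auto_name("fc"))                    # fc_0
print(auto_name("fc"))                    # fc_1
print(auto_name("fc", "discriminator"))   # discriminator/fc_2
```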

AIStudio792042
#6 · replied 2020-01

Got it, thanks!
