最近贴吧大佬们在谈论padding里的一些细节东西,刚好自己也挺感兴趣,就总结了一下,发出来希望对大家有用。
# 动态图中的尺寸确定问题
这是一个伴随paddle很久的问题。我在上一次发的帖子避坑指南( https://ai.baidu.com/forum/topic/show/960793 )中也提到过。Linear层的输入强制性地需要用户输入input_dim。然而input_dim会随着自己输入的数据格式,卷积核大小,以及卷积步长等因素而变化,因此需要手动输入的话,会非常地不方便。解决这个问题其实不难,就是引入一个计算输出尺寸的函数,在每一层的后边都计算输出,然后迭代到Linear后,自动计算input_dim。以下是我解决的思路:
from paddle.fluid.layers import utils
def conv_output_length(input_length, filter_size, padding, stride, dilation=1):
    """Determine the output length of a convolution along one dimension.

    Arguments:
        input_length: integer, or None when the length is unknown.
        filter_size: integer.
        padding: integer, counted twice in the formula (i.e. once per side).
        stride: integer.
        dilation: dilation rate, integer.

    Returns:
        The output length (integer), or None if `input_length` is None.
    """
    if input_length is None:
        return None
    # A dilated filter spans dilation * (filter_size - 1) + 1 input positions.
    dilated_filter_size = dilation * (filter_size - 1) + 1
    return (input_length + 2 * padding - dilated_filter_size) // stride + 1
def compute_output_shape(input_shape, num_filters, filter_size, stride, padding, dilation=1, rank=2):
    """Compute the shape produced by a convolution-like layer.

    Arguments:
        input_shape: sequence whose first entry is dropped and whose remaining
            entries are treated as per-dimension lengths (presumably
            [channels, *spatial] without the batch axis -- confirm with caller).
        num_filters: value placed first in the returned shape.
        filter_size: integer or sequence, expanded to `rank` entries.
        stride: integer or sequence, expanded to `rank` entries.
        padding: integer or sequence, expanded to `rank` entries.
        dilation: integer or sequence, expanded to `rank` entries.
        rank: number of entries each per-dimension argument is expanded to.

    Returns:
        A list: [num_filters] followed by one output length per dimension.
    """
    filter_size = utils.convert_to_list(filter_size, rank, name='filter_size')
    padding = utils.convert_to_list(padding, rank, name='padding')
    stride = utils.convert_to_list(stride, rank, name='stride')
    dilation = utils.convert_to_list(dilation, rank, name='dilation')

    spatial_dims = list(input_shape)[1:]
    # Indexing (rather than zip) keeps the IndexError when input_shape carries
    # more dimensions than `rank` instead of silently truncating.
    new_space = [
        conv_output_length(
            length,
            filter_size[axis],
            padding=padding[axis],
            stride=stride[axis],
            dilation=dilation[axis])
        for axis, length in enumerate(spatial_dims)
    ]
    return [num_filters] + new_space
有了以上的函数,我们可以在定义模型的时候,进行如下操作,保留每一层的输出尺寸,直到Linear为止:
from paddle.fluid.dygraph import Conv2D, Pool2D, Layer, Linear
import numpy as np
class TestModel(Layer):
    """Example network that tracks each layer's output shape.

    The shape after every Conv2D / Pool2D layer is computed with
    `compute_output_shape`, so the `input_dim` of the final Linear layer is
    derived automatically instead of being worked out by hand.
    """

    def __init__(self,
                 input_shape,
                 output_dim,
                 conv_filter_size=(4, 3, 2),
                 conv_stride=(3, 2, 1),
                 conv_num_filters=(40, 20, 10,),
                 conv_padding=(0, 1, 2)):
        """Build the layers.

        Arguments:
            input_shape: shape of one input; its first entry is passed to the
                first Conv2D as the channel count.
            output_dim: output size of the final Linear layer.
            conv_filter_size: filter size of each of the three conv layers.
            conv_stride: stride of each of the three conv layers.
            conv_num_filters: filter count of each of the three conv layers.
            conv_padding: padding of each of the three conv layers.
        """
        super(TestModel, self).__init__()
        ############################## example section ##############################
        self.conv_1 = Conv2D(input_shape[0], conv_num_filters[0], conv_filter_size[0],
                             conv_stride[0], conv_padding[0])
        output_shape_1 = compute_output_shape(
            input_shape, conv_num_filters[0], conv_filter_size[0],
            conv_stride[0], conv_padding[0], rank=2)

        self.conv_2 = Conv2D(output_shape_1[0], conv_num_filters[1], conv_filter_size[1],
                             conv_stride[1], conv_padding[1])
        output_shape_2 = compute_output_shape(
            output_shape_1, conv_num_filters[1], conv_filter_size[1],
            conv_stride[1], conv_padding[1], rank=2)

        # The pooling shape is computed with the same helper; the channel
        # count passed in is that of the preceding conv layer.
        self.pool_1 = Pool2D(pool_size=2, pool_stride=2, pool_padding=2)
        output_shape_pool_1 = compute_output_shape(
            output_shape_2, conv_num_filters[1], 2, 2, 2, rank=2)

        self.conv_3 = Conv2D(output_shape_pool_1[0], conv_num_filters[2], conv_filter_size[2],
                             conv_stride[2], conv_padding[2])
        output_shape_3 = compute_output_shape(
            output_shape_pool_1, conv_num_filters[2], conv_filter_size[2],
            conv_stride[2], conv_padding[2], rank=2)

        self.pool_2 = Pool2D(pool_size=2, pool_stride=2, pool_padding=4)
        output_shape_pool_2 = compute_output_shape(
            output_shape_3, conv_num_filters[2], 2, 2, 4, rank=2)

        # np.prod returns a NumPy scalar; convert it to a built-in int before
        # handing it to Linear as a dimension.
        flat_dim = int(np.prod(output_shape_pool_2))
        self.linear = Linear(input_dim=flat_dim, output_dim=output_dim, act='softmax')
        ##############################################################################

    def forward(self, inputs):
        """Run conv_1, conv_2, pool_1, conv_3, pool_2, flatten, then linear."""
        # `fluid` is not imported by the surrounding snippet, so the original
        # raised NameError here; import it locally to keep the block self-contained.
        import paddle.fluid as fluid

        x = self.conv_1(inputs)
        x = self.conv_2(x)
        x = self.pool_1(x)
        x = self.conv_3(x)
        x = self.pool_2(x)
        x = fluid.layers.flatten(x)
        out = self.linear(x)
        return out
详细情况可以参照我发的一个项目:https://aistudio.baidu.com/aistudio/projectdetail/1024182
# padding类型的问题
在使用padding这个选项时,会让人费一些头脑,因为padding的细节需要一定的基础知识才能够搞懂,所以padding直接给出数值会劝退一部分入门的萌新。实际上,padding经常使用的方式也无非两种:valid和same。valid方式不需要进行任何填充操作;但是如果想进行same方式的填充,就需要费一些功夫了,而一些深一些的网络,是需要保证输出的特征图保持一定尺寸的。因此,个人感觉padding参数也应该支持符号标记,以方便编程。
我做了一些尝试,通过引入一些函数,改写paddle的类,来实现padding接收字符标记而非数值。第一步和上一部分相同,就是计算输出尺寸,同时根据padding的类型确定padding长度:
"""
to define a function for computing the output shape of a Layer
"""
from paddle.fluid.layers import utils
from math import ceil
def conv_output_length(input_length, filter_size, padding, stride, dilation=1):
    """Determine the output length of a convolution along one dimension.

    Arguments:
        input_length: integer, or None when the length is unknown.
        filter_size: integer.
        padding: one of "same", "valid", "full".
        stride: integer.
        dilation: dilation rate, integer.

    Returns:
        The output length (integer), or None if `input_length` is None.

    Raises:
        ValueError: if `padding` is not one of the supported modes.
    """
    if input_length is None:
        return None
    # Raise explicitly instead of using `assert`: asserts are stripped under
    # `python -O`, after which an unknown mode would fail later with an
    # unrelated UnboundLocalError.
    if padding not in ('same', 'valid', 'full'):
        raise ValueError(
            "padding must be one of 'same', 'valid', 'full' but got %r" % (padding,))

    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
    if padding == 'same':
        output_length = input_length
    elif padding == 'valid':
        output_length = input_length - dilated_filter_size + 1
    else:  # 'full'
        output_length = input_length + dilated_filter_size - 1
    # Ceiling division by the stride.
    return (output_length + stride - 1) // stride
def conv_pad_length(input_length, output_length, filter_size, padding, stride, dilation=1):
    """Determine the pad length for one dimension of a convolution.

    Arguments:
        input_length: integer; only used for "same" padding.
        output_length: integer, or None when the length is unknown.
        filter_size: integer.
        padding: one of "same", "valid", "full".
        stride: integer.
        dilation: integer.

    Returns:
        The pad length (integer), or None if `output_length` is None.
        For "same" this is half of the total padding needed, rounded up and
        never negative; for "valid" it is 0; for "full" it is the dilated
        filter size minus one.

    Raises:
        ValueError: if `padding` is not one of the supported modes.
    """
    if output_length is None:
        return None
    # Raise explicitly instead of using `assert`: asserts are stripped under
    # `python -O`, after which an unknown mode would fail later with an
    # unrelated UnboundLocalError.
    if padding not in ('same', 'valid', 'full'):
        raise ValueError(
            "padding must be one of 'same', 'valid', 'full' but got %r" % (padding,))

    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
    if padding == 'same':
        # Total padding required so that the requested output length is reached.
        pad_needed_length = (output_length - 1) * stride + dilated_filter_size - input_length
        pad = max(0, ceil(pad_needed_length / 2))
    elif padding == 'valid':
        pad = 0
    else:  # 'full'
        pad = dilated_filter_size - 1
    return pad
def compute_output_shape(input_shape, num_filters, filter_size, stride, padding, dilation=1, rank=2):
    """Compute the shape produced by a convolution-like layer.

    Arguments:
        input_shape: sequence whose first entry is dropped and whose remaining
            entries are treated as per-dimension lengths (presumably
            [channels, *spatial] without the batch axis -- confirm with caller).
        num_filters: value placed first in the returned shape.
        filter_size: integer or sequence, expanded to `rank` entries.
        stride: integer or sequence, expanded to `rank` entries.
        padding: padding mode label, passed unchanged to `conv_output_length`
            for every dimension.
        dilation: integer or sequence, expanded to `rank` entries.
        rank: number of entries each per-dimension argument is expanded to.

    Returns:
        A list: [num_filters] followed by one output length per dimension.
    """
    filter_size = utils.convert_to_list(filter_size, rank, name='filter_size')
    stride = utils.convert_to_list(stride, rank, name='stride')
    dilation = utils.convert_to_list(dilation, rank, name='dilation')

    spatial_dims = list(input_shape)[1:]
    # Indexing (rather than zip) keeps the IndexError when input_shape carries
    # more dimensions than `rank` instead of silently truncating.
    new_space = [
        conv_output_length(
            length,
            filter_size[axis],
            padding=padding,
            stride=stride[axis],
            dilation=dilation[axis])
        for axis, length in enumerate(spatial_dims)
    ]
    return [num_filters] + new_space
def compute_pad_length(input_shape, output_shape, filter_size, stride, padding, dilation=1, rank=2):
    """Compute the pad length of every dimension after the first.

    Arguments:
        input_shape: sequence; its first entry is skipped.
        output_shape: sequence; its first entry is skipped.
        filter_size: integer or sequence, expanded to `rank` entries.
        stride: integer or sequence, expanded to `rank` entries.
        padding: padding mode label, passed unchanged to `conv_pad_length`
            for every dimension.
        dilation: integer or sequence, expanded to `rank` entries.
        rank: number of entries each per-dimension argument is expanded to.

    Returns:
        A list with one pad length per entry of `output_shape[1:]`.
    """
    filter_size = utils.convert_to_list(filter_size, rank, name='filter_size')
    stride = utils.convert_to_list(stride, rank, name='stride')
    dilation = utils.convert_to_list(dilation, rank, name='dilation')

    input_space = input_shape[1:]
    output_space = list(output_shape)[1:]
    # Driven by the output dimensions; input_space is indexed so a shorter
    # input_shape still raises IndexError rather than being truncated.
    return [
        conv_pad_length(
            input_space[axis],
            out_length,
            filter_size[axis],
            padding=padding,
            stride=stride[axis],
            dilation=dilation[axis])
        for axis, out_length in enumerate(output_space)
    ]
然后重写ConvXD类,将padding接收的参数由数值改为类型符号,这里用Conv2D示意:
from paddle.fluid.dygraph import Conv2D
def pad_decorator(output_shape, rank=2):
    """Build a decorator that crops a function's result to `output_shape`.

    The wrapped function's return value is sliced so that the axes after the
    first two are cut to `output_shape[1]`, `output_shape[2]` (and
    `output_shape[3]` when rank is 3). The first two axes are left untouched.

    Arguments:
        output_shape: sequence; entries 1..rank give the sizes to crop to.
        rank: number of trailing axes to crop, 2 or 3.

    Returns:
        A decorator. The decorated function raises ValueError when called if
        `rank` is neither 2 nor 3.
    """
    import functools

    def inner_decorator(func):
        # Preserve the wrapped callable's name and docstring.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            out = func(*args, **kwargs)
            if rank not in (2, 3):
                raise ValueError("rank only support 2 and 3 but get %d" % rank)
            # Keep the first two axes whole, crop each remaining axis from the start.
            crop = (slice(None), slice(None)) + tuple(
                slice(None, output_shape[axis]) for axis in range(1, rank + 1))
            return out[crop]
        return wrapper
    return inner_decorator
class MyConv2D(Conv2D):
    """
    A Conv2D layer whose `padding` is a mode label instead of a numeric value.

    The numeric padding handed to Conv2D is derived from `input_shape` and the
    requested mode. For 'same' padding the forward output is additionally
    cropped to the computed output shape.
    """

    def __init__(self, num_channels,
                 num_filters,
                 filter_size,
                 stride,
                 padding='valid',
                 dilation=1,
                 input_shape=None,
                 **kwargs):
        """Build the layer.

        Arguments:
            num_channels: forwarded to Conv2D.
            num_filters: forwarded to Conv2D; also the first entry of the
                computed output shape.
            filter_size: forwarded to Conv2D and used for the shape computation.
            stride: forwarded to Conv2D and used for the shape computation.
            padding: padding mode label understood by `compute_output_shape`
                and `compute_pad_length`.
            dilation: forwarded to Conv2D and used for the shape computation.
            input_shape: shape of the layer input; required although it
                defaults to None for signature compatibility.
            **kwargs: forwarded to Conv2D.

        Raises:
            TypeError: if `input_shape` is not provided.
        """
        # The default of None previously failed inside the shape helper with an
        # opaque "'NoneType' object is not iterable"; fail early and clearly.
        if input_shape is None:
            raise TypeError("MyConv2D requires `input_shape` to compute the padding")

        self.output_shape = compute_output_shape(
            input_shape, num_filters, filter_size, stride, padding, dilation, rank=2)
        self.pad_length = compute_pad_length(
            input_shape, self.output_shape, filter_size, stride, padding, dilation, rank=2)
        super(MyConv2D, self).__init__(num_channels=num_channels,
                                       num_filters=num_filters,
                                       filter_size=filter_size,
                                       stride=stride,
                                       padding=tuple(self.pad_length),
                                       dilation=dilation,
                                       **kwargs)
        if padding == 'same':
            # Crop the convolution result to the computed 'same' output shape.
            self.forward = pad_decorator(self.output_shape, rank=2)(self.forward)
细节请参照我的一个项目:https://aistudio.baidu.com/aistudio/projectdetail/1077932
个人水平有限,欢迎大佬们纠错补充!欢迎积极讨论!
点赞!不知道2.0之后会不会有什么新变化。
PS:论坛居然不支持代码高亮……
的确,代码看着跟mark一样。。。
啊,这。。。这都是细节啊
先用插件吧,说不定下次就更了
大佬说干就干,感动!感动!
以后我就能偷懒了~~
2.0beta 里没有padding=same,看正式版吧
大佬甩链接~!
大佬的要求,我必须完成!
像我这种从学深度学习就是搞图像重建的人对padding那简直就是娘胎的记忆 碰到3的核无脑填1哈哈哈哈
可以总结个技术贴不?
我试试吧 本菜狗努力一下
我点赞的手已经。。。(屏蔽词)!!
谦虚了!期待ing