optimizer.minimize(loss) raises an error with no specific error message

Version and environment info:
1) PaddlePaddle version: 1.6.2
2) Python: 3.6.3

Training info:
1) Single machine, single card

total_loss = self.alpha * losses[0] + self.beta * losses[1] + self.gamma * losses[2]
logging.info("Shape of total_loss: {}".format(total_loss.shape))
inference_program = fluid.default_main_program().clone(for_test=True)
optimizer = fluid.optimizer.Adam(learning_rate=self.learning_rate)
optimizer.minimize(total_loss)

The error is raised when calling minimize on total_loss:

I0106 22:17:22.031168 64214 op_desc.cc:685] CompileTime infer shape on sum
I0106 22:17:22.031216 64214 op_desc.cc:685] CompileTime infer shape on expand_grad
I0106 22:17:22.031261 64214 op_desc.cc:685] CompileTime infer shape on unsqueeze2_grad
I0106 22:17:22.031307 64214 op_desc.cc:685] CompileTime infer shape on gather_nd_grad
I0106 22:17:22.031350 64214 op_desc.cc:685] CompileTime infer shape on cast
Traceback (most recent call last):
  File "models/train.py", line 158, in <module>
    train()
  File "models/train.py", line 56, in train
    accuracies, inference_program = gnn_model.train_net()
  File "/root/paddle/GNN-Table-Parsing/models/gnn_model.py", line 349, in train_net
    optimizer.minimize(total_loss)
  File "<decorator-gen-36>", line 2, in minimize
  File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/wrapped_decorator.py", line 25, in __impl__
    return wrapped_func(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/dygraph/base.py", line 78, in __impl__
    return func(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/optimizer.py", line 678, in minimize
    no_grad_set=no_grad_set)
  File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/optimizer.py", line 551, in backward
    no_grad_set, callbacks)
  File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/backward.py", line 1085, in append_backward
    _append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
  File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/backward.py", line 891, in _append_backward_vars_
    op_desc.infer_shape(block.desc)
paddle.fluid.core_avx.EnforceNotMet:

--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0   std::string paddle::platform::GetTraceBackString<char const*>(char const*&&, char const*, int)
1   paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int)
2   paddle::operators::CastOpInferShape::operator()(paddle::framework::InferShapeContext*) const
3   std::_Function_handler<void (paddle::framework::InferShapeContext*), paddle::framework::details::OpInfoFiller<paddle::operators::CastOpInferShape, (paddle::framework::details::OpInfoFillType)4>::operator()(char const*, paddle::framework::OpInfo*) const::{lambda(paddle::framework::InferShapeContext*)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::InferShapeContext*)
4   paddle::framework::OpDesc::InferShape(paddle::framework::BlockDesc const&) const

----------------------
Error Message Summary:
----------------------
Error: The input of cast op must be set at (/paddle/paddle/fluid/operators/cast_op.cc:44)
All comments (4)
AIStudio792089
#2 Replied 2020-01

Could you paste the full code, or at least the code around gather_nd? The error looks like it comes from somewhere near gather_nd.

AIStudio792052
#3 Replied 2020-01
    def build_adjacency_edge_features(self, image, vertex_features, graph_node_features):
        """
        Build adjacency edge features, feature is gathered from node features and relative distance
        :param graph_node_features: Node features (Shape of [batch, max_vertices, node_feature_size])
        :return: Adjacency edge features (Shape of [batch, max_vertices, max_vertices, edge_feature_size])
        """
        x = fluid.layers.expand(
                fluid.layers.unsqueeze(fluid.layers.range(0, self.max_vertices, 1, dtype="int32"), axes=[0, 1]),
                expand_times=[self.batch_size, self.max_vertices, 1])  # [batch, max_vertices, max_vertices]
        y = fluid.layers.unsqueeze(fluid.layers.range(0, self.batch_size, 1, dtype="int32"), axes=[1])
        y = fluid.layers.expand(y, expand_times=[1, self.max_vertices])  # [batch, max_vertices]
        batch_range = fluid.layers.expand(fluid.layers.unsqueeze(y, axes=[-1]),
                                          expand_times=[1, 1, self.max_vertices])  # [batch, max_vertices, max_vertices]
        indexing_tensor = fluid.layers.concat(
            [fluid.layers.unsqueeze(batch_range, axes=[-1]), fluid.layers.unsqueeze(x, axes=[-1])],
            axis=-1)  # [batch, max_vertices, max_vertices, 2]
        x_axis_graph_node_features = fluid.layers.gather_nd(graph_node_features,
                                                            indexing_tensor)  # [batch, max_vertices, max_vertices, features]
        expand_graph_node_features = fluid.layers.unsqueeze(graph_node_features, axes=[2])  # [batch, max_vertices, 1, features]
        y_axis_graph_node_features = fluid.layers.expand(expand_graph_node_features,
                                                         expand_times=[1, 1, self.max_vertices, 1])
        adjacency_edge_features = fluid.layers.concat([x_axis_graph_node_features, y_axis_graph_node_features], axis=-1)

        # Compute the relative positions between nodes
        _, image_height, image_width, _ = image.shape
        pos_x1 = vertex_features[:, :, self.dim_vertex_x_position]
        pos_y1 = vertex_features[:, :, self.dim_vertex_y_position]
        pos_x2 = vertex_features[:, :, self.dim_vertex_x2_position]
        pos_y2 = vertex_features[:, :, self.dim_vertex_y2_position]

        rel_x1 = fluid.layers.elementwise_sub(fluid.layers.expand(
                    fluid.layers.unsqueeze(pos_x1, axes=[-1]), (1, 1, self.max_vertices)),
                  fluid.layers.expand(
                      fluid.layers.unsqueeze(pos_x1, axes=[1]), (1, self.max_vertices, 1))) / float(image_width)
        rel_y1 = fluid.layers.elementwise_sub(fluid.layers.expand(
                    fluid.layers.unsqueeze(pos_y1, axes=[-1]), (1, 1, self.max_vertices)),
                  fluid.layers.expand(
                      fluid.layers.unsqueeze(pos_y1, axes=[1]), (1, self.max_vertices, 1))) / float(image_height)
        rel_x2 = fluid.layers.elementwise_sub(fluid.layers.expand(
                    fluid.layers.unsqueeze(pos_x2, axes=[-1]), (1, 1, self.max_vertices)),
                  fluid.layers.expand(
                      fluid.layers.unsqueeze(pos_x2, axes=[1]), (1, self.max_vertices, 1))) / float(image_width)
        rel_y2 = fluid.layers.elementwise_sub(fluid.layers.expand(
                    fluid.layers.unsqueeze(pos_y2, axes=[-1]), (1, 1, self.max_vertices)),
                  fluid.layers.expand(
                      fluid.layers.unsqueeze(pos_y2, axes=[1]), (1, self.max_vertices, 1))) / float(image_height)
        relative_pos = fluid.layers.concat([fluid.layers.unsqueeze(rel_x1, axes=[3]),
                                            fluid.layers.unsqueeze(rel_y1, axes=[3]),
                                            fluid.layers.unsqueeze(rel_x2, axes=[3]),
                                            fluid.layers.unsqueeze(rel_y2, axes=[3])], axis=-1)
        adjacency_edge_features = fluid.layers.concat([adjacency_edge_features, relative_pos], axis=-1)

        return adjacency_edge_features

    def gather_feature_from_conv_head(self, image_features, vertex_features, scale_y, scale_x):
        """
        Select vertex image feature from convolution feature map
        :param image_features:
        :param vertex_features:
        :param scale_y:
        :param scale_x:
        :return:
        """
        image_features = fluid.layers.transpose(image_features, perm=[0, 2, 3, 1])
        logging.info("Shape of transpose image_features: {}".format(image_features.shape))
        vertices_y = vertex_features[:, :, self.dim_vertex_y_position]
        vertices_x = vertex_features[:, :, self.dim_vertex_x_position]
        vertices_y2 = vertex_features[:, :, self.dim_vertex_y2_position]
        vertices_x2 = vertex_features[:, :, self.dim_vertex_x2_position]
        vertices_y = fluid.layers.cast(vertices_y, "float32") * scale_y
        vertices_x = fluid.layers.cast(vertices_x, "float32") * scale_x
        vertices_y2 = fluid.layers.cast(vertices_y2, "float32") * scale_y
        vertices_x2 = fluid.layers.cast(vertices_x2, "float32") * scale_x

        batch_size, max_vertices = vertices_y.shape
        batch_size, max_vertices = int(batch_size), int(max_vertices)
        batch_range = fluid.layers.unsqueeze(fluid.layers.range(0, batch_size, 1, dtype="float32"), axes=[1, 2])
        batch_range = fluid.layers.expand(batch_range, expand_times=[1, max_vertices, 1])

        indexing_tensor = fluid.layers.concat(
            [batch_range,
             fluid.layers.unsqueeze(fluid.layers.elementwise_add(vertices_y, vertices_y2) / 2, axes=[2]),
             fluid.layers.unsqueeze(fluid.layers.elementwise_add(vertices_x, vertices_x2) / 2, axes=[2])], axis=-1)
        indexing_tensor = fluid.layers.cast(indexing_tensor, "int64")
        return fluid.layers.gather_nd(image_features, indexing_tensor)

def edge_conv_layer(input_feature,
                    num_neighbors=30,
                    dense_layers=(64, 64, 64),
                    aggregation_function=fluid.layers.reduce_max,
                    edge_activation=None):
    # get_indexing_tensor is defined elsewhere in the model; it returns an index
    # tensor selecting num_neighbors neighbours for each point.
    # Shape of indexing: (batch_size, n_max_entries, k, 2)
    indexing, _ = get_indexing_tensor(input_feature, num_neighbors)
    # Shape of neighbour_space: (batch_size, n_max_entries, k, input_feature.shape[-1])
    neighbour_space = fluid.layers.gather_nd(input_feature, indexing)
    # Shape of expanded_input_feature: (batch_size, n_max_entries, 1, input_feature.shape[-1])
    expanded_input_feature = fluid.layers.unsqueeze(input_feature, axes=[2])
    expanded_input_feature = fluid.layers.expand(expanded_input_feature, [1, 1, num_neighbors, 1])
    diff = expanded_input_feature - neighbour_space
    edge = fluid.layers.concat([expanded_input_feature, diff], axis=-1)
    for units in dense_layers:
        edge = fluid.layers.fc(edge, units, num_flatten_dims=3, act="relu")
    if edge_activation is not None:
        edge = edge_activation(edge)
    output_feature = aggregation_function(edge, dim=2)
    return output_feature

These are the three places where gather_nd is used.

AIStudio792089
#4 Replied 2020-01

You could try adding indexing_tensor.stop_gradient = True before the gather_nd call in the following code:

        indexing_tensor = fluid.layers.concat(
            [batch_range,
             fluid.layers.unsqueeze(fluid.layers.elementwise_add(vertices_y, vertices_y2) / 2, axes=[2]),
             fluid.layers.unsqueeze(fluid.layers.elementwise_add(vertices_x, vertices_x2) / 2, axes=[2])], axis=-1)
        indexing_tensor = fluid.layers.cast(indexing_tensor, "int64")
        return fluid.layers.gather_nd(image_features, indexing_tensor)
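
For reference, a rough sketch of what that change would look like, reusing the variable names from the snippet above (untested, just to show where the line goes):

        indexing_tensor = fluid.layers.concat(
            [batch_range,
             fluid.layers.unsqueeze(fluid.layers.elementwise_add(vertices_y, vertices_y2) / 2, axes=[2]),
             fluid.layers.unsqueeze(fluid.layers.elementwise_add(vertices_x, vertices_x2) / 2, axes=[2])], axis=-1)
        indexing_tensor = fluid.layers.cast(indexing_tensor, "int64")
        # The index tensor only selects positions and needs no gradient; marking it
        # stop_gradient keeps append_backward from building a gradient path through
        # the cast op above, which appears to be where the error is raised.
        indexing_tensor.stop_gradient = True
        return fluid.layers.gather_nd(image_features, indexing_tensor)

The other two gather_nd call sites (in build_adjacency_edge_features and edge_conv_layer) build their index tensors in a similar way, so the same treatment would presumably apply there as well.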
大圣a黄
#5 Replied 2020-09

I ran into this problem today as well. OP, did you manage to solve it?