- 标题:利用 deeplabv3p 模型进行图像分割时报错:cudaSetDevice failed in paddle::platform::SetDeviceId: initialization error at [/paddle/paddle/fluid/platform/gpu_info.cc:210]
- 版本、环境信息:
1)PaddlePaddle版本:release/1.5
2)GPU:K40 四卡、CUDA8.0、CUDNN-7.1.3
3)系统环境:python-2.7.11
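- 问题描述:通过 multiprocessing 启动子进程(pdseg_parallel.py 中的 segment),在子进程内构建 deeplabv3p 模型并在 GPU 上运行时,fill_constant 算子报 cudaSetDevice initialization error。报错信息、日志及预测代码关键片段如下: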
device id is 3
#Device count: 4
2019-11-22,13:39:25.106 ERROR Segment image batch failed : Invoke operator fill_constant error.
Python Call stacks:
File "/opt/python/cp27-cp27mu/lib/python2.7/site-packages/paddle/fluid/framework.py", line 1845, in _prepend_op
attrs=kwargs.get("attrs", None))
File "/opt/python/cp27-cp27mu/lib/python2.7/site-packages/paddle/fluid/initializer.py", line 189, in __call__
stop_gradient=True)
File "/opt/python/cp27-cp27mu/lib/python2.7/site-packages/paddle/fluid/framework.py", line 1730, in create_parameter
initializer(param, self)
File "/opt/python/cp27-cp27mu/lib/python2.7/site-packages/paddle/fluid/layer_helper_base.py", line 316, in create_parameter
**attr._to_kwargs(with_initializer=True))
File "/opt/python/cp27-cp27mu/lib/python2.7/site-packages/paddle/fluid/layer_helper.py", line 128, in append_bias_op
attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True)
File "/opt/python/cp27-cp27mu/lib/python2.7/site-packages/paddle/fluid/layers/nn.py", line 2184, in conv2d
pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
File "/mnt/pdseg_process/models/libs/model_libs.py", line 125, in conv
return fluid.layers.conv2d(*args, **kargs)
File "/mnt/pdseg_process/models/modeling/deeplab.py", line 272, in deeplabv3p
param_attr=param_attr)
File "/mnt/pdseg_process/models/model_builder.py", line 84, in build_model
logits = model_func(image, class_num)
File "pdseg_parallel.py", line 148, in segment
utils.config.cfg.DATASET.NUM_CLASSES)
File "/opt/python/cp27-cp27mu/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/opt/python/cp27-cp27mu/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/opt/python/cp27-cp27mu/lib/python2.7/multiprocessing/forking.py", line 126, in __init__
code = process_obj._bootstrap()
File "/opt/python/cp27-cp27mu/lib/python2.7/multiprocessing/process.py", line 130, in start
self._popen = Popen(self)
File "pdseg_parallel.py", line 228, in main
segment_process.start()
File "pdseg_parallel.py", line 251, in <module>
main()
C++ Call stacks:
cudaSetDevice failed in paddle::platform::SetDeviceId: initialization error at [/paddle/paddle/fluid/platform/gpu_info.cc:210]
PaddlePaddle Call Stacks:
0 0x7f6f5c6b1cb0p void paddle::platform::EnforceNotMet::Init<char const*>(char const*, char const*, int) + 352
1 0x7f6f5c6b2029p paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int) + 137
2 0x7f6f5e78053cp paddle::platform::SetDeviceId(int) + 428
3 0x7f6f5e69f58dp paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 269
4 0x7f6f5c83ccbep paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool, bool) + 398
5 0x7f6f5c83f9bfp paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::vector<std::string, std::allocator<std::string> > const&, bool) + 143
6 0x7f6f5c6a323dp
7 0x7f6f5c6e4476p
8 0x7f6fa4324ce8p PyEval_EvalFrameEx + 28264
9 0x7f6fa432737dp PyEval_EvalCodeEx + 2061
10 0x7f6fa4324d70p PyEval_EvalFrameEx + 28400
11 0x7f6fa432737dp PyEval_EvalCodeEx + 2061
12 0x7f6fa4324d70p PyEval_EvalFrameEx + 28400
13 0x7f6fa432737dp PyEval_EvalCodeEx + 2061
14 0x7f6fa429e905p
15 0x7f6fa426cd33p PyObject_Call + 67
16 0x7f6fa43220a2p PyEval_EvalFrameEx + 16930
17 0x7f6fa4324e9ep PyEval_EvalFrameEx + 28702
18 0x7f6fa4324e9ep PyEval_EvalFrameEx + 28702
19 0x7f6fa432737dp PyEval_EvalCodeEx + 2061
20 0x7f6fa429e830p
21 0x7f6fa426cd33p PyObject_Call + 67
22 0x7f6fa427b74dp
23 0x7f6fa426cd33p PyObject_Call + 67
24 0x7f6fa42d836fp
25 0x7f6fa42d6effp
26 0x7f6fa426cd33p PyObject_Call + 67
27 0x7f6fa43218e4p PyEval_EvalFrameEx + 14948
28 0x7f6fa4324e9ep PyEval_EvalFrameEx + 28702
29 0x7f6fa432737dp PyEval_EvalCodeEx + 2061
30 0x7f6fa4324d70p PyEval_EvalFrameEx + 28400
31 0x7f6fa432737dp PyEval_EvalCodeEx + 2061
32 0x7f6fa43274b2p PyEval_EvalCode + 50
33 0x7f6fa43511c2p PyRun_FileExFlags + 146
34 0x7f6fa4352559p PyRun_SimpleFileExFlags + 217
35 0x7f6fa43681ddp Py_Main + 3149
36 0x7f6fa35fbd1dp __libc_start_main + 253
37 0x4006b1p
预测代码:
```python
# build model
startup_prog = fluid.Program()
main_prog = fluid.Program()
pred, logit = models.model_builder.build_model(main_prog,
                                               startup_prog,
                                               INPUT_IMAGE_SHAPE,
                                               utils.config.cfg.DATASET.NUM_CLASSES)
# Get device environment
places = fluid.cuda_places() if USE_GPU else fluid.cpu_places()
assert DEVICE_ID < len(places)
place = places[DEVICE_ID]
dev_count = len(places)
print("#Device count: {}".format(dev_count))
exe = fluid.Executor(place)
exe.run(startup_prog)
main_prog = main_prog.clone(for_test=True)
fluid.io.load_inference_model(dirname=MODEL_DIR,
                              executor=exe,
                              model_filename="__model__",
                              params_filename="__params__")
logging.info(" # Load model success.")
fetch_list = [pred.name, logit.name]
timer = utils.timer.Timer()
file_index = 0
while file_index < len(all_img_files):
    if not batched_data_queue.empty():
        batched_data = batched_data_queue.get()
        data = batched_data.data
        desc_list = batched_data.provide_data
        timer.start()
        _, logit, = exe.run(program=main_prog,
                            feed={"image": data},
                            fetch_list=fetch_list,
                            return_numpy=True)
        logging.info(" ### Segment batched image cost {} seconds.".format(timer.elapsed_time()))
        file_index += BATCH_SIZE
        segment_data = utils.data_batch.DataBatch(logit, desc_list)
        segment_data_queue.put(segment_data)
exe.close() # close executor
```
1)PaddlePaddle版本:release/1.5
2)GPU:K40 四卡、CUDA8.0、CUDNN-7.1.3
3)系统环境:python-2.7.11
预测代码: