- 版本、环境信息:
1)PaddlePaddle版本:1.6.1
2)GPU:预测若用GPU,CUDA 9.0,cudnn 7.6
3)系统环境:window 10 python3.6
- 训练信息
1)单机,单卡
- 部分复现信息:(train_X, train_Y就是我自己的数据集,其维度的我有在下面打印出来)
batch_size=60 #迭代次数
max_len=10 #input_dim不能超过该值
dropout_prob=0.2
seq_len=20 #时间序列长度
hidden_size=6 #隐藏层单元
num_layers=1 #叠加LSTM的层数
learning_rate=0.002 #学习率
data1= fluid.data(name='data1', shape=[-1,20,6], dtype='float64')
label = fluid.data(name='label', shape=[-1,1], dtype='float64')
print(data1)
init_h = fluid.layers.fill_constant( [num_layers, batch_size, hidden_size], 'float64', 0.0 )
init_c = fluid.layers.fill_constant( [num_layers, batch_size, hidden_size], 'float64', 0.0 )
rnn_out, last_h, last_c = fluid.layers.lstm(data1, init_h, init_c, max_len, hidden_size, num_layers, dropout_prob=dropout_prob)
print(last_h)
last_result=last_h[0,:,:]
print(last_result)
out = fluid.layers.fc(input=last_result, size=1, act=None)
获取损失函数
cost = fluid.layers.square_error_cost(input=out, label=label)
avg_cost = fluid.layers.mean(cost)
获取预测程序
test_program = fluid.default_main_program().clone(for_test=True)
定义优化方法
optimizer = fluid.optimizer.AdagradOptimizer(learning_rate)
opt = optimizer.minimize(avg_cost)
创建一个执行器,CPU训练速度比较慢
place = fluid.CPUPlace()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
进行参数初始化
exe.run(fluid.default_startup_program())
def reader_createor(data, label):
def reader():
for i in range(len(data)):
yield data[i,:], label[i]
return reader
train_reader = fluid.io.batch(fluid.io.shuffle(reader_createor(train_X, train_Y), 3000), batch_size=60,drop_last=True)
#加载测试数据
#train_reader = fluid.io.batch(fluid.io.shuffle(train_X, 3000), batch_size=60,drop_last=True)
feeder = fluid.DataFeeder(place=place, feed_list=[data1, label])
开始训练
for pass_id in range(1000):
# 进行训练
train_cost = 0
for batch_id, data in enumerate(train_reader()):
train_cost = exe.run(program=fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost])
if batch_id % 20 == 0:
print('Pass:%d, Batch:%d, Cost:%0.5f' % (pass_id, batch_id, train_cost[0]))
- 问题描述:请详细描述您的问题,同步贴出报错信息、日志、可复现的代码片段
前面是一点输出,后面是报错信息(我也试过换类型,换成了float32类型,但是就报PaddleCheckError: CUDNN_STATUS_BAD_PARAM at [D:\1.6.1\paddle\paddle\fluid\operators\cudnn_lstm_op.cu.cc:113]错误!!)
train_x1的维度: (2980, 20)
train_Y的维度: (2980, 1)
train_XT的维度: (6, 2980, 20)
train_X的维度: (2980, 20, 6)
name: "data1"
type {
type: LOD_TENSOR
lod_tensor {
tensor {
data_type: FP64
dims: -1
dims: 20
dims: 6
}
lod_level: 0
}
}
persistable: false
need_check_feed: true
name: "cudnn_lstm_0.tmp_1"
type {
type: LOD_TENSOR
lod_tensor {
tensor {
data_type: FP64
dims: 1
dims: 60
dims: 6
}
}
}
persistable: false
name: "cudnn_lstm_0.tmp_1_slice_0"
type {
type: LOD_TENSOR
lod_tensor {
tensor {
data_type: FP64
dims: 60
dims: 6
}
}
}
C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py:774: UserWarning: The following exception is not an EOF exception.
"The following exception is not an EOF exception.")
Traceback (most recent call last):
File "D:\eclipse-workspace\pd_pd\pd_lstm\pd_lstm3.py", line 181, in
fetch_list=[avg_cost])
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py", line 775, in run
six.reraise(*sys.exc_info())
File "E:\Anaconda3\lib\site-packages\six.py", line 693, in reraise
raise value
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py", line 770, in run
use_program_cache=use_program_cache)
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py", line 817, in _run_impl
use_program_cache=use_program_cache)
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py", line 894, in _run_program
fetch_var_name)
paddle.fluid.core_avx.EnforceNotMet:
C++ Call Stacks (More useful to developers):
Windows not support stack backtrace yet.
Python Call Stacks (More useful to users):
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\framework.py", line 2459, in append_op
attrs=kwargs.get("attrs", None))
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\layers\nn.py", line 1018, in lstm
'seed': seed,
File "D:\eclipse-workspace\pd_pd\pd_lstm\pd_lstm3.py", line 129, in
rnn_out, last_h, last_c = fluid.layers.lstm(data1, init_h, init_c, max_len, hidden_size, num_layers, dropout_prob=dropout_prob)
Error Message Summary:
PaddleCheckError: op cudnn_lstm does not have kernel for data_type[double]:data_layout[ANY_LAYOUT]:place[CUDAPlace(0)]:library_type[PLAIN] at [D:\1.6.1\paddle\paddle\fluid\framework\operator.cc:1007]
[operator < cudnn_lstm > error]
W1112 09:50:24.788756 14520 device_context.cc:235] Please NOTE: device: 0, CUDA Capability: 61, Driver API Version: 9.1, Runtime API Version: 9.0
W1112 09:50:24.795739 14520 device_context.cc:243] device: 0, cuDNN Version: 7.6.
1)PaddlePaddle版本:1.6.1
2)GPU:预测若用GPU,CUDA 9.0,cudnn 7.6
3)系统环境:window 10 python3.6
1)单机,单卡
batch_size=60 #迭代次数
max_len=10 #input_dim不能超过该值
dropout_prob=0.2
seq_len=20 #时间序列长度
hidden_size=6 #隐藏层单元
num_layers=1 #叠加LSTM的层数
learning_rate=0.002 #学习率
data1= fluid.data(name='data1', shape=[-1,20,6], dtype='float64')
label = fluid.data(name='label', shape=[-1,1], dtype='float64')
print(data1)
init_h = fluid.layers.fill_constant( [num_layers, batch_size, hidden_size], 'float64', 0.0 )
init_c = fluid.layers.fill_constant( [num_layers, batch_size, hidden_size], 'float64', 0.0 )
rnn_out, last_h, last_c = fluid.layers.lstm(data1, init_h, init_c, max_len, hidden_size, num_layers, dropout_prob=dropout_prob)
print(last_h)
last_result=last_h[0,:,:]
print(last_result)
out = fluid.layers.fc(input=last_result, size=1, act=None)
获取损失函数
cost = fluid.layers.square_error_cost(input=out, label=label)
avg_cost = fluid.layers.mean(cost)
获取预测程序
test_program = fluid.default_main_program().clone(for_test=True)
定义优化方法
optimizer = fluid.optimizer.AdagradOptimizer(learning_rate)
opt = optimizer.minimize(avg_cost)
创建一个执行器,CPU训练速度比较慢
place = fluid.CPUPlace()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
进行参数初始化
exe.run(fluid.default_startup_program())
def reader_createor(data, label):
def reader():
for i in range(len(data)):
yield data[i,:], label[i]
return reader
train_reader = fluid.io.batch(fluid.io.shuffle(reader_createor(train_X, train_Y), 3000), batch_size=60,drop_last=True)
#加载测试数据
#train_reader = fluid.io.batch(fluid.io.shuffle(train_X, 3000), batch_size=60,drop_last=True)
feeder = fluid.DataFeeder(place=place, feed_list=[data1, label])
开始训练
for pass_id in range(1000):
# 进行训练
train_cost = 0
for batch_id, data in enumerate(train_reader()):
train_cost = exe.run(program=fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost])
前面是一点输出,后面是报错信息(我也试过换类型,换成了float32类型,但是就报PaddleCheckError: CUDNN_STATUS_BAD_PARAM at [D:\1.6.1\paddle\paddle\fluid\operators\cudnn_lstm_op.cu.cc:113]错误!!)
train_x1的维度: (2980, 20)
train_Y的维度: (2980, 1)
train_XT的维度: (6, 2980, 20)
train_X的维度: (2980, 20, 6)
name: "data1"
type {
type: LOD_TENSOR
lod_tensor {
tensor {
data_type: FP64
dims: -1
dims: 20
dims: 6
}
lod_level: 0
}
}
persistable: false
need_check_feed: true
name: "cudnn_lstm_0.tmp_1"
type {
type: LOD_TENSOR
lod_tensor {
tensor {
data_type: FP64
dims: 1
dims: 60
dims: 6
}
}
}
persistable: false
name: "cudnn_lstm_0.tmp_1_slice_0"
type {
type: LOD_TENSOR
lod_tensor {
tensor {
data_type: FP64
dims: 60
dims: 6
}
}
}
C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py:774: UserWarning: The following exception is not an EOF exception.
"The following exception is not an EOF exception.")
Traceback (most recent call last):
File "D:\eclipse-workspace\pd_pd\pd_lstm\pd_lstm3.py", line 181, in
fetch_list=[avg_cost])
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py", line 775, in run
six.reraise(*sys.exc_info())
File "E:\Anaconda3\lib\site-packages\six.py", line 693, in reraise
raise value
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py", line 770, in run
use_program_cache=use_program_cache)
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py", line 817, in _run_impl
use_program_cache=use_program_cache)
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\executor.py", line 894, in _run_program
fetch_var_name)
paddle.fluid.core_avx.EnforceNotMet:
C++ Call Stacks (More useful to developers):
Windows not support stack backtrace yet.
Python Call Stacks (More useful to users):
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\framework.py", line 2459, in append_op
attrs=kwargs.get("attrs", None))
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "C:\Users\17720\AppData\Roaming\Python\Python36\site-packages\paddle\fluid\layers\nn.py", line 1018, in lstm
'seed': seed,
File "D:\eclipse-workspace\pd_pd\pd_lstm\pd_lstm3.py", line 129, in
rnn_out, last_h, last_c = fluid.layers.lstm(data1, init_h, init_c, max_len, hidden_size, num_layers, dropout_prob=dropout_prob)
Error Message Summary:
PaddleCheckError: op cudnn_lstm does not have kernel for data_type[double]:data_layout[ANY_LAYOUT]:place[CUDAPlace(0)]:library_type[PLAIN] at [D:\1.6.1\paddle\paddle\fluid\framework\operator.cc:1007]
[operator < cudnn_lstm > error]
W1112 09:50:24.788756 14520 device_context.cc:235] Please NOTE: device: 0, CUDA Capability: 61, Driver API Version: 9.1, Runtime API Version: 9.0
W1112 09:50:24.795739 14520 device_context.cc:243] device: 0, cuDNN Version: 7.6.