import pandas as pd
import numpy as np
import paddle
import paddle.fluid as fluid
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
paddle.enable_static()
import warnings
warnings.filterwarnings("ignore")
# Load the spreadsheet that holds the raw samples.
file = r"F:\桌面\数据.xlsx"
sheetName = 'Sheet2'
data = pd.read_excel(file, sheet_name=sheetName)

# Column 0 is the regression target; columns 1 and 2 are the input features.
X = data.iloc[:, 1:3].values
y = data.iloc[:, 0:1].values

# Split into a training set (70%) and a test set (30%) BEFORE scaling, so the
# scaling statistics are estimated from the training samples only and no
# information about the test set leaks into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Features and target get their own scaler: refitting a single scaler on y
# would overwrite the statistics learned from X.
# `scaler` is the *target* scaler; the name is kept because the evaluation
# code further down calls `scaler.inverse_transform` on targets/predictions.
x_scaler = StandardScaler()
scaler = StandardScaler()

# The network placeholders are declared as float32, so cast once here.
X_train = x_scaler.fit_transform(X_train).astype('float32')
X_test = x_scaler.transform(X_test).astype('float32')
y_train = scaler.fit_transform(y_train).astype('float32')
y_test = scaler.transform(y_test).astype('float32')
# Build the LSTM model
def lstm_model(input_dim, output_dim, hidden_size, learning_rate=0.001):
    """Build a two-layer stacked LSTM regressor in the default static program.

    The layers are appended to ``fluid.default_main_program()`` and an Adam
    optimizer minimizing the mean squared error is attached, so callers must
    NOT call ``minimize`` on the returned loss again.

    Args:
        input_dim: Number of input features per time step.
        output_dim: Number of regression targets per time step.
        hidden_size: Number of hidden units of the fc layer and of each LSTM.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        Tuple ``(x, y, prediction, avg_cost)``: the input placeholder, the
        label placeholder, the network output and the scalar training loss.

    Raises:
        ValueError: If ``hidden_size`` is not a positive integer.
    """
    if int(hidden_size) != hidden_size or hidden_size <= 0:
        raise ValueError(
            "hidden_size must be a positive integer, got {!r}".format(hidden_size))

    # `dynamic_lstm` takes `size = 4 * hidden units` (one slice per gate plus
    # the cell candidate) and expects its input to already have that width.
    # Passing the plain hidden size (e.g. 2) makes the layer compute
    # 2 // 4 == 0 hidden units and fail while creating a zero-sized weight.
    gate_size = 4 * int(hidden_size)

    # Input and label placeholders. `dynamic_lstm` consumes sequence (LoD)
    # input, hence lod_level=1 on the features.
    # NOTE(review): callers have to feed `x` as a LoDTensor (for example via
    # fluid.create_lod_tensor) -- confirm against the training loop.
    x = fluid.data(name='x', shape=[None, input_dim], dtype='float32', lod_level=1)
    y = fluid.data(name='y', shape=[None, output_dim], dtype='float32')

    # Hidden fully connected layer.
    hidden = fluid.layers.fc(input=x, size=hidden_size, act='tanh')
    # Print the static shape of the hidden tensor.
    print("hidden.dim: ", hidden.shape)

    # First LSTM layer, preceded by the linear projection to the gate width.
    gates1 = fluid.layers.fc(input=hidden, size=gate_size, act=None)
    lstm1, _ = fluid.layers.dynamic_lstm(input=gates1, size=gate_size)
    dropout1 = fluid.layers.dropout(lstm1, dropout_prob=0.5)

    # Second LSTM layer; its input (width hidden_size) needs its own projection.
    gates2 = fluid.layers.fc(input=dropout1, size=gate_size, act=None)
    lstm2, _ = fluid.layers.dynamic_lstm(input=gates2, size=gate_size)
    dropout2 = fluid.layers.dropout(lstm2, dropout_prob=0.5)

    # Linear output layer.
    prediction = fluid.layers.fc(input=dropout2, size=output_dim, act=None)

    # Loss: element-wise squared error averaged over the batch.
    # NOTE(review): `fluid.layers.mean_squared_error` does not appear in the
    # documented fluid layer API; `square_error_cost` is the documented op.
    cost = fluid.layers.square_error_cost(input=prediction, label=y)
    avg_cost = fluid.layers.mean(cost)

    # Optimizer.
    optimizer = fluid.optimizer.AdamOptimizer(learning_rate=learning_rate)
    optimizer.minimize(avg_cost)
    return x, y, prediction, avg_cost
# Input width, output width, hidden width and learning rate.
input_dim = 2
output_dim = 1
hidden_size = 2
learning_rate = 0.001

# Build the network. lstm_model() already attaches an Adam optimizer and calls
# minimize() on the loss, so no second optimizer is created here (a second
# minimize() would append a duplicate set of backward/update ops).
# The placeholders get their own names so they do not shadow the data arrays.
x_var, y_var, prediction, loss = lstm_model(
    input_dim, output_dim, hidden_size, learning_rate)

train_program = fluid.default_main_program()
# Inference copy of the graph: for_test=True switches dropout to test mode and
# prunes the backward/optimizer ops, so evaluating never updates the weights.
# NOTE(review): Paddle recommends cloning before minimize(); that is not
# possible here because minimize() runs inside lstm_model() -- confirm that the
# installed Paddle version prunes the optimizer ops on clone.
test_program = train_program.clone(for_test=True)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())


def _as_unit_sequences(rows, device):
    """Wrap a 2-D array as a LoDTensor in which every row is a length-1 sequence."""
    # dynamic_lstm only accepts sequence (LoD) input. Treating each sample as
    # its own one-step sequence mirrors the (N, 1, input_dim) reshape the
    # script used before -- TODO confirm this is the intended sequence layout.
    rows = np.ascontiguousarray(rows, dtype='float32')
    return fluid.create_lod_tensor(rows, [[1] * rows.shape[0]], device)


# Train the model.
epoch_num = 200
batch_size = 8
train_data_len = X_train.shape[0]
for epoch in range(epoch_num):
    # Stepping by batch_size also covers the final, possibly smaller batch.
    for step, start in enumerate(range(0, train_data_len, batch_size)):
        batch_x = X_train[start:start + batch_size, :]
        batch_y = y_train[start:start + batch_size].reshape(-1, 1).astype('float32')
        # The feed dict is keyed by variable *name*, not by the Variable object.
        loss_val, = exe.run(
            train_program,
            feed={x_var.name: _as_unit_sequences(batch_x, place),
                  y_var.name: batch_y},
            fetch_list=[loss])
        if step % 10 == 0:
            print('Epoch {}, Step {}, Loss: {:.6f}'.format(
                epoch, step, float(np.ravel(loss_val)[0])))

# Evaluate the model. Inputs stay 2-D so they match the [None, input_dim] and
# [None, output_dim] placeholders.
test_x = X_test.reshape(-1, input_dim)
test_y = y_test.reshape(-1, output_dim).astype('float32')
# The prediction carries sequence (LoD) information, which cannot be returned
# as a plain ndarray directly; fetch the tensor and convert it explicitly.
pred_tensor, = exe.run(
    test_program,
    feed={x_var.name: _as_unit_sequences(test_x, place),
          y_var.name: test_y},
    fetch_list=[prediction],
    return_numpy=False)
test_pred = np.array(pred_tensor).reshape(-1)

# Undo the target standardization.
test_y = scaler.inverse_transform(test_y.reshape(-1, 1)).reshape(-1)
test_pred = scaler.inverse_transform(test_pred.reshape(-1, 1)).reshape(-1)

# Compute RMSE, MAE and MAPE.
rmse = np.sqrt(mean_squared_error(test_y, test_pred))
mae = mean_absolute_error(test_y, test_pred)
# MAPE is undefined where the true value is 0; skip those samples instead of
# dividing by zero.
nonzero = test_y != 0
if nonzero.any():
    mape = np.mean(np.abs((test_y[nonzero] - test_pred[nonzero]) / test_y[nonzero])) * 100
else:
    mape = float('nan')

# Print the predictions and the metrics.
result_df = pd.DataFrame({'y_test': test_y, 'y_pred': test_pred})
print(result_df)
print('RMSE:', rmse)
print('MAE:', mae)
print('MAPE:', mape)
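# The following error was reported when the script was run with
# `size=hidden_size` (= 2) passed directly to fluid.layers.dynamic_lstm:
#   AssertionError: Expected every dim's size to be larger than 0, but the size of the 0-th dim is 0
# Output printed before the failure:
#   hidden.dim: (-1, 2)
# Likely cause (per the Paddle documentation): dynamic_lstm expects
# size = 4 * hidden units, so size=2 yields 2 // 4 = 0 hidden units and a
# zero-sized weight.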