coding=UTF-8
import paddle.v2 as paddle
import pandas as pd
import numpy as np
from paddle.trainer_config_helpers.activations import SigmoidActivation
'''
体重预测项目:
描述:根据某个人100多天的两个特征值,摄入量(小数),消耗量(小数),所对应的目标值体重变化(小数),来预测用户未来某一天的体重.
初步思路:
1.数据集操作(已完成)
1.1 通过pandas读取数据集
1.2 均值来填充特征值和目标值(摄入量in,消耗量out,体重weight)
1.3 去掉除上述外其他无关特征(id,age,gender)
2.创建paddlepaddle框架中的reader(不确定是否正确)
2.1 已完成整体数据集train_reader的逻辑书写,问题:是否要区分train_reader和test_reader处理?还是直接整体数据集,再通过buf_size区分训练集和测试集的数量?
2.2 batch size 为 1
3.创建网络结构(一层输入层,一层fc,一层lstm层,一层输出层)
3.1 输入层 type=paddle.data_type.pydp2.dense_vector(4) 参数为多少?是特征值还是神经元个数?输入层有 1 个input,隐藏层有 4 个神经元
3.2 fc层 size = 4
3.3 lstm层搭建,激活函数用 sigmoid,
3.4 输出层 输出层就是预测一个值
4.保存网络结构(照搬)
5.构造SGD trainer(构造使用随机梯度下降的trainer)
6.构建feeding(看了官方文档不是很理解)
6.1官方文档描述如下
feeding用来指定train_reader和test_reader返回的数据与模型配置中data_layer的对应关系。这里表示reader返回的第0列数据对应word层,第1列数据对应label层
7.进行训练,轮数100(已完成)
8.预测infer
'''
#paddlepaddle固定初始化,不适用GPU,线程数为1
paddle.init(use_gpu=False, trainer_count=1)
#1.数据集操作
df1 = pd.read_csv('2_34.csv',parse_dates=["data"],index_col=3,)
df1_in_mean = df1["in"].mean()
df1["in"].fillna(df1_in_mean,inplace=True)
df1_out_mean = df1["out"].mean()
df1["out"].fillna(df1_out_mean,inplace=True)
# 求体重均值并填充(不要求精度)
df1_weight_mean = df1["weight"].mean()
df1["weight"].fillna(df1_weight_mean,inplace=True)
df1.drop('id', axis=1, inplace=True)#axis=1,删除列;inplace=True,直接在原DataFrame上执行删除
df1.drop('age', axis=1, inplace=True)#axis=1,删除列;inplace=True,直接在原DataFrame上执行删除
df1.drop('gender', axis=1, inplace=True)#axis=1,删除列;inplace=True,直接在原DataFrame上执行删除
#定义reader
def train_reader():
train_list_x = []
train_list_y = []
train_len = len(df1["in"])
for i in range(train_len):
list1=[]
list1 = [df1["in"][i],df1["out"][i]]
list2 = [df1["weight"][i]]
train_list_x.append(list1)
train_list_y.append(list2)
train_x = np.array(train_list_x)
train_y = np.array(train_list_y)
# train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]])
# train_y = np.array([[-2], [-3], [-7], [-7]])
def reader():
for i in xrange(train_y.shape[0]):
yield train_x[i], train_y[i]
return reader
#2.创建reader(不确定正确与否)
reader_w = train_reader()
shuffle_reader = paddle.reader.shuffle(reader_w,buf_size=128)
batch_reader = paddle.batch(shuffle_reader,batch_size=1)
#3.搭建网络结构
#输入层 paddle.data_type.pydp2.dense_vector(4)?应该填几?4?还是几个特征值就填几?
x = paddle.layer.data(name='x',type=paddle.data_type.pydp2.dense_vector(4))
#lstm层
y_predict = paddle.layer.v1_layers.lstmemory(input=x,act=SigmoidActivation())
#输出层
y = paddle.layer.data(name='y', type=paddle.data_type.pydp2.dense_vector(1))
#损失函数
cost = paddle.layer.v1_layers.square_error_cost(input=y_predict, label=y)
#mse_cost 是mse均方差y_predict 是预测值,label = y_label 是真实值
#4.保存网络结构
inference_topology = paddle.topology.Topology(layers=y_predict)
with open("weight_infer.pkl", 'wb') as f:
inference_topology.serialize_for_inference(f)
#5.构造SGD trainer(构造使用随机梯度下降的trainer)
parameters = paddle.parameters.create(cost)
optimizer =paddle.optimizer.Adam() #AdaGrad函数代替momentum进行优化,学习率可以通过参数更改
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
#6.feeding构建
feeding={'x': 0, 'y': 1}
#7.进行训练
trainer.train(
reader = batch_reader,
feeding=feeding,
#event_handler=event_handler_plot,
num_passes=100)
没有结果:
I0306 11:24:33.605950 9099 Util.cpp:166] commandline: --use_gpu=False --trainer_count=1
I0306 11:24:33.628300 9099 GradientMachine.cpp:94] Initing parameters..
I0306 11:24:33.628324 9099 GradientMachine.cpp:101] Init parameters done.
F0306 11:24:33.699769 9099 LstmLayer.cpp:155] Check failed: input.sequenceStartPositions
*** Check failure stack trace: ***
@ 0x7f03535639cd google::LogMessage::Fail()
@ 0x7f035356747c google::LogMessage::SendToLog()
@ 0x7f03535634f3 google::LogMessage::Flush()
@ 0x7f035356898e google::LogMessageFatal::~LogMessageFatal()
@ 0x7f0353282452 paddle::LstmLayer::forward()
@ 0x7f0353318efd paddle::NeuralNetwork::forward()
@ 0x7f0353319c13 paddle::GradientMachine::forwardBackward()
@ 0x7f035353fae4 GradientMachine::forwardBackward()
@ 0x7f03531bda69 _wrap_GradientMachine_forwardBackward
@ 0x4cb755 PyEval_EvalFrameEx
@ 0x4c2705 PyEval_EvalCodeEx
@ 0x4ca7df PyEval_EvalFrameEx
@ 0x4c2705 PyEval_EvalCodeEx
@ 0x4ca088 PyEval_EvalFrameEx
@ 0x4c2705 PyEval_EvalCodeEx
@ 0x4ca088 PyEval_EvalFrameEx
@ 0x4c2705 PyEval_EvalCodeEx
@ 0x4c24a9 PyEval_EvalCode
@ 0x4f19ef (unknown)
@ 0x4ec372 PyRun_FileExFlags
@ 0x4eaaf1 PyRun_SimpleFileExFlags
@ 0x49e208 Py_Main
@ 0x7f03779ad830 __libc_start_main
@ 0x49da59 _start
@ (nil) (unknown)
Process finished with exit code 134 (interrupted by signal 6: SIGABRT)
我需要一些指导,没有结果什么原因,谢谢了。
你的问题好像在GitHub已经得到解决了,能否在这里回答一下。介绍是如何解决问题的呢