使用【飞桨】实现【手写数字识别】心得
cousnecs 发布于2020-08 浏览:2939 回复:3
0
收藏

使用【飞桨】动态图实现【手写数字识别】可分5步走:

1.获取数据,2.定义模型,3.训练模型,4.测试模型,5.使用模型

当然依赖少不了

#引入依赖
import os
import random
import paddle
import paddle.fluid as fluid
import numpy as np

1.获取数据

借助飞桨自带的 MNIST 数据读取器,只需两行代码即可分别得到训练集和验证集的批量读取器

    # Batched readers built on Paddle's bundled MNIST dataset readers.
    # Training samples pass through a 1000-sample shuffle buffer; both
    # readers yield batches of 10 samples.
    train_loader = paddle.batch(
        fluid.io.shuffle(paddle.dataset.mnist.train(), buf_size=1000),
        batch_size=10,
    )
    valid_loader = paddle.batch(
        paddle.dataset.mnist.test(),
        batch_size=10,
    )

2.定义模型

  模型组成:卷积层>池化层>卷积层>池化层>全连接层,输入数据为:(1,28,28),各层输出数据为:(20,28,28)(20,14,14)(20,14,14)(20,7,7)(1,10)

#定义模型
from paddle.fluid.dygraph.nn import Conv2D, Pool2D,Linear
class MNIST(fluid.dygraph.Layer):
    """Small CNN for single-channel 28x28 digit images.

    Layout: conv -> max-pool -> conv -> max-pool -> fully connected.
    Both convolutions use a 5x5 kernel with stride 1 and padding 2, so they
    keep the spatial size; each 2x2/stride-2 pooling halves it. For an
    input of shape (N, 1, 28, 28) the features entering the linear layer
    are therefore 20 maps of 7x7, i.e. 980 values per sample.

    ``forward`` returns raw, un-normalised class scores (logits). No softmax
    is applied inside the network: the code that consumes this model feeds
    its output to ``fluid.layers.softmax_with_cross_entropy`` and
    ``fluid.layers.softmax``, which normalise the scores themselves. A
    softmax activation on the last layer would be applied a second time
    there, flattening the distribution and weakening the gradients.

    Args:
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, num_classes=1):
        super(MNIST, self).__init__()
        # Convolution: 1 -> 20 channels, 5x5 kernel, stride 1, padding 2, ReLU.
        self.conv1 = Conv2D(num_channels=1, num_filters=20, filter_size=5, stride=1, padding=2, act='relu')
        # Max pooling with a 2x2 window and stride 2.
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        # Convolution: 20 -> 20 channels, 5x5 kernel, stride 1, padding 2, ReLU.
        self.conv2 = Conv2D(num_channels=20, num_filters=20, filter_size=5, stride=1, padding=2, act='relu')
        # Max pooling with a 2x2 window and stride 2.
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        # Fully connected layer: 980 (= 20 * 7 * 7) features -> num_classes
        # logits. Intentionally no activation; see the class docstring.
        self.fc = Linear(input_dim=980, output_dim=num_classes)

    def forward(self, inputs):
        """Run the forward pass and return logits of shape (N, num_classes)."""
        x = self.conv1(inputs)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc(x)
        return x

3.训练模型

使用飞桨内置的优化算法 fluid.optimizer.Momentum(带动量的梯度下降)

# 定义训练过程
def train(model):
    """Train ``model`` on MNIST and save its parameters.

    Runs 5 epochs of Momentum optimisation (learning rate 0.001,
    momentum 0.9) over the module-level ``train_loader``. After every epoch
    the model is switched to eval mode, scored on the module-level
    ``valid_loader``, and the mean accuracy and loss are printed. When all
    epochs are done the parameters are saved under the prefix ``'mnist'``.

    The model output is treated as logits: it is passed to
    ``softmax_with_cross_entropy`` for the loss and to ``softmax`` before
    the accuracy computation.

    Args:
        model: A dygraph layer that accepts a (N, 1, 28, 28) float32 batch.
    """

    def _as_tensors(samples):
        # Each sample is expected to be an (image, label) pair; images are
        # reshaped to (N, 1, 28, 28) float32 and labels to (N, 1) int64
        # before both are converted to Tensors.
        images = np.array([s[0] for s in samples], dtype='float32').reshape(-1, 1, 28, 28)
        labels = np.array([s[1] for s in samples], dtype='int64').reshape(-1, 1)
        return fluid.dygraph.to_variable(images), fluid.dygraph.to_variable(labels)

    print('start training ... ')
    model.train()
    total_epochs = 5

    optimizer = fluid.optimizer.Momentum(
        learning_rate=0.001,
        momentum=0.9,
        parameter_list=model.parameters(),
    )
    for epoch in range(total_epochs):
        # ---- training pass ----
        for step, batch in enumerate(train_loader()):
            img, label = _as_tensors(batch)
            logits = model(img)
            per_sample_loss = fluid.layers.softmax_with_cross_entropy(logits, label)
            avg_loss = fluid.layers.mean(per_sample_loss)
            # Report progress once every 1000 batches.
            if step % 1000 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}".format(epoch, step, avg_loss.numpy()))
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            model.clear_gradients()

        # ---- validation pass ----
        model.eval()
        batch_accuracies = []
        batch_losses = []
        for batch in valid_loader():
            img, label = _as_tensors(batch)
            logits = model(img)
            probs = fluid.layers.softmax(logits)
            per_sample_loss = fluid.layers.softmax_with_cross_entropy(logits, label)
            batch_acc = fluid.layers.accuracy(probs, label)
            batch_accuracies.append(batch_acc.numpy())
            batch_losses.append(per_sample_loss.numpy())
        print("[validation] accuracy/loss: {}/{}".format(np.mean(batch_accuracies), np.mean(batch_losses)))
        # Back to training mode for the next epoch.
        model.train()

    # Persist the trained parameters.
    fluid.save_dygraph(model.state_dict(), 'mnist')


if __name__ == '__main__':
    # Create the model inside dygraph (imperative) mode
    with fluid.dygraph.guard():
        model = MNIST(num_classes=10)
        # Start the training run
        train(model)

4.测试模型

# Evaluate the saved model on the MNIST test set
with fluid.dygraph.guard():
    # Load the parameters stored under the 'mnist' prefix
    model_dict, _ = fluid.load_dygraph('mnist')
    model = MNIST(num_classes=10) 
    model.load_dict(model_dict) # load the saved parameters into the model
    test_loader =paddle.batch(fluid.io.shuffle(paddle.dataset.mnist.test(),buf_size=1000), batch_size=100)
    model.eval()
    accuracies = []
    losses = []
    for batch_id, data in enumerate(test_loader()):
        # Reshape the batch to (N, 1, 28, 28) float32 images and (N, 1) int64 labels
        x_data = np.array([item[0] for item in data], dtype='float32').reshape(-1, 1, 28, 28)
        y_data = np.array([item[1] for item in data], dtype='int64').reshape(-1, 1)
        # Convert numpy.ndarray to Tensor
        img = fluid.dygraph.to_variable(x_data)
        label = fluid.dygraph.to_variable(y_data)
        # Compute the model output
        logits = model(img)
        pred = fluid.layers.softmax(logits)
        # Compute the loss and the accuracy for this batch
        loss = fluid.layers.softmax_with_cross_entropy(logits, label)
        acc = fluid.layers.accuracy(pred, label)
        accuracies.append(acc.numpy())
        losses.append(loss.numpy())

       
    # Average the collected accuracy and loss values.
    # NOTE(review): the message is tagged "[validation]" although this loop
    # reads paddle.dataset.mnist.test() -- confirm whether that is intended.
    print("[validation] accuracy/loss: {}/{}".format(np.mean(accuracies), np.mean(losses)))

5.使用模型

def loadimg(img_path):
    """Load an image file as a normalised grayscale float32 array.

    The image is read in grayscale with ``paddle.dataset.image.load_image``,
    cast to float32, given a new leading axis, divided by 255 and then
    mapped with ``x * 2 - 1``. Assuming 8-bit pixel values, the result lies
    in [-1, 1]. No resizing is performed.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The loaded array with one extra leading dimension.
    """
    gray = paddle.dataset.image.load_image(file=img_path, is_color=False).astype('float32')
    # Prepend an axis, then rescale to [0, 1].
    unit_scaled = gray[np.newaxis, ] / 255.0
    # Shift and stretch to [-1, 1].
    return unit_scaled * 2.0 - 1.0
# Classify a single image file with the saved model
with fluid.dygraph.guard():
    # Load the parameters stored under the 'mnist' prefix
    model_dict, _ = fluid.load_dygraph('mnist')
    model = MNIST(num_classes=10) 
    model.load_dict(model_dict) # load the saved parameters into the model
    model.eval() # evaluation mode
    infer_imgs = []
    # reshape(1,28,28) only succeeds when the loaded image holds exactly 28*28 values
    images=np.array(loadimg("9.jpg").reshape(1,28,28),np.float32)
    infer_imgs.append(images)
    # Stacking the one-element list adds the batch axis: shape (1, 1, 28, 28)
    infer_imgs = np.array(infer_imgs)
    infer_imgs = fluid.dygraph.to_variable(infer_imgs)
    
    result=model(infer_imgs)
    # The index of the largest output score is the predicted digit
    print("预测值:%d"%(np.argmax(result.numpy())))

注意:输入图片必须是大小为 28*28 的手写数字图片,否则 reshape(1,28,28) 会报错!

 

收藏
点赞
0
个赞
共3条回复 最后由用户已被禁言回复于2022-04
#4cousnecs回复于2020-09
#3 188******76回复
修改网络结构以后的效果如何?

变数大,效果不好说。

0
#3188******76回复于2020-09

修改网络结构以后的效果如何?

0
#2188******76回复于2020-09

赞!

0
TOP
切换版块