猫狗分类如何提高准确率?
收藏
各位大大好。
小弟想用官方关于眼部识别的教程改成猫狗分类的代码,用了AlexNet/VGG/GoogLeNet三个模型,但效果都不怎么好,准确率只有60%多,不到70%。我想请教一下,我应该怎样提高准确率呢?或者是不是我的代码有什么不对的地方?烦请指教,谢谢。
import paddle
import numpy as np
from paddle.nn import Conv2D, MaxPool2D, Linear, Dropout, AdaptiveAvgPool2D
import cv2
import os
import paddle.nn.functional as F
import random
class Inception(paddle.nn.Layer):
    """Inception block: four parallel branches joined along the channel axis.

    Branches (every conv/pool uses stride 1 with size-preserving padding, so
    all branch outputs keep the spatial size of the input):
        1. 1x1 conv
        2. 1x1 conv -> 3x3 conv
        3. 1x1 conv -> 5x5 conv
        4. 3x3 max-pool -> 1x1 conv
    """

    def __init__(self, c0, c1, c2, c3, c4, **kwargs):
        """Create the layers used by the four branches.

        Args:
            c0: number of input channels.
            c1: output channels of the 1x1 conv in branch 1 (int).
            c2: tuple/list for branch 2; ``c2[0]`` is the 1x1 conv output
                channels and ``c2[1]`` the 3x3 conv output channels.
            c3: tuple/list for branch 3; ``c3[0]`` is the 1x1 conv output
                channels and ``c3[1]`` the 5x5 conv output channels.
            c4: output channels of the 1x1 conv in branch 4 (int).
            **kwargs: accepted but not used.
        """
        super().__init__()
        mid_3x3, out_3x3 = c2[0], c2[1]
        mid_5x5, out_5x5 = c3[0], c3[1]

        # Branch 1: a single 1x1 convolution.
        self.p1_1 = Conv2D(
            in_channels=c0, out_channels=c1, kernel_size=1, stride=1)

        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
        self.p2_1 = Conv2D(
            in_channels=c0, out_channels=mid_3x3, kernel_size=1, stride=1)
        self.p2_2 = Conv2D(
            in_channels=mid_3x3, out_channels=out_3x3,
            kernel_size=3, padding=1, stride=1)

        # Branch 3: 1x1 reduction followed by a 5x5 convolution.
        self.p3_1 = Conv2D(
            in_channels=c0, out_channels=mid_5x5, kernel_size=1, stride=1)
        self.p3_2 = Conv2D(
            in_channels=mid_5x5, out_channels=out_5x5,
            kernel_size=5, padding=2, stride=1)

        # Branch 4: 3x3 max-pool followed by a 1x1 convolution.
        self.p4_1 = MaxPool2D(kernel_size=3, stride=1, padding=1)
        self.p4_2 = Conv2D(
            in_channels=c0, out_channels=c4, kernel_size=1, stride=1)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on axis 1.

        The result has ``c1 + c2[1] + c3[1] + c4`` channels.
        """
        branch1 = F.relu(self.p1_1(x))

        reduced_3x3 = F.relu(self.p2_1(x))
        branch2 = F.relu(self.p2_2(reduced_3x3))

        reduced_5x5 = F.relu(self.p3_1(x))
        branch3 = F.relu(self.p3_2(reduced_5x5))

        pooled = self.p4_1(x)
        branch4 = F.relu(self.p4_2(pooled))

        return paddle.concat([branch1, branch2, branch3, branch4], axis=1)
class GoogLeNet(paddle.nn.Layer):
    """GoogLeNet-style classifier with a 2-unit output layer.

    The network is organised in five stages; each of the first four ends
    with a stride-2 3x3 max-pool and the fifth ends with global average
    pooling, followed by one fully connected layer.
    """

    def __init__(self):
        """Build all stages; takes no configuration."""
        super().__init__()

        # Stage 1: one 7x7 convolution, then 3x3 max-pool.
        self.conv1 = Conv2D(
            in_channels=3, out_channels=64,
            kernel_size=7, padding=3, stride=1)
        self.pool1 = MaxPool2D(kernel_size=3, stride=2, padding=1)

        # Stage 2: 1x1 conv + 3x3 conv, then 3x3 max-pool.
        self.conv2_1 = Conv2D(
            in_channels=64, out_channels=64, kernel_size=1, stride=1)
        self.conv2_2 = Conv2D(
            in_channels=64, out_channels=192,
            kernel_size=3, padding=1, stride=1)
        self.pool2 = MaxPool2D(kernel_size=3, stride=2, padding=1)

        # Stage 3: two Inception blocks (192 -> 256 -> 480 channels).
        self.block3_1 = Inception(192, 64, (96, 128), (16, 32), 32)
        self.block3_2 = Inception(256, 128, (128, 192), (32, 96), 64)
        self.pool3 = MaxPool2D(kernel_size=3, stride=2, padding=1)

        # Stage 4: five Inception blocks (480 -> ... -> 832 channels).
        self.block4_1 = Inception(480, 192, (96, 208), (16, 48), 64)
        self.block4_2 = Inception(512, 160, (112, 224), (24, 64), 64)
        self.block4_3 = Inception(512, 128, (128, 256), (24, 64), 64)
        self.block4_4 = Inception(512, 112, (144, 288), (32, 64), 64)
        self.block4_5 = Inception(528, 256, (160, 320), (32, 128), 128)
        self.pool4 = MaxPool2D(kernel_size=3, stride=2, padding=1)

        # Stage 5: two Inception blocks (832 -> 832 -> 1024 channels).
        self.block5_1 = Inception(832, 256, (160, 320), (32, 128), 128)
        self.block5_2 = Inception(832, 384, (192, 384), (48, 128), 128)

        # Global average pooling collapses each feature map to 1x1, so no
        # stride needs to be configured.
        self.pool5 = AdaptiveAvgPool2D(output_size=1)
        self.fc = Linear(in_features=1024, out_features=2)

    def forward(self, x):
        """Return the 2-unit output of the final linear layer for ``x``."""
        # Stage 1
        feat = F.relu(self.conv1(x))
        feat = self.pool1(feat)

        # Stage 2
        feat = F.relu(self.conv2_1(feat))
        feat = F.relu(self.conv2_2(feat))
        feat = self.pool2(feat)

        # Stage 3
        feat = self.block3_1(feat)
        feat = self.block3_2(feat)
        feat = self.pool3(feat)

        # Stage 4
        feat = self.block4_1(feat)
        feat = self.block4_2(feat)
        feat = self.block4_3(feat)
        feat = self.block4_4(feat)
        feat = self.block4_5(feat)
        feat = self.pool4(feat)

        # Stage 5
        feat = self.block5_1(feat)
        feat = self.block5_2(feat)
        feat = self.pool5(feat)

        # Flatten everything except the batch axis before the classifier.
        flat = paddle.reshape(feat, [feat.shape[0], -1])
        return self.fc(flat)
class VGGNet(paddle.nn.Layer):
    """VGG-style network: five 3x3-conv groups followed by three FC layers.

    NOTE(review): ``num_classes`` is accepted but never read; the last
    layer ``fc3`` always has 2 output units.
    NOTE(review): ``conv2_3`` and ``max_pool1`` .. ``max_pool4`` are created
    but not used by ``forward`` (which pools with ``self.pool``). They are
    kept so the set of sublayers/parameters stays the same.
    """

    def __init__(self, num_classes=1):
        """Create all layers.

        Args:
            num_classes: currently unused (see class note).
        """
        super().__init__()

        def conv3x3(in_ch, out_ch):
            # Every convolution in this network is 3x3, stride 1, padding 1.
            return Conv2D(in_channels=in_ch, out_channels=out_ch,
                          kernel_size=3, stride=1, padding=1)

        def pool2x2():
            return MaxPool2D(kernel_size=2, stride=2)

        # Group 1
        self.conv1_1 = conv3x3(3, 64)
        self.conv1_2 = conv3x3(64, 64)
        self.max_pool1 = pool2x2()

        # Group 2
        self.conv2_1 = conv3x3(64, 128)
        self.conv2_2 = conv3x3(128, 128)
        self.conv2_3 = conv3x3(128, 128)
        self.max_pool2 = pool2x2()

        # Group 3
        self.conv3_1 = conv3x3(128, 256)
        self.conv3_2 = conv3x3(256, 256)
        self.conv3_3 = conv3x3(256, 256)
        self.max_pool3 = pool2x2()

        # Group 4
        self.conv4_1 = conv3x3(256, 512)
        self.conv4_2 = conv3x3(512, 512)
        self.conv4_3 = conv3x3(512, 512)
        self.max_pool4 = pool2x2()

        # Group 5
        self.conv5_1 = conv3x3(512, 512)
        self.conv5_2 = conv3x3(512, 512)
        self.conv5_3 = conv3x3(512, 512)

        # Classifier head. fc1 expects 512 * 7 * 7 features, i.e. a
        # 224x224 input halved by the five pooling steps in forward().
        # fc1 and fc2 each bundle Linear + ReLU in a Sequential.
        self.fc1 = paddle.nn.Sequential(
            paddle.nn.Linear(512 * 7 * 7, 4096), paddle.nn.ReLU())
        self.drop1_ratio = 0.5
        self.dropout1 = paddle.nn.Dropout(
            self.drop1_ratio, mode='upscale_in_train')

        self.fc2 = paddle.nn.Sequential(
            paddle.nn.Linear(4096, 4096), paddle.nn.ReLU())
        self.drop2_ratio = 0.5
        self.dropout2 = paddle.nn.Dropout(
            self.drop2_ratio, mode='upscale_in_train')

        self.fc3 = paddle.nn.Linear(4096, 2)

        # Shared activation and pooling layers used throughout forward().
        self.relu = paddle.nn.ReLU()
        self.pool = MaxPool2D(stride=2, kernel_size=2)

    def forward(self, x, label=None):
        """Compute the network output, optionally with batch accuracy.

        Args:
            x: input batch.
            label: optional labels; when given, accuracy is computed with
                ``paddle.metric.accuracy``.

        Returns:
            The ``fc3`` output when ``label`` is None, otherwise the tuple
            ``(output, accuracy)``.
        """
        conv_groups = (
            (self.conv1_1, self.conv1_2),
            (self.conv2_1, self.conv2_2),
            (self.conv3_1, self.conv3_2, self.conv3_3),
            (self.conv4_1, self.conv4_2, self.conv4_3),
            (self.conv5_1, self.conv5_2, self.conv5_3),
        )
        # Each group: conv + ReLU for every layer, then one 2x2 max-pool.
        for group in conv_groups:
            for conv in group:
                x = self.relu(conv(x))
            x = self.pool(x)

        x = paddle.flatten(x, 1, -1)

        # fc1/fc2 already end in ReLU, so the extra self.relu here does not
        # change the values; it is kept to mirror the original computation.
        x = self.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)

        if label is None:
            return x
        acc = paddle.metric.accuracy(input=x, label=label)
        return x, acc
class AlexNet(paddle.nn.Layer):
    """AlexNet-style network: five conv layers and three FC layers.

    The final layer emits ``num_classes`` raw values; no sigmoid/softmax is
    applied inside the network.
    """

    def __init__(self, num_classes=1):
        """Create all layers.

        Args:
            num_classes: number of output units of the last linear layer.
        """
        super().__init__()
        self.num_classes = num_classes

        # Feature extractor.
        self.conv1 = Conv2D(
            in_channels=3, out_channels=96,
            kernel_size=11, stride=4, padding=5)
        self.max_pool1 = MaxPool2D(kernel_size=2, stride=2)

        self.conv2 = Conv2D(
            in_channels=96, out_channels=256,
            kernel_size=5, stride=1, padding=2)
        self.max_pool2 = MaxPool2D(kernel_size=2, stride=2)

        self.conv3 = Conv2D(
            in_channels=256, out_channels=384,
            kernel_size=3, stride=1, padding=1)
        self.conv4 = Conv2D(
            in_channels=384, out_channels=384,
            kernel_size=3, stride=1, padding=1)
        self.conv5 = Conv2D(
            in_channels=384, out_channels=256,
            kernel_size=3, stride=1, padding=1)
        self.max_pool3 = MaxPool2D(kernel_size=2, stride=2)

        # Classifier. 12544 = 256 * 7 * 7, the flattened size of the conv
        # stack's output when the input is 224x224.
        self.fc1 = Linear(in_features=12544, out_features=4096)
        self.drop_ratio1 = 0.5
        self.drop1 = Dropout(self.drop_ratio1)

        self.fc2 = Linear(in_features=4096, out_features=4096)
        self.drop_ratio2 = 0.5
        self.drop2 = Dropout(self.drop_ratio2)

        self.fc3 = Linear(in_features=4096, out_features=num_classes)

    def forward(self, x):
        """Return the raw ``fc3`` output for the input batch ``x``."""
        # Convolutional stack: pooling follows conv1, conv2 and conv5.
        x = self.max_pool1(F.relu(self.conv1(x)))
        x = self.max_pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.max_pool3(F.relu(self.conv5(x)))

        # Flatten everything except the batch axis.
        x = paddle.reshape(x, [x.shape[0], -1])

        # Fully connected head with dropout after each hidden layer.
        x = self.drop1(F.relu(self.fc1(x)))
        x = self.drop2(F.relu(self.fc2(x)))
        return self.fc3(x)
def transform_img(img):
    """Resize an image to 224x224 and convert it to a scaled CHW array.

    Args:
        img: 3-D image array with channels last (presumably what
            ``cv2.imread`` returns - confirm against callers).

    Returns:
        A float32 array with the channel axis first, computed as
        ``pixel / 255 * 2 - 1`` (i.e. [-1, 1] for pixel values in [0, 255]).
    """
    resized = cv2.resize(img, (224, 224))
    # Move the channel axis to the front: (H, W, C) -> (C, H, W).
    channels_first = np.transpose(resized, (2, 0, 1)).astype('float32')
    scaled = channels_first / 255
    return scaled * 2.0 - 1.0
def train_data_loader(bacth_size=50):
    """Yield shuffled training batches read from ``./Data/train/``.

    Every file below the directory whose name contains ``"jpg"`` is used.
    The label comes from the file path: 0 if it contains ``"cat"``,
    otherwise 1 if it contains ``"dog"``. Note that the whole path is
    searched, so directory names count as well.

    Args:
        bacth_size: number of images per batch (parameter name kept as-is
            for backward compatibility).

    Yields:
        ``(imgs_array, labels_array)`` where ``imgs_array`` is a float32
        array of images produced by ``transform_img`` and ``labels_array``
        is a float32 array of shape ``(n, 1)``. The final batch may hold
        fewer than ``bacth_size`` samples.

    Raises:
        ValueError: if a path contains neither ``"cat"`` nor ``"dog"``.
    """
    train_file_path = "./Data/train/"

    imgs_file_list = []
    for dirpath, _dirnames, filenames in os.walk(train_file_path):
        for filename in filenames:
            if "jpg" in filename:
                imgs_file_list.append(os.path.join(dirpath, filename))
    random.shuffle(imgs_file_list)

    batch_imgs = []
    label_list = []
    for one_file in imgs_file_list:
        # Validate the label before paying for the image decode.
        if "cat" in one_file:
            label = 0
        elif "dog" in one_file:
            label = 1
        else:
            # The original `raise("...")` raised TypeError, because a str
            # is not an exception.
            raise ValueError("invalid file path: " + one_file)

        batch_imgs.append(transform_img(cv2.imread(one_file)))
        label_list.append(label)

        if len(batch_imgs) == bacth_size:
            imgs_array = np.array(batch_imgs).astype('float32')
            labels_array = np.array(label_list).astype(
                'float32').reshape(-1, 1)
            yield imgs_array, labels_array
            batch_imgs = []
            label_list = []

    if batch_imgs:
        # Leftover partial batch: same float32 labels as the full batches,
        # so a loss that needs float targets sees a consistent dtype.
        imgs_array = np.array(batch_imgs).astype('float32')
        labels_array = np.array(label_list).astype(
            'float32').reshape(-1, 1)
        yield imgs_array, labels_array
def valid_data_loader(bacth_size=10):
    """Yield validation batches read from ``./Data/validation/``.

    Every file below the directory whose name contains ``"jpg"`` is used
    (in shuffled order). The label comes from the file path: 0 if it
    contains ``"cat"``, otherwise 1 if it contains ``"dog"``. The whole
    path is searched, so directory names count as well.

    Args:
        bacth_size: number of images per batch (parameter name kept as-is
            for backward compatibility).

    Yields:
        ``(imgs_array, labels_array)`` where ``imgs_array`` is a float32
        array of images produced by ``transform_img`` and ``labels_array``
        is a float32 array of shape ``(n, 1)``. The final batch may hold
        fewer than ``bacth_size`` samples.

    Raises:
        ValueError: if a path contains neither ``"cat"`` nor ``"dog"``.
    """
    valid_file_path = "./Data/validation/"

    imgs_file_list = []
    for dirpath, _dirnames, filenames in os.walk(valid_file_path):
        for filename in filenames:
            if "jpg" in filename:
                imgs_file_list.append(os.path.join(dirpath, filename))
    random.shuffle(imgs_file_list)

    batch_imgs = []
    label_list = []
    for one_file in imgs_file_list:
        # Validate the label before paying for the image decode.
        if "cat" in one_file:
            label = 0
        elif "dog" in one_file:
            label = 1
        else:
            # The original `raise("...")` raised TypeError, because a str
            # is not an exception.
            raise ValueError("invalid file path: " + one_file)

        batch_imgs.append(transform_img(cv2.imread(one_file)))
        label_list.append(label)

        if len(batch_imgs) == bacth_size:
            imgs_array = np.array(batch_imgs).astype('float32')
            # reshape(-1, 1) rather than a hard-coded 10 rows, so any
            # bacth_size yields one label per row.
            labels_array = np.array(label_list).astype(
                'float32').reshape(-1, 1)
            yield imgs_array, labels_array
            batch_imgs = []
            label_list = []

    if batch_imgs:
        imgs_array = np.array(batch_imgs).astype('float32')
        labels_array = np.array(label_list).astype(
            'float32').reshape(-1, 1)
        yield imgs_array, labels_array
# One instance of each architecture defined above. (The original script
# also called train_data_loader() here and discarded the result; calling a
# generator function without iterating it runs none of its body, so that
# dead statement was dropped.)
my_alex_net = AlexNet()
my_google_net = GoogLeNet()
my_VGG_net = VGGNet()
# Number of passes over the training data made by train().
EPOCH_NUM = 5
def train(model, optimizer):
    """Train ``model`` for ``EPOCH_NUM`` epochs, validating after each one.

    The model is expected to emit one logit per sample: the loss is
    ``binary_cross_entropy_with_logits`` against labels of shape ``(n, 1)``.
    After every epoch the model is switched to eval mode, accuracy and
    loss are averaged over the validation batches and printed, and the
    model is switched back to train mode.

    Args:
        model: network to train; called as ``model(img)``.
        optimizer: paddle optimizer already bound to the model parameters.
    """
    # Use the GPU when this paddle build supports CUDA, otherwise the CPU.
    # The original hard-coded use_gpu = True, so the CPU branch was
    # unreachable and CPU-only builds failed here.
    use_gpu = paddle.device.is_compiled_with_cuda()
    paddle.device.set_device('gpu:0' if use_gpu else 'cpu')

    print("start training ...")
    for epoch in range(EPOCH_NUM):
        for batch_id, (x_data, y_data) in enumerate(train_data_loader()):
            img = paddle.to_tensor(x_data)
            # The loss needs float targets; force the dtype here so it does
            # not depend on what the loader produced.
            label = paddle.to_tensor(y_data, dtype='float32')

            logits = model(img)
            loss = F.binary_cross_entropy_with_logits(logits, label)
            avg_loss = paddle.mean(loss)

            if batch_id % 20 == 0:
                print("epoch: {}, batch_id: {}, loss is: {:.4f}".format(
                    epoch, batch_id, float(avg_loss.numpy())))

            # Back-propagate, update the weights, then clear the gradients.
            avg_loss.backward()
            optimizer.step()
            optimizer.clear_grad()

        # ---- validation ----
        model.eval()
        accuracies = []
        losses = []
        # No gradients are needed while evaluating.
        with paddle.no_grad():
            for x_data, y_data in valid_data_loader():
                img = paddle.to_tensor(x_data)
                label = paddle.to_tensor(y_data, dtype='float32')

                logits = model(img)
                loss = F.binary_cross_entropy_with_logits(logits, label)

                # Turn the single sigmoid probability p into a two-column
                # [1 - p, p] score so top-1 accuracy can be computed.
                pred = F.sigmoid(logits)
                pred2 = pred * (-1.0) + 1.0
                pred = paddle.concat([pred2, pred], axis=1)
                # Explicit int64: 'int' maps to a platform-dependent width.
                acc = paddle.metric.accuracy(
                    pred, paddle.cast(label, dtype='int64'))

                accuracies.append(acc.numpy())
                losses.append(loss.numpy())

        print(
            "[validation] accuracy/loss: {:.4f}/{:.4f}".format(
                np.mean(accuracies), np.mean(losses)))
        model.train()
# Momentum optimizer over the AlexNet parameters, with weight decay.
opt = paddle.optimizer.Momentum(
    learning_rate=0.001,
    momentum=0.9,
    parameters=my_alex_net.parameters(),
    weight_decay=0.001,
)
# Start the training run.
train(my_alex_net, opt)
0
收藏
请登录后评论
试试resnet
试了,更差了。
[validation] accuracy/loss: 0.5643/0.6964
是不是我的代码的accuracies计算有什么问题?
大佬
m
训练的epoch数量不够吧,你代码里写的EPOCH_NUM是5,同时可能epoch和优化器学习率之类的也不匹配
好的,谢谢!我提高 看看!
提高EPOCH_NUM是不行的。我查看了图片,发现图片并不是规则的,所以有可能是因为图片大小不一,我强行转换成3x224x224,会造成失真。
准备看看用填充的方法来试试。
m