百度飞桨图像分割7日打卡营第1次作业手撸代码
收藏
这段时间参加了飞桨深度学习学院开设的“图像分割7日打卡营”,课程链接如下:
https://aistudio.baidu.com/aistudio/course/introduce/1767
在学习过程中,我跟着朱老师的视频手写了代码,写代码的过程中遇到了一些问题。本文以第1次作业的代码为例,介绍一下写代码过程中遇到的问题。
第1次作业有如下的3个任务:
1) basic_model.py
2) basic_dataloader.py
3) basic_transform.py
几个文件都是以填空的方式来实现的,老师已经写了基础的代码,要求每个人根据课程内容和老师的视频把剩下的todo部分以填空的方式写完整,并把代码跑起来:
1. basic_model.py 的代码如下:
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable #TODO 将np数据转换成paddle中的tensor
from paddle.fluid.dygraph import Conv2D #TODO
from paddle.fluid.dygraph import Pool2D #TODO
import numpy as np
np.set_printoptions(precision=2)
class BasicModel(fluid.dygraph.Layer):
    """Minimal segmentation network used to exercise the dygraph API.

    The layer is built from three pieces:
      1. pool: 4x4 max pooling with stride 4.
      2. conv: 3x3 convolution with padding 1 (spatial size preserved) that
         maps the 3 RGB input channels to ``num_classes`` channels.
      3. upsample: interpolation back to the spatial size of the input.

    The exercise feeds a random tensor of shape (1, 3, 8, 8); the output keeps
    the input's H x W and has ``num_classes`` channels.

    Args:
        num_classes: Number of output channels produced by the convolution.
    """

    def __init__(self, num_classes=59):
        super(BasicModel, self).__init__()
        # 4x4 max pool with stride 4, as required by the exercise statement.
        self.pool = Pool2D(pool_size=4, pool_stride=4, pool_type='max')
        # A 3x3 kernel needs padding=1 to keep the feature map size unchanged.
        self.conv = Conv2D(num_channels=3,
                           num_filters=num_classes,
                           filter_size=3,
                           padding=1)

    def forward(self, inputs):
        """Return the per-class feature map for ``inputs``."""
        x = self.pool(inputs)
        # Restore the input's spatial size (the last two dims of its shape).
        x = fluid.layers.interpolate(x, out_shape=inputs.shape[2:])
        x = self.conv(x)
        return x
def main():
    """Run one random (1, 3, 8, 8) input through BasicModel and print the result."""
    place = paddle.fluid.CPUPlace()  # swap for paddle.fluid.CUDAPlace(0) to run on GPU
    with fluid.dygraph.guard(place):
        model = BasicModel(num_classes=59)
        # Only a forward pass is performed here, so eval mode is used
        # (model.train() would be used for training).
        model.eval()
        input_data = np.random.rand(1, 3, 8, 8).astype(np.float32)
        print('Input data shape: ', input_data.shape)
        # Convert the numpy array into a paddle tensor.
        input_data = to_variable(input_data)
        output_data = model(input_data)
        # Convert the result back into a numpy array.
        output_data = output_data.numpy()
        print('Output data shape: ', output_data.shape)
        # The exercise asks for the model output itself to be printed in
        # numpy format, not only its shape.
        print(output_data)


if __name__ == "__main__":
    main()
2. basic_dataloader.py 的代码
import os
import random
import numpy as np
import cv2
import paddle.fluid as fluid
# Transform class added for the data loader demo.
class Transform(object):
    """Resize an image/label pair to a fixed ``size`` x ``size`` square."""

    def __init__(self, size=256):
        self.size = size

    def __call__(self, input, label):
        """Return the resized ``(input, label)`` pair."""
        target = (self.size, self.size)
        # The interpolation mode matters: the image is resized bilinearly,
        # while the label uses nearest-neighbour so that no label values
        # outside the original set are produced.
        resized_input = cv2.resize(input, target, interpolation=cv2.INTER_LINEAR)
        resized_label = cv2.resize(label, target, interpolation=cv2.INTER_NEAREST)
        return resized_input, resized_label
class BasicDataLoader():
    """Generator-style loader yielding (image, label) pairs listed in a text file.

    Each non-blank line of ``image_list_file`` holds two whitespace-separated
    paths relative to ``image_folder``: the image first, then its label map.

    Args:
        image_folder: Directory the listed paths are joined onto.
        image_list_file: Path of the text file listing the samples.
        transform: Optional callable ``(data, label) -> (data, label)``.
        shuffle: When true, the sample list is shuffled once at construction.
    """

    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=None,
                 shuffle=True):
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        self.transform = transform
        self.shuffle = shuffle
        self.data_list = self.read_list()

    def read_list(self):
        """Parse the list file into a list of ``(data_path, label_path)`` tuples."""
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                data_path = os.path.join(self.image_folder, fields[0])
                label_path = os.path.join(self.image_folder, fields[1])
                data_list.append((data_path, label_path))
        # Honour the ``shuffle`` flag instead of shuffling unconditionally.
        if self.shuffle:
            random.shuffle(data_list)
        return data_list

    def preprocess(self, data, label):
        """Check that image and label sizes agree, apply the transform, add a channel axis.

        ``data`` is expected to be H x W x C and ``label`` H x W; the returned
        label has a trailing axis of length 1 appended.
        """
        h, w, c = data.shape
        h_gt, w_gt = label.shape
        assert h == h_gt, "Error"
        assert w == w_gt, "Error"
        if self.transform:
            data, label = self.transform(data, label)
        label = label[:, :, np.newaxis]
        return data, label

    def __len__(self):
        """Return the number of listed samples."""
        return len(self.data_list)

    def __call__(self):
        """Yield preprocessed ``(data, label)`` pairs, one per listed sample."""
        for data_path, label_path in self.data_list:
            data = cv2.imread(data_path, cv2.IMREAD_COLOR)
            if data is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError("Cannot read image: %s" % data_path)
            data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
            # The label must come from label_path; reading data_path again
            # would yield a grayscale copy of the image as the "label".
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            if label is None:
                raise IOError("Cannot read label: %s" % label_path)
            print(data.shape, label.shape)
            data, label = self.preprocess(data, label)
            yield data, label
def main():
    """Wrap BasicDataLoader in a fluid DataLoader and print the batch shapes."""
    batch_size = 5
    num_epoch = 2
    place = fluid.CPUPlace()  # run on CPU; fluid.CUDAPlace(0) selects a GPU
    with fluid.dygraph.guard(place):
        # Our own sample generator, resizing every sample to 256 x 256.
        basic_dataloader = BasicDataLoader(
            image_folder='./work/dummy_data',
            image_list_file="./work/dummy_data/list.txt",
            transform=Transform(256),
            shuffle=True,
        )

        # Paddle's loader groups the generated samples into batches; per the
        # original author's note, label batches come out as (5, 256, 256, 1).
        dataloader = fluid.io.DataLoader.from_generator(capacity=1,
                                                        use_multiprocess=False)
        # Arguments: the sample generator, the batch size, and the device.
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)

        for epoch in range(1, num_epoch + 1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'Iter {idx}, Data shape: {data.shape}, Label shape: {label.shape}')


if __name__ == "__main__":
    main()
3. basic_transform.py 的代码
import cv2
import numpy as np
class Compose(object):
    """Chain several ``(image, label)`` transforms into a single callable."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        """Feed the pair through every transform in order and return the result."""
        state = (image, label)
        for step in self.transforms:
            # Each step must hand back exactly an (image, label) pair.
            out_image, out_label = step(*state)
            state = (out_image, out_label)
        return state
class Normalize(object):
    """Scale an image, then standardise it with the given mean and std."""

    def __init__(self, mean_val, std_val, val_scale=1):
        # val_scale == 1: mean/std are given in the (0, 1) range, so pixel
        # values are multiplied by 1/255 before standardising.
        # Any other val_scale: mean/std are given in the (0, 255) range and
        # pixel values are left unscaled.
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        if val_scale == 1:
            self.val_scale = 1 / 255.0
        else:
            self.val_scale = 1

    def __call__(self, image, label=None):
        """Return ``((image * val_scale - mean) / std, label)``; label is untouched."""
        scaled = image.astype(np.float32) * self.val_scale
        centered = scaled - self.mean
        # Multiply by the reciprocal, as the original does, to keep rounding identical.
        return centered * (1 / self.std), label
class ConvertDataType(object):
    """Cast the image to float32 and, when present, the label to int64."""

    def __call__(self, image, label=None):
        """Return the converted ``(image, label)`` pair; a missing label stays None."""
        converted_label = None if label is None else label.astype(np.int64)
        return image.astype(np.float32), converted_label
class Pad(object):
    """Pad an image (and label) with a constant border up to ``size`` x ``size``.

    Sides that already reach ``size`` are left alone. The image border is
    filled with ``mean_val`` (converted to the 0-255 range) and the label
    border with ``ignore_label``.
    """

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        self.size = size
        self.ignore_label = ignore_label
        # val_scale == 1 means mean_val is given in (0, 1) and is brought to
        # the 0-255 range; any other val_scale means it already is in 0-255.
        factor = 255 if val_scale == 1 else 1
        if isinstance(mean_val, (tuple, list)):
            self.mean_val = [int(x * factor) for x in mean_val]
        else:
            self.mean_val = int(mean_val * factor)

    def __call__(self, image, label=None):
        """Return the padded ``(image, label)`` pair."""
        h, w, c = image.shape
        pad_h = max(self.size - h, 0)
        pad_w = max(self.size - w, 0)
        if pad_h <= 0 and pad_w <= 0:
            # Already large enough in both directions: nothing to do.
            return image, label

        pad_h_half = int(pad_h / 2)
        pad_w_half = int(pad_w / 2)
        # Image and label share one border geometry; when the padding is odd
        # the extra pixel goes to the bottom/right side.
        border = dict(top=pad_h_half,
                      left=pad_w_half,
                      bottom=pad_h - pad_h_half,
                      right=pad_w - pad_w_half,
                      borderType=cv2.BORDER_CONSTANT)
        image = cv2.copyMakeBorder(image, value=self.mean_val, **border)
        if label is not None:
            label = cv2.copyMakeBorder(label, value=self.ignore_label, **border)
        return image, label
# Center crop.
class CenterCrop(object):
    """Crop the central ``crop_size`` x ``crop_size`` window of an image.

    Works on H x W x C and plain H x W arrays. Along any side that is shorter
    than ``crop_size`` the full extent is kept, so the result may be smaller
    than the requested window.

    Args:
        crop_size: Side length, in pixels, of the square crop window.
    """

    def __init__(self, crop_size):
        self.crop_h = crop_size
        self.crop_w = crop_size

    def __call__(self, image, label=None):
        """Return the cropped ``(image, label)`` pair; a missing label stays None."""
        h, w = image.shape[:2]
        # Clamp at 0: a negative offset would index from the end of the array
        # and silently return the wrong window.
        top = max((h - self.crop_h) // 2, 0)
        left = max((w - self.crop_w) // 2, 0)
        rows = slice(top, top + self.crop_h)
        cols = slice(left, left + self.crop_w)
        image = image[rows, cols]
        if label is not None:
            label = label[rows, cols]
        return image, label
# Resize. Like every transform here, it returns the processed (image, label) pair.
class Resize(object):
    """Resize an image (and label) to a ``size`` x ``size`` square."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, label=None):
        """Return the resized ``(image, label)`` pair; a missing label stays None."""
        dsize = (self.size, self.size)
        resized_image = cv2.resize(image, dsize, interpolation=cv2.INTER_LINEAR)
        if label is None:
            return resized_image, None
        # Nearest-neighbour for the label, bilinear for the image.
        resized_label = cv2.resize(label, dsize, interpolation=cv2.INTER_NEAREST)
        return resized_image, resized_label
# Random flip.
class RandomFlip(object):
    """Flip the image (and label) with ``cv2.flip(..., 1)`` when a random draw exceeds 0.5."""

    def __call__(self, image, label=None):
        """Return the possibly flipped ``(image, label)`` pair."""
        # A single draw from np.random.rand() decides whether to flip.
        if np.random.rand() <= 0.5:
            return image, label
        flipped_image = cv2.flip(image, 1)
        flipped_label = label if label is None else cv2.flip(label, 1)
        return flipped_image, flipped_label
# Random crop: same slicing as CenterCrop, but the window position is random.
class RandomCrop(object):
    """Crop a randomly placed ``crop_size`` x ``crop_size`` window.

    Works on H x W x C and plain H x W arrays; the label, when given, is
    cropped with the same window as the image.

    Args:
        crop_size: Side length, in pixels, of the square crop window.

    Raises:
        AssertionError: If ``crop_size`` exceeds the image height or width.
    """

    def __init__(self, crop_size):
        self.crop_size = crop_size

    def __call__(self, image, label=None):
        """Return the cropped ``(image, label)`` pair; a missing label stays None."""
        h, w = image.shape[:2]
        max_top = h - self.crop_size
        max_left = w - self.crop_size
        # Validate the sizes themselves, before drawing, so the check is
        # deterministic. Raised explicitly (same exception type and messages
        # as before) so that it is not stripped under ``python -O``.
        if max_top < 0:
            raise AssertionError("Error: crop_size > image height!")
        if max_left < 0:
            raise AssertionError("Error: crop_size > image width!")

        # Every valid offset 0..max (inclusive) is equally likely. The earlier
        # np.random.uniform(x) call passed x as ``low`` with ``high`` left at
        # 1.0, which almost never produced offset 0.
        top = int(np.random.randint(0, max_top + 1))
        left = int(np.random.randint(0, max_left + 1))
        rows = slice(top, top + self.crop_size)
        cols = slice(left, left + self.crop_size)

        image = image[rows, cols]
        if label is not None:
            label = label[rows, cols]
        return image, label
# Scale.
class Scale(object):
    """Resize an image (and label) by a scale factor."""

    def __call__(self, image, label=None, scale=1.0):
        """Return the rescaled ``(image, label)`` pair.

        Args:
            image: H x W x C array.
            label: Optional label map resized to the same target size.
            scale: A single factor, or a 2-item list/tuple whose first item
                scales the width and whose second item scales the height.
        """
        if not isinstance(scale, (list, tuple)):
            scale = [scale, scale]
        h, w, c = image.shape
        # cv2.resize takes the target size as (width, height).
        dsize = (int(w * scale[0]), int(h * scale[1]))
        image = cv2.resize(image, dsize, interpolation=cv2.INTER_LINEAR)
        if label is not None:
            # Labels hold class ids: nearest-neighbour keeps them intact,
            # whereas linear interpolation would blend neighbouring ids into
            # values that correspond to no real class.
            label = cv2.resize(label, dsize, interpolation=cv2.INTER_NEAREST)
        return image, label
# Random scale.
class RandomScale(object):
    """Rescale by a random factor taken from ``[min_scale, max_scale]``.

    With ``step == 0`` the factor is drawn from a continuous uniform
    distribution; otherwise it is picked from evenly spaced candidates between
    the two bounds. The chosen factor is kept in ``self.random_scale``.
    """

    def __init__(self, min_scale=0.5, max_scale=2.0, step=0.25):
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.step = step
        self.scale = Scale()

    def __call__(self, image, label=None):
        """Return the randomly rescaled ``(image, label)`` pair."""
        if self.step == 0:
            chosen = np.random.uniform(self.min_scale, self.max_scale, 1)[0]
        else:
            span = self.max_scale - self.min_scale
            candidates = np.linspace(self.min_scale,
                                     self.max_scale,
                                     int(span / self.step + 1))
            # Shuffle the candidates and keep the first one.
            np.random.shuffle(candidates)
            chosen = candidates[0]
        self.random_scale = chosen
        scaled_image, scaled_label = self.scale(image, label, self.random_scale)
        return scaled_image, scaled_label
def main():
    """Apply each transform to one sample image and save every result as a PNG."""
    image = cv2.imread('./work/dummy_data/JPEGImages/2008_000064.jpg')
    label = cv2.imread('./work/dummy_data/GroundTruth_trainval_png/2008_000064.png')
    crop_size = 256

    # The transforms can also be chained into a single pipeline, e.g.
    #   Compose([RandomScale(), RandomFlip(),
    #            Pad(crop_size, mean_val=[0.485, 0.456, 0.406]),
    #            RandomCrop(crop_size), ConvertDataType(), Normalize(0, 1)])
    # Below, each one is applied on its own to the original image instead.

    random_scale_op = RandomScale()
    random_scaled, _ = random_scale_op(image)
    cv2.imwrite('randScale.png', random_scaled)

    scale_op = Scale()
    half_size, _ = scale_op(image, label, 0.5)
    cv2.imwrite('scale_img.png', half_size)

    flip_op = RandomFlip()
    flipped, _ = flip_op(image, label)
    cv2.imwrite('randflip.png', flipped)

    pad_op = Pad(crop_size, mean_val=[0.485, 0.456, 0.406])
    padded, _ = pad_op(image, label)
    cv2.imwrite('pad.png', padded)

    crop_op = RandomCrop(crop_size)
    cropped, _ = crop_op(image, label)
    cv2.imwrite('randcrop.png', cropped)

    # Ten more random crops, each written to its own numbered file.
    for i in range(10):
        cropped, _ = crop_op(image, label)
        filename = 'randcrop_img' + str(i) + '.png'
        cv2.imwrite(filename, cropped)


if __name__ == "__main__":
    main()
在 aistudio 的 notebook 中,这几个程序不能直接用文件名运行,需要带上路径或先切换到所在目录才可以。
带有路径的执行方法如下, 在 notebook 中添加一个 code 区域,在区域中添加如下:
!python work/basic_model.py
切换路径的运行方法如下:
%cd work
!python basic_model.py
!python basic_dataloader.py
!python basic_transform.py
程序做的还不完整,但是跟着老师写代码,学会了一些基本的操作和思路,后面还要继续学习与练习。
1
收藏
请登录后评论
手撸代码
越看越有味,要多看几遍,一遍搞不定。。。
手写真的有收获,终于知道某些报错为什么会发生了……
写代码比看代码爽多了
公开项目啊,大佬~~
蹲一个