百度飞桨图像分割7日打卡营第1次作业手撸代码
收藏
这段时间参加了飞桨深度学习学院开设的“图像分割7日打卡营”,课程链接如下:
https://aistudio.baidu.com/aistudio/course/introduce/1767
在学习过程中,我跟着朱老师的视频手写了代码,期间遇到了一些问题。本文以第1次作业的代码为例,介绍一下写代码过程中遇到的问题。
第1次作业有如下的3个任务:
1) basic_model.py
2) basic_dataloader.py
3) basic_transform.py
几个文件都是以填空的方式来实现的,老师已经写了基础的代码,要求每个人根据课程内容和老师的视频把剩下的todo部分以填空的方式写完整,并把代码跑起来:
1. basic_model.py 的代码如下:
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable  # converts a numpy array into a paddle tensor
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import Pool2D
import numpy as np

np.set_printoptions(precision=2)


class BasicModel(fluid.dygraph.Layer):
    """Toy segmentation network: max-pool, upsample back, then classify per pixel.

    BasicModel contains:
      1. pool: 4x4 max pool op, with stride 4
      2. conv: 3x3 kernel size, takes an RGB image as input and outputs
         num_classes channels; padding=1 keeps the feature map size unchanged
      3. upsample: upsample to the input size

    Assignment requirements:
      1. The model takes a random input tensor with shape (1, 3, 8, 8)
      2. The model outputs a tensor with the same HxW size as the input,
         but with C = num_classes
      3. Print out the model output in numpy format

    Args:
        num_classes: number of output channels produced by the convolution.
    """

    def __init__(self, num_classes=59):
        super(BasicModel, self).__init__()
        # 4x4 max pooling with stride 4, as required by the spec above.
        self.pool = Pool2D(pool_size=4, pool_type='max', pool_stride=4)
        # 3x3 convolution; padding=1 preserves the spatial size.
        self.conv = Conv2D(num_channels=3,
                           num_filters=num_classes,
                           filter_size=3,
                           padding=1)

    def forward(self, inputs):
        """Run pool -> upsample -> conv.

        Args:
            inputs: 4-D tensor whose last two dimensions are the spatial
                size (main() feeds a (1, 3, 8, 8) tensor).

        Returns:
            Tensor with num_classes channels and the same spatial size as
            `inputs`.
        """
        x = self.pool(inputs)
        # Upsample back to the spatial size of the input (its last two dims).
        x = fluid.layers.interpolate(x, out_shape=inputs.shape[2:])
        x = self.conv(x)
        return x


def main():
    """Push one random image through BasicModel and print the result."""
    place = paddle.fluid.CPUPlace()
    # Use paddle.fluid.CUDAPlace(0) instead to run on the first GPU.
    with fluid.dygraph.guard(place):
        model = BasicModel(num_classes=59)
        # eval() is enough for this single forward pass; use model.train()
        # when actually training.
        model.eval()
        input_data = np.random.rand(1, 3, 8, 8).astype(np.float32)
        print('Input data shape: ', input_data.shape)
        input_data = to_variable(input_data)  # numpy array -> paddle tensor
        output_data = model(input_data)
        output_data = output_data.numpy()  # paddle tensor -> numpy array
        print('Output data shape: ', output_data.shape)
        # Requirement 3: print the model output in numpy format.
        print(output_data)


if __name__ == "__main__":
    main()
2. basic_dataloader.py 的代码
import os
import random
import numpy as np
import cv2
import paddle.fluid as fluid


class Transform(object):
    """Resize an image/label pair to a square of side `size`.

    Args:
        size: target height and width in pixels.
    """

    def __init__(self, size=256):
        self.size = size

    def __call__(self, input, label):
        """Return the resized (input, label) pair."""
        input = cv2.resize(input, (self.size, self.size),
                           interpolation=cv2.INTER_LINEAR)
        # The interpolation mode matters for labels: nearest-neighbour never
        # invents new values, so label ids stay within the valid range.
        label = cv2.resize(label, (self.size, self.size),
                           interpolation=cv2.INTER_NEAREST)
        return input, label


class BasicDataLoader():
    """Sample generator yielding (image, label) pairs listed in a text file.

    Each non-empty line of `image_list_file` holds two whitespace-separated
    paths, relative to `image_folder`: the image and its label map.

    Args:
        image_folder: root folder that the listed paths are relative to.
        image_list_file: path of the list file.
        transform: optional callable `(image, label) -> (image, label)`.
        shuffle: shuffle the sample list once when it is loaded.
    """

    def __init__(self, image_folder, image_list_file, transform=None, shuffle=True):
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        self.transform = transform
        self.shuffle = shuffle
        self.data_list = self.read_list()

    def read_list(self):
        """Parse the list file into a list of (image_path, label_path) tuples."""
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                data_path = os.path.join(self.image_folder, fields[0])
                label_path = os.path.join(self.image_folder, fields[1])
                data_list.append((data_path, label_path))
        # Honour the `shuffle` flag instead of shuffling unconditionally.
        if self.shuffle:
            random.shuffle(data_list)
        return data_list

    def preprocess(self, data, label):
        """Check that image and label sizes agree, then apply the transform.

        Args:
            data: H x W x C image array.
            label: H x W label array.

        Returns:
            (data, label), where label has a trailing channel axis added
            (H x W x 1).
        """
        h, w = data.shape[:2]
        h_gt, w_gt = label.shape
        assert h == h_gt, "Error"
        assert w == w_gt, "Error"
        if self.transform:
            data, label = self.transform(data, label)
        label = label[:, :, np.newaxis]
        return data, label

    def __len__(self):
        """Number of samples in the list."""
        return len(self.data_list)

    def __call__(self):
        """Yield preprocessed (image, label) pairs, one sample at a time.

        Raises:
            FileNotFoundError: if an image or label file cannot be read.
        """
        for data_path, label_path in self.data_list:
            data = cv2.imread(data_path, cv2.IMREAD_COLOR)
            if data is None:
                raise FileNotFoundError(f'Cannot read image: {data_path}')
            data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
            # Read the label map from label_path (not from the image path).
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            if label is None:
                raise FileNotFoundError(f'Cannot read label: {label_path}')
            print(data.shape, label.shape)
            data, label = self.preprocess(data, label)
            # Yield rather than return so samples are produced lazily.
            yield data, label


def main():
    """Iterate the dummy dataset for two epochs and print batch shapes."""
    batch_size = 5
    place = fluid.CPUPlace()
    # Use fluid.CUDAPlace(0) instead to run on the first GPU.
    with fluid.dygraph.guard(place):
        transform = Transform(256)

        basic_dataloader = BasicDataLoader(
            image_folder='./work/dummy_data',
            image_list_file="./work/dummy_data/list.txt",
            transform=transform,
            shuffle=True
        )

        # The fluid DataLoader groups the generated samples into batches;
        # per the original author's note, labels come out as (5, 256, 256, 1).
        dataloader = fluid.io.DataLoader.from_generator(capacity=1,
                                                        use_multiprocess=False)

        # Arguments: our sample generator, the batch size and the device.
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)

        num_epoch = 2
        for epoch in range(1, num_epoch+1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'Iter {idx}, Data shape: {data.shape}, Label shape: {label.shape}')


if __name__ == "__main__":
    main()
3. basic_transform.py 的代码
import cv2
import numpy as np


class Compose(object):
    """Chain several transforms; each one maps (image, label) -> (image, label)."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        """Apply every transform in order and return the final pair."""
        for transform in self.transforms:
            image, label = transform(image, label)
        return image, label


class Normalize(object):
    """Scale the image, subtract the mean and divide by the std.

    Set val_scale = 1 if mean and std are in range (0, 1): the image is then
    multiplied by 1/255 first. Set val_scale to any other value if mean and
    std are in range (0, 255): the image is then left unscaled.
    """

    def __init__(self, mean_val, std_val, val_scale=1):
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        if val_scale == 1:
            self.val_scale = 1/255.0
        else:
            self.val_scale = 1

    def __call__(self, image, label=None):
        """Return the normalized float32 image; the label is passed through."""
        scaled = image.astype(np.float32) * self.val_scale
        centered = scaled - self.mean
        return centered * (1 / self.std), label


class ConvertDataType(object):
    """Cast the image to float32 and the label (if given) to int64."""

    def __call__(self, image, label=None):
        converted_label = label
        if converted_label is not None:
            converted_label = converted_label.astype(np.int64)
        return image.astype(np.float32), converted_label


class Pad(object):
    """Pad image and label with a constant border up to at least size x size.

    Set val_scale to 1 if mean_val is in range (0, 1): it is then multiplied
    by 255 to obtain the fill colour. Set val_scale to 255 if mean_val is
    already in range (0, 255). The label border is filled with ignore_label.
    """

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        self.size = size
        self.ignore_label = ignore_label
        # Bring the fill value from the 0-1 range to the 0-255 range if needed.
        factor = 1
        if val_scale == 1:
            factor = 255
        if isinstance(mean_val, (tuple, list)):
            self.mean_val = [int(channel * factor) for channel in mean_val]
        else:
            self.mean_val = int(mean_val * factor)

    def __call__(self, image, label=None):
        """Return the padded pair; inputs already large enough are unchanged."""
        h, w, c = image.shape
        pad_h = max(self.size - h, 0)
        pad_w = max(self.size - w, 0)
        if pad_h <= 0 and pad_w <= 0:
            return image, label

        # Split the padding between both sides; the extra pixel of an odd
        # amount goes to the bottom/right.
        top = int(pad_h / 2)
        left = int(pad_w / 2)
        border = dict(top=top,
                      left=left,
                      bottom=pad_h - top,
                      right=pad_w - left,
                      borderType=cv2.BORDER_CONSTANT)
        image = cv2.copyMakeBorder(image, value=self.mean_val, **border)
        if label is not None:
            label = cv2.copyMakeBorder(label, value=self.ignore_label, **border)
        return image, label
# Center crop.
class CenterCrop(object):
    """Cut a crop_size x crop_size window out of the centre of the image.

    Args:
        crop_size: side length of the square crop in pixels.
    """

    def __init__(self, crop_size):
        self.crop_h = crop_size
        self.crop_w = crop_size

    def __call__(self, image, label=None):
        """Return the centre crop of image (and label, if given).

        Raises:
            ValueError: if the image is smaller than the crop; negative
                offsets would otherwise silently slice the wrong region.
        """
        h, w = image.shape[:2]
        if h < self.crop_h or w < self.crop_w:
            raise ValueError(
                'crop size (%d, %d) exceeds image size (%d, %d)'
                % (self.crop_h, self.crop_w, h, w))
        top = (h - self.crop_h) // 2
        left = (w - self.crop_w) // 2
        image = image[top:top + self.crop_h, left:left + self.crop_w]
        if label is not None:
            label = label[top:top + self.crop_h, left:left + self.crop_w]
        return image, label


# Resize. Every transform returns the processed (image, label) pair.
class Resize(object):
    """Resize image and label to a size x size square."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, label=None):
        image = cv2.resize(image, (self.size, self.size),
                           interpolation=cv2.INTER_LINEAR)
        if label is not None:
            # Nearest-neighbour keeps label values unchanged.
            label = cv2.resize(label, (self.size, self.size),
                               interpolation=cv2.INTER_NEAREST)
        return image, label


# Random flip.
class RandomFlip(object):
    """Flip image and label horizontally with probability 0.5."""

    def __call__(self, image, label=None):
        if np.random.rand() > 0.5:
            image = cv2.flip(image, 1)
            if label is not None:
                label = cv2.flip(label, 1)
        return image, label


# Random crop; same idea as CenterCrop but with a random window position.
class RandomCrop(object):
    """Cut a crop_size x crop_size window at a uniformly random position.

    Args:
        crop_size: side length of the square crop in pixels.
    """

    def __init__(self, crop_size):
        self.crop_size = crop_size

    def __call__(self, image, label=None):
        """Return a random crop of image (and the same window of label).

        Raises:
            AssertionError: if crop_size exceeds the image height or width.
        """
        h, w = image.shape[:2]
        # Validate before sampling so the check is deterministic.
        assert h >= self.crop_size, "Error: crop_size > image height!"
        assert w >= self.crop_size, "Error: crop_size > image width!"
        # randint's upper bound is exclusive, so +1 makes the last valid
        # offset reachable. (np.random.uniform(x) would set `low`, not
        # `high`, and never sample offset 0.)
        top = np.random.randint(0, h - self.crop_size + 1)
        left = np.random.randint(0, w - self.crop_size + 1)
        bottom = top + self.crop_size
        right = left + self.crop_size
        image = image[top:bottom, left:right]
        if label is not None:
            label = label[top:bottom, left:right]
        return image, label


# Scale.
class Scale(object):
    """Rescale image and label by a factor."""

    def __call__(self, image, label=None, scale=1.0):
        """Return the rescaled pair.

        Args:
            image: H x W (x C) image array.
            label: optional H x W label array.
            scale: a single factor, or a (width_factor, height_factor) pair.
        """
        if not isinstance(scale, (list, tuple)):
            scale = [scale, scale]
        h, w = image.shape[:2]
        new_size = (int(w * scale[0]), int(h * scale[1]))  # cv2 wants (w, h)
        image = cv2.resize(image, new_size, interpolation=cv2.INTER_LINEAR)
        if label is not None:
            # Labels are class ids: linear interpolation would blend them
            # into meaningless values, so use nearest-neighbour.
            label = cv2.resize(label, new_size, interpolation=cv2.INTER_NEAREST)
        return image, label


# Random scale.
class RandomScale(object):
    """Rescale by a factor drawn at random from [min_scale, max_scale].

    Args:
        min_scale: smallest scale factor.
        max_scale: largest scale factor.
        step: spacing of the candidate factors; 0 draws from the
            continuous range instead.
    """

    def __init__(self, min_scale=0.5, max_scale=2.0, step=0.25):
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.step = step
        self.scale = Scale()

    def __call__(self, image, label=None):
        """Return the randomly rescaled pair.

        The factor used is also stored in `self.random_scale`.
        """
        if self.step == 0:
            factor = np.random.uniform(self.min_scale, self.max_scale)
        else:
            # round() guards against float error such as 4.999999 -> 4.
            num_steps = int(round((self.max_scale - self.min_scale) / self.step)) + 1
            candidates = np.linspace(self.min_scale, self.max_scale, num_steps)
            factor = np.random.choice(candidates)
        self.random_scale = factor
        image, label = self.scale(image, label, self.random_scale)
        return image, label


def main():
    """Apply each transform to a sample image and save the results as PNGs."""
    image_path = './work/dummy_data/JPEGImages/2008_000064.jpg'
    label_path = './work/dummy_data/GroundTruth_trainval_png/2008_000064.png'
    image = cv2.imread(image_path)
    # Read the label map as a single channel so that constant padding and
    # nearest-neighbour resizing operate on class ids, not on BGR triples.
    label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError('Cannot read image: ' + image_path)
    if label is None:
        raise FileNotFoundError('Cannot read label: ' + label_path)

    crop_size = 256

    # Transforms demonstrated below: RandomScale, Scale, RandomFlip, Pad,
    # RandomCrop. In training they would be chained with Compose, followed
    # by ConvertDataType and Normalize.
    randScale = RandomScale()
    randscale_img, _ = randScale(image)
    cv2.imwrite('randScale.png', randscale_img)

    scale = Scale()
    scale_img, _ = scale(image, label, 0.5)
    cv2.imwrite('scale_img.png', scale_img)

    randFlip = RandomFlip()
    randflip_img, _ = randFlip(image, label)
    cv2.imwrite('randflip.png', randflip_img)

    pad = Pad(crop_size, mean_val=[0.485, 0.456, 0.406])
    pad_img, pad_label = pad(image, label)
    cv2.imwrite('pad.png', pad_img)

    # Crop from the padded pair: it equals the original when the image is
    # already large enough, and guarantees the crop fits when it is not.
    randCrop = RandomCrop(crop_size)
    randcrop_img, _ = randCrop(pad_img, pad_label)
    cv2.imwrite('randcrop.png', randcrop_img)

    for i in range(10):
        randcrop_img, _ = randCrop(pad_img, pad_label)
        filename = 'randcrop_img'+str(i)+'.png'
        cv2.imwrite(filename, randcrop_img)


if __name__ == "__main__":
    main()
在 AI Studio 的 notebook 中,这几个程序不能直接按文件名执行,需要带上路径,或者先切换到程序所在的目录才可以运行。
带有路径的执行方法如下, 在 notebook 中添加一个 code 区域,在区域中添加如下:
!python work/basic_model.py
切换路径的运行方法如下:
%cd work
!python basic_model.py
!python basic_dataloader.py
!python basic_transform.py
程序做的还不完整,但是跟着老师写代码,学会了一些基本的操作和思路,后面还要继续学习与练习。
1
收藏
请登录后评论
手撸代码
越看越有味,要多看几遍,一遍搞不定。。。
手写真的有收获,终于知道某些报错为什么会发生了……
写代码比看代码爽多了
公开项目啊,大佬~~
蹲一个