飞桨图像分割训练营-图像增强实现

项目

数据集

课程

比赛

模型库

活动

论坛

访问飞桨官网

项目

数据集

课程

比赛

模型库

活动

论坛

访问飞桨官网

Jason369 发布于2020-10

import cv2 as cv
import numpy as np
from PIL import Image
import random
import math
import matplotlib.pyplot as plt
import os

class Compose(object):
    """Chain several transforms and run them one after another.

    Every transform is invoked as ``transform(image, label)`` and has to
    return an ``(image, label)`` pair, which becomes the input of the
    next transform in the list.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        current_image, current_label = image, label
        for transform in self.transforms:
            current_image, current_label = transform(current_image,
                                                     current_label)
        return current_image, current_label


class Normalize(object):
    """Standardise an image as ``(image * scale - mean) / std``.

    ``val_scale`` selects the pixel scaling applied before the mean is
    subtracted: pass 1 (the default) when ``mean_val`` / ``std_val`` are
    expressed in the 0-1 range, so pixels are first multiplied by 1/255;
    pass any other value when they are expressed in the 0-255 range, in
    which case pixels are left unscaled.
    """

    def __init__(self, mean_val, std_val, val_scale=1):
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        if val_scale == 1:
            self.val_scale = 1 / 255.0
        else:
            self.val_scale = 1

    def __call__(self, image, label=None):
        # The label is passed through untouched; only the image is normalised.
        scaled = image.astype(np.float32) * self.val_scale
        centered = scaled - self.mean
        return centered * (1 / self.std), label


class ConvertDataType(object):
    """Cast the image to float32 and the label, when given, to int64."""

    def __call__(self, image, label=None):
        converted_label = None if label is None else label.astype(np.int64)
        return image.astype(np.float32), converted_label


class Pad(object):
    """Pad an image (and its label) up to at least ``size`` x ``size``.

    Padding is split evenly between the two sides of each axis; when the
    amount is odd the extra pixel goes to the bottom / right. The image
    border is filled with ``mean_val`` and the label border with
    ``ignore_label``. Inputs that are already large enough are returned
    unchanged.

    Args:
        size: minimum height and width of the output.
        ignore_label: fill value for the padded label area.
        mean_val: fill value for the padded image area, either a scalar
            or a per-channel tuple/list.
        val_scale: pass 1 (default) when ``mean_val`` is expressed in the
            0-1 range, so that it is multiplied by 255; pass any other
            value (e.g. 255) when ``mean_val`` is already in 0-255.
    """

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        factor = 255 if val_scale == 1 else 1

        self.size = size
        self.ignore_label = ignore_label
        # Store the fill colour in the 0-255 range used by the image.
        if isinstance(mean_val, (tuple, list)):
            self.mean_val = [int(x * factor) for x in mean_val]
        else:
            self.mean_val = int(mean_val * factor)

    def __call__(self, image, label=None):
        # Only the two leading (spatial) axes are needed, so 2-D
        # single-channel images are accepted as well as H x W x C ones.
        h, w = image.shape[:2]
        pad_h = max(self.size - h, 0)
        pad_w = max(self.size - w, 0)

        if pad_h <= 0 and pad_w <= 0:
            return image, label

        pad_h_half = int(pad_h / 2)
        pad_w_half = int(pad_w / 2)
        borders = dict(top=pad_h_half,
                       left=pad_w_half,
                       bottom=pad_h - pad_h_half,
                       right=pad_w - pad_w_half,
                       borderType=cv.BORDER_CONSTANT)

        image = cv.copyMakeBorder(image, value=self.mean_val, **borders)
        if label is not None:
            label = cv.copyMakeBorder(label, value=self.ignore_label,
                                      **borders)
        return image, label


# TODO
def center_crop(img_array, crop_size):
    """Return the central ``crop_size`` window of ``img_array``.

    The window is taken along the first two axes; any trailing axes are
    kept whole. A ``crop_size`` larger than both dimensions is replaced
    by the shorter dimension, and along an axis shorter than
    ``crop_size`` the full extent of that axis is returned.
    """
    assert crop_size>0,"crop size should be greater than zero"
    n_rows, n_cols = img_array.shape[0], img_array.shape[1]
    if crop_size > max(n_rows, n_cols):
        crop_size = min(n_rows, n_cols)

    def _window(length):
        # Centre the window, clamping both ends to the valid index range.
        start = max(int((length - crop_size) / 2), 0)
        stop = min(start + crop_size, length)
        return slice(start, stop)

    return img_array[_window(n_rows), _window(n_cols)]
class CenterCrop(object):
    """Crop the central ``size`` x ``size`` window of an image and label.

    When ``size`` exceeds both image dimensions, the shorter image
    dimension is used for this call instead. The label is optional and is
    cropped with the same window size as the image.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, img, label=None):
        # Clamp into a local variable: writing the clamped value back to
        # self.size would permanently shrink the crop for every later,
        # larger image processed by the same instance.
        crop_size = self.size
        if crop_size > max(img.shape[0], img.shape[1]):
            crop_size = min(img.shape[0], img.shape[1])
        img = center_crop(img, crop_size)
        if label is not None:
            label = center_crop(label, crop_size)
        return img, label
# TODO
class Resize(object):
    """Resize an image (and optional label) to ``size`` x ``size``.

    The image is interpolated bilinearly. The label is resized with
    nearest-neighbour interpolation so that its values are only ever
    copied, never blended into values that were not present in the
    input label.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, img, label=None):
        target = (self.size, self.size)
        img = cv.resize(img, target, interpolation=cv.INTER_LINEAR)
        # Rebinding ``label`` (rather than a separate output name) keeps
        # the return statement valid when no label was supplied.
        if label is not None:
            label = cv.resize(label, target, interpolation=cv.INTER_NEAREST)
        return img, label


class RandomFlip(object):
    """Flip an image (and optional label) with ``cv.flip``.

    ``flip`` is handed to ``cv.flip`` unchanged:
    1: horizontally
    0: vertically
    -1: horizontally and vertically

    NOTE(review): despite the class name, no random draw is made here;
    the flip is applied on every call.
    """

    def __init__(self, flip=1):
        assert flip in (-1, 0, 1)
        self.flip = flip

    def __call__(self, img, label=None):
        flipped_img = cv.flip(img, self.flip)
        flipped_label = None if label is None else cv.flip(label, self.flip)
        return flipped_img, flipped_label
        


class RandomCrop(object):
    """Cut a random box out of the image and resize it to ``size`` x ``size``.

    The box covers a random fraction of the image area (``self.scale``)
    with a random aspect ratio (``self.ratio``). The same box is applied
    to the label, which is resized with nearest-neighbour interpolation.
    """

    def __init__(self, size):
        self.size = size
        self.scale = [0.08, 1.0]
        self.ratio = [3. / 4., 4. / 3.]

    def __call__(self, img,label=None):
        out_size = (self.size, self.size)

        # Taking the square root makes rel_w / rel_h equal the sampled
        # aspect ratio while rel_w * rel_h stays 1.
        root = math.sqrt(random.uniform(*self.ratio))
        rel_w = 1. * root
        rel_h = 1. / root

        img_h, img_w = img.shape[:2]

        # Largest area fraction for which the box still fits in the image.
        fit_limit = min((float(img_w) / img_h) / (rel_w**2),
                        (float(img_h) / img_w) / (rel_h**2))
        upper = min(self.scale[1], fit_limit)
        lower = min(self.scale[0], fit_limit)

        box_area = img_w * img_h * random.uniform(lower, upper)
        side = math.sqrt(box_area)
        box_w = int(side * rel_w)
        box_h = int(side * rel_h)

        # Top-left corner of the box, drawn so the box stays in bounds.
        left = random.randint(0, img_w - box_w)
        top = random.randint(0, img_h - box_h)
        rows = slice(top, top + box_h)
        cols = slice(left, left + box_w)

        img = cv.resize(img[rows, cols, :], out_size,
                        interpolation=cv.INTER_LINEAR)
        if label is not None:
            label = cv.resize(label[rows, cols], out_size,
                              interpolation=cv.INTER_NEAREST)
        return img, label

class Scale(object):
    """Resize so that the longer edge equals ``size``, keeping aspect ratio.

    Inputs whose longer edge already equals ``size`` are returned
    unchanged. The image is interpolated bilinearly; the optional label
    is resized with nearest-neighbour interpolation so its values are
    copied rather than blended.

    Args:
        size: target length of the longer edge, in pixels.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, img, label=None):
        # shape is (rows, cols, ...), i.e. height first, then width.
        h, w = img.shape[:2]
        if label is not None:
            # Only the spatial extent has to agree; a 2-D label next to
            # an H x W x C image is accepted.
            assert tuple(label.shape[:2]) == (h, w), \
                "image and label must have the same height and width"

        if max(h, w) == self.size:
            return img, label

        if w > h:
            ow = self.size
            oh = max(1, int(self.size * h / w))
        else:
            oh = self.size
            ow = max(1, int(self.size * w / h))

        # cv.resize expects the target size as (width, height).
        img = cv.resize(img, (ow, oh), interpolation=cv.INTER_LINEAR)
        if label is not None:
            label = cv.resize(label, (ow, oh), interpolation=cv.INTER_NEAREST)
        return img, label

    # The method was originally (mis)spelled ``_call__``; keep that name
    # available for any caller that invoked it explicitly.
    _call__ = __call__

# TODO
class RandomScale(object):
    """Randomly rescale an image (and optional label) by its short edge.

    The short edge is resized to a randomly drawn ``short_size`` and the
    long edge follows so that the aspect ratio is kept. The label is
    resized to the same output size with nearest-neighbour interpolation.

    NOTE(review): in the source this block was recovered from, the line
    holding the probability test, the ``short_size`` draw and the
    ``h`` / ``w`` lookup was truncated (the text between a ``<`` and a
    ``>`` was lost). The reconstruction assumes that
      * ``p`` is the probability of returning the input unchanged, and
      * ``short_size`` is drawn uniformly from
        ``[0.5 * base_size, 2.0 * base_size]``.
    Confirm both against the original project before relying on them.

    Args:
        base_size: reference length of the short edge.
        p: probability used by the random test described above.
    """

    def __init__(self, base_size=320, p=0.2):
        self.base_size = base_size
        self.p = p

    def __call__(self, img, label=None):
        # TODO confirm: assumed early exit with probability p.
        if random.random() < self.p:
            return img, label

        # random scale (short edge)
        # TODO confirm: assumed sampling range for the short edge.
        short_size = random.randint(int(self.base_size * 0.5),
                                    int(self.base_size * 2.0))
        h, w = img.shape[:2]
        if h > w:
            ow = short_size
            oh = int(1.0 * h * ow / w)
        else:
            oh = short_size
            ow = int(1.0 * w * oh / h)

        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv.resize is ``dst``, not the flag.
        img = cv.resize(img, (ow, oh), interpolation=cv.INTER_LINEAR)
        if label is not None:
            label = cv.resize(label, (ow, oh), interpolation=cv.INTER_NEAREST)
        return img, label


def showimg(img,label):
    """Show ``img`` and ``label`` side by side in a new matplotlib figure."""
    plt.figure()
    # Panel 1 (left) holds the image, panel 2 (right) the label.
    for panel, picture in enumerate((img, label), start=1):
        plt.subplot(1, 2, panel)
        plt.imshow(picture)
    plt.show()

以上是一些常见数据增强方法的实现，包括填充（Pad）、调整尺寸（Resize）、随机翻转、中心裁剪、随机裁剪和缩放。效果还行，代码有待优化。

# Demo: load one sample image with its ground-truth mask, then run each
# transform on the pair and display the result.
image = cv.imread('./work/dummy_data/JPEGImages/2008_000064.jpg')
label = cv.imread('./work/dummy_data/GroundTruth_trainval_png/2008_000064.png')
print(image.shape,label.shape)

for demo_transform in (
    Resize(256),
    Pad(600),
    CenterCrop(100),
    RandomFlip(-1),
    RandomCrop(200),
    RandomScale(base_size=1000,p=0.1),
):
    i,j = demo_transform(image,label)
    showimg(i,j)

参考：https://aistudio.baidu.com/aistudio/course/introduce/1767

全部评论(2)

AIStudio810261

#2 回复于2020-10

看着很不错呀

AIStudio810258

#3 回复于2020-10

我感觉cv上，图像增强很管用啊