使用Pytorch进行图像分类,AI challenger 农作物病害分类竞赛源码解读
1.首先对给的数据进行划分,类型为每个类单独放在一个文件夹中
import json
import shutil
import os
from glob import glob
from tqdm import tqdm

# Purpose of this script: create one folder per class and copy every annotated
# image into the folder of its class, following the labels stored in the
# competition's JSON annotation files.
# Classes 44 and 45 are discarded (unreliable data) and every class id above
# them is shifted down by two, so the remaining 59 labels are contiguous (0..58).

NUM_CLASSES = 59
DROPPED_CLASSES = (44, 45)
TRAIN_ROOT = "./data/train/"
IMAGE_ROOT = "./data/temp/images/"
LABEL_ROOT = "./data/temp/labels/"


def load_annotations(path):
    """Load one annotation file and return the decoded JSON list."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


# exist_ok keeps the loop going when some folders already exist; the previous
# bare try/except stopped at the first existing folder and silently skipped
# the creation of all the following ones.
for i in range(NUM_CLASSES):
    os.makedirs(TRAIN_ROOT + str(i), exist_ok=True)

file_train = load_annotations(LABEL_ROOT + "AgriculturalDisease_train_annotations.json")
file_val = load_annotations(LABEL_ROOT + "AgriculturalDisease_validation_annotations.json")
file_list = file_train + file_val

for file in tqdm(file_list):
    filename = file["image_id"]
    origin_path = IMAGE_ROOT + filename
    ids = file["disease_class"]
    if ids in DROPPED_CLASSES:
        continue
    if ids > 45:
        # close the gap left by the two dropped classes
        ids = ids - 2
    save_path = TRAIN_ROOT + str(ids) + "/"
    shutil.copy(origin_path, save_path)
2.获取增强数据集类的定义
1.采用自定义获取增强数据类,此Dataset类中重新定义了对数据进行数据增强的多种方式,不仅限于pytorch中自带的增强方式。
首先附上自定义的数据增强的函数代码:
方式一:把每种增强方式定义为一个实现了 `__call__` 方法的类,然后在 Dataset 类的 `__getitem__` 方法所使用的 Compose 中列出这些自定义类,即可调用。
# 数据增强的多种方式,使用自定义的方法。调用只需在dataloader.py文件中的get_item函数中调用类自身参数 # transforms,transforms中集合了compose,compose中列出详细所使用的增强方式。 from __future__ import division import cv2 import numpy as np from numpy import random import math from sklearn.utils import shuffle # 常用的增强方式几乎都在这里,只需在compose中列出类名即可 __all__ = ['Compose','RandomHflip', 'RandomUpperCrop', 'Resize', 'UpperCrop', 'RandomBottomCrop', "RandomErasing",'BottomCrop', 'Normalize', 'RandomSwapChannels', 'RandomRotate', 'RandomHShift',"CenterCrop","RandomVflip",'ExpandBorder', 'RandomResizedCrop', 'RandomDownCrop', 'DownCrop', 'ResizedCrop',"FixRandomRotate"] # 组合 # “随机翻转”,“随机顶部切割”,“调整大小”,“上切割”,“随机底部切割”、 # “随机擦除”,“底部切割”,“正则化”,“随机交换频道”,“随机旋转”, # “随机HShift”,“中央切割”,“随机Vflip”,“扩展边界”,“随机调整切割”, # “随机下降”,“下降切割”, “调整切割”,“固定随机化”。 # 每个增强方式类需要调用普通方法描述如下: def rotate_nobound(image, angle, center=None, scale=1.): (h, w) = image.shape[:2] # if the center is None, initialize it as the center of # the image if center is None: center = (w // 2, h // 2) # perform the rotation M = cv2.getRotationMatrix2D(center, angle, scale) rotated = cv2.warpAffine(image, M, (w, h)) return rotated def scale_down(src_size, size): w, h = size sw, sh = src_size if sh < h: w, h = float(w * sh) / h, sh if sw < w: w, h = sw, float(h * sw) / w return int(w), int(h) def fixed_crop(src, x0, y0, w, h, size=None): out = src[y0:y0 + h, x0:x0 + w] if size is not None and (w, h) != size: out = cv2.resize(out, (size[0], size[1]), interpolation=cv2.INTER_CUBIC) return out # 固定随机旋转 class FixRandomRotate(object): def __init__(self, angles=[0,90,180,270], bound=False): self.angles = angles self.bound = bound def __call__(self,img): do_rotate = random.randint(0, 4) angle=self.angles[do_rotate] if self.bound: img = rotate_bound(img, angle) else: img = rotate_nobound(img, angle) return img def center_crop(src, size): h, w = src.shape[0:2] new_w, new_h = scale_down((w, h), size) x0 = int((w - new_w) / 2) y0 = int((h - new_h) / 2) out = fixed_crop(src, x0, y0, new_w, new_h, size) 
return out def bottom_crop(src, size): h, w = src.shape[0:2] new_w, new_h = scale_down((w, h), size) x0 = int((w - new_w) / 2) y0 = int((h - new_h) * 0.75) out = fixed_crop(src, x0, y0, new_w, new_h, size) return out def rotate_bound(image, angle): # grab the dimensions of the image and then determine the # center h, w = image.shape[:2] (cX, cY) = (w // 2, h // 2) M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0) cos = np.abs(M[0, 0]) sin = np.abs(M[0, 1]) # compute the new bounding dimensions of the image nW = int((h * sin) + (w * cos)) nH = int((h * cos) + (w * sin)) # adjust the rotation matrix to take into account translation M[0, 2] += (nW / 2) - cX M[1, 2] += (nH / 2) - cY rotated = cv2.warpAffine(image, M, (nW, nH)) return rotated # 常用增强方式,以类的方式体现: # 将多个transform组合起来使用 crop切割 filp旋转 class Compose(object): def __init__(self, transforms): self.transforms = transforms def __call__(self, img): for t in self.transforms: img = t(img) return img class RandomRotate(object): def __init__(self, angles, bound=False): self.angles = angles self.bound = bound def __call__(self,img): do_rotate = random.randint(0, 2) if do_rotate: angle = np.random.uniform(self.angles[0], self.angles[1]) if self.bound: img = rotate_bound(img, angle) else: img = rotate_nobound(img, angle) return img class RandomBrightness(object): def __init__(self, delta=10): assert delta >= 0 assert delta <= 255 self.delta = delta def __call__(self, image): if random.randint(2): delta = random.uniform(-self.delta, self.delta) image = (image + delta).clip(0.0, 255.0) # print('RandomBrightness,delta ',delta) return image class RandomContrast(object): def __init__(self, lower=0.9, upper=1.05): self.lower = lower self.upper = upper assert self.upper >= self.lower, "contrast upper must be >= lower." assert self.lower >= 0, "contrast lower must be non-negative." 
# expects float image def __call__(self, image): if random.randint(2): alpha = random.uniform(self.lower, self.upper) # print('contrast:', alpha) image = (image * alpha).clip(0.0,255.0) return image class RandomSaturation(object): def __init__(self, lower=0.8, upper=1.2): self.lower = lower self.upper = upper assert self.upper >= self.lower, "contrast upper must be >= lower." assert self.lower >= 0, "contrast lower must be non-negative." def __call__(self, image): if random.randint(2): alpha = random.uniform(self.lower, self.upper) image[:, :, 1] *= alpha # print('RandomSaturation,alpha',alpha) return image class RandomHue(object): def __init__(self, delta=18.0): assert delta >= 0.0 and delta <= 360.0 self.delta = delta def __call__(self, image): if random.randint(2): alpha = random.uniform(-self.delta, self.delta) image[:, :, 0] += alpha image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 # print('RandomHue,alpha:', alpha) return image class ConvertColor(object): def __init__(self, current='BGR', transform='HSV'): self.transform = transform self.current = current def __call__(self, image): if self.current == 'BGR' and self.transform == 'HSV': image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) elif self.current == 'HSV' and self.transform == 'BGR': image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) else: raise NotImplementedError return image class RandomSwapChannels(object): def __call__(self, img): if np.random.randint(2): order = np.random.permutation(3) return img[:,:,order] return img class RandomCrop(object): def __init__(self, size): self.size = size def __call__(self, image): h, w, _ = image.shape new_w, new_h = scale_down((w, h), self.size) if w == new_w: x0 = 0 else: x0 = random.randint(0, w - new_w) if h == new_h: y0 = 0 else: y0 = random.randint(0, h - new_h) out = fixed_crop(image, x0, y0, new_w, new_h, self.size) return out class RandomResizedCrop(object): def __init__(self, size,scale=(0.49, 1.0), ratio=(1., 1.)): 
self.size = size self.scale = scale self.ratio = ratio def __call__(self,img): if random.random() < 0.2: return cv2.resize(img,self.size) h, w, _ = img.shape area = h * w d=1 for attempt in range(10): target_area = random.uniform(self.scale[0], self.scale[1]) * area aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) new_w = int(round(math.sqrt(target_area * aspect_ratio))) new_h = int(round(math.sqrt(target_area / aspect_ratio))) if random.random() < 0.5: new_h, new_w = new_w, new_h if new_w < w and new_h < h: x0 = random.randint(0, w - new_w) y0 = (random.randint(0, h - new_h))//d out = fixed_crop(img, x0, y0, new_w, new_h, self.size) return out # Fallback return center_crop(img, self.size) class DownCrop(): def __init__(self, size, select, scale=(0.36,0.81)): self.size = size self.scale = scale self.select = select def __call__(self,img, attr_idx): if attr_idx not in self.select: return img, attr_idx if attr_idx == 0: self.scale=(0.64,1.0) h, w, _ = img.shape area = h * w s = (self.scale[0]+self.scale[1])/2.0 target_area = s * area new_w = int(round(math.sqrt(target_area))) new_h = int(round(math.sqrt(target_area))) if new_w < w and new_h < h: dw = w-new_w x0 = int(0.5*dw) y0 = h-new_h out = fixed_crop(img, x0, y0, new_w, new_h, self.size) return out, attr_idx # Fallback return center_crop(img, self.size), attr_idx class ResizedCrop(object): def __init__(self, size, select,scale=(0.64, 1.0), ratio=(3. / 4., 4. 
/ 3.)): self.size = size self.scale = scale self.ratio = ratio self.select = select def __call__(self,img, attr_idx): if attr_idx not in self.select: return img, attr_idx h, w, _ = img.shape area = h * w d=1 if attr_idx == 2: self.scale=(0.36,0.81) d=2 if attr_idx == 0: self.scale=(0.81,1.0) target_area = (self.scale[0]+self.scale[1])/2.0 * area # aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) new_w = int(round(math.sqrt(target_area))) new_h = int(round(math.sqrt(target_area))) # if random.random() < 0.5: # new_h, new_w = new_w, new_h if new_w < w and new_h < h: x0 = (w - new_w)//2 y0 = (h - new_h)//d//2 out = fixed_crop(img, x0, y0, new_w, new_h, self.size) # cv2.imshow('{}_img'.format(idx2attr_map[attr_idx]), img) # cv2.imshow('{}_crop'.format(idx2attr_map[attr_idx]), out) # # cv2.waitKey(0) return out, attr_idx # Fallback return center_crop(img, self.size), attr_idx class RandomHflip(object): def __call__(self, image): if random.randint(2): return cv2.flip(image, 1) else: return image class RandomVflip(object): def __call__(self, image): if random.randint(2): return cv2.flip(image, 0) else: return image class Hflip(object): def __init__(self,doHflip): self.doHflip = doHflip def __call__(self, image): if self.doHflip: return cv2.flip(image, 1) else: return image class CenterCrop(object): def __init__(self, size): self.size = size def __call__(self, image): return center_crop(image, self.size) class UpperCrop(): def __init__(self, size, scale=(0.09, 0.64)): self.size = size self.scale = scale def __call__(self,img): h, w, _ = img.shape area = h * w s = (self.scale[0]+self.scale[1])/2.0 target_area = s * area new_w = int(round(math.sqrt(target_area))) new_h = int(round(math.sqrt(target_area))) if new_w < w and new_h < h: dw = w-new_w x0 = int(0.5*dw) y0 = 0 out = fixed_crop(img, x0, y0, new_w, new_h, self.size) return out # Fallback return center_crop(img, self.size) class RandomUpperCrop(object): def __init__(self, size, select, scale=(0.09, 0.64), 
ratio=(3. / 4., 4. / 3.)): self.size = size self.scale = scale self.ratio = ratio self.select = select def __call__(self,img, attr_idx): if random.random() < 0.2: return img, attr_idx if attr_idx not in self.select: return img, attr_idx h, w, _ = img.shape area = h * w for attempt in range(10): s = random.uniform(self.scale[0], self.scale[1]) d = 0.1 + (0.3 - 0.1) / (self.scale[1] - self.scale[0]) * (s - self.scale[0]) target_area = s * area aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) new_w = int(round(math.sqrt(target_area * aspect_ratio))) new_h = int(round(math.sqrt(target_area / aspect_ratio))) # new_w = int(round(math.sqrt(target_area))) # new_h = int(round(math.sqrt(target_area))) if new_w < w and new_h < h: dw = w-new_w x0 = random.randint(int((0.5-d)*dw), int((0.5+d)*dw)+1) y0 = (random.randint(0, h - new_h))//10 out = fixed_crop(img, x0, y0, new_w, new_h, self.size) return out, attr_idx # Fallback return center_crop(img, self.size), attr_idx class RandomDownCrop(object): def __init__(self, size, select, scale=(0.36, 0.81), ratio=(3. / 4., 4. 
/ 3.)): self.size = size self.scale = scale self.ratio = ratio self.select = select def __call__(self,img, attr_idx): if random.random() < 0.2: return img, attr_idx if attr_idx not in self.select: return img, attr_idx if attr_idx == 0: self.scale=(0.64,1.0) h, w, _ = img.shape area = h * w for attempt in range(10): s = random.uniform(self.scale[0], self.scale[1]) d = 0.1 + (0.3 - 0.1) / (self.scale[1] - self.scale[0]) * (s - self.scale[0]) target_area = s * area aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) new_w = int(round(math.sqrt(target_area * aspect_ratio))) new_h = int(round(math.sqrt(target_area / aspect_ratio))) # # new_w = int(round(math.sqrt(target_area))) # new_h = int(round(math.sqrt(target_area))) if new_w < w and new_h < h: dw = w-new_w x0 = random.randint(int((0.5-d)*dw), int((0.5+d)*dw)+1) y0 = (random.randint((h - new_h)*9//10, h - new_h)) out = fixed_crop(img, x0, y0, new_w, new_h, self.size) # cv2.imshow('{}_img'.format(idx2attr_map[attr_idx]), img) # cv2.imshow('{}_crop'.format(idx2attr_map[attr_idx]), out) # # cv2.waitKey(0) return out, attr_idx # Fallback return center_crop(img, self.size), attr_idx class RandomHShift(object): def __init__(self, select, scale=(0.0, 0.2)): self.scale = scale self.select = select def __call__(self,img, attr_idx): if attr_idx not in self.select: return img, attr_idx do_shift_crop = random.randint(0, 2) if do_shift_crop: h, w, _ = img.shape min_shift = int(w*self.scale[0]) max_shift = int(w*self.scale[1]) shift_idx = random.randint(min_shift, max_shift) direction = random.randint(0,2) if direction: right_part = img[:, -shift_idx:, :] left_part = img[:, :-shift_idx, :] else: left_part = img[:, :shift_idx, :] right_part = img[:, shift_idx:, :] img = np.concatenate((right_part, left_part), axis=1) # Fallback return img, attr_idx class RandomBottomCrop(object): def __init__(self, size, select, scale=(0.4, 0.8)): self.size = size self.scale = scale self.select = select def __call__(self,img, attr_idx): 
if attr_idx not in self.select: return img, attr_idx h, w, _ = img.shape area = h * w for attempt in range(10): s = random.uniform(self.scale[0], self.scale[1]) d = 0.25 + (0.45 - 0.25) / (self.scale[1] - self.scale[0]) * (s - self.scale[0]) target_area = s * area new_w = int(round(math.sqrt(target_area))) new_h = int(round(math.sqrt(target_area))) if new_w < w and new_h < h: dw = w-new_w dh = h - new_h x0 = random.randint(int((0.5-d)*dw), min(int((0.5+d)*dw)+1,dw)) y0 = (random.randint(max(0,int(0.8*dh)-1), dh)) out = fixed_crop(img, x0, y0, new_w, new_h, self.size) return out, attr_idx # Fallback return bottom_crop(img, self.size), attr_idx class BottomCrop(): def __init__(self, size, select, scale=(0.4, 0.8)): self.size = size self.scale = scale self.select = select def __call__(self,img, attr_idx): if attr_idx not in self.select: return img, attr_idx h, w, _ = img.shape area = h * w s = (self.scale[0]+self.scale[1])/3.*2. target_area = s * area new_w = int(round(math.sqrt(target_area))) new_h = int(round(math.sqrt(target_area))) if new_w < w and new_h < h: dw = w-new_w dh = h-new_h x0 = int(0.5*dw) y0 = int(0.9*dh) out = fixed_crop(img, x0, y0, new_w, new_h, self.size) return out, attr_idx # Fallback return bottom_crop(img, self.size), attr_idx class Resize(object): def __init__(self, size, inter=cv2.INTER_CUBIC): self.size = size self.inter = inter def __call__(self, image): return cv2.resize(image, (self.size[0], self.size[0]), interpolation=self.inter) class ExpandBorder(object): def __init__(self, mode='constant', value=255, size=(336,336), resize=False): self.mode = mode self.value = value self.resize = resize self.size = size def __call__(self, image): h, w, _ = image.shape if h > w: pad1 = (h-w)//2 pad2 = h - w - pad1 if self.mode == 'constant': image = np.pad(image, ((0, 0), (pad1, pad2), (0, 0)), self.mode, constant_values=self.value) else: image = np.pad(image,((0,0), (pad1, pad2),(0,0)), self.mode) elif h < w: pad1 = (w-h)//2 pad2 = w-h - pad1 if 
self.mode == 'constant': image = np.pad(image, ((pad1, pad2),(0, 0), (0, 0)), self.mode,constant_values=self.value) else: image = np.pad(image, ((pad1, pad2), (0, 0), (0, 0)),self.mode) if self.resize: image = cv2.resize(image, (self.size[0], self.size[0]),interpolation=cv2.INTER_LINEAR) return image class AstypeToInt(): def __call__(self, image, attr_idx): return image.clip(0,255.0).astype(np.uint8), attr_idx class AstypeToFloat(): def __call__(self, image, attr_idx): return image.astype(np.float32), attr_idx import matplotlib.pyplot as plt class Normalize(object): def __init__(self,mean, std): ''' :param mean: RGB order :param std: RGB order ''' self.mean = np.array(mean).reshape(3,1,1) self.std = np.array(std).reshape(3,1,1) def __call__(self, image): ''' :param image: (H,W,3) RGB :return: ''' # plt.figure(1) # plt.imshow(image) # plt.show() return (image.transpose((2, 0, 1)) / 255. - self.mean) / self.std class RandomErasing(object): def __init__(self, select,EPSILON=0.5,sl=0.02, sh=0.09, r1=0.3, mean=[0.485, 0.456, 0.406]): self.EPSILON = EPSILON self.mean = mean self.sl = sl self.sh = sh self.r1 = r1 self.select = select def __call__(self, img,attr_idx): if attr_idx not in self.select: return img,attr_idx if random.uniform(0, 1) > self.EPSILON: return img,attr_idx for attempt in range(100): area = img.shape[1] * img.shape[2] target_area = random.uniform(self.sl, self.sh) * area aspect_ratio = random.uniform(self.r1, 1 / self.r1) h = int(round(math.sqrt(target_area * aspect_ratio))) w = int(round(math.sqrt(target_area / aspect_ratio))) if w <= img.shape[2] and h <= img.shape[1]: x1 = random.randint(0, img.shape[1] - h) y1 = random.randint(0, img.shape[2] - w) if img.shape[0] == 3: # img[0, x1:x1+h, y1:y1+w] = random.uniform(0, 1) # img[1, x1:x1+h, y1:y1+w] = random.uniform(0, 1) # img[2, x1:x1+h, y1:y1+w] = random.uniform(0, 1) img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] # 
img[:, x1:x1+h, y1:y1+w] = torch.from_numpy(np.random.rand(3, h, w)) else: img[0, x1:x1 + h, y1:y1 + w] = self.mean[1] # img[0, x1:x1+h, y1:y1+w] = torch.from_numpy(np.random.rand(1, h, w)) return img,attr_idx return img,attr_idx if __name__ == '__main__': import matplotlib.pyplot as plt class FSAug(object): def __init__(self): self.augment = Compose([ AstypeToFloat(), # RandomHShift(scale=(0.,0.2),select=range(8)), # RandomRotate(angles=(-20., 20.), bound=True), ExpandBorder(select=range(8), mode='symmetric'),# symmetric # Resize(size=(336, 336), select=[ 2, 7]), AstypeToInt() ]) def __call__(self, spct,attr_idx): return self.augment(spct,attr_idx) trans = FSAug() img_path = '/media/gserver/data/FashionAI/round2/train/Images/coat_length_labels/0b6b4a2146fc8616a19fcf2026d61d50.jpg' img = cv2.cvtColor(cv2.imread(img_path),cv2.COLOR_BGR2RGB) img_trans,_ = trans(img,5) # img_trans2,_ = trans(img,6) plt.figure() plt.subplot(221) plt.imshow(img) plt.subplot(222) plt.imshow(img_trans) # plt.subplot(223) # plt.imshow(img_trans2) # plt.imshow(img_trans2) plt.show()
方式二: 用于线下增强数据,采用的方法是
- 高斯噪声
- 亮度变化
- 左右翻转
- 上下翻转
- 色彩抖动
- 对比度变化
- 锐度变化
from PIL import Image,ImageEnhance,ImageFilter,ImageOps
import os
import shutil
import numpy as np
import cv2
import random
from skimage.util import random_noise
from skimage import exposure

# Offline augmentation: for every image under raw_path/<class>/ write the
# original plus noisy, gamma-adjusted, flipped and contrast-enhanced copies
# to new_path/<class>/.

image_number = 0
raw_path = "./data/train/"
new_path = "./aug/train/"
num_classes = 59


def addNoise(img):
    '''
    Add Gaussian noise (fixed seed 13).
    random_noise returns values in [0, 1], so the result is multiplied by 255
    to get back to the [0, 255] range.
    '''
    return random_noise(img, mode='gaussian', seed=13, clip=True)*255


def changeLight(img):
    '''
    Apply a random gamma in [0.5, 1.5]: gamma > 1 darkens, gamma < 1 brightens.
    '''
    rate = random.uniform(0.5, 1.5)
    img = exposure.adjust_gamma(img, rate)
    return img


for class_idx in range(num_classes):
    raw_dir_name = str(class_idx)
    saved_image_path = new_path + raw_dir_name + "/"
    raw_image_path = raw_path + raw_dir_name + "/"
    # creates missing parents and tolerates existing folders; replaces the
    # bare try/except that hid every error
    os.makedirs(saved_image_path, exist_ok=True)

    for file_name in os.listdir(raw_image_path):
        x = raw_image_path + file_name
        base_name = os.path.basename(x)

        cv_image = cv2.imread(x)
        if cv_image is None:
            # cv2.imread returns None for files it cannot decode
            print("skip unreadable file: %s" % x)
            continue

        # Gaussian noise, random gamma, and both combined
        gau_image = addNoise(cv_image)
        light = changeLight(cv_image)
        light_and_gau = addNoise(light)
        cv2.imwrite(saved_image_path + "gau_" + base_name, gau_image)
        cv2.imwrite(saved_image_path + "light_" + base_name, light)
        # "_" separator added so this prefix matches the other ones
        cv2.imwrite(saved_image_path + "gau_light_" + base_name, light_and_gau)

        with Image.open(x) as img:
            # 1. flips
            img_flip_left_right = img.transpose(Image.FLIP_LEFT_RIGHT)
            img_flip_top_bottom = img.transpose(Image.FLIP_TOP_BOTTOM)
            # 2. contrast
            image_contrasted = ImageEnhance.Contrast(img).enhance(1.5)
            # Other variants can be added the same way, e.g. rotations via
            # img.transpose(Image.ROTATE_90) or ImageEnhance.Brightness /
            # ImageEnhance.Color / ImageEnhance.Sharpness.

            # save the original and the PIL-based variants
            img.save(saved_image_path + base_name)
            img_flip_left_right.save(saved_image_path + "left_right_" + base_name)
            img_flip_top_bottom.save(saved_image_path + "top_bottom_" + base_name)
            image_contrasted.save(saved_image_path + "contrasted_" + base_name)

            image_number += 1
            print("convert pictures :%s size:%s mode:%s" % (image_number, img.size, img.mode))
加载数据的类(自定义继承)
- 与pytorch中的加载数据类差不多,只是多了自己的某些功能。
from torch.utils.data import Dataset
from torchvision import transforms as T
from config import config
from PIL import Image
from itertools import chain
from glob import glob
from tqdm import tqdm
import random
import numpy as np
import pandas as pd
import os
import cv2
import torch

#1.set random seed
random.seed(config.seed)
np.random.seed(config.seed)
torch.manual_seed(config.seed)
torch.cuda.manual_seed_all(config.seed)

_NORM_MEAN = [0.485,0.456,0.406]
_NORM_STD = [0.229,0.224,0.225]


def _default_transforms(augment):
    """Build the default torchvision pipeline; ``augment`` adds the random training transforms."""
    # T.Resize expects (height, width); the two values used to be passed in
    # the opposite order. NOTE(review): assumes config.img_weight holds the
    # image width -- confirm against config.py.
    steps = [T.Resize((config.img_height, config.img_weight))]
    if augment:
        steps += [
            T.RandomRotation(30),
            T.RandomHorizontalFlip(),
            T.RandomVerticalFlip(),
            T.RandomAffine(45),
        ]
    steps += [T.ToTensor(), T.Normalize(mean=_NORM_MEAN, std=_NORM_STD)]
    return T.Compose(steps)


#2.define dataset
class ZiyiDataset(Dataset):
    """Dataset built from a DataFrame with a ``filename`` column (plus ``label`` unless ``test``).

    Items are ``(image, filename)`` in test mode and ``(image, label)``
    otherwise. When ``transforms`` is None a default pipeline is used, with
    random augmentation only for training (``train=True`` and ``test=False``).
    """

    def __init__(self,label_list,transforms=None,train=True,test=False):
        self.test = test
        self.train = train
        filenames = label_list["filename"].tolist()
        if self.test:
            self.imgs = filenames
        else:
            self.imgs = list(zip(filenames, label_list["label"].tolist()))
        if transforms is None:
            self.transforms = _default_transforms(augment=not (self.test or not train))
        else:
            self.transforms = transforms

    @staticmethod
    def _load_rgb(filename):
        """Open an image file and return it as a 3-channel RGB PIL image."""
        # Grayscale, palette or RGBA files would otherwise give a tensor whose
        # channel count does not match the 3-channel Normalize.
        with Image.open(filename) as im:
            return im.convert("RGB")

    def __getitem__(self,index):
        if self.test:
            filename = self.imgs[index]
            return self.transforms(self._load_rgb(filename)), filename
        filename,label = self.imgs[index]
        return self.transforms(self._load_rgb(filename)), label

    def __len__(self):
        return len(self.imgs)


def collate_fn(batch):
    """Stack the images of a batch into one tensor; keep the second items as a plain list."""
    imgs = [sample[0] for sample in batch]
    label = [sample[1] for sample in batch]
    return torch.stack(imgs, 0), label


def get_files(root,mode):
    """List the image files under ``root`` as a DataFrame.

    ``mode == "test"``: every entry of ``root`` -> column ``filename``.
    Any other mode (train / val): every ``*.jpg`` / ``*.JPG`` file inside the
    sub-folders of ``root`` -> columns ``filename`` and ``label``, the label
    being the integer name of the sub-folder.
    """
    #for test
    if mode == "test":
        files = [os.path.join(root, img) for img in os.listdir(root)]
        return pd.DataFrame({"filename":files})

    #for train and val
    image_folders = [os.path.join(root, x) for x in os.listdir(root)]
    jpg_image_1 = [glob(os.path.join(x, "*.jpg")) for x in image_folders]
    jpg_image_2 = [glob(os.path.join(x, "*.JPG")) for x in image_folders]
    # dict.fromkeys drops the duplicates that the two patterns produce on
    # case-insensitive file systems, keeping the original order
    all_images = list(dict.fromkeys(chain.from_iterable(jpg_image_1 + jpg_image_2)))
    print("loading train dataset")
    all_data_path,labels = [],[]
    for file in tqdm(all_images):
        all_data_path.append(file)
        # the parent folder name is the class id (separator-independent)
        labels.append(int(os.path.basename(os.path.dirname(file))))
    return pd.DataFrame({"filename":all_data_path,"label":labels})
3.获取模型
获取模型较为简单,单一模型采取pytorch中的预训练模型,添加所需要的层,进行微调然后迁移学习新数据。
import torchvision
import torch.nn.functional as F
from torch import nn
from config import config


def generate_model():
    """Return a pretrained DenseNet-169 whose classifier is replaced by a ``config.num_classes`` layer."""

    class DenseModel(nn.Module):
        def __init__(self, pretrained_model):
            super(DenseModel, self).__init__()
            self.classifier = nn.Linear(pretrained_model.classifier.in_features, config.num_classes)

            # Only the new classifier is registered at this point, so this
            # loop initialises the new layers and leaves the pretrained
            # weights (attached below) untouched.
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight)
                elif isinstance(m, nn.BatchNorm2d):
                    m.weight.data.fill_(1)
                    m.bias.data.zero_()
                elif isinstance(m, nn.Linear):
                    m.bias.data.zero_()

            self.features = pretrained_model.features
            # Aliases of the dense blocks; they are not used in forward() but
            # are kept because they add keys to the state_dict.
            self.layer1 = pretrained_model.features._modules['denseblock1']
            self.layer2 = pretrained_model.features._modules['denseblock2']
            self.layer3 = pretrained_model.features._modules['denseblock3']
            self.layer4 = pretrained_model.features._modules['denseblock4']

        def forward(self, x):
            features = self.features(x)
            out = F.relu(features, inplace=True)
            # Global average pooling; unlike the fixed kernel_size=8 this
            # works for any feature-map size, not only the 8x8 map of a
            # 256x256 input.
            out = F.adaptive_avg_pool2d(out, 1).view(features.size(0), -1)
            # NOTE(review): the output is squashed with a sigmoid; if the
            # training loss already applies softmax/log-softmax this is
            # probably unwanted -- confirm before using this model.
            out = self.classifier(out).sigmoid()
            return out

    return DenseModel(torchvision.models.densenet169(pretrained=True))


def get_net():
    """Return a pretrained ResNet-50 with a new ``config.num_classes`` output layer."""
    model = torchvision.models.resnet50(pretrained = True)
    # To freeze the backbone, set requires_grad = False on model.parameters()
    # before the new layers are attached.
    # In PyTorch a layer is replaced by assigning to model.<layer name>.
    model.avgpool = nn.AdaptiveAvgPool2d(1)
    # read the feature size from the existing layer instead of hard-coding 2048
    model.fc = nn.Linear(model.fc.in_features, config.num_classes)
    return model
4.开始训练
import os
import random
import time
import json
import torch
import torchvision
import numpy as np
import pandas as pd
import warnings
from datetime import datetime
from torch import nn,optim
from config import config
from collections import OrderedDict
from torch.autograd import Variable
from torch.utils.data import DataLoader
from dataset.dataloader import *
from sklearn.model_selection import train_test_split,StratifiedKFold
from timeit import default_timer as timer
from models.model import *
from utils import *

#1. Seed every random number generator in use and configure cuDNN
random.seed(config.seed)
np.random.seed(config.seed)
torch.manual_seed(config.seed)
torch.cuda.manual_seed_all(config.seed)
os.environ["CUDA_VISIBLE_DEVICES"] = config.gpus
torch.backends.cudnn.benchmark = True
warnings.filterwarnings('ignore')


#2. Evaluation: accumulate loss and top-k accuracy over a data loader
def evaluate(val_loader, model, criterion):
    """Run one pass over ``val_loader`` and return the accumulated metrics.

    Args:
        val_loader: iterable yielding ``(images, targets)`` batches.
        model: network to evaluate; it is moved to CUDA and put in eval mode.
        criterion: loss callable applied to ``(output, target)``.

    Returns:
        ``[loss_avg, top1_avg, top2_avg]`` as reported by the ``avg`` attribute
        of three ``AverageMeter`` objects (defined elsewhere in the project).

    The model is left in eval mode; callers that keep training must switch it
    back to train mode themselves.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top2 = AverageMeter()

    model.cuda()
    model.eval()
    with torch.no_grad():
        for images, target in val_loader:
            # Plain tensors suffice: the deprecated Variable wrapper adds
            # nothing, least of all inside no_grad().
            images = images.cuda()
            target = torch.from_numpy(np.array(target)).long().cuda()

            output = model(images)
            loss = criterion(output, target)

            precision1, precision2 = accuracy(output, target, topk=(1, 2))
            batch_size = images.size(0)
            losses.update(loss.item(), batch_size)
            top1.update(precision1[0], batch_size)
            top2.update(precision2[0], batch_size)
    return [losses.avg, top1.avg, top2.avg]

#3.
# test model on public dataset and save the predictions
def test(test_loader, model, folds):
    """Predict a class for every test image and write ``./submit/baseline.json``.

    Args:
        test_loader: iterable yielding ``(images, filepaths)`` batches.
        model: trained network; it is moved to CUDA and put in eval mode.
        folds: unused; kept so existing callers keep working.

    The JSON file holds a list of ``{"image_id": <file basename>,
    "disease_class": <label>}`` records. Predicted indices above 43 are shifted
    up by 2, because classes 44 and 45 were removed from the training data and
    the submission uses the original numbering.
    """
    model.cuda()
    model.eval()

    submit_results = []
    with torch.no_grad():
        for images, filepath in tqdm(test_loader):
            image_ids = [os.path.basename(path) for path in filepath]
            # Softmax is kept so argmax sees the same probabilities as before.
            probabilities = torch.softmax(model(images.cuda()), dim=1)
            predicted = np.argmax(probabilities.cpu().numpy(), axis=1)
            for image_id, pred_label in zip(image_ids, predicted):
                pred_label = int(pred_label)
                if pred_label > 43:
                    pred_label = pred_label + 2
                submit_results.append({"image_id": image_id, "disease_class": pred_label})

    # Open the file only once all predictions exist, so a failure during
    # inference cannot leave a truncated or empty submission behind.
    with open("./submit/baseline.json", "w", encoding="utf-8") as f:
        json.dump(submit_results, f, ensure_ascii=False, cls=MyEncoder)

#4.
# more details to build main function
def _best_checkpoint_path(fold):
    """Return the path of the best-model checkpoint for ``fold``."""
    return config.best_models + os.sep + config.model_name + os.sep + str(fold) + os.sep + 'model_best.pth.tar'


def main():
    """Train the network from ``get_net`` and predict the test set.

    Steps:
        1. Create the output directories named in ``config``.
        2. Build the model, an Adam optimizer, a cross-entropy loss and a logger.
        3. Optionally resume from the best checkpoint (``resume`` flag below).
        4. Split the training files into stratified 85% train / 15% validation.
        5. Train for ``config.epochs`` epochs, evaluating and checkpointing
           after every epoch.
        6. Reload the best checkpoint and run ``test`` on the test files.
    """
    fold = 0

    #4.1 create output directories
    for directory in (
        config.submit,
        config.weights,
        config.best_models,
        config.logs,
        config.weights + config.model_name + os.sep + str(fold) + os.sep,
        config.best_models + config.model_name + os.sep + str(fold) + os.sep,
    ):
        os.makedirs(directory, exist_ok=True)

    #4.2 model, optimizer, loss and logger
    model = get_net()
    model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=config.lr, amsgrad=True, weight_decay=config.weight_decay)
    criterion = nn.CrossEntropyLoss().cuda()
    log = Logger()
    log.open(config.logs + "log_train.txt", mode="a")
    log.write("\n----------------------------------------------- [START %s] %s\n\n" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 51))

    #4.3 bookkeeping for restarts
    start_epoch = 0
    best_precision1 = 0
    best_precision_save = 0
    resume = False

    #4.4 restart the training process
    if resume:
        # Use the same path as the reload after training. The path used here
        # before omitted config.model_name and so pointed somewhere else.
        checkpoint = torch.load(_best_checkpoint_path(fold))
        start_epoch = checkpoint["epoch"]
        fold = checkpoint["fold"]
        best_precision1 = checkpoint["best_precision1"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])

    #4.5 read the file lists and split off a validation set.
    # A StratifiedKFold split was tried earlier; a single stratified hold-out
    # split is used instead.
    train_ = get_files(config.train_data, "train")
    test_files = get_files(config.test_data, "test")
    train_data_list, val_data_list = train_test_split(train_, test_size=0.15, stratify=train_["label"])

    #4.5.4 data loaders
    train_dataloader = DataLoader(ZiyiDataset(train_data_list), batch_size=config.batch_size, shuffle=True, collate_fn=collate_fn, pin_memory=True)
    # Validation metrics do not depend on sample order, so no shuffling.
    val_dataloader = DataLoader(ZiyiDataset(val_data_list, train=False), batch_size=config.batch_size, shuffle=False, collate_fn=collate_fn, pin_memory=False)
    test_dataloader = DataLoader(ZiyiDataset(test_files, test=True), batch_size=1, shuffle=False, pin_memory=False)

    # Multiply the learning rate by gamma every step_size epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    #4.5.5.1 metrics
    # NOTE(review): these meters are created once and never reset, so the
    # logged training loss/accuracy look like running values over all epochs
    # so far. Confirm against AverageMeter.
    train_losses = AverageMeter()
    train_top1 = AverageMeter()
    train_top2 = AverageMeter()
    valid_loss = [np.inf, 0, 0]
    model.train()

    log.write('** start training here! **\n')
    log.write(' |------------ VALID -------------|----------- TRAIN -------------|------Accuracy------|------------|\n')
    log.write('lr iter epoch | loss top-1 top-2 | loss top-1 top-2 | Current Best | time |\n')
    log.write('-------------------------------------------------------------------------------------------------------------------------------\n')
    row_format = '%0.4f %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s | %s'

    #4.5.5 train
    start = timer()
    for epoch in range(start_epoch, config.epochs):
        # NOTE(review): passing the epoch to step() is deprecated in recent
        # PyTorch releases. It is kept because it also positions the schedule
        # correctly when training resumes from start_epoch.
        scheduler.step(epoch)

        # evaluate() leaves the model in eval mode, so switch back each epoch.
        model.train()
        for step, (images, target) in enumerate(train_dataloader):
            images = images.cuda()
            target = torch.from_numpy(np.array(target)).long().cuda()

            output = model(images)
            loss = criterion(output, target)

            precision1_train, precision2_train = accuracy(output, target, topk=(1, 2))
            batch_size = images.size(0)
            train_losses.update(loss.item(), batch_size)
            train_top1.update(precision1_train[0], batch_size)
            train_top2.update(precision2_train[0], batch_size)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            lr = get_learning_rate(optimizer)
            # Redraw the progress row in place on the current console line.
            print('\r', end='', flush=True)
            print(row_format % (
                lr, step / len(train_dataloader) + epoch, epoch,
                valid_loss[0], valid_loss[1], valid_loss[2],
                train_losses.avg, train_top1.avg, train_top2.avg,
                str(best_precision_save),
                time_to_str((timer() - start), 'min')), end='', flush=True)

        # evaluate once per epoch
        lr = get_learning_rate(optimizer)
        valid_loss = evaluate(val_dataloader, model, criterion)
        is_best = valid_loss[1] > best_precision1
        best_precision1 = max(valid_loss[1], best_precision1)
        try:
            best_precision_save = best_precision1.cpu().data.numpy()
        except AttributeError:
            # best_precision1 is still a plain number (no tensor methods), so
            # keep the previously stored printable value.
            pass
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_precision1": best_precision1,
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "valid_loss": valid_loss,
        }, is_best, fold)

        print("\r", end="", flush=True)
        log.write(row_format % (
            lr, epoch, epoch,
            valid_loss[0], valid_loss[1], valid_loss[2],
            train_losses.avg, train_top1.avg, train_top2.avg,
            str(best_precision_save),
            time_to_str((timer() - start), 'min')))
        log.write('\n')
        time.sleep(0.01)

    best_model = torch.load(_best_checkpoint_path(fold))
    model.load_state_dict(best_model["state_dict"])
    test(test_dataloader, model, fold)


if __name__ == "__main__":
    main()
使用Pytorch进行图像分类,AI challenger 农作物病害分类竞赛源码解读的更多相关文章
- [源码解读] ResNet源码解读(pytorch)
自己看读完pytorch封装的源码后,自己又重新写了一边(模仿其书写格式), 一些问题在代码中说明. import torch import torchvision import argparse i ...
- PyTorch源码解读之torchvision.transforms(转)
原文地址:https://blog.csdn.net/u014380165/article/details/79167753 版权声明:本文为博主原创文章,未经博主允许不得转载. https://bl ...
- PyTorch源码解读之torchvision.models(转)
原文地址:https://blog.csdn.net/u014380165/article/details/79119664 PyTorch框架中有一个非常重要且好用的包:torchvision,该包 ...
- PyTorch源码解读之torch.utils.data.DataLoader(转)
原文链接 https://blog.csdn.net/u014380165/article/details/79058479 写得特别好!最近正好在学习pytorch,学习一下! PyTorch中数据 ...
- pytorch bert 源码解读
https://daiwk.github.io/posts/nlp-bert.html 目录 概述 BERT 模型架构 Input Representation Pre-training Tasks ...
- [源码解析] PyTorch 分布式(1) --- 数据加载之DistributedSampler
[源码解析] PyTorch 分布式(1) --- 数据加载之DistributedSampler 目录 [源码解析] PyTorch 分布式(1) --- 数据加载之DistributedSampl ...
- [源码解析] PyTorch 分布式(2) --- 数据加载之DataLoader
[源码解析] PyTorch 分布式(2) --- 数据加载之DataLoader 目录 [源码解析] PyTorch 分布式(2) --- 数据加载之DataLoader 0x00 摘要 0x01 ...
- [源码解析] PyTorch 分布式之弹性训练(1) --- 总体思路
[源码解析] PyTorch 分布式之弹性训练(1) --- 总体思路 目录 [源码解析] PyTorch 分布式之弹性训练(1) --- 总体思路 0x00 摘要 0x01 痛点 0x02 难点 0 ...
- [源码解析] PyTorch 分布式之弹性训练(6)---监控/容错
[源码解析] PyTorch 分布式之弹性训练(6)---监控/容错 目录 [源码解析] PyTorch 分布式之弹性训练(6)---监控/容错 0x00 摘要 0x01 总体逻辑 1.1 Node集 ...
随机推荐
- [No000018F]Vim自动缩进配置、原理和tab键替换空格-Vim使用技巧(4)
一.Vim缩进介绍 在没有设置Vim自动缩进的条件下,可以手动使用Vim命令对特定行进行缩进处理.在Vim插入模式下,按下 Tab 键时默认会输入一个制表符,可通过Vim配置项将 Tab 替换为空格, ...
- [No000017A]改善C#程序的建议3:在C#中选择正确的集合进行编码
要选择正确的集合,我们首先要了解一些数据结构的知识.所谓数据结构,就是相互之间存在一种或多种特定关系的数据元素的集合.结合下图,我们看一下对集合的分类. 集合分类 在上图中,可以看到,集合总体上分为线 ...
- [No000016A]CSS常用三种选择器
1.HTML Tag p{color:red;} 2.id #myid{color:red;} 3.class .myclass{color:red;} CSS常用文本样式属性 color font- ...
- tensorflow一些常用函数的使用注意
tf.abs() 求tensor中数据的绝对值 tf.sign() 对每一个数据取符号(sign函数,而非sigmoid),返回 -1、0 或 1 tf.reduce_sum() 对不同维度数据求和.注意:1:求和每一行 0:求和 ...
- splash 安装
搞定NVIDIA显卡后,开始弄splash 根据 https://github.com/paperManu/splash 提示 最简安装就是用apt sudo apt install flatpak ...
- [skill][telnet] 用telnet获取一个网页
一直也搞不懂, telnet到底是干嘛用的. 然而, 它可以得到一个网页. /home/tong/Data/performance_test [tong@T7] [:] > telnet nyu ...
- FlinkCEP - Complex event processing for Flink
https://ci.apache.org/projects/flink/flink-docs-release-1.3/dev/libs/cep.html 首先目的是匹配pattern sequenc ...
- Dom4j与sax 简单对比
Dom4j与sax之间的对比 dom4j不适合大文件的解析,因为它是一下子将文件加载到内存中,所以有可能出现内存溢出,sax是基于事件来对xml进行解析的,所以他可以解析大文件的xml,也正是因为如此 ...
- idea+maven+springboot+mybatis+springmvc+shiro
springboot就是把创建项目简单化,省去了以往的配置mybatis.springmvc的繁琐过程. 搭建web应用三个主要功能,请求和响应,数据库交互,权限配置. 一.idea创建项目 (1) ...
- AT命令text模式发送中文
AT命令text模式发送中文 AT+CSCS=? 查询支持哪些编码 设置编码和编码格式等 AT+CMGF=1 //TEXT 模式 //AT+CSCS="UCS2" //设置编码 A ...