
Preprocessing Image Datasets for Deep Learning

Neural networks usually expect every image in a dataset to have the same size, but downloaded datasets often contain images of varying sizes, so we have to convert them to a uniform size. I first considered using cv2.resize() to force the images to equal dimensions and then converting them to tensors with torch.from_numpy(), but resize distorts the aspect ratio of the image, so what the network ends up fitting may not be what we want.

So we take the following approach instead:

  1. First, set a target size for the images.
  2. Scale each image down proportionally so that its shorter edge matches the target.
  3. Then randomly crop it to the target size.

Code environment: Python 3.7.4, PyTorch 1.4.0, Jupyter Notebook.
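For a plain image dataset without landmark annotations, torchvision's built-in transforms already implement steps 2 and 3; a minimal sketch (the 256/224 sizes are just example values):

from torchvision import transforms

# Resize(int) scales the shorter edge to 256 while keeping the aspect
# ratio; RandomCrop then cuts a random 224x224 window out of the result.
preprocess = transforms.Compose([
    transforms.Resize(256),      # step 2: shorter edge -> 256, ratio preserved
    transforms.RandomCrop(224),  # step 3: random crop to the target size
    transforms.ToTensor(),       # H x W x C in [0, 255] -> C x H x W in [0.0, 1.0]
])

The rest of this post builds the same pipeline by hand, because here the landmark coordinates must be transformed together with the image, which the built-in transforms (which operate on the image alone) cannot do.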

#!/usr/bin/env python
# coding: utf-8

# In[1]:


from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import warnings


# In[2]:


warnings.filterwarnings('ignore')  # ignore all warnings to keep the notebook output clean


# In[3]:


plt.ion()  # interactive mode, so figures update without blocking


# In[4]:


landmarks_frame = pd.read_csv('data/faces/face_landmarks.csv')
# dataset download: https://download.pytorch.org/tutorial/faces.zip
# place the extracted data under the data/ folder
n = 65
img_name = landmarks_frame.iloc[n, 0]
landmarks = landmarks_frame.iloc[n, 1:]
landmarks = np.asarray(landmarks)
landmarks = landmarks.astype('float').reshape(-1, 2)
# reshape into n rows of (x, y) pairs
print('Image name: {}'.format(img_name))
print('Landmarks shape: {}'.format(landmarks.shape))
print('First 4 Landmarks: {}'.format(landmarks[:4]))


# In[5]:


def show_landmarks(image, landmarks):
    """Show an image with its landmarks overlaid as red dots."""
    plt.imshow(image)
    plt.scatter(landmarks[:, 0], landmarks[:, 1], s=10, marker='.', c='red')
    plt.pause(0.001)  # brief pause so the plot window refreshes


# In[6]:


plt.figure()
show_landmarks(io.imread(os.path.join('data/faces/', img_name)),
               landmarks)
plt.show()


# In[7]:


class FaceLandmarksDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        '''
        :param csv_file: path to the csv file with the annotations
        :param root_dir: directory containing all the images
        :param transform: (optional) transform to apply to each sample
        '''
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()  # convert an index tensor to a (nested) list
        img_name = os.path.join(self.root_dir,
                                self.landmarks_frame.iloc[idx, 0])  # image path
        image = io.imread(img_name)
        landmarks = self.landmarks_frame.iloc[idx, 1:]  # the image's landmark points
        landmarks = np.array([landmarks])
        landmarks = landmarks.astype('float').reshape(-1, 2)
        sample = {'image': image, 'landmarks': landmarks}
        if self.transform:
            sample = self.transform(sample)  # apply the transform to the sample
        return sample


# In[8]:


face_dataset = FaceLandmarksDataset(csv_file='data/faces/face_landmarks.csv',
                                    root_dir='data/faces/')
fig = plt.figure()
for i in range(len(face_dataset)):
    sample = face_dataset[i]
    print(i, sample['image'].shape, sample['landmarks'].shape)
    ax = plt.subplot(1, 4, i + 1)
    plt.tight_layout()
    ax.set_title('sample #{}'.format(i))
    ax.axis('off')
    show_landmarks(**sample)  # unpack the sample dict as keyword arguments
    if i == 3:  # show the first 4 samples only
        plt.show()
        break



# In[9]:


class Rescale(object):
    """
    Rescale the image in a sample to a given size.
    If output_size is a tuple, the output matches output_size exactly.
    If output_size is an int, the smaller edge of the image is matched
    to output_size and the aspect ratio is preserved.
    Args:
        output_size (int or tuple): the desired output size
    """
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, landmarks = sample['image'], sample['landmarks']
        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            # match the smaller edge to output_size, preserving the aspect ratio
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size
        new_h, new_w = int(new_h), int(new_w)
        img = transform.resize(image, (new_h, new_w))
        # h and w are swapped for landmarks because for images,
        # x and y axes are axis 1 and 0 respectively
        landmarks = landmarks * [new_w / w, new_h / h]  # scale the landmarks by the same ratio
        return {'image': img, 'landmarks': landmarks}
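
# A quick sanity check of Rescale on a synthetic sample (the shapes and
# landmark values here are made up purely for illustration): a 300x450
# image with output_size=256 should come out as 256x384, since the
# shorter edge (300) maps to 256 and 450 * 256 / 300 = 384.
dummy = {'image': np.zeros((300, 450, 3)),
         'landmarks': np.array([[450.0, 300.0]])}
out = Rescale(256)(dummy)
print(out['image'].shape)   # (256, 384, 3)
print(out['landmarks'])     # [[384. 256.]] -- scaled by the same ratio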


# In[10]:


class RandomCrop(object):
    """
    Crop the image in a sample at a random location.
    Args:
        output_size (tuple or int): the desired output size; an int
            produces a square crop
    """
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        image, landmarks = sample['image'], sample['landmarks']
        h, w = image.shape[:2]
        new_h, new_w = self.output_size
        top = np.random.randint(0, h - new_h)   # random offset in [0, h - new_h)
        left = np.random.randint(0, w - new_w)  # random offset in [0, w - new_w)
        image = image[top: top + new_h,
                      left: left + new_w]       # the randomly cropped window
        landmarks = landmarks - [left, top]     # shift the landmarks into crop coordinates
        return {'image': image, 'landmarks': landmarks}
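
# One caveat worth noting (my observation, not from the original post):
# RandomCrop only shifts the landmark coordinates by the crop offset, so
# a landmark that falls outside the cropped window ends up with
# coordinates outside the [0, output_size) range. A hypothetical check:
np.random.seed(0)  # make the random crop offsets reproducible
dummy = {'image': np.zeros((300, 450, 3)),
         'landmarks': np.array([[10.0, 10.0], [440.0, 290.0]])}
out = RandomCrop(224)(dummy)
print(out['image'].shape)  # (224, 224, 3)
print(out['landmarks'])    # entries may be negative or >= 224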


# In[11]:


class ToTensor(object):
    """
    Convert the ndarrays in a sample to tensors.
    """
    def __call__(self, sample):
        image, landmarks = sample['image'], sample['landmarks']
        # swap the color axis:
        # numpy image: H x W x C
        # torch image: C x H x W
        image = image.transpose((2, 0, 1))
        return {'image': torch.from_numpy(image),
                'landmarks': torch.from_numpy(landmarks)}


# In[12]:


scale = Rescale(256)
crop = RandomCrop(128)
composed = transforms.Compose([Rescale(256),
                               RandomCrop(224)])
# apply each of the three transforms to the same sample
fig = plt.figure()
sample = face_dataset[65]
for i, tsfrm in enumerate([scale, crop, composed]):
    transformed_sample = tsfrm(sample)
    ax = plt.subplot(1, 3, i + 1)
    plt.tight_layout()
    ax.set_title(type(tsfrm).__name__)
    show_landmarks(**transformed_sample)
plt.show()


# In[13]:


transformed_dataset = FaceLandmarksDataset(csv_file='data/faces/face_landmarks.csv',
                                           root_dir='data/faces/',
                                           transform=transforms.Compose([Rescale(256),
                                                                         RandomCrop(224),
                                                                         ToTensor()]))
for i in range(len(transformed_dataset)):
    sample = transformed_dataset[i]
    print(i, sample['image'].size(), sample['landmarks'].size())
    if i == 3:
        break
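
# One practical wrinkle (an addition of mine, not from the original
# post): skimage's transform.resize returns float64 arrays, so ToTensor
# yields DoubleTensors, while most models expect float32. A cast is
# typically added before feeding a model:
sample = transformed_dataset[0]
image = sample['image'].float()          # torch.float64 -> torch.float32
landmarks = sample['landmarks'].float()
print(image.dtype, landmarks.dtype)      # torch.float32 torch.float32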


# In[14]:


dataloader = DataLoader(transformed_dataset,
                        batch_size=4,
                        shuffle=True,
                        num_workers=4)  # shuffled batches of 4, loaded by 4 worker processes
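
# If multi-process loading hangs or errors inside Jupyter (a known
# issue on Windows, where worker processes cannot be forked), a common
# fallback is to load the batches in the main process instead:
# dataloader = DataLoader(transformed_dataset, batch_size=4,
#                         shuffle=True, num_workers=0)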


# In[15]:


def show_landmarks_batch(sample_batched):
    """Show the images in a batch with their landmarks overlaid."""
    images_batch, landmarks_batch = sample_batched['image'], sample_batched['landmarks']
    batch_size = len(images_batch)
    im_size = images_batch.size(2)
    grid_border_size = 2  # make_grid pads each image with 2 pixels by default
    grid = utils.make_grid(images_batch)
    plt.imshow(grid.numpy().transpose((1, 2, 0)))
    for i in range(batch_size):
        # offset each sample's x coordinates by its position in the grid
        plt.scatter(landmarks_batch[i, :, 0].numpy() + i * im_size + (i + 1) * grid_border_size,
                    landmarks_batch[i, :, 1].numpy() + grid_border_size,
                    s=10,
                    marker='.',
                    c='red')
    plt.title('Batch from dataloader')


# In[16]:


for i_batch, sample_batched in enumerate(dataloader):
    print(i_batch, sample_batched['image'].size(), sample_batched['landmarks'].size())
    if i_batch == 3:  # visualize the fourth batch, then stop
        plt.figure()
        show_landmarks_batch(sample_batched)
        plt.axis('off')
        plt.ioff()   # turn interactive mode off so plt.show() blocks
        plt.show()
        break
