Skip to content
Snippets Groups Projects
Unverified Commit 931db990 authored by Mashiro's avatar Mashiro Committed by GitHub
Browse files

[Enhance] Enhance img data preprocessor (#290)

* fix BaseDataPreprocessor

* fix BaseDataPreprocessor

* change device type to torch.device

* change device type to torch.device

* fix cpu method of base model

* Allow ImgDataPreprocessor do not normalize

* remove unnecessary type ignore

* make mean and std optional

* refine docstring
parent 8b3675a2
No related branches found
No related tags found
No related merge requests found
......@@ -151,16 +151,14 @@ class ImgDataPreprocessor(BaseDataPreprocessor):
constructor of :class:`BaseDataset`.
Args:
mean (Sequence[float or int], optional): The pixel mean of image
channels. If ``bgr_to_rgb=True`` it means the mean value of R,
G, B channels. If it is not specified, images will not be
normalized. Defaults to None.
std (Sequence[float or int], optional): The pixel standard deviation
of image channels. If ``bgr_to_rgb=True`` it means the standard
deviation of R, G, B channels. If it is not specified, images
will not be normalized. Defaults to None.
pad_size_divisor (int): The size of padded image should be
divisible by ``pad_size_divisor``. Defaults to 1.
pad_value (float or int): The padded pixel value. Defaults to 0.
......@@ -168,27 +166,40 @@ class ImgDataPreprocessor(BaseDataPreprocessor):
Defaults to False.
rgb_to_bgr (bool): Whether to convert images from RGB to BGR.
Defaults to False.
Note:
If images do not need to be normalized, ``mean`` and ``std`` should
both be set to None; otherwise both of them should be set to tuples
of corresponding values.
"""
def __init__(self,
             mean: Optional[Sequence[Union[float, int]]] = None,
             std: Optional[Sequence[Union[float, int]]] = None,
             pad_size_divisor: int = 1,
             pad_value: Union[float, int] = 0,
             bgr_to_rgb: bool = False,
             rgb_to_bgr: bool = False):
    """Initialize the image data preprocessor.

    Args:
        mean (Sequence[float or int], optional): The pixel mean of image
            channels (length 1 for gray images, 3 for RGB). If ``None``,
            images will not be normalized. Defaults to None.
        std (Sequence[float or int], optional): The pixel standard
            deviation of image channels (length 1 or 3). If ``None``,
            images will not be normalized. Defaults to None.
        pad_size_divisor (int): The size of the padded image should be
            divisible by this value. Defaults to 1.
        pad_value (float or int): The padded pixel value. Defaults to 0.
        bgr_to_rgb (bool): Whether to convert images from BGR to RGB.
            Defaults to False.
        rgb_to_bgr (bool): Whether to convert images from RGB to BGR.
            Defaults to False.

    Raises:
        AssertionError: If both channel conversions are requested, if only
            one of ``mean``/``std`` is given, or if their lengths are not
            1 or 3.
    """
    super().__init__()
    assert not (bgr_to_rgb and rgb_to_bgr), (
        '`bgr2rgb` and `rgb2bgr` cannot be set to True at the same time')
    # Normalization is all-or-nothing: mean and std must be given together.
    assert (mean is None) == (std is None), (
        'mean and std should be both None or tuple')
    if mean is not None:
        assert len(mean) == 3 or len(mean) == 1, (
            'The length of mean should be 1 or 3 to be compatible with '
            f'RGB or gray image, but got {len(mean)}')
        assert len(std) == 3 or len(std) == 1, (  # type: ignore
            'The length of std should be 1 or 3 to be compatible with RGB '  # type: ignore # noqa: E501
            f'or gray image, but got {len(std)}')
        self._enable_normalize = True
        # Registered as non-persistent buffers so they move with the module
        # across devices but are excluded from the state dict.
        self.register_buffer('mean',
                             torch.tensor(mean).view(-1, 1, 1), False)
        self.register_buffer('std',
                             torch.tensor(std).view(-1, 1, 1), False)
    else:
        self._enable_normalize = False
    self.channel_conversion = rgb_to_bgr or bgr_to_rgb
    self.pad_size_divisor = pad_size_divisor
    self.pad_value = pad_value
......@@ -214,7 +225,8 @@ class ImgDataPreprocessor(BaseDataPreprocessor):
if self.channel_conversion:
inputs = [_input[[2, 1, 0], ...] for _input in inputs]
# Normalization.
inputs = [(_input - self.mean) / self.std for _input in inputs]
if self._enable_normalize:
inputs = [(_input - self.mean) / self.std for _input in inputs]
# Pad and stack Tensor.
batch_inputs = stack_batch(inputs, self.pad_size_divisor,
self.pad_value)
......
......@@ -55,10 +55,8 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
# initiate model without `preprocess_cfg`
data_processor = ImgDataPreprocessor()
self.assertFalse(data_processor.channel_conversion)
assert_allclose(data_processor.mean,
torch.tensor([127.5, 127.5, 127.5]).view(-1, 1, 1))
assert_allclose(data_processor.std,
torch.tensor([127.5, 127.5, 127.5]).view(-1, 1, 1))
self.assertFalse(hasattr(data_processor, 'mean'))
self.assertFalse(hasattr(data_processor, 'std'))
self.assertEqual(data_processor.pad_size_divisor, 1)
assert_allclose(data_processor.pad_value, torch.tensor(0))
# initiate model with preprocess_cfg` and feat keys
......@@ -68,6 +66,7 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
std=[255, 255, 255],
pad_size_divisor=16,
pad_value=10)
self.assertTrue(data_processor._enable_normalize)
self.assertTrue(data_processor.channel_conversion, True)
assert_allclose(data_processor.mean,
torch.tensor([0, 0, 0]).view(-1, 1, 1))
......@@ -77,14 +76,26 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
self.assertEqual(data_processor.pad_size_divisor, 16)
with self.assertRaisesRegex(AssertionError, 'The length of mean'):
ImgDataPreprocessor(mean=(1, 2))
ImgDataPreprocessor(mean=(1, 2), std=(1, 2, 3))
with self.assertRaisesRegex(AssertionError, 'The length of std'):
ImgDataPreprocessor(std=(1, 2))
ImgDataPreprocessor(mean=(1, 2, 3), std=(1, 2))
with self.assertRaisesRegex(AssertionError, '`bgr2rgb` and `rgb2bgr`'):
ImgDataPreprocessor(bgr_to_rgb=True, rgb_to_bgr=True)
with self.assertRaisesRegex(AssertionError, 'mean and std should be'):
ImgDataPreprocessor(
bgr_to_rgb=True,
mean=None,
std=[255, 255, 255],
pad_size_divisor=16,
pad_value=10)
data_processor = ImgDataPreprocessor(
bgr_to_rgb=True, pad_size_divisor=16, pad_value=10)
self.assertFalse(data_processor._enable_normalize)
def test_forward(self):
# Test `pad_value`, `to_rgb`, `pad_size_divisor`.
data_preprocessor = ImgDataPreprocessor(
......@@ -104,12 +115,32 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
]
std = torch.tensor([1, 2, 3]).view(-1, 1, 1)
inputs1 = (inputs1[[2, 1, 0], ...] - 127.5) / std
inputs2 = (inputs2[[2, 1, 0], ...] - 127.5) / std
inputs1 = F.pad(inputs1, (0, 6, 0, 6), value=10)
inputs2 = F.pad(inputs2, (0, 1, 0, 1), value=10)
target_inputs1 = (inputs1.clone()[[2, 1, 0], ...] - 127.5) / std
target_inputs2 = (inputs2.clone()[[2, 1, 0], ...] - 127.5) / std
target_inputs1 = F.pad(target_inputs1, (0, 6, 0, 6), value=10)
target_inputs2 = F.pad(target_inputs2, (0, 1, 0, 1), value=10)
target_inputs = [target_inputs1, target_inputs2]
inputs, data_samples = data_preprocessor(data, True)
target_data_samples = [data_sample1, data_sample2]
for input_, data_sample, target_input, target_data_sample in zip(
inputs, data_samples, target_inputs, target_data_samples):
assert_allclose(input_, target_input)
assert_allclose(data_sample.bboxes, target_data_sample.bboxes)
# Test image without normalization.
data_preprocessor = ImgDataPreprocessor(
pad_size_divisor=16,
pad_value=10,
rgb_to_bgr=True,
)
target_inputs1 = (inputs1.clone()[[2, 1, 0], ...])
target_inputs2 = (inputs2.clone()[[2, 1, 0], ...])
target_inputs1 = F.pad(target_inputs1, (0, 6, 0, 6), value=10)
target_inputs2 = F.pad(target_inputs2, (0, 1, 0, 1), value=10)
target_inputs = [inputs1, inputs2]
target_inputs = [target_inputs1, target_inputs2]
inputs, data_samples = data_preprocessor(data, True)
target_data_samples = [data_sample1, data_sample2]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment