diff --git a/mmengine/model/base_model/data_preprocessor.py b/mmengine/model/base_model/data_preprocessor.py
index cf94b54de29d24713f88deb8ff7ea4718aa80f12..7242fde94951f708e747b5e5795ca7f520358524 100644
--- a/mmengine/model/base_model/data_preprocessor.py
+++ b/mmengine/model/base_model/data_preprocessor.py
@@ -151,16 +151,14 @@ class ImgDataPreprocessor(BaseDataPreprocessor):
     constructor of :class:`BaseDataset`.
 
     Args:
-        mean (Sequence[float or int]): The pixel mean of image channels. If
-            ``bgr_to_rgb=True`` it means the mean value of R, G, B channels.
-            If ``mean`` and ``std`` are not specified, ``ImgDataPreprocessor``
-            will normalize images to [-1, 1]. Defaults to (127.5, 127.5,
-            127.5).
-        std (Sequence[float or int]): The pixel standard deviation of image
-            channels. If ``bgr_to_rgb=True`` it means the standard deviation of
-            R, G, B channels. If ``mean`` and ``std`` are not specified,
-            ImgDataPreprocessor will normalize images to [-1, 1]. Defaults
-            to (127.5, 127.5, 127.5).
+        mean (Sequence[float or int], optional): The pixel mean of image
+            channels. If ``bgr_to_rgb=True`` it means the mean value of R,
+            G, B channels. If it is not specified, images will not be
+            normalized. Defaults to None.
+        std (Sequence[float or int], optional): The pixel standard deviation
+            of image channels. If ``bgr_to_rgb=True`` it means the standard
+            deviation of R, G, B channels. If it is not specified, images will
+            not be normalized. Defaults to None.
         pad_size_divisor (int): The size of padded image should be divisible by
             ``pad_size_divisor``. Defaults to 1.
         pad_value (float or int): The padded pixel value. Defaults to 0.
@@ -168,27 +166,40 @@ class ImgDataPreprocessor(BaseDataPreprocessor):
             Defaults to False.
         rgb_to_bgr (bool): whether to convert image from RGB to BGR.
            Defaults to False.
+
+    Note:
+        If images do not need to be normalized, ``mean`` and ``std`` should
+        both be set to None; otherwise both of them should be set to a tuple
+        of corresponding values.
""" def __init__(self, - mean: Sequence[Union[float, int]] = (127.5, 127.5, 127.5), - std: Sequence[Union[float, int]] = (127.5, 127.5, 127.5), + mean: Optional[Sequence[Union[float, int]]] = None, + std: Optional[Sequence[Union[float, int]]] = None, pad_size_divisor: int = 1, pad_value: Union[float, int] = 0, bgr_to_rgb: bool = False, rgb_to_bgr: bool = False): super().__init__() - assert len(mean) == 3 or len(mean) == 1, ( - 'The length of mean should be 1 or 3 to be compatible with RGB ' - f'or gray image, but got {len(mean)}') - assert len(std) == 3 or len(std) == 1, ( - 'The length of std should be 1 or 3 to be compatible with RGB ' - f'or gray image, but got {len(std)}') assert not (bgr_to_rgb and rgb_to_bgr), ( '`bgr2rgb` and `rgb2bgr` cannot be set to True at the same time') + assert (mean is None) == (std is None), ( + 'mean and std should be both None or tuple') + if mean is not None: + assert len(mean) == 3 or len(mean) == 1, ( + 'The length of mean should be 1 or 3 to be compatible with ' + f'RGB or gray image, but got {len(mean)}') + assert len(std) == 3 or len(std) == 1, ( # type: ignore + 'The length of std should be 1 or 3 to be compatible with RGB ' # type: ignore # noqa: E501 + f'or gray image, but got {len(std)}') + self._enable_normalize = True + self.register_buffer('mean', + torch.tensor(mean).view(-1, 1, 1), False) + self.register_buffer('std', + torch.tensor(std).view(-1, 1, 1), False) + else: + self._enable_normalize = False self.channel_conversion = rgb_to_bgr or bgr_to_rgb - self.register_buffer('mean', torch.tensor(mean).view(-1, 1, 1), False) - self.register_buffer('std', torch.tensor(std).view(-1, 1, 1), False) self.pad_size_divisor = pad_size_divisor self.pad_value = pad_value @@ -214,7 +225,8 @@ class ImgDataPreprocessor(BaseDataPreprocessor): if self.channel_conversion: inputs = [_input[[2, 1, 0], ...] for _input in inputs] # Normalization. - inputs = [(_input - self.mean) / self.std for _input in inputs] + if self._enable_normalize: + inputs = [(_input - self.mean) / self.std for _input in inputs] # Pad and stack Tensor. 
diff --git a/tests/test_model/test_base_model/test_data_preprocessor.py b/tests/test_model/test_base_model/test_data_preprocessor.py
index 0639b59c9997ccc7cb94b328ff9841f9d36ab9d1..b82b09fc17cfd7aef1699e51436e7656a5ce2038 100644
--- a/tests/test_model/test_base_model/test_data_preprocessor.py
+++ b/tests/test_model/test_base_model/test_data_preprocessor.py
@@ -55,10 +55,8 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
         # initiate model without `preprocess_cfg`
         data_processor = ImgDataPreprocessor()
         self.assertFalse(data_processor.channel_conversion)
-        assert_allclose(data_processor.mean,
-                        torch.tensor([127.5, 127.5, 127.5]).view(-1, 1, 1))
-        assert_allclose(data_processor.std,
-                        torch.tensor([127.5, 127.5, 127.5]).view(-1, 1, 1))
+        self.assertFalse(hasattr(data_processor, 'mean'))
+        self.assertFalse(hasattr(data_processor, 'std'))
         self.assertEqual(data_processor.pad_size_divisor, 1)
         assert_allclose(data_processor.pad_value, torch.tensor(0))
         # initiate model with `preprocess_cfg` and feat keys
@@ -68,6 +66,7 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
             std=[255, 255, 255],
             pad_size_divisor=16,
             pad_value=10)
+        self.assertTrue(data_processor._enable_normalize)
         self.assertTrue(data_processor.channel_conversion, True)
         assert_allclose(data_processor.mean,
                         torch.tensor([0, 0, 0]).view(-1, 1, 1))
@@ -77,14 +76,26 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
         self.assertEqual(data_processor.pad_size_divisor, 16)
 
         with self.assertRaisesRegex(AssertionError, 'The length of mean'):
-            ImgDataPreprocessor(mean=(1, 2))
+            ImgDataPreprocessor(mean=(1, 2), std=(1, 2, 3))
 
         with self.assertRaisesRegex(AssertionError, 'The length of std'):
-            ImgDataPreprocessor(std=(1, 2))
+            ImgDataPreprocessor(mean=(1, 2, 3), std=(1, 2))
 
         with self.assertRaisesRegex(AssertionError, '`bgr2rgb` and `rgb2bgr`'):
             ImgDataPreprocessor(bgr_to_rgb=True, rgb_to_bgr=True)
 
+        with self.assertRaisesRegex(AssertionError, 'mean and std should be'):
+            ImgDataPreprocessor(
+                bgr_to_rgb=True,
+                mean=None,
+                std=[255, 255, 255],
+                pad_size_divisor=16,
+                pad_value=10)
+
+        data_processor = ImgDataPreprocessor(
+            bgr_to_rgb=True, pad_size_divisor=16, pad_value=10)
+        self.assertFalse(data_processor._enable_normalize)
+
     def test_forward(self):
         # Test `pad_value`, `to_rgb`, `pad_size_divisor`.
         data_preprocessor = ImgDataPreprocessor(
@@ -104,12 +115,32 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
         ]
 
         std = torch.tensor([1, 2, 3]).view(-1, 1, 1)
-        inputs1 = (inputs1[[2, 1, 0], ...] - 127.5) / std
-        inputs2 = (inputs2[[2, 1, 0], ...] - 127.5) / std
-        inputs1 = F.pad(inputs1, (0, 6, 0, 6), value=10)
-        inputs2 = F.pad(inputs2, (0, 1, 0, 1), value=10)
+        target_inputs1 = (inputs1.clone()[[2, 1, 0], ...] - 127.5) / std
+        target_inputs2 = (inputs2.clone()[[2, 1, 0], ...] - 127.5) / std
+        target_inputs1 = F.pad(target_inputs1, (0, 6, 0, 6), value=10)
+        target_inputs2 = F.pad(target_inputs2, (0, 1, 0, 1), value=10)
+
+        target_inputs = [target_inputs1, target_inputs2]
+        inputs, data_samples = data_preprocessor(data, True)
+
+        target_data_samples = [data_sample1, data_sample2]
+        for input_, data_sample, target_input, target_data_sample in zip(
+                inputs, data_samples, target_inputs, target_data_samples):
+            assert_allclose(input_, target_input)
+            assert_allclose(data_sample.bboxes, target_data_sample.bboxes)
+
+        # Test image without normalization.
+        data_preprocessor = ImgDataPreprocessor(
+            pad_size_divisor=16,
+            pad_value=10,
+            rgb_to_bgr=True,
+        )
+        target_inputs1 = (inputs1.clone()[[2, 1, 0], ...])
+        target_inputs2 = (inputs2.clone()[[2, 1, 0], ...])
+        target_inputs1 = F.pad(target_inputs1, (0, 6, 0, 6), value=10)
+        target_inputs2 = F.pad(target_inputs2, (0, 1, 0, 1), value=10)
 
-        target_inputs = [inputs1, inputs2]
+        target_inputs = [target_inputs1, target_inputs2]
         inputs, data_samples = data_preprocessor(data, True)
 
         target_data_samples = [data_sample1, data_sample2]
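The `F.pad` tuples the tests expect, `(0, 6, 0, 6)` and `(0, 1, 0, 1)` with `pad_size_divisor=16`, are `(left, right, top, bottom)` pads that round each image's H and W up to the next multiple of the divisor (`stack_batch` additionally pads every image in the batch up to the shared maximum H and W). A sketch of that arithmetic for a single `(C, H, W)` image; `pad_to_divisor` is a hypothetical helper for illustration, not an mmengine API:

```python
import torch
import torch.nn.functional as F

def pad_to_divisor(img: torch.Tensor, divisor: int,
                   value: float = 0) -> torch.Tensor:
    """Pad the bottom/right of a (C, H, W) image so H and W
    become multiples of ``divisor``."""
    h, w = img.shape[-2:]
    pad_h = (divisor - h % divisor) % divisor
    pad_w = (divisor - w % divisor) % divisor
    # F.pad takes (left, right, top, bottom) for the last two dims.
    return F.pad(img, (0, pad_w, 0, pad_h), value=value)

# A 10x10 image needs (0, 6, 0, 6) to reach 16x16; a 15x15 needs (0, 1, 0, 1).
print(pad_to_divisor(torch.rand(3, 10, 10), 16, value=10).shape)  # (3, 16, 16)
print(pad_to_divisor(torch.rand(3, 15, 15), 16, value=10).shape)  # (3, 16, 16)
```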