Skip to content
Snippets Groups Projects
Unverified Commit 931db990 authored by Mashiro's avatar Mashiro Committed by GitHub
Browse files

[Enhance] Enhance img data preprocessor (#290)

* fix BaseDataPreprocessor

* fix BaseDataPreprocessor

* change device type to torch.device

* change device type to torch.device

* fix cpu method of base model

* Allow ImgDataPreprocessor do not normalize

* remove unnecessary type ignore

* make mean and std optional

* refine docstring
parent 8b3675a2
No related branches found
No related tags found
No related merge requests found
......@@ -151,16 +151,14 @@ class ImgDataPreprocessor(BaseDataPreprocessor):
constructor of :class:`BaseDataset`.
Args:
mean (Sequence[float or int], optional): The pixel mean of image
channels. If ``bgr_to_rgb=True`` it means the mean value of R,
G, B channels. If it is not specified, images will not be
normalized. Defaults to None.
std (Sequence[float or int], optional): The pixel standard deviation
of image channels. If ``bgr_to_rgb=True`` it means the standard
deviation of R, G, B channels. If it is not specified, images
will not be normalized. Defaults to None.
pad_size_divisor (int): The size of padded image should be
divisible by ``pad_size_divisor``. Defaults to 1.
pad_value (float or int): The padded pixel value. Defaults to 0.
......@@ -168,27 +166,40 @@ class ImgDataPreprocessor(BaseDataPreprocessor):
Defaults to False.
rgb_to_bgr (bool): Whether to convert images from RGB to BGR.
Defaults to False.
Note:
If images do not need to be normalized, ``mean`` and ``std`` should
both be set to None; otherwise both of them should be set to tuples
of corresponding values.
"""
def __init__(self,
             mean: Optional[Sequence[Union[float, int]]] = None,
             std: Optional[Sequence[Union[float, int]]] = None,
             pad_size_divisor: int = 1,
             pad_value: Union[float, int] = 0,
             bgr_to_rgb: bool = False,
             rgb_to_bgr: bool = False):
    """Initialize the image data preprocessor.

    Args:
        mean (Sequence[float or int], optional): The pixel mean of image
            channels (length 1 for gray images, 3 for RGB). If ``None``,
            images will not be normalized. Defaults to None.
        std (Sequence[float or int], optional): The pixel standard
            deviation of image channels (length 1 or 3). If ``None``,
            images will not be normalized. Defaults to None.
        pad_size_divisor (int): The size of the padded image should be
            divisible by this value. Defaults to 1.
        pad_value (float or int): The padded pixel value. Defaults to 0.
        bgr_to_rgb (bool): Whether to convert images from BGR to RGB.
            Defaults to False.
        rgb_to_bgr (bool): Whether to convert images from RGB to BGR.
            Defaults to False.

    Raises:
        AssertionError: If both channel conversions are requested, if only
            one of ``mean``/``std`` is given, or if their lengths are not
            1 or 3.
    """
    super().__init__()
    assert not (bgr_to_rgb and rgb_to_bgr), (
        '`bgr2rgb` and `rgb2bgr` cannot be set to True at the same time')
    # Normalization is all-or-nothing: mean and std must be given together.
    assert (mean is None) == (std is None), (
        'mean and std should be both None or tuple')
    if mean is not None:
        assert len(mean) == 3 or len(mean) == 1, (
            'The length of mean should be 1 or 3 to be compatible with '
            f'RGB or gray image, but got {len(mean)}')
        assert len(std) == 3 or len(std) == 1, (  # type: ignore
            'The length of std should be 1 or 3 to be compatible with RGB '  # type: ignore # noqa: E501
            f'or gray image, but got {len(std)}')
        self._enable_normalize = True
        # Registered as non-persistent buffers so they move with the module
        # across devices but are excluded from the state dict.
        self.register_buffer('mean',
                             torch.tensor(mean).view(-1, 1, 1), False)
        self.register_buffer('std',
                             torch.tensor(std).view(-1, 1, 1), False)
    else:
        self._enable_normalize = False
    self.channel_conversion = rgb_to_bgr or bgr_to_rgb
    self.pad_size_divisor = pad_size_divisor
    self.pad_value = pad_value
......@@ -214,7 +225,8 @@ class ImgDataPreprocessor(BaseDataPreprocessor):
if self.channel_conversion:
inputs = [_input[[2, 1, 0], ...] for _input in inputs]
# Normalization.
inputs = [(_input - self.mean) / self.std for _input in inputs]
if self._enable_normalize:
inputs = [(_input - self.mean) / self.std for _input in inputs]
# Pad and stack Tensor.
batch_inputs = stack_batch(inputs, self.pad_size_divisor,
self.pad_value)
......
......@@ -55,10 +55,8 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
# initiate model without `preprocess_cfg`
data_processor = ImgDataPreprocessor()
self.assertFalse(data_processor.channel_conversion)
assert_allclose(data_processor.mean,
torch.tensor([127.5, 127.5, 127.5]).view(-1, 1, 1))
assert_allclose(data_processor.std,
torch.tensor([127.5, 127.5, 127.5]).view(-1, 1, 1))
self.assertFalse(hasattr(data_processor, 'mean'))
self.assertFalse(hasattr(data_processor, 'std'))
self.assertEqual(data_processor.pad_size_divisor, 1)
assert_allclose(data_processor.pad_value, torch.tensor(0))
# initiate model with preprocess_cfg` and feat keys
......@@ -68,6 +66,7 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
std=[255, 255, 255],
pad_size_divisor=16,
pad_value=10)
self.assertTrue(data_processor._enable_normalize)
self.assertTrue(data_processor.channel_conversion, True)
assert_allclose(data_processor.mean,
torch.tensor([0, 0, 0]).view(-1, 1, 1))
......@@ -77,14 +76,26 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
self.assertEqual(data_processor.pad_size_divisor, 16)
with self.assertRaisesRegex(AssertionError, 'The length of mean'):
ImgDataPreprocessor(mean=(1, 2))
ImgDataPreprocessor(mean=(1, 2), std=(1, 2, 3))
with self.assertRaisesRegex(AssertionError, 'The length of std'):
ImgDataPreprocessor(std=(1, 2))
ImgDataPreprocessor(mean=(1, 2, 3), std=(1, 2))
with self.assertRaisesRegex(AssertionError, '`bgr2rgb` and `rgb2bgr`'):
ImgDataPreprocessor(bgr_to_rgb=True, rgb_to_bgr=True)
with self.assertRaisesRegex(AssertionError, 'mean and std should be'):
ImgDataPreprocessor(
bgr_to_rgb=True,
mean=None,
std=[255, 255, 255],
pad_size_divisor=16,
pad_value=10)
data_processor = ImgDataPreprocessor(
bgr_to_rgb=True, pad_size_divisor=16, pad_value=10)
self.assertFalse(data_processor._enable_normalize)
def test_forward(self):
# Test `pad_value`, `to_rgb`, `pad_size_divisor`.
data_preprocessor = ImgDataPreprocessor(
......@@ -104,12 +115,32 @@ class TestImageDataPreprocessor(TestBaseDataPreprocessor):
]
std = torch.tensor([1, 2, 3]).view(-1, 1, 1)
inputs1 = (inputs1[[2, 1, 0], ...] - 127.5) / std
inputs2 = (inputs2[[2, 1, 0], ...] - 127.5) / std
inputs1 = F.pad(inputs1, (0, 6, 0, 6), value=10)
inputs2 = F.pad(inputs2, (0, 1, 0, 1), value=10)
target_inputs1 = (inputs1.clone()[[2, 1, 0], ...] - 127.5) / std
target_inputs2 = (inputs2.clone()[[2, 1, 0], ...] - 127.5) / std
target_inputs1 = F.pad(target_inputs1, (0, 6, 0, 6), value=10)
target_inputs2 = F.pad(target_inputs2, (0, 1, 0, 1), value=10)
target_inputs = [target_inputs1, target_inputs2]
inputs, data_samples = data_preprocessor(data, True)
target_data_samples = [data_sample1, data_sample2]
for input_, data_sample, target_input, target_data_sample in zip(
inputs, data_samples, target_inputs, target_data_samples):
assert_allclose(input_, target_input)
assert_allclose(data_sample.bboxes, target_data_sample.bboxes)
# Test image without normalization.
data_preprocessor = ImgDataPreprocessor(
pad_size_divisor=16,
pad_value=10,
rgb_to_bgr=True,
)
target_inputs1 = (inputs1.clone()[[2, 1, 0], ...])
target_inputs2 = (inputs2.clone()[[2, 1, 0], ...])
target_inputs1 = F.pad(target_inputs1, (0, 6, 0, 6), value=10)
target_inputs2 = F.pad(target_inputs2, (0, 1, 0, 1), value=10)
target_inputs = [inputs1, inputs2]
target_inputs = [target_inputs1, target_inputs2]
inputs, data_samples = data_preprocessor(data, True)
target_data_samples = [data_sample1, data_sample2]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment