[Feature]: Add parameter schedulers. (#22)

* [Feature]: Add parameter schedulers. * update * update * update * update * add docstring to lr and momentum * resolve comments

[Feature]: Add parameter schedulers. (#22)
7905f039 · RangiLyu · GitHub · 41e1191c · 7905f039 · 7905f039
Unverified Commit 7905f039 authored 3 years ago by RangiLyu Committed by GitHub 3 years ago
--- a/mmengine/optim/scheduler/__init__.py
+++ b/mmengine/optim/scheduler/__init__.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from .lr_scheduler import (ConstantLR, CosineAnnealingLR, ExponentialLR,
+                           LinearLR, MultiStepLR, StepLR)
+from .momentum_scheduler import (ConstantMomentum, CosineAnnealingMomentum,
+                                 ExponentialMomentum, LinearMomentum,
+                                 MultiStepMomentum, StepMomentum)
+from .param_scheduler import (ConstantParamScheduler,
+                              CosineAnnealingParamScheduler,
+                              ExponentialParamScheduler, LinearParamScheduler,
+                              MultiStepParamScheduler, StepParamScheduler,
+                              _ParamScheduler)
+__all__ = [
+    'ConstantLR', 'CosineAnnealingLR', 'ExponentialLR', 'LinearLR',
+    'MultiStepLR', 'StepLR', 'ConstantMomentum', 'CosineAnnealingMomentum',
+    'ExponentialMomentum', 'LinearMomentum', 'MultiStepMomentum',
+    'StepMomentum', 'ConstantParamScheduler', 'CosineAnnealingParamScheduler',
+    'ExponentialParamScheduler', 'LinearParamScheduler',
+    'MultiStepParamScheduler', 'StepParamScheduler', '_ParamScheduler'
+]
--- a/mmengine/optim/scheduler/lr_scheduler.py
+++ b/mmengine/optim/scheduler/lr_scheduler.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List
+import torch
+from mmengine.registry import PARAM_SCHEDULERS
+from .param_scheduler import (INF, ConstantParamScheduler,
+                              CosineAnnealingParamScheduler,
+                              ExponentialParamScheduler, LinearParamScheduler,
+                              MultiStepParamScheduler, StepParamScheduler)
+@PARAM_SCHEDULERS.register_module()
+class ConstantLR(ConstantParamScheduler):
+    """Decays the learning rate value of each parameter group by a small
+    constant factor until the number of epoch reaches a pre-defined milestone:
+    ``end``. Notice that such decay can happen simultaneously with other
+    changes to the learning rate value from outside this scheduler.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        factor (float): The number we multiply learning rate until the
+            milestone. Defaults to 1./3.
+        begin (int): Step at which to start updating the learning rate.
+            Defaults to 0.
+        end (int): Step at which to stop updating the learning rate.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without state
+            dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled learning rate is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the learning rate for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 factor: float = 1.0 / 3,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='lr',
+            factor=factor,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class CosineAnnealingLR(CosineAnnealingParamScheduler):
+    r"""Set the learning rate of each parameter group using a cosine annealing
+    schedule, where :math:`\eta_{max}` is set to the initial value and
+    :math:`T_{cur}` is the number of epochs since the last restart in SGDR:
+    .. math::
+        \begin{aligned}
+            \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1
+            + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right),
+            & T_{cur} \neq (2k+1)T_{max}; \\
+            \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min})
+            \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right),
+            & T_{cur} = (2k+1)T_{max}.
+        \end{aligned}
+    Notice that because the schedule
+    is defined recursively, the learning rate can be simultaneously modified
+    outside this scheduler by other operators. If the learning rate is set
+    solely by this scheduler, the learning rate at each step becomes:
+    .. math::
+        \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 +
+        \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right)
+    It has been proposed in
+    `SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this
+    only implements the cosine annealing part of SGDR, and not the restarts.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        T_max (int): Maximum number of iterations.
+        eta_min (float): Minimum learning rate. Defaults to 0.
+        begin (int): Step at which to start updating the learning rate.
+            Defaults to 0.
+        end (int): Step at which to stop updating the learning rate.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled learning rate is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the learning rate for each update.
+            Defaults to False.
+    .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
+        https://arxiv.org/abs/1608.03983
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 T_max: int,
+                 eta_min: int = 0,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='lr',
+            T_max=T_max,
+            eta_min=eta_min,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class ExponentialLR(ExponentialParamScheduler):
+    """Decays the learning rate of each parameter group by gamma every epoch.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        gamma (float): Multiplicative factor of learning rate decay.
+        begin (int): Step at which to start updating the learning rate.
+            Defaults to 0.
+        end (int): Step at which to stop updating the learning rate.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled learning rate is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the learning rate for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 gamma: float,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='lr',
+            gamma=gamma,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class LinearLR(LinearParamScheduler):
+    """Decays the learning rate of each parameter group by linearly changing
+    small multiplicative factor until the number of epoch reaches a pre-defined
+    milestone: ``end``.
+    Notice that such decay can happen simultaneously with other changes to the
+    learning rate from outside this scheduler.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        start_factor (float): The number we multiply learning rate in the
+            first epoch. The multiplication factor changes towards end_factor
+            in the following epochs. Defaults to 1./3.
+        end_factor (float): The number we multiply learning rate at the end
+            of linear changing process. Defaults to 1.0.
+        begin (int): Step at which to start updating the learning rate.
+            Defaults to 0.
+        end (int): Step at which to stop updating the learning rate.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled learning rate is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the learning rate for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 start_factor: float = 1.0 / 3,
+                 end_factor: float = 1.0,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='lr',
+            start_factor=start_factor,
+            end_factor=end_factor,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class MultiStepLR(MultiStepParamScheduler):
+    """Decays the specified learning rate in each parameter group by gamma once
+    the number of epoch reaches one of the milestones. Notice that such decay
+    can happen simultaneously with other changes to the learning rate from
+    outside this scheduler.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        milestones (list): List of epoch indices. Must be increasing.
+        gamma (float): Multiplicative factor of learning rate decay.
+            Defaults to 0.1.
+        begin (int): Step at which to start updating the learning rate.
+            Defaults to 0.
+        end (int): Step at which to stop updating the learning rate.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled learning rate is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the learning rate for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 milestones: List[int],
+                 gamma: float = 0.1,
+                 last_step: int = -1,
+                 begin: int = 0,
+                 end: int = INF,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='lr',
+            milestones=milestones,
+            gamma=gamma,
+            last_step=last_step,
+            begin=begin,
+            end=end,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class StepLR(StepParamScheduler):
+    """Decays the learning rate of each parameter group by gamma every
+    step_size epochs. Notice that such decay can happen simultaneously with
+    other changes to the learning rate from outside this scheduler.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        step_size (int): Period of learning rate decay.
+        gamma (float): Multiplicative factor of learning rate decay.
+            Defaults to 0.1.
+        begin (int): Step at which to start updating the learning rate.
+            Defaults to 0.
+        end (int): Step at which to stop updating the learning rate.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled learning rate is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the learning rate for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 step_size: int,
+                 gamma: float = 0.1,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='lr',
+            step_size=step_size,
+            gamma=gamma,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
--- a/mmengine/optim/scheduler/momentum_scheduler.py
+++ b/mmengine/optim/scheduler/momentum_scheduler.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List
+import torch
+from mmengine.registry import PARAM_SCHEDULERS
+from .param_scheduler import (INF, ConstantParamScheduler,
+                              CosineAnnealingParamScheduler,
+                              ExponentialParamScheduler, LinearParamScheduler,
+                              MultiStepParamScheduler, StepParamScheduler)
+@PARAM_SCHEDULERS.register_module()
+class ConstantMomentum(ConstantParamScheduler):
+    """Decays the momentum value of each parameter group by a small constant
+    factor until the number of epoch reaches a pre-defined milestone: ``end``.
+    Notice that such decay can happen simultaneously with other changes to the
+    momentum value from outside this scheduler.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        factor (float): The number we multiply momentum until the milestone.
+            Defaults to 1./3.
+        begin (int): Step at which to start updating the momentum.
+            Defaults to 0.
+        end (int): Step at which to stop updating the momentum.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without state
+            dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled momentum is updated by epochs.
+            Defaults to True.
+        verbose (bool): Whether to print the momentum for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 factor: float = 1.0 / 3,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='momentum',
+            factor=factor,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class CosineAnnealingMomentum(CosineAnnealingParamScheduler):
+    r"""Set the momentum of each parameter group using a cosine annealing
+    schedule, where :math:`\eta_{max}` is set to the initial value and
+    :math:`T_{cur}` is the number of epochs since the last restart in SGDR:
+    .. math::
+        \begin{aligned}
+            \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1
+            + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right),
+            & T_{cur} \neq (2k+1)T_{max}; \\
+            \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min})
+            \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right),
+            & T_{cur} = (2k+1)T_{max}.
+        \end{aligned}
+    Notice that because the schedule
+    is defined recursively, the momentum can be simultaneously modified
+    outside this scheduler by other operators. If the momentum is set
+    solely by this scheduler, the momentum at each step becomes:
+    .. math::
+        \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 +
+        \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right)
+    It has been proposed in
+    `SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this
+    only implements the cosine annealing part of SGDR, and not the restarts.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        T_max (int): Maximum number of iterations.
+        eta_min (float): Minimum momentum value. Defaults to 0.
+        begin (int): Step at which to start updating the momentum.
+            Defaults to 0.
+        end (int): Step at which to stop updating the momentum.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled momentum is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the momentum for each update.
+            Defaults to False.
+    .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
+        https://arxiv.org/abs/1608.03983
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 T_max: int,
+                 eta_min: int = 0,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='momentum',
+            T_max=T_max,
+            eta_min=eta_min,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class ExponentialMomentum(ExponentialParamScheduler):
+    """Decays the momentum of each parameter group by gamma every epoch.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        gamma (float): Multiplicative factor of momentum value decay.
+        begin (int): Step at which to start updating the momentum.
+            Defaults to 0.
+        end (int): Step at which to stop updating the momentum.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled momentum is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the momentum for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 gamma: float,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='momentum',
+            gamma=gamma,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class LinearMomentum(LinearParamScheduler):
+    """Decays the momentum of each parameter group by linearly changing
+    small multiplicative factor until the number of epoch reaches a pre-defined
+    milestone: ``end``.
+    Notice that such decay can happen simultaneously with other changes to the
+    momentum from outside this scheduler.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        start_factor (float): The number we multiply momentum in the
+            first epoch. The multiplication factor changes towards end_factor
+            in the following epochs. Defaults to 1./3.
+        end_factor (float): The number we multiply momentum at the end
+            of linear changing process. Defaults to 1.0.
+        begin (int): Step at which to start updating the momentum.
+            Defaults to 0.
+        end (int): Step at which to stop updating the momentum.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled momentum is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the momentum for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 start_factor: float = 1.0 / 3,
+                 end_factor: float = 1.0,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='momentum',
+            start_factor=start_factor,
+            end_factor=end_factor,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class MultiStepMomentum(MultiStepParamScheduler):
+    """Decays the specified momentum in each parameter group by gamma once the
+    number of epoch reaches one of the milestones. Notice that such decay can
+    happen simultaneously with other changes to the momentum from outside this
+    scheduler.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        milestones (list): List of epoch indices. Must be increasing.
+        gamma (float): Multiplicative factor of momentum value decay.
+            Defaults to 0.1.
+        begin (int): Step at which to start updating the momentum.
+            Defaults to 0.
+        end (int): Step at which to stop updating the momentum.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled momentum is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the momentum for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 milestones: List[int],
+                 gamma: float = 0.1,
+                 last_step: int = -1,
+                 begin: int = 0,
+                 end: int = INF,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='momentum',
+            milestones=milestones,
+            gamma=gamma,
+            last_step=last_step,
+            begin=begin,
+            end=end,
+            by_epoch=by_epoch,
+            verbose=verbose)
+@PARAM_SCHEDULERS.register_module()
+class StepMomentum(StepParamScheduler):
+    """Decays the momentum of each parameter group by gamma every step_size
+    epochs. Notice that such decay can happen simultaneously with other changes
+    to the momentum from outside this scheduler.
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        step_size (int): Period of momentum value decay.
+        gamma (float): Multiplicative factor of momentum value decay.
+            Defaults to 0.1.
+        begin (int): Step at which to start updating the momentum.
+            Defaults to 0.
+        end (int): Step at which to stop updating the momentum.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled momentum is updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the momentum for each update.
+            Defaults to False.
+    """
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 step_size: int,
+                 gamma: float = 0.1,
+                 begin: int = 0,
+                 end: int = INF,
+                 last_step: int = -1,
+                 by_epoch: bool = True,
+                 verbose: bool = False):
+        super().__init__(
+            optimizer,
+            param_name='momentum',
+            step_size=step_size,
+            gamma=gamma,
+            begin=begin,
+            end=end,
+            last_step=last_step,
+            by_epoch=by_epoch,
+            verbose=verbose)
--- a/mmengine/optim/scheduler/param_scheduler.py
+++ b/mmengine/optim/scheduler/param_scheduler.py
--- a/mmengine/registry/__init__.py
+++ b/mmengine/registry/__init__.py
 # Copyright (c) OpenMMLab. All rights reserved.
 from .registry import Registry, build_from_cfg
 from .root import (DATA_SAMPLERS, DATASETS, HOOKS, MODELS,
-                   OPTIMIZER_CONSTRUCTORS, OPTIMIZERS, RUNNER_CONSTRUCTORS,
+                   OPTIMIZER_CONSTRUCTORS, OPTIMIZERS, PARAM_SCHEDULERS,
-                   RUNNERS, TASK_UTILS, TRANSFORMS, WEIGHT_INITIALIZERS)
+                   RUNNER_CONSTRUCTORS, RUNNERS, TASK_UTILS, TRANSFORMS,
+                   WEIGHT_INITIALIZERS)
 __all__ = [
    'Registry', 'build_from_cfg', 'RUNNERS', 'RUNNER_CONSTRUCTORS', 'HOOKS',
    'DATASETS', 'DATA_SAMPLERS', 'TRANSFORMS', 'MODELS', 'WEIGHT_INITIALIZERS',
-    'OPTIMIZERS', 'OPTIMIZER_CONSTRUCTORS', 'TASK_UTILS'
+    'OPTIMIZERS', 'OPTIMIZER_CONSTRUCTORS', 'TASK_UTILS', 'PARAM_SCHEDULERS'
 ]
--- a/mmengine/registry/root.py
+++ b/mmengine/registry/root.py
@@ -29,6 +29,8 @@ WEIGHT_INITIALIZERS = Registry('weight initializer')
 OPTIMIZERS = Registry('optimizer')
 # manage constructors that customize the optimization hyperparameters.
 OPTIMIZER_CONSTRUCTORS = Registry('optimizer constructor')
+# mangage all kinds of parameter schedulers like `MultiStepLR`
+PARAM_SCHEDULERS = Registry('parameter scheduler')
 # manage task-specific modules like anchor generators and box coders
 TASK_UTILS = Registry('task util')
--- a/tests/test_optim/test_scheduler/test_lr_scheduler.py
+++ b/tests/test_optim/test_scheduler/test_lr_scheduler.py
+# Copyright (c) OpenMMLab. All rights reserved.
 import math
 from unittest import TestCase
@@ -39,7 +40,7 @@ class TestLRScheduler(TestCase):
            _ParamScheduler(self.optimizer, param_name='lr')
    def test_invalid_optimizer(self):
-        with self.assertRaisesRegex(TypeError, 'is not an Optimizer'):
+        with self.assertRaisesRegex(TypeError, 'should be an Optimizer'):
            StepLR('invalid_optimizer', step_size=1)
    def test_overwrite_optimzer_step(self):

--- a/tests/test_optim/test_scheduler/test_momentum_scheduler.py
+++ b/tests/test_optim/test_scheduler/test_momentum_scheduler.py
+# Copyright (c) OpenMMLab. All rights reserved.
 import math
 from unittest import TestCase
@@ -37,7 +38,7 @@ class TestMomentumScheduler(TestCase):
            self.model.parameters(), lr=0.01, momentum=0.05, weight_decay=5e-4)
    def test_invalid_optimizer(self):
-        with self.assertRaisesRegex(TypeError, 'is not an Optimizer'):
+        with self.assertRaisesRegex(TypeError, 'should be an Optimizer'):
            StepMomentum('invalid_optimizer', step_size=1)
    def test_overwrite_optimzer_step(self):

--- a/tests/test_optim/test_scheduler/test_param_scheduler.py
+++ b/tests/test_optim/test_scheduler/test_param_scheduler.py
+# Copyright (c) OpenMMLab. All rights reserved.
 import math
 from unittest import TestCase
@@ -42,7 +43,7 @@ class TestParameterScheduler(TestCase):
            _ParamScheduler(self.optimizer, param_name='lr')
    def test_invalid_optimizer(self):
-        with self.assertRaisesRegex(TypeError, 'is not an Optimizer'):
+        with self.assertRaisesRegex(TypeError, 'should be an Optimizer'):
            StepParamScheduler('invalid_optimizer', 'lr', step_size=1)
    def test_overwrite_optimzer_step(self):