From cfccabc6570ec55d16fafc78d775f395f30d179e Mon Sep 17 00:00:00 2001
From: Yining Li <liyining0712@gmail.com>
Date: Tue, 8 Mar 2022 15:12:11 +0800
Subject: [PATCH] [Feature] Support metric prefix in BaseEvaluator (#82)

* support metric prefix in BaseEvaluator

* update docs

* use class attribute default_prefix
---
 docs/zh_cn/tutorials/evaluator.md           | 23 +++++--
 mmengine/evaluator/base.py                  | 28 ++++++++-
 mmengine/evaluator/composed_evaluator.py    |  2 +-
 tests/test_evaluator/test_base_evaluator.py | 67 +++++++++++++++------
 4 files changed, 93 insertions(+), 27 deletions(-)

diff --git a/docs/zh_cn/tutorials/evaluator.md b/docs/zh_cn/tutorials/evaluator.md
index cfe2aca9..f9c07901 100644
--- a/docs/zh_cn/tutorials/evaluator.md
+++ b/docs/zh_cn/tutorials/evaluator.md
@@ -46,15 +46,17 @@ validation_cfg=dict(
 )
 ```
 
-When multiple evaluators are used, metrics with the same name may appear. For example, the config below uses 2 classification accuracy evaluators with different arguments, and both of them report a metric named accuracy. In this case, the `prefix` argument has to be set on the evaluators to avoid ambiguity: the `prefix` of an evaluator is automatically prepended to its metric names, so that homonymous metrics can be told apart.
+When multiple evaluators are used, metrics with the same name may appear. For example, the config below uses 2 `COCOEvaluator`s to evaluate the bounding box and keypoint predictions respectively, and the metrics of both include `AP`, `AR`, etc. To avoid the ambiguity caused by homonymous metrics, `Evaluator` supports adding a prefix to metric names via the `prefix` argument. Usually an `Evaluator` has a default prefix, and users can also specify it in the config file.
 
 ```python
 validation_cfg=dict(
     evaluator=[
-        dict(type='Accuracy', top_k=1, prefix='top1'),
-        dict(type='Accuracy', top_k=5, prefix='top5')
+        dict(type='COCO', iou_type='bbox'),  # use the default prefix `COCO`
+        dict(type='COCO', iou_type='keypoints', prefix='COCOKpts')  # use a custom prefix `COCOKpts`
     ],
-    main_metric='top1_accuracy',  # the prefix 'top1' is automatically added to the metric name to distinguish homonymous metrics
+    # use the AP metric with the prefix COCO as the main metric
+    # if there is no ambiguity between homonymous metrics, the prefix can be omitted here and only the metric name given
+    main_metric='COCO.AP',
     interval=10,
     by_epoch=True,
 )
 ```
@@ -85,7 +87,9 @@ validation_cfg=dict(
 The `process()` method has 2 input arguments: the test data samples `data_samples` and the model predictions `predictions`. We take the sample category labels and the classification predictions from them respectively, and store them in `self.results`.
 
-The `compute_metrics()`method has 1 input argument `results`, which holds the results of all batches of test data processed by the `process()` method. The sample category labels and the classification predictions are taken from it to compute the classification accuracy `acc`. Finally, the computed metrics are returned as a dictionary.
+The `compute_metrics()` method has 1 input argument `results`, which holds the results of all batches of test data processed by the `process()` method. The sample category labels and the classification predictions are taken from it to compute the classification accuracy `acc`. Finally, the computed metrics are returned as a dictionary.
+
+In addition, we recommend assigning a value to the class attribute `default_prefix` in the subclass. If `prefix` is not specified in the initialization arguments (i.e. in the config), `default_prefix` will automatically be used as the prefix of the metric names. Meanwhile, the `default_prefix` value and all the metrics of the evaluator should be described in the docstring.
 
 The specific implementation is as follows:
 
@@ -97,6 +101,15 @@ import numpy as np
 
 @EVALUATORS.register_module()
 class Accuracy(BaseEvaluator):
+    """ Accuracy Evaluator
+
+    Default prefix: ACC
+
+    Metrics:
+        - accuracy: classification accuracy
+    """
+
+    default_prefix = 'ACC'
 
     def process(self, data_samples: Dict, predictions: Dict):
         """Process one batch of data and predictions. The processed
diff --git a/mmengine/evaluator/base.py b/mmengine/evaluator/base.py
index 51fbd17e..6e676c11 100644
--- a/mmengine/evaluator/base.py
+++ b/mmengine/evaluator/base.py
@@ -22,13 +22,24 @@ class BaseEvaluator(metaclass=ABCMeta):
     Then it collects all results together from all ranks if distributed
     training is used. Finally, it computes the metrics of the entire dataset.
 
+    A subclass of :class:`BaseEvaluator` should assign a meaningful value to
+    the class attribute `default_prefix`. See the argument `prefix` for details.
+
     Args:
         collect_device (str): Device name used for collecting results from
             different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
+        prefix (str, optional): The prefix that will be added to the metric
+            names to disambiguate homonymous metrics of different evaluators.
+            If prefix is not provided in the argument, self.default_prefix
+            will be used instead. Default: None
     """
 
-    def __init__(self, collect_device: str = 'cpu') -> None:
+    default_prefix: Optional[str] = None
+
+    def __init__(self,
+                 collect_device: str = 'cpu',
+                 prefix: Optional[str] = None) -> None:
         self._dataset_meta: Union[None, dict] = None
         self.collect_device = collect_device
         self.results: List[Any] = []
@@ -37,6 +48,11 @@ class BaseEvaluator(metaclass=ABCMeta):
         self.rank = rank
         self.world_size = world_size
 
+        self.prefix = prefix or self.default_prefix
+        if self.prefix is None:
+            warnings.warn('The prefix is not set in evaluator class '
+                          f'{self.__class__.__name__}.')
+
     @property
     def dataset_meta(self) -> Optional[dict]:
         return self._dataset_meta
@@ -97,9 +113,17 @@
 
         if self.rank == 0:
             # TODO: replace with mmengine.dist.master_only
-            metrics = [self.compute_metrics(results)]
+            metrics = self.compute_metrics(results)
+            # Add prefix to metric names
+            if self.prefix:
+                metrics = {
+                    '.'.join((self.prefix, k)): v
+                    for k, v in metrics.items()
+                }
+            metrics = [metrics]  # type: ignore
         else:
             metrics = [None]  # type: ignore
+
         # TODO: replace with mmengine.dist.broadcast
         if self.world_size > 1:
             metrics = dist.broadcast_object_list(metrics)
diff --git a/mmengine/evaluator/composed_evaluator.py b/mmengine/evaluator/composed_evaluator.py
index c0ba27f9..5d828cc9 100644
--- a/mmengine/evaluator/composed_evaluator.py
+++ b/mmengine/evaluator/composed_evaluator.py
@@ -6,7 +6,7 @@ from .base import BaseEvaluator
 
 
 class ComposedEvaluator:
-    """Wrapper class to compose multiple :class:`DatasetEvaluator` instances.
+    """Wrapper class to compose multiple :class:`BaseEvaluator` instances.
 
     Args:
         evaluators (Sequence[BaseEvaluator]): The evaluators to compose.
diff --git a/tests/test_evaluator/test_base_evaluator.py b/tests/test_evaluator/test_base_evaluator.py
index 382c8b4e..e1b52ff1 100644
--- a/tests/test_evaluator/test_base_evaluator.py
+++ b/tests/test_evaluator/test_base_evaluator.py
@@ -5,17 +5,35 @@ from unittest import TestCase
 
 import numpy as np
 
-from mmengine.evaluator import BaseEvaluator, ComposedEvaluator
+from mmengine.evaluator import BaseEvaluator, build_evaluator
 from mmengine.registry import EVALUATORS
 
 
 @EVALUATORS.register_module()
 class ToyEvaluator(BaseEvaluator):
+    """Evaluator that calculates the metric `accuracy` from predictions and
+    labels. Alternatively, this evaluator can return arbitrary dummy metrics
+    set in the config.
+
+    Default prefix: Toy
+
+    Metrics:
+        - accuracy (float): The classification accuracy. Only when
+            `dummy_metrics` is None.
+        - size (int): The number of test samples. Only when `dummy_metrics`
+            is None.
+
+    If `dummy_metrics` is set as a dict in the config, it will be
+    returned as the metrics and override `accuracy` and `size`.
+    """
+
+    default_prefix = 'Toy'
 
     def __init__(self,
                  collect_device: str = 'cpu',
+                 prefix: Optional[str] = None,
                  dummy_metrics: Optional[Dict] = None):
-        super().__init__(collect_device=collect_device)
+        super().__init__(collect_device=collect_device, prefix=prefix)
         self.dummy_metrics = dummy_metrics
 
     def process(self, data_samples, predictions):
@@ -39,6 +57,18 @@ class ToyEvaluator(BaseEvaluator):
         return metrics
 
 
+@EVALUATORS.register_module()
+class UnprefixedEvaluator(BaseEvaluator):
+    """Evaluator with unassigned `default_prefix` to test the warning
+    information."""
+
+    def process(self, data_samples: dict, predictions: dict) -> None:
+        pass
+
+    def compute_metrics(self, results: list) -> dict:
+        return dict(dummy=0.0)
+
+
 def generate_test_results(size, batch_size, pred, label):
     num_batch = math.ceil(size / batch_size)
     bs_residual = size % batch_size
@@ -51,16 +81,9 @@ def generate_test_results(size, batch_size, pred, label):
 
 class TestBaseEvaluator(TestCase):
 
-    def build_evaluator(self, cfg):
-        if isinstance(cfg, (list, tuple)):
-            evaluators = [EVALUATORS.build(_cfg) for _cfg in cfg]
-            return ComposedEvaluator(evaluators=evaluators)
-        else:
-            return EVALUATORS.build(cfg)
-
     def test_single_evaluator(self):
         cfg = dict(type='ToyEvaluator')
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
@@ -70,12 +93,12 @@ class TestBaseEvaluator(TestCase):
         evaluator.process(data_samples, predictions)
 
         metrics = evaluator.evaluate(size=size)
-        self.assertAlmostEqual(metrics['accuracy'], 1.0)
-        self.assertEqual(metrics['size'], size)
+        self.assertAlmostEqual(metrics['Toy.accuracy'], 1.0)
+        self.assertEqual(metrics['Toy.size'], size)
 
         # Test empty results
         cfg = dict(type='ToyEvaluator', dummy_metrics=dict(accuracy=1.0))
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
         with self.assertWarnsRegex(UserWarning, 'got empty `self._results`.'):
             evaluator.evaluate(0)
 
@@ -85,7 +108,7 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
@@ -96,9 +119,9 @@ class TestBaseEvaluator(TestCase):
 
         metrics = evaluator.evaluate(size=size)
 
-        self.assertAlmostEqual(metrics['accuracy'], 1.0)
-        self.assertAlmostEqual(metrics['mAP'], 0.0)
-        self.assertEqual(metrics['size'], size)
+        self.assertAlmostEqual(metrics['Toy.accuracy'], 1.0)
+        self.assertAlmostEqual(metrics['Toy.mAP'], 0.0)
+        self.assertEqual(metrics['Toy.size'], size)
 
     def test_ambiguate_metric(self):
 
@@ -107,7 +130,7 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
@@ -129,8 +152,14 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
         evaluator.dataset_meta = dataset_meta
+        self.assertDictEqual(evaluator.dataset_meta, dataset_meta)
         for _evaluator in evaluator.evaluators:
             self.assertDictEqual(_evaluator.dataset_meta, dataset_meta)
+
+    def test_prefix(self):
+        cfg = dict(type='UnprefixedEvaluator')
+        with self.assertWarnsRegex(UserWarning, 'The prefix is not set'):
+            _ = build_evaluator(cfg)
--
GitLab
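
For reference, the following is a minimal, self-contained sketch (not part of the patch itself) of how the prefix mechanism introduced above is intended to behave. It assumes an mmengine checkout that already contains this change; the `MyAccuracy` evaluator and its toy inputs are hypothetical and exist only for illustration.

```python
from typing import Any, Dict, List

from mmengine.evaluator import BaseEvaluator, build_evaluator
from mmengine.registry import EVALUATORS


@EVALUATORS.register_module()
class MyAccuracy(BaseEvaluator):
    """Toy accuracy evaluator used only for this illustration."""

    # Used as the metric-name prefix when `prefix` is not given in the config.
    default_prefix = 'ACC'

    def process(self, data_samples: Dict, predictions: Dict) -> None:
        # Store the per-batch label and prediction for later aggregation.
        self.results.append((data_samples['label'], predictions['pred']))

    def compute_metrics(self, results: List[Any]) -> Dict[str, float]:
        correct = sum(1 for label, pred in results if label == pred)
        return dict(accuracy=correct / len(results))


# With the default prefix, the metric key is expected to be 'ACC.accuracy'.
evaluator = build_evaluator(dict(type='MyAccuracy'))
evaluator.process(dict(label=1), dict(pred=1))
print(evaluator.evaluate(size=1))  # expected: {'ACC.accuracy': 1.0}

# A `prefix` given in the config overrides the default: 'Top1.accuracy'.
evaluator = build_evaluator(dict(type='MyAccuracy', prefix='Top1'))
evaluator.process(dict(label=1), dict(pred=1))
print(evaluator.evaluate(size=1))  # expected: {'Top1.accuracy': 1.0}
```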