Skip to content
Snippets Groups Projects
Unverified Commit a7d4b7c7 authored by Mashiro's avatar Mashiro Committed by GitHub
Browse files

[Enhance] Support configuring directory used to synchronize results in BaseMetric (#1074)


* [Enhance] Support configuring synchronize directory for BaseMetric

* Raise error if tmpdir is not an shared dirctory for ann ranks

* Raise error if tmpdir is not an shared dirctory for ann ranks

* Update mmengine/evaluator/metric.py

Co-authored-by: default avatarZaida Zhou <58739961+zhouzaida@users.noreply.github.com>

* refine

* Update mmengine/evaluator/metric.py

---------

Co-authored-by: default avatarZaida Zhou <58739961+zhouzaida@users.noreply.github.com>
parent 60b4c199
No related branches found
No related tags found
No related merge requests found
......@@ -1000,6 +1000,11 @@ def collect_results_cpu(result_part: list,
part_list = []
for i in range(world_size):
path = osp.join(tmpdir, f'part_{i}.pkl') # type: ignore
if not osp.exists(path):
raise FileNotFoundError(
f'{tmpdir} is not an shared directory for '
f'rank {i}, please make sure {tmpdir} is a shared '
'directory for all ranks!')
with open(path, 'rb') as f:
part_list.append(pickle.load(f))
# sort the results
......
......@@ -32,17 +32,28 @@ class BaseMetric(metaclass=ABCMeta):
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
will be used instead. Default: None
collect_dir: (str, optional): Synchronize directory for collecting data
from different ranks. This argument should only be configured when
``collect_device`` is 'cpu'. Defaults to None.
`New in version 0.7.3.`
"""
default_prefix: Optional[str] = None
def __init__(self,
collect_device: str = 'cpu',
prefix: Optional[str] = None) -> None:
prefix: Optional[str] = None,
collect_dir: Optional[str] = None) -> None:
if collect_dir is not None and collect_device != 'cpu':
raise ValueError('`collec_dir` could only be configured when '
"`collect_device='cpu'`")
self._dataset_meta: Union[None, dict] = None
self.collect_device = collect_device
self.results: List[Any] = []
self.prefix = prefix or self.default_prefix
self.collect_dir = collect_dir
if self.prefix is None:
print_log(
'The prefix is not set in metric class '
......@@ -107,7 +118,14 @@ class BaseMetric(metaclass=ABCMeta):
logger='current',
level=logging.WARNING)
results = collect_results(self.results, size, self.collect_device)
if self.collect_device == 'cpu':
results = collect_results(
self.results,
size,
self.collect_device,
tmpdir=self.collect_dir)
else:
collect_results(self.results, size, self.collect_device)
if is_main_process():
# cast all tensors in results list to cpu
......@@ -140,12 +158,18 @@ class DumpResults(BaseMetric):
collect_device (str): Device name used for collecting results from
different ranks during distributed training. Must be 'cpu' or
'gpu'. Defaults to 'cpu'.
collect_dir: (str, optional): Synchronize directory for collecting data
from different ranks. This argument should only be configured when
``collect_device`` is 'cpu'. Defaults to None.
`New in version 0.7.3.`
"""
def __init__(self,
out_file_path: str,
collect_device: str = 'cpu') -> None:
super().__init__(collect_device=collect_device)
collect_device: str = 'cpu',
collect_dir: Optional[str] = None) -> None:
super().__init__(
collect_device=collect_device, collect_dir=collect_dir)
if not out_file_path.endswith(('.pkl', '.pickle')):
raise ValueError('The output file must be a pkl file.')
self.out_file_path = out_file_path
......
......@@ -17,6 +17,13 @@ class TestDumpResults(TestCase):
'The output file must be a pkl file.'):
DumpResults(out_file_path='./results.json')
# collect_dir could only be configured when collect_device='cpu'
with self.assertRaises(ValueError):
DumpResults(
out_file_path='./results.json',
collect_device='gpu',
collect_dir='./tmp')
def test_process(self):
metric = DumpResults(out_file_path='./results.pkl')
data_samples = [dict(data=(Tensor([1, 2, 3]), Tensor([4, 5, 6])))]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment