Skip to content
Snippets Groups Projects
Unverified Commit cd257c90 authored by Mashiro's avatar Mashiro Committed by GitHub
Browse files

[Enhance] Support logging environment information during runner initiation (#384)

* Support logging environment information when initiating the runner

* Fix unit test

* fix as comment, save world_size

* log gpu num

* clear code and reformat log

* minor refine

* fix as comment

* minor refine

* clean the code

* clean the code

* remove save world_size in meta
parent 7e230238
No related branches found
No related tags found
No related merge requests found
...@@ -37,9 +37,9 @@ from mmengine.registry import (DATA_SAMPLERS, DATASETS, EVALUATOR, HOOKS, ...@@ -37,9 +37,9 @@ from mmengine.registry import (DATA_SAMPLERS, DATASETS, EVALUATOR, HOOKS,
RUNNERS, VISUALIZERS, DefaultScope, RUNNERS, VISUALIZERS, DefaultScope,
count_registered_modules) count_registered_modules)
from mmengine.registry.root import LOG_PROCESSORS from mmengine.registry.root import LOG_PROCESSORS
from mmengine.utils import (TORCH_VERSION, digit_version, get_git_hash, from mmengine.utils import (TORCH_VERSION, collect_env, digit_version,
is_list_of, is_seq_of, revert_sync_batchnorm, get_git_hash, is_list_of, is_seq_of,
set_multi_processing) revert_sync_batchnorm, set_multi_processing)
from mmengine.visualization import Visualizer from mmengine.visualization import Visualizer
from .base_loop import BaseLoop from .base_loop import BaseLoop
from .checkpoint import (_load_checkpoint, _load_checkpoint_to_model, from .checkpoint import (_load_checkpoint, _load_checkpoint_to_model,
...@@ -362,6 +362,9 @@ class Runner: ...@@ -362,6 +362,9 @@ class Runner:
# corresponding attribute needs a type hint. # corresponding attribute needs a type hint.
self.logger = self.build_logger(log_level=log_level) self.logger = self.build_logger(log_level=log_level)
# Collect and log environment information.
self._log_env(env_cfg)
# collect information of all modules registered in the registries # collect information of all modules registered in the registries
registries_info = count_registered_modules( registries_info = count_registered_modules(
self.work_dir if self.rank == 0 else None, verbose=False) self.work_dir if self.rank == 0 else None, verbose=False)
...@@ -2138,9 +2141,6 @@ class Runner: ...@@ -2138,9 +2141,6 @@ class Runner:
Args: Args:
param_scheduler (dict or list): The original parameter scheduler. param_scheduler (dict or list): The original parameter scheduler.
Returns:
list or dict: Parsed parameter scheduler configs or instances.
""" # noqa: E501 """ # noqa: E501
param_schedulers: Union[dict, list, _ParamScheduler] param_schedulers: Union[dict, list, _ParamScheduler]
if param_scheduler is None: if param_scheduler is None:
...@@ -2174,3 +2174,29 @@ class Runner: ...@@ -2174,3 +2174,29 @@ class Runner:
'contains key `type`, it means a scheduler config for a ' 'contains key `type`, it means a scheduler config for a '
'single optimizer. If it does not contain key `type`, it ' 'single optimizer. If it does not contain key `type`, it '
'means multiple lists of schedulers for multiple optimizers.') 'means multiple lists of schedulers for multiple optimizers.')
def _log_env(self, env_cfg: dict) -> None:
    """Log the environment information of the current task.

    Emits two log records: one containing the system environment
    (platform, CUDA, PyTorch, ...) and the runtime environment
    (env config, randomness settings, distributed-launch parameters),
    and one containing the full config text.

    Args:
        env_cfg (dict): The environment config of the runner.
    """
    # System-level information gathered by mmengine.utils.collect_env.
    system_env = collect_env()

    # Runtime information: merge the env config and randomness config,
    # then append the distributed-launch parameters of this run.
    runtime_env = OrderedDict()
    for cfg in (env_cfg, self._randomness_cfg):
        runtime_env.update(cfg)
    runtime_env['Distributed launcher'] = self._launcher
    runtime_env['Distributed training'] = self._distributed
    runtime_env['GPU number'] = self._world_size

    def _format(table) -> str:
        # One indented "key: value" pair per line.
        return '\n    ' + '\n    '.join(
            f'{k}: {v}' for k, v in table.items())

    dash_line = '-' * 60
    self.logger.info('\n' + dash_line + '\nSystem environment:' +
                     _format(system_env) + '\n'
                     '\nRuntime environment:' + _format(runtime_env) +
                     '\n' + dash_line + '\n')

    self.logger.info(f'Config:\n{self.cfg.pretty_text}')
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from .collect_env import collect_env
from .hub import load_url from .hub import load_url
from .manager import ManagerMeta, ManagerMixin from .manager import ManagerMeta, ManagerMixin
from .misc import (check_prerequisites, concat_list, deprecated_api_warning, from .misc import (check_prerequisites, concat_list, deprecated_api_warning,
...@@ -31,5 +32,5 @@ __all__ = [ ...@@ -31,5 +32,5 @@ __all__ = [
'digit_version', 'get_git_hash', 'TORCH_VERSION', 'load_url', 'digit_version', 'get_git_hash', 'TORCH_VERSION', 'load_url',
'ManagerMeta', 'ManagerMixin', 'set_multi_processing', 'has_batch_norm', 'ManagerMeta', 'ManagerMixin', 'set_multi_processing', 'has_batch_norm',
'is_abs', 'is_installed', 'call_command', 'get_installed_path', 'is_abs', 'is_installed', 'call_command', 'get_installed_path',
'check_install_package', 'is_abs', 'revert_sync_batchnorm' 'check_install_package', 'is_abs', 'revert_sync_batchnorm', 'collect_env'
] ]
# Copyright (c) OpenMMLab. All rights reserved.
"""This file holding some environment constant for sharing by other files."""
import os.path as osp
import subprocess
import sys
from collections import OrderedDict, defaultdict
import cv2
import numpy as np
import torch
import mmengine
from .parrots_wrapper import TORCH_VERSION, get_build_config, is_rocm_pytorch
def _get_cuda_home():
    """Return the CUDA (or ROCm) toolkit root for the current backend.

    Resolution order: the parrots build extension when running under
    parrots, ``ROCM_HOME`` on ROCm builds of PyTorch, and otherwise
    ``CUDA_HOME`` from ``torch.utils.cpp_extension``. The result may be
    ``None`` when no toolkit is installed.
    """
    if TORCH_VERSION == 'parrots':
        from parrots.utils.build_extension import CUDA_HOME
        return CUDA_HOME
    if is_rocm_pytorch():
        # ROCm builds of PyTorch expose the toolkit root as ROCM_HOME.
        from torch.utils.cpp_extension import ROCM_HOME
        return ROCM_HOME
    from torch.utils.cpp_extension import CUDA_HOME
    return CUDA_HOME
def collect_env():
    """Collect the information of the running environments.

    Returns:
        dict: The environment information. The following fields are contained.

            - sys.platform: The variable of ``sys.platform``.
            - Python: Python version.
            - CUDA available: Bool, indicating if CUDA is available.
            - numpy_random_seed: The current global NumPy random seed.
            - GPU devices: Device type of each GPU.
            - CUDA_HOME (optional): The env var ``CUDA_HOME``.
            - NVCC (optional): NVCC version.
            - GCC: GCC version, "n/a" if GCC is not installed.
            - MSVC: Microsoft Virtual C++ Compiler version, Windows only.
            - PyTorch: PyTorch version.
            - PyTorch compiling details: The output of
              ``torch.__config__.show()``.
            - TorchVision (optional): TorchVision version.
            - OpenCV (optional): OpenCV version.
            - MMEngine: MMENGINE version.
    """
    env_info = OrderedDict()
    env_info['sys.platform'] = sys.platform
    # Collapse the multi-line interpreter banner onto a single line.
    env_info['Python'] = sys.version.replace('\n', '')

    cuda_available = torch.cuda.is_available()
    env_info['CUDA available'] = cuda_available
    # First element of the MT19937 state vector of the global NumPy RNG;
    # reflects the seed that was last set.
    env_info['numpy_random_seed'] = np.random.get_state()[1][0]

    if cuda_available:
        # Group GPU indices that share a device name, producing entries
        # such as "GPU 0,1: <device name>".
        devices = defaultdict(list)
        for k in range(torch.cuda.device_count()):
            devices[torch.cuda.get_device_name(k)].append(str(k))
        for name, device_ids in devices.items():
            env_info['GPU ' + ','.join(device_ids)] = name

        CUDA_HOME = _get_cuda_home()
        env_info['CUDA_HOME'] = CUDA_HOME

        if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
            try:
                nvcc = osp.join(CUDA_HOME, 'bin/nvcc')
                nvcc = subprocess.check_output(f'"{nvcc}" -V', shell=True)
                nvcc = nvcc.decode('utf-8').strip()
                # Keep only the "Cuda compilation tools ..." release line,
                # dropping everything from the trailing "Build ..." part on.
                release = nvcc.rfind('Cuda compilation tools')
                build = nvcc.rfind('Build ')
                nvcc = nvcc[release:build].strip()
            except subprocess.SubprocessError:
                nvcc = 'Not Available'
            env_info['NVCC'] = nvcc

    try:
        # Check C++ Compiler.
        # For Unix-like, sysconfig has 'CC' variable like 'gcc -pthread ...',
        # indicating the compiler used, we use this to get the compiler name
        import sysconfig
        cc = sysconfig.get_config_var('CC')
        if cc:
            cc = osp.basename(cc.split()[0])
            cc_info = subprocess.check_output(f'{cc} --version', shell=True)
            # Only the first line of the version banner is kept.
            env_info['GCC'] = cc_info.decode('utf-8').partition(
                '\n')[0].strip()
        else:
            # on Windows, cl.exe is not in PATH. We need to find the path.
            # distutils.ccompiler.new_compiler() returns a msvccompiler
            # object and after initialization, path to cl.exe is found.
            import locale
            import os
            from distutils.ccompiler import new_compiler
            ccompiler = new_compiler()
            ccompiler.initialize()
            cc = subprocess.check_output(
                f'{ccompiler.cc}', stderr=subprocess.STDOUT, shell=True)
            # cl.exe prints a localized banner; decode with the console
            # encoding, falling back to the locale's preferred encoding.
            encoding = os.device_encoding(
                sys.stdout.fileno()) or locale.getpreferredencoding()
            env_info['MSVC'] = cc.decode(encoding).partition('\n')[0].strip()
            env_info['GCC'] = 'n/a'
    except subprocess.CalledProcessError:
        env_info['GCC'] = 'n/a'

    env_info['PyTorch'] = torch.__version__
    env_info['PyTorch compiling details'] = get_build_config()

    try:
        import torchvision
        env_info['TorchVision'] = torchvision.__version__
    except ModuleNotFoundError:
        # TorchVision is optional; simply omit the field when absent.
        pass

    env_info['OpenCV'] = cv2.__version__
    env_info['MMEngine'] = mmengine.__version__

    return env_info
# Copyright (c) OpenMMLab. All rights reserved.
import sys
from unittest import TestCase
import torch.cuda
import mmengine
from mmengine.utils.collect_env import _get_cuda_home, collect_env
class TestCollectEnv(TestCase):
    """Unit tests for ``collect_env`` and ``_get_cuda_home``."""

    def test_get_cuda_home(self):
        cuda_home = _get_cuda_home()
        # A toolkit root should be resolvable exactly when CUDA is
        # usable from torch.
        if torch.cuda.is_available():
            self.assertIsNotNone(cuda_home)
        else:
            self.assertIsNone(cuda_home)

    def test_collect_env(self):
        env_info = collect_env()

        # These keys are reported unconditionally on every platform.
        for key in ('sys.platform', 'Python', 'CUDA available', 'PyTorch',
                    'PyTorch compiling details', 'OpenCV', 'MMEngine', 'GCC'):
            assert key in env_info

        # CUDA-specific fields only appear when CUDA is available.
        if env_info['CUDA available']:
            assert 'CUDA_HOME' in env_info
            assert 'NVCC' in env_info

        # MSVC is only collected on Windows.
        if sys.platform == 'win32':
            assert 'MSVC' in env_info

        # Spot-check a few values against the interpreter's own data.
        assert env_info['sys.platform'] == sys.platform
        assert env_info['Python'] == sys.version.replace('\n', '')
        assert env_info['MMEngine'] == mmengine.__version__
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment