Skip to content
Snippets Groups Projects
Unverified Commit cd257c90 authored by Mashiro's avatar Mashiro Committed by GitHub
Browse files

[Enhance] Support logging environment information during runner initiation (#384)

* Support logging environment information when initiating the runner

* Fix unit test

* fix as comment, save world_size

* log gpu num

* clear code and reformat log

* minor refine

* fix as comment

* minor refine

* clean the code

* clean the code

* remove save world_size in meta
parent 7e230238
No related branches found
No related tags found
No related merge requests found
...@@ -37,9 +37,9 @@ from mmengine.registry import (DATA_SAMPLERS, DATASETS, EVALUATOR, HOOKS, ...@@ -37,9 +37,9 @@ from mmengine.registry import (DATA_SAMPLERS, DATASETS, EVALUATOR, HOOKS,
RUNNERS, VISUALIZERS, DefaultScope, RUNNERS, VISUALIZERS, DefaultScope,
count_registered_modules) count_registered_modules)
from mmengine.registry.root import LOG_PROCESSORS from mmengine.registry.root import LOG_PROCESSORS
from mmengine.utils import (TORCH_VERSION, digit_version, get_git_hash, from mmengine.utils import (TORCH_VERSION, collect_env, digit_version,
is_list_of, is_seq_of, revert_sync_batchnorm, get_git_hash, is_list_of, is_seq_of,
set_multi_processing) revert_sync_batchnorm, set_multi_processing)
from mmengine.visualization import Visualizer from mmengine.visualization import Visualizer
from .base_loop import BaseLoop from .base_loop import BaseLoop
from .checkpoint import (_load_checkpoint, _load_checkpoint_to_model, from .checkpoint import (_load_checkpoint, _load_checkpoint_to_model,
...@@ -362,6 +362,9 @@ class Runner: ...@@ -362,6 +362,9 @@ class Runner:
# corresponding attribute needs a type hint. # corresponding attribute needs a type hint.
self.logger = self.build_logger(log_level=log_level) self.logger = self.build_logger(log_level=log_level)
# Collect and log environment information.
self._log_env(env_cfg)
# collect information of all modules registered in the registries # collect information of all modules registered in the registries
registries_info = count_registered_modules( registries_info = count_registered_modules(
self.work_dir if self.rank == 0 else None, verbose=False) self.work_dir if self.rank == 0 else None, verbose=False)
...@@ -2138,9 +2141,6 @@ class Runner: ...@@ -2138,9 +2141,6 @@ class Runner:
Args: Args:
param_scheduler (dict or list): The original parameter scheduler. param_scheduler (dict or list): The original parameter scheduler.
Returns:
list or dict: Parsed parameter scheduler configs or instances.
""" # noqa: E501 """ # noqa: E501
param_schedulers: Union[dict, list, _ParamScheduler] param_schedulers: Union[dict, list, _ParamScheduler]
if param_scheduler is None: if param_scheduler is None:
...@@ -2174,3 +2174,29 @@ class Runner: ...@@ -2174,3 +2174,29 @@ class Runner:
'contains key `type`, it means a scheduler config for a ' 'contains key `type`, it means a scheduler config for a '
'single optimizer. If it does not contain key `type`, it ' 'single optimizer. If it does not contain key `type`, it '
'means multiple lists of schedulers for multiple optimizers.') 'means multiple lists of schedulers for multiple optimizers.')
def _log_env(self, env_cfg: dict) -> None:
    """Log the environment information of the current task.

    Emits two log records: one containing the system environment
    (platform, CUDA, PyTorch, ...) and the runtime environment
    (env config, randomness settings, distributed-launch parameters),
    and one containing the full config text.

    Args:
        env_cfg (dict): The environment config of the runner.
    """
    # System-level information gathered by mmengine.utils.collect_env.
    system_env = collect_env()

    # Runtime information: merge the env config and randomness config,
    # then append the distributed-launch parameters of this run.
    runtime_env = OrderedDict()
    for cfg in (env_cfg, self._randomness_cfg):
        runtime_env.update(cfg)
    runtime_env['Distributed launcher'] = self._launcher
    runtime_env['Distributed training'] = self._distributed
    runtime_env['GPU number'] = self._world_size

    def _format(table) -> str:
        # One indented "key: value" pair per line.
        return '\n    ' + '\n    '.join(
            f'{k}: {v}' for k, v in table.items())

    dash_line = '-' * 60
    self.logger.info('\n' + dash_line + '\nSystem environment:' +
                     _format(system_env) + '\n'
                     '\nRuntime environment:' + _format(runtime_env) +
                     '\n' + dash_line + '\n')

    self.logger.info(f'Config:\n{self.cfg.pretty_text}')
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from .collect_env import collect_env
from .hub import load_url from .hub import load_url
from .manager import ManagerMeta, ManagerMixin from .manager import ManagerMeta, ManagerMixin
from .misc import (check_prerequisites, concat_list, deprecated_api_warning, from .misc import (check_prerequisites, concat_list, deprecated_api_warning,
...@@ -31,5 +32,5 @@ __all__ = [ ...@@ -31,5 +32,5 @@ __all__ = [
'digit_version', 'get_git_hash', 'TORCH_VERSION', 'load_url', 'digit_version', 'get_git_hash', 'TORCH_VERSION', 'load_url',
'ManagerMeta', 'ManagerMixin', 'set_multi_processing', 'has_batch_norm', 'ManagerMeta', 'ManagerMixin', 'set_multi_processing', 'has_batch_norm',
'is_abs', 'is_installed', 'call_command', 'get_installed_path', 'is_abs', 'is_installed', 'call_command', 'get_installed_path',
'check_install_package', 'is_abs', 'revert_sync_batchnorm' 'check_install_package', 'is_abs', 'revert_sync_batchnorm', 'collect_env'
] ]
# Copyright (c) OpenMMLab. All rights reserved.
"""This file holding some environment constant for sharing by other files."""
import os.path as osp
import subprocess
import sys
from collections import OrderedDict, defaultdict
import cv2
import numpy as np
import torch
import mmengine
from .parrots_wrapper import TORCH_VERSION, get_build_config, is_rocm_pytorch
def _get_cuda_home():
    """Return the CUDA (or ROCm) toolkit root for the current backend.

    Resolution order: the parrots build extension when running under
    parrots, ``ROCM_HOME`` on ROCm builds of PyTorch, and otherwise
    ``CUDA_HOME`` from ``torch.utils.cpp_extension``. The result may be
    ``None`` when no toolkit is installed.
    """
    if TORCH_VERSION == 'parrots':
        from parrots.utils.build_extension import CUDA_HOME
        return CUDA_HOME
    if is_rocm_pytorch():
        # ROCm builds of PyTorch expose the toolkit root as ROCM_HOME.
        from torch.utils.cpp_extension import ROCM_HOME
        return ROCM_HOME
    from torch.utils.cpp_extension import CUDA_HOME
    return CUDA_HOME
def collect_env():
    """Collect the information of the running environments.

    Returns:
        dict: The environment information. The following fields are contained.

            - sys.platform: The variable of ``sys.platform``.
            - Python: Python version.
            - CUDA available: Bool, indicating if CUDA is available.
            - numpy_random_seed: The current global NumPy random seed.
            - GPU devices: Device type of each GPU.
            - CUDA_HOME (optional): The env var ``CUDA_HOME``.
            - NVCC (optional): NVCC version.
            - GCC: GCC version, "n/a" if GCC is not installed.
            - MSVC: Microsoft Virtual C++ Compiler version, Windows only.
            - PyTorch: PyTorch version.
            - PyTorch compiling details: The output of
              ``torch.__config__.show()``.
            - TorchVision (optional): TorchVision version.
            - OpenCV (optional): OpenCV version.
            - MMEngine: MMENGINE version.
    """
    env_info = OrderedDict()
    env_info['sys.platform'] = sys.platform
    # Collapse the multi-line interpreter banner onto a single line.
    env_info['Python'] = sys.version.replace('\n', '')

    cuda_available = torch.cuda.is_available()
    env_info['CUDA available'] = cuda_available
    # First element of the MT19937 state vector of the global NumPy RNG;
    # reflects the seed that was last set.
    env_info['numpy_random_seed'] = np.random.get_state()[1][0]

    if cuda_available:
        # Group GPU indices that share a device name, producing entries
        # such as "GPU 0,1: <device name>".
        devices = defaultdict(list)
        for k in range(torch.cuda.device_count()):
            devices[torch.cuda.get_device_name(k)].append(str(k))
        for name, device_ids in devices.items():
            env_info['GPU ' + ','.join(device_ids)] = name

        CUDA_HOME = _get_cuda_home()
        env_info['CUDA_HOME'] = CUDA_HOME

        if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
            try:
                nvcc = osp.join(CUDA_HOME, 'bin/nvcc')
                nvcc = subprocess.check_output(f'"{nvcc}" -V', shell=True)
                nvcc = nvcc.decode('utf-8').strip()
                # Keep only the "Cuda compilation tools ..." release line,
                # dropping everything from the trailing "Build ..." part on.
                release = nvcc.rfind('Cuda compilation tools')
                build = nvcc.rfind('Build ')
                nvcc = nvcc[release:build].strip()
            except subprocess.SubprocessError:
                nvcc = 'Not Available'
            env_info['NVCC'] = nvcc

    try:
        # Check C++ Compiler.
        # For Unix-like, sysconfig has 'CC' variable like 'gcc -pthread ...',
        # indicating the compiler used, we use this to get the compiler name
        import sysconfig
        cc = sysconfig.get_config_var('CC')
        if cc:
            cc = osp.basename(cc.split()[0])
            cc_info = subprocess.check_output(f'{cc} --version', shell=True)
            # Only the first line of the version banner is kept.
            env_info['GCC'] = cc_info.decode('utf-8').partition(
                '\n')[0].strip()
        else:
            # on Windows, cl.exe is not in PATH. We need to find the path.
            # distutils.ccompiler.new_compiler() returns a msvccompiler
            # object and after initialization, path to cl.exe is found.
            import locale
            import os
            from distutils.ccompiler import new_compiler
            ccompiler = new_compiler()
            ccompiler.initialize()
            cc = subprocess.check_output(
                f'{ccompiler.cc}', stderr=subprocess.STDOUT, shell=True)
            # cl.exe prints a localized banner; decode with the console
            # encoding, falling back to the locale's preferred encoding.
            encoding = os.device_encoding(
                sys.stdout.fileno()) or locale.getpreferredencoding()
            env_info['MSVC'] = cc.decode(encoding).partition('\n')[0].strip()
            env_info['GCC'] = 'n/a'
    except subprocess.CalledProcessError:
        env_info['GCC'] = 'n/a'

    env_info['PyTorch'] = torch.__version__
    env_info['PyTorch compiling details'] = get_build_config()

    try:
        import torchvision
        env_info['TorchVision'] = torchvision.__version__
    except ModuleNotFoundError:
        # TorchVision is optional; simply omit the field when absent.
        pass

    env_info['OpenCV'] = cv2.__version__
    env_info['MMEngine'] = mmengine.__version__

    return env_info
# Copyright (c) OpenMMLab. All rights reserved.
import sys
from unittest import TestCase
import torch.cuda
import mmengine
from mmengine.utils.collect_env import _get_cuda_home, collect_env
class TestCollectEnv(TestCase):
    """Unit tests for ``collect_env`` and ``_get_cuda_home``."""

    def test_get_cuda_home(self):
        cuda_home = _get_cuda_home()
        # A toolkit root should be resolvable exactly when CUDA is
        # usable from torch.
        if torch.cuda.is_available():
            self.assertIsNotNone(cuda_home)
        else:
            self.assertIsNone(cuda_home)

    def test_collect_env(self):
        env_info = collect_env()

        # These keys are reported unconditionally on every platform.
        for key in ('sys.platform', 'Python', 'CUDA available', 'PyTorch',
                    'PyTorch compiling details', 'OpenCV', 'MMEngine', 'GCC'):
            assert key in env_info

        # CUDA-specific fields only appear when CUDA is available.
        if env_info['CUDA available']:
            assert 'CUDA_HOME' in env_info
            assert 'NVCC' in env_info

        # MSVC is only collected on Windows.
        if sys.platform == 'win32':
            assert 'MSVC' in env_info

        # Spot-check a few values against the interpreter's own data.
        assert env_info['sys.platform'] == sys.platform
        assert env_info['Python'] == sys.version.replace('\n', '')
        assert env_info['MMEngine'] == mmengine.__version__
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment