æ­é…使用 Pytorch çš„åˆå§‹åŒ–模å—
Caffe2 中 Xavier åˆå§‹åŒ–æ–¹å¼ï¼Œåœ¨ Pytorch 中对应 `fan_in`, `normal` 模å¼çš„ `Kaiming` åˆå§‹åŒ–,通常用于åˆå§‹åŒ–å·ç§¯
使 `init_cfg` æˆä¸ºä¸€ä¸ªåˆ—è¡¨ï¼Œå…¶ä¸­çš„æ¯ä¸€ä¸ªå…ƒç´ 
# 对 conv1 åš Kaiming åˆå§‹åŒ–,对 conv2 åš Xavier åˆå§‹åŒ–
我们在调用 `ToyNet` çš„ `init_weights` 方法时,会链å¼åœ°è°ƒç”¨å­æ¨¡å— `ToyConv` çš„ `init_weights` 方法
type='Xavier')) + ]) +# 链å¼è°ƒç”¨ `ToyConv.init_weights()`,以自定义的方å¼åˆå§‹åŒ– +toy_net.init_weights() +``` + +``` +08/19 16:50:24 - mmengine - INFO - +conv1.weight - torch.Size([1, 1, 1, 1]): +KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 + +08/19 16:50:24 - mmengine - INFO - +conv1.bias - torch.Size([1]): +KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 + +08/19 16:50:24 - mmengine - INFO - +conv2.weight - torch.Size([1, 1, 1, 1]): +XavierInit: gain=1, distribution=normal, bias=0 + +08/19 16:50:24 - mmengine - INFO - +conv2.bias - torch.Size([1]): +KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 + +08/19 16:50:24 - mmengine - INFO - +custom_conv.custom_weight - torch.Size([1, 1, 1, 1]): +Initialized by user-defined `init_weights` in ToyConv + +08/19 16:50:24 - mmengine - INFO - +custom_conv.custom_bias - torch.Size([1]): +Initialized by user-defined `init_weights` in ToyConv +``` + +### å°ç»“ + +最åŽæˆ‘们对 `init_cfg` å’Œ `init_weights` 两ç§åˆå§‹åŒ–æ–¹å¼åšä¸€äº›æ€»ç»“: + +**1. é…ç½® `init_cfg` 控制åˆå§‹åŒ–** + +- 通常用于åˆå§‹åŒ–一些比较底层的模å—,例如å·ç§¯ã€çº¿æ€§å±‚ç‰ã€‚如果想通过 `init_cfg` é…置自定义模å—çš„åˆå§‹åŒ–æ–¹å¼ï¼Œéœ€è¦å°†ç›¸åº”çš„åˆå§‹åŒ–器注册到 `WEIGHT_INITIALIZERS` 里。 +- 动æ€åˆå§‹åŒ–特性,åˆå§‹åŒ–æ–¹å¼éš `init_cfg` 的值改å˜ã€‚ + +**2. 
[Kaiming](https://arxiv.org/abs/1502.01852)
能够通过é…ç½® `init_cfg` çµæ´»åœ°é€‰æ‹©åˆå§‹åŒ–æ–¹å¼ã€‚
(自定义[æ•°æ®å¤„ç†å™¨](#æ•°æ®å¤„ç†å™¨datapreprocessor)除外)
torchvision.transforms import ToTensor +from mmengine.model import BaseModel +from mmengine.evaluator import BaseMetric +from mmengine import Runner + + +training_data = datasets.FashionMNIST( + root="data", + train=True, + download=True, + transform=ToTensor() +) + +test_data = datasets.FashionMNIST( + root="data", + train=False, + download=True, + transform=ToTensor() +) + +train_dataloader = DataLoader(dataset=training_data, batch_size=64) +test_dataloader = DataLoader(dataset=test_data, batch_size=64) + + +class NeuralNetwork(BaseModel): + def __init__(self, data_preprocessor=None): + super(NeuralNetwork, self).__init__(data_preprocessor) + self.flatten = nn.Flatten() + self.linear_relu_stack = nn.Sequential( + nn.Linear(28*28, 512), + nn.ReLU(), + nn.Linear(512, 512), + nn.ReLU(), + nn.Linear(512, 10), + ) + self.loss = nn.CrossEntropyLoss() + + def forward(self, img, label, mode='tensor'): + x = self.flatten(img) + pred = self.linear_relu_stack(x) + loss = self.loss(pred, label) + if mode == 'loss': + return dict(loss=loss) + elif mode=='predict': + return pred.argmax(1), loss.item() + else: + return pred + + +class FashionMnistMetric(BaseMetric): + def process(self, data, preds) -> None: + # data å‚数为 Dataloader è¿”å›žçš„å…ƒç»„ï¼Œå³ (img, label) + # predict 为模型 `predict` 模å¼ä¸‹ï¼Œè¿”回的元组,分别为 `pred.argmax(1) å’Œ `loss`` + self.results.append(((data[1] == preds[0].cpu()).sum(), preds[1], len(preds[0]))) + + def compute_metrics(self, results): + correct, loss, batch_size = zip(*results) + test_loss, correct = sum(loss) / len(self.results), sum(correct) / sum(batch_size) + return dict(Accuracy=correct, Avg_loss=test_loss) + + +runner = Runner( + model=NeuralNetwork(), + work_dir='./work_dir', + train_dataloader=train_dataloader, + optim_wrapper=dict(optimizer=dict(type='SGD', lr=1e-3)), + train_cfg=dict(by_epoch=True, max_epochs=5, val_interval=1), + val_cfg=dict(fp16=True), + val_dataloader=test_dataloader, + val_evaluator=dict(metrics=FashionMnistMetric())) 
+runner.train() +``` + +在这个例åä¸ï¼Œ`NeuralNetwork.forward` å˜åœ¨ç€ä»¥ä¸‹è·¨æ¨¡å—的接å£çº¦å®šï¼š + +- 由于 `train_dataloader` 会返回一个 `(img, label)` å½¢å¼çš„å…ƒç»„ï¼Œå› æ¤ `forward` 接å£çš„å‰ä¸¤ä¸ªå‚数分别需è¦ä¸º `img` å’Œ `label`。 +- 由于 `forward` 在 `predict` 模å¼ä¸‹ä¼šè¿”回 `(pred, loss)` å½¢å¼çš„å…ƒç»„ï¼Œå› æ¤ `process` çš„ `preds` å‚数应当åŒæ ·ä¸ºç›¸åŒå½¢å¼çš„元组。 + +相比于 [Pytorch 官方示例](https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html#),MMEngine 的代ç æ›´åŠ ç®€æ´ï¼Œè®°å½•çš„æ—¥å¿—ä¹Ÿæ›´åŠ ä¸°å¯Œã€‚ + +## æ•°æ®å¤„ç†å™¨ï¼ˆDataPreprocessor) + +å¦‚æžœä½ çš„ç”µè„‘é…有 GPUï¼ˆæˆ–å…¶ä»–èƒ½å¤ŸåŠ é€Ÿè®ç»ƒçš„硬件,如 mpsã€ipu ç‰ï¼‰ï¼Œå¹¶è¿è¡Œäº†ä¸ŠèŠ‚的代ç ç¤ºä¾‹ã€‚ä½ ä¼šå‘现 Pytorch 的示例是在 CPU 上è¿è¡Œçš„,而 MMEngine 的示例是在 GPU 上è¿è¡Œçš„。`MMEngine` 是在何时把数æ®å’Œæ¨¡åž‹ä»Ž CPU æ¬è¿åˆ° GPU 的呢? + +äº‹å®žä¸Šï¼Œæ‰§è¡Œå™¨ä¼šåœ¨æž„é€ é˜¶æ®µå°†æ¨¡åž‹æ¬è¿åˆ°æŒ‡å®šè®¾å¤‡ï¼Œè€Œæ•°æ®åˆ™ä¼šåœ¨ `train_step`ã€`val_step`ã€`test_step` ä¸ï¼Œè¢«[基础数æ®å¤„ç†å™¨ï¼ˆBaseDataPreprocessor)](mmengine.model.BaseDataPreprocessor)æ¬è¿åˆ°æŒ‡å®šè®¾å¤‡ï¼Œè¿›ä¸€æ¥å°†å¤„ç†å¥½çš„æ•°æ®ä¼ 给模型。数æ®å¤„ç†å™¨ä½œä¸ºæ¨¡åž‹åŸºç±»çš„ä¸€ä¸ªå±žæ€§ï¼Œä¼šåœ¨æ¨¡åž‹åŸºç±»çš„æž„é€ è¿‡ç¨‹ä¸è¢«å®žä¾‹åŒ–。 + +为了体现数æ®å¤„ç†å™¨èµ·åˆ°çš„作用,我们ä»ç„¶ä»¥[上一节](接å£çº¦å®š)è®ç»ƒ FashionMNIST 为例, 实现了一个简易的数æ®å¤„ç†å™¨ï¼Œç”¨äºŽæ¬è¿æ•°æ®å’Œå½’一化: + +```python +from torch.optim import SGD +from mmengine.model import BaseDataPreprocessor, BaseModel + + +class NeuralNetwork1(NeuralNetwork): + + def __init__(self, data_preprocessor): + super().__init__(data_preprocessor=data_preprocessor) + self.data_preprocessor = data_preprocessor + + def train_step(self, data, optimizer): + img, label = self.data_preprocessor(data) + loss = self(img, label, mode='loss')['loss'].sum() + loss.backward() + optimizer.step() + optimizer.zero_grad() + return dict(loss=loss) + + def test_step(self, data): + img, label = self.data_preprocessor(data) + return self(img, label, mode='predict') + + def val_step(self, data): + img, label = self.data_preprocessor(data) + return self(img, 
åˆ™ä¼šä¼ å…¥ `training=False`