From 5da2bb6c81a1e5394998775db7ff7236814e72dd Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 5 Mar 2021 14:19:30 +0800 Subject: [PATCH] support TorchScript --- README.md | 11 +- src/models/backbones/mobilenetv2.py | 26 +++- src/models/backbones/wrapper.py | 33 ++++- torchscript/README.md | 18 +++ torchscript/__init__.py | 0 torchscript/export_torchscript.py | 46 +++++++ torchscript/modnet_torchscript.py | 258 ++++++++++++++++++++++++++++++++++++ 7 files changed, 378 insertions(+), 14 deletions(-) create mode 100755 torchscript/README.md create mode 100755 torchscript/__init__.py create mode 100755 torchscript/export_torchscript.py create mode 100755 torchscript/modnet_torchscript.py diff --git a/README.md b/README.md index 0904342..6974b23 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ WebCam Video Demo [Offline][ --> - **Colab Demo of Bokeh (Blur Background)** @@ -57,6 +58,10 @@ You can try [this Colab demo](https://colab.research.google.com/github/eyaler/av - **ONNX Version of MODNet** You can convert the pre-trained MODNet to an ONNX model by using [this code](onnx) (provided by [@manthan3C273](https://github.com/manthan3C273)). You can also try [this Colab demo](https://colab.research.google.com/drive/1P3cWtg8fnmu9karZHYDAtmm1vj1rgA-f?usp=sharing) for MODNet image matting (ONNX version). +- **TorchScript Version of MODNet** +You can convert the pre-trained MODNet to a TorchScript model by using [this code](torchscript) (provided by [@yarkable](https://github.com/yarkable)). + + ## Code We provide the [code](src/trainer.py) of MODNet training iteration, including: - **Supervised Training**: Train MODNet on a labeled matting dataset @@ -79,7 +84,7 @@ This project (**code, pre-trained models, demos, *etc.***) is released under the ## Acknowledgement - We thank [City University of Hong Kong](https://www.cityu.edu.hk/) and [SenseTime](https://www.sensetime.com/) for their support to this project. 
- We thank -        [the Gradio team](https://github.com/gradio-app/gradio), [@eyaler](https://github.com/eyaler), [@manthan3C273](https://github.com/manthan3C273), +        [the Gradio team](https://github.com/gradio-app/gradio), [@eyaler](https://github.com/eyaler), [@manthan3C273](https://github.com/manthan3C273), [@yarkable](https://github.com/yarkable), for their contributions to this repository or their cool applications based on MODNet. diff --git a/src/models/backbones/mobilenetv2.py b/src/models/backbones/mobilenetv2.py index 67cc138..709d352 100644 --- a/src/models/backbones/mobilenetv2.py +++ b/src/models/backbones/mobilenetv2.py @@ -136,17 +136,31 @@ class MobileNetV2(nn.Module): # Initialize weights self._init_weights() - def forward(self, x, feature_names=None): + def forward(self, x): # Stage1 - x = reduce(lambda x, n: self.features[n](x), list(range(0,2)), x) + x = self.features[0](x) + x = self.features[1](x) # Stage2 - x = reduce(lambda x, n: self.features[n](x), list(range(2,4)), x) + x = self.features[2](x) + x = self.features[3](x) # Stage3 - x = reduce(lambda x, n: self.features[n](x), list(range(4,7)), x) + x = self.features[4](x) + x = self.features[5](x) + x = self.features[6](x) # Stage4 - x = reduce(lambda x, n: self.features[n](x), list(range(7,14)), x) + x = self.features[7](x) + x = self.features[8](x) + x = self.features[9](x) + x = self.features[10](x) + x = self.features[11](x) + x = self.features[12](x) + x = self.features[13](x) # Stage5 - x = reduce(lambda x, n: self.features[n](x), list(range(14,19)), x) + x = self.features[14](x) + x = self.features[15](x) + x = self.features[16](x) + x = self.features[17](x) + x = self.features[18](x) # Classification if self.num_classes is not None: diff --git a/src/models/backbones/wrapper.py b/src/models/backbones/wrapper.py index 36817ba..72b8f17 100644 --- a/src/models/backbones/wrapper.py +++ b/src/models/backbones/wrapper.py @@ -36,15 +36,38 @@ class MobileNetV2Backbone(BaseBackbone): 
self.enc_channels = [16, 24, 32, 96, 1280] def forward(self, x): - x = reduce(lambda x, n: self.model.features[n](x), list(range(0, 2)), x) + # x = reduce(lambda x, n: self.model.features[n](x), list(range(0, 2)), x) + x = self.model.features[0](x) + x = self.model.features[1](x) enc2x = x - x = reduce(lambda x, n: self.model.features[n](x), list(range(2, 4)), x) + + # x = reduce(lambda x, n: self.model.features[n](x), list(range(2, 4)), x) + x = self.model.features[2](x) + x = self.model.features[3](x) enc4x = x - x = reduce(lambda x, n: self.model.features[n](x), list(range(4, 7)), x) + + # x = reduce(lambda x, n: self.model.features[n](x), list(range(4, 7)), x) + x = self.model.features[4](x) + x = self.model.features[5](x) + x = self.model.features[6](x) enc8x = x - x = reduce(lambda x, n: self.model.features[n](x), list(range(7, 14)), x) + + # x = reduce(lambda x, n: self.model.features[n](x), list(range(7, 14)), x) + x = self.model.features[7](x) + x = self.model.features[8](x) + x = self.model.features[9](x) + x = self.model.features[10](x) + x = self.model.features[11](x) + x = self.model.features[12](x) + x = self.model.features[13](x) enc16x = x - x = reduce(lambda x, n: self.model.features[n](x), list(range(14, 19)), x) + + # x = reduce(lambda x, n: self.model.features[n](x), list(range(14, 19)), x) + x = self.model.features[14](x) + x = self.model.features[15](x) + x = self.model.features[16](x) + x = self.model.features[17](x) + x = self.model.features[18](x) enc32x = x return [enc2x, enc4x, enc8x, enc16x, enc32x] diff --git a/torchscript/README.md b/torchscript/README.md new file mode 100755 index 0000000..509fbc5 --- /dev/null +++ b/torchscript/README.md @@ -0,0 +1,18 @@ +## MODNet - TorchScript Model + +This TorchScript version of MODNet is provided by [@yarkable](https://github.com/yarkable) from the community. +Please note that the PyTorch version required for this TorchScript export function is higher than the official MODNet code (torch>=1.2.0). 
+ +You can also download the TorchScript version of the official **Image Matting Model** from [this link](https://pan.baidu.com/s/1kOmmmbG7lSZiSmDdE7CaRw) with the extraction code `dm9e`. + +To export the TorchScript version of MODNet (assuming you are currently in the project root directory): +1. Download the pre-trained **Image Matting Model** from this [link](https://drive.google.com/drive/folders/1umYmlCulvIFNaqPjwod1SayFmSRHziyR?usp=sharing) and put the model into the folder `MODNet/pretrained/`. + +2. Ensure your PyTorch version >= 1.2.0. + +3. Export the TorchScript version of MODNet by: + ```shell + python -m torchscript.export_torchscript \ + --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \ + --output-path=pretrained/modnet_photographic_portrait_matting.torchscript + ``` diff --git a/torchscript/__init__.py b/torchscript/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/torchscript/export_torchscript.py b/torchscript/export_torchscript.py new file mode 100755 index 0000000..e0e8eec --- /dev/null +++ b/torchscript/export_torchscript.py @@ -0,0 +1,46 @@ +""" +Export TorchScript model of MODNet + +Arguments: + --ckpt-path: path of the checkpoint that will be converted + --output-path: path for saving the TorchScript model + +Example: + python -m torchscript.export_torchscript \ + --ckpt-path=modnet_photographic_portrait_matting.ckpt \ + --output-path=modnet_photographic_portrait_matting.torchscript +""" + +import os +import argparse + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from . 
import modnet_torchscript + + +if __name__ == '__main__': + # define cmd arguments + parser = argparse.ArgumentParser() + parser.add_argument('--ckpt-path', type=str, required=True, help='path of the checkpoint that will be converted') + parser.add_argument('--output-path', type=str, required=True, help='path for saving the TorchScript model') + args = parser.parse_args() + + # check input arguments + if not os.path.exists(args.ckpt_path): + print(args.ckpt_path) + print('Cannot find checkpoint path: {0}'.format(args.ckpt_path)) + exit() + + # create MODNet and load the pre-trained ckpt + modnet = modnet_torchscript.MODNet(backbone_pretrained=False) + modnet = nn.DataParallel(modnet).cuda() + state_dict = torch.load(args.ckpt_path) + modnet.load_state_dict(state_dict) + modnet.eval() + + # export to TorchScript model + scripted_model = torch.jit.script(modnet.module) + torch.jit.save(scripted_model, os.path.join(args.output_path)) diff --git a/torchscript/modnet_torchscript.py b/torchscript/modnet_torchscript.py new file mode 100755 index 0000000..7b732d7 --- /dev/null +++ b/torchscript/modnet_torchscript.py @@ -0,0 +1,258 @@ +""" +This file contains a modified version of the original file `modnet.py` without +`pred_semantic` and `pred_details` as both of these return None when `inference=True` + +It also omits the `inference` argument, which makes it easier to +convert the checkpoint to a TorchScript model. 
+""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from src.models.backbones import SUPPORTED_BACKBONES + + +#------------------------------------------------------------------------------ +# MODNet Basic Modules +#------------------------------------------------------------------------------ + +class IBNorm(nn.Module): + """ Combine Instance Norm and Batch Norm into One Layer + """ + + def __init__(self, in_channels): + super(IBNorm, self).__init__() + in_channels = in_channels + self.bnorm_channels = int(in_channels / 2) + self.inorm_channels = in_channels - self.bnorm_channels + + self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True) + self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False) + + def forward(self, x): + bn_x = self.bnorm(x[:, :self.bnorm_channels, ...].contiguous()) + in_x = self.inorm(x[:, self.bnorm_channels:, ...].contiguous()) + + return torch.cat((bn_x, in_x), 1) + + +class Conv2dIBNormRelu(nn.Module): + """ Convolution + IBNorm + ReLu + """ + + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, groups=1, bias=True, + with_ibn=True, with_relu=True): + super(Conv2dIBNormRelu, self).__init__() + + layers = [ + nn.Conv2d(in_channels, out_channels, kernel_size, + stride=stride, padding=padding, dilation=dilation, + groups=groups, bias=bias) + ] + + if with_ibn: + layers.append(IBNorm(out_channels)) + if with_relu: + layers.append(nn.ReLU(inplace=True)) + + self.layers = nn.Sequential(*layers) + + def forward(self, x): + return self.layers(x) + + +class SEBlock(nn.Module): + """ SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf + """ + + def __init__(self, in_channels, out_channels, reduction=1): + super(SEBlock, self).__init__() + self.pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(in_channels, int(in_channels // reduction), bias=False), + nn.ReLU(inplace=True), + nn.Linear(int(in_channels // reduction), out_channels, 
bias=False), + nn.Sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + w = self.pool(x).view(b, c) + w = self.fc(w).view(b, c, 1, 1) + + return x * w.expand_as(x) + + +#------------------------------------------------------------------------------ +# MODNet Branches +#------------------------------------------------------------------------------ + +class LRBranch(nn.Module): + """ Low Resolution Branch of MODNet + """ + + def __init__(self, backbone): + super(LRBranch, self).__init__() + + enc_channels = backbone.enc_channels + + self.backbone = backbone + self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4) + self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2) + self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2) + self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False) + + def forward(self, img): + enc_features = self.backbone.forward(img) + enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4] + + enc32x = self.se_block(enc32x) + lr16x = F.interpolate(enc32x, scale_factor=2.0, mode='bilinear', align_corners=False) + lr16x = self.conv_lr16x(lr16x) + lr8x = F.interpolate(lr16x, scale_factor=2.0, mode='bilinear', align_corners=False) + lr8x = self.conv_lr8x(lr8x) + + return lr8x, enc2x, enc4x + + +class HRBranch(nn.Module): + """ High Resolution Branch of MODNet + """ + + def __init__(self, hr_channels, enc_channels): + super(HRBranch, self).__init__() + + self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0) + self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1) + + self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0) + self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1) + + self.conv_hr4x = nn.Sequential( + Conv2dIBNormRelu(3 
* hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1), + ) + + self.conv_hr2x = nn.Sequential( + Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1), + ) + + self.conv_hr = nn.Sequential( + Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False), + ) + + def forward(self, img, enc2x, enc4x, lr8x): + img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False) + img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False) + + enc2x = self.tohr_enc2x(enc2x) + hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1)) + + enc4x = self.tohr_enc4x(enc4x) + hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1)) + + lr4x = F.interpolate(lr8x, scale_factor=2.0, mode='bilinear', align_corners=False) + hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1)) + + hr2x = F.interpolate(hr4x, scale_factor=2.0, mode='bilinear', align_corners=False) + hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1)) + + return hr2x + + +class FusionBranch(nn.Module): + """ Fusion Branch of MODNet + """ + + def __init__(self, hr_channels, enc_channels): + super(FusionBranch, self).__init__() + self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2) + + self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1) + self.conv_f = nn.Sequential( + Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1), + Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, 
padding=0, with_ibn=False, with_relu=False), + ) + + def forward(self, img, lr8x, hr2x): + lr4x = F.interpolate(lr8x, scale_factor=2.0, mode='bilinear', align_corners=False) + lr4x = self.conv_lr4x(lr4x) + lr2x = F.interpolate(lr4x, scale_factor=2.0, mode='bilinear', align_corners=False) + + f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1)) + f = F.interpolate(f2x, scale_factor=2.0, mode='bilinear', align_corners=False) + f = self.conv_f(torch.cat((f, img), dim=1)) + pred_matte = torch.sigmoid(f) + + return pred_matte + + +#------------------------------------------------------------------------------ +# MODNet +#------------------------------------------------------------------------------ + +class MODNet(nn.Module): + """ Architecture of MODNet + """ + + def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True): + super(MODNet, self).__init__() + + self.in_channels = in_channels + self.hr_channels = hr_channels + self.backbone_arch = backbone_arch + self.backbone_pretrained = backbone_pretrained + + self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels) + + self.lr_branch = LRBranch(self.backbone) + self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels) + self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + self._init_conv(m) + elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d): + self._init_norm(m) + + if self.backbone_pretrained: + self.backbone.load_pretrained_ckpt() + + def forward(self, img): + # NOTE + lr_out = self.lr_branch(img) + lr8x = lr_out[0] + enc2x = lr_out[1] + enc4x = lr_out[2] + + hr2x = self.hr_branch(img, enc2x, enc4x, lr8x) + + pred_matte = self.f_branch(img, lr8x, hr2x) + + return pred_matte + + def freeze_norm(self): + norm_types = [nn.BatchNorm2d, nn.InstanceNorm2d] + for m in self.modules(): + for n in norm_types: + if isinstance(m, n): + m.eval() + 
continue + + def _init_conv(self, conv): + nn.init.kaiming_uniform_( + conv.weight, a=0, mode='fan_in', nonlinearity='relu') + if conv.bias is not None: + nn.init.constant_(conv.bias, 0) + + def _init_norm(self, norm): + if norm.weight is not None: + nn.init.constant_(norm.weight, 1) + nn.init.constant_(norm.bias, 0)