diff --git a/README.md b/README.md
index 7adc727..f3b504b 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@

-WebCam Demo [Offline][Colab] |
+WebCam Video Demo [Offline][Colab] | Custom Video Demo [Offline] | Image Demo [WebGUI][Colab]

@@ -20,25 +20,28 @@ WebCam Demo [Offline][
+
 ## Image Matting Demo
 We provide an [online Colab demo](https://colab.research.google.com/drive/1GANpbKT06aEFiW-Ssx0DQnnEADcXwQG6?usp=sharing) for portrait image matting. It allows you to upload portrait images and predict/visualize/download the alpha mattes.
 
-You can also use this [WebGUI](https://gradio.app/g/modnet) (hosted on [Gradio](https://github.com/gradio-app/gradio)) for portrait image matting directly from your browser without any code! The source code of this demo is coming soon.
+You can also use this [WebGUI](https://gradio.app/g/modnet) (hosted on [Gradio](https://github.com/gradio-app/gradio)) for portrait image matting directly from your browser without any code!
@@ -49,7 +52,7 @@ You can also use this [WebGUI](https://gradio.app/g/modnet) (hosted on [Gradio](
 ## License
 
-This project is released under the [Creative Commons Attribution NonCommercial ShareAlike 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode) license.
+This project (code, pre-trained models, demos, *etc*.) is released under the [Creative Commons Attribution NonCommercial ShareAlike 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode) license.
 
 ## Acknowledgement
diff --git a/demo/video_matting/custom/README.md b/demo/video_matting/custom/README.md
new file mode 100644
index 0000000..a6439cd
--- /dev/null
+++ b/demo/video_matting/custom/README.md
@@ -0,0 +1,50 @@
+## MODNet - Custom Portrait Video Matting Demo
+This is a MODNet portrait video matting demo that allows you to process custom videos.
+
+### 1. Requirements
+The basic requirements for this demo are:
+- Ubuntu System
+- Python 3+
+
+
+### 2. Introduction
+We use ~400 unlabeled video clips (divided into ~50,000 frames) downloaded from the internet to perform SOC (sub-objectives consistency adaptation) and adapt MODNet to the video domain. **Nonetheless, due to insufficient labeled training data (~3k labeled foregrounds), our model may still make errors in portrait semantics estimation in challenging scenes.** In addition, this demo does not currently support the OFD (one-frame delay) trick.
+
+For a better experience, please make sure your videos satisfy the following:
+
+* the portrait and the background are distinguishable, i.e., they are not similar
+* the video is captured in soft and bright ambient lighting
+* the contents do not move too fast
+
+### 3. Run Demo
+We recommend creating a new conda virtual environment to run this demo, as follows:
+
+1. Clone the MODNet repository:
+    ```
+    git clone https://github.com/ZHKKKe/MODNet.git
+    cd MODNet
+    ```
+
+2. Download the pre-trained model from this [link](https://drive.google.com/file/d/1Nf1ZxeJZJL8Qx9KadcYYyEmmlKhTADxX/view?usp=sharing) and put it into the folder `MODNet/pretrained/`.
+
+3. Create a conda virtual environment named `modnet` (if it doesn't exist) and activate it. Here we use `python=3.6` as an example:
+    ```
+    conda create -n modnet python=3.6
+    source activate modnet
+    ```
+
+4. Install the required Python dependencies (please make sure your CUDA version is supported by the installed PyTorch version):
+    ```
+    pip install -r demo/video_matting/custom/requirements.txt
+    ```
+
+5. Execute the main code:
+    ```
+    python -m demo.video_matting.custom.run --video YOUR_VIDEO_PATH
+    ```
+    where `YOUR_VIDEO_PATH` is the specific path of your video.
+    There are some optional arguments:
+    - `--result-type (default=fg)` : matte - save the alpha matte; fg - save the foreground
+    - `--fps (default=30)` : fps of the result video
diff --git a/demo/video_matting/offline/requirements.txt b/demo/video_matting/custom/requirements.txt
similarity index 100%
rename from demo/video_matting/offline/requirements.txt
rename to demo/video_matting/custom/requirements.txt
diff --git a/demo/video_matting/offline/run.py b/demo/video_matting/custom/run.py
similarity index 55%
rename from demo/video_matting/offline/run.py
rename to demo/video_matting/custom/run.py
index e2d18ea..76ec77d 100644
--- a/demo/video_matting/offline/run.py
+++ b/demo/video_matting/custom/run.py
@@ -1,9 +1,8 @@
 import os
-
 import cv2
+import argparse
 import numpy as np
 from PIL import Image
-import argparse
 from tqdm import tqdm
 
 import torch
@@ -20,25 +19,10 @@ torch_transforms = transforms.Compose(
     ]
 )
 
-print('Load pre-trained MODNet...')
-pretrained_ckpt = './pretrained/modnet_webcam_portrait_matting.ckpt'
-modnet = MODNet(backbone_pretrained=False)
-modnet = nn.DataParallel(modnet)
-GPU = True if torch.cuda.device_count() > 0 else False
-if GPU:
-    print('Use GPU...')
-    modnet = modnet.cuda()
-    modnet.load_state_dict(torch.load(pretrained_ckpt))
-else:
-    print('Use CPU...')
-    modnet.load_state_dict(torch.load(pretrained_ckpt, map_location=torch.device('cpu')))
-modnet.eval()
-
-
-def offline_matting(video_path, save_path, alpha_matte=False, fps=30):
+def matting(video, result, alpha_matte=False, fps=30):
     # video capture
-    vc = cv2.VideoCapture(video_path)
+    vc = cv2.VideoCapture(video)
 
     if vc.isOpened():
         rval, frame = vc.read()
@@ -46,7 +30,7 @@ def offline_matting(video_path, save_path, alpha_matte=False, fps=30):
         rval = False
 
     if not rval:
-        print('Read video {} failed.'.format(video_path))
+        print('Failed to read the video: {0}'.format(video))
         exit()
 
     num_frame = vc.get(cv2.CAP_PROP_FRAME_COUNT)
@@ -54,7 +38,7 @@ def offline_matting(video_path, save_path, alpha_matte=False, fps=30):
 
     # video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    video_writer = cv2.VideoWriter(save_path, fourcc, fps, (w, h))
+    video_writer = cv2.VideoWriter(result, fourcc, fps, (w, h))
 
     print('Start matting...')
     with tqdm(range(int(num_frame)))as t:
@@ -85,22 +69,39 @@ def offline_matting(video_path, save_path, alpha_matte=False, fps=30):
             c += 1
 
     video_writer.release()
-    print('Save video to {}'.format(save_path))
+    print('Save the result video to {0}'.format(result))
 
     return
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--video_path', type=str, default='./sample/video.mp4')
-    parser.add_argument('--save_path', type=str, default='./sample/res.mp4', help='Video should be .mp4 format.')
-    parser.add_argument('--alpha_matte', action='store_true', default=False, help='If True, save alpha_matte video.')
-    parser.add_argument('--fps', type=int, default=30)
+    parser.add_argument('--video', type=str, required=True, help='input video file')
+    parser.add_argument('--result-type', type=str, default='fg', choices=['fg', 'matte'],
+                        help='matte - save the alpha matte; fg - save the foreground')
+    parser.add_argument('--fps', type=int, default=30, help='fps of the result video')
+    print('Get CMD Arguments...')
     args = parser.parse_args()
 
-    if not args.save_path.endswith('.mp4'):
-        args.save_path = os.path.splitext(args.save_path)[0] + '.mp4'
+    if not os.path.exists(args.video):
+        print('Cannot find the input video: {0}'.format(args.video))
+        exit()
 
-    offline_matting(args.video_path, args.save_path, args.alpha_matte, args.fps)
+    print('Load pre-trained MODNet...')
+    pretrained_ckpt = './pretrained/modnet_webcam_portrait_matting.ckpt'
+    modnet = MODNet(backbone_pretrained=False)
+    modnet = nn.DataParallel(modnet)
+    GPU = True if torch.cuda.device_count() > 0 else False
+    if GPU:
+        print('Use GPU...')
+        modnet = modnet.cuda()
+        modnet.load_state_dict(torch.load(pretrained_ckpt))
+    else:
+        print('Use CPU...')
+        modnet.load_state_dict(torch.load(pretrained_ckpt, map_location=torch.device('cpu')))
+    modnet.eval()
+
+    result = os.path.splitext(args.video)[0] + '_{0}.mp4'.format(args.result_type)
+    alpha_matte = True if args.result_type == 'matte' else False
+    matting(args.video, result, alpha_matte, args.fps)
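
Editor's note (not part of the patch): combining the arguments introduced above, a typical run of the renamed entry point might look like the following. The input path reuses the old `./sample/video.mp4` default purely as a placeholder, and the expected output name follows the `'_{0}.mp4'.format(args.result_type)` suffix logic added in the `__main__` block.

```
# hypothetical invocation: save the alpha matte of the sample video at 25 fps
python -m demo.video_matting.custom.run --video ./sample/video.mp4 --result-type matte --fps 25
# per the suffix logic above, the result is written next to the input video:
#   ./sample/video_matte.mp4
```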
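
The per-frame loop inside `matting()` is unchanged by the rename and is therefore elided from the hunks above. For readers of the patch, here is a rough, illustrative sketch of the single-frame MODNet inference pattern such a loop relies on; it is not the repository's exact code, the helper name `matte_one_frame` is invented for this note, and the multiple-of-32 resize is an assumption based on MODNet's usual inference recipe. `modnet` and `torch_transforms` refer to the objects built in `run.py`.

```
import cv2
import torch
from PIL import Image


def matte_one_frame(modnet, torch_transforms, frame_bgr, use_gpu=False):
    """Illustrative sketch: return an alpha matte in [0, 1] sized like frame_bgr."""
    h, w = frame_bgr.shape[:2]
    # Assumption: MODNet's encoder downsamples by 32, so the working
    # resolution is rounded down to multiples of 32 before inference.
    rh, rw = max(h - h % 32, 32), max(w - w % 32, 32)

    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    frame_rgb = cv2.resize(frame_rgb, (rw, rh), interpolation=cv2.INTER_AREA)

    frame_tensor = torch_transforms(Image.fromarray(frame_rgb))[None, ...]
    if use_gpu:
        frame_tensor = frame_tensor.cuda()

    with torch.no_grad():
        # in inference mode, MODNet returns (semantic, detail, matte) predictions
        _, _, matte_tensor = modnet(frame_tensor, True)

    matte = matte_tensor[0, 0].cpu().numpy()
    # resize the matte back to the original frame size
    return cv2.resize(matte, (w, h), interpolation=cv2.INTER_LINEAR)
```

With such a matte, `--result-type matte` writes the matte itself (the `alpha_matte=True` path of `matting()`), while `--result-type fg` presumably composites the frame over a plain background, e.g. `matte * frame + (1 - matte) * 255`.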