diff --git a/README.md b/README.md
index 7adc727..f3b504b 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
-WebCam Demo [Offline][Colab] |
+WebCam Video Demo [Offline][Colab] | Custom Video Demo [Offline] |
 Image Demo [WebGUI][Colab]
@@ -20,25 +20,28 @@ WebCam Demo [Offline][
+## Image Matting Demo
 We provide an [online Colab demo](https://colab.research.google.com/drive/1GANpbKT06aEFiW-Ssx0DQnnEADcXwQG6?usp=sharing) for portrait image matting. It allows you to upload portrait images and predict/visualize/download the alpha mattes.
-You can also use this [WebGUI](https://gradio.app/g/modnet) (hosted on [Gradio](https://github.com/gradio-app/gradio)) for portrait image matting directly from your browser without any code! The source code of this demo is coming soon.
+You can also use this [WebGUI](https://gradio.app/g/modnet) (hosted on [Gradio](https://github.com/gradio-app/gradio)) for portrait image matting directly from your browser without any code!
@@ -49,7 +52,7 @@ You can also use this [WebGUI](https://gradio.app/g/modnet) (hosted on [Gradio](
## License
-This project is released under the [Creative Commons Attribution NonCommercial ShareAlike 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode) license.
+This project (code, pre-trained models, demos, *etc*.) is released under the [Creative Commons Attribution NonCommercial ShareAlike 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode) license.
## Acknowledgement
diff --git a/demo/video_matting/custom/README.md b/demo/video_matting/custom/README.md
new file mode 100644
index 0000000..a6439cd
--- /dev/null
+++ b/demo/video_matting/custom/README.md
@@ -0,0 +1,50 @@
+## MODNet - Custom Portrait Video Matting Demo
+This is a MODNet portrait video matting demo that allows you to process custom videos.
+
+### 1. Requirements
+The basic requirements for this demo are:
+- Ubuntu System
+- Python 3+
+
+
+### 2. Introduction
+We use ~400 unlabeled video clips (~50,000 frames in total) downloaded from the internet to perform SOC (sub-objectives consistency adaptation) and adapt MODNet to the video domain. **Nonetheless, due to insufficient labeled training data (~3k labeled foregrounds), our model may still make errors in portrait semantic estimation under challenging scenes.** In addition, this demo does not currently support the OFD (one-frame delay) trick.
+
+
+For a better experience, please make sure your videos satisfy the following conditions:
+
+* the portrait and the background are visually distinguishable, i.e., not similar in appearance
+* the video is captured under soft and bright ambient lighting
+* the contents do not move too fast
+
+### 3. Run Demo
+We recommend creating a new conda virtual environment to run this demo, as follows:
+
+1. Clone the MODNet repository:
+ ```
+ git clone https://github.com/ZHKKKe/MODNet.git
+ cd MODNet
+ ```
+
+2. Download the pre-trained model from this [link](https://drive.google.com/file/d/1Nf1ZxeJZJL8Qx9KadcYYyEmmlKhTADxX/view?usp=sharing) and put it into the folder `MODNet/pretrained/`.
+
+
+3. Create a conda virtual environment named `modnet` (if it doesn't exist) and activate it. Here we use `python=3.6` as an example:
+ ```
+ conda create -n modnet python=3.6
+ source activate modnet
+ ```
+
+4. Install the required python dependencies (please make sure your CUDA version is supported by the PyTorch version installed):
+ ```
+ pip install -r demo/video_matting/custom/requirements.txt
+ ```
+
+5. Execute the main code:
+ ```
+ python -m demo.video_matting.custom.run --video YOUR_VIDEO_PATH
+ ```
+    where `YOUR_VIDEO_PATH` is the path of your video file.
+    There are some optional arguments (see the example below):
+    - `--result-type (default=fg)` : matte - save the alpha matte; fg - save the foreground
+ - `--fps (default=30)` : fps of the result video
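+
+    For example, assuming a hypothetical input file `./sample/portrait.mp4`, the following command saves the alpha matte at 25 fps. The result is written next to the input video and named after it and the result type, i.e., `./sample/portrait_matte.mp4`:
+    ```
+    python -m demo.video_matting.custom.run --video ./sample/portrait.mp4 --result-type matte --fps 25
+    ```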
diff --git a/demo/video_matting/offline/requirements.txt b/demo/video_matting/custom/requirements.txt
similarity index 100%
rename from demo/video_matting/offline/requirements.txt
rename to demo/video_matting/custom/requirements.txt
diff --git a/demo/video_matting/offline/run.py b/demo/video_matting/custom/run.py
similarity index 55%
rename from demo/video_matting/offline/run.py
rename to demo/video_matting/custom/run.py
index e2d18ea..76ec77d 100644
--- a/demo/video_matting/offline/run.py
+++ b/demo/video_matting/custom/run.py
@@ -1,9 +1,8 @@
import os
-
import cv2
+import argparse
import numpy as np
from PIL import Image
-import argparse
from tqdm import tqdm
import torch
@@ -20,25 +19,10 @@ torch_transforms = transforms.Compose(
]
)
-print('Load pre-trained MODNet...')
-pretrained_ckpt = './pretrained/modnet_webcam_portrait_matting.ckpt'
-modnet = MODNet(backbone_pretrained=False)
-modnet = nn.DataParallel(modnet)
-GPU = True if torch.cuda.device_count() > 0 else False
-if GPU:
- print('Use GPU...')
- modnet = modnet.cuda()
- modnet.load_state_dict(torch.load(pretrained_ckpt))
-else:
- print('Use CPU...')
- modnet.load_state_dict(torch.load(pretrained_ckpt, map_location=torch.device('cpu')))
-modnet.eval()
-
-
-def offline_matting(video_path, save_path, alpha_matte=False, fps=30):
+def matting(video, result, alpha_matte=False, fps=30):
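+    # process the input video frame by frame and write either the foreground or the alpha matte (depending on alpha_matte) to the result file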
# video capture
- vc = cv2.VideoCapture(video_path)
+ vc = cv2.VideoCapture(video)
if vc.isOpened():
rval, frame = vc.read()
@@ -46,7 +30,7 @@ def offline_matting(video_path, save_path, alpha_matte=False, fps=30):
rval = False
if not rval:
- print('Read video {} failed.'.format(video_path))
+ print('Failed to read the video: {0}'.format(video))
exit()
num_frame = vc.get(cv2.CAP_PROP_FRAME_COUNT)
@@ -54,7 +38,7 @@ def offline_matting(video_path, save_path, alpha_matte=False, fps=30):
# video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
- video_writer = cv2.VideoWriter(save_path, fourcc, fps, (w, h))
+ video_writer = cv2.VideoWriter(result, fourcc, fps, (w, h))
print('Start matting...')
with tqdm(range(int(num_frame)))as t:
@@ -85,22 +69,39 @@ def offline_matting(video_path, save_path, alpha_matte=False, fps=30):
c += 1
video_writer.release()
- print('Save video to {}'.format(save_path))
+ print('Save the result video to {0}'.format(result))
return
-if __name__ == "__main__":
+if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('--video_path', type=str, default='./sample/video.mp4')
- parser.add_argument('--save_path', type=str, default='./sample/res.mp4', help='Video should be .mp4 format.')
- parser.add_argument('--alpha_matte', action='store_true', default=False, help='If True, save alpha_matte video.')
- parser.add_argument('--fps', type=int, default=30)
+ parser.add_argument('--video', type=str, required=True, help='input video file')
+ parser.add_argument('--result-type', type=str, default='fg', choices=['fg', 'matte'],
+ help='matte - save the alpha matte; fg - save the foreground')
+ parser.add_argument('--fps', type=int, default=30, help='fps of the result video')
+ print('Get CMD Arguments...')
args = parser.parse_args()
- if not args.save_path.endswith('.mp4'):
- args.save_path = os.path.splitext(args.save_path)[0] + '.mp4'
+ if not os.path.exists(args.video):
+ print('Cannot find the input video: {0}'.format(args.video))
+ exit()
- offline_matting(args.video_path, args.save_path, args.alpha_matte, args.fps)
+ print('Load pre-trained MODNet...')
+ pretrained_ckpt = './pretrained/modnet_webcam_portrait_matting.ckpt'
+ modnet = MODNet(backbone_pretrained=False)
+ modnet = nn.DataParallel(modnet)
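+    # run on GPU if any CUDA device is available; otherwise load the weights on CPU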
+ GPU = True if torch.cuda.device_count() > 0 else False
+ if GPU:
+ print('Use GPU...')
+ modnet = modnet.cuda()
+ modnet.load_state_dict(torch.load(pretrained_ckpt))
+ else:
+ print('Use CPU...')
+ modnet.load_state_dict(torch.load(pretrained_ckpt, map_location=torch.device('cpu')))
+ modnet.eval()
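+    # name the result after the input video and the chosen result type, e.g., video.mp4 -> video_fg.mp4 or video_matte.mp4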
+ result = os.path.splitext(args.video)[0] + '_{0}.mp4'.format(args.result_type)
+ alpha_matte = True if args.result_type == 'matte' else False
+ matting(args.video, result, alpha_matte, args.fps)