Added the option for using mask

myjeffxie · Jul 19, 2021 · 5e1e0dd · 5e1e0dd
1 parent 61d8e7e
commit 5e1e0dd
Show file tree

Hide file tree

Showing 8 changed files with 334 additions and 271 deletions.
diff --git a/test_video_swap_multispecific.py b/test_video_swap_multispecific.py
@@ -51,44 +51,44 @@ def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
     source_specific_id_nonorm_list = []
     source_path = os.path.join(multisepcific_dir,'SRC_*')
     source_specific_images_path = sorted(glob.glob(source_path))
-
-    for source_specific_image_path in source_specific_images_path:
-        specific_person_whole = cv2.imread(source_specific_image_path)
-        specific_person_align_crop, _ = app.get(specific_person_whole,crop_size)
-        specific_person_align_crop_pil = Image.fromarray(cv2.cvtColor(specific_person_align_crop[0],cv2.COLOR_BGR2RGB)) 
-        specific_person = transformer_Arcface(specific_person_align_crop_pil)
-        specific_person = specific_person.view(-1, specific_person.shape[0], specific_person.shape[1], specific_person.shape[2])
-        # convert numpy to tensor
-        specific_person = specific_person.cuda()
-        #create latent id
-        specific_person_downsample = F.interpolate(specific_person, scale_factor=0.5)
-        specific_person_id_nonorm = model.netArc(specific_person_downsample)
-        source_specific_id_nonorm_list.append(specific_person_id_nonorm.clone())
-
-
-    # The person who provides id information (list)
-    target_id_norm_list = []
-    target_path = os.path.join(multisepcific_dir,'DST_*')
-    target_images_path = sorted(glob.glob(target_path))
-
-    for target_image_path in target_images_path:
-        img_a_whole = cv2.imread(target_image_path)
-        img_a_align_crop, _ = app.get(img_a_whole,crop_size)
-        img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB)) 
-        img_a = transformer_Arcface(img_a_align_crop_pil)
-        img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
-        # convert numpy to tensor
-        img_id = img_id.cuda()
-        #create latent id
-        img_id_downsample = F.interpolate(img_id, scale_factor=0.5)
-        latend_id = model.netArc(img_id_downsample)
-        latend_id = F.normalize(latend_id, p=2, dim=1)
-        target_id_norm_list.append(latend_id.clone())
-
-    assert len(target_id_norm_list) == len(source_specific_id_nonorm_list), "The number of images in source and target directory must be same !!!"
-
-
-
-    video_swap(opt.video_path, target_id_norm_list,source_specific_id_nonorm_list, opt.id_thres, \
-        model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo)
+    with torch.no_grad():
+        for source_specific_image_path in source_specific_images_path:
+            specific_person_whole = cv2.imread(source_specific_image_path)
+            specific_person_align_crop, _ = app.get(specific_person_whole,crop_size)
+            specific_person_align_crop_pil = Image.fromarray(cv2.cvtColor(specific_person_align_crop[0],cv2.COLOR_BGR2RGB)) 
+            specific_person = transformer_Arcface(specific_person_align_crop_pil)
+            specific_person = specific_person.view(-1, specific_person.shape[0], specific_person.shape[1], specific_person.shape[2])
+            # move the tensor to the GPU
+            specific_person = specific_person.cuda()
+            #create latent id
+            specific_person_downsample = F.interpolate(specific_person, scale_factor=0.5)
+            specific_person_id_nonorm = model.netArc(specific_person_downsample)
+            source_specific_id_nonorm_list.append(specific_person_id_nonorm.clone())
+
+
+        # The person who provides id information (list)
+        target_id_norm_list = []
+        target_path = os.path.join(multisepcific_dir,'DST_*')
+        target_images_path = sorted(glob.glob(target_path))
+
+        for target_image_path in target_images_path:
+            img_a_whole = cv2.imread(target_image_path)
+            img_a_align_crop, _ = app.get(img_a_whole,crop_size)
+            img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB)) 
+            img_a = transformer_Arcface(img_a_align_crop_pil)
+            img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
+            # move the tensor to the GPU
+            img_id = img_id.cuda()
+            #create latent id
+            img_id_downsample = F.interpolate(img_id, scale_factor=0.5)
+            latend_id = model.netArc(img_id_downsample)
+            latend_id = F.normalize(latend_id, p=2, dim=1)
+            target_id_norm_list.append(latend_id.clone())
+
+        assert len(target_id_norm_list) == len(source_specific_id_nonorm_list), "The number of images in source and target directory must be same !!!"
+
+
+
+        video_swap(opt.video_path, target_id_norm_list,source_specific_id_nonorm_list, opt.id_thres, \
+            model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask)
 
diff --git a/test_video_swapmulti.py b/test_video_swapmulti.py
@@ -44,29 +44,31 @@ def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
     app = Face_detect_crop(name='antelope', root='./insightface_func/models')
     app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640))
 
-    pic_a = opt.pic_a_path
-    # img_a = Image.open(pic_a).convert('RGB')
-    img_a_whole = cv2.imread(pic_a)
-    img_a_align_crop, _ = app.get(img_a_whole,crop_size)
-    img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB)) 
-    img_a = transformer_Arcface(img_a_align_crop_pil)
-    img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
-
-    # pic_b = opt.pic_b_path
-    # img_b_whole = cv2.imread(pic_b)
-    # img_b_align_crop, b_mat = app.get(img_b_whole,crop_size)
-    # img_b_align_crop_pil = Image.fromarray(cv2.cvtColor(img_b_align_crop,cv2.COLOR_BGR2RGB)) 
-    # img_b = transformer(img_b_align_crop_pil)
-    # img_att = img_b.view(-1, img_b.shape[0], img_b.shape[1], img_b.shape[2])
-
-    # convert numpy to tensor
-    img_id = img_id.cuda()
-    # img_att = img_att.cuda()
-
-    #create latent id
-    img_id_downsample = F.interpolate(img_id, scale_factor=0.5)
-    latend_id = model.netArc(img_id_downsample)
-    latend_id = F.normalize(latend_id, p=2, dim=1)
-
-    video_swap(opt.video_path, latend_id, model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo)
+    with torch.no_grad():
+        pic_a = opt.pic_a_path
+        # img_a = Image.open(pic_a).convert('RGB')
+        img_a_whole = cv2.imread(pic_a)
+        img_a_align_crop, _ = app.get(img_a_whole,crop_size)
+        img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB)) 
+        img_a = transformer_Arcface(img_a_align_crop_pil)
+        img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
+
+        # pic_b = opt.pic_b_path
+        # img_b_whole = cv2.imread(pic_b)
+        # img_b_align_crop, b_mat = app.get(img_b_whole,crop_size)
+        # img_b_align_crop_pil = Image.fromarray(cv2.cvtColor(img_b_align_crop,cv2.COLOR_BGR2RGB)) 
+        # img_b = transformer(img_b_align_crop_pil)
+        # img_att = img_b.view(-1, img_b.shape[0], img_b.shape[1], img_b.shape[2])
+
+        # move the tensor to the GPU
+        img_id = img_id.cuda()
+        # img_att = img_att.cuda()
+
+        #create latent id
+        img_id_downsample = F.interpolate(img_id, scale_factor=0.5)
+        latend_id = model.netArc(img_id_downsample)
+        latend_id = F.normalize(latend_id, p=2, dim=1)
+
+        video_swap(opt.video_path, latend_id, model, app, opt.output_path,temp_results_dir=opt.temp_path,\
+            no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask)
 
diff --git a/test_video_swapsingle.py b/test_video_swapsingle.py
@@ -43,30 +43,31 @@ def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
 
     app = Face_detect_crop(name='antelope', root='./insightface_func/models')
     app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640))
-
-    pic_a = opt.pic_a_path
-    # img_a = Image.open(pic_a).convert('RGB')
-    img_a_whole = cv2.imread(pic_a)
-    img_a_align_crop, _ = app.get(img_a_whole,crop_size)
-    img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB)) 
-    img_a = transformer_Arcface(img_a_align_crop_pil)
-    img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
-
-    # pic_b = opt.pic_b_path
-    # img_b_whole = cv2.imread(pic_b)
-    # img_b_align_crop, b_mat = app.get(img_b_whole,crop_size)
-    # img_b_align_crop_pil = Image.fromarray(cv2.cvtColor(img_b_align_crop,cv2.COLOR_BGR2RGB)) 
-    # img_b = transformer(img_b_align_crop_pil)
-    # img_att = img_b.view(-1, img_b.shape[0], img_b.shape[1], img_b.shape[2])
-
-    # convert numpy to tensor
-    img_id = img_id.cuda()
-    # img_att = img_att.cuda()
-
-    #create latent id
-    img_id_downsample = F.interpolate(img_id, scale_factor=0.5)
-    latend_id = model.netArc(img_id_downsample)
-    latend_id = F.normalize(latend_id, p=2, dim=1)
-
-    video_swap(opt.video_path, latend_id, model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo)
+    with torch.no_grad():
+        pic_a = opt.pic_a_path
+        # img_a = Image.open(pic_a).convert('RGB')
+        img_a_whole = cv2.imread(pic_a)
+        img_a_align_crop, _ = app.get(img_a_whole,crop_size)
+        img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB)) 
+        img_a = transformer_Arcface(img_a_align_crop_pil)
+        img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
+
+        # pic_b = opt.pic_b_path
+        # img_b_whole = cv2.imread(pic_b)
+        # img_b_align_crop, b_mat = app.get(img_b_whole,crop_size)
+        # img_b_align_crop_pil = Image.fromarray(cv2.cvtColor(img_b_align_crop,cv2.COLOR_BGR2RGB)) 
+        # img_b = transformer(img_b_align_crop_pil)
+        # img_att = img_b.view(-1, img_b.shape[0], img_b.shape[1], img_b.shape[2])
+
+        # move the tensor to the GPU
+        img_id = img_id.cuda()
+        # img_att = img_att.cuda()
+
+        #create latent id
+        img_id_downsample = F.interpolate(img_id, scale_factor=0.5)
+        latend_id = model.netArc(img_id_downsample)
+        latend_id = F.normalize(latend_id, p=2, dim=1)
+
+        video_swap(opt.video_path, latend_id, model, app, opt.output_path,temp_results_dir=opt.temp_path,\
+            no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask)
 
diff --git a/test_video_swapspecific.py b/test_video_swapspecific.py
@@ -43,42 +43,42 @@ def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
 
     app = Face_detect_crop(name='antelope', root='./insightface_func/models')
     app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640))
-
-    pic_a = opt.pic_a_path
-    # img_a = Image.open(pic_a).convert('RGB')
-    img_a_whole = cv2.imread(pic_a)
-    img_a_align_crop, _ = app.get(img_a_whole,crop_size)
-    img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB)) 
-    img_a = transformer_Arcface(img_a_align_crop_pil)
-    img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
-
-    # pic_b = opt.pic_b_path
-    # img_b_whole = cv2.imread(pic_b)
-    # img_b_align_crop, b_mat = app.get(img_b_whole,crop_size)
-    # img_b_align_crop_pil = Image.fromarray(cv2.cvtColor(img_b_align_crop,cv2.COLOR_BGR2RGB)) 
-    # img_b = transformer(img_b_align_crop_pil)
-    # img_att = img_b.view(-1, img_b.shape[0], img_b.shape[1], img_b.shape[2])
-
-    # convert numpy to tensor
-    img_id = img_id.cuda()
-    # img_att = img_att.cuda()
-
-    #create latent id
-    img_id_downsample = F.interpolate(img_id, scale_factor=0.5)
-    latend_id = model.netArc(img_id_downsample)
-    latend_id = F.normalize(latend_id, p=2, dim=1)
-
-
-    # The specific person to be swapped
-    specific_person_whole = cv2.imread(pic_specific)
-    specific_person_align_crop, _ = app.get(specific_person_whole,crop_size)
-    specific_person_align_crop_pil = Image.fromarray(cv2.cvtColor(specific_person_align_crop[0],cv2.COLOR_BGR2RGB)) 
-    specific_person = transformer_Arcface(specific_person_align_crop_pil)
-    specific_person = specific_person.view(-1, specific_person.shape[0], specific_person.shape[1], specific_person.shape[2])
-    specific_person = specific_person.cuda()
-    specific_person_downsample = F.interpolate(specific_person, scale_factor=0.5)
-    specific_person_id_nonorm = model.netArc(specific_person_downsample)
-
-    video_swap(opt.video_path, latend_id,specific_person_id_nonorm, opt.id_thres, \
-        model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo)
+    with torch.no_grad():
+        pic_a = opt.pic_a_path
+        # img_a = Image.open(pic_a).convert('RGB')
+        img_a_whole = cv2.imread(pic_a)
+        img_a_align_crop, _ = app.get(img_a_whole,crop_size)
+        img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB)) 
+        img_a = transformer_Arcface(img_a_align_crop_pil)
+        img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
+
+        # pic_b = opt.pic_b_path
+        # img_b_whole = cv2.imread(pic_b)
+        # img_b_align_crop, b_mat = app.get(img_b_whole,crop_size)
+        # img_b_align_crop_pil = Image.fromarray(cv2.cvtColor(img_b_align_crop,cv2.COLOR_BGR2RGB)) 
+        # img_b = transformer(img_b_align_crop_pil)
+        # img_att = img_b.view(-1, img_b.shape[0], img_b.shape[1], img_b.shape[2])
+
+        # move the tensor to the GPU
+        img_id = img_id.cuda()
+        # img_att = img_att.cuda()
+
+        #create latent id
+        img_id_downsample = F.interpolate(img_id, scale_factor=0.5)
+        latend_id = model.netArc(img_id_downsample)
+        latend_id = F.normalize(latend_id, p=2, dim=1)
+
+
+        # The specific person to be swapped
+        specific_person_whole = cv2.imread(pic_specific)
+        specific_person_align_crop, _ = app.get(specific_person_whole,crop_size)
+        specific_person_align_crop_pil = Image.fromarray(cv2.cvtColor(specific_person_align_crop[0],cv2.COLOR_BGR2RGB)) 
+        specific_person = transformer_Arcface(specific_person_align_crop_pil)
+        specific_person = specific_person.view(-1, specific_person.shape[0], specific_person.shape[1], specific_person.shape[2])
+        specific_person = specific_person.cuda()
+        specific_person_downsample = F.interpolate(specific_person, scale_factor=0.5)
+        specific_person_id_nonorm = model.netArc(specific_person_downsample)
+
+        video_swap(opt.video_path, latend_id,specific_person_id_nonorm, opt.id_thres, \
+            model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask)