Added support for multiple faces #566

Merged · 7 commits · Sep 10, 2024
2 changes: 2 additions & 0 deletions .gitignore
@@ -22,3 +22,5 @@ models/inswapper_128.onnx
models/GFPGANv1.4.pth
*.onnx
models/DMDNet.pth
faceswap/
.vscode/
1 change: 1 addition & 0 deletions README.md
@@ -146,6 +146,7 @@ options:
--keep-audio keep original audio
--keep-frames keep temporary frames
--many-faces process every face
--map-faces map source target faces
Contributor commented:

suggestion (documentation): Consider clarifying the description of the --map-faces option

The current description 'map source target faces' is a bit ambiguous. Consider rephrasing to 'map source to target faces' for improved clarity while maintaining brevity.

Suggested change
- --map-faces map source target faces
+ --map-faces map source to target faces

--nsfw-filter filter the NSFW image or video
--video-encoder {libx264,libx265,libvpx-vp9} adjust output video encoder
--video-quality [0-51] adjust output video quality
32 changes: 32 additions & 0 deletions modules/cluster_analysis.py
@@ -0,0 +1,32 @@
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from typing import Any


def find_cluster_centroids(embeddings, max_k=10) -> Any:
Contributor commented:

suggestion (performance): Optimize the cluster centroid finding algorithm

The current implementation runs K-means for every K up to max_k, which could be inefficient for large datasets. Consider using a more efficient method for determining the optimal number of clusters, such as the elbow method or silhouette analysis, without running K-means for every K.

from typing import List, Tuple
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

def find_cluster_centroids(embeddings, max_k=10) -> List[Tuple[int, List[float]]]:
    best_k = 2
    best_score = -1
    for k in range(2, max_k + 1):
        kmeans = KMeans(n_clusters=k, random_state=42).fit(embeddings)
        score = silhouette_score(embeddings, kmeans.labels_)
        if score > best_score:
            best_score, best_k = score, k
    final_kmeans = KMeans(n_clusters=best_k, random_state=42).fit(embeddings)
    return list(enumerate(final_kmeans.cluster_centers_.tolist()))

    inertia = []
    cluster_centroids = []
    K = range(1, max_k+1)

    for k in K:
        kmeans = KMeans(n_clusters=k, random_state=0)
        kmeans.fit(embeddings)
        inertia.append(kmeans.inertia_)
        cluster_centroids.append({"k": k, "centroids": kmeans.cluster_centers_})

    diffs = [inertia[i] - inertia[i+1] for i in range(len(inertia)-1)]
    optimal_centroids = cluster_centroids[diffs.index(max(diffs)) + 1]['centroids']

    return optimal_centroids

def find_closest_centroid(centroids: list, normed_face_embedding) -> list:
    try:
        centroids = np.array(centroids)
        normed_face_embedding = np.array(normed_face_embedding)
        similarities = np.dot(centroids, normed_face_embedding)
        closest_centroid_index = np.argmax(similarities)

        return closest_centroid_index, centroids[closest_centroid_index]
    except ValueError:
        return None
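
For orientation, here is a minimal sketch of how the two helpers above fit together; the random embeddings and the call site are illustrative assumptions, not code from this PR:

import numpy as np
from modules.cluster_analysis import find_cluster_centroids, find_closest_centroid

# Hypothetical L2-normalised face embeddings gathered from video frames.
embeddings = np.random.rand(50, 512)
embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)

# Estimate the number of distinct identities via the elbow heuristic and get their centroids.
centroids = find_cluster_centroids(embeddings, max_k=10)

# Match one embedding to its nearest centroid by dot-product similarity.
idx, centroid = find_closest_centroid(centroids, embeddings[0])
print(f"embedding assigned to cluster {idx} of {len(centroids)}")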
13 changes: 9 additions & 4 deletions modules/core.py
@@ -40,6 +40,7 @@ def parse_args() -> None:
    program.add_argument('--keep-frames', help='keep temporary frames', dest='keep_frames', action='store_true', default=False)
    program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true', default=False)
    program.add_argument('--nsfw-filter', help='filter the NSFW image or video', dest='nsfw_filter', action='store_true', default=False)
    program.add_argument('--map-faces', help='map source target faces', dest='map_faces', action='store_true', default=False)
    program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libx264', choices=['libx264', 'libx265', 'libvpx-vp9'])
    program.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=18, choices=range(52), metavar='[0-51]')
    program.add_argument('--live-mirror', help='The live camera display as you see it in the front-facing camera frame', dest='live_mirror', action='store_true', default=False)
@@ -67,6 +68,7 @@ def parse_args() -> None:
    modules.globals.keep_frames = args.keep_frames
    modules.globals.many_faces = args.many_faces
    modules.globals.nsfw_filter = args.nsfw_filter
    modules.globals.map_faces = args.map_faces
    modules.globals.video_encoder = args.video_encoder
    modules.globals.video_quality = args.video_quality
    modules.globals.live_mirror = args.live_mirror
@@ -194,10 +196,13 @@ def start() -> None:
    # process image to videos
    if modules.globals.nsfw_filter and ui.check_and_ignore_nsfw(modules.globals.target_path, destroy):
        return
-    update_status('Creating temp resources...')
-    create_temp(modules.globals.target_path)
-    update_status('Extracting frames...')
-    extract_frames(modules.globals.target_path)
+
+    if not modules.globals.map_faces:
+        update_status('Creating temp resources...')
+        create_temp(modules.globals.target_path)
+        update_status('Extracting frames...')
+        extract_frames(modules.globals.target_path)
+
    temp_frame_paths = get_temp_frame_paths(modules.globals.target_path)
    for frame_processor in get_frame_processors_modules(modules.globals.frame_processors):
        update_status('Progressing...', frame_processor.NAME)
158 changes: 158 additions & 0 deletions modules/face_analyser.py
@@ -1,8 +1,16 @@
import os
import shutil
from typing import Any
import insightface

import cv2
import numpy as np
import modules.globals
from tqdm import tqdm
from modules.typing import Frame
from modules.cluster_analysis import find_cluster_centroids, find_closest_centroid
from modules.utilities import get_temp_directory_path, create_temp, extract_frames, clean_temp, get_temp_frame_paths
from pathlib import Path

FACE_ANALYSER = None

@@ -29,3 +37,153 @@ def get_many_faces(frame: Frame) -> Any:
        return get_face_analyser().get(frame)
    except IndexError:
        return None

def has_valid_map() -> bool:
    for map in modules.globals.souce_target_map:
        if "source" in map and "target" in map:
            return True
    return False

def default_source_face() -> Any:
    for map in modules.globals.souce_target_map:
        if "source" in map:
            return map['source']['face']
    return None

def simplify_maps() -> Any:
    centroids = []
    faces = []
    for map in modules.globals.souce_target_map:
        if "source" in map and "target" in map:
            centroids.append(map['target']['face'].normed_embedding)
            faces.append(map['source']['face'])

    modules.globals.simple_map = {'source_faces': faces, 'target_embeddings': centroids}
    return None

def add_blank_map() -> Any:
    try:
        max_id = -1
        if len(modules.globals.souce_target_map) > 0:
            max_id = max(modules.globals.souce_target_map, key=lambda x: x['id'])['id']

        modules.globals.souce_target_map.append({
            'id' : max_id + 1
        })
    except ValueError:
        return None

def get_unique_faces_from_target_image() -> Any:
    try:
        modules.globals.souce_target_map = []
        target_frame = cv2.imread(modules.globals.target_path)
        many_faces = get_many_faces(target_frame)
        i = 0

        for face in many_faces:
            x_min, y_min, x_max, y_max = face['bbox']
            modules.globals.souce_target_map.append({
                'id' : i,
                'target' : {
                    'cv2' : target_frame[int(y_min):int(y_max), int(x_min):int(x_max)],
                    'face' : face
                }
            })
            i = i + 1
    except ValueError:
        return None


def get_unique_faces_from_target_video() -> Any:
    try:
        modules.globals.souce_target_map = []
        frame_face_embeddings = []
        face_embeddings = []

        print('Creating temp resources...')
        clean_temp(modules.globals.target_path)
        create_temp(modules.globals.target_path)
        print('Extracting frames...')
        extract_frames(modules.globals.target_path)

        temp_frame_paths = get_temp_frame_paths(modules.globals.target_path)

        i = 0
        for temp_frame_path in tqdm(temp_frame_paths, desc="Extracting face embeddings from frames"):
            temp_frame = cv2.imread(temp_frame_path)
            many_faces = get_many_faces(temp_frame)

            for face in many_faces:
                face_embeddings.append(face.normed_embedding)

            frame_face_embeddings.append({'frame': i, 'faces': many_faces, 'location': temp_frame_path})
            i += 1

        centroids = find_cluster_centroids(face_embeddings)

        for frame in frame_face_embeddings:
            for face in frame['faces']:
                closest_centroid_index, _ = find_closest_centroid(centroids, face.normed_embedding)
                face['target_centroid'] = closest_centroid_index

        for i in range(len(centroids)):
            modules.globals.souce_target_map.append({
                'id' : i
            })

            temp = []
            for frame in tqdm(frame_face_embeddings, desc=f"Mapping frame embeddings to centroids-{i}"):
                temp.append({'frame': frame['frame'], 'faces': [face for face in frame['faces'] if face['target_centroid'] == i], 'location': frame['location']})

            modules.globals.souce_target_map[i]['target_faces_in_frame'] = temp

        # dump_faces(centroids, frame_face_embeddings)
        default_target_face()
    except ValueError:
        return None


def default_target_face():
    for map in modules.globals.souce_target_map:
        best_face = None
        best_frame = None
        for frame in map['target_faces_in_frame']:
            if len(frame['faces']) > 0:
                best_face = frame['faces'][0]
                best_frame = frame
                break

        for frame in map['target_faces_in_frame']:
            for face in frame['faces']:
                if face['det_score'] > best_face['det_score']:
                    best_face = face
                    best_frame = frame

        x_min, y_min, x_max, y_max = best_face['bbox']

        target_frame = cv2.imread(best_frame['location'])
        map['target'] = {
            'cv2' : target_frame[int(y_min):int(y_max), int(x_min):int(x_max)],
            'face' : best_face
        }


def dump_faces(centroids: Any, frame_face_embeddings: list):
    temp_directory_path = get_temp_directory_path(modules.globals.target_path)

    for i in range(len(centroids)):
        if os.path.exists(temp_directory_path + f"/{i}") and os.path.isdir(temp_directory_path + f"/{i}"):
            shutil.rmtree(temp_directory_path + f"/{i}")
        Path(temp_directory_path + f"/{i}").mkdir(parents=True, exist_ok=True)

        for frame in tqdm(frame_face_embeddings, desc=f"Copying faces to temp/./{i}"):
            temp_frame = cv2.imread(frame['location'])

            j = 0
            for face in frame['faces']:
                if face['target_centroid'] == i:
                    x_min, y_min, x_max, y_max = face['bbox']

                    if temp_frame[int(y_min):int(y_max), int(x_min):int(x_max)].size > 0:
                        cv2.imwrite(temp_directory_path + f"/{i}/{frame['frame']}_{j}.png", temp_frame[int(y_min):int(y_max), int(x_min):int(x_max)])
                    j += 1
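
To show where simplify_maps() fits, here is a rough sketch of how a frame processor could pick the mapped source face for a face detected in the current frame; the helper name is hypothetical and only the simple_map structure built above is assumed:

import modules.globals
from modules.cluster_analysis import find_closest_centroid

def pick_source_face(detected_face):
    # simple_map holds parallel lists: one source face per clustered target identity.
    simple_map = modules.globals.simple_map
    idx, _ = find_closest_centroid(simple_map['target_embeddings'], detected_face.normed_embedding)
    return simple_map['source_faces'][idx]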
6 changes: 5 additions & 1 deletion modules/globals.py
@@ -1,5 +1,5 @@
import os
-from typing import List, Dict
+from typing import List, Dict, Any

ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
WORKFLOW_DIR = os.path.join(ROOT_DIR, 'workflow')
@@ -9,6 +9,9 @@
    ('Video', ('*.mp4','*.mkv'))
]

souce_target_map = []
simple_map = {}

source_path = None
target_path = None
output_path = None
@@ -17,6 +20,7 @@
keep_audio = None
keep_frames = None
many_faces = None
map_faces = None
color_correction = None # New global variable for color correction toggle
nsfw_filter = None
video_encoder = None
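
For reference, a sketch of what the two new globals hold, as inferred from face_analyser.py above; field values are illustrative and the 'source' entry is presumably filled in by the UI when the user assigns a source face:

# souce_target_map: one entry per clustered target identity.
# {
#     'id': 0,
#     'target': {'cv2': <cropped frame ndarray>, 'face': <insightface Face>},
#     'source': {'face': <insightface Face>, ...},
#     'target_faces_in_frame': [{'frame': 0, 'faces': [...], 'location': '/path/to/frame.png'}, ...]
# }
#
# simple_map: parallel lists produced by simplify_maps() for fast per-frame lookup.
# {'source_faces': [<Face>, ...], 'target_embeddings': [<embedding ndarray>, ...]}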