-
Notifications
You must be signed in to change notification settings - Fork 127
/
stage2.py
173 lines (135 loc) · 6.04 KB
/
stage2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import cv2
import os
import glob
import shutil
import numpy as np
import math
#---------------------------------
# Copied from PySceneDetect
def mean_pixel_distance(left: np.ndarray, right: np.ndarray) -> float:
    """Return the mean absolute per-pixel difference between two images.

    Both `left` and `right` must be 2-dimensional arrays of identical shape
    (intended for 8-bit single-channel images). The inputs are widened to
    int32 before subtracting so the difference cannot wrap around.
    """
    assert left.ndim == 2 and right.ndim == 2
    assert left.shape == right.shape
    rows, cols = left.shape
    abs_diff = np.abs(left.astype(np.int32) - right.astype(np.int32))
    return np.sum(abs_diff) / float(rows * cols)
def estimated_kernel_size(frame_width: int, frame_height: int) -> int:
    """Estimate an odd dilation-kernel size from the frame resolution.

    The size grows with the square root of the pixel count and is bumped up
    by one whenever it would otherwise be even.
    """
    base: int = 4 + round(math.sqrt(frame_width * frame_height) / 192)
    # Setting the lowest bit turns an even value into the next odd one and
    # leaves an odd value unchanged.
    return base | 1
_kernel = None
def _detect_edges(lum: np.ndarray) -> np.ndarray:
    """Detect edges using the luma channel of a frame.

    Arguments:
        lum: 2D 8-bit image representing the luma channel of a frame.

    Returns:
        2D 8-bit image of the same size as the input, where pixels with values of 255
        represent edges, and all other pixels are 0.
    """
    global _kernel

    # (Re)build the cached dilation kernel whenever the size required for this
    # frame differs from the cached one, so that frames of a different
    # resolution processed later in the same process do not reuse a stale kernel.
    kernel_size = estimated_kernel_size(lum.shape[1], lum.shape[0])
    if _kernel is None or _kernel.shape[0] != kernel_size:
        _kernel = np.ones((kernel_size, kernel_size), np.uint8)

    # Estimate levels for thresholding from the median brightness.
    sigma: float = 1.0 / 3.0
    median = np.median(lum)
    low = int(max(0, (1.0 - sigma) * median))
    high = int(min(255, (1.0 + sigma) * median))

    # Calculate edges using Canny algorithm, and reduce noise by dilating the edges.
    # This increases edge overlap leading to improved robustness against noise and slow
    # camera movement. Note that very large kernel sizes can negatively affect accuracy.
    edges = cv2.Canny(lum, low, high)
    return cv2.dilate(edges, _kernel)
#---------------------------------
def detect_edges(img_path, mask_path, is_invert_mask):
    """Load an image, optionally mask it, and return its dilated edge map.

    Arguments:
        img_path: path of the frame image to analyse.
        mask_path: path of a mask image, or a falsy value to skip masking.
            Only the first channel of the mask is used.
            NOTE(review): the mask is assumed to have the same height/width
            as the frame; otherwise the multiplication below cannot broadcast.
        is_invert_mask: when true, pixels where the mask is 0 are kept;
            otherwise pixels where the mask is greater than 0 are kept.

    Returns:
        The result of `_detect_edges` applied to the third channel of the
        HSV-converted (and optionally masked) image.

    Raises:
        OSError: if the image or the mask cannot be read.
    """
    im = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if im is None:
        raise OSError("failed to read image: {}".format(img_path))

    if mask_path:
        mask = cv2.imread(mask_path)
        if mask is None:
            raise OSError("failed to read mask: {}".format(mask_path))
        # Keep a trailing axis of length 1 so the mask broadcasts over channels.
        mask = mask[:, :, 0][:, :, np.newaxis]
        keep = (mask == 0) if is_invert_mask else (mask > 0)
        im = im * keep

    _, _, lum = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
    return _detect_edges(lum)
def get_mask_path_of_img(img_path, mask_dir):
    """Return the path in `mask_dir` whose file name matches `img_path`.

    Returns None when no regular file with that name exists in `mask_dir`.
    """
    candidate = os.path.join(mask_dir, os.path.basename(img_path))
    if os.path.isfile(candidate):
        return candidate
    return None
def analyze_key_frames(png_dir, mask_dir, th, min_gap, max_gap, add_last_frame, is_invert_mask):
    """Select keyframe numbers from the numbered PNG frames in `png_dir`.

    The first frame is always a keyframe. Each later frame is compared, via
    the mean pixel distance of its edge map, against the most recent keyframe;
    it becomes a new keyframe when that distance exceeds a threshold that
    shrinks linearly from `th` to 0 as the gap approaches `max_gap`.

    Arguments:
        png_dir: directory containing frames matching "[0-9]*.png".
        mask_dir: directory searched for a same-named mask per frame.
        th: base threshold for the edge-map distance.
        min_gap: frames closer than this to the last keyframe are skipped.
        max_gap: gap at which the effective threshold reaches 0; must be
            non-zero because it is used as a divisor.
        add_last_frame: when true, the last frame is appended if not already
            a keyframe.
        is_invert_mask: forwarded to `detect_edges`.

    Returns:
        List of keyframe numbers (file names parsed as int), in order.
        An empty list when `png_dir` contains no matching frames.
    """
    frames = sorted(glob.glob(os.path.join(png_dir, "[0-9]*.png")))
    if not frames:
        # Nothing to analyse; avoid indexing into an empty list.
        return []

    def frame_number(path):
        return int(os.path.splitext(os.path.basename(path))[0])

    def edges_of(path):
        return detect_edges(path, get_mask_path_of_img(path, mask_dir), is_invert_mask)

    keys = [frame_number(frames[0])]
    key_edges = edges_of(frames[0])

    gap = 0
    # Start from the second frame: the first one is already the keyframe, and
    # counting it again would make the first interval one frame shorter than
    # every later one.
    for frame in frames[1:]:
        gap += 1
        if gap < min_gap:
            continue

        edges = edges_of(frame)
        delta = mean_pixel_distance(edges, key_edges)

        # The longer since the last keyframe, the lower the bar for a new one.
        _th = th * (max_gap - gap) / max_gap

        if _th < delta:
            keys.append(frame_number(frame))
            key_edges = edges
            gap = 0

    if add_last_frame:
        last_frame = frame_number(frames[-1])
        if last_frame not in keys:
            keys.append(last_frame)

    return keys
def remove_pngs_in_dir(path):
    """Delete every "*.png" file directly inside `path`.

    Does nothing when `path` is not an existing directory.
    """
    if os.path.isdir(path):
        for png_file in glob.glob(os.path.join(path, "*.png")):
            os.remove(png_file)
def ebsynth_utility_stage2(dbg, project_args, key_min_gap, key_max_gap, key_th, key_add_last_frame, is_invert_mask):
    """Run stage 2: pick keyframes and copy them into the keyframe directory.

    Arguments:
        dbg: object whose `print` method is used for progress output.
        project_args: 7-item sequence; items 2-5 are used as the original
            movie path, frame directory, frame-mask directory and keyframe
            output directory.
        key_min_gap: minimum keyframe gap at 30 fps, or -1 for the default (10).
        key_max_gap: maximum keyframe gap at 30 fps, or -1 for the default (300).
        key_th: threshold passed to `analyze_key_frames`.
        key_add_last_frame: whether the last frame is always made a keyframe.
        is_invert_mask: whether masks are inverted before edge detection.
    """
    dbg.print("stage2")
    dbg.print("")

    _, original_movie_path, frame_path, frame_mask_path, org_key_path, _, _ = project_args

    # Start from an empty keyframe directory.
    remove_pngs_in_dir(org_key_path)
    os.makedirs(org_key_path, exist_ok=True)

    # Default to 30 fps unless the video opens and reports a positive rate.
    # A VideoCapture object is always truthy, so isOpened() must be checked;
    # an unopened capture reports 0 fps, which would zero out both gaps.
    fps = 30
    clip = cv2.VideoCapture(original_movie_path)
    try:
        if clip.isOpened():
            detected_fps = clip.get(cv2.CAP_PROP_FPS)
            if detected_fps > 0:
                fps = detected_fps
    finally:
        clip.release()

    # Gaps are specified relative to 30 fps and scaled to the real frame rate.
    if key_min_gap == -1:
        key_min_gap = 10
    else:
        key_min_gap = max(1, key_min_gap)
    # Clamp after scaling so very low frame rates cannot produce a gap of 0.
    key_min_gap = max(1, int(key_min_gap * fps / 30))

    if key_max_gap == -1:
        key_max_gap = 300
    else:
        key_max_gap = max(10, key_max_gap)
    # key_max_gap is used as a divisor downstream, so it must stay >= 1.
    key_max_gap = max(1, int(key_max_gap * fps / 30))

    # Make sure min <= max even if the caller passed them the wrong way round.
    if key_min_gap > key_max_gap:
        key_min_gap, key_max_gap = key_max_gap, key_min_gap

    dbg.print("fps: {}".format(fps))
    dbg.print("key_min_gap: {}".format(key_min_gap))
    dbg.print("key_max_gap: {}".format(key_max_gap))
    dbg.print("key_th: {}".format(key_th))

    keys = analyze_key_frames(frame_path, frame_mask_path, key_th, key_min_gap, key_max_gap, key_add_last_frame, is_invert_mask)

    dbg.print("keys : " + str(keys))

    # Frame files are addressed as zero-padded 5-digit numbers.
    for k in keys:
        filename = str(k).zfill(5) + ".png"
        shutil.copy( os.path.join( frame_path , filename) , os.path.join(org_key_path, filename) )

    dbg.print("")
    dbg.print("Keyframes are output to [" + org_key_path + "]")
    dbg.print("")
    dbg.print("[Ebsynth Utility]->[configuration]->[stage 2]->[Threshold of delta frame edge]")
    dbg.print("The smaller this value, the narrower the keyframe spacing, and if set to 0, the keyframes will be equally spaced at the value of [Minimum keyframe gap].")
    dbg.print("")
    dbg.print("If you do not like the selection, you can modify it manually.")
    dbg.print("(Delete keyframe, or Add keyframe from ["+frame_path+"])")
    dbg.print("")
    dbg.print("completed.")