Walk back unnecessary complexity

sevagh · sevagh · commit 323aee39331d · 2021-02-26T15:27:26.000-05:00
diff --git a/headbang/consensus.py b/headbang/consensus.py
diff --git a/headbang/headbang.py b/headbang/headbang.py
@@ -1,11 +1,9 @@
-from .percussive_transients import ihpss
+from .transients import ihpss
 from .onset import OnsetDetector, ODF
-from .beattrack import ConsensusBeatTracker
+from .consensus import ConsensusBeatTracker
 from .params import DEFAULTS
 import numpy
 import madmom
-from librosa.beat import beat_track
-from essentia.standard import TempoTapMaxAgreement
 
 
 def align_beats_onsets(beats, onsets, thresh):
@@ -85,8 +83,6 @@ def __init__(
         self.power_memory_ms = power_memory_ms
         self.filter_order = filter_order
 
-        self.ttap = TempoTapMaxAgreement()
-
     def beats(self, x):
         self.beat_consensus = self.cbt.beats(x)
         if self.disable_onsets:
diff --git a/headbang/hud_tool.py b/headbang/hud_tool.py
@@ -14,6 +14,7 @@
 from tempfile import gettempdir
 from headbang.motion import OpenposeDetector, bpm_from_beats
 from headbang.params import DEFAULTS
+from essentia.standard import TempoTapMaxAgreement
 
 from headbang import HeadbangBeatTracker
 
@@ -34,36 +35,12 @@ def main():
         default=DEFAULTS["bpm_frame_history"],
         help="History of frames (in seconds) to be included in the window of current bpm computation (default=%(default)s)",
     )
-    parser.add_argument(
-        "--adaptive-prominence-ratio",
-        type=float,
-        default=DEFAULTS["adaptive_prominence_ratio"],
-        help="Peak prominence will be this*(max_ycoord-min_ycoord) (default=%(default)s)",
-    )
-    parser.add_argument(
-        "--openpose-confidence-threshold",
-        type=float,
-        default=DEFAULTS["openpose_confidence_thresh"],
-        help="Openpose keypoints above this threshold will be preserved (default=%(default)s)",
-    )
-    parser.add_argument(
-        "--object-limit",
-        type=int,
-        default=DEFAULTS["detected_object_limit"],
-        help="Number of objects to track, sorted by their net displacement (default=%(default)s)",
-    )
     parser.add_argument(
         "--event-threshold-frames",
         type=int,
         default=DEFAULTS["event_thresh_frames"],
         help="Threshold in number of frames by which an event is considered to be the same (default=%(default)s)",
     )
-    parser.add_argument(
-        "--peak-width",
-        type=int,
-        default=DEFAULTS["peak_width"],
-        help="Peak width (in frames), don't want headbangs too close together (default=%(default)s)",
-    )
     parser.add_argument(
         "--debug-motion",
         action="store_true",
@@ -98,10 +75,6 @@ def main():
     pose_tracker = OpenposeDetector(
         total_frames,
         keypoints=args.keypoints,
-        obj_limit=args.object_limit,
-        adaptive_prominence_ratio=args.adaptive_prominence_ratio,
-        openpose_confidence_threshold=args.openpose_confidence_threshold,
-        peak_width=args.peak_width,
     )
 
     fps = cap.get(cv2.CAP_PROP_FPS)
@@ -176,23 +149,32 @@ def process_first_pass(*args, **kwargs):
     all_time = numpy.linspace(0, frame_duration * total_frames, int(total_frames))
 
     # take top peaks only
-    peaks = pose_tracker.find_peaks()[0][1]
+    print("Getting peaks of y motion")
+    peaks = pose_tracker.find_peaks()
     bop_locations = all_time[peaks]
 
+    if args.debug_motion:
+        print("Displaying debug y coordinate plot")
+        pose_tracker.plot_ycoords()
+    else:
+        ttap = TempoTapMaxAgreement()
+        # find_peaks() returns one array of peak frame indices, so there is
+        # a single candidate to score against the tracked beats: the peak
+        # times already computed above as bop_locations (all_time[peaks]).
+        # NOTE(review): all_beat_locations is assumed to be set earlier in main().
+        peak_times = bop_locations
+        beat_consensus, _ = ttap([all_beat_locations, peak_times])
+        print("FOR PEAKS: {0}, consensus: {1}".format(0, len(beat_consensus)))
+
     event_thresh = args.event_threshold_frames * frame_duration
 
     print("Marking beat and head bop positions on output frames")
 
-    all_beats_bpm = 0
-    strong_beats_bpm = 0
-    bop_bpm = 0
-
     print("run a gc, just in case...")
     gc.collect()
 
     # define a function to filter the first video to add more stuff
     def process_second_pass(get_frame_fn, frame_time):
-        nonlocal all_beats_bpm, bop_bpm, strong_beats_bpm
         frame = get_frame_fn(frame_time)
 
         frame_max = frame_time
@@ -216,18 +198,14 @@ def process_second_pass(get_frame_fn, frame_time):
             numpy.where((bop_locations >= frame_min) & (bop_locations <= frame_max))
         ]
 
-        all_beats_bpm_tmp = bpm_from_beats(all_beat_history)
-        bop_bpm_tmp = bpm_from_beats(bop_history)
-
-        if not numpy.isnan(all_beats_bpm_tmp):
-            all_beats_bpm = all_beats_bpm_tmp
-
-        if not numpy.isnan(bop_bpm_tmp):
-            bop_bpm = bop_bpm_tmp
+        all_beats_bpm = bpm_from_beats(all_beat_history)
+        bop_bpm = bpm_from_beats(bop_history)
 
         is_strong_beat = False
         is_beat = False
         is_bop = False
+        is_bop_debug2 = False
+        is_bop_debug3 = False
         if any(
             [b for b in all_beat_locations if numpy.abs(b - frame_time) <= event_thresh]
         ):
@@ -333,6 +311,3 @@ def process_second_pass(get_frame_fn, frame_time):
 
     print("cleaning up tmp mp4")
     os.remove(tmp_mp4)
-
-    if args.debug_motion:
-        pose_tracker.plot_ycoords()
diff --git a/headbang/motion.py b/headbang/motion.py
@@ -2,9 +2,8 @@
 import sys
 import os
 import scipy
-from scipy.signal import find_peaks, peak_prominences
+from scipy.signal import find_peaks_cwt
 import matplotlib.pyplot as plt
-from defaultlist import defaultlist
 from headbang.params import DEFAULTS
 
 openpose_install_path = "/home/sevagh/thirdparty-repos/openpose"
@@ -17,30 +16,23 @@
 
 class OpenposeDetector:
     undef_coord_default = numpy.nan
+    object_limit = 3
+    min_confidence = 0.5
 
     def __init__(
         self,
         n_frames,
         keypoints=DEFAULTS["pose_keypoints"],
-        obj_limit=DEFAULTS["detected_object_limit"],
-        adaptive_prominence_ratio=DEFAULTS["adaptive_prominence_ratio"],
-        openpose_confidence_threshold=DEFAULTS["openpose_confidence_thresh"],
-        peak_width=DEFAULTS["peak_width"],
     ):
         config = {}
-        # config["dir"] = openpose_install_path
         config["logging_level"] = 3
-        config["net_resolution"] = "320x320"  # 320x176
-        # config["output_resolution"] = "-1x768"  # 320x176
+        config["net_resolution"] = "320x320"
         config["model_pose"] = "BODY_25"
         config["alpha_pose"] = 0.6
         config["scale_gap"] = 0.3
         config["scale_number"] = 1
-        # config["keypoint_scale"] = 4 # scale to -1,1
         config["render_threshold"] = 0.05
-        config[
-            "num_gpu_start"
-        ] = 0  # If GPU version is built, and multiple GPUs are available, set the ID here
+        config["num_gpu_start"] = 0
         config["disable_blending"] = False
 
         config["model_folder"] = openpose_dir + "/models/"
@@ -51,13 +43,8 @@ def __init__(
         self.keypoints = [int(i) for i in keypoints.split(",")]
 
         self.n_frames = int(n_frames)
-        self.all_y_coords = [[OpenposeDetector.undef_coord_default] * self.n_frames]
+        self.all_y_coords = [OpenposeDetector.undef_coord_default] * self.n_frames
         self.frame_idx = 0
-        self.obj_limit = obj_limit
-
-        self.confidence_threshold = openpose_confidence_threshold
-        self.adaptive_prominence_ratio = adaptive_prominence_ratio
-        self.peak_width = peak_width
 
     def detect_pose(self, image):
         datum = op.Datum()
@@ -68,76 +55,40 @@ def detect_pose(self, image):
         return datum.poseKeypoints, datum.cvOutputData
 
     def process_frame(self, frame):
-        tracked_objects = None
         multiple_detected_poses, outframe = self.detect_pose(frame)
 
-        median_x = None
-        median_y = None
-
         if multiple_detected_poses is not None:
-            # array of (x, y) coordinates of the head/neck
-            multiple_poses_of_interest = [
-                [
-                    (d[0], d[1])
-                    for i, d in enumerate(single_detected_poses)
-                    if i in self.keypoints and d[2] > self.confidence_threshold
-                ]
-                for single_detected_poses in multiple_detected_poses
-            ]
-
-            if multiple_poses_of_interest:
-                for i, poses_of_interest in enumerate(multiple_poses_of_interest):
-                    poses_of_interest = numpy.asarray(poses_of_interest)
-                    median_coords = numpy.median(poses_of_interest, axis=0)
-                    if not numpy.any(numpy.isnan(median_coords)):
-                        median_y = median_coords[1]
-                        y_norm = median_y / frame.shape[0]
-                        try:
-                            self.all_y_coords[i][self.frame_idx] = y_norm
-                        except IndexError:
-                            self.all_y_coords.append(
-                                [OpenposeDetector.undef_coord_default] * self.n_frames
-                            )
-                            self.all_y_coords[i][self.frame_idx] = y_norm
+            poses_of_interest = []
+
+            # collect (x, y) coordinates of the head, median across the first object_limit objects
+            for detected_poses in multiple_detected_poses[
+                : OpenposeDetector.object_limit
+            ]:
+                for keypoint, d in enumerate(detected_poses):
+                    if (
+                        keypoint in self.keypoints
+                        and d[2] > OpenposeDetector.min_confidence
+                    ):
+                        poses_of_interest.append((d[0], d[1]))
+
+            # no confident keypoint in this frame: leave the slot undefined
+            # (the median of an empty array is nan and makes numpy warn)
+            if poses_of_interest:
+                median_coords = numpy.median(numpy.asarray(poses_of_interest), axis=0)
+                if not numpy.any(numpy.isnan(median_coords)):
+                    y_norm = median_coords[1] / frame.shape[0]
+                    self.all_y_coords[self.frame_idx] = y_norm
 
         self.frame_idx += 1
         return outframe
 
     def find_peaks(self):
-        peaks = [None] * len(self.all_y_coords)
-        prominences = [None] * len(self.all_y_coords)
-        adjusted_y_coords = [None] * len(self.all_y_coords)
-
-        for i, y_coords in enumerate(self.all_y_coords):
-            min_coord = numpy.nanmin(y_coords)
-            max_coord = numpy.nanmax(y_coords)
-
-            # adaptive peak prominence - X% of max displacement
-            adaptive_prominence = self.adaptive_prominence_ratio * (
-                max_coord - min_coord
-            )
-
-            adjusted_y_coords[i] = numpy.nan_to_num(y_coords, nan=min_coord)
+        min_coord = numpy.nanmin(self.all_y_coords)
+        adjusted_y_coords = numpy.nan_to_num(self.all_y_coords, nan=min_coord)
 
-            peaks[i], _ = find_peaks(
-                adjusted_y_coords[i],
-                prominence=adaptive_prominence,
-                wlen=self.peak_width,
-            )
-
-            prominences[i], _, _ = peak_prominences(adjusted_y_coords[i], peaks[i])
-
-        top_ycoords_and_peaks = [
-            (ycrds, pks)
-            for _, pks, ycrds in sorted(
-                zip(prominences, peaks, adjusted_y_coords),
-                key=lambda triplet: sum(triplet[0]),
-                reverse=True,
-            )
-        ]
-
-        # only track up to obj_limit objects
-        return top_ycoords_and_peaks[: self.obj_limit]
+        # wavelets are good for peaks
+        # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2631518/
+        return find_peaks_cwt(adjusted_y_coords, numpy.arange(5, 10))
 
     def plot_ycoords(self):
         plt.figure(1)
@@ -147,20 +98,17 @@ def plot_ycoords(self):
         plt.ylabel("y coord")
 
         frames = numpy.arange(self.n_frames)
-        best_coords_and_peaks = self.find_peaks()
-
-        for i, coordspeaks in enumerate(best_coords_and_peaks):
-            y_coords, peaks = coordspeaks
-            y_coords = numpy.asarray(y_coords)
-            plt.plot(
-                frames,
-                y_coords,
-                "-D",
-                label="obj {0}".format(i),
-                markevery=peaks,
-                mec="black",
-                mfc="black",
-            )
+        peaks = self.find_peaks()
+        y_coords = numpy.asarray(self.all_y_coords)
+        # label the line so the plt.legend() call below has an entry to show
+        plt.plot(
+            frames,
+            y_coords,
+            "-D",
+            label="median y coord",
+            markevery=peaks,
+            mec="black",
+        )
 
         plt.legend()
         plt.show()
diff --git a/headbang/params.py b/headbang/params.py
@@ -13,11 +13,7 @@
     "release_ms": 20,
     "power_memory_ms": 1,
     "filter_order": 3,
-    "bpm_frame_history": 3.0,
+    "bpm_frame_history": 2.0,
     "pose_keypoints": "0,15,16,17,18",
     "event_thresh_frames": 2,
-    "detected_object_limit": 1,
-    "adaptive_prominence_ratio": 0.5,
-    "openpose_confidence_thresh": 0.5,
-    "peak_width": 3,
 }
diff --git a/headbang/transients.py b/headbang/transients.py
diff --git a/requirements.txt b/requirements.txt
@@ -11,4 +11,3 @@ moviepy==1.0.3
 essentia==2.1b6.dev374
 madmom==0.16.1
 scipy==1.6.0
-defaultlist