
Commit 2650f8f

Smoother motion BPM
1 parent e1f2349 commit 2650f8f

File tree

10 files changed: +78 -80 lines

bin/annotate_beats.py

+3 -9

@@ -8,7 +8,7 @@
 import librosa
 import sys
 import argparse
-from headbang.util import load_wav
+from headbang.util import load_wav, overlay_clicks


 class Player:
@@ -111,15 +111,9 @@ def main():

     player.terminate_processing()

-    x = load_wav(args.wav_in, stereo=True)
-
     print("annotated beat locations: {0}".format(beats))
-    beat_clicks = librosa.clicks(beats, sr=44100, length=len(x))
-
-    if x.shape[0] == 1:
-        beat_clicks = numpy.column_stack((beat_clicks, beat_clicks))
-
-    beat_waveform = (x + beat_clicks).astype(numpy.single)
+    x = load_wav(args.wav_in, stereo=True)
+    beat_waveform = overlay_clicks(x, beats)

     print("Writing outputs with clicks to {0}".format(args.beat_wav_out))
     write_wave_file(beat_waveform, args.beat_wav_out, sample_rate=44100)

bin/beat_track.py

+24 -25

@@ -10,7 +10,7 @@
 from madmom.io.audio import write_wave_file

 from headbang import HeadbangBeatTracker
-from headbang.util import load_wav
+from headbang.util import load_wav, overlay_clicks
 from headbang.params import DEFAULTS


@@ -26,27 +26,27 @@ def main():
         "--algorithms",
         type=str,
         default=DEFAULTS["algorithms"],
-        help="List of beat tracking algorithms to apply",
+        help="List of beat tracking algorithms to apply (default=%(default)s)",
     )
     beat_args.add_argument(
         "--onset-align-threshold-s",
         type=float,
         default=DEFAULTS["onset_align_threshold_s"],
-        help="How close beats should align with onsets (in seconds)",
+        help="How close beats should align with onsets (in seconds) (default=%(default)s)",
     )

     onset_args = parser.add_argument_group("onsets arguments")
     onset_args.add_argument(
         "--max-no-beats",
         type=float,
         default=DEFAULTS["max_no_beats"],
-        help="Segments with missing beats to substitute onsets",
+        help="Segments with missing beats to substitute onsets (default=%(default)s)",
     )
     onset_args.add_argument(
         "--onset-near-threshold-s",
         type=float,
         default=DEFAULTS["onset_near_threshold_s"],
-        help="How close onsets should be (in seconds) when supplementing onset information",
+        help="How close onsets should be (in seconds) when supplementing onset information (default=%(default)s)",
     )
     onset_args.add_argument(
         "--onset-silence-threshold",
@@ -59,7 +59,7 @@ def main():
         "--n-pool",
         type=int,
         default=multiprocessing.cpu_count() - 1,
-        help="How many threads to use in multiprocessing pool",
+        help="How many threads to use in multiprocessing pool (default=%(default)s)",
     )
     parser.add_argument(
         "--show-plots",
@@ -77,62 +77,68 @@ def main():
         help="disable transient shaping, only use percussive separation",
     )
     parser.add_argument(
-        "--beats-out", type=str, default="", help="output beats txt file"
+        "--beats-out",
+        type=str,
+        default="",
+        help="output beats txt file (default=%(default)s)",
     )

     hpss_args = parser.add_argument_group("hpss arguments")
     hpss_args.add_argument(
         "--harmonic-margin",
         type=float,
         default=DEFAULTS["harmonic_margin"],
-        help="Separation margin for HPSS harmonic iteration",
+        help="Separation margin for HPSS harmonic iteration (default=%(default)s)",
     )
     hpss_args.add_argument(
         "--harmonic-frame",
         type=int,
         default=DEFAULTS["harmonic_frame"],
-        help="T-F/frame size for HPSS harmonic iteration",
+        help="T-F/frame size for HPSS harmonic iteration (default=%(default)s)",
     )
     hpss_args.add_argument(
         "--percussive-margin",
         type=float,
         default=DEFAULTS["percussive_margin"],
-        help="Separation margin for HPSS percussive iteration",
+        help="Separation margin for HPSS percussive iteration (default=%(default)s)",
     )
     hpss_args.add_argument(
         "--percussive-frame",
         type=int,
         default=DEFAULTS["percussive_frame"],
-        help="T-F/frame size for HPSS percussive iteration",
+        help="T-F/frame size for HPSS percussive iteration (default=%(default)s)",
     )

     tshaper_args = parser.add_argument_group("multiband transient shaper arguments")
     tshaper_args.add_argument(
         "--fast-attack-ms",
         type=int,
         default=DEFAULTS["fast_attack_ms"],
-        help="Fast attack (ms)",
+        help="Fast attack (ms) (default=%(default)s)",
     )
     tshaper_args.add_argument(
         "--slow-attack-ms",
         type=int,
         default=DEFAULTS["slow_attack_ms"],
-        help="Slow attack (ms)",
+        help="Slow attack (ms) (default=%(default)s)",
     )
     tshaper_args.add_argument(
-        "--release-ms", type=int, default=DEFAULTS["release_ms"], help="Release (ms)"
+        "--release-ms",
+        type=int,
+        default=DEFAULTS["release_ms"],
+        help="Release (ms) (default=%(default)s)",
     )
     tshaper_args.add_argument(
         "--power-memory-ms",
         type=int,
         default=DEFAULTS["power_memory_ms"],
-        help="Power filter memory (ms)",
+        help="Power filter memory (ms) (default=%(default)s)",
    )
     tshaper_args.add_argument(
         "--filter-order",
         type=int,
         default=DEFAULTS["filter_order"],
-        help="Bandpass (butter) filter order",
+        help="Bandpass (butter) filter order (default=%(default)s)",
     )

     parser.add_argument("wav_in", help="input wav file")
@@ -180,18 +186,11 @@ def main():
             f.write(f"{b}\n")

     print("Overlaying clicks at beat locations")
-    clicks = librosa.clicks(beats, sr=44100, length=len(x))
-
-    # if stereo, write it that way for higher quality
     x_stereo = load_wav(args.wav_in, stereo=True)
-
-    if len(x_stereo.shape) > 1 and x_stereo.shape[1] == 2:
-        clicks = numpy.column_stack((clicks, clicks))  # convert to stereo
-
-    final_waveform = (x_stereo + clicks).astype(numpy.single)
+    x_with_clicks = overlay_clicks(x_stereo, beats)

     print("Writing output with clicks to {0}".format(args.wav_out))
-    write_wave_file(final_waveform, args.wav_out, sample_rate=44100)
+    write_wave_file(x_with_clicks, args.wav_out, sample_rate=44100)

     if args.show_plots:
         print("Displaying plots")

bin/reference_beats.py

+10 -7

@@ -6,7 +6,7 @@
 import numpy
 import librosa
 from madmom.io.audio import load_audio_file, write_wave_file
-from headbang.util import load_wav
+from headbang.util import load_wav, overlay_clicks
 from headbang.beattrack import apply_single_beat_tracker, algo_names
 import madmom

@@ -19,10 +19,16 @@ def main():
     )

     parser.add_argument(
-        "--algorithm", type=int, default=1, help="which single algorithm to use"
+        "--algorithm",
+        type=int,
+        default=1,
+        help="which single algorithm to use (default=%(default)s)",
     )
     parser.add_argument(
-        "--filter-order", type=int, default=2, help="butter filter order"
+        "--filter-order",
+        type=int,
+        default=2,
+        help="butter filter order (default=%(default)s)",
     )
     parser.add_argument("wav_in", help="input wav file")
     parser.add_argument("beat_wav_out", help="output beat wav file")
@@ -36,10 +42,7 @@ def main():
     beat_times = apply_single_beat_tracker(x, args.algorithm)

     print("Overlaying clicks at beat locations")
-
-    beat_clicks = librosa.clicks(beat_times, sr=44100, length=len(x))
-
-    beat_waveform = (x + beat_clicks).astype(numpy.single)
+    beat_waveform = overlay_clicks(x, beat_times)

     print("Writing outputs with clicks to {0}".format(args.beat_wav_out))
     write_wave_file(beat_waveform, args.beat_wav_out, sample_rate=44100)

docs/index.md

+3 -13

@@ -95,7 +95,7 @@ The HeadbangBeatTracker first gathers beats using the ConsensusBeatTracker, and

 ![percussive_hpss](percussive_hpss.png)

-The algorithm used is one based on median filtering the spectrogram, originally described in [[9]](#9), and further improved with iterative algorithm in [[10]](#10). I use the [librosa implementation](https://librosa.org/doc/0.8.0/generated/librosa.decompose.hpss.html), which has a good visualization of the effect.
+The algorithm used is one based on median filtering the spectrogram, originally described in [[9]](#9), and further improved with an iterative algorithm in [[10]](#10). I use the [librosa implementation](https://librosa.org/doc/0.8.0/generated/librosa.decompose.hpss.html), which has a good visualization of the effect.

 The parameters of the HPSS can be modified (but I don't find it changes the results significantly):
 * `harmonic_frame=16384` (a larger frame size in the first iteration gives us higher frequency resolution which helps separate pitched, harmonic components)
@@ -197,11 +197,6 @@ Here's a table of some interesting outputs of headbang's algorithms:
 <td>{% include embed-audio.html src="themixture_dbn.wav" %}</td>
 <td>{% include embed-audio.html src="themixture_hbt.wav" %}</td>
 </tr>
-<tr>
-<td><a href="https://www.youtube.com/watch?v=Pru_5HW9Ofg">Vitalism - Luxata</a></td>
-<td>{% include embed-audio.html src="luxata_dbn.wav" %}</td>
-<td>{% include embed-audio.html src="luxata_hbt.wav" %}</td>
-</tr>
 <tr>
 <td><a href="https://www.youtube.com/watch?v=8niG0ta4jZs">Anup Sastry - Origin</a></td>
 <td>{% include embed-audio.html src="origin_dbn.wav" %}</td>
@@ -394,11 +389,6 @@ for bpm in bpms:

 Note the off-beat head bops at first, which transition to being on-beat.

-[Keith Ape - It G Ma](https://www.youtube.com/watch?v=Ls9QJEE0Drw):
-{% include embed-video.html src="it_g_ma_short.mp4" %}
-
-Although all the talk is about metal so far, it works well on the above rap song.
-
 Here's a clip from drummer [Anup Sastry - Titan](https://www.youtube.com/watch?v=Y82rls0yoAM), where the motion is tracked on the left and right arms (instead of the head and neck):
 {% include embed-video.html src="anupsastry_short.mp4" %}

@@ -408,10 +398,10 @@ Here's a clip from drummer [Anup Sastry - Titan](https://www.youtube.com/watch?v

 Note the two-step process:
 1. First, the video is stepped through frame by frame to apply OpenPose pose detection
-2. The resultant frames (with drawn keypoints) are written to a tmp mp4 file
+2. The resultant frames (with drawn keypoints) are written to a temporary mp4 file
 3. y coordinates are accumulated per-frame to track motion throughout the video and pick peaks
 4. Beats are computed from the audio
-5. The tmp mp4 file is loaded frame by frame, bop/beat/tempo values are drawn on the respective frames, and the result is written to the final output file
+5. The temporary mp4 file is loaded frame by frame, bop/beat/tempo values are drawn on the respective frames, and the result is written to the final output file

 The two-pass design was chosen out of necessity; keeping all of the frames of the video in-memory while performing all of the processing was leading to huge memory usage (32+GB) for long videos.
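
The numbered list in the diff above describes a frame-callback pipeline. Here is a minimal sketch of that two-pass layout, assuming moviepy as the video library (the process_second_pass(get_frame_fn, frame_time) signature elsewhere in this commit matches moviepy's clip.fl callback); the function bodies and file names are placeholders, not the project's actual code:

import tempfile

from moviepy.editor import VideoFileClip


def first_pass(get_frame, t):
    frame = get_frame(t).copy()
    # (placeholder) run OpenPose on this frame and draw keypoints on it
    return frame


def second_pass(get_frame, t):
    frame = get_frame(t).copy()
    # (placeholder) draw bop/beat/tempo values for time t on the frame
    return frame


tmp_mp4 = tempfile.mkstemp(suffix=".mp4")[1]

# pass 1: pose detection; annotated frames go straight to a temporary
# file instead of accumulating in memory
VideoFileClip("in.mp4").fl(first_pass).write_videofile(tmp_mp4)

# pass 2: reload the annotated frames and overlay the computed values
VideoFileClip(tmp_mp4).fl(second_pass).write_videofile("out.mp4")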

docs/luxata_dbn.wav

-5.05 MB
Binary file not shown.

docs/luxata_hbt.wav

-10.1 MB
Binary file not shown.

headbang-hud/headbang-hud.py

+17 -2

@@ -104,12 +104,24 @@ def bpm_from_beats(beats):
     return 60 / beat_step


+def bops_realistic_smoothing(bops, min_spacing):
+    return bops[numpy.where(numpy.diff(bops) > min_spacing)[0]]
+
+
 def main():
     parser = argparse.ArgumentParser(
         description="Track human pose in videos with music alongside groove metrics and beat tracking"
     )
     parser.add_argument(
-        "--custom-keypoints", type=str, help="Override the default face/neck keypoints"
+        "--custom-keypoints",
+        type=str,
+        help="Override the default face/neck keypoints (default=%(default)s)",
+    )
+    parser.add_argument(
+        "--minimum-bop-spacing",
+        type=float,
+        default=0.2,
+        help="Minimum spacing (in seconds) between bops to filter out implausible events (default=%(default)s)",
     )
     parser.add_argument("mp4_in", type=str, help="mp4 file to process")
     parser.add_argument("mp4_out", type=str, help="mp4 output path")
@@ -225,7 +237,7 @@ def process_first_pass(*args, **kwargs):

     print("Marking beat and head bop positions on output frames")

-    frame_history = 3  # consider this many seconds of history for bpm computation
+    frame_history = 5  # consider this many seconds of history for bpm computation

     all_beats_bpm = 0
     strong_beats_bpm = 0
@@ -255,10 +267,13 @@ def process_second_pass(get_frame_fn, frame_time):
         )
     ]

+    # keep a running history of bops
     bop_history = bop_locations[
         numpy.where((bop_locations >= frame_min) & (bop_locations <= frame_max))
     ]

+    bop_history = bops_realistic_smoothing(bop_history, args.minimum_bop_spacing)
+
     all_beats_bpm_tmp = bpm_from_beats(all_beat_history)
     bop_bpm_tmp = bpm_from_beats(bop_history)
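
To see what the new filter does in isolation: numpy.diff gives the gap following each bop, so the indexing keeps every bop that is followed by a gap wider than min_spacing (the final bop, having no following gap, is always dropped). A quick sketch with made-up timestamps:

import numpy


def bops_realistic_smoothing(bops, min_spacing):
    return bops[numpy.where(numpy.diff(bops) > min_spacing)[0]]


# two detections only 50 ms apart around t=1s: physically implausible
# for a head bop, so the pair should collapse to a single event
bops = numpy.array([1.00, 1.05, 1.50, 2.00])
print(bops_realistic_smoothing(bops, 0.2))  # [1.05 1.5 ]

Together with the widened BPM window in the same file (frame_history going from 3 to 5 seconds of beat history per estimate), this gap filter appears to be what the commit title's "Smoother motion BPM" refers to.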

headbang/util.py

+10

@@ -1,5 +1,6 @@
 from madmom.io.audio import load_audio_file
 import numpy
+import librosa


 def load_wav(wav_in, stereo=False):
@@ -17,3 +18,12 @@ def load_wav(wav_in, stereo=False):
     x /= numpy.max(numpy.abs(x))

     return x
+
+
+def overlay_clicks(x, beats):
+    clicks = librosa.clicks(beats, sr=44100, length=len(x))
+
+    if len(x.shape) > 1 and x.shape[1] == 2:
+        clicks = numpy.column_stack((clicks, clicks))  # convert to stereo
+
+    return (x + clicks).astype(numpy.single)
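
This helper consolidates the click-overlay code previously duplicated across the three scripts above. It assumes 44.1 kHz audio, with x either mono of shape (n,) or stereo of shape (n, 2) as returned by load_wav; in the stereo case the mono click track from librosa.clicks is duplicated into both channels so the addition broadcasts. A hypothetical round trip (file names are placeholders):

from madmom.io.audio import write_wave_file

from headbang.util import load_wav, overlay_clicks

x = load_wav("song.wav", stereo=True)  # (n, 2) float array
beats = [0.5, 1.0, 1.5, 2.0]  # beat times in seconds
write_wave_file(overlay_clicks(x, beats), "song_clicks.wav", sample_rate=44100)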

misc/latex-deliverables/citations.bib

+1 -1

@@ -1,4 +1,4 @@
-@website{mirex06,
+@misc{mirex06,
 title={2006:Audio Beat Tracking},
 url={https://www.music-ir.org/mirex/wiki/2006:Audio_Beat_Tracking},
 journal={MIREX Wiki}}
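
The entry-type change likely matters for compilation: @website is not one of biblatex's standard entry types, while @misc is universally supported, so the rename presumably keeps citations.bib loadable under the biblatex-chicago setup adopted in proposal.tex below.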

misc/latex-deliverables/proposal.tex

+10 -23

@@ -1,5 +1,5 @@
 \documentclass[letter,12pt]{report}
-\setlength{\parindent}{0pt}
+%\setlength{\parindent}{0pt}
 \usepackage[left=2cm, right=2cm, top=2cm, bottom=2cm]{geometry}
 \usepackage[shortlabels]{enumitem}
 \usepackage{graphicx}
@@ -24,12 +24,7 @@
 \renewcommand{\topfraction}{0.85}
 \renewcommand{\textfraction}{0.1}
 \renewcommand{\floatpagefraction}{0.75}
-\usepackage[
-%backend=biber,
-natbib=true,
-style=numeric,
-sorting=none
-]{biblatex}
+\usepackage[backend=biber,authordate]{biblatex-chicago}
 \addbibresource{citations.bib}
 \usepackage{titlesec}

@@ -38,31 +33,23 @@

 \begin{document}

-\Large{\textbf{headbang.py}}\\
-\large{Final Project Proposal. MUMT 621, April 06, 2021}\\
+\noindent\Large{\textbf{headbang.py}}\\
+\large{Final project proposal. MUMT 621, April 06, 2021}\\
 \large{Sevag Hanssian, 260398537}

-\hrulefill
+\noindent\hrulefill

 \vspace{2em}

-Beat tracking is a rich field of music information retrieval (MIR). The audio beat tracking task has been a part of MIREX since 2006 \cite{mirex06}, and receives submissions every year. Most recently, state of the art results have been achieved by \cite{bock1} and \cite{bock2}, who have also released their algorithms in the madmom Python library \cite{madmom}.
+Beat tracking is a rich field of music information retrieval (MIR). The audio beat tracking task has been a part of MIREX since 2006 (\cite{mirex06}), and receives submissions every year. Most recently, \textcite{bock1, bock2} have achieved state of the art results, and have released their algorithms in the open-source madmom Python library (\cite{madmom}).

-\vspace{1em}
+The beat tracking algorithms in MIREX are evaluated against diverse and challenging beat tracking datasets (\cite{beatmeta}). However, in my personal experiments on my preferred genres of music (mostly rhythmically-complex progressive metal, e.g., \cite{meshuggah, periphery}), I noticed that in several cases the beat locations output by the best algorithms were not correct.

-\qquad The beat tracking algorithms in MIREX are evaluated against diverse and challenging beat tracking datasets (\cite{beatmeta}). However, in my personal experiments on my preferred genres of music (mostly rhythmically-complex progressive metal, e.g., \cite{meshuggah}, \cite{periphery}), I noticed that in several cases the beat locations output by the best algorithms were not correct.
+For the first goal of my final project, I propose to explore various beat tracking algorithms and pre-processing techniques to demonstrate improved beat results in progressive metal songs. The name of the project is ``headbang.py''; the ``.py'' suffix is because it will be a code project written in Python, and ``headbang'' refers to the act of headbanging, where metal musicians or fans violently move their head up and down to the beat of a metal song.

-\vspace{1em}
+There are recent papers which combine MIR tasks with 2D pose estimation to associate human dance motion with musical beats (\cite{pose1}, \cite{pose2}). For the second goal of headbang.py, I propose to analyze headbanging motion in metal videos with the OpenPose 2D human pose estimation library. The results of the headbanging motion analysis can be displayed alongside the results of beat tracking, to potentially reveal some information about what drives the urge to headbang.

-\qquad For the first goal of my final project, I propose to explore various beat tracking algorithms and pre-processing techniques to demonstrate improved beat results in progressive metal songs. The name of the project is ``headbang.py''; the ``.py'' suffix is because it will be a code project written in Python, and ``headbang'' refers to the act of headbanging, where metal musicians or fans violently move their head up and down to the beat of a metal song.
-
-\vspace{1em}
-
-\qquad There are recent papers which combine MIR tasks with 2D pose estimation to associate human dance motion with musical beats (\cite{pose1}, \cite{pose2}). For the second goal of headbang.py, I propose to analyze headbanging motion in metal videos with the OpenPose 2D human pose estimation library. The results of the headbanging motion analysis can be displayed alongside the results of beat tracking, to potentially reveal some information about what drives the urge to headbang.
-
-\vspace{1em}
-
-\qquad One method for evaluating beat tracking results is overlaying clicks, or ``sonification'' of the beat annotations (\cite{clicks}), on the original track. This helps a person to verify that the clicks line up with their own perception of beat locations in listening tests. For an optional third goal of headbang.py (if time permits), I want to create a digital animation of a humanoid figure (2D or 3D) which headbangs on beat locations, as an alternative method of visualizing the outputs of beat trackers.
+One method for evaluating beat tracking results is overlaying clicks on predicted beats over the original track, to sonify the beat annotations (\cite{clicks}). This helps a person to verify that the clicks line up with their own perception of beat locations in listening tests. For an optional third goal of headbang.py (if time permits), I want to create a digital animation of a humanoid figure (2D or 3D) which headbangs on beat locations, as an alternative method of visualizing the outputs of beat trackers.

 \vfill
 \clearpage
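
The \cite/\textcite reshuffle above follows from the switch to biblatex-chicago's authordate style, where (as I understand the style) \cite prints a bare "Author Year", hence the manually written parentheses, and \textcite prints "Author (Year)" for use as the subject of a sentence. A minimal compilable sketch, assuming citations.bib provides these keys:

\documentclass[letter,12pt]{report}
\usepackage[backend=biber,authordate]{biblatex-chicago}
\addbibresource{citations.bib}
\begin{document}
% parenthetical use: \cite prints "Author Year" without parentheses,
% so the surrounding parentheses are written manually
The task has been part of MIREX since 2006 (\cite{mirex06}).
% narrative use: \textcite prints "Author (Year)" inline
\textcite{bock1, bock2} have achieved state of the art results.
\printbibliography
\end{document}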
