Skip to content

Commit

Permalink
Produce First version output
Browse files Browse the repository at this point in the history
  • Loading branch information
csotaku0926 committed May 23, 2024
1 parent 5779fa7 commit 38c91bd
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 49 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@
audio
*.png
*.wav
*.pdf
*.json
processing/__pycache__
1 change: 0 additions & 1 deletion notes.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"delay": 45,
"stop_time": 2400,
"notes":
[
Expand Down
145 changes: 129 additions & 16 deletions processing/audio_signal.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import numpy as np
import librosa
from scipy import signal
from scipy.io.wavfile import write
# plot
import matplotlib.pyplot as plt
# just to measure time elapsed
# measure time elapsed
import time
# io
from scipy.io.wavfile import write
import os
import json

def np_stft(signal, window_size, hop_length):
"""
Expand Down Expand Up @@ -33,7 +36,11 @@ def timer(callback, title, **kwargs):
print(f"[*] {title} time: {end_time-start_time}")
return ret

def plot_spectrogram(x, fs=22050, H=1024, save_title=None, log=True):
def plot_spectrogram(x:np.ndarray, fs=22050, H=1024, save_title=None, log=True):
# change complex array to magnitude
if (x.dtype == np.complex64):
x = np.abs(x) ** 2

if (log):
x = np.log(1 + 100 * x)

Expand All @@ -48,13 +55,13 @@ def plot_spectrogram(x, fs=22050, H=1024, save_title=None, log=True):
plt.show()


def compute_stft(x, fs=22050, N=2048, H=1024):
    """
    Compute the STFT of `x` and its power spectrogram.

    - `x`: mono audio signal, shape (n_samples,)
    - `fs`: sample rate (kept for a signature consistent with the other helpers)
    - `N`: FFT / window size
    - `H`: hop length

    Returns `(X, Y)` where `X` is the complex STFT matrix and
    `Y = |X|**2` is the power spectrogram.
    """
    X = timer(librosa.stft, "stft",
              y=x, n_fft=N, hop_length=H, win_length=N, window='hann',
              center=True, pad_mode='constant')
    # power spectrogram
    Y = np.abs(X) ** 2
    return X, Y

def hp_medfilt(y, l_h, l_p, binary_mask=False):
"""
Expand Down Expand Up @@ -88,35 +95,141 @@ def hp_medfilt(y, l_h, l_p, binary_mask=False):
return m_h, m_p, y_h, y_p

def reconstruct(X, m_h, m_p, x_len,
                fs=22050, N=2048, H=1024,
                do_istft=False):
    """
    Reconstruct harmonic and percussive components from masks.

    - `X`: complex STFT matrix of the mixed signal
    - `m_h`, `m_p`: harmonic / percussive masks (same shape as `X`)
    - `x_len`: length of the original time-domain signal; used to trim istft output
    - `fs`: sample rate used when writing the wav files
    - `N`, `H`: window size / hop length matching the forward stft
    - `do_istft`: when False, return the masked STFTs `(X_h, X_p)`;
      when True, invert both to the time domain, write them to
      "harmony.wav" / "percussive.wav" and return `(x_h, x_p)`.
      This should only be used to listen to the output (get .wav).
    """
    X_h = X * m_h
    X_p = X * m_p

    if not do_istft:
        return X_h, X_p

    x_h = librosa.istft(stft_matrix=X_h, hop_length=H, win_length=N, window='hann', center=True, length=x_len)
    x_p = librosa.istft(stft_matrix=X_p, hop_length=H, win_length=N, window='hann', center=True, length=x_len)

    # write ndarray to wav file (side effect, for listening only)
    write("harmony.wav", fs, x_h)
    write("percussive.wav", fs, x_p)

    return x_h, x_p

def salient_freq(X:np.ndarray, sr=22050, H=1024, start_time=None, end_time=None, my_sfreq_num=None):
    """
    Capture the frequency bins with the top 15% magnitude.

    `np.abs(X)` is the magnitude of frequency bin `f` at frame `t`;
    bin `f` corresponds to frequencies (0, sr/n_fft, 2*sr/n_fft, ..., sr/2)
    and frame `t` corresponds to time frames[t] * hop_length.

    - `X`: stft output with shape ((1 + n_fft/2), (duration * sr / hop_length)).
      Note: in the paper `X` is given by the user to capture salient freqs of a
      specific percussion sound; the whole audio signal is used for now.
    - `start_time` / `end_time`: restrict the analysis window (in sec)
    - `my_sfreq_num`: override the number of salient bins (default: 15% of bins)

    Returns `(sfreq, threshold)`: the selected bin indices and a real-valued
    threshold = 0.4 * summed mean magnitude of those bins.
    """
    start_t_band = int(start_time * sr / H) if start_time else None
    end_t_band = int(end_time * sr / H) if end_time else None
    X = X[:, start_t_band:end_t_band]

    # debug output: listen to the analyzed excerpt
    x_test = librosa.istft(stft_matrix=X, hop_length=H, win_length=2048, window='hann', center=True)
    write("test.wav", sr, x_test)

    s_db = librosa.amplitude_to_db(np.abs(X), ref=np.max)
    # `is None` (not truthiness) so an explicit 0 is honored
    sfreq_num = int(s_db.shape[0] * 0.15) if my_sfreq_num is None else my_sfreq_num
    # dB values are <= 0 with ref=max; negate so the loudest bins become the smallest
    m_sum = np.sum(s_db, axis=-1) * -1
    # top-k magnitude (k smallest of the negated sums)
    sfreq = np.argpartition(m_sum, kth=sfreq_num)[:sfreq_num]

    # avg magnitude of the salient bins, summed up and multiplied by 0.4.
    # use |X| here: X may be a complex stft matrix, and a complex threshold
    # cannot be order-compared in onset detection
    threshold = np.sum(np.mean(np.abs(X[sfreq]), axis=1)) * 0.4

    return sfreq, threshold

def onset_detection(X:np.ndarray, sr=22050, H=1024, interval=0.08,
                    start_time=None, end_time=None):
    """
    Identify whether there's a percussion sound.

    For 32 sec, sr=22050, H=1024, one time unit is ~0.046 sec and the minimum
    interval is 0.07 — i.e. a percussion sound is checked every two time units.

    - `X`: stft output with shape ((1 + n_fft/2), (duration * sr / hop_length))
    - `interval`: interval of detection (in sec), default 0.08 sec
    - `start_time`, `end_time`: refer to `salient_freq()`

    Returns an array of detected onset times (in sec).
    """
    sfreq, threshold = salient_freq(X, sr, H, start_time, end_time)
    # to show salient freq spectrogram:
    # X[~np.isin(np.arange(len(X)), sfreq)] = np.zeros(X.shape[-1])
    # plot_spectrogram(np.abs(X), save_title="sfreq.png")

    time_per_frame = H / sr
    n_frame_per_interval = int(np.ceil(interval / time_per_frame))

    # magnitude sum of the salient bins, sampled every `n_frame_per_interval`
    # frames; |.| keeps this real even when X is a complex stft matrix
    sums = np.sum(np.abs(X[sfreq, ::n_frame_per_interval]), axis=0)

    # indices where the sum exceeds the threshold.
    # NOTE(review): a stricter rule also requiring growth over the previous
    # sample was tried and disabled:
    #   prev_sum = np.roll(sums, 1); prev_sum[0] = np.inf
    #   valid_indices = np.all([(sums > threshold), (sums > prev_sum)], axis=0)
    # |threshold| is defensive in case the caller supplies a complex threshold
    valid_indices = sums > np.abs(threshold)

    interval_indices = np.arange(0, X.shape[1], n_frame_per_interval)
    percussion = interval_indices[valid_indices] * time_per_frame

    return percussion

def X_to_x(X, H=1024, N=2048, sample_rate=22050, out_filename="test.wav"):
    """
    Invert an STFT matrix back to a time-domain signal and write it to a wav
    file for listening.

    - `X`: stft matrix
    - `H` / `N`: hop length / window size used for the forward stft
    - `sample_rate`: sample rate written into the wav header
    - `out_filename`: destination wav path (default "test.wav", the previous
      hard-coded value)

    Returns the reconstructed time-domain signal.
    """
    x_test_rm = librosa.istft(stft_matrix=X, hop_length=H, win_length=N, window='hann', center=True)
    write(out_filename, sample_rate, x_test_rm)
    return x_test_rm

def hpss(audio_filename="../audio/snow_halation.mp3"):
    """
    Harmonic/percussive source separation pipeline: load audio, compute the
    stft, derive median-filter masks, mask the stft, then run onset detection
    on both components.

    - `audio_filename`: path of the audio file to analyze

    Returns `(percussion_h, percussion_p)`: onset times (in sec) detected in
    the harmonic and percussive components.
    """
    # median filter lengths (harmonic / percussive) and stft sizes
    l_h, l_p = 23, 9
    N, H = 2048, 1024

    x, sample_rate = timer(librosa.load, "load",
                           path=audio_filename)
    # x.shape: (duration * sr, )
    x_len = x.size
    print("duration (in sec):", x_len / sample_rate)

    X, y = compute_stft(x, N=N, H=H)

    m_soft_h, m_soft_p, _, _ = hp_medfilt(y, l_h, l_p)
    X_h, X_p = reconstruct(X, m_soft_h, m_soft_p, x_len, N=N, H=H, do_istft=False)

    percussion_h = timer(onset_detection, "onset detection (h)", X=X_h, start_time=25, end_time=30)
    percussion_p = timer(onset_detection, "onset detection (p)", X=X_p, start_time=25, end_time=30)

    return percussion_h, percussion_p

def output_json(percussion, interval=0.05, dirname="../beat_map", filename="percussion.json"):
    """
    Write a percussion-onset array to a json beat map used by `script/game.js`.

    - `percussion`: iterable of onset times in seconds
    - `interval`: update frequency of the game (sec), default is 50 ms;
      onset times are converted to game ticks with `int(s / interval)`
    - `dirname` / `filename`: output location; the directory is created if
      it does not exist
    """
    notes = [{"track": "a", "second": int(s / interval)} for s in percussion]
    output = {
        "stop_time": 2400,
        "notes": notes,
    }

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(dirname, exist_ok=True)

    with open(os.path.join(dirname, filename), "w", encoding='utf-8') as f:
        json.dump(output, f, indent=4)

if __name__ == "__main__":
    # run the full separation + onset-detection pipeline once, then dump one
    # beat map per component
    per_h, per_p = hpss()
    output_json(per_h, filename="harmony.json")
    output_json(per_p)
66 changes: 34 additions & 32 deletions script/game.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// note periodically falls down
const speed = 1;
const SPEED = 1;
var stop_time = 2400;
const interval = 50;
const INTERVAL = 50;
const JUDGE_HEIGHT = 80;
// available keys
const keys = ['a','s','d','f','h','j','k','l'];
var keys_dict = {};
Expand All @@ -12,7 +13,7 @@ for(let k of keys)
i += 1;
}

var second = 0;
var timer = 0;

// notes element: div
var notes_a = [];
Expand Down Expand Up @@ -59,45 +60,44 @@ var hit_score = 0.0;
// want to make total 1000000
const total_score = 1000000;
var perfect_score = 140;
var good_score = 0.5*perfect_score;
var good_score = 0.5 * perfect_score;

// every 10 combo have 0.1 addition
var combo_multiply = 1 + parseInt(combo_num/10)*0.1;
// this note should appear in track in time
var first_note = [0,0,0,0,0,0,0,0];
var combo_multiply = 1 + parseInt(combo_num / 10) * 0.1;
// the closest note of each track to the detection line
var first_note = [0, 0, 0, 0, 0, 0, 0, 0];

function update_score(score){
    // Render `score` in the on-screen counter, zero-padded to 7 digits.
    // `display` avoids shadowing the global `hit_score` score variable;
    // padStart replaces the manual zero-padding loop (identical output for
    // scores of 7 digits or fewer).
    let display = document.getElementById("hit_score");
    display.innerText = String(score).padStart(7, '0');
};

function draw_note (json) {
// put note into each list with json file
// read note from json file
for(let note of json["notes"])
{
let key = keys_dict[note["track"]];
// audio may have delay
(all_second[key]).push(note["second"] + json["delay"]);
(all_second[key]).push(note["second"]);
}
stop_time = json["stop_time"];
// time function
// every `INTERVAL` does
var timeID = window.setInterval(() => {
second += 1*speed;
timer += SPEED;

// check each track
for(let i=0; i<keys.length; i+=1)
for(let i = 0; i < keys.length; i += 1)
{
let this_first_note = first_note[i];
let this_second = all_second[i];
// reach time
if(this_first_note < this_second.length && this_second[this_first_note] == second)
if(this_first_note < this_second.length && this_second[this_first_note] == timer)
{
let this_track = tracks[i];
// add note to track
Expand All @@ -113,16 +113,16 @@ function draw_note (json) {
// every note drop down by time
let i = 0;

for(let n=0; n<all_notes.length; n++)
for (i=0; i<all_notes[n].length; i += 1)
for(let n = 0; n < all_notes.length; n++)
for (i = 0; i < all_notes[n].length; i += 1)
{
let notes = all_notes[n];
let seconds = all_second[n];
let note = notes[i];
let top_int = second - seconds[i];
let top_int = timer - seconds[i];
note.style.top = `${top_int}%` ;
// reach judge line
if(top_int > 80)
if(top_int > JUDGE_HEIGHT)
{
// miss
// update combo
Expand All @@ -138,7 +138,7 @@ function draw_note (json) {
// clear effect after a period
window.setTimeout(()=>{
text.classList.remove("miss");
}, 500/speed);
}, 500 / SPEED);
// remove the poped note
(notes.splice(i,1)[0]).remove();
seconds.splice(i,1);
Expand All @@ -148,13 +148,16 @@ function draw_note (json) {
}

// stop timer
if(second >= stop_time)
if(timer >= stop_time)
{
console.log("End playing");
window.clearInterval(timeID);
audio.pause();
audio.currentTime = 0;
if (audio) {
audio.pause();
audio.currentTime = 0;
}
}
}, interval);
}, INTERVAL);
};

// add the button interaction to play audio
Expand All @@ -165,9 +168,8 @@ confirm_button.addEventListener('click',(() => {

// play audio after first note is ready
let audio = document.getElementById("selected_song");
window.setTimeout(() => {
audio.play();
}, interval * 80 / speed);

setInterval(() => {audio.play();}, INTERVAL * JUDGE_HEIGHT);

/**
* Since most of web servers have blocked direct file system access
Expand All @@ -178,12 +180,12 @@ confirm_button.addEventListener('click',(() => {
*
*/

fetch ("notes.json")
fetch ("beat_map/percussion.json")
.then (res => res.json())
.then ((json) => draw_note(json))

}));
// detect if pressed

var isPressed = {};
for(let k of keys)
isPressed[k] = false;
Expand All @@ -205,7 +207,7 @@ window.addEventListener("keydown", (e) => {
let this_key = keys_dict[e.key];
let this_notes = all_notes[this_key];
let this_second = all_second[this_key];
let top_int = second - this_second[0];
let top_int = timer - this_second[0];

if(!isPressed[e.key])
{
Expand All @@ -229,7 +231,7 @@ window.addEventListener("keydown", (e) => {
window.setTimeout(()=>{
text.classList.remove("good");
jl.classList.remove("judge_line_good");
}, 500/speed);
}, 500 / SPEED);
// remove note
(this_notes.splice(0,1)[0]).remove();
this_second.splice(0,1);
Expand All @@ -253,7 +255,7 @@ window.addEventListener("keydown", (e) => {
window.setTimeout(()=>{
text.classList.remove("perfect");
jl.classList.remove("judge_line_perfect");
}, 500/speed);
}, 500 / SPEED);
// remove note
(this_notes.splice(0,1)[0]).remove();
this_second.splice(0,1);
Expand Down

0 comments on commit 38c91bd

Please sign in to comment.