Skip to content

Commit

Permalink
Use PyAV for audio mixing
Browse files — browse the repository at this point in the history
  • Loading branch information
WyattBlue committed Oct 31, 2024
1 parent c2d6c59 commit 8793ba4
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 35 deletions.
22 changes: 1 addition & 21 deletions auto_editor/edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,27 +294,7 @@ def make_media(tl: v3, output_path: str) -> None:

if ctr.default_aud != "none":
ensure = Ensure(bar, samplerate, log)
audio_paths = make_new_audio(tl, ensure, args, ffmpeg, bar, log)
if (
not (args.keep_tracks_separate and ctr.max_audios is None)
and len(audio_paths) > 1
):
# Merge all the audio a_tracks into one.
new_a_file = os.path.join(log.temp, "new_audio.wav")
new_cmd = []
for path in audio_paths:
new_cmd.extend(["-i", path])
new_cmd.extend(
[
"-filter_complex",
f"amix=inputs={len(audio_paths)}:duration=longest",
"-ac",
"2",
new_a_file,
]
)
ffmpeg.run(new_cmd)
audio_paths = [new_a_file]
audio_paths = make_new_audio(tl, ctr, ensure, args, ffmpeg, bar, log)
else:
audio_paths = []

Expand Down
7 changes: 1 addition & 6 deletions auto_editor/ffwrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from fractions import Fraction
from pathlib import Path
from shutil import which
from subprocess import PIPE, Popen, run
from subprocess import PIPE, Popen

import av

Expand Down Expand Up @@ -35,11 +35,6 @@ def initFFmpeg(log: Log, ff_location: str | None, my_ffmpeg: bool) -> FFmpeg:
class FFmpeg:
path: str

def run(self, cmd: list[str]) -> None:
cmd = [self.path, "-hide_banner", "-y"] + cmd
cmd.extend(["-nostats", "-loglevel", "error"])
run(cmd)

def Popen(self, cmd: list[str]) -> Popen:
    """Spawn the ffmpeg binary with `cmd` appended; stdout and stderr are piped."""
    return Popen([self.path] + cmd, stdout=PIPE, stderr=PIPE)

Expand Down
8 changes: 2 additions & 6 deletions auto_editor/lang/palet.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,8 @@ def edit_subtitle(pattern, stream=0, **kwargs):


class StackTraceManager:
__slots__ = ("stack",)

def __init__(self) -> None:
    # Symbols currently being evaluated, innermost call last.
    self.stack: list[Sym] = []

Expand All @@ -643,12 +645,6 @@ def pop(self) -> None:
if self.stack:
self.stack.pop()

def get_stacktrace(self) -> str:
    """Render the evaluation stack as one ` at name (line:column)` entry per frame,
    innermost frame first."""
    return "\n".join(
        f" at {sym.val} ({sym.lineno}:{sym.column})"
        for sym in reversed(self.stack)
    )


stack_trace_manager = StackTraceManager()

Expand Down
88 changes: 86 additions & 2 deletions auto_editor/render/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from auto_editor.timeline import TlAudio, v3
from auto_editor.utils.bar import Bar
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
from auto_editor.utils.container import Container
from auto_editor.utils.log import Log
from auto_editor.utils.types import Args
from auto_editor.wavfile import AudioData, read, write
Expand Down Expand Up @@ -236,12 +237,90 @@ def process_audio_clip(
return read(output_bytes)[1]


def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
    """Mix several audio files into a single mono pcm_s16le file at rate `sr`.

    Every input is decoded, resampled to mono s16 at `sr`, padded (or
    truncated) to the length of the longest input, summed, peak-normalized
    back into the int16 range, and written to `output_path`.

    Raises ValueError if `audio_paths` is empty (nothing was mixed).
    """
    mixed_audio: np.ndarray | None = None
    max_length = 0

    # First pass: determine the maximum length, in output samples.
    for path in audio_paths:
        container = av.open(path)
        stream = container.streams.audio[0]

        assert stream.duration is not None
        assert stream.time_base is not None
        # stream.duration is expressed in time_base units, so seconds are
        # duration * time_base (a Fraction). Using only the denominator, as
        # the previous code did, is wrong whenever the numerator is not 1.
        duration_samples = int(stream.duration * stream.time_base * sr)
        max_length = max(max_length, duration_samples)
        container.close()

    # Second pass: decode, resample to a common format, pad, and sum.
    for path in audio_paths:
        container = av.open(path)

        resampler = av.audio.resampler.AudioResampler(
            format="s16", layout="mono", rate=sr
        )

        chunks: list[np.ndarray] = []
        for frame in container.decode(audio=0):
            frame.pts = None  # let the resampler regenerate timestamps
            # resample() may yield zero or several frames; consume them all
            # instead of assuming exactly one (indexing [0] can IndexError).
            for resampled in resampler.resample(frame):
                chunks.append(resampled.to_ndarray().flatten())

        if chunks:
            current_audio = np.concatenate(chunks)[:max_length]
        else:
            current_audio = np.zeros(0, dtype=np.int16)

        # Pad with silence up to max_length so every track lines up.
        if len(current_audio) < max_length:
            current_audio = np.pad(
                current_audio, (0, max_length - len(current_audio)), "constant"
            )

        # Accumulate in float32 so summing tracks cannot wrap around int16.
        if mixed_audio is None:
            mixed_audio = current_audio.astype(np.float32)
        else:
            mixed_audio += current_audio.astype(np.float32)

        container.close()

    if mixed_audio is None:
        raise ValueError("mixed_audio is None")

    # Peak-normalize so the mix fits the int16 range without clipping.
    max_val = np.max(np.abs(mixed_audio)) if mixed_audio.size else 0.0
    if max_val > 0:
        mixed_audio = mixed_audio * (32767 / max_val)
    mixed_audio = mixed_audio.astype(np.int16)

    output_container = av.open(output_path, mode="w")
    output_stream = output_container.add_stream("pcm_s16le", rate=sr)

    chunk_size = sr  # Process 1 second at a time
    for i in range(0, len(mixed_audio), chunk_size):
        # AudioFrame.from_ndarray expects shape (channels, samples) for mono.
        chunk = mixed_audio[i : i + chunk_size].reshape(1, -1)

        frame = av.AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
        frame.rate = sr
        frame.pts = i  # Set presentation timestamp

        output_container.mux(output_stream.encode(frame))

    output_container.mux(output_stream.encode(None))  # flush the encoder
    output_container.close()


def make_new_audio(
tl: v3, ensure: Ensure, args: Args, ffmpeg: FFmpeg, bar: Bar, log: Log
tl: v3,
ctr: Container,
ensure: Ensure,
args: Args,
ffmpeg: FFmpeg,
bar: Bar,
log: Log,
) -> list[str]:
sr = tl.sr
tb = tl.tb
output = []
output: list[str] = []
samples: dict[tuple[FileInfo, int], AudioData] = {}

norm = parse_norm(args.audio_normalize, log)
Expand Down Expand Up @@ -319,4 +398,9 @@ def make_new_audio(
Path(temp, "asdf.map").unlink(missing_ok=True)
except PermissionError:
pass

if not (args.keep_tracks_separate and ctr.max_audios is None) and len(output) > 1:
new_a_file = f"{Path(temp, 'new_audio.wav')}"
mix_audio_files(sr, output, new_a_file)
return [new_a_file]
return output

0 comments on commit 8793ba4

Please sign in to comment.