11from __future__ import annotations
22
3+ import av
34import torchaudio
45import torch
56import comfy .model_management
67import folder_paths
78import os
89import io
910import json
10- import struct
1111import random
1212import hashlib
1313import node_helpers
@@ -90,61 +90,143 @@ def decode(self, vae, samples):
9090 return ({"waveform" : audio , "sample_rate" : 44100 }, )
9191
9292
93- def create_vorbis_comment_block (comment_dict , last_block ):
94- vendor_string = b'ComfyUI'
95- vendor_length = len (vendor_string )
# Sample rates accepted by the Opus encoder, in ascending order.
_OPUS_RATES = (8000, 12000, 16000, 24000, 48000)

# Encoder name per output format; any other format is encoded as FLAC.
_CODEC_NAMES = {"opus": "libopus", "mp3": "libmp3lame"}

# Constant bit rates (bits per second) keyed by format, then by quality label.
_BIT_RATES = {
    "opus": {"64k": 64000, "96k": 96000, "128k": 128000, "192k": 192000, "320k": 320000},
    "mp3": {"128k": 128000, "320k": 320000},
}


def _opus_sample_rate(sample_rate):
    """Return the lowest Opus-supported rate >= ``sample_rate``, capped at 48000."""
    for rate in _OPUS_RATES:
        if rate >= sample_rate:
            return rate
    return _OPUS_RATES[-1]


def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=None, extra_pnginfo=None, quality="128k"):
    """Encode every waveform in ``audio`` and write it to the node's output folder.

    Args:
        self: Node instance providing ``output_dir``, ``type`` and ``prefix_append``.
        audio: Mapping with a ``"waveform"`` batch and its ``"sample_rate"``.
        filename_prefix: Prefix handed to ``folder_paths.get_save_image_path``.
        format: Container format and file extension. ``"opus"`` and ``"mp3"``
            select their own encoders; anything else is encoded as FLAC.
        prompt: Optional prompt, stored as JSON under the ``"prompt"`` metadata key.
        extra_pnginfo: Optional mapping; each entry is stored as JSON metadata.
        quality: Quality label. Bit-rate labels set the encoder bit rate, ``"V0"``
            (mp3 only) enables qscale, and unrecognised labels leave the encoder
            defaults untouched.

    Returns:
        ``{"ui": {"audio": [...]}}`` with one filename/subfolder/type entry per
        written file.
    """
    filename_prefix += self.prefix_append
    full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir)
    results: list[FileLocator] = []

    # Metadata is attached to the output container unless disabled globally.
    metadata = {}
    if not args.disable_metadata:
        if prompt is not None:
            metadata["prompt"] = json.dumps(prompt)
        if extra_pnginfo is not None:
            for key in extra_pnginfo:
                metadata[key] = json.dumps(extra_pnginfo[key])

    # These depend only on the arguments, so resolve them once for the whole batch.
    source_rate = audio["sample_rate"]
    target_rate = _opus_sample_rate(source_rate) if format == "opus" else source_rate
    codec_name = _CODEC_NAMES.get(format, "flac")
    bit_rate = _BIT_RATES.get(format, {}).get(quality)

    for batch_number, waveform in enumerate(audio["waveform"].cpu()):
        filename_with_batch_num = filename.replace("%batch_num%", str(batch_number))
        file = f"{filename_with_batch_num}_{counter:05}_.{format}"
        output_path = os.path.join(full_output_folder, file)

        if target_rate != source_rate:
            waveform = torchaudio.functional.resample(waveform, source_rate, target_rate)

        # Stage the samples as an in-memory WAV that PyAV can decode.
        wav_buffer = io.BytesIO()
        torchaudio.save(wav_buffer, waveform, target_rate, format="WAV")
        wav_buffer.seek(0)

        output_buffer = io.BytesIO()
        # Context managers close both containers even if encoding fails;
        # the output container is closed first, then the input container.
        with av.open(wav_buffer) as input_container:
            with av.open(output_buffer, mode="w", format=format) as output_container:
                for key, value in metadata.items():
                    output_container.metadata[key] = value

                out_stream = output_container.add_stream(codec_name, rate=target_rate)
                if bit_rate is not None:
                    out_stream.bit_rate = bit_rate
                elif format == "mp3" and quality == "V0":
                    # TODO: V3 and V5 would be nice to support, but there doesn't seem
                    # to be a way to set the qscale level; this property is a bool.
                    out_stream.codec_context.qscale = 1

                for frame in input_container.decode(audio=0):
                    frame.pts = None  # Let PyAV handle timestamps
                    output_container.mux(out_stream.encode(frame))

                # Flush whatever the encoder is still buffering.
                output_container.mux(out_stream.encode(None))

        with open(output_path, "wb") as f:
            f.write(output_buffer.getbuffer())

        results.append({
            "filename": file,
            "subfolder": subfolder,
            "type": self.type,
        })
        counter += 1

    return {"ui": {"audio": results}}
122204
123- while not last_block :
124- header = flac_io .read (4 )
125- last_block = (header [0 ] & 0x80 ) != 0
126- block_type = header [0 ] & 0x7F
127- block_length = struct .unpack ('>I' , b'\x00 ' + header [1 :])[0 ]
128- block_data = flac_io .read (block_length )
class SaveAudio:
    """Output node that saves incoming audio as FLAC through ``save_audio``."""

    RETURN_TYPES = ()
    FUNCTION = "save_flac"
    OUTPUT_NODE = True
    CATEGORY = "audio"

    def __init__(self):
        # Attributes read by save_audio when building paths and result entries.
        self.prefix_append = ""
        self.type = "output"
        self.output_dir = folder_paths.get_output_directory()

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs: the audio, a filename prefix, and hidden workflow data."""
        required = {
            "audio": ("AUDIO", ),
            "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}),
        }
        hidden = {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}
        return {"required": required, "hidden": hidden}

    def save_flac(self, audio, filename_prefix="ComfyUI", format="flac", prompt=None, extra_pnginfo=None):
        """Forward the inputs to ``save_audio`` and return its result."""
        return save_audio(
            self,
            audio,
            filename_prefix=filename_prefix,
            format=format,
            prompt=prompt,
            extra_pnginfo=extra_pnginfo,
        )
146228
147- class SaveAudio :
class SaveAudioMP3:
    """Output node that saves incoming audio as MP3 through ``save_audio``."""

    def __init__(self):
        self.output_dir = folder_paths.get_output_directory()
        self.type = "output"
        # save_audio appends this to the filename prefix, so it must exist.
        self.prefix_append = ""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs, including the selectable MP3 quality."""
        return {"required": {"audio": ("AUDIO", ),
                             "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}),
                             "quality": (["V0", "128k", "320k"], {"default": "V0"}),
                             },
                "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
                }

    RETURN_TYPES = ()
    # Must match the method name exactly so the node's entry point resolves.
    FUNCTION = "save_mp3"

    OUTPUT_NODE = True

    CATEGORY = "audio"

    def save_mp3(self, audio, filename_prefix="ComfyUI", format="mp3", prompt=None, extra_pnginfo=None, quality="128k"):
        """Forward the inputs, including ``quality``, to ``save_audio`` and return its result."""
        return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo, quality)
179253
180- for (batch_number , waveform ) in enumerate (audio ["waveform" ].cpu ()):
181- filename_with_batch_num = filename .replace ("%batch_num%" , str (batch_number ))
182- file = f"{ filename_with_batch_num } _{ counter :05} _.flac"
class SaveAudioOpus:
    """Output node that saves incoming audio as Opus through ``save_audio``."""

    def __init__(self):
        self.output_dir = folder_paths.get_output_directory()
        self.type = "output"
        self.prefix_append = ""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs, including the selectable Opus bit rate."""
        return {"required": {"audio": ("AUDIO", ),
                             "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}),
                             "quality": (["64k", "96k", "128k", "192k", "320k"], {"default": "128k"}),
                             },
                "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
                }

    RETURN_TYPES = ()
    FUNCTION = "save_opus"

    OUTPUT_NODE = True

    CATEGORY = "audio"

    # The default quality must be one of the labels offered in INPUT_TYPES;
    # a label outside that list would leave the encoder bit rate unset.
    def save_opus(self, audio, filename_prefix="ComfyUI", format="opus", prompt=None, extra_pnginfo=None, quality="128k"):
        """Forward the inputs, including ``quality``, to ``save_audio`` and return its result."""
        return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo, quality)
200278
201279class PreviewAudio (SaveAudio ):
202280 def __init__ (self ):
@@ -248,7 +326,20 @@ def VALIDATE_INPUTS(s, audio):
248326 "VAEEncodeAudio" : VAEEncodeAudio ,
249327 "VAEDecodeAudio" : VAEDecodeAudio ,
250328 "SaveAudio" : SaveAudio ,
329+ "SaveAudioMP3" : SaveAudioMP3 ,
330+ "SaveAudioOpus" : SaveAudioOpus ,
251331 "LoadAudio" : LoadAudio ,
252332 "PreviewAudio" : PreviewAudio ,
253333 "ConditioningStableAudio" : ConditioningStableAudio ,
254334}
335+
# Display titles for the audio nodes, keyed by node identifier.
NODE_DISPLAY_NAME_MAPPINGS = {
    "EmptyLatentAudio": "Empty Latent Audio",
    "VAEEncodeAudio": "VAE Encode Audio",
    "VAEDecodeAudio": "VAE Decode Audio",
    "PreviewAudio": "Preview Audio",
    "LoadAudio": "Load Audio",
    "SaveAudio": "Save Audio (FLAC)",
    "SaveAudioMP3": "Save Audio (MP3)",
    "SaveAudioOpus": "Save Audio (Opus)",
}
0 commit comments