audiohacking · lmangani · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026
diff --git a/backend/app/services/music_service.py b/backend/app/services/music_service.py
@@ -804,7 +804,9 @@ def _pad_audio_token(token):
                 progress = int((i + 1) / max_audio_frames * 100)
                 callback(progress, f"Generating audio... {i + 1}/{max_audio_frames} frames")
 
-        frames = torch.stack(frames).permute(1, 2, 0).squeeze(0).cpu()  # Move to CPU immediately
+        # Stack the frames, then cast to torch.long before moving the result to CPU.
+        # The explicit cast is a defensive fix for potential dtype issues on the MPS backend.
+        frames = torch.stack(frames).permute(1, 2, 0).squeeze(0).to(dtype=torch.long).cpu()
 
         # Sequential offload: Move HeartMuLa to CPU before loading HeartCodec
         # This allows fitting on smaller GPUs (12GB) by never having both models in VRAM