Commit 714090a

postprocessing commented logic backend

josancamon19 committed Sep 23, 2024
1 parent 7570c9d
Showing 6 changed files with 57 additions and 69 deletions.
app/lib/pages/settings/developer.dart (8 changes: 0 additions & 8 deletions)

@@ -5,7 +5,6 @@ import 'package:flutter/material.dart';
 import 'package:friend_private/backend/http/api/memories.dart';
 import 'package:friend_private/backend/schema/memory.dart';
 import 'package:friend_private/providers/developer_mode_provider.dart';
-import 'package:friend_private/utils/alerts/app_snackbar.dart';
 import 'package:friend_private/utils/analytics/mixpanel.dart';
 import 'package:path_provider/path_provider.dart';
 import 'package:provider/provider.dart';
@@ -362,11 +361,4 @@ class __DeveloperSettingsPageState extends State<_DeveloperSettingsPage> {
       suffixIcon: suffixIcon,
     );
   }
-
-  _snackBar(String content, {int seconds = 1}) {
-    AppSnackbar.showSnackbar(
-      content,
-      duration: Duration(seconds: seconds),
-    );
-  }
 }
backend/main.py (3 changes: 1 addition & 2 deletions)

@@ -6,7 +6,7 @@
 
 from modal import Image, App, asgi_app, Secret, Cron
 from routers import workflow, chat, firmware, plugins, memories, transcribe, notifications, speech_profile, \
-    agents, facts, users, postprocessing, processing_memories, trends, sdcard
+    agents, facts, users, processing_memories, trends, sdcard
 from utils.other.notifications import start_cron_job
 
 if os.environ.get('SERVICE_ACCOUNT_JSON'):
@@ -19,7 +19,6 @@
 app = FastAPI()
 app.include_router(transcribe.router)
 app.include_router(memories.router)
-app.include_router(postprocessing.router)
 app.include_router(facts.router)
 app.include_router(chat.router)
 app.include_router(plugins.router)
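Note: with these two lines gone, nothing in main.py references the postprocessing module anymore. If the router ever needs to come back without re-editing this file, one option is an environment-gated include. A minimal sketch (the ENABLE_POSTPROCESSING variable is an assumption for illustration, not something defined in this repo):

    import os

    from routers import postprocessing

    # Hypothetical opt-in flag, not part of this codebase.
    if os.environ.get('ENABLE_POSTPROCESSING') == '1':
        app.include_router(postprocessing.router)

This would only work while routers/postprocessing.py still defines a module-level router, which this same commit comments out (see below).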
backend/models/processing_memory.py (1 change: 1 addition & 0 deletions)

@@ -24,6 +24,7 @@ class ProcessingMemory(BaseModel):
     memory_id: Optional[str] = None
     message_ids: List[str] = []
 
+
 class BasicProcessingMemory(BaseModel):
     id: str
     timer_start: float
backend/routers/postprocessing.py (79 changes: 37 additions & 42 deletions)

@@ -1,42 +1,37 @@
-
-from fastapi import APIRouter, Depends, HTTPException, UploadFile
-
-from models.memory import *
-from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util
-from utils.other import endpoints as auth
-
-router = APIRouter()
-
-
-@router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories'])
-def postprocess_memory(
-        memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False,
-        uid: str = Depends(auth.get_current_user_uid)
-):
-    """
-    The objective of this endpoint, is to get the best possible transcript from the audio file.
-    Instead of storing the initial deepgram result, doing a full post-processing with whisper-x.
-    This increases the quality of transcript by at least 20%.
-    Which also includes a better summarization.
-    Which helps us create better vectors for the memory.
-    And improves the overall experience of the user.
-    TODO: Try Nvidia Nemo ASR as suggested by @jhonnycombs https://huggingface.co/spaces/hf-audio/open_asr_leaderboard
-    That + pyannote diarization 3.1, is as good as it gets. Then is only hardware improvements.
-    TODO: should consider storing non beautified segments, and beautify on read?
-    TODO: post llm process here would be great, sometimes whisper x outputs without punctuation
-    """
-
-    # TODO: this pipeline vs groq+pyannote diarization 3.1, probably the latter is better.
-
-    # Save file
-    file_path = f"_temp/{memory_id}_{file.filename}"
-    with open(file_path, 'wb') as f:
-        f.write(file.file.read())
-
-    # Process
-    status_code, result = postprocess_memory_util(memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback, streaming_model="deepgram_streaming")
-    if status_code != 200:
-        raise HTTPException(status_code=status_code, detail=result)
-
-    return result
+# from fastapi import APIRouter, Depends, HTTPException, UploadFile
+#
+# from models.memory import *
+# from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util
+# from utils.other import endpoints as auth
+#
+# router = APIRouter()
+#
+#
+# @router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories'])
+# def postprocess_memory(
+#         memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False,
+#         uid: str = Depends(auth.get_current_user_uid)
+# ):
+#     """
+#     The objective of this endpoint, is to get the best possible transcript from the audio file.
+#     Instead of storing the initial deepgram result, doing a full post-processing with whisper-x.
+#     This increases the quality of transcript by at least 20%.
+#     Which also includes a better summarization.
+#     Which helps us create better vectors for the memory.
+#     And improves the overall experience of the user.
+#     """
+#
+#     # Save file
+#     file_path = f"_temp/{memory_id}_{file.filename}"
+#     with open(file_path, 'wb') as f:
+#         f.write(file.file.read())
+#
+#     # Process
+#     status_code, result = postprocess_memory_util(
+#         memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback,
+#         streaming_model="deepgram_streaming"
+#     )
+#     if status_code != 200:
+#         raise HTTPException(status_code=status_code, detail=result)
+#
+#     return result
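For reference, a sketch of how a client would have called this route while it was live. The path, query parameter, and multipart field come from the route definition above; the base URL, memory id, and bearer-token auth scheme are placeholders and assumptions:

    import requests

    BASE_URL = 'https://backend.example.test'  # placeholder deployment URL
    MEMORY_ID = 'abc123'                       # hypothetical memory id

    with open('recording.wav', 'rb') as f:
        resp = requests.post(
            f'{BASE_URL}/v1/memories/{MEMORY_ID}/post-processing',
            params={'emotional_feedback': 'false'},
            files={'file': f},
            # Assumed: auth.get_current_user_uid resolves the uid from a bearer token.
            headers={'Authorization': 'Bearer <id-token>'},
        )
    resp.raise_for_status()
    memory = resp.json()  # serialized with response_model=Memory

With the router body commented out and unregistered in main.py, this request now returns 404.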
backend/utils/memories/postprocess_memory.py (7 changes: 3 additions & 4 deletions)

@@ -16,6 +16,8 @@
 from utils.stt.vad import vad_is_empty
 
 
+# TODO: this pipeline vs groq+pyannote diarization 3.1, probably the latter is better.
+# TODO: should consider storing non beautified segments, and beautify on read?
 def postprocess_memory(memory_id: str, file_path: str, uid: str, emotional_feedback: bool, streaming_model: str):
     memory_data = _get_memory_by_id(uid, memory_id)
     if not memory_data:
@@ -40,10 +42,7 @@ def postprocess_memory(memory_id: str, file_path: str, uid: str, emotional_feedb
     memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.in_progress)
 
     try:
-        # Calling VAD to avoid processing empty parts and getting hallucinations from whisper.
-        # TODO: use this logs to determine if whisperx is failing because of the VAD results.
-        print('previous to vad_is_empty (segments duration):',
-              memory.transcript_segments[-1].end - memory.transcript_segments[0].start)
+        print('previous to vad_is_empty (segments duration):', memory.transcript_segments[-1].end)
         vad_segments = vad_is_empty(file_path, return_segments=True)
         if vad_segments:
             start = vad_segments[0]['start']
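The removed comment explained why VAD runs first: trimming silent stretches keeps whisper from hallucinating on empty audio. For orientation, a rough sketch of the kind of trim the surrounding code performs with the returned segments. The 'start' key is visible in the diff; the 'end' key and the pydub usage are assumptions for illustration, not the repo's actual implementation:

    from pydub import AudioSegment  # assumed dependency for this sketch

    def trim_to_speech(file_path: str, vad_segments: list) -> str:
        # Keep only [first speech start, last speech end]; seconds -> milliseconds.
        if not vad_segments:
            return file_path  # no speech detected; leave the file as is
        start_ms = int(vad_segments[0]['start'] * 1000)
        end_ms = int(vad_segments[-1]['end'] * 1000)
        audio = AudioSegment.from_file(file_path)
        trimmed_path = file_path + '.trimmed.wav'
        audio[start_ms:end_ms].export(trimmed_path, format='wav')
        return trimmed_path

The print kept above serves the removed TODO's purpose: logging the segment span before VAD makes it possible to tell whether a bad whisper-x result traces back to the VAD window.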
backend/utils/processing_memories.py (28 changes: 15 additions & 13 deletions)

@@ -1,13 +1,12 @@
 import uuid
 from datetime import datetime, timezone
 
+import database.memories as memories_db
+import database.processing_memories as processing_memories_db
-from models.memory import CreateMemory, PostProcessingModel, PostProcessingStatus, MemoryPostProcessing
 from models.processing_memory import ProcessingMemory, UpdateProcessingMemory, BasicProcessingMemory
+from models.memory import CreateMemory, PostProcessingModel, PostProcessingStatus, MemoryPostProcessing, TranscriptSegment
-from utils.memories.process_memory import process_memory
 from utils.memories.location import get_google_maps_location
+from utils.memories.process_memory import process_memory
 from utils.plugins import trigger_external_integrations
-import database.processing_memories as processing_memories_db
-import database.memories as memories_db
-
 
 
 async def create_memory_by_processing_memory(uid: str, processing_memory_id: str):
@@ -40,11 +39,12 @@ async def create_memory_by_processing_memory(uid: str, processing_memory_id: str
     language_code = new_memory.language
     memory = process_memory(uid, language_code, new_memory)
 
-    if not memory.discarded:
-        memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.not_started)
-        # TODO: thinh, check why we need populate postprocessing to client
-        memory.postprocessing = MemoryPostProcessing(status=PostProcessingStatus.not_started,
-                                                     model=PostProcessingModel.fal_whisperx)
+    # if not memory.discarded:
+    #     memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.not_started)
+    #     # TODO: thinh, check why we need populate postprocessing to client
+    #     memory.postprocessing = MemoryPostProcessing(
+    #         status=PostProcessingStatus.not_started, model=PostProcessingModel.fal_whisperx
+    #     )
 
     messages = trigger_external_integrations(uid, memory)
 
@@ -53,9 +53,11 @@ async def create_memory_by_processing_memory(uid: str, processing_memory_id: str
     processing_memory.message_ids = list(map(lambda m: m.id, messages))
     processing_memories_db.update_processing_memory(uid, processing_memory.id, processing_memory.dict())
 
-    return (memory, messages, processing_memory)
+    return memory, messages, processing_memory
 
+
-def update_basic_processing_memory(uid: str, update_processing_memory: UpdateProcessingMemory,) -> BasicProcessingMemory:
+def update_basic_processing_memory(uid: str,
+                                   update_processing_memory: UpdateProcessingMemory, ) -> BasicProcessingMemory:
     # Fetch new
     processing_memory = processing_memories_db.get_processing_memory_by_id(uid, update_processing_memory.id)
     if not processing_memory:
@@ -72,5 +74,5 @@ def update_basic_processing_memory(uid: str, update_processing_memory: UpdatePro
     # update
     processing_memories_db.update_basic(uid, processing_memory.id,
                                         processing_memory.geolocation.dict() if processing_memory.geolocation else None,
-                                        processing_memory.emotional_feedback,)
+                                        processing_memory.emotional_feedback, )
     return processing_memory
