diff --git a/app/lib/pages/settings/developer.dart b/app/lib/pages/settings/developer.dart index 62477b0932..863f16f2e1 100644 --- a/app/lib/pages/settings/developer.dart +++ b/app/lib/pages/settings/developer.dart @@ -5,7 +5,6 @@ import 'package:flutter/material.dart'; import 'package:friend_private/backend/http/api/memories.dart'; import 'package:friend_private/backend/schema/memory.dart'; import 'package:friend_private/providers/developer_mode_provider.dart'; -import 'package:friend_private/utils/alerts/app_snackbar.dart'; import 'package:friend_private/utils/analytics/mixpanel.dart'; import 'package:path_provider/path_provider.dart'; import 'package:provider/provider.dart'; @@ -362,11 +361,4 @@ class __DeveloperSettingsPageState extends State<_DeveloperSettingsPage> { suffixIcon: suffixIcon, ); } - - _snackBar(String content, {int seconds = 1}) { - AppSnackbar.showSnackbar( - content, - duration: Duration(seconds: seconds), - ); - } } diff --git a/backend/main.py b/backend/main.py index c3fa83b066..c28ac25ffc 100644 --- a/backend/main.py +++ b/backend/main.py @@ -6,7 +6,7 @@ from modal import Image, App, asgi_app, Secret, Cron from routers import workflow, chat, firmware, plugins, memories, transcribe, notifications, speech_profile, \ - agents, facts, users, postprocessing, processing_memories, trends, sdcard + agents, facts, users, processing_memories, trends, sdcard from utils.other.notifications import start_cron_job if os.environ.get('SERVICE_ACCOUNT_JSON'): @@ -19,7 +19,6 @@ app = FastAPI() app.include_router(transcribe.router) app.include_router(memories.router) -app.include_router(postprocessing.router) app.include_router(facts.router) app.include_router(chat.router) app.include_router(plugins.router) diff --git a/backend/models/processing_memory.py b/backend/models/processing_memory.py index 11d812068d..6680e750d7 100644 --- a/backend/models/processing_memory.py +++ b/backend/models/processing_memory.py @@ -24,6 +24,7 @@ class ProcessingMemory(BaseModel): memory_id: Optional[str] = None message_ids: List[str] = [] + class BasicProcessingMemory(BaseModel): id: str timer_start: float diff --git a/backend/routers/postprocessing.py b/backend/routers/postprocessing.py index 55eac26e76..b1e80cab77 100644 --- a/backend/routers/postprocessing.py +++ b/backend/routers/postprocessing.py @@ -1,42 +1,37 @@ - -from fastapi import APIRouter, Depends, HTTPException, UploadFile - -from models.memory import * -from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util -from utils.other import endpoints as auth - -router = APIRouter() - - -@router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories']) -def postprocess_memory( - memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False, - uid: str = Depends(auth.get_current_user_uid) -): - """ - The objective of this endpoint, is to get the best possible transcript from the audio file. - Instead of storing the initial deepgram result, doing a full post-processing with whisper-x. - This increases the quality of transcript by at least 20%. - Which also includes a better summarization. - Which helps us create better vectors for the memory. - And improves the overall experience of the user. - - TODO: Try Nvidia Nemo ASR as suggested by @jhonnycombs https://huggingface.co/spaces/hf-audio/open_asr_leaderboard - That + pyannote diarization 3.1, is as good as it gets. Then is only hardware improvements. - TODO: should consider storing non beautified segments, and beautify on read? - TODO: post llm process here would be great, sometimes whisper x outputs without punctuation - """ - - # TODO: this pipeline vs groq+pyannote diarization 3.1, probably the latter is better. - - # Save file - file_path = f"_temp/{memory_id}_{file.filename}" - with open(file_path, 'wb') as f: - f.write(file.file.read()) - - # Process - status_code, result = postprocess_memory_util(memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback, streaming_model="deepgram_streaming") - if status_code != 200: - raise HTTPException(status_code=status_code, detail=result) - - return result +# from fastapi import APIRouter, Depends, HTTPException, UploadFile +# +# from models.memory import * +# from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util +# from utils.other import endpoints as auth +# +# router = APIRouter() +# +# +# @router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories']) +# def postprocess_memory( +# memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False, +# uid: str = Depends(auth.get_current_user_uid) +# ): +# """ +# The objective of this endpoint, is to get the best possible transcript from the audio file. +# Instead of storing the initial deepgram result, doing a full post-processing with whisper-x. +# This increases the quality of transcript by at least 20%. +# Which also includes a better summarization. +# Which helps us create better vectors for the memory. +# And improves the overall experience of the user. +# """ +# +# # Save file +# file_path = f"_temp/{memory_id}_{file.filename}" +# with open(file_path, 'wb') as f: +# f.write(file.file.read()) +# +# # Process +# status_code, result = postprocess_memory_util( +# memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback, +# streaming_model="deepgram_streaming" +# ) +# if status_code != 200: +# raise HTTPException(status_code=status_code, detail=result) +# +# return result diff --git a/backend/utils/memories/postprocess_memory.py b/backend/utils/memories/postprocess_memory.py index be9762f8a5..4ea0ec6c19 100644 --- a/backend/utils/memories/postprocess_memory.py +++ b/backend/utils/memories/postprocess_memory.py @@ -16,6 +16,8 @@ from utils.stt.vad import vad_is_empty +# TODO: this pipeline vs groq+pyannote diarization 3.1, probably the latter is better. +# TODO: should consider storing non beautified segments, and beautify on read? def postprocess_memory(memory_id: str, file_path: str, uid: str, emotional_feedback: bool, streaming_model: str): memory_data = _get_memory_by_id(uid, memory_id) if not memory_data: @@ -40,10 +42,7 @@ def postprocess_memory(memory_id: str, file_path: str, uid: str, emotional_feedb memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.in_progress) try: - # Calling VAD to avoid processing empty parts and getting hallucinations from whisper. - # TODO: use this logs to determine if whisperx is failing because of the VAD results. - print('previous to vad_is_empty (segments duration):', - memory.transcript_segments[-1].end - memory.transcript_segments[0].start) + print('previous to vad_is_empty (segments duration):', memory.transcript_segments[-1].end) vad_segments = vad_is_empty(file_path, return_segments=True) if vad_segments: start = vad_segments[0]['start'] diff --git a/backend/utils/processing_memories.py b/backend/utils/processing_memories.py index 057cbd0c6e..c368d62b9f 100644 --- a/backend/utils/processing_memories.py +++ b/backend/utils/processing_memories.py @@ -1,13 +1,12 @@ -import uuid from datetime import datetime, timezone +import database.memories as memories_db +import database.processing_memories as processing_memories_db +from models.memory import CreateMemory, PostProcessingModel, PostProcessingStatus, MemoryPostProcessing from models.processing_memory import ProcessingMemory, UpdateProcessingMemory, BasicProcessingMemory -from models.memory import CreateMemory, PostProcessingModel, PostProcessingStatus, MemoryPostProcessing, TranscriptSegment -from utils.memories.process_memory import process_memory from utils.memories.location import get_google_maps_location +from utils.memories.process_memory import process_memory from utils.plugins import trigger_external_integrations -import database.processing_memories as processing_memories_db -import database.memories as memories_db async def create_memory_by_processing_memory(uid: str, processing_memory_id: str): @@ -40,11 +39,12 @@ async def create_memory_by_processing_memory(uid: str, processing_memory_id: str language_code = new_memory.language memory = process_memory(uid, language_code, new_memory) - if not memory.discarded: - memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.not_started) - # TODO: thinh, check why we need populate postprocessing to client - memory.postprocessing = MemoryPostProcessing(status=PostProcessingStatus.not_started, - model=PostProcessingModel.fal_whisperx) + # if not memory.discarded: + # memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.not_started) + # # TODO: thinh, check why we need populate postprocessing to client + # memory.postprocessing = MemoryPostProcessing( + # status=PostProcessingStatus.not_started, model=PostProcessingModel.fal_whisperx + # ) messages = trigger_external_integrations(uid, memory) @@ -53,9 +53,11 @@ async def create_memory_by_processing_memory(uid: str, processing_memory_id: str processing_memory.message_ids = list(map(lambda m: m.id, messages)) processing_memories_db.update_processing_memory(uid, processing_memory.id, processing_memory.dict()) - return (memory, messages, processing_memory) + return memory, messages, processing_memory + -def update_basic_processing_memory(uid: str, update_processing_memory: UpdateProcessingMemory,) -> BasicProcessingMemory: +def update_basic_processing_memory(uid: str, + update_processing_memory: UpdateProcessingMemory, ) -> BasicProcessingMemory: # Fetch new processing_memory = processing_memories_db.get_processing_memory_by_id(uid, update_processing_memory.id) if not processing_memory: @@ -72,5 +74,5 @@ def update_basic_processing_memory(uid: str, update_processing_memory: UpdatePro # update processing_memories_db.update_basic(uid, processing_memory.id, processing_memory.geolocation.dict() if processing_memory.geolocation else None, - processing_memory.emotional_feedback,) + processing_memory.emotional_feedback, ) return processing_memory