Commit 714090a

postprocessing commented logic backend

josancamon19 committed Sep 23, 2024
1 parent 7570c9d
Showing 6 changed files with 57 additions and 69 deletions.
app/lib/pages/settings/developer.dart (8 changes: 0 additions & 8 deletions)

@@ -5,7 +5,6 @@ import 'package:flutter/material.dart';
 import 'package:friend_private/backend/http/api/memories.dart';
 import 'package:friend_private/backend/schema/memory.dart';
 import 'package:friend_private/providers/developer_mode_provider.dart';
-import 'package:friend_private/utils/alerts/app_snackbar.dart';
 import 'package:friend_private/utils/analytics/mixpanel.dart';
 import 'package:path_provider/path_provider.dart';
 import 'package:provider/provider.dart';
@@ -362,11 +361,4 @@ class __DeveloperSettingsPageState extends State<_DeveloperSettingsPage> {
       suffixIcon: suffixIcon,
     );
   }
-
-  _snackBar(String content, {int seconds = 1}) {
-    AppSnackbar.showSnackbar(
-      content,
-      duration: Duration(seconds: seconds),
-    );
-  }
 }
backend/main.py (3 changes: 1 addition & 2 deletions)

@@ -6,7 +6,7 @@
 
 from modal import Image, App, asgi_app, Secret, Cron
 from routers import workflow, chat, firmware, plugins, memories, transcribe, notifications, speech_profile, \
-    agents, facts, users, postprocessing, processing_memories, trends, sdcard
+    agents, facts, users, processing_memories, trends, sdcard
 from utils.other.notifications import start_cron_job
 
 if os.environ.get('SERVICE_ACCOUNT_JSON'):
@@ -19,7 +19,6 @@
 app = FastAPI()
 app.include_router(transcribe.router)
 app.include_router(memories.router)
-app.include_router(postprocessing.router)
 app.include_router(facts.router)
 app.include_router(chat.router)
 app.include_router(plugins.router)
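Note: with these two lines gone, nothing in main.py references the postprocessing module anymore. If the router ever needs to come back without re-editing this file, one option is an environment-gated include. A minimal sketch (the ENABLE_POSTPROCESSING variable is an assumption for illustration, not something defined in this repo):

    import os

    from routers import postprocessing

    # Hypothetical opt-in flag, not part of this codebase.
    if os.environ.get('ENABLE_POSTPROCESSING') == '1':
        app.include_router(postprocessing.router)

This would only work while routers/postprocessing.py still defines a module-level router, which this same commit comments out (see below).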
backend/models/processing_memory.py (1 change: 1 addition & 0 deletions)

@@ -24,6 +24,7 @@ class ProcessingMemory(BaseModel):
     memory_id: Optional[str] = None
     message_ids: List[str] = []
 
+
 class BasicProcessingMemory(BaseModel):
     id: str
     timer_start: float
backend/routers/postprocessing.py (79 changes: 37 additions & 42 deletions)

@@ -1,42 +1,37 @@
-
-from fastapi import APIRouter, Depends, HTTPException, UploadFile
-
-from models.memory import *
-from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util
-from utils.other import endpoints as auth
-
-router = APIRouter()
-
-
-@router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories'])
-def postprocess_memory(
-        memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False,
-        uid: str = Depends(auth.get_current_user_uid)
-):
-    """
-    The objective of this endpoint, is to get the best possible transcript from the audio file.
-    Instead of storing the initial deepgram result, doing a full post-processing with whisper-x.
-    This increases the quality of transcript by at least 20%.
-    Which also includes a better summarization.
-    Which helps us create better vectors for the memory.
-    And improves the overall experience of the user.
-    TODO: Try Nvidia Nemo ASR as suggested by @jhonnycombs https://huggingface.co/spaces/hf-audio/open_asr_leaderboard
-    That + pyannote diarization 3.1, is as good as it gets. Then is only hardware improvements.
-    TODO: should consider storing non beautified segments, and beautify on read?
-    TODO: post llm process here would be great, sometimes whisper x outputs without punctuation
-    """
-
-    # TODO: this pipeline vs groq+pyannote diarization 3.1, probably the latter is better.
-
-    # Save file
-    file_path = f"_temp/{memory_id}_{file.filename}"
-    with open(file_path, 'wb') as f:
-        f.write(file.file.read())
-
-    # Process
-    status_code, result = postprocess_memory_util(memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback, streaming_model="deepgram_streaming")
-    if status_code != 200:
-        raise HTTPException(status_code=status_code, detail=result)
-
-    return result
+# from fastapi import APIRouter, Depends, HTTPException, UploadFile
+#
+# from models.memory import *
+# from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util
+# from utils.other import endpoints as auth
+#
+# router = APIRouter()
+#
+#
+# @router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories'])
+# def postprocess_memory(
+#         memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False,
+#         uid: str = Depends(auth.get_current_user_uid)
+# ):
+#     """
+#     The objective of this endpoint, is to get the best possible transcript from the audio file.
+#     Instead of storing the initial deepgram result, doing a full post-processing with whisper-x.
+#     This increases the quality of transcript by at least 20%.
+#     Which also includes a better summarization.
+#     Which helps us create better vectors for the memory.
+#     And improves the overall experience of the user.
+#     """
+#
+#     # Save file
+#     file_path = f"_temp/{memory_id}_{file.filename}"
+#     with open(file_path, 'wb') as f:
+#         f.write(file.file.read())
+#
+#     # Process
+#     status_code, result = postprocess_memory_util(
+#         memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback,
+#         streaming_model="deepgram_streaming"
+#     )
+#     if status_code != 200:
+#         raise HTTPException(status_code=status_code, detail=result)
+#
+#     return result
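For reference, a sketch of how a client would have called this route while it was live. The path, query parameter, and multipart field come from the route definition above; the base URL, memory id, and bearer-token auth scheme are placeholders and assumptions:

    import requests

    BASE_URL = 'https://backend.example.test'  # placeholder deployment URL
    MEMORY_ID = 'abc123'                       # hypothetical memory id

    with open('recording.wav', 'rb') as f:
        resp = requests.post(
            f'{BASE_URL}/v1/memories/{MEMORY_ID}/post-processing',
            params={'emotional_feedback': 'false'},
            files={'file': f},
            # Assumed: auth.get_current_user_uid resolves the uid from a bearer token.
            headers={'Authorization': 'Bearer <id-token>'},
        )
    resp.raise_for_status()
    memory = resp.json()  # serialized with response_model=Memory

With the router body commented out and unregistered in main.py, this request now returns 404.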
backend/utils/memories/postprocess_memory.py (7 changes: 3 additions & 4 deletions)

@@ -16,6 +16,8 @@
 from utils.stt.vad import vad_is_empty
 
 
+# TODO: this pipeline vs groq+pyannote diarization 3.1, probably the latter is better.
+# TODO: should consider storing non beautified segments, and beautify on read?
 def postprocess_memory(memory_id: str, file_path: str, uid: str, emotional_feedback: bool, streaming_model: str):
     memory_data = _get_memory_by_id(uid, memory_id)
     if not memory_data:
@@ -40,10 +42,7 @@ def postprocess_memory(memory_id: str, file_path: str, uid: str, emotional_feedb
     memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.in_progress)
 
     try:
-        # Calling VAD to avoid processing empty parts and getting hallucinations from whisper.
-        # TODO: use this logs to determine if whisperx is failing because of the VAD results.
-        print('previous to vad_is_empty (segments duration):',
-              memory.transcript_segments[-1].end - memory.transcript_segments[0].start)
+        print('previous to vad_is_empty (segments duration):', memory.transcript_segments[-1].end)
         vad_segments = vad_is_empty(file_path, return_segments=True)
         if vad_segments:
             start = vad_segments[0]['start']
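The removed comment explained why VAD runs first: trimming silent stretches keeps whisper from hallucinating on empty audio. For orientation, a rough sketch of the kind of trim the surrounding code performs with the returned segments. The 'start' key is visible in the diff; the 'end' key and the pydub usage are assumptions for illustration, not the repo's actual implementation:

    from pydub import AudioSegment  # assumed dependency for this sketch

    def trim_to_speech(file_path: str, vad_segments: list) -> str:
        # Keep only [first speech start, last speech end]; seconds -> milliseconds.
        if not vad_segments:
            return file_path  # no speech detected; leave the file as is
        start_ms = int(vad_segments[0]['start'] * 1000)
        end_ms = int(vad_segments[-1]['end'] * 1000)
        audio = AudioSegment.from_file(file_path)
        trimmed_path = file_path + '.trimmed.wav'
        audio[start_ms:end_ms].export(trimmed_path, format='wav')
        return trimmed_path

The print kept above serves the removed TODO's purpose: logging the segment span before VAD makes it possible to tell whether a bad whisper-x result traces back to the VAD window.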
backend/utils/processing_memories.py (28 changes: 15 additions & 13 deletions)

@@ -1,13 +1,12 @@
 import uuid
 from datetime import datetime, timezone
 
+import database.memories as memories_db
+import database.processing_memories as processing_memories_db
-from models.memory import CreateMemory, PostProcessingModel, PostProcessingStatus, MemoryPostProcessing
 from models.processing_memory import ProcessingMemory, UpdateProcessingMemory, BasicProcessingMemory
+from models.memory import CreateMemory, PostProcessingModel, PostProcessingStatus, MemoryPostProcessing, TranscriptSegment
-from utils.memories.process_memory import process_memory
 from utils.memories.location import get_google_maps_location
+from utils.memories.process_memory import process_memory
 from utils.plugins import trigger_external_integrations
-import database.processing_memories as processing_memories_db
-import database.memories as memories_db
-
 
 
 async def create_memory_by_processing_memory(uid: str, processing_memory_id: str):
@@ -40,11 +39,12 @@ async def create_memory_by_processing_memory(uid: str, processing_memory_id: str
     language_code = new_memory.language
     memory = process_memory(uid, language_code, new_memory)
 
-    if not memory.discarded:
-        memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.not_started)
-        # TODO: thinh, check why we need populate postprocessing to client
-        memory.postprocessing = MemoryPostProcessing(status=PostProcessingStatus.not_started,
-                                                     model=PostProcessingModel.fal_whisperx)
+    # if not memory.discarded:
+    #     memories_db.set_postprocessing_status(uid, memory.id, PostProcessingStatus.not_started)
+    #     # TODO: thinh, check why we need populate postprocessing to client
+    #     memory.postprocessing = MemoryPostProcessing(
+    #         status=PostProcessingStatus.not_started, model=PostProcessingModel.fal_whisperx
+    #     )
 
     messages = trigger_external_integrations(uid, memory)
 
@@ -53,9 +53,11 @@ async def create_memory_by_processing_memory(uid: str, processing_memory_id: str
     processing_memory.message_ids = list(map(lambda m: m.id, messages))
     processing_memories_db.update_processing_memory(uid, processing_memory.id, processing_memory.dict())
 
-    return (memory, messages, processing_memory)
+    return memory, messages, processing_memory
 
+
-def update_basic_processing_memory(uid: str, update_processing_memory: UpdateProcessingMemory,) -> BasicProcessingMemory:
+def update_basic_processing_memory(uid: str,
+                                   update_processing_memory: UpdateProcessingMemory, ) -> BasicProcessingMemory:
     # Fetch new
     processing_memory = processing_memories_db.get_processing_memory_by_id(uid, update_processing_memory.id)
     if not processing_memory:
@@ -72,5 +74,5 @@ def update_basic_processing_memory(uid: str, update_processing_memory: UpdatePro
     # update
     processing_memories_db.update_basic(uid, processing_memory.id,
                                         processing_memory.geolocation.dict() if processing_memory.geolocation else None,
-                                        processing_memory.emotional_feedback,)
+                                        processing_memory.emotional_feedback, )
     return processing_memory
