forked from BasedHardware/omi
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
postprocessing commented logic backend
- Loading branch information
1 parent
7570c9d
commit 714090a
Showing
6 changed files
with
57 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,37 @@ | ||
|
||
from fastapi import APIRouter, Depends, HTTPException, UploadFile | ||
|
||
from models.memory import * | ||
from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util | ||
from utils.other import endpoints as auth | ||
|
||
router = APIRouter() | ||
|
||
|
||
@router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories'])
def postprocess_memory(
        memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False,
        uid: str = Depends(auth.get_current_user_uid)
):
    """
    The objective of this endpoint is to get the best possible transcript from the audio file.
    Instead of storing the initial deepgram result, doing a full post-processing with whisper-x.
    This increases the quality of transcript by at least 20%.
    Which also includes a better summarization.
    Which helps us create better vectors for the memory.
    And improves the overall experience of the user.

    The uploaded audio is written to `_temp/{memory_id}_{filename}` and that path is handed to
    `postprocess_memory_util` together with the memory id, the user id and the feedback flag.

    Returns whatever `postprocess_memory_util` returns as its result when its status code is 200.

    Raises:
        HTTPException: 400 when no file was supplied; otherwise the non-200 status code
            reported by `postprocess_memory_util`, with its result as the detail.

    TODO: Try Nvidia Nemo ASR as suggested by @jhonnycombs https://huggingface.co/spaces/hf-audio/open_asr_leaderboard
    That + pyannote diarization 3.1, is as good as it gets. Then is only hardware improvements.
    TODO: should consider storing non beautified segments, and beautify on read?
    TODO: post llm process here would be great, sometimes whisper x outputs without punctuation
    """
    # Function-scope imports keep this block self-contained; the module header does not import them.
    import os
    import shutil

    # TODO: this pipeline vs groq+pyannote diarization 3.1, probably the latter is better.

    # The annotation permits None, so fail with a client error instead of an AttributeError (500).
    if file is None:
        raise HTTPException(status_code=400, detail="Audio file is required")

    # The filename is client-controlled: keep only its final path component so a name such as
    # "../../etc/passwd" (or a Windows-style "..\\x") cannot escape the scratch directory.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))

    # Save file
    os.makedirs("_temp", exist_ok=True)
    file_path = f"_temp/{memory_id}_{safe_name}"
    with open(file_path, 'wb') as f:
        # Stream in chunks rather than loading the whole upload into memory at once.
        shutil.copyfileobj(file.file, f)

    # Process
    # NOTE(review): the saved file is not removed here — presumably the util owns cleanup; confirm.
    status_code, result = postprocess_memory_util(
        memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback,
        streaming_model="deepgram_streaming"
    )
    if status_code != 200:
        raise HTTPException(status_code=status_code, detail=result)

    return result
# from fastapi import APIRouter, Depends, HTTPException, UploadFile | ||
# | ||
# from models.memory import * | ||
# from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util | ||
# from utils.other import endpoints as auth | ||
# | ||
# router = APIRouter() | ||
# | ||
# | ||
# @router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories']) | ||
# def postprocess_memory( | ||
# memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False, | ||
# uid: str = Depends(auth.get_current_user_uid) | ||
# ): | ||
# """ | ||
# The objective of this endpoint, is to get the best possible transcript from the audio file. | ||
# Instead of storing the initial deepgram result, doing a full post-processing with whisper-x. | ||
# This increases the quality of transcript by at least 20%. | ||
# Which also includes a better summarization. | ||
# Which helps us create better vectors for the memory. | ||
# And improves the overall experience of the user. | ||
# """ | ||
# | ||
# # Save file | ||
# file_path = f"_temp/{memory_id}_{file.filename}" | ||
# with open(file_path, 'wb') as f: | ||
# f.write(file.file.read()) | ||
# | ||
# # Process | ||
# status_code, result = postprocess_memory_util( | ||
# memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback, | ||
# streaming_model="deepgram_streaming" | ||
# ) | ||
# if status_code != 200: | ||
# raise HTTPException(status_code=status_code, detail=result) | ||
# | ||
# return result |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters