better parsing for times and phone numbers

remsky · Feb 16, 2025 · 41598eb · 41598eb
1 parent 3290bad
commit 41598eb
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 0 deletions.
diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py
@@ -7,6 +7,7 @@
 import re
 from functools import lru_cache
 import inflect
+from numpy import number
 
 from ...structures.schemas import NormalizationOptions
 
@@ -87,6 +88,8 @@
 
 UNIT_PATTERN = re.compile(r"((?<!\w)([+-]?)(\d{1,3}(,\d{3})*|\d+)(\.\d+)?)\s*(" + "|".join(sorted(list(VALID_UNITS.keys()),reverse=True)) + r"""){1}(?=[^\w\d]{1}|\b)""",re.IGNORECASE)
 
+# Matches clock times such as "9:30", "09:30", "12:45:10" and "10:30 pm".
+# Group 1 is the H:MM[:SS] part, group 3 the optional am/pm suffix (with any
+# leading space). The hour may be one or two digits so "9:30 pm" is caught too.
+TIME_PATTERN = re.compile(r"([0-9]{1,2} ?: ?[0-9]{2}( ?: ?[0-9]{2})?)( ?(pm|am)\b)?", re.IGNORECASE)
+
 INFLECT_ENGINE=inflect.engine()
 
 def split_num(num: re.Match[str]) -> str:
@@ -214,6 +217,32 @@ def handle_url(u: re.Match[str]) -> str:
     # Clean up extra spaces
     return re.sub(r"\s+", " ", url).strip()
 
+def handle_phone_number(p: re.Match[str]) -> str:
+    """Convert a matched phone number into comma-separated spoken words.
+
+    Expects the match groups produced by the phone regex used in
+    ``normalize_text``: (country code, separator, area code, prefix, line).
+    The country code is read as a whole number; the remaining parts are
+    passed to inflect with ``group=1`` so they are read digit by digit.
+
+    Returns the spoken parts joined by commas. When there is no country
+    code, no leading comma is emitted and the separator character the regex
+    consumed in front of the area code (typically a space) is put back so
+    the preceding word is not glued to the number.
+    """
+    groups = p.groups()
+    country, separator, area, prefix, line = groups[0], groups[1], groups[2], groups[3], groups[4]
+
+    spoken = [
+        INFLECT_ENGINE.number_to_words(area.replace("(", "").replace(")", ""), group=1, comma=""),
+        INFLECT_ENGINE.number_to_words(prefix, group=1, comma=""),
+        INFLECT_ENGINE.number_to_words(line, group=1, comma=""),
+    ]
+
+    if country is None:
+        # Without a country code the optional separator group belongs to the
+        # surrounding text, not to the number, so restore it unchanged.
+        return (separator or "") + ",".join(spoken)
+
+    country_code = INFLECT_ENGINE.number_to_words(country.replace("+", ""))
+    return ",".join([country_code, *spoken])
+
+def handle_time(t: re.Match[str]) -> str:
+    """Convert a ``TIME_PATTERN`` match into spoken words.
+
+    Group 1 (the colon-separated numbers) is split on ":" and each field is
+    converted to words with inflect; group 3 (the optional am/pm suffix) is
+    appended after a single space when present.
+    """
+    groups = t.groups()
+
+    numbers = " ".join(
+        INFLECT_ENGINE.number_to_words(field.strip()) for field in groups[0].split(":")
+    )
+
+    half = groups[2]
+    if half is None:
+        return numbers
+
+    # Keep a space before am/pm so it is not fused onto the last number word.
+    return numbers + " " + half.strip()
 
 def normalize_text(text: str,normalization_options: NormalizationOptions) -> str:
     """Normalize text for TTS processing"""
@@ -233,6 +262,10 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
     if normalization_options.optional_pluralization_normalization:
         text = re.sub(r"\(s\)","s",text)
 
+    # Replace phone numbers:
+    if normalization_options.phone_normalization:
+        text = re.sub(r"(\+?\d{1,2})?([ .-]?)(\(?\d{3}\)?)[\s.-](\d{3})[\s.-](\d{4})",handle_phone_number,text)
+
     # Replace quotes and brackets
     text = text.replace(chr(8216), "'").replace(chr(8217), "'")
     text = text.replace("«", chr(8220)).replace("»", chr(8221))
@@ -243,6 +276,9 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
     for a, b in zip("、。！，：；？–", ",.!,:;?-"):
         text = text.replace(a, b + " ")
 
+    # Handle simple times written as HH:MM or HH:MM:SS, optionally followed by am/pm
+    text = TIME_PATTERN.sub(handle_time, text, )
+
     # Clean up whitespace
     text = re.sub(r"[^\S \n]", " ", text)
     text = re.sub(r"  +", " ", text)

diff --git a/api/src/structures/schemas.py b/api/src/structures/schemas.py
@@ -43,6 +43,7 @@ class NormalizationOptions(BaseModel):
     url_normalization: bool = Field(default=True, description="Changes urls so they can be properly pronouced by kokoro")
     email_normalization: bool = Field(default=True, description="Changes emails so they can be properly pronouced by kokoro")
     optional_pluralization_normalization: bool = Field(default=True, description="Replaces (s) with s so some words get pronounced correctly")
+    phone_normalization: bool = Field(default=True, description="Changes phone numbers so they can be properly pronouced by kokoro")
 
 class OpenAISpeechRequest(BaseModel):
     """Request schema for OpenAI-compatible speech endpoint"""