Update WhatsApp API version to v22.0. Add typing indicator

OdyAsh · OdyAsh · commit 85bb370992b7 · 2025-04-12T17:20:23.000+02:00
diff --git a/src/ansari/app/main_whatsapp.py b/src/ansari/app/main_whatsapp.py
@@ -87,45 +87,39 @@ async def main_webhook(request: Request, background_tasks: BackgroundTasks) -> R
         Response: HTTP response with status code 200.
 
     """
-    # Wait for the incoming webhook message to be received as JSON
-    data = await request.json()
-
-    if get_settings().DEV_MODE:
-        if "statuses" in (value := data.get("entry", [{}])[0].get("changes", [{}])[0].get("value", {})):
-            recipient_id = data["entry"][0]["changes"][0]["value"]["statuses"][0].get("recipient_id") or "unknown"
-            # logger.debug(f"Incoming whatsapp webhook status from {recipient_id}")
-            pass
-        else:
-            recipient_id = ((messages := (value.get("messages", []) or [])) and messages[0].get("from")) or "unknown"
-            logger.debug(f"Incoming whatsapp webhook message from {recipient_id}")
 
     # # Logging the origin (host) of the incoming webhook message
     # logger.debug(f"ORIGIN of the incoming webhook message: {json.dumps(request, indent=4)}")
 
-    # Terminate if incoming webhook message is empty/invalid/msg-status-update(sent,delivered,read)
+    # Wait for the incoming webhook message to be received as JSON
+    data = await request.json()
+
+    # Extract all relevant data in one go
     try:
-        result = await presenter.extract_relevant_whatsapp_message_details(data)
+        (
+            is_status,
+            from_whatsapp_number,
+            incoming_msg_type,
+            incoming_msg_body,
+            message_id,
+        ) = await presenter.extract_relevant_whatsapp_message_details(data)
     except Exception as e:
         logger.error(f"Error extracting message details: {e}")
         return Response(status_code=200)
-    else:
-        if isinstance(result, str):
-            return Response(status_code=200)
 
-    # Get relevant info from Meta's API
-    (
-        from_whatsapp_number,
-        incoming_msg_type,
-        incoming_msg_body,
-    ) = result
+    if not is_status:
+        logger.debug(f"Incoming whatsapp webhook message from {from_whatsapp_number}")
+    else:
+        # NOTE: This is a status message (e.g., "delivered"), not a user message, so it doesn't need processing
+        return Response(status_code=200)
 
     # Workaround while locally developing:
     #   don't process other dev's whatsapp recepient phone nums coming from staging env.
     #   (as both stage Meta app / local-.env-file have same testing number)
-    dev_num_sub_str = "<your dev. number>"
+    dev_num_sub_str = "YOUR_DEV_PHONE_NUM"
     if get_settings().DEV_MODE and dev_num_sub_str not in from_whatsapp_number:
         logger.debug(
-            f"Incoming message from {from_whatsapp_number} (doesn't have {dev_num_sub_str}). \
+            f"Incoming message from {from_whatsapp_number} (doesn't have this sub-str: {dev_num_sub_str}). \
             Therefore, will not process it as it's not cur. dev."
         )
         return Response(status_code=200)
@@ -183,10 +177,20 @@ async def main_webhook(request: Request, background_tasks: BackgroundTasks) -> R
     #         f"Ack: {incoming_msg_text}",
     #     )
 
-    # Send a typing indicator to the sender
-    # Side note: As of 2024-12-21, Meta's WhatsApp API does not support typing indicators
-    # Source: Search "typing indicator whatsapp api" on Google
-    background_tasks.add_task(presenter.send_whatsapp_message, from_whatsapp_number, "...")
+    # Send a typing indicator if message_id is available,
+    # otherwise send a placeholder message (should never happen)
+    if message_id:
+        background_tasks.add_task(
+            presenter.send_whatsapp_typing_indicator,
+            from_whatsapp_number,
+            message_id,
+        )
+    else:
+        background_tasks.add_task(
+            presenter.send_whatsapp_message,
+            from_whatsapp_number,
+            "...",
+        )
 
     # Actual code to process the incoming message using Ansari agent then reply to the sender
     background_tasks.add_task(
diff --git a/src/ansari/config.py b/src/ansari/config.py
@@ -176,7 +176,7 @@ def get_resource_path(filename):
     SENDGRID_API_KEY: SecretStr | None = Field(default=None)
     QURAN_DOT_COM_API_KEY: SecretStr = Field(alias="QURAN_DOT_COM_API_KEY")
     WHATSAPP_ENABLED: bool = Field(default=True)
-    WHATSAPP_API_VERSION: str | None = Field(default="v21.0")
+    WHATSAPP_API_VERSION: str | None = Field(default="v22.0")
     WHATSAPP_BUSINESS_PHONE_NUMBER_ID: SecretStr | None = Field(default=None)
     WHATSAPP_ACCESS_TOKEN_FROM_SYS_USER: SecretStr | None = Field(default=None)
     WHATSAPP_VERIFY_TOKEN_FOR_WEBHOOK: SecretStr | None = Field(default=None)
@@ -194,14 +194,14 @@ def get_resource_path(filename):
     ANTHROPIC_MODEL: str = Field(default="claude-3-7-sonnet-latest")
     LOGGING_LEVEL: str = Field(default="INFO")
     DEV_MODE: bool = Field(default=False)
-    
+
     # Application settings
     MAINTENANCE_MODE: bool = Field(default=False)
-    
+
     # iOS app build versions
     IOS_MINIMUM_BUILD_VERSION: int = Field(default=1)
     IOS_LATEST_BUILD_VERSION: int = Field(default=1)
-    
+
     # Android app build versions
     ANDROID_MINIMUM_BUILD_VERSION: int = Field(default=1)
     ANDROID_LATEST_BUILD_VERSION: int = Field(default=1)
diff --git a/src/ansari/presenters/whatsapp_presenter.py b/src/ansari/presenters/whatsapp_presenter.py
@@ -36,17 +36,18 @@ def __init__(
     async def extract_relevant_whatsapp_message_details(
         self,
         body: dict[str, Any],
-    ) -> tuple[str, str, str] | str | None:
+    ) -> tuple[str, str, dict] | str:
         """Extracts relevant whatsapp message details from the incoming webhook payload.
 
         Args:
             body (Dict[str, Any]): The JSON body of the incoming request.
 
         Returns:
-            Optional[Tuple[str, str, str]]: A tuple containing the business phone number ID,
-            the sender's WhatsApp number and the their message (if the extraction is successful).
-            Returns None if the extraction fails.
+            tuple: (is_status, user_whatsapp_number, incoming_msg_type, incoming_msg_body, message_id).
+                For a status update, is_status is True and the remaining four values are None.
 
+        Raises:
+            Exception: If the payload structure is invalid or unsupported.
         """
         # logger.debug(f"Received payload from WhatsApp user:\n{body}")
 
@@ -70,8 +71,11 @@ async def extract_relevant_whatsapp_message_details(
             # logger.debug(
             #     f"WhatsApp status update received:\n({status} at {timestamp}.)",
             # )
-            return "status update"
+            return True, None, None, None, None
+        else:
+            is_status = False
 
+        # should never be entered
         if "messages" not in value:
             error_msg = f"Unsupported message type received from WhatsApp user:\n{body}"
             logger.error(
@@ -81,6 +85,8 @@ async def extract_relevant_whatsapp_message_details(
 
         incoming_msg = value["messages"][0]
 
+        # Extract and store the message ID for use in send_whatsapp_typing_indicator
+        message_id = incoming_msg.get("id")
         # Extract the phone number of the WhatsApp sender
         user_whatsapp_number = incoming_msg["from"]
         # Meta API note: Meta sends "errors" key when receiving unsupported message types
@@ -91,11 +97,7 @@ async def extract_relevant_whatsapp_message_details(
 
         logger.info(f"Received a supported whatsapp message from {user_whatsapp_number}: {incoming_msg_body}")
 
-        return (
-            user_whatsapp_number,
-            incoming_msg_type,
-            incoming_msg_body,
-        )
+        return (is_status, user_whatsapp_number, incoming_msg_type, incoming_msg_body, message_id)
 
     async def check_and_register_user(
         self,
@@ -141,6 +143,44 @@ async def check_and_register_user(
             logger.error(f"Failed to register new whatsapp user: {user_whatsapp_number}")
             return False
 
+    async def send_whatsapp_typing_indicator(
+        self,
+        user_whatsapp_number: str,
+        message_id: str,
+    ) -> None:
+        """Sends a typing indicator to the WhatsApp sender.
+
+        Args:
+            user_whatsapp_number (str): The sender's WhatsApp number.
+            message_id (str): The ID of the message being replied to.
+
+        """
+        url = self.meta_api_url
+        headers = {
+            "Authorization": f"Bearer {self.access_token}",
+            "Content-Type": "application/json",
+        }
+
+        try:
+            async with httpx.AsyncClient() as client:
+                logger.debug(f"SENDING TYPING INDICATOR REQUEST TO: {url}")
+
+                json_data = {
+                    "messaging_product": "whatsapp",
+                    "status": "read",
+                    "message_id": message_id,
+                    "typing_indicator": {"type": "text"},
+                }
+
+                response = await client.post(url, headers=headers, json=json_data)
+                response.raise_for_status()  # Raise an exception for HTTP errors
+
+                logger.debug(f"Sent typing indicator to WhatsApp user {user_whatsapp_number}")
+
+        except Exception as e:
+            logger.error(f"Error sending typing indicator: {e}. Details are in next log.")
+            logger.exception(e)
+
     async def send_whatsapp_message(
         self,
         user_whatsapp_number: str,
@@ -213,52 +253,101 @@ def _get_retention_time_in_seconds(self) -> int:
 
     def _get_whatsapp_markdown(self, msg: str) -> str:
         """Convert conventional markdown syntax to WhatsApp's markdown syntax"""
-
         msg_direction = get_language_direction_from_text(msg)
 
-        # Replace text surrounded with single "*" with "_"
-        #   (as WhatsApp doesn't support italic text with "*"; it uses "_" instead)
+        # Process standard markdown syntax
+        msg = self._convert_italic_syntax(msg)
+        msg = self._convert_bold_syntax(msg)
+        msg = self._convert_headers(msg)
+
+        # Process lists based on text direction
+        if msg_direction in ["ltr", "rtl"]:
+            msg = self._format_nested_lists(msg)
+
+        return msg
+
+    def _convert_italic_syntax(self, text: str) -> str:
+        """Convert markdown italic syntax (*text*) to WhatsApp italic syntax (_text_)"""
         # Regex details:
         # (?<![\*_])  # Negative lookbehind: Ensures that the '*' is not preceded by '*' or '_'
         # \*          # Matches a literal '*'
         # ([^\*_]+?)  # Non-greedy match: Captures one or more characters that are not '*' or '_'
-        #   "Captures" mean it can be obtained via \1 in the replacement string
         # \*          # Matches a literal '*'
         # (?![\*_])   # Negative lookahead: Ensures that the '*' is not followed by '*' or '_'
+        #
+        # This pattern carefully identifies standalone italic markers (*text*) while avoiding
+        # matching bold markers (**text**) or mixed formatting.
         pattern = re.compile(r"(?<![\*_])\*([^\*_]+?)\*(?![\*_])")
-        msg = pattern.sub(r"_\1_", msg)
-
-        # Replace "**" (markdown bold) with "*" (whatsapp bold)
-        msg = msg.replace("**", "*")
-
-        # Match headers (#*) (that doesn't have a space before it (i.e., in the middle of a text))
-        #   where there's text directly after them
-        # NOTE: the `\**_*` part is to neglect any */_ in the returned group (.*?)
+        return pattern.sub(r"_\1_", text)
+
+    def _convert_bold_syntax(self, text: str) -> str:
+        """Convert markdown bold syntax (**text**) to WhatsApp bold syntax (*text*)"""
+        return text.replace("**", "*")
+
+    def _convert_headers(self, text: str) -> str:
+        """Convert markdown headers to WhatsApp's bold+italic format"""
+        # Process headers with content directly after them
+        # (?! )     # Negative lookahead: the character at the match position is not a space (it does not inspect what precedes the hash)
+        # #+ \**_*  # Matches one or more hash symbols and ignores any bold/italic markers already present
+        # (.*?)     # Captures the header text (non-greedy)
+        # \**_*\n   # Matches any trailing formatting markers and the newline
+        # (?!\n)    # Ensures the newline isn't followed by another newline (i.e., not an isolated header)
         pattern = re.compile(r"(?! )#+ \**_*(.*?)\**_*\n(?!\n)")
+        text = pattern.sub(r"*_\1_*\n\n", text)
 
-        # Replace them with bold (*) and italic (_) markdown syntax
-        #   and add extra newline (to leave space between header and content)
-        msg = pattern.sub(r"*_\1_*\n\n", msg)
-
-        # Match headers (#*) (that doesn't have a space before it (i.e., in the middle of a text))
-        #   where there's another newline directly after them
-        # NOTE: the `\**_*` part is to neglect any */_ in the returned group (.*?)
+        # Process headers with empty line after them
         pattern = re.compile(r"(?! )#+ \**_*(.*?)\**_*\n\n")
+        return pattern.sub(r"*_\1_*\n\n", text)
 
-        # Replace them with bold (*) and italic (_) markdown syntax
-        msg = pattern.sub(r"*_\1_*\n\n", msg)
-
-        # As nested text always appears in left side, even if text is RTL, which could be confusing to the reader,
-        #   we decided to manipulate the nesting symbols (i.e., \d+\. , * , - , etc) so that they appear in right side
-        # NOTE: added "ltr" for consistency of formatting across different languages
-        if msg_direction in ["ltr", "rtl"]:
-            # Replace lines that start with (possibly indented) "- " or "* " with "-- "
-            msg = re.sub(r"(\s*)[\*-] ", r"\1-- ", msg)
+    def _format_nested_lists(self, text: str) -> str:
+        """
+        Format only nested lists/bullet points with WhatsApp's special formatting.
 
-            # Replace the dot numbered lists (1. , etc.) with a dash (e.g., 1 - )
-            msg = re.sub(r"(\s*)(\d+)(\.) ", r"\1\2 - ", msg, flags=re.MULTILINE)
+        This handles:
+        1. Nested bullet points within numbered lists
+        2. Nested numbered lists within bullet points
+        3. Purely nested bullet points
+        4. Purely nested numbered lists
 
-        return msg
+        Simple (non-nested) lists retain their original formatting.
+        """
+        lines = text.split("\n")
+        processed_lines = []
+        in_nested_section = False
+        nested_section_indent = 0
+
+        for i, line in enumerate(lines):
+            # Check for indentation to detect nesting
+            indent_match = re.match(r"^(\s+)", line) if line.strip() else None
+            current_indent = len(indent_match.group(1)) if indent_match else 0
+
+            # Check if this is a list item (numbered or bullet)
+            is_numbered_item = re.match(r"^\s*\d+\.\s", line)
+            is_bullet_item = re.match(r"^\s*[\*-]\s", line)
+
+            # Determine if we're entering, in, or exiting a nested section
+            if (is_numbered_item or is_bullet_item) and current_indent > 0:
+                # This is a nested item
+                if not in_nested_section:
+                    in_nested_section = True
+                    nested_section_indent = current_indent
+
+                # Format nested items
+                if is_numbered_item:
+                    # Convert nested numbered list format: "  1. Item" -> "  1 - Item"
+                    line = re.sub(r"(\s*)(\d+)(\.) ", r"\1\2 - ", line)
+                elif is_bullet_item:
+                    # Convert nested bullet format: "  - Item" or "  * Item" -> "  -- Item"
+                    line = re.sub(r"(\s*)[\*-] ", r"\1-- ", line)
+
+            elif in_nested_section and current_indent < nested_section_indent:
+                # We're exiting the nested section
+                in_nested_section = False
+
+            # For non-nested items, leave them as they are
+            processed_lines.append(line)
+
+        return "\n".join(processed_lines)
 
     def _split_long_messages(self, msg_body: str) -> list[str]:
         """Split long messages into smaller chunks based on formatted headers or other patterns.
@@ -274,11 +363,6 @@ def _split_long_messages(self, msg_body: str) -> list[str]:
 
         Returns:
             list[str]: A list of message chunks that can be sent separately
-
-        Example:
-            >>> msg = "*_First Header_*\nSome text here...\n\n*_Second Header_*\nMore text..."
-            >>> _split_long_messages(msg)
-            ['*_First Header_*\nSome text here...', '*_Second Header_*\nMore text...']
         """
         # WhatsApp character limit
         MAX_LENGTH = 4000