Skip to content

Commit 8bc3db5

Browse files
committed
parsing patch for multi-model filtering
1 parent 385cb68 commit 8bc3db5

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

src/text_message_handler.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,8 +1114,8 @@ async def handle_message(bot, update: Update, context: CallbackContext, logger)
11141114
system_message = (
11151115
f"[Perplexity.ai response]: {perplexity_response} "
11161116
"[Translate to the user's language if needed. "
1117-
"Use only Telegram-compatible HTML; keep it simple. CONVERT MARKDOWN TO HTML."
1118-
"and do NOT use <h1>, <h2>, <h3>, <h4>, <h5>, <h6>, <pre> tags. If you want to use a codeblock, use <code>]. Remember to translate to the user's language, i.e. if they're asking in Finnish instead of English, translate into Finnish!"
1117+
"Use only Telegram-compatible HTML; keep it simple. CONVERT MARKDOWN TO HTML. NO <br> TAGS!"
1118+
"Overall, in HTML formatting, DO NOT USE: <ul>, <li>, <br>, <h1>, <h2>, <h3>, <h4>, <h5>, <h6>, <pre> tags. If you want to use a codeblock, use <code>]. Remember to translate to the user's language, i.e. if they're asking in Finnish instead of English, translate into Finnish!"
11191119
)
11201120
chat_history.append({"role": "system", "content": system_message})
11211121
context.chat_data['chat_history'] = chat_history # Update the chat data with the new history
@@ -1131,6 +1131,7 @@ async def handle_message(bot, update: Update, context: CallbackContext, logger)
11311131
bot.logger.info(f"Bot's response content: '{bot_reply_content}'")
11321132

11331133
bot_reply = bot_reply_content.strip() if bot_reply_content else ""
1134+
bot_reply = strip_disallowed_html_tags(bot_reply)
11341135

11351136
# Update usage metrics and logs
11361137
bot_token_count = bot.count_tokens(bot_reply)
@@ -1677,6 +1678,26 @@ def sanitize_html(content):
16771678
content = str(soup)
16781679
return content
16791680

1681+
# Further cleanup: strip disallowed HTML tags
1682+
def strip_disallowed_html_tags(text):
    """
    Replace disallowed HTML tags with safe equivalents or remove them entirely.

    Telegram's HTML parser is extremely limited, so list and line-break
    markup is converted to plain-text equivalents:

    * ``<br>`` (any spelling, e.g. ``<br/>``, ``<br />``) becomes a newline.
    * ``<li>`` becomes a bullet ("• ") and ``</li>`` becomes a newline.
    * ``<ul>``, ``</ul>``, ``<ol>``, ``</ol>`` are removed.

    Matching is case-insensitive and tolerates attributes and stray
    whitespace inside the tag (``<li class="x">``, ``</li >``), which a
    bare-tag match would let through untouched.

    Args:
        text: The HTML-ish string to clean up.

    Returns:
        The cleaned string. Empty (or ``None``) input is returned unchanged.
    """
    # Nothing to do for empty input; also avoids handing None to re.sub.
    if not text:
        return text

    # Replace <br> with newline. The \b keeps <b> and other tags that merely
    # start with the same letters from matching.
    text = re.sub(r'<br\b[^>]*>', '\n', text, flags=re.IGNORECASE)

    # Replace <li> with bullet point and newline. Closing tags are handled
    # first so each list item ends with exactly one newline.
    text = re.sub(r'</li\s*>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<li\b[^>]*>', '• ', text, flags=re.IGNORECASE)

    # Remove <ul>, </ul>, <ol>, </ol> (with or without attributes)
    text = re.sub(r'</?(?:ul|ol)\b[^>]*>', '', text, flags=re.IGNORECASE)

    return text
1698+
1699+
## more
1700+
16801701
# # // (old request type)
16811702
# async def make_api_request(bot, chat_history, timeout=30):
16821703
# # Prepare the payload for the API request with updated chat history

0 commit comments

Comments
 (0)