Commit
Merge branch 'vNext-Dev' into geearl/7562-ServiceTreeEntryID
georearl authored May 14, 2024
2 parents cea0bd5 + 1819d00 commit 9edf72d
Showing 36 changed files with 3,481 additions and 247 deletions.
7 changes: 6 additions & 1 deletion .gitignore
@@ -395,4 +395,9 @@ terraform.tfstate
 terraform.tfstate.d
 .tfplan.txt
 infra/infoasst*
-infra/sp_config/config.json
+infra/sp_config/config.json
+
+#Upgrade & Migrate Support
+scripts/upgrade_repoint.config.json
+azcopy.tar.gz
+azcopy_dir
25 changes: 23 additions & 2 deletions Makefile
@@ -64,5 +64,26 @@ destroy-inf: check-subscription
 functional-tests: extract-env ## Run functional tests to check the processing pipeline is working
 	@./scripts/functional-tests.sh
 
-run-migration: ## Migrate from bicep to terraform
-	python ./scripts/merge-databases.py
+merge-databases: ## Upgrade from bicep to terraform
+	@figlet "Upgrading in place"
+	python ./scripts/merge-databases.py
+
+import-state: check-subscription ## Import state of current services into TF state
+	@./scripts/inf-import-state.sh
+
+# Command to merge databases and import TF state in prep for an upgrade from 1.0 to 1.n
+prep-upgrade:
+	@figlet "Upgrading"
+	@$(MAKE) merge-databases
+	@$(MAKE) import-state
+
+# Apply role assignments as needed to upgrade
+prep-env:
+	@figlet "Preparing Environment"
+	@./scripts/prep-env.sh
+
+prep-migration-env: ## Prepare the environment for migration by assigning required roles
+	@./scripts/prep-migration-env.sh
+
+run-data-migration: ## Run the data migration moving data from one rg to another
+	python ./scripts/extract-content.py
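
Note: taken together, the new targets imply an upgrade flow of make prep-env (apply the needed role assignments), then make prep-upgrade (merge the bicep-era databases and import existing services into Terraform state), then make run-data-migration (move content from one resource group to another). This reading is inferred from the targets and comments above rather than from separate documentation.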
15 changes: 3 additions & 12 deletions app/backend/app.py
@@ -295,20 +295,11 @@ async def chat(request: Request):
             return {"error": "unknown approach"}, 400
 
         if (Approaches(int(approach)) == Approaches.CompareWorkWithWeb or Approaches(int(approach)) == Approaches.CompareWebWithWork):
-            r = await impl.run(json_body.get("history", []), json_body.get("overrides", {}), json_body.get("citation_lookup", {}), json_body.get("thought_chain", {}))
+            r = impl.run(json_body.get("history", []), json_body.get("overrides", {}), json_body.get("citation_lookup", {}), json_body.get("thought_chain", {}))
         else:
-            r = await impl.run(json_body.get("history", []), json_body.get("overrides", {}), {}, json_body.get("thought_chain", {}))
+            r = impl.run(json_body.get("history", []), json_body.get("overrides", {}), {}, json_body.get("thought_chain", {}))
 
-        response = {
-            "data_points": r["data_points"],
-            "answer": r["answer"],
-            "thoughts": r["thoughts"],
-            "thought_chain": r["thought_chain"],
-            "work_citation_lookup": r["work_citation_lookup"],
-            "web_citation_lookup": r["web_citation_lookup"]
-        }
-
-        return response
+        return StreamingResponse(r, media_type="application/x-ndjson")
 
     except Exception as ex:
         log.error(f"Error in chat:: {ex}")
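The substance of this change: each approach's run() is now an async generator, so the route no longer awaits a completed dict; it hands the generator to StreamingResponse, which flushes each yielded NDJSON line to the client as it is produced. A minimal, runnable sketch of the pattern, with illustrative names only (fake_run stands in for impl.run and is not the project's code):

import asyncio
import json

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

async def fake_run(question: str):
    """Stand-in for impl.run(): an async generator of NDJSON lines."""
    # First line carries metadata the client needs before any content arrives.
    yield json.dumps({"thoughts": f"Searched for: {question}"}) + "\n"
    for token in ["Hello", ", ", "world"]:
        await asyncio.sleep(0.1)  # simulate model latency
        yield json.dumps({"content": token}) + "\n"

@app.post("/chat")
async def chat():
    # No await here: calling an async generator function only creates the
    # generator; FastAPI iterates it to produce the response body.
    r = fake_run("example question")
    return StreamingResponse(r, media_type="application/x-ndjson")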
181 changes: 99 additions & 82 deletions app/backend/approaches/chatreadretrieveread.py
@@ -1,11 +1,12 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
+import json
 import re
 import logging
 import urllib.parse
 from datetime import datetime, timedelta
-from typing import Any, Sequence
+from typing import Any, AsyncGenerator, Coroutine, Sequence
 
 import openai
 from openai import AzureOpenAI
@@ -151,6 +152,7 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
         log.setLevel('DEBUG')
         log.propagate = True
 
+        chat_completion = None
         use_semantic_captions = True if overrides.get("semantic_captions") else False
         top = overrides.get("top") or 3
         user_persona = overrides.get("user_persona", "")
Expand Down Expand Up @@ -182,14 +184,19 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
self.chatgpt_token_limit - len(user_question)
)

try:
chat_completion= await self.client.chat.completions.create(
model=self.chatgpt_deployment,
messages=messages,
temperature=0.0,
# max_tokens=32, # setting it too low may cause malformed JSON
max_tokens=100,
n=1)

chat_completion= await self.client.chat.completions.create(
model=self.chatgpt_deployment,
messages=messages,
temperature=0.0,
# max_tokens=32, # setting it too low may cause malformed JSON
max_tokens=100,
n=1)
except Exception as e:
log.error(f"Error generating optimized keyword search: {str(e)}")
yield json.dumps({"error": f"Error generating optimized keyword search: {str(e)}"}) + "\n"
return

generated_query = chat_completion.choices[0].message.content

@@ -208,14 +215,23 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
             'Content-Type': 'application/json',
         }
 
-        response = requests.post(url, json=data,headers=headers,timeout=60)
-        if response.status_code == 200:
-            response_data = response.json()
-            embedded_query_vector =response_data.get('data')
-        else:
-            log.error(f"Error generating embedding:: {response.status_code}")
-            raise Exception('Error generating embedding:', response.status_code)
+        embedded_query_vector = None
+        try:
+            response = requests.post(url, json=data,headers=headers,timeout=60)
+            if response.status_code == 200:
+                response_data = response.json()
+                embedded_query_vector =response_data.get('data')
+            else:
+                # Generate an error message if the embedding generation fails
+                log.error(f"Error generating embedding:: {response.status_code}")
+                yield json.dumps({"error": "Error generating embedding"}) + "\n"
+                return # Go no further
+        except Exception as e:
+            # Timeout or other error has occurred
+            log.error(f"Error generating embedding: {str(e)}")
+            yield json.dumps({"error": f"Error generating embedding: {str(e)}"}) + "\n"
+            return # Go no further
 
         #vector set up for pure vector search & Hybrid search & Hybrid semantic
         vector = RawVectorQuery(vector=embedded_query_vector, k=top, fields="contentVector")
@@ -339,17 +355,19 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
             userPersona=user_persona,
             systemPersona=system_persona,
         )
-        # STEP 3: Generate a contextual and content-specific answer using the search results and chat history.
-        #Added conditional block to use different system messages for different models.
-        if self.model_name.startswith("gpt-35-turbo"):
-            messages = self.get_messages_from_history(
-                system_message,
-                self.model_name,
-                history,
-                history[-1]["user"] + "Sources:\n" + content + "\n\n", # 3.5 has a recency bias, which is why this is here
-                self.RESPONSE_PROMPT_FEW_SHOTS,
-                max_tokens=self.chatgpt_token_limit - 500
-            )
+        try:
+            # STEP 3: Generate a contextual and content-specific answer using the search results and chat history.
+            #Added conditional block to use different system messages for different models.
+            if self.model_name.startswith("gpt-35-turbo"):
+                messages = self.get_messages_from_history(
+                    system_message,
+                    self.model_name,
+                    history,
+                    history[-1]["user"] + "Sources:\n" + content + "\n\n", # 3.5 has a recency bias, which is why this is here
+                    self.RESPONSE_PROMPT_FEW_SHOTS,
+                    max_tokens=self.chatgpt_token_limit - 500
+                )
 
                 #Uncomment to debug token usage.
                 #print(messages)
@@ -361,66 +379,65 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
                #print("System Message Tokens: ", self.num_tokens_from_string(system_message, "cl100k_base"))
                #print("Few Shot Tokens: ", self.num_tokens_from_string(self.response_prompt_few_shots[0]['content'], "cl100k_base"))
                #print("Message Tokens: ", self.num_tokens_from_string(message_string, "cl100k_base"))
 
-            chat_completion= await self.client.chat.completions.create(
-                model=self.chatgpt_deployment,
-                messages=messages,
-                temperature=float(overrides.get("response_temp")) or 0.6,
-                n=1
-            )
+                chat_completion= await self.client.chat.completions.create(
+                    model=self.chatgpt_deployment,
+                    messages=messages,
+                    temperature=float(overrides.get("response_temp")) or 0.6,
+                    n=1,
+                    stream=True
+                )
 
-        elif self.model_name.startswith("gpt-4"):
-            messages = self.get_messages_from_history(
-                system_message,
-                # "Sources:\n" + content + "\n\n" + system_message,
-                self.model_name,
-                history,
-                # history[-1]["user"],
-                history[-1]["user"] + "Sources:\n" + content + "\n\n", # GPT-4 starts to degrade with long system messages, so sources are moved here
-                self.RESPONSE_PROMPT_FEW_SHOTS,
-                max_tokens=self.chatgpt_token_limit
-            )
+            elif self.model_name.startswith("gpt-4"):
+                messages = self.get_messages_from_history(
+                    system_message,
+                    # "Sources:\n" + content + "\n\n" + system_message,
+                    self.model_name,
+                    history,
+                    # history[-1]["user"],
+                    history[-1]["user"] + "Sources:\n" + content + "\n\n", # GPT-4 starts to degrade with long system messages, so sources are moved here
+                    self.RESPONSE_PROMPT_FEW_SHOTS,
+                    max_tokens=self.chatgpt_token_limit
+                )
 
-            #Uncomment to debug token usage.
-            #print(messages)
-            #message_string = ""
-            #for message in messages:
-            #    # enumerate the messages and add the role and content elements of the dictionary to the message_string
-            #    message_string += f"{message['role']}: {message['content']}\n"
-            #print("Content Tokens: ", self.num_tokens_from_string("Sources:\n" + content + "\n\n", "cl100k_base"))
-            #print("System Message Tokens: ", self.num_tokens_from_string(system_message, "cl100k_base"))
-            #print("Few Shot Tokens: ", self.num_tokens_from_string(self.response_prompt_few_shots[0]['content'], "cl100k_base"))
-            #print("Message Tokens: ", self.num_tokens_from_string(message_string, "cl100k_base"))
+                #Uncomment to debug token usage.
+                #print(messages)
+                #message_string = ""
+                #for message in messages:
+                #    # enumerate the messages and add the role and content elements of the dictionary to the message_string
+                #    message_string += f"{message['role']}: {message['content']}\n"
+                #print("Content Tokens: ", self.num_tokens_from_string("Sources:\n" + content + "\n\n", "cl100k_base"))
+                #print("System Message Tokens: ", self.num_tokens_from_string(system_message, "cl100k_base"))
+                #print("Few Shot Tokens: ", self.num_tokens_from_string(self.response_prompt_few_shots[0]['content'], "cl100k_base"))
+                #print("Message Tokens: ", self.num_tokens_from_string(message_string, "cl100k_base"))
 
-            chat_completion= await self.client.chat.completions.create(
-                model=self.chatgpt_deployment,
-                messages=messages,
-                temperature=float(overrides.get("response_temp")) or 0.6,
-                max_tokens=1024,
-                n=1
-            )
-        # STEP 4: Format the response
-        msg_to_display = '\n\n'.join([str(message) for message in messages])
-        generated_response=chat_completion.choices[0].message.content
-
-        # # Detect the language of the response
-        response_language = self.detect_language(generated_response)
-        #if response is not in user's language, translate it to user's language
-        if response_language != detectedlanguage:
-            translated_response = self.translate_response(generated_response, detectedlanguage)
-        else:
-            translated_response = generated_response
-        thought_chain["work_response"] = urllib.parse.unquote(translated_response)
+                chat_completion= await self.client.chat.completions.create(
+                    model=self.chatgpt_deployment,
+                    messages=messages,
+                    temperature=float(overrides.get("response_temp")) or 0.6,
+                    n=1,
+                    stream=True
+                )
+            msg_to_display = '\n\n'.join([str(message) for message in messages])
 
-        return {
-            "data_points": data_points,
-            "answer": f"{urllib.parse.unquote(translated_response)}",
-            "thoughts": f"Searched for:<br>{generated_query}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
-            "thought_chain": thought_chain,
-            "work_citation_lookup": citation_lookup,
-            "web_citation_lookup": {}
-        }
+            # Return the data we know
+            yield json.dumps({"data_points": {},
+                              "thoughts": f"Searched for:<br>{generated_query}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
+                              "thought_chain": thought_chain,
+                              "work_citation_lookup": citation_lookup,
+                              "web_citation_lookup": {}}) + "\n"
 
+            # STEP 4: Format the response
+            async for chunk in chat_completion:
+                # Check if there is at least one element and the first element has the key 'delta'
+                if len(chunk.choices) > 0:
+                    yield json.dumps({"content": chunk.choices[0].delta.content}) + "\n"
+        except Exception as e:
+            log.error(f"Error generating chat completion: {str(e)}")
+            yield json.dumps({"error": f"Error generating chat completion: {str(e)}"}) + "\n"
+            return
 
 
     def detect_language(self, text: str) -> str:
         """ Function to detect the language of the text"""
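For orientation, a hedged client-side sketch of consuming the protocol this generator now emits: one JSON object per line, with a metadata record (thoughts, thought_chain, citation lookups) first, {"content": ...} deltas after it, and {"error": ...} possible at any point. The URL and payload shape below are assumptions for illustration, not taken from the repo:

import json

import requests

def read_chat_stream(url: str, payload: dict):
    """Collect a streamed NDJSON chat response into (metadata, answer)."""
    metadata, answer = None, []
    with requests.post(url, json=payload, stream=True, timeout=300) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            event = json.loads(line)
            if "error" in event:
                raise RuntimeError(event["error"])
            if "content" in event:
                if event["content"] is not None:
                    answer.append(event["content"])
            else:
                metadata = event  # the first line: thoughts and citations
    return metadata, "".join(answer)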
55 changes: 42 additions & 13 deletions app/backend/approaches/chatwebretrieveread.py
@@ -1,6 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
+import json
+import logging
 import os
 import re
 from typing import Any, Sequence
@@ -104,7 +106,11 @@ async def run(self, history: Sequence[dict[str, str]],overrides: dict[str, Any],
         Returns:
             Any: The result of the approach.
         """
+        log = logging.getLogger("uvicorn")
+        log.setLevel('DEBUG')
+        log.propagate = True
 
+        query_resp = None
         user_query = history[-1].get("user")
         user_persona = overrides.get("user_persona", "")
         system_persona = overrides.get("system_persona", "")
@@ -127,7 +133,13 @@ async def run(self, history: Sequence[dict[str, str]],overrides: dict[str, Any],
             self.chatgpt_token_limit - len(user_query)
         )
 
-        query_resp = await self.make_chat_completion(messages)
+        try:
+            query_resp = await self.make_chat_completion(messages)
+        except Exception as e:
+            log.error(f"Error generating optimized keyword search: {str(e)}")
+            yield json.dumps({"error": f"Error generating optimized keyword search: {str(e)}"}) + "\n"
+            return
+
         thought_chain["web_search_term"] = query_resp
         # STEP 2: Use the search query to get the top web search results
         url_snippet_dict = await self.web_search_with_safe_search(query_resp)
Expand All @@ -152,18 +164,35 @@ async def run(self, history: Sequence[dict[str, str]],overrides: dict[str, Any],
self.RESPONSE_PROMPT_FEW_SHOTS,
max_tokens=4097 - 500
)

msg_to_display = '\n\n'.join([str(message) for message in messages])
# STEP 3: Use the search results to answer the user's question
resp = await self.make_chat_completion(messages)
thought_chain["web_response"] = resp
return {
"data_points": None,
"answer": f"{urllib.parse.unquote(resp)}",
"thoughts": f"Searched for:<br>{query_resp}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
"thought_chain": thought_chain,
"work_citation_lookup": {},
"web_citation_lookup": self.citations
}
try:
# STEP 3: Use the search results to answer the user's question
resp = await self.client.chat.completions.create(
model=self.chatgpt_deployment,
messages=messages,
temperature=0.6,
n=1,
stream=True
)

# Return the data we know
yield json.dumps({"data_points": {},
"thoughts": f"Searched for:<br>{query_resp}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
"thought_chain": thought_chain,
"work_citation_lookup": {},
"web_citation_lookup": self.citations}) + "\n"

# STEP 4: Format the response
async for chunk in resp:
# Check if there is at least one element and the first element has the key 'delta'
if len(chunk.choices) > 0:
yield json.dumps({"content": chunk.choices[0].delta.content}) + "\n"

except Exception as e:
log.error(f"Error generating chat completion: {str(e)}")
yield json.dumps({"error": f"Error generating chat completion: {str(e)}"}) + "\n"
return


async def web_search_with_safe_search(self, user_query):
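Both approaches now share the same server-side shape. A distilled sketch of that pattern, assuming an openai>=1.x async client whose streaming call yields chunks exposing choices[0].delta.content; stream_answer and its parameters are illustrative, not the repo's API:

import json
from typing import Any, AsyncGenerator

async def stream_answer(client: Any, deployment: str, messages: list,
                        metadata: dict) -> AsyncGenerator[str, None]:
    """Stream a chat completion as NDJSON, reporting errors in-band."""
    try:
        completion = await client.chat.completions.create(
            model=deployment,
            messages=messages,
            temperature=0.6,
            n=1,
            stream=True,
        )
        # Line one: everything already known (thoughts, citation lookups),
        # so the client can render context before any tokens arrive.
        yield json.dumps(metadata) + "\n"
        async for chunk in completion:
            # Guard against keep-alive chunks with an empty choices list.
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield json.dumps({"content": chunk.choices[0].delta.content}) + "\n"
    except Exception as e:
        # The HTTP stream has already started, so surface failures as an
        # in-band error record rather than an HTTP error status.
        yield json.dumps({"error": f"Error generating chat completion: {e}"}) + "\n"

Emitting errors as NDJSON records is the design choice that lets the client keep a single parsing path: every line is a JSON object, whether it carries metadata, a content delta, or a failure.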
(Diff truncated: the remaining 31 changed files are not shown.)
