Commit
Merge branch 'vNext-Dev' into geearl/7562-ServiceTreeEntryID
georearl authored May 14, 2024
2 parents cea0bd5 + 1819d00 commit 9edf72d
Showing 36 changed files with 3,481 additions and 247 deletions.
7 changes: 6 additions & 1 deletion .gitignore
@@ -395,4 +395,9 @@ terraform.tfstate
 terraform.tfstate.d
 .tfplan.txt
 infra/infoasst*
-infra/sp_config/config.json
+infra/sp_config/config.json
+
+#Upgrade & Migrate Support
+scripts/upgrade_repoint.config.json
+azcopy.tar.gz
+azcopy_dir
25 changes: 23 additions & 2 deletions Makefile
@@ -64,5 +64,26 @@ destroy-inf: check-subscription
 functional-tests: extract-env ## Run functional tests to check the processing pipeline is working
 	@./scripts/functional-tests.sh
 
-run-migration: ## Migrate from bicep to terraform
-	python ./scripts/merge-databases.py
+merge-databases: ## Upgrade from bicep to terraform
+	@figlet "Upgrading in place"
+	python ./scripts/merge-databases.py
+
+import-state: check-subscription ## Import state of current services into TF state
+	@./scripts/inf-import-state.sh
+
+# Command to merge databases and import TF state in prep for an upgrade from 1.0 to 1.n
+prep-upgrade:
+	@figlet "Upgrading"
+	@$(MAKE) merge-databases
+	@$(MAKE) import-state
+
+# Apply role assignments as needed to upgrade
+prep-env:
+	@figlet "Preparing Environment"
+	@./scripts/prep-env.sh
+
+prep-migration-env: ## Prepare the environment for migration by assigning required roles
+	@./scripts/prep-migration-env.sh
+
+run-data-migration: ## Run the data migration moving data from one rg to another
+	python ./scripts/extract-content.py
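
Note: taken together, the new targets imply an upgrade flow of make prep-env (apply the needed role assignments), then make prep-upgrade (merge the bicep-era databases and import existing services into Terraform state), then make run-data-migration (move content from one resource group to another). This reading is inferred from the targets and comments above rather than from separate documentation.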
15 changes: 3 additions & 12 deletions app/backend/app.py
@@ -295,20 +295,11 @@ async def chat(request: Request):
             return {"error": "unknown approach"}, 400
 
         if (Approaches(int(approach)) == Approaches.CompareWorkWithWeb or Approaches(int(approach)) == Approaches.CompareWebWithWork):
-            r = await impl.run(json_body.get("history", []), json_body.get("overrides", {}), json_body.get("citation_lookup", {}), json_body.get("thought_chain", {}))
+            r = impl.run(json_body.get("history", []), json_body.get("overrides", {}), json_body.get("citation_lookup", {}), json_body.get("thought_chain", {}))
         else:
-            r = await impl.run(json_body.get("history", []), json_body.get("overrides", {}), {}, json_body.get("thought_chain", {}))
+            r = impl.run(json_body.get("history", []), json_body.get("overrides", {}), {}, json_body.get("thought_chain", {}))
 
-        response = {
-            "data_points": r["data_points"],
-            "answer": r["answer"],
-            "thoughts": r["thoughts"],
-            "thought_chain": r["thought_chain"],
-            "work_citation_lookup": r["work_citation_lookup"],
-            "web_citation_lookup": r["web_citation_lookup"]
-        }
-
-        return response
+        return StreamingResponse(r, media_type="application/x-ndjson")
 
     except Exception as ex:
         log.error(f"Error in chat:: {ex}")
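The substance of this change: each approach's run() is now an async generator, so the route no longer awaits a completed dict; it hands the generator to StreamingResponse, which flushes each yielded NDJSON line to the client as it is produced. A minimal, runnable sketch of the pattern, with illustrative names only (fake_run stands in for impl.run and is not the project's code):

import asyncio
import json

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

async def fake_run(question: str):
    """Stand-in for impl.run(): an async generator of NDJSON lines."""
    # First line carries metadata the client needs before any content arrives.
    yield json.dumps({"thoughts": f"Searched for: {question}"}) + "\n"
    for token in ["Hello", ", ", "world"]:
        await asyncio.sleep(0.1)  # simulate model latency
        yield json.dumps({"content": token}) + "\n"

@app.post("/chat")
async def chat():
    # No await here: calling an async generator function only creates the
    # generator; FastAPI iterates it to produce the response body.
    r = fake_run("example question")
    return StreamingResponse(r, media_type="application/x-ndjson")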
181 changes: 99 additions & 82 deletions app/backend/approaches/chatreadretrieveread.py
@@ -1,11 +1,12 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
+import json
 import re
 import logging
 import urllib.parse
 from datetime import datetime, timedelta
-from typing import Any, Sequence
+from typing import Any, AsyncGenerator, Coroutine, Sequence
 
 import openai
 from openai import AzureOpenAI
@@ -151,6 +152,7 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
         log.setLevel('DEBUG')
         log.propagate = True
 
+        chat_completion = None
         use_semantic_captions = True if overrides.get("semantic_captions") else False
         top = overrides.get("top") or 3
         user_persona = overrides.get("user_persona", "")
Expand Down Expand Up @@ -182,14 +184,19 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
self.chatgpt_token_limit - len(user_question)
)

try:
chat_completion= await self.client.chat.completions.create(
model=self.chatgpt_deployment,
messages=messages,
temperature=0.0,
# max_tokens=32, # setting it too low may cause malformed JSON
max_tokens=100,
n=1)

chat_completion= await self.client.chat.completions.create(
model=self.chatgpt_deployment,
messages=messages,
temperature=0.0,
# max_tokens=32, # setting it too low may cause malformed JSON
max_tokens=100,
n=1)
except Exception as e:
log.error(f"Error generating optimized keyword search: {str(e)}")
yield json.dumps({"error": f"Error generating optimized keyword search: {str(e)}"}) + "\n"
return

generated_query = chat_completion.choices[0].message.content

@@ -208,14 +215,23 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
             'Content-Type': 'application/json',
         }
 
-        response = requests.post(url, json=data,headers=headers,timeout=60)
-        if response.status_code == 200:
-            response_data = response.json()
-            embedded_query_vector =response_data.get('data')
-        else:
-            log.error(f"Error generating embedding:: {response.status_code}")
-            raise Exception('Error generating embedding:', response.status_code)
+        embedded_query_vector = None
+        try:
+            response = requests.post(url, json=data,headers=headers,timeout=60)
+            if response.status_code == 200:
+                response_data = response.json()
+                embedded_query_vector =response_data.get('data')
+            else:
+                # Generate an error message if the embedding generation fails
+                log.error(f"Error generating embedding:: {response.status_code}")
+                yield json.dumps({"error": "Error generating embedding"}) + "\n"
+                return # Go no further
+        except Exception as e:
+            # Timeout or other error has occurred
+            log.error(f"Error generating embedding: {str(e)}")
+            yield json.dumps({"error": f"Error generating embedding: {str(e)}"}) + "\n"
+            return # Go no further
 
         #vector set up for pure vector search & Hybrid search & Hybrid semantic
         vector = RawVectorQuery(vector=embedded_query_vector, k=top, fields="contentVector")
@@ -339,17 +355,19 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
             userPersona=user_persona,
             systemPersona=system_persona,
         )
-        # STEP 3: Generate a contextual and content-specific answer using the search results and chat history.
-        #Added conditional block to use different system messages for different models.
-        if self.model_name.startswith("gpt-35-turbo"):
-            messages = self.get_messages_from_history(
-                system_message,
-                self.model_name,
-                history,
-                history[-1]["user"] + "Sources:\n" + content + "\n\n", # 3.5 has a recency bias, which is why this is here
-                self.RESPONSE_PROMPT_FEW_SHOTS,
-                max_tokens=self.chatgpt_token_limit - 500
-            )
+        try:
+            # STEP 3: Generate a contextual and content-specific answer using the search results and chat history.
+            #Added conditional block to use different system messages for different models.
+            if self.model_name.startswith("gpt-35-turbo"):
+                messages = self.get_messages_from_history(
+                    system_message,
+                    self.model_name,
+                    history,
+                    history[-1]["user"] + "Sources:\n" + content + "\n\n", # 3.5 has a recency bias, which is why this is here
+                    self.RESPONSE_PROMPT_FEW_SHOTS,
+                    max_tokens=self.chatgpt_token_limit - 500
+                )
 
                 #Uncomment to debug token usage.
                 #print(messages)
@@ -361,66 +379,65 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
                #print("System Message Tokens: ", self.num_tokens_from_string(system_message, "cl100k_base"))
                #print("Few Shot Tokens: ", self.num_tokens_from_string(self.response_prompt_few_shots[0]['content'], "cl100k_base"))
                #print("Message Tokens: ", self.num_tokens_from_string(message_string, "cl100k_base"))
 
-            chat_completion= await self.client.chat.completions.create(
-                model=self.chatgpt_deployment,
-                messages=messages,
-                temperature=float(overrides.get("response_temp")) or 0.6,
-                n=1
-            )
+                chat_completion= await self.client.chat.completions.create(
+                    model=self.chatgpt_deployment,
+                    messages=messages,
+                    temperature=float(overrides.get("response_temp")) or 0.6,
+                    n=1,
+                    stream=True
+                )
 
-        elif self.model_name.startswith("gpt-4"):
-            messages = self.get_messages_from_history(
-                system_message,
-                # "Sources:\n" + content + "\n\n" + system_message,
-                self.model_name,
-                history,
-                # history[-1]["user"],
-                history[-1]["user"] + "Sources:\n" + content + "\n\n", # GPT-4 starts to degrade with long system messages, so sources are moved here
-                self.RESPONSE_PROMPT_FEW_SHOTS,
-                max_tokens=self.chatgpt_token_limit
-            )
+            elif self.model_name.startswith("gpt-4"):
+                messages = self.get_messages_from_history(
+                    system_message,
+                    # "Sources:\n" + content + "\n\n" + system_message,
+                    self.model_name,
+                    history,
+                    # history[-1]["user"],
+                    history[-1]["user"] + "Sources:\n" + content + "\n\n", # GPT-4 starts to degrade with long system messages, so sources are moved here
+                    self.RESPONSE_PROMPT_FEW_SHOTS,
+                    max_tokens=self.chatgpt_token_limit
+                )
 
-            #Uncomment to debug token usage.
-            #print(messages)
-            #message_string = ""
-            #for message in messages:
-            #    # enumerate the messages and add the role and content elements of the dictionary to the message_string
-            #    message_string += f"{message['role']}: {message['content']}\n"
-            #print("Content Tokens: ", self.num_tokens_from_string("Sources:\n" + content + "\n\n", "cl100k_base"))
-            #print("System Message Tokens: ", self.num_tokens_from_string(system_message, "cl100k_base"))
-            #print("Few Shot Tokens: ", self.num_tokens_from_string(self.response_prompt_few_shots[0]['content'], "cl100k_base"))
-            #print("Message Tokens: ", self.num_tokens_from_string(message_string, "cl100k_base"))
+                #Uncomment to debug token usage.
+                #print(messages)
+                #message_string = ""
+                #for message in messages:
+                #    # enumerate the messages and add the role and content elements of the dictionary to the message_string
+                #    message_string += f"{message['role']}: {message['content']}\n"
+                #print("Content Tokens: ", self.num_tokens_from_string("Sources:\n" + content + "\n\n", "cl100k_base"))
+                #print("System Message Tokens: ", self.num_tokens_from_string(system_message, "cl100k_base"))
+                #print("Few Shot Tokens: ", self.num_tokens_from_string(self.response_prompt_few_shots[0]['content'], "cl100k_base"))
+                #print("Message Tokens: ", self.num_tokens_from_string(message_string, "cl100k_base"))
 
-            chat_completion= await self.client.chat.completions.create(
-                model=self.chatgpt_deployment,
-                messages=messages,
-                temperature=float(overrides.get("response_temp")) or 0.6,
-                max_tokens=1024,
-                n=1
-            )
-        # STEP 4: Format the response
-        msg_to_display = '\n\n'.join([str(message) for message in messages])
-        generated_response=chat_completion.choices[0].message.content
-
-        # # Detect the language of the response
-        response_language = self.detect_language(generated_response)
-        #if response is not in user's language, translate it to user's language
-        if response_language != detectedlanguage:
-            translated_response = self.translate_response(generated_response, detectedlanguage)
-        else:
-            translated_response = generated_response
-        thought_chain["work_response"] = urllib.parse.unquote(translated_response)
+                chat_completion= await self.client.chat.completions.create(
+                    model=self.chatgpt_deployment,
+                    messages=messages,
+                    temperature=float(overrides.get("response_temp")) or 0.6,
+                    n=1,
+                    stream=True
+                )
+            msg_to_display = '\n\n'.join([str(message) for message in messages])
 
-        return {
-            "data_points": data_points,
-            "answer": f"{urllib.parse.unquote(translated_response)}",
-            "thoughts": f"Searched for:<br>{generated_query}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
-            "thought_chain": thought_chain,
-            "work_citation_lookup": citation_lookup,
-            "web_citation_lookup": {}
-        }
+            # Return the data we know
+            yield json.dumps({"data_points": {},
+                              "thoughts": f"Searched for:<br>{generated_query}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
+                              "thought_chain": thought_chain,
+                              "work_citation_lookup": citation_lookup,
+                              "web_citation_lookup": {}}) + "\n"
 
+            # STEP 4: Format the response
+            async for chunk in chat_completion:
+                # Check if there is at least one element and the first element has the key 'delta'
+                if len(chunk.choices) > 0:
+                    yield json.dumps({"content": chunk.choices[0].delta.content}) + "\n"
+        except Exception as e:
+            log.error(f"Error generating chat completion: {str(e)}")
+            yield json.dumps({"error": f"Error generating chat completion: {str(e)}"}) + "\n"
+            return
 
 
     def detect_language(self, text: str) -> str:
         """ Function to detect the language of the text"""
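For orientation, a hedged client-side sketch of consuming the protocol this generator now emits: one JSON object per line, with a metadata record (thoughts, thought_chain, citation lookups) first, {"content": ...} deltas after it, and {"error": ...} possible at any point. The URL and payload shape below are assumptions for illustration, not taken from the repo:

import json

import requests

def read_chat_stream(url: str, payload: dict):
    """Collect a streamed NDJSON chat response into (metadata, answer)."""
    metadata, answer = None, []
    with requests.post(url, json=payload, stream=True, timeout=300) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            event = json.loads(line)
            if "error" in event:
                raise RuntimeError(event["error"])
            if "content" in event:
                if event["content"] is not None:
                    answer.append(event["content"])
            else:
                metadata = event  # the first line: thoughts and citations
    return metadata, "".join(answer)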
55 changes: 42 additions & 13 deletions app/backend/approaches/chatwebretrieveread.py
@@ -1,6 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
+import json
+import logging
 import os
 import re
 from typing import Any, Sequence
@@ -104,7 +106,11 @@ async def run(self, history: Sequence[dict[str, str]],overrides: dict[str, Any],
         Returns:
             Any: The result of the approach.
         """
+        log = logging.getLogger("uvicorn")
+        log.setLevel('DEBUG')
+        log.propagate = True
 
+        query_resp = None
         user_query = history[-1].get("user")
         user_persona = overrides.get("user_persona", "")
         system_persona = overrides.get("system_persona", "")
@@ -127,7 +133,13 @@ async def run(self, history: Sequence[dict[str, str]],overrides: dict[str, Any],
             self.chatgpt_token_limit - len(user_query)
         )
 
-        query_resp = await self.make_chat_completion(messages)
+        try:
+            query_resp = await self.make_chat_completion(messages)
+        except Exception as e:
+            log.error(f"Error generating optimized keyword search: {str(e)}")
+            yield json.dumps({"error": f"Error generating optimized keyword search: {str(e)}"}) + "\n"
+            return
+
         thought_chain["web_search_term"] = query_resp
         # STEP 2: Use the search query to get the top web search results
         url_snippet_dict = await self.web_search_with_safe_search(query_resp)
Expand All @@ -152,18 +164,35 @@ async def run(self, history: Sequence[dict[str, str]],overrides: dict[str, Any],
self.RESPONSE_PROMPT_FEW_SHOTS,
max_tokens=4097 - 500
)

msg_to_display = '\n\n'.join([str(message) for message in messages])
# STEP 3: Use the search results to answer the user's question
resp = await self.make_chat_completion(messages)
thought_chain["web_response"] = resp
return {
"data_points": None,
"answer": f"{urllib.parse.unquote(resp)}",
"thoughts": f"Searched for:<br>{query_resp}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
"thought_chain": thought_chain,
"work_citation_lookup": {},
"web_citation_lookup": self.citations
}
try:
# STEP 3: Use the search results to answer the user's question
resp = await self.client.chat.completions.create(
model=self.chatgpt_deployment,
messages=messages,
temperature=0.6,
n=1,
stream=True
)

# Return the data we know
yield json.dumps({"data_points": {},
"thoughts": f"Searched for:<br>{query_resp}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
"thought_chain": thought_chain,
"work_citation_lookup": {},
"web_citation_lookup": self.citations}) + "\n"

# STEP 4: Format the response
async for chunk in resp:
# Check if there is at least one element and the first element has the key 'delta'
if len(chunk.choices) > 0:
yield json.dumps({"content": chunk.choices[0].delta.content}) + "\n"

except Exception as e:
log.error(f"Error generating chat completion: {str(e)}")
yield json.dumps({"error": f"Error generating chat completion: {str(e)}"}) + "\n"
return


async def web_search_with_safe_search(self, user_query):
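Both approaches now share the same server-side shape. A distilled sketch of that pattern, assuming an openai>=1.x async client whose streaming call yields chunks exposing choices[0].delta.content; stream_answer and its parameters are illustrative, not the repo's API:

import json
from typing import Any, AsyncGenerator

async def stream_answer(client: Any, deployment: str, messages: list,
                        metadata: dict) -> AsyncGenerator[str, None]:
    """Stream a chat completion as NDJSON, reporting errors in-band."""
    try:
        completion = await client.chat.completions.create(
            model=deployment,
            messages=messages,
            temperature=0.6,
            n=1,
            stream=True,
        )
        # Line one: everything already known (thoughts, citation lookups),
        # so the client can render context before any tokens arrive.
        yield json.dumps(metadata) + "\n"
        async for chunk in completion:
            # Guard against keep-alive chunks with an empty choices list.
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield json.dumps({"content": chunk.choices[0].delta.content}) + "\n"
    except Exception as e:
        # The HTTP stream has already started, so surface failures as an
        # in-band error record rather than an HTTP error status.
        yield json.dumps({"error": f"Error generating chat completion: {e}"}) + "\n"

Emitting errors as NDJSON records is the design choice that lets the client keep a single parsing path: every line is a JSON object, whether it carries metadata, a content delta, or a failure.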
(Diff truncated: the remaining 31 changed files are not shown.)
