Merge branch 'main' into feat/add-azure-content-filter

manhhungdt06 · May 11, 2024 · bbe1300 · bbe1300
2 parents 1639a51 + 3ee6135
commit bbe1300
Show file tree

Hide file tree

Showing 200 changed files with 19,095 additions and 2,843 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -1,4 +1,4 @@
-version: 2.1
+version: 2.1  # CircleCI config schema version (must be 2, 2.0 or 2.1) - not a project release number
 jobs:
   local_testing:
     docker:
@@ -188,7 +188,7 @@ jobs:
           command: |
             docker run -d \
               -p 4000:4000 \
-              -e DATABASE_URL=$PROXY_DOCKER_DB_URL \
+              -e DATABASE_URL=$PROXY_DATABASE_URL \
               -e AZURE_API_KEY=$AZURE_API_KEY \
               -e REDIS_HOST=$REDIS_HOST \
               -e REDIS_PASSWORD=$REDIS_PASSWORD \
@@ -198,6 +198,7 @@ jobs:
               -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
               -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
               -e AWS_REGION_NAME=$AWS_REGION_NAME \
+              -e AUTO_INFER_REGION=True \
               -e OPENAI_API_KEY=$OPENAI_API_KEY \
               -e LANGFUSE_PROJECT1_PUBLIC=$LANGFUSE_PROJECT1_PUBLIC \
               -e LANGFUSE_PROJECT2_PUBLIC=$LANGFUSE_PROJECT2_PUBLIC \
@@ -208,9 +209,7 @@ jobs:
               my-app:latest \
               --config /app/config.yaml \
               --port 4000 \
-              --num_workers 8 \
               --detailed_debug \
-              --run_gunicorn \
       - run:
           name: Install curl and dockerize
           command: |
@@ -225,7 +224,7 @@ jobs:
           background: true
       - run: 
           name: Wait for app to be ready
-          command: dockerize -wait http://localhost:4000 -timeout 1m
+          command: dockerize -wait http://localhost:4000 -timeout 5m
       - run:
           name: Run tests
           command: |

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,51 @@
+{
+	"name": "Python 3.11",
+	// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
+	"image": "mcr.microsoft.com/devcontainers/python:3.11-bookworm",
+	// https://github.com/devcontainers/images/tree/main/src/python
+	// https://mcr.microsoft.com/en-us/product/devcontainers/python/tags
+
+	// "build": {
+	// 	"dockerfile": "Dockerfile",
+	// 	"context": ".."
+	// },
+
+	// Features to add to the dev container. More info: https://containers.dev/features.
+	// "features": {},
+
+	// Configure tool-specific properties.
+	"customizations": {
+		// Configure properties specific to VS Code.
+		"vscode": {
+			"settings": {},
+			"extensions": [
+				"ms-python.python",
+				"ms-python.vscode-pylance",
+				"GitHub.copilot",
+				"GitHub.copilot-chat"
+			]
+		}
+	},
+
+	// Use 'forwardPorts' to make a list of ports inside the container available locally.
+	"forwardPorts": [4000],
+
+	"containerEnv": {
+		"LITELLM_LOG": "DEBUG"
+	},
+
+	// Use 'portsAttributes' to set default properties for specific forwarded ports. 
+	// More info: https://containers.dev/implementors/json_reference/#port-attributes
+	"portsAttributes": {
+		"4000": {
+			"label": "LiteLLM Server",
+			"onAutoForward": "notify"
+		}
+	},
+
+	// More info: https://aka.ms/dev-containers-non-root.
+	// "remoteUser": "litellm",
+
+	// Use 'postCreateCommand' to run commands after the container is created.
+	"postCreateCommand": "pipx install poetry && poetry install -E extra_proxy -E proxy"
+}
diff --git a/.github/workflows/interpret_load_test.py b/.github/workflows/interpret_load_test.py
@@ -64,6 +64,11 @@ def interpret_results(csv_file):
     )  # Replace with your repository's username and name
     latest_release = repo.get_latest_release()
     print("got latest release: ", latest_release)
+    print(latest_release.title)
+    print(latest_release.tag_name)
+
+    release_version = latest_release.title
+
     print("latest release body: ", latest_release.body)
     print("markdown table: ", markdown_table)
 
@@ -74,8 +79,22 @@ def interpret_results(csv_file):
         start_index = latest_release.body.find("Load Test LiteLLM Proxy Results")
         existing_release_body = latest_release.body[:start_index]
 
+    docker_run_command = f"""
+\n\n
+## Docker Run LiteLLM Proxy
+
+```
+docker run \\
+-e STORE_MODEL_IN_DB=True \\
+-p 4000:4000 \\
+ghcr.io/berriai/litellm:main-{release_version}
+```
+    """
+    print("docker run command: ", docker_run_command)
+
     new_release_body = (
         existing_release_body
+        + docker_run_command
         + "\n\n"
         + "### Don't want to maintain your internal proxy? get in touch 🎉"
         + "\nHosted Proxy Alpha: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"

diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 .venv
 .env
+litellm/proxy/myenv/*
 litellm_uuid.txt
 __pycache__/
 *.pyc
@@ -52,3 +53,6 @@ litellm/proxy/_new_secret_config.yaml
 litellm/proxy/_new_secret_config.yaml
 litellm/proxy/_super_secret_config.yaml
 litellm/proxy/_super_secret_config.yaml
+litellm/proxy/myenv/bin/activate
+litellm/proxy/myenv/bin/Activate.ps1
+myenv/*
diff --git a/README.md b/README.md
@@ -226,6 +226,7 @@ curl 'http://0.0.0.0:4000/key/generate' \
 | [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra)                       | ✅                                                      | ✅                                                                              | ✅                                                                                  | ✅                                                                                |
 | [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity)                  | ✅                                                      | ✅                                                                              | ✅                                                                                  | ✅                                                                                |
 | [Groq AI](https://docs.litellm.ai/docs/providers/groq)                              | ✅                                                      | ✅                                                                              | ✅                                                                                  | ✅                                                                                |
+| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek)                         | ✅                                                      | ✅                                                                              | ✅                                                                                  | ✅                                                                                |
 | [anyscale](https://docs.litellm.ai/docs/providers/anyscale)                         | ✅                                                      | ✅                                                                              | ✅                                                                                  | ✅                                                                                |
 | [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx)                  | ✅                                                      | ✅                                                                              | ✅                                                                                  | ✅                                                                                | ✅ 
 | [voyage ai](https://docs.litellm.ai/docs/providers/voyage)                          |                                                         |                                                                                 |                                                                                     |                                                                                   | ✅                                                                            |

diff --git a/deploy/azure_resource_manager/azure_marketplace.zip b/deploy/azure_resource_manager/azure_marketplace.zip
diff --git a/deploy/azure_resource_manager/azure_marketplace/createUiDefinition.json b/deploy/azure_resource_manager/azure_marketplace/createUiDefinition.json
@@ -0,0 +1,15 @@
+{
+    "$schema": "https://schema.management.azure.com/schemas/0.1.2-preview/CreateUIDefinition.MultiVm.json#",
+    "handler": "Microsoft.Azure.CreateUIDef",
+    "version": "0.1.2-preview",
+    "parameters": {
+        "config": {
+            "isWizard": false,
+            "basics": { }
+        },
+        "basics": [ ],
+        "steps": [ ],
+        "outputs": { },
+        "resourceTypes": [ ]
+    }
+}
diff --git a/deploy/azure_resource_manager/azure_marketplace/mainTemplate.json b/deploy/azure_resource_manager/azure_marketplace/mainTemplate.json
@@ -0,0 +1,63 @@
+{
+    "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
+    "contentVersion": "1.0.0.0",
+    "parameters": {
+      "imageName": {
+        "type": "string",
+        "defaultValue": "ghcr.io/berriai/litellm:main-latest"
+      },
+      "containerName": {
+        "type": "string",
+        "defaultValue": "litellm-container"
+      },
+      "dnsLabelName": {
+        "type": "string",
+        "defaultValue": "litellm"
+      },
+      "portNumber": {
+        "type": "int",
+        "defaultValue": 4000
+      }
+    },
+    "resources": [
+      {
+        "type": "Microsoft.ContainerInstance/containerGroups",
+        "apiVersion": "2021-03-01",
+        "name": "[parameters('containerName')]",
+        "location": "[resourceGroup().location]",
+        "properties": {
+          "containers": [
+            {
+              "name": "[parameters('containerName')]",
+              "properties": {
+                "image": "[parameters('imageName')]",
+                "resources": {
+                  "requests": {
+                    "cpu": 1,
+                    "memoryInGB": 2
+                  }
+                },
+                "ports": [
+                  {
+                    "port": "[parameters('portNumber')]"
+                  }
+                ]
+              }
+            }
+          ],
+          "osType": "Linux",
+          "restartPolicy": "Always",
+          "ipAddress": {
+            "type": "Public",
+            "ports": [
+              {
+                "protocol": "tcp",
+                "port": "[parameters('portNumber')]"
+              }
+            ],
+            "dnsNameLabel": "[parameters('dnsLabelName')]"
+          }
+        }
+      }
+    ]
+  }
diff --git a/deploy/azure_resource_manager/main.bicep b/deploy/azure_resource_manager/main.bicep
@@ -0,0 +1,42 @@
+param imageName string = 'ghcr.io/berriai/litellm:main-latest'
+param containerName string = 'litellm-container'
+param dnsLabelName string = 'litellm'
+param portNumber int = 4000
+
+resource containerGroupName 'Microsoft.ContainerInstance/containerGroups@2021-03-01' = {
+  name: containerName
+  location: resourceGroup().location
+  properties: {
+    containers: [
+      {
+        name: containerName
+        properties: {
+          image: imageName
+          resources: {
+            requests: {
+              cpu: 1
+              memoryInGB: 2
+            }
+          }
+          ports: [
+            {
+              port: portNumber
+            }
+          ]
+        }
+      }
+    ]
+    osType: 'Linux'
+    restartPolicy: 'Always'
+    ipAddress: {
+      type: 'Public'
+      ports: [
+        {
+          protocol: 'tcp'
+          port: portNumber
+        }
+      ]
+      dnsNameLabel: dnsLabelName
+    }
+  }
+}
diff --git a/deploy/charts/litellm-helm/Chart.yaml b/deploy/charts/litellm-helm/Chart.yaml
@@ -24,7 +24,7 @@ version: 0.2.0
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: v1.24.5
+appVersion: v1.35.38
 
 dependencies:
   - name: "postgresql"

diff --git a/docs/my-website/docs/completion/input.md b/docs/my-website/docs/completion/input.md
@@ -83,8 +83,9 @@ def completion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
-    max_tokens: Optional[float] = None,
+    max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
     frequency_penalty: Optional[float] = None,
     logit_bias: Optional[dict] = None,
@@ -139,6 +140,10 @@ def completion(
 
 - `stream`: *boolean or null (optional)* - If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.
 
+- `stream_options`: *dict or null (optional)* - Options for the streaming response. Only set this when you set `stream: true`.
+
+    - `include_usage`: *boolean (optional)* - If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
+
 - `stop`: *string/ array/ null (optional)* - Up to 4 sequences where the API will stop generating further tokens.
 
 - `max_tokens`: *integer (optional)* - The maximum number of tokens to generate in the chat completion.