replicate · technillogue · Jan 16, 2024 · Jan 16, 2024 · Oct 30, 2023 · May 7, 2024
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -4,14 +4,17 @@ on:
   push:
     branches:
       - main
+      - async
     tags:
       - "**"
   pull_request:
     branches:
       - main
+      - async
   merge_group:
     branches:
       - main
+      - async
     types:
       - checks_requested
 jobs:

diff --git a/.tool-versions b/.tool-versions
@@ -1 +1 @@
-golang 1.20
+golang 1.21.0
diff --git a/docs/metrics.md b/docs/metrics.md
@@ -0,0 +1,14 @@
+# Metrics
+
+Prediction objects have a `metrics` field, which normally includes `predict_time` and `total_time`. Official language models also report metrics such as `input_token_count`, `output_token_count`, `tokens_per_second`, and `time_to_first_token`. Currently, custom metrics emitted from Cog are ignored when running on Replicate; official Replicate-published models are the only exception. When running outside of Replicate, you can emit custom metrics like this:
+
+
+```python
+import cog
+from cog import BasePredictor, Path
+
+class Predictor(BasePredictor):
+    def predict(self, width: int, height: int) -> Path:
+        """Run a single prediction on the model"""
+        cog.emit_metric(name="pixel_count", value=width * height)
+```
diff --git a/pkg/config/config.go b/pkg/config/config.go
@@ -57,16 +57,21 @@ type Build struct {
 	pythonRequirementsContent []string
 }
 
+type Concurrency struct {
+	Max int `json:"max,omitempty" yaml:"max"`
+}
+
 type Example struct {
 	Input  map[string]string `json:"input" yaml:"input"`
 	Output string            `json:"output" yaml:"output"`
 }
 
 type Config struct {
-	Build   *Build `json:"build" yaml:"build"`
-	Image   string `json:"image,omitempty" yaml:"image"`
-	Predict string `json:"predict,omitempty" yaml:"predict"`
-	Train   string `json:"train,omitempty" yaml:"train"`
+	Build       *Build       `json:"build" yaml:"build"`
+	Image       string       `json:"image,omitempty" yaml:"image"`
+	Predict     string       `json:"predict,omitempty" yaml:"predict"`
+	Train       string       `json:"train,omitempty" yaml:"train"`
+	Concurrency *Concurrency `json:"concurrency,omitempty" yaml:"concurrency"`
 }
 
 func DefaultConfig() *Config {

diff --git a/pkg/config/data/config_schema_v1.0.json b/pkg/config/data/config_schema_v1.0.json
@@ -154,11 +154,6 @@
           "$id": "#/properties/concurrency/properties/max",
           "type": "integer",
           "description": "The maximum number of concurrent predictions."
-        },
-        "default_target": {
-          "$id": "#/properties/concurrency/properties/default_target",
-          "type": "integer",
-          "description": "The default target for number of concurrent predictions. This setting can be used by an autoscaler to determine when to scale a deployment of a model up or down."
         }
       }
     }

diff --git a/pyproject.toml b/pyproject.toml
@@ -10,11 +10,13 @@ authors = [{ name = "Replicate", email = "team@replicate.com" }]
 license.file = "LICENSE"
 urls."Source" = "https://github.com/replicate/cog"
 
-requires-python = ">=3.7"
+requires-python = ">=3.8"
 dependencies = [
   # intentionally loose. perhaps these should be vendored to not collide with user code?
   "attrs>=20.1,<24",
   "fastapi>=0.75.2,<0.99.0",
+  # we may not need http2
+  "httpx[http2]>=0.21.0,<1",
   "pydantic>=1.9,<2",
   "PyYAML",
   "requests>=2,<3",
@@ -27,14 +29,15 @@ dependencies = [
 optional-dependencies = { "dev" = [
   "black",
   "build",
-  "httpx",
   'hypothesis<6.80.0; python_version < "3.8"',
   'hypothesis; python_version >= "3.8"',
+  "respx",
   'numpy<1.22.0; python_version < "3.8"',
   'numpy; python_version >= "3.8"',
   "pillow",
   "pyright==1.1.347",
   "pytest",
+  "pytest-asyncio",
   "pytest-httpserver",
   "pytest-rerunfailures",
   "pytest-xdist",

diff --git a/python/cog/__init__.py b/python/cog/__init__.py
@@ -1,7 +1,15 @@
 from pydantic import BaseModel
 
 from .predictor import BasePredictor
-from .types import ConcatenateIterator, File, Input, Path, Secret
+from .server.worker import emit_metric
+from .types import (
+    AsyncConcatenateIterator,
+    ConcatenateIterator,
+    File,
+    Input,
+    Path,
+    Secret,
+)
 
 try:
     from ._version import __version__
@@ -14,8 +22,10 @@
     "BaseModel",
     "BasePredictor",
     "ConcatenateIterator",
+    "AsyncConcatenateIterator",
     "File",
     "Input",
     "Path",
     "Secret",
+    "emit_metric",
 ]
diff --git a/python/cog/command/ast_openapi_schema.py b/python/cog/command/ast_openapi_schema.py
@@ -147,6 +147,24 @@
         "summary": "Healthcheck"
       }
     },
+    "/ready": {
+      "get": {
+        "summary": "Ready",
+        "operationId": "ready_ready_get",
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Ready Ready Get"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
     "/predictions": {
       "post": {
         "description": "Run a single prediction on the model",
@@ -372,7 +390,7 @@ def get_call_name(call: ast.Call) -> str:
 def parse_args(tree: ast.AST) -> "list[tuple[ast.arg, ast.expr | types.EllipsisType]]":
     """Parse argument, default pairs from a file with a predict function"""
     predict = find(tree, "predict")
-    assert isinstance(predict, ast.FunctionDef)
+    assert isinstance(predict, (ast.FunctionDef, ast.AsyncFunctionDef))
     args = predict.args.args  # [-len(defaults) :]
     # use Ellipsis instead of None here to distinguish a default of None
     defaults = [...] * (len(args) - len(predict.args.defaults)) + predict.args.defaults
@@ -449,7 +467,7 @@ def parse_return_annotation(
     tree: ast.AST, fn: str = "predict"
 ) -> "tuple[JSONDict, JSONDict]":
     predict = find(tree, fn)
-    if not isinstance(predict, ast.FunctionDef):
+    if not isinstance(predict, (ast.FunctionDef, ast.AsyncFunctionDef)):
         raise ValueError("Could not find predict function")
     annotation = predict.returns
     if not annotation:

diff --git a/python/cog/files.py b/python/cog/files.py
diff --git a/python/cog/json.py b/python/cog/json.py
@@ -1,13 +1,10 @@
-import io
 from datetime import datetime
 from enum import Enum
 from types import GeneratorType
-from typing import Any, Callable
+from typing import Any
 
 from pydantic import BaseModel
 
-from .types import Path
-
 
 def make_encodeable(obj: Any) -> Any:
     """
@@ -39,24 +36,3 @@ def make_encodeable(obj: Any) -> Any:
         if isinstance(obj, np.ndarray):
             return obj.tolist()
     return obj
-
-
-def upload_files(obj: Any, upload_file: Callable[[io.IOBase], str]) -> Any:
-    """
-    Iterates through an object from make_encodeable and uploads any files.
-
-    When a file is encountered, it will be passed to upload_file. Any paths will be opened and converted to files.
-    """
-    # skip four isinstance checks for fast text models
-    if type(obj) == str:  # noqa: E721
-        return obj
-    if isinstance(obj, dict):
-        return {key: upload_files(value, upload_file) for key, value in obj.items()}
-    if isinstance(obj, list):
-        return [upload_files(value, upload_file) for value in obj]
-    if isinstance(obj, Path):
-        with obj.open("rb") as f:
-            return upload_file(f)
-    if isinstance(obj, io.IOBase):
-        return upload_file(obj)
-    return obj
diff --git a/python/cog/logging.py b/python/cog/logging.py
@@ -86,4 +86,5 @@ def setup_logging(*, log_level: int = logging.NOTSET) -> None:
 
     # Reconfigure log levels for some overly chatty libraries
     logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
+    # FIXME: urllib3 may no longer be in use(?); if so, this override can be removed
     logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)