replicate · bfirsh · Mar 26, 2023 · Mar 23, 2023
diff --git a/README.md b/README.md
@@ -25,109 +25,125 @@ We recommend not adding the token directly to your source code, because you don'
 Create a new Python file and add the following code:
 
 ```python
-import replicate
-model = replicate.models.get("stability-ai/stable-diffusion")
-version = model.versions.get("27b93a2413e7f36cd83da926f3656280b2931564ff050bf9575f1fdf9bcd7478")
-version.predict(prompt="a 19th century portrait of a wombat gentleman")
+>>> import replicate
+>>> replicate.run(
+        "stability-ai/stable-diffusion:27b93a2413e7f36cd83da926f3656280b2931564ff050bf9575f1fdf9bcd7478",
+        input={"prompt": "a 19th century portrait of a wombat gentleman"}
+    )
 
-# ['https://replicate.com/api/models/stability-ai/stable-diffusion/files/50fcac81-865d-499e-81ac-49de0cb79264/out-0.png']
+['https://replicate.com/api/models/stability-ai/stable-diffusion/files/50fcac81-865d-499e-81ac-49de0cb79264/out-0.png']
 ```
 
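-Some models, like [methexis-inc/img2prompt](https://replicate.com/methexis-inc/img2prompt), receive images as inputs. To pass a file as an input, use a file handle or URL:
+Some models, like [salesforce/blip](https://replicate.com/salesforce/blip), receive images as inputs. To pass a file as an input, use a file handle or URL: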
 
 ```python
-model = replicate.models.get("methexis-inc/img2prompt")
-version = model.versions.get("50adaf2d3ad20a6f911a8a9e3ccf777b263b8596fbd2c8fc26e8888f8a0edbb5")
-inputs = {
-    "image": open("path/to/mystery.jpg", "rb"),
-}
-output = version.predict(**inputs)
-
-# [['n02123597', 'Siamese_cat', 0.8829364776611328],
-#  ['n02123394', 'Persian_cat', 0.09810526669025421],
-#  ['n02123045', 'tabby', 0.005758069921284914]]
+>>> replicate.run(
+        "salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746",
+        input={"image": open("path/to/mystery.jpg", "rb")},
+    )
+
+"an astronaut riding a horse"
 ```
 
-## Compose models into a pipeline
+## Run a model in the background
 
-You can run a model and feed the output into another model:
+You can start a model and run it in the background:
 
 ```python
-laionide = replicate.models.get("afiaka87/laionide-v4").versions.get("b21cbe271e65c1718f2999b038c18b45e21e4fba961181fbfae9342fc53b9e05")
-swinir = replicate.models.get("jingyunliang/swinir").versions.get("660d922d33153019e8c263a3bba265de882e7f4f70396546b6c9c8f9d47a021a")
-image = laionide.predict(prompt="avocado armchair")
-upscaled_image = swinir.predict(image=image)
-```
+>>> model = replicate.models.get("kvfrans/clipdraw")
+>>> version = model.versions.get("5797a99edc939ea0e9242d5e8c9cb3bc7d125b1eac21bda852e5cb79ede2cd9b")
+>>> prediction = replicate.predictions.create(
+    version=version,
+    input={"prompt":"Watercolor painting of an underwater submarine"})
 
-## Get output from a running model
+>>> prediction
+Prediction(...)
 
-Run a model and get its output while it's running:
+>>> prediction.status
+'starting'
 
-```python
-model = replicate.models.get("pixray/text2image")
-version = model.versions.get("5c347a4bfa1d4523a58ae614c2194e15f2ae682b57e3797a5bb468920aa70ebf")
-for image in version.predict(prompts="san francisco sunset"):
-    display(image)
+>>> dict(prediction)
+{"id": "...", "status": "starting", ...}
+
+>>> prediction.reload()
+>>> prediction.status
+'processing'
+
+>>> print(prediction.logs)
+iteration: 0, render:loss: -0.6171875
+iteration: 10, render:loss: -0.92236328125
+iteration: 20, render:loss: -1.197265625
+iteration: 30, render:loss: -1.3994140625
+
+>>> prediction.wait()
+
+>>> prediction.status
+'succeeded'
+
+>>> prediction.output
+'https://.../output.png'
 ```
 
-## Run a model in the background
+## Run a model in the background and get a webhook
 
-You can start a model and run it in the background:
+You can run a model and get a webhook when it completes, instead of waiting for it to finish:
 
 ```python
 model = replicate.models.get("kvfrans/clipdraw")
 version = model.versions.get("5797a99edc939ea0e9242d5e8c9cb3bc7d125b1eac21bda852e5cb79ede2cd9b")
 prediction = replicate.predictions.create(
     version=version,
-    input={"prompt":"Watercolor painting of an underwater submarine"})
-
-# >>> prediction
-# Prediction(...)
+    input={"prompt":"Watercolor painting of an underwater submarine"},
+    webhook="https://example.com/your-webhook",
+    webhook_events_filter=["completed"]
+)
+```
 
-# >>> prediction.status
-# 'starting'
+## Compose models into a pipeline
 
-# >>> dict(prediction)
-# {"id": "...", "status": "starting", ...}
+You can run a model and feed the output into another model:
 
-# >>> prediction.reload()
-# >>> prediction.status
-# 'processing'
+```python
+laionide = replicate.models.get("afiaka87/laionide-v4").versions.get("b21cbe271e65c1718f2999b038c18b45e21e4fba961181fbfae9342fc53b9e05")
+swinir = replicate.models.get("jingyunliang/swinir").versions.get("660d922d33153019e8c263a3bba265de882e7f4f70396546b6c9c8f9d47a021a")
+image = laionide.predict(prompt="avocado armchair")
+upscaled_image = swinir.predict(image=image)
+```
 
-# >>> print(prediction.logs)
-# iteration: 0, render:loss: -0.6171875
-# iteration: 10, render:loss: -0.92236328125
-# iteration: 20, render:loss: -1.197265625
-# iteration: 30, render:loss: -1.3994140625
+## Get output from a running model
 
-# >>> prediction.wait()
+Run a model and get its output while it's running:
 
-# >>> prediction.status
-# 'succeeded'
+```python
+iterator = replicate.run(
+    "pixray/text2image:5c347a4bfa1d4523a58ae614c2194e15f2ae682b57e3797a5bb468920aa70ebf",
+    input={"prompts": "san francisco sunset"}
+)
 
-# >>> prediction.output
-# 'https://.../output.png'
+for image in iterator:
+    display(image)
 ```
 
 ## Cancel a prediction
 
 You can cancel a running prediction:
 
 ```python
-model = replicate.models.get("kvfrans/clipdraw")
-version = model.versions.get("5797a99edc939ea0e9242d5e8c9cb3bc7d125b1eac21bda852e5cb79ede2cd9b")
-prediction = replicate.predictions.create(
-    version=version,
-    input={"prompt":"Watercolor painting of an underwater submarine"})
+>>> model = replicate.models.get("kvfrans/clipdraw")
+>>> version = model.versions.get("5797a99edc939ea0e9242d5e8c9cb3bc7d125b1eac21bda852e5cb79ede2cd9b")
+>>> prediction = replicate.predictions.create(
+        version=version,
+        input={"prompt":"Watercolor painting of an underwater submarine"}
+    )
 
-# >>> prediction.status
-# 'starting'
+>>> prediction.status
+'starting'
 
-# >>> prediction.cancel()
+>>> prediction.cancel()
 
-# >>> prediction.reload()
-# >>> prediction.status
-# 'canceled'
+>>> prediction.reload()
+>>> prediction.status
+'canceled'
 ```
 
 ## List predictions

diff --git a/replicate/__init__.py b/replicate/__init__.py
@@ -2,5 +2,6 @@
 from .client import Client
 
 default_client = Client()
+run = default_client.run  # bound method of the default client, callable as replicate.run(...)
 models = default_client.models
 predictions = default_client.predictions
diff --git a/replicate/client.py b/replicate/client.py
@@ -1,11 +1,13 @@
 import os
+import re
 from json import JSONDecodeError
+from typing import Any, Iterator, Union
 
 import requests
 from requests.adapters import HTTPAdapter, Retry
 
 from replicate.__about__ import __version__
-from replicate.exceptions import ReplicateError
+from replicate.exceptions import ModelError, ReplicateError
 from replicate.model import ModelCollection
 from replicate.prediction import PredictionCollection
 
@@ -35,7 +37,20 @@ def __init__(self, api_token=None) -> None:
-            # TODO: Only retry on GET so we don't unintionally mutute data
+            # TODO: Only retry on GET so we don't unintentionally mutate data
             method_whitelist=["GET", "POST", "PUT"],
             # https://support.cloudflare.com/hc/en-us/articles/115003011431-Troubleshooting-Cloudflare-5XX-errors
-            status_forcelist=[429, 500, 502, 503, 504, 520, 521, 522, 523, 524, 526, 527],
+            status_forcelist=[
+                429,
+                500,
+                502,
+                503,
+                504,
+                520,
+                521,
+                522,
+                523,
+                524,
+                526,
+                527,
+            ],
         )
 
         self.session.mount("http://", HTTPAdapter(max_retries=retries))
@@ -84,3 +99,30 @@ def models(self) -> ModelCollection:
     @property
     def predictions(self) -> PredictionCollection:
         return PredictionCollection(client=self)
+
+    def run(self, model_version, **kwargs) -> Union[Any, Iterator[Any]]:
+        """Run a model given as owner/name:version; **kwargs (e.g. input=...) go to predictions.create().
+        Returns prediction.output_iterator() for iterator-typed outputs, else waits and returns prediction.output.
+        Raises ReplicateError for a malformed model_version, ModelError if the awaited prediction failed."""
+        # Split "owner/name:version" into two groups: the model ("owner/name") and the version id
+        m = re.match(r"^(?P<model>[^/]+/[^:]+):(?P<version>.+)$", model_version)
+        if not m:
+            raise ReplicateError(
+                f"Invalid model_version: {model_version}. Expected format: owner/name:version"
+            )
+        model = self.models.get(m.group("model"))
+        version = model.versions.get(m.group("version"))
+        prediction = self.predictions.create(version=version, **kwargs)
+        # The version's Output schema decides the return shape: iterator outputs are returned as an iterator
+        schema = version.get_transformed_schema()
+        output = schema["components"]["schemas"]["Output"]
+        if (
+            output.get("type") == "array"
+            and output.get("x-cog-array-type") == "iterator"
+        ):
+            return prediction.output_iterator()
+
+        prediction.wait()  # not an iterator output: wait, then hand back prediction.output
+        if prediction.status == "failed":
+            raise ModelError(prediction.error)
+        return prediction.output
diff --git a/replicate/version.py b/replicate/version.py
@@ -1,4 +1,5 @@
 import datetime
+import warnings
 from typing import Any, Iterator, List, Union
 
 from replicate.base_model import BaseModel
@@ -14,10 +15,13 @@ class Version(BaseModel):
     openapi_schema: Any
 
     def predict(self, **kwargs) -> Union[Any, Iterator[Any]]:
-        # TODO: support args
+        warnings.warn(
+            "version.predict() is deprecated. Use replicate.run() instead. It will be removed before version 1.0.",
+            DeprecationWarning,
+        )
+
         prediction = self._client.predictions.create(version=self, input=kwargs)
         # Return an iterator of the output
-        # FIXME: might just be a list, not an iterator. I wonder if we should differentiate?
         schema = self.get_transformed_schema()
         output = schema["components"]["schemas"]["Output"]
         if (