Skip to content

Version 11.6 diff #66

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ openai.api_key = "sk-..." # supply your API key however you choose
text_string = "sample text"

# choose an embedding
model_id = "davinci-similarity"
model_id = "text-similarity-davinci-001"

# compute the embedding of the text
embedding = openai.Engine(id=model_id).embeddings(input=text_string)['data'][0]['embedding']
embedding = openai.Embedding.create(input=text_string, engine=model_id)['data'][0]['embedding']
```

An example of how to call the embeddings method is shown in the [get embeddings notebook](https://github.com/openai/openai-python/blob/main/examples/embeddings/Get_embeddings.ipynb).
Expand Down
11 changes: 6 additions & 5 deletions examples/embeddings/Get_embeddings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"source": [
"import openai\n",
"\n",
"embedding = openai.Engine(id=\"davinci-similarity\").embeddings(input=\"Sample document text goes here\")['data'][0]['embedding']\n",
"embedding = openai.Embedding.create(input=\"Sample document text goes here\", engine=\"text-similarity-davinci-001\")['data'][0]['embedding']\n",
"len(embedding)"
]
},
Expand All @@ -50,14 +50,15 @@
"from tenacity import retry, wait_random_exponential, stop_after_attempt\n",
"\n",
"@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))\n",
"def get_embedding(text, engine=\"davinci-similarity\"):\n",
"def get_embedding(text, engine=\"text-similarity-davinci-001\"):\n",
"\n",
"\n",
" # replace newlines, which can negatively affect performance.\n",
" text = text.replace(\"\\n\", \" \")\n",
"\n",
" return openai.Engine(id=engine).embeddings(input = [text])['data'][0]['embedding']\n",
" return openai.Embedding.create(input=[text], engine=engine)['data'][0]['embedding']\n",
"\n",
"embedding = get_embedding(\"Sample query text goes here\", engine=\"ada-search-query\")\n",
"embedding = get_embedding(\"Sample query text goes here\", engine=\"text-search-ada-query-001\")\n",
"print(len(embedding))"
]
},
Expand All @@ -75,7 +76,7 @@
}
],
"source": [
"embedding = get_embedding(\"Sample document text goes here\", engine=\"ada-search-document\")\n",
"embedding = get_embedding(\"Sample document text goes here\", engine=\"text-search-ada-doc-001\")\n",
"print(len(embedding))"
]
}
Expand Down
4 changes: 3 additions & 1 deletion openai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Answer,
Classification,
Completion,
Embedding,
Engine,
ErrorObject,
File,
Expand All @@ -31,7 +32,7 @@
proxy = None
app_info = None
enable_telemetry = False # Ignored; the telemetry feature was removed.
ca_bundle_path = os.path.join(os.path.dirname(__file__), "data/ca-certificates.crt")
ca_bundle_path = None # No longer used, feature was removed
debug = False
log = None # Set to either 'debug' or 'info', controls console logging

Expand All @@ -40,6 +41,7 @@
"Answer",
"Classification",
"Completion",
"Embedding",
"Engine",
"ErrorObject",
"File",
Expand Down
1 change: 0 additions & 1 deletion openai/api_requestor.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def _make_session() -> requests.Session:
proxies = _requests_proxies_arg(openai.proxy)
if proxies:
s.proxies = proxies
s.verify = openai.ca_bundle_path
s.mount(
"https://",
requests.adapters.HTTPAdapter(max_retries=MAX_CONNECTION_RETRIES),
Expand Down
1 change: 1 addition & 0 deletions openai/api_resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from openai.api_resources.answer import Answer # noqa: F401
from openai.api_resources.classification import Classification # noqa: F401
from openai.api_resources.completion import Completion # noqa: F401
from openai.api_resources.embedding import Embedding # noqa: F401
from openai.api_resources.engine import Engine # noqa: F401
from openai.api_resources.error_object import ErrorObject # noqa: F401
from openai.api_resources.file import File # noqa: F401
Expand Down
58 changes: 58 additions & 0 deletions openai/api_resources/embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import base64
import time

import numpy as np

from openai import util
from openai.api_resources.abstract import DeletableAPIResource, ListableAPIResource
from openai.api_resources.abstract.engine_api_resource import EngineAPIResource
from openai.error import InvalidRequestError, TryAgain


class Embedding(EngineAPIResource, ListableAPIResource, DeletableAPIResource):
    """API resource for creating text embeddings via the /embeddings endpoint.

    Requires either an ``engine`` or ``model`` parameter identifying the
    embedding model to use.
    """

    engine_required = True
    OBJECT_NAME = "embedding"

    @classmethod
    def create(cls, *args, **kwargs):
        """
        Creates a new embedding for the provided input and parameters.

        See https://beta.openai.com/docs/api-reference/embeddings for a list
        of valid parameters.

        Raises:
            InvalidRequestError: if neither ``engine`` nor ``model`` is given.
            TryAgain: re-raised once the optional ``timeout`` (seconds) budget
                is exhausted while waiting for the model to warm up.
        """
        start = time.time()
        timeout = kwargs.pop("timeout", None)
        if kwargs.get("model") is None and kwargs.get("engine") is None:
            raise InvalidRequestError(
                "Must provide an 'engine' or 'model' parameter to create an Embedding.",
                param="engine",
            )

        user_provided_encoding_format = kwargs.get("encoding_format", None)

        # If encoding format was not explicitly specified, we opaquely use
        # base64 for performance (smaller payloads than JSON float arrays).
        if not user_provided_encoding_format:
            kwargs["encoding_format"] = "base64"

        while True:
            try:
                response = super().create(*args, **kwargs)

                # If a user explicitly specified base64, return the encoded
                # string as-is; decoding applies only to the default case
                # where we injected the format ourselves.
                if not user_provided_encoding_format:
                    for data in response.data:
                        # If an engine isn't using this optimization the
                        # embedding is already a list of floats — leave it.
                        # isinstance (not type == str) is the correct,
                        # subclass-safe type check.
                        if isinstance(data["embedding"], str):
                            data["embedding"] = np.frombuffer(
                                base64.b64decode(data["embedding"]), dtype="float32"
                            ).tolist()

                return response
            except TryAgain as e:
                # Give up and propagate once the caller's timeout is spent.
                if timeout is not None and time.time() > start + timeout:
                    raise

                util.log_info("Waiting for model to warm up", error=e)
4 changes: 4 additions & 0 deletions openai/api_resources/engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import time
import warnings

from openai import util
from openai.api_resources.abstract import ListableAPIResource, UpdateableAPIResource
Expand Down Expand Up @@ -29,4 +30,7 @@ def search(self, **params):
return self.request("post", self.instance_url() + "/search", params)

def embeddings(self, **params):
    """Deprecated engine-scoped embeddings call; prefer ``Embedding.create``.

    Emits a ``DeprecationWarning`` and forwards *params* to the engine's
    ``/embeddings`` endpoint via a POST request.
    """
    warnings.warn(
        "Engine.embeddings is deprecated, use Embedding.create", DeprecationWarning
    )
    endpoint = self.instance_url() + "/embeddings"
    return self.request("post", endpoint, params)
Loading