Skip to content

Commit

Permalink
Allow embeddings requests to be tokens or strings (sashabaranov#417)
Browse files Browse the repository at this point in the history
* Allow raw tokens to be used as embedding input

* fix linting issues (lines too long)

* add endpoint test for embedding from tokens

* remove redundant comments

* fix comment to match new param name

* change interface to any

* Rename methods and implement convert for base req

* add comments to CreateEmbeddings

* update tests

* shorten line length

* rename parameter
  • Loading branch information
jacksors authored Jul 5, 2023
1 parent 2042608 commit 5c7d882
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 6 deletions.
62 changes: 56 additions & 6 deletions embeddings.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,25 @@ type EmbeddingResponse struct {
Usage Usage `json:"usage"`
}

// EmbeddingRequestConverter is implemented by every request type that can be turned into
// the base EmbeddingRequest.
type EmbeddingRequestConverter interface {
	// Convert returns the receiver as an EmbeddingRequest. It is implemented by
	// EmbeddingRequest, EmbeddingRequestStrings and EmbeddingRequestTokens.
	Convert() EmbeddingRequest
}

// EmbeddingRequest is the base input to a create embeddings request. Its Convert method
// returns it unchanged.
type EmbeddingRequest struct {
	// Input is untyped: the Convert methods of EmbeddingRequestStrings and
	// EmbeddingRequestTokens store a []string and a [][]int here respectively.
	Input any `json:"input"`
	// Model is the embedding model to use.
	Model EmbeddingModel `json:"model"`
	// User is a unique identifier representing the end-user.
	User string `json:"user"`
}

// Convert returns r unchanged, so a plain EmbeddingRequest satisfies EmbeddingRequestConverter.
func (r EmbeddingRequest) Convert() EmbeddingRequest {
	return r
}

// EmbeddingRequestStrings is the input to a create embeddings request with a slice of strings.
type EmbeddingRequestStrings struct {
// Input is a slice of strings for which you want to generate an Embedding vector.
// Each input must not exceed 2048 tokens in length.
// Each input must not exceed 8192 tokens in length.
// OpenAI suggests replacing newlines (\n) in your input with a single space, as they
// have observed inferior results when newlines are present.
// E.g.
Expand All @@ -129,15 +144,50 @@ type EmbeddingRequest struct {
User string `json:"user"`
}

// Convert copies the Input, Model and User fields of r into a base EmbeddingRequest.
func (r EmbeddingRequestStrings) Convert() EmbeddingRequest {
	var base EmbeddingRequest
	base.Input = r.Input
	base.Model = r.Model
	base.User = r.User
	return base
}

// EmbeddingRequestTokens is the input to a create embeddings request whose inputs have
// already been converted to tokens.
type EmbeddingRequestTokens struct {
	// Input is a slice of slices of ints ([][]int) for which you want to generate an Embedding vector.
	// Each input must not exceed 8192 tokens in length.
	// E.g.
	//	{464, 2057, 373, 12625, 290, 262, 46612}
	// NOTE(review): the advice to replace newlines (\n) with a single space presumably applies to
	// the text before it is tokenized — confirm.
	Input [][]int `json:"input"`
	// ID of the model to use. You can use the List models API to see all of your available models,
	// or see our Model overview for descriptions of them.
	Model EmbeddingModel `json:"model"`
	// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse.
	User string `json:"user"`
}

// Convert copies the Input, Model and User fields of r into a base EmbeddingRequest.
func (r EmbeddingRequestTokens) Convert() EmbeddingRequest {
	var base EmbeddingRequest
	base.Input = r.Input
	base.Model = r.Model
	base.User = r.User
	return base
}

// CreateEmbeddings returns an EmbeddingResponse which will contain an Embedding for every item in the
// Input of the request produced by conv.Convert().
// https://beta.openai.com/docs/api-reference/embeddings/create
func (c *Client) CreateEmbeddings(ctx context.Context, request EmbeddingRequest) (resp EmbeddingResponse, err error) {
req, err := c.newRequest(ctx, http.MethodPost, c.fullURL("/embeddings", request.Model.String()), withBody(request))
//
// conv should be of type EmbeddingRequestStrings for embedding strings or EmbeddingRequestTokens
// for embedding groups of text already converted to tokens. A plain EmbeddingRequest is also accepted.
func (c *Client) CreateEmbeddings(ctx context.Context, conv EmbeddingRequestConverter) (res EmbeddingResponse, err error) { //nolint:lll
baseReq := conv.Convert()
req, err := c.newRequest(ctx, http.MethodPost, c.fullURL("/embeddings", baseReq.Model.String()), withBody(baseReq))
if err != nil {
return
}

err = c.sendRequest(req, &resp)
err = c.sendRequest(req, &res)

return
}
38 changes: 38 additions & 0 deletions embeddings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func TestEmbedding(t *testing.T) {
BabbageCodeSearchText,
}
for _, model := range embeddedModels {
// test embedding request with strings (simple embedding request)
embeddingReq := EmbeddingRequest{
Input: []string{
"The food was delicious and the waiter",
Expand All @@ -46,6 +47,34 @@ func TestEmbedding(t *testing.T) {
if !bytes.Contains(marshaled, []byte(`"model":"`+model.String()+`"`)) {
t.Fatalf("Expected embedding request to contain model field")
}

// test embedding request with strings
embeddingReqStrings := EmbeddingRequestStrings{
Input: []string{
"The food was delicious and the waiter",
"Other examples of embedding request",
},
Model: model,
}
marshaled, err = json.Marshal(embeddingReqStrings)
checks.NoError(t, err, "Could not marshal embedding request")
if !bytes.Contains(marshaled, []byte(`"model":"`+model.String()+`"`)) {
t.Fatalf("Expected embedding request to contain model field")
}

// test embedding request with tokens
embeddingReqTokens := EmbeddingRequestTokens{
Input: [][]int{
{464, 2057, 373, 12625, 290, 262, 46612},
{6395, 6096, 286, 11525, 12083, 2581},
},
Model: model,
}
marshaled, err = json.Marshal(embeddingReqTokens)
checks.NoError(t, err, "Could not marshal embedding request")
if !bytes.Contains(marshaled, []byte(`"model":"`+model.String()+`"`)) {
t.Fatalf("Expected embedding request to contain model field")
}
}
}

Expand Down Expand Up @@ -75,6 +104,15 @@ func TestEmbeddingEndpoint(t *testing.T) {
fmt.Fprintln(w, string(resBytes))
},
)
// test create embeddings with strings (simple embedding request)
_, err := client.CreateEmbeddings(context.Background(), EmbeddingRequest{})
checks.NoError(t, err, "CreateEmbeddings error")

// test create embeddings with strings
_, err = client.CreateEmbeddings(context.Background(), EmbeddingRequestStrings{})
checks.NoError(t, err, "CreateEmbeddings strings error")

// test create embeddings with tokens
_, err = client.CreateEmbeddings(context.Background(), EmbeddingRequestTokens{})
checks.NoError(t, err, "CreateEmbeddings tokens error")
}

0 comments on commit 5c7d882

Please sign in to comment.