Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 17 additions & 35 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,50 +25,30 @@
<thead><tr><th>sub-module</th><th>doc</th><th>about</th></tr></thead>
<tbody>
<!-- Module bedrock -->
<tr><td><a href="./bedrock/">
<img src="https://img.shields.io/github/v/tag/kshard/embeddings?label=version&filter=bedrock/*"/>
<tr><td><a href="./llm/bedrock/">
<img src="https://img.shields.io/github/v/tag/kshard/embeddings?label=version&filter=llm/bedrock/*"/>
</a></td>
<td><a href="https://pkg.go.dev/github.com/kshard/embeddings/bedrock">
<td><a href="https://pkg.go.dev/github.com/kshard/embeddings/llm/bedrock">
<img src="https://img.shields.io/badge/doc-bedrock-007d9c?logo=go&logoColor=white&style=flat-square" />
</a></td>
<td>
AWS Bedrock embeddings models
</td></tr>
<!-- Module cache -->
<tr><td><a href="./cache/">
<img src="https://img.shields.io/github/v/tag/kshard/embeddings?label=version&filter=cache/*"/>
</a></td>
<td><a href="https://pkg.go.dev/github.com/kshard/embeddings/cache">
<img src="https://img.shields.io/badge/doc-cache-007d9c?logo=go&logoColor=white&style=flat-square" />
</a></td>
<td>
Caching vector embeddings
</td></tr>
<!-- Module openai -->
<tr><td><a href="./openai/">
<img src="https://img.shields.io/github/v/tag/kshard/embeddings?label=version&filter=openai/*"/>
<tr><td><a href="./llm/openai/">
<img src="https://img.shields.io/github/v/tag/kshard/embeddings?label=version&filter=llm/openai/*"/>
</a></td>
<td><a href="https://pkg.go.dev/github.com/kshard/embeddings/openai">
<td><a href="https://pkg.go.dev/github.com/kshard/embeddings/llm/openai">
<img src="https://img.shields.io/badge/doc-openai-007d9c?logo=go&logoColor=white&style=flat-square" />
</a></td>
<td>
OpenAI embeddings models
</td></tr>
<!-- Module scanner -->
<tr><td><a href="./scanner/">
<img src="https://img.shields.io/github/v/tag/kshard/embeddings?label=version&filter=scanner/*"/>
</a></td>
<td><a href="https://pkg.go.dev/github.com/kshard/embeddings/scanner">
<img src="https://img.shields.io/badge/doc-scanner-007d9c?logo=go&logoColor=white&style=flat-square" />
</a></td>
<td>
Semantic chunking utility
</td></tr>
<!-- Module word2vec -->
<tr><td><a href="./scanner/">
<img src="https://img.shields.io/github/v/tag/kshard/embeddings?label=version&filter=word2vec/*"/>
<tr><td><a href="./llm/word2vec/">
<img src="https://img.shields.io/github/v/tag/kshard/embeddings?label=version&filter=llm/word2vec/*"/>
</a></td>
<td><a href="https://pkg.go.dev/github.com/kshard/embeddings/scanner">
<td><a href="https://pkg.go.dev/github.com/kshard/embeddings/llm/word2vec">
<img src="https://img.shields.io/badge/doc-word2vec-007d9c?logo=go&logoColor=white&style=flat-square" />
</a></td>
<td>
Expand All @@ -84,12 +64,17 @@
The library implements a generic trait to transform text into vector embeddings.

```go
type Embeddings interface {
Embedding(ctx context.Context, text string) ([]float32, error)
type Embedder interface {
Embedding(ctx context.Context, text string) (Embedding, error)
}
```

The library is structured from submodules, each implements the defined interface towards vendor.
The library defines common embedding I/O utilities through this generic trait:
* Caching of embeddings
* Embeddings I/O Rate Limiter
* Semantic Chunking (Scanning)

The library also defines adapters for common text embeddings APIs, each defined as its own submodule:
* [AWS BedRock embeddings](https://docs.aws.amazon.com/bedrock/latest/userguide/titan-embedding-models.html)
* [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings)
* [word2vec model](https://github.com/fogfish/word2vec)
Expand All @@ -106,9 +91,6 @@ text, err := embeddings.New(/* config options */)

// Calculate embeddings
vector, err := text.Embedding(context.Background(), "text embeddings")

// Checks number of tokens consumed by active sessions
text.ConsumedTokens()
```

## How To Contribute
Expand Down
58 changes: 58 additions & 0 deletions examples/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
module github.com/kshard/embeddings/examples

go 1.23.0

replace github.com/kshard/embeddings/llm/bedrock => ../llm/bedrock

replace github.com/kshard/embeddings/llm/openai => ../llm/openai

// replace github.com/kshard/embeddings/llm/word2vec => ../llm/word2vec
// replace github.com/fogfish/word2vec => ./word2vec

require (
github.com/kshard/embeddings v0.2.0
github.com/kshard/embeddings/llm/bedrock v0.0.0
github.com/kshard/embeddings/llm/openai v0.0.0
// github.com/kshard/embeddings/llm/word2vec v0.0.0
)

require (
github.com/Masterminds/semver/v3 v3.2.1 // indirect
github.com/ajg/form v1.5.2-0.20200323032839-9aeb3cf462e1 // indirect
github.com/aws/aws-cdk-go/awscdk/v2 v2.150.0 // indirect
github.com/aws/aws-sdk-go-v2 v1.25.2 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.1 // indirect
github.com/aws/aws-sdk-go-v2/config v1.27.4 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.17.4 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.2 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.2 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.2 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect
github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.7.1 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.1 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.2 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.20.1 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.1 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.28.1 // indirect
github.com/aws/constructs-go/constructs/v10 v10.3.0 // indirect
github.com/aws/jsii-runtime-go v1.101.0 // indirect
github.com/aws/smithy-go v1.20.1 // indirect
github.com/cdklabs/awscdk-asset-awscli-go/awscliv1/v2 v2.2.202 // indirect
github.com/cdklabs/awscdk-asset-kubectl-go/kubectlv20/v2 v2.1.2 // indirect
github.com/cdklabs/awscdk-asset-node-proxy-agent-go/nodeproxyagentv6/v2 v2.0.3 // indirect
github.com/fatih/color v1.17.0 // indirect
github.com/fogfish/golem/hseq v1.3.0 // indirect
github.com/fogfish/golem/optics v0.14.0 // indirect
github.com/fogfish/gurl/v2 v2.10.0 // indirect
github.com/fogfish/opts v0.0.5 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/jdxcode/netrc v1.0.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/yuin/goldmark v1.4.13 // indirect
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect
golang.org/x/mod v0.18.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/tools v0.22.0 // indirect
)
111 changes: 111 additions & 0 deletions examples/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0=
github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
github.com/ajg/form v1.5.2-0.20200323032839-9aeb3cf462e1 h1:8Qzi+0Uch1VJvdrOhJ8U8FqoPLbUdETPgMqGJ6DSMSQ=
github.com/ajg/form v1.5.2-0.20200323032839-9aeb3cf462e1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY=
github.com/aws/aws-cdk-go/awscdk/v2 v2.150.0 h1:5hg6OOh771WR7qRUpfZdDlzbqSraC31n6jgAoDwSdCE=
github.com/aws/aws-cdk-go/awscdk/v2 v2.150.0/go.mod h1:lpJq6B2AsZbjSvlJbLmCwjKwuT7voQc3xmFjEbJOTdA=
github.com/aws/aws-sdk-go-v2 v1.25.2 h1:/uiG1avJRgLGiQM9X3qJM8+Qa6KRGK5rRPuXE0HUM+w=
github.com/aws/aws-sdk-go-v2 v1.25.2/go.mod h1:Evoc5AsmtveRt1komDwIsjHFyrP5tDuF1D1U+6z6pNo=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.1 h1:gTK2uhtAPtFcdRRJilZPx8uJLL2J85xK11nKtWL0wfU=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.1/go.mod h1:sxpLb+nZk7tIfCWChfd+h4QwHNUR57d8hA1cleTkjJo=
github.com/aws/aws-sdk-go-v2/config v1.27.4 h1:AhfWb5ZwimdsYTgP7Od8E9L1u4sKmDW2ZVeLcf2O42M=
github.com/aws/aws-sdk-go-v2/config v1.27.4/go.mod h1:zq2FFXK3A416kiukwpsd+rD4ny6JC7QSkp4QdN1Mp2g=
github.com/aws/aws-sdk-go-v2/credentials v1.17.4 h1:h5Vztbd8qLppiPwX+y0Q6WiwMZgpd9keKe2EAENgAuI=
github.com/aws/aws-sdk-go-v2/credentials v1.17.4/go.mod h1:+30tpwrkOgvkJL1rUZuRLoxcJwtI/OkeBLYnHxJtVe0=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.2 h1:AK0J8iYBFeUk2Ax7O8YpLtFsfhdOByh2QIkHmigpRYk=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.2/go.mod h1:iRlGzMix0SExQEviAyptRWRGdYNo3+ufW/lCzvKVTUc=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.2 h1:bNo4LagzUKbjdxE0tIcR9pMzLR2U/Tgie1Hq1HQ3iH8=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.2/go.mod h1:wRQv0nN6v9wDXuWThpovGQjqF1HFdcgWjporw14lS8k=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.2 h1:EtOU5jsPdIQNP+6Q2C5e3d65NKT1PeCiQk+9OdzO12Q=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.2/go.mod h1:tyF5sKccmDz0Bv4NrstEr+/9YkSPJHrcO7UsUKf7pWM=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY=
github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.7.1 h1:3QbuXUFmX7uLRWsA4wbj1G2jNTgvK2MdCfzbO0VkeSE=
github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.7.1/go.mod h1:0S4p4IdEhakLLKoVwmI3vIoOtIt17TFo4QUFuez9O0Y=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.1 h1:EyBZibRTVAs6ECHZOw5/wlylS9OcTzwyjeQMudmREjE=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.1/go.mod h1:JKpmtYhhPs7D97NL/ltqz7yCkERFW5dOlHyVl66ZYF8=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.2 h1:5ffmXjPtwRExp1zc7gENLgCPyHFbhEPwVTkTiH9niSk=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.2/go.mod h1:Ru7vg1iQ7cR4i7SZ/JTLYN9kaXtbL69UdgG0OQWQxW0=
github.com/aws/aws-sdk-go-v2/service/sso v1.20.1 h1:utEGkfdQ4L6YW/ietH7111ZYglLJvS+sLriHJ1NBJEQ=
github.com/aws/aws-sdk-go-v2/service/sso v1.20.1/go.mod h1:RsYqzYr2F2oPDdpy+PdhephuZxTfjHQe7SOBcZGoAU8=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.1 h1:9/GylMS45hGGFCcMrUZDVayQE1jYSIN6da9jo7RAYIw=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.1/go.mod h1:YjAPFn4kGFqKC54VsHs5fn5B6d+PCY2tziEa3U/GB5Y=
github.com/aws/aws-sdk-go-v2/service/sts v1.28.1 h1:3I2cBEYgKhrWlwyZgfpSO2BpaMY1LHPqXYk/QGlu2ew=
github.com/aws/aws-sdk-go-v2/service/sts v1.28.1/go.mod h1:uQ7YYKZt3adCRrdCBREm1CD3efFLOUNH77MrUCvx5oA=
github.com/aws/constructs-go/constructs/v10 v10.3.0 h1:LsjBIMiaDX/vqrXWhzTquBJ9pPdi02/H+z1DCwg0PEM=
github.com/aws/constructs-go/constructs/v10 v10.3.0/go.mod h1:GgzwIwoRJ2UYsr3SU+JhAl+gq5j39bEMYf8ev3J+s9s=
github.com/aws/jsii-runtime-go v1.101.0 h1:x4rWNWRz7uDhVN0qSO7T6cG0VAhQ9300s5DjWUrXmWY=
github.com/aws/jsii-runtime-go v1.101.0/go.mod h1:4L4Qmve/HSwM5hXV5ZowR2gBNb9zqkUtycaaN6aZ3mg=
github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw=
github.com/aws/smithy-go v1.20.1/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E=
github.com/cdklabs/awscdk-asset-awscli-go/awscliv1/v2 v2.2.202 h1:VixXB9DnHN8oP7pXipq8GVFPjWCOdeNxIaS/ZyUwTkI=
github.com/cdklabs/awscdk-asset-awscli-go/awscliv1/v2 v2.2.202/go.mod h1:iPUti/SWjA3XAS3CpnLciFjS8TN9Y+8mdZgDfSgcyus=
github.com/cdklabs/awscdk-asset-kubectl-go/kubectlv20/v2 v2.1.2 h1:k+WD+6cERd59Mao84v0QtRrcdZuuSMfzlEmuIypKnVs=
github.com/cdklabs/awscdk-asset-kubectl-go/kubectlv20/v2 v2.1.2/go.mod h1:CvFHBo0qcg8LUkJqIxQtP1rD/sNGv9bX3L2vHT2FUAo=
github.com/cdklabs/awscdk-asset-node-proxy-agent-go/nodeproxyagentv6/v2 v2.0.3 h1:8NLWOIVaxAtpUXv5reojlAeDP7R8yswm9mDONf7F/3o=
github.com/cdklabs/awscdk-asset-node-proxy-agent-go/nodeproxyagentv6/v2 v2.0.3/go.mod h1:ZjFqfhYpCLzh4z7ChcHCrkXfqCuEiRlNApDfJd6plts=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4=
github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI=
github.com/fogfish/golem/hseq v1.3.0 h1:WIJViOF7vsPHvqVLzFrIz4QrBI4EPTC34esrQnjqUvk=
github.com/fogfish/golem/hseq v1.3.0/go.mod h1:17XORt8nNKl6KOhF43MHSmjK8NksbkBsohAoJGiinUs=
github.com/fogfish/golem/optics v0.14.0 h1:8XFZ6rlr6GlwDPB/jUtEcPbFngbpY9DfArDXcFN2mts=
github.com/fogfish/golem/optics v0.14.0/go.mod h1:aTXUA/VC6yu3zbUN1Tmy4Z4IW0jxfDFF4c2UB5MuwkA=
github.com/fogfish/gurl/v2 v2.10.0 h1:91qNyuYG6H+qHEqrPIogct1e8WUeH/QUFWrBG7+u5i8=
github.com/fogfish/gurl/v2 v2.10.0/go.mod h1:7T4FFZiWmEXVYnTgSdqEbAM/bwPfWSkEYgaVAsVSIso=
github.com/fogfish/it/v2 v2.2.1 h1:NuuaENAZka8XiJkEj2Q6THRsHSwleC/BLDux82NvkII=
github.com/fogfish/it/v2 v2.2.1/go.mod h1:HHwufnTaZTvlRVnSesPl49HzzlMrQtweKbf+8Co/ll4=
github.com/fogfish/opts v0.0.5 h1:Bh3Nucr1kx7G1F0Tq3DxO14/qYgmR6C2GjWr2k6O+Oc=
github.com/fogfish/opts v0.0.5/go.mod h1:+HM1YrMsTzfouZRoHfPOsGT9VZw+0ZBKZ36PMqoNFqM=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/jdxcode/netrc v1.0.0 h1:tJR3fyzTcjDi22t30pCdpOT8WJ5gb32zfYE1hFNCOjk=
github.com/jdxcode/netrc v1.0.0/go.mod h1:Zi/ZFkEqFHTm7qkjyNJjaWH4LQA9LQhGJyF0lTYGpxw=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kshard/embeddings v0.2.0 h1:6qCxRHgXmtnrJ5fxEhd/hVEAoEf8ZLh9uxLO75nTSk8=
github.com/kshard/embeddings v0.2.0/go.mod h1:brIUA6EPtFWO4RRrye7CWi6rAd003xIC2rHZkjq2dDY=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug=
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA=
golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U=
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
94 changes: 94 additions & 0 deletions examples/txt2vec/txt2vec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//
// Copyright (C) 2024 Dmitry Kolesnikov
//
// This file may be modified and distributed under the terms
// of the MIT license. See the LICENSE file for details.
// https://github.com/kshard/embeddings
//

package main

//
// Command line utility to build embeddings for text file
//
// txt2vec {model} {text file}
//
// It prints each input line followed by its embedding vector to standard output
//

import (
"bufio"
"context"
"fmt"
"os"

"github.com/kshard/embeddings"
"github.com/kshard/embeddings/llm/bedrock"
"github.com/kshard/embeddings/llm/openai"
// "github.com/kshard/embeddings/llm/word2vec"
)

// main is the entry point of the txt2vec utility.
//
// It expects two command line arguments: the model provider identifier
// (passed to setup) and the path of a text file. Every line of the file is
// sent to the embedder, and the line followed by the components of its
// vector is printed to standard output, one input line per output line.
//
// Any failure (bad arguments, unreadable file, embedding error, read error)
// terminates the program with a panic.
func main() {
	// Validate the argument count before indexing os.Args; otherwise a
	// missing argument aborts with an index-out-of-range panic instead of
	// the usage message below.
	if len(os.Args) < 3 || os.Args[1] == "" || os.Args[2] == "" {
		panic(fmt.Errorf("bad input\ntxt2vec {model} {text file}"))
	}
	lpd := os.Args[1]
	ifl := os.Args[2]

	llm := setup(lpd)
	fd, err := os.Open(ifl)
	if err != nil {
		panic(err)
	}
	defer fd.Close()

	scanner := bufio.NewScanner(fd)
	for scanner.Scan() {
		word := scanner.Text()
		v32, err := llm.Embedding(context.Background(), word)
		if err != nil {
			panic(err)
		}

		fmt.Printf("%s ", word)
		for _, f := range v32.Vector {
			fmt.Printf("%f ", f)
		}
		fmt.Println()
	}

	// Scan reports false both at end of input and on failure (I/O error,
	// line longer than the scanner buffer). Surface the failure rather than
	// silently producing truncated output.
	if err := scanner.Err(); err != nil {
		panic(err)
	}
}

// setup constructs the embedder selected by the provider identifier lpd.
//
// Recognized identifiers:
//   - "aws": client created by bedrock.New with the WithTitanV2,
//     WithEmbeddingSize256 and WithRegion("us-east-1") options.
//   - "oai": client created by openai.New with the WithLLM(TEXT_ADA_002)
//     and WithNetRC("api.openai.com") options.
//
// The function panics when the identifier is not recognized or when the
// selected constructor returns an error.
func setup(lpd string) embeddings.Embedder {
	var (
		provider embeddings.Embedder
		err      error
	)

	switch lpd {
	case "aws":
		provider, err = bedrock.New(
			bedrock.WithTitanV2,
			bedrock.WithEmbeddingSize256,
			bedrock.WithRegion("us-east-1"),
		)
	case "oai":
		provider, err = openai.New(
			openai.WithLLM(openai.TEXT_ADA_002),
			openai.WithNetRC("api.openai.com"),
		)
	// The word2vec provider is disabled together with its import:
	//
	// case "w2v":
	// 	provider, err = word2vec.New(
	// 		word2vec.WithLLM("~/devel/datasets/word2vec/t3bln-v300w5e5s1h05-en.bin"),
	// 		word2vec.WithEmbeddingSize(300),
	// 	)
	default:
		panic(fmt.Errorf("unknown LLM provider, required one of aws, oai."))
	}

	// Single failure check shared by every provider constructor above.
	if err != nil {
		panic(err)
	}

	return provider
}
Loading
Loading