Skip to content

Commit

Permalink
fork afs, revert tokenizer namespace (for now)
Browse files (browse the repository at this point in the history)
  • Loading branch information
RJKeevil authored and riccardopinosio committed Jul 18, 2024
1 parent 86d2fef commit 819435e
Show file tree
Hide file tree
Showing 10 changed files with 26 additions and 31 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ARG CGO_LDFLAGS="-L./usr/lib/libtokenizers.a"

FROM --platform=$BUILD_PLATFORM rust:$RUST_VERSION AS tokenizer

- RUN git clone https://github.com/knights-analytics/tokenizers -b rebase && \
+ RUN git clone https://github.com/knights-analytics/tokenizers -b namespace && \
cd tokenizers && \
cargo build --release

Expand Down
14 changes: 4 additions & 10 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
module github.com/knights-analytics/hugot

go 1.20

replace github.com/viant/afsc => github.com/knights-analytics/afsc v0.0.0-20240425201009-7e46526445df

replace github.com/daulet/tokenizers => github.com/knights-analytics/tokenizers v0.0.0-20240717085127-ca3ae0687267
go 1.22

require (
github.com/bodaay/HuggingFaceModelDownloader v0.0.0-20240307153905-2f38356a6d6c
github.com/daulet/tokenizers v0.8.0
github.com/json-iterator/go v1.1.12
github.com/knights-analytics/afs v0.0.0-20240718075927-c142c3b151dc
github.com/knights-analytics/afsc v0.0.0-20240718080420-8bb598c8addc
github.com/knights-analytics/tokenizers v0.13.0
github.com/mattn/go-isatty v0.0.20
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.2
github.com/viant/afs v1.25.1
github.com/viant/afsc v1.9.2
github.com/yalue/onnxruntime_go v1.10.0
golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7
)
Expand All @@ -26,7 +22,6 @@ require (
github.com/fatih/color v1.17.0 // indirect
github.com/go-errors/errors v1.5.1 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
Expand All @@ -35,7 +30,6 @@ require (
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
golang.org/x/crypto v0.25.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sys v0.22.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
20 changes: 9 additions & 11 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ github.com/bodaay/HuggingFaceModelDownloader v0.0.0-20240307153905-2f38356a6d6c
github.com/bodaay/HuggingFaceModelDownloader v0.0.0-20240307153905-2f38356a6d6c/go.mod h1:p6JQ7mJjWx82F+SrFfj9RkoHlKEGXR4959uX/vkMbzE=
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand All @@ -19,13 +18,12 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/knights-analytics/afsc v0.0.0-20240425201009-7e46526445df h1:rVna1iJaI7gj5RonGys0dZ0iLy7upULdcbRQd9F2qg8=
github.com/knights-analytics/afsc v0.0.0-20240425201009-7e46526445df/go.mod h1:yZo80n1EB2eMwmmec7BekX6clpd7uY+joUpDRIBbeYs=
github.com/knights-analytics/tokenizers v0.0.0-20240717085127-ca3ae0687267 h1:M2jdyK5zl/AUe1ZBLUWqAAjSu6LwF9ZFegk+UBMjVjY=
github.com/knights-analytics/tokenizers v0.0.0-20240717085127-ca3ae0687267/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/knights-analytics/afs v0.0.0-20240718075927-c142c3b151dc h1:Sb44tb3EyV3C8HfPkfCxwSTZdslqjPpi3pzpsNFxZLg=
github.com/knights-analytics/afs v0.0.0-20240718075927-c142c3b151dc/go.mod h1:wABEWemufPlgyTjP1dQMZHfYpNy5QpnudpJzdUWYXGQ=
github.com/knights-analytics/afsc v0.0.0-20240718080420-8bb598c8addc h1:HacVNN/5UTKbp90iLlT5iGCqzxAF9AotolVDYSUMIf8=
github.com/knights-analytics/afsc v0.0.0-20240718080420-8bb598c8addc/go.mod h1:v4MjD7X6iX20Xo+ZCJ1t63loMLR58N3jls3ZY/wfkjs=
github.com/knights-analytics/tokenizers v0.13.0 h1:xQv7Ycw5NAmqrajeRt3mBN/adfwSpdN6U6IwCoL60FA=
github.com/knights-analytics/tokenizers v0.13.0/go.mod h1:QCmtYGTdiEQYNFOf+MiRSNYZvDnFjJONcs9ZUmfKK6g=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
Expand All @@ -48,8 +46,6 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI=
github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM=
github.com/viant/afs v1.25.1 h1:IPcqwzsPUaWqsSkQXoM1vXwQuRI6u7ZgqQHKQZ8Wxyg=
github.com/viant/afs v1.25.1/go.mod h1:rScbFd9LJPGTM8HOI8Kjwee0AZ+MZMupAvFpPg+Qdj4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yalue/onnxruntime_go v1.10.0 h1:om1yzOQYv/4GlsSP5HIZvS6G3WF3THv4x5rhO5AFERU=
Expand All @@ -65,9 +61,11 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk=
golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
3 changes: 2 additions & 1 deletion hugot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ import (
"strings"
"testing"

"github.com/stretchr/testify/assert"

"github.com/knights-analytics/hugot/pipelines"
util "github.com/knights-analytics/hugot/utils"
"github.com/stretchr/testify/assert"

ort "github.com/yalue/onnxruntime_go"
)
Expand Down
3 changes: 2 additions & 1 deletion pipelines/featureExtraction.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ import (

ort "github.com/yalue/onnxruntime_go"

"github.com/daulet/tokenizers"
"github.com/knights-analytics/tokenizers"

util "github.com/knights-analytics/hugot/utils"
)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"os"
"strings"

- "github.com/daulet/tokenizers"
+ "github.com/knights-analytics/tokenizers"
ort "github.com/yalue/onnxruntime_go"

util "github.com/knights-analytics/hugot/utils"
Expand Down
2 changes: 1 addition & 1 deletion pipelines/textClassification.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (

util "github.com/knights-analytics/hugot/utils"

"github.com/daulet/tokenizers"
jsoniter "github.com/json-iterator/go"
"github.com/knights-analytics/tokenizers"
ort "github.com/yalue/onnxruntime_go"
)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/tokenClassification.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import (

util "github.com/knights-analytics/hugot/utils"

"github.com/daulet/tokenizers"
jsoniter "github.com/json-iterator/go"
"github.com/knights-analytics/tokenizers"
)

// TokenClassificationPipeline is a go version of huggingface tokenClassificationPipeline.
Expand Down
5 changes: 3 additions & 2 deletions pipelines/zeroShotClassification.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ import (
"sync/atomic"
"time"

util "github.com/knights-analytics/hugot/utils"
ort "github.com/yalue/onnxruntime_go"

"github.com/daulet/tokenizers"
util "github.com/knights-analytics/hugot/utils"

jsoniter "github.com/json-iterator/go"
"github.com/knights-analytics/tokenizers"
)

/**
Expand Down
4 changes: 2 additions & 2 deletions utils/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
"path/filepath"
"strings"

- "github.com/viant/afs"
- _ "github.com/viant/afsc/s3"
+ "github.com/knights-analytics/afs"
+ _ "github.com/knights-analytics/afsc/s3"
)

var FileSystem = afs.New()
Expand Down

0 comments on commit 819435e

Please sign in to comment.