From 819435eced4adbb5dc5ec77e97598a3624fcc00c Mon Sep 17 00:00:00 2001 From: Rob Keevil Date: Thu, 18 Jul 2024 10:14:11 +0200 Subject: [PATCH] fork afs, revert tokenizer namespace (for now) --- Dockerfile | 2 +- go.mod | 14 ++++---------- go.sum | 20 +++++++++----------- hugot_test.go | 3 ++- pipelines/featureExtraction.go | 3 ++- pipelines/pipeline.go | 2 +- pipelines/textClassification.go | 2 +- pipelines/tokenClassification.go | 2 +- pipelines/zeroShotClassification.go | 5 +++-- utils/file.go | 4 ++-- 10 files changed, 26 insertions(+), 31 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2abacf8..b33e884 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ARG CGO_LDFLAGS="-L./usr/lib/libtokenizers.a" FROM --platform=$BUILD_PLATFORM rust:$RUST_VERSION AS tokenizer -RUN git clone https://github.com/knights-analytics/tokenizers -b rebase && \ +RUN git clone https://github.com/knights-analytics/tokenizers -b namespace && \ cd tokenizers && \ cargo build --release diff --git a/go.mod b/go.mod index bf876e9..d8a5d39 100644 --- a/go.mod +++ b/go.mod @@ -1,20 +1,16 @@ module github.com/knights-analytics/hugot -go 1.20 - -replace github.com/viant/afsc => github.com/knights-analytics/afsc v0.0.0-20240425201009-7e46526445df - -replace github.com/daulet/tokenizers => github.com/knights-analytics/tokenizers v0.0.0-20240717085127-ca3ae0687267 +go 1.22 require ( github.com/bodaay/HuggingFaceModelDownloader v0.0.0-20240307153905-2f38356a6d6c - github.com/daulet/tokenizers v0.8.0 github.com/json-iterator/go v1.1.12 + github.com/knights-analytics/afs v0.0.0-20240718075927-c142c3b151dc + github.com/knights-analytics/afsc v0.0.0-20240718080420-8bb598c8addc + github.com/knights-analytics/tokenizers v0.13.0 github.com/mattn/go-isatty v0.0.20 github.com/stretchr/testify v1.9.0 github.com/urfave/cli/v2 v2.27.2 - github.com/viant/afs v1.25.1 - github.com/viant/afsc v1.9.2 github.com/yalue/onnxruntime_go v1.10.0 golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 ) @@ -26,7 +22,6 @@ require ( github.com/fatih/color v1.17.0 // indirect github.com/go-errors/errors v1.5.1 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect - github.com/kr/text v0.2.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect @@ -35,7 +30,6 @@ require ( github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect golang.org/x/crypto v0.25.0 // indirect - golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sys v0.22.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 826cfcf..19ead31 100644 --- a/go.sum +++ b/go.sum @@ -4,7 +4,6 @@ github.com/bodaay/HuggingFaceModelDownloader v0.0.0-20240307153905-2f38356a6d6c github.com/bodaay/HuggingFaceModelDownloader v0.0.0-20240307153905-2f38356a6d6c/go.mod h1:p6JQ7mJjWx82F+SrFfj9RkoHlKEGXR4959uX/vkMbzE= github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -19,13 +18,12 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/knights-analytics/afsc v0.0.0-20240425201009-7e46526445df h1:rVna1iJaI7gj5RonGys0dZ0iLy7upULdcbRQd9F2qg8= -github.com/knights-analytics/afsc v0.0.0-20240425201009-7e46526445df/go.mod h1:yZo80n1EB2eMwmmec7BekX6clpd7uY+joUpDRIBbeYs= -github.com/knights-analytics/tokenizers v0.0.0-20240717085127-ca3ae0687267 h1:M2jdyK5zl/AUe1ZBLUWqAAjSu6LwF9ZFegk+UBMjVjY= -github.com/knights-analytics/tokenizers v0.0.0-20240717085127-ca3ae0687267/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/knights-analytics/afs v0.0.0-20240718075927-c142c3b151dc h1:Sb44tb3EyV3C8HfPkfCxwSTZdslqjPpi3pzpsNFxZLg= +github.com/knights-analytics/afs v0.0.0-20240718075927-c142c3b151dc/go.mod h1:wABEWemufPlgyTjP1dQMZHfYpNy5QpnudpJzdUWYXGQ= +github.com/knights-analytics/afsc v0.0.0-20240718080420-8bb598c8addc h1:HacVNN/5UTKbp90iLlT5iGCqzxAF9AotolVDYSUMIf8= +github.com/knights-analytics/afsc v0.0.0-20240718080420-8bb598c8addc/go.mod h1:v4MjD7X6iX20Xo+ZCJ1t63loMLR58N3jls3ZY/wfkjs= +github.com/knights-analytics/tokenizers v0.13.0 h1:xQv7Ycw5NAmqrajeRt3mBN/adfwSpdN6U6IwCoL60FA= +github.com/knights-analytics/tokenizers v0.13.0/go.mod h1:QCmtYGTdiEQYNFOf+MiRSNYZvDnFjJONcs9ZUmfKK6g= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -48,8 +46,6 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI= github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM= -github.com/viant/afs v1.25.1 h1:IPcqwzsPUaWqsSkQXoM1vXwQuRI6u7ZgqQHKQZ8Wxyg= -github.com/viant/afs v1.25.1/go.mod h1:rScbFd9LJPGTM8HOI8Kjwee0AZ+MZMupAvFpPg+Qdj4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yalue/onnxruntime_go v1.10.0 h1:om1yzOQYv/4GlsSP5HIZvS6G3WF3THv4x5rhO5AFERU= @@ -65,9 +61,11 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= +golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/hugot_test.go b/hugot_test.go index 75a4770..fd5769b 100644 --- a/hugot_test.go +++ b/hugot_test.go @@ -10,9 +10,10 @@ import ( "strings" "testing" + "github.com/stretchr/testify/assert" + "github.com/knights-analytics/hugot/pipelines" util "github.com/knights-analytics/hugot/utils" - "github.com/stretchr/testify/assert" ort "github.com/yalue/onnxruntime_go" ) diff --git a/pipelines/featureExtraction.go b/pipelines/featureExtraction.go index bd9a868..6324f33 100644 --- a/pipelines/featureExtraction.go +++ b/pipelines/featureExtraction.go @@ -10,7 +10,8 @@ import ( ort "github.com/yalue/onnxruntime_go" - "github.com/daulet/tokenizers" + "github.com/knights-analytics/tokenizers" + util "github.com/knights-analytics/hugot/utils" ) diff --git a/pipelines/pipeline.go b/pipelines/pipeline.go index cc7cd40..0cc2bb3 100644 --- a/pipelines/pipeline.go +++ b/pipelines/pipeline.go @@ -8,7 +8,7 @@ import ( "os" "strings" - "github.com/daulet/tokenizers" + "github.com/knights-analytics/tokenizers" ort "github.com/yalue/onnxruntime_go" util "github.com/knights-analytics/hugot/utils" diff --git a/pipelines/textClassification.go b/pipelines/textClassification.go index 5435d41..30d31d8 100644 --- a/pipelines/textClassification.go +++ b/pipelines/textClassification.go @@ -9,8 +9,8 @@ import ( util "github.com/knights-analytics/hugot/utils" - "github.com/daulet/tokenizers" jsoniter "github.com/json-iterator/go" + "github.com/knights-analytics/tokenizers" ort "github.com/yalue/onnxruntime_go" ) diff --git a/pipelines/tokenClassification.go b/pipelines/tokenClassification.go index 7128e9d..4907ce5 100644 --- a/pipelines/tokenClassification.go +++ b/pipelines/tokenClassification.go @@ -13,8 +13,8 @@ import ( util "github.com/knights-analytics/hugot/utils" - "github.com/daulet/tokenizers" jsoniter "github.com/json-iterator/go" + "github.com/knights-analytics/tokenizers" ) // TokenClassificationPipeline is a go version of huggingface tokenClassificationPipeline. diff --git a/pipelines/zeroShotClassification.go b/pipelines/zeroShotClassification.go index 9743013..419821e 100644 --- a/pipelines/zeroShotClassification.go +++ b/pipelines/zeroShotClassification.go @@ -12,11 +12,12 @@ import ( "sync/atomic" "time" - util "github.com/knights-analytics/hugot/utils" ort "github.com/yalue/onnxruntime_go" - "github.com/daulet/tokenizers" + util "github.com/knights-analytics/hugot/utils" + jsoniter "github.com/json-iterator/go" + "github.com/knights-analytics/tokenizers" ) /** diff --git a/utils/file.go b/utils/file.go index 2089b37..9e3ab43 100644 --- a/utils/file.go +++ b/utils/file.go @@ -7,8 +7,8 @@ import ( "path/filepath" "strings" - "github.com/viant/afs" - _ "github.com/viant/afsc/s3" + "github.com/knights-analytics/afs" + _ "github.com/knights-analytics/afsc/s3" ) var FileSystem = afs.New()