Skip to content

Commit

Permalink
[tokenizers] Use tokenizers from rust.io [i.e. the crates.io registry] (#3476)
Browse files (browse the repository at this point in the history)
  • Loading branch information
xyang16 authored Sep 16, 2024
1 parent 531b1f0 commit bc31aff
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 20 deletions.
8 changes: 0 additions & 8 deletions extensions/tokenizers/build.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,6 @@
@rem choco install rust -y
@rem choco install jdk8 -y

set VERSION=v"%1"

if exist "tokenizers" (
echo Found "tokenizers"
) else (
git clone https://github.com/huggingface/tokenizers -b %VERSION%
)

if exist build rd /q /s build
md build\classes

Expand Down
4 changes: 2 additions & 2 deletions extensions/tokenizers/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,11 @@ tasks {
if ("mac" in os || "linux" in os) {
val arch = if (arch == "amd64") "x86_64" else arch
exec {
commandLine("bash", "build.sh", libs.versions.tokenizers.get(), arch, flavor)
commandLine("bash", "build.sh", arch, flavor)
}
} else
exec {
commandLine("${project.projectDir}/build.cmd", libs.versions.tokenizers.get())
commandLine("${project.projectDir}/build.cmd")
}

// for ci to upload to S3
Expand Down
11 changes: 2 additions & 9 deletions extensions/tokenizers/build.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
#!/usr/bin/env bash

set -e
WORK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLATFORM=$(uname | tr '[:upper:]' '[:lower:]')

VERSION=v$1
ARCH=$2
FLAVOR=$3

pushd "$WORK_DIR"
if [ ! -d "tokenizers" ]; then
git clone https://github.com/huggingface/tokenizers -b "$VERSION"
fi
ARCH=$1
FLAVOR=$2

if [ ! -d "build" ]; then
mkdir build
Expand Down
2 changes: 1 addition & 1 deletion extensions/tokenizers/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ candle-flash-attn = { version = "*", optional = true }
candle-cublaslt = { git = "https://github.com/huggingface/candle-cublaslt", rev = "cf789b7dd6d4abb19b03b9556442f94f0588b4a0", optional = true }
candle-layer-norm = { git = "https://github.com/xyang16/candle-layer-norm", rev = "e574de6a7f88bafbede8edf9ee43170c6a8ce51a", optional = true }
candle-rotary = { git = "https://github.com/huggingface/candle-rotary", rev = "0a718a0856569a92f3112e64f10d07e4447822e8", optional = true }
tokenizers = { path = "../tokenizers/tokenizers", version = "*", features = ["http"] }
tokenizers = { version = "0.20.0", features = ["http"] }
half = "2.4.0"
tracing = "0.1.40"
safetensors = "0.4.3"
Expand Down

0 comments on commit bc31aff

Please sign in to comment.