@@ -31,9 +31,9 @@ extension StaticEmbeddings {
31
31
let tokenizer =
32
32
if let tokenizerConfig = loadConfig. tokenizerConfig {
33
33
try AutoTokenizer . from (
34
- tokenizerDataFile : modelFolder. appendingPathComponent (
35
- tokenizerConfig. dataFileName ) ,
36
- tokenizerClass : tokenizerConfig. tokenizerClass
34
+ modelFolder : modelFolder,
35
+ tokenizerData : tokenizerConfig. data ,
36
+ tokenizerConfig : tokenizerConfig. config
37
37
)
38
38
} else {
39
39
try await AutoTokenizer . from ( modelFolder: modelFolder)
@@ -65,17 +65,35 @@ extension StaticEmbeddings {
65
65
66
66
extension AutoTokenizer {
    /// Builds a tokenizer from two configuration sources rooted at a model folder.
    ///
    /// Each source is turned into a `Config` by `resolveConfig(_:in:)` and the
    /// results are forwarded to `AutoTokenizer.from(tokenizerConfig:tokenizerData:)`.
    ///
    /// - Parameters:
    ///   - modelFolder: Folder against which file-based sources are resolved.
    ///   - tokenizerData: Source of the tokenizer data.
    ///   - tokenizerConfig: Source of the tokenizer configuration.
    /// - Returns: The tokenizer produced by the forwarded call.
    /// - Throws: Any error raised while resolving either source or while
    ///   constructing the tokenizer.
    static func from(
        modelFolder: URL,
        tokenizerData: TokenizerConfigType,
        tokenizerConfig: TokenizerConfigType
    ) throws -> any Tokenizer {
        // The configuration is resolved before the data, so a failure in the
        // configuration source is the first error surfaced.
        let resolvedConfig = try resolveConfig(tokenizerConfig, in: modelFolder)
        let resolvedData = try resolveConfig(tokenizerData, in: modelFolder)

        return try AutoTokenizer.from(
            tokenizerConfig: resolvedConfig,
            tokenizerData: resolvedData
        )
    }
}
80
+
81
/// Converts a tokenizer configuration source into a `Config`.
///
/// - Parameters:
///   - tokenizerConfig: Either an in-memory dictionary (`.data`) or a path
///     (`.filePath`) that is appended to `modelFolder` and read as JSON.
///   - modelFolder: Base folder used to locate a `.filePath` source.
/// - Returns: A `Config` wrapping the dictionary obtained from the source.
/// - Throws: Whatever `loadJSONConfig(at:)` throws when a `.filePath` source
///   cannot be read or parsed.
func resolveConfig(_ tokenizerConfig: TokenizerConfigType, in modelFolder: URL) throws -> Config {
    switch tokenizerConfig {
    case let .data(inlineValues):
        // Already in memory: wrap it as-is.
        return Config(inlineValues as [NSString: Any])
    case let .filePath(relativePath):
        let jsonObject = try loadJSONConfig(at: modelFolder.appendingPathComponent(relativePath))
        return Config(jsonObject as [NSString: Any])
    }
}
91
+
92
/// Reads the file at `filePath` and parses it as a JSON object.
///
/// - Parameter filePath: Location of the JSON file to read.
/// - Returns: The top-level JSON value as a `[String: Any]` dictionary.
/// - Throws: The error from `Data(contentsOf:)` or `JSONSerialization` when the
///   file cannot be read or parsed, or `EmbeddingsError.invalidFile` when the
///   top-level JSON value is not a `[String: Any]` dictionary.
func loadJSONConfig(at filePath: URL) throws -> [String: Any] {
    let rawBytes = try Data(contentsOf: filePath)
    let jsonObject = try JSONSerialization.jsonObject(with: rawBytes, options: [])
    if let dictionary = jsonObject as? [String: Any] {
        return dictionary
    }
    // Valid JSON whose root is not a dictionary (e.g. an array).
    throw EmbeddingsError.invalidFile
}
0 commit comments