@@ -34,10 +34,14 @@ import (
 	_ "github.com/ollama/ollama/model/models"
 )
 
+type contextList struct {
+	list []ml.Context
+}
+
 type Sequence struct {
-	// ctx for allocating tensors that last the lifetime of the sequence, such as
+	// ctxs are used for allocating tensors that last the lifetime of the sequence, such as
 	// multimodal embeddings
-	ctx ml.Context
+	ctxs *contextList
 
 	// batch index
 	iBatch int
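The new `contextList` type is a named owner for a batch of backend contexts, so that a single `runtime.AddCleanup` registration (Go 1.24+; see the hunk in `inputs` below) can release them as a group when the owner becomes unreachable. A minimal standalone sketch of that holder pattern, using a hypothetical `res` type rather than ollama's `ml.Context`:

```go
package main

import (
	"fmt"
	"runtime"
	"time"
)

// res stands in for a closable backend resource such as a context.
type res struct{ id int }

func (r *res) Close() { fmt.Println("released", r.id) }

// resList plays the role of contextList: a named owner for a batch of
// resources whose lifetimes should end together.
type resList struct {
	list []*res
}

func newBatch(n int) *resList {
	rl := &resList{}
	for i := 0; i < n; i++ {
		rl.list = append(rl.list, &res{id: i})
	}
	// Populate first, then register: AddCleanup copies its third argument
	// now. The closure must not capture rl itself, or rl would stay
	// reachable and the cleanup would never fire.
	runtime.AddCleanup(rl, func(rs []*res) {
		for _, r := range rs {
			r.Close()
		}
	}, rl.list)
	return rl
}

func main() {
	_ = newBatch(3)                   // the batch is immediately unreachable
	runtime.GC()                      // queue its cleanup
	time.Sleep(50 * time.Millisecond) // cleanups run on a separate goroutine
}
```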
@@ -99,9 +103,8 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
 	s.ready.Wait()
 
 	startTime := time.Now()
-	ctx := s.model.Backend().NewContext()
 
-	inputs, err := s.inputs(ctx, prompt, images)
+	inputs, ctxs, err := s.inputs(prompt, images)
 	if err != nil {
 		return nil, fmt.Errorf("failed to process inputs: %w", err)
 	} else if len(inputs) == 0 {
@@ -127,7 +130,7 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
 	// TODO(jessegross): Ingest cached history for grammar
 
 	return &Sequence{
-		ctx:                 ctx,
+		ctxs:                ctxs,
 		inputs:              inputs,
 		numPromptInputs:     len(inputs),
 		startProcessingTime: startTime,
@@ -146,7 +149,7 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
 // inputs processes the prompt and images into a list of inputs
 // by splitting the prompt on [img-<n>] tags, tokenizing text and
 // decoding images
-func (s *Server) inputs(ctx ml.Context, prompt string, images []llm.ImageData) ([]input.Input, error) {
+func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *contextList, error) {
 	var inputs []input.Input
 	var parts []string
 	var matches [][]string
@@ -161,12 +164,19 @@ func (s *Server) inputs(ctx ml.Context, prompt string, images []llm.ImageData) (
 		parts = []string{prompt}
 	}
 
+	var contexts contextList
+	runtime.AddCleanup(&contexts, func(ctxs []ml.Context) {
+		for _, ctx := range ctxs {
+			ctx.Close()
+		}
+	}, contexts.list)
+
 	postTokenize := false
 	for i, part := range parts {
 		// text - tokenize
 		tokens, err := s.model.(model.TextProcessor).Encode(part, i == 0)
 		if err != nil {
-			return nil, err
+			return nil, nil, err
 		}
 
 		for _, t := range tokens {
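`runtime.AddCleanup` (new in Go 1.24) runs the supplied function on a background goroutine some time after its first argument becomes unreachable. One property of the API worth illustrating: the third argument is evaluated and copied at registration time, so elements appended to a slice afterwards are not visible to the cleanup. A small self-contained sketch with a hypothetical `holder` type:

```go
package main

import (
	"fmt"
	"runtime"
	"time"
)

type holder struct {
	list []string
}

func demo() {
	h := &holder{}
	// The slice header h.list (currently nil) is copied into the cleanup
	// right here, at registration time.
	runtime.AddCleanup(h, func(vals []string) {
		fmt.Println("cleanup sees:", vals)
	}, h.list)

	// Appending allocates a new backing array; the cleanup's copy is not
	// affected and will still see an empty slice.
	h.list = append(h.list, "a", "b")
}

func main() {
	demo()
	runtime.GC()                      // h is unreachable once demo returns
	time.Sleep(50 * time.Millisecond) // typically prints: cleanup sees: []
}
```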
@@ -186,12 +196,14 @@ func (s *Server) inputs(ctx ml.Context, prompt string, images []llm.ImageData) (
 			}
 
 			if imageIndex < 0 {
-				return nil, nil, fmt.Errorf("invalid image index: %d", n)
+				return nil, nil, fmt.Errorf("invalid image index: %d", n)
 			}
 
+			ctx := s.model.Backend().NewContext()
+			contexts.list = append(contexts.list, ctx)
 			imageEmbeddings, err := multimodalProcessor.EncodeMultimodal(ctx, images[imageIndex].Data)
 			if err != nil {
-				return nil, err
+				return nil, nil, err
 			}
 
 			s.multimodalHash.Reset()
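Each image now gets its own backend context, appended to the owner's list immediately after creation and before `EncodeMultimodal` runs, so an error return cannot leave an unowned context behind. A sketch of that acquire-then-register ordering, with hypothetical `acquire`/`use` helpers standing in for `NewContext`/`EncodeMultimodal`:

```go
package main

import "fmt"

// res and resList are hypothetical stand-ins for ml.Context and contextList.
type res struct{ id int }

func (r *res) Close() { fmt.Println("close", r.id) }

type resList struct{ list []*res }

var nextID int

func acquire() *res { nextID++; return &res{id: nextID} } // hypothetical constructor

func use(r *res, item string) error { // hypothetical fallible operation
	if item == "" {
		return fmt.Errorf("empty item")
	}
	return nil
}

// process mirrors the ordering above: each resource joins its owner's list
// immediately after creation, before the first call that can fail, so an
// early error return never leaves an unowned resource behind.
func process(owner *resList, items []string) error {
	for _, it := range items {
		r := acquire()
		owner.list = append(owner.list, r)
		if err := use(r, it); err != nil {
			return err // r is still reachable via owner
		}
	}
	return nil
}

func main() {
	owner := &resList{}
	_ = process(owner, []string{"a", "", "c"})
	for _, r := range owner.list {
		r.Close() // the owner can release everything it accumulated
	}
}
```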
@@ -205,13 +217,13 @@ func (s *Server) inputs(ctx ml.Context, prompt string, images []llm.ImageData) (
 
 	if visionModel && postTokenize {
 		var err error
-		inputs, err = multimodalProcessor.PostTokenize(ctx, inputs)
+		inputs, err = multimodalProcessor.PostTokenize(inputs)
 		if err != nil {
-			return nil, err
+			return nil, nil, err
 		}
 	}
 
-	return inputs, nil
+	return inputs, &contexts, nil
 }
 
 type Server struct {
@@ -306,7 +318,6 @@ func (s *Server) removeSequence(seqIndex int, reason string) {
 	close(seq.responses)
 	close(seq.embedding)
 	seq.cache.InUse = false
-	seq.ctx.Close()
 	s.seqs[seqIndex] = nil
 	s.seqsSem.Release(1)
 }
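With the explicit `seq.ctx.Close()` removed, nothing closes contexts synchronously: clearing the sequence's slot (`s.seqs[seqIndex] = nil`) drops the reference to its `contextList`, and a later GC cycle runs the registered cleanup. A hedged sketch of that lifetime, with a hypothetical `handle` type in place of a backend context:

```go
package main

import (
	"fmt"
	"runtime"
	"time"
)

// handle is a hypothetical stand-in for a backend context.
type handle struct{ name string }

func (h *handle) Close() { fmt.Println("closing", h.name) }

type handleList struct{ list []*handle }

func main() {
	hl := &handleList{list: []*handle{{name: "img-0"}, {name: "img-1"}}}
	runtime.AddCleanup(hl, func(hs []*handle) {
		for _, h := range hs {
			h.Close()
		}
	}, hl.list)

	fmt.Println("sequence in use; handles stay open")
	runtime.KeepAlive(hl) // hl is reachable at least until this point

	hl = nil                          // explicitly drop the reference (mirrors s.seqs[seqIndex] = nil)
	runtime.GC()                      // a later GC cycle queues the cleanup
	time.Sleep(50 * time.Millisecond) // cleanups run asynchronously; not guaranteed before exit
}
```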