Commit 00d0661

Revert "chat api (ollama#991)" while context variable is fixed
This reverts commit 7a0899d.
1 parent: f1ef3f9

8 files changed: +135 -550 lines


api/client.go: -13 lines

@@ -221,19 +221,6 @@ func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn Generate
 	})
 }
 
-type ChatResponseFunc func(ChatResponse) error
-
-func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error {
-	return c.stream(ctx, http.MethodPost, "/api/chat", req, func(bts []byte) error {
-		var resp ChatResponse
-		if err := json.Unmarshal(bts, &resp); err != nil {
-			return err
-		}
-
-		return fn(resp)
-	})
-}
-
 type PullProgressFunc func(ProgressResponse) error
 
 func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error {
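
For orientation, a minimal sketch of the streaming `Generate` call that remains in this client after the revert. Only the `Generate` signature and the request/callback shapes visible in the hunk above come from this diff; the module path, `api.ClientFromEnvironment`, the `Model` field, and the `Response` field are assumptions about the surrounding package, not something this commit shows.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api" // module path assumed for this era of the project
)

func main() {
	// ClientFromEnvironment is assumed to exist; it is not part of this diff.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.GenerateRequest{
		Model:  "llama2", // Model field assumed; only Prompt..Options appear in the hunk above
		Prompt: "why is the sky blue?",
	}

	// Generate streams the completion; the callback runs once per decoded JSON object,
	// the same c.stream pattern the deleted Chat method wrapped.
	err = client.Generate(context.Background(), req, func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response) // Response field assumed from docs/api.md
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println()
}
```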

api/types.go: +22 -52 lines

@@ -36,49 +36,14 @@ type GenerateRequest struct {
 	Prompt string `json:"prompt"`
 	System string `json:"system"`
 	Template string `json:"template"`
-	Context []int `json:"context,omitempty"` // DEPRECATED: context is deprecated, use the /chat endpoint instead for chat history
+	Context []int `json:"context,omitempty"`
 	Stream *bool `json:"stream,omitempty"`
 	Raw bool `json:"raw,omitempty"`
 	Format string `json:"format"`
 
 	Options map[string]interface{} `json:"options"`
 }
 
-type ChatRequest struct {
-	Model string `json:"model"`
-	Messages []Message `json:"messages"`
-	Template string `json:"template"`
-	Stream *bool `json:"stream,omitempty"`
-	Format string `json:"format"`
-
-	Options map[string]interface{} `json:"options"`
-}
-
-type Message struct {
-	Role string `json:"role"` // one of ["system", "user", "assistant"]
-	Content string `json:"content"`
-}
-
-type ChatResponse struct {
-	Model string `json:"model"`
-	CreatedAt time.Time `json:"created_at"`
-	Message *Message `json:"message,omitempty"`
-
-	Done bool `json:"done"`
-	Context []int `json:"context,omitempty"`
-
-	EvalMetrics
-}
-
-type EvalMetrics struct {
-	TotalDuration time.Duration `json:"total_duration,omitempty"`
-	LoadDuration time.Duration `json:"load_duration,omitempty"`
-	PromptEvalCount int `json:"prompt_eval_count,omitempty"`
-	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
-	EvalCount int `json:"eval_count,omitempty"`
-	EvalDuration time.Duration `json:"eval_duration,omitempty"`
-}
-
 // Options specfied in GenerateRequest, if you add a new option here add it to the API docs also
 type Options struct {
 	Runner
@@ -208,34 +173,39 @@ type GenerateResponse struct {
 	Done bool `json:"done"`
 	Context []int `json:"context,omitempty"`
 
-	EvalMetrics
+	TotalDuration time.Duration `json:"total_duration,omitempty"`
+	LoadDuration time.Duration `json:"load_duration,omitempty"`
+	PromptEvalCount int `json:"prompt_eval_count,omitempty"`
+	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
+	EvalCount int `json:"eval_count,omitempty"`
+	EvalDuration time.Duration `json:"eval_duration,omitempty"`
 }
 
-func (m *EvalMetrics) Summary() {
-	if m.TotalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "total duration: %v\n", m.TotalDuration)
+func (r *GenerateResponse) Summary() {
+	if r.TotalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "total duration: %v\n", r.TotalDuration)
 	}
 
-	if m.LoadDuration > 0 {
-		fmt.Fprintf(os.Stderr, "load duration: %v\n", m.LoadDuration)
+	if r.LoadDuration > 0 {
+		fmt.Fprintf(os.Stderr, "load duration: %v\n", r.LoadDuration)
 	}
 
-	if m.PromptEvalCount > 0 {
-		fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", m.PromptEvalCount)
+	if r.PromptEvalCount > 0 {
+		fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", r.PromptEvalCount)
 	}
 
-	if m.PromptEvalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", m.PromptEvalDuration)
-		fmt.Fprintf(os.Stderr, "prompt eval rate: %.2f tokens/s\n", float64(m.PromptEvalCount)/m.PromptEvalDuration.Seconds())
+	if r.PromptEvalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", r.PromptEvalDuration)
+		fmt.Fprintf(os.Stderr, "prompt eval rate: %.2f tokens/s\n", float64(r.PromptEvalCount)/r.PromptEvalDuration.Seconds())
 	}
 
-	if m.EvalCount > 0 {
-		fmt.Fprintf(os.Stderr, "eval count: %d token(s)\n", m.EvalCount)
+	if r.EvalCount > 0 {
+		fmt.Fprintf(os.Stderr, "eval count: %d token(s)\n", r.EvalCount)
 	}
 
-	if m.EvalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "eval duration: %s\n", m.EvalDuration)
-		fmt.Fprintf(os.Stderr, "eval rate: %.2f tokens/s\n", float64(m.EvalCount)/m.EvalDuration.Seconds())
+	if r.EvalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "eval duration: %s\n", r.EvalDuration)
+		fmt.Fprintf(os.Stderr, "eval rate: %.2f tokens/s\n", float64(r.EvalCount)/r.EvalDuration.Seconds())
 	}
 }
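
As a quick illustration of the metrics moved back onto `GenerateResponse`, this sketch reproduces the tokens-per-second arithmetic that `Summary()` prints (eval count divided by eval duration in seconds). The helper name is hypothetical; the sample values are taken from the final-response example in the docs below.

```go
package main

import (
	"fmt"
	"time"
)

// evalRate mirrors Summary()'s "eval rate" line: tokens generated divided by
// the seconds spent generating them.
func evalRate(evalCount int, evalDuration time.Duration) float64 {
	if evalDuration <= 0 {
		return 0
	}
	return float64(evalCount) / evalDuration.Seconds()
}

func main() {
	// 113 tokens in 1,325,948,000 ns of eval time ≈ 85.22 tokens/s
	fmt.Printf("eval rate: %.2f tokens/s\n", evalRate(113, 1325948000*time.Nanosecond))
}
```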

docs/api.md: +10 -142 lines

@@ -24,31 +24,30 @@ All durations are returned in nanoseconds.
 
 ### Streaming responses
 
-Certain endpoints stream responses as JSON objects.
+Certain endpoints stream responses as JSON objects delineated with the newline (`\n`) character.
 
 ## Generate a completion
 
 ```shell
 POST /api/generate
 ```
 
-Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
+Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses. The final response object will include statistics and additional data from the request.
 
 ### Parameters
 
-`model` is required.
-
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
 
 Advanced parameters (optional):
 
 - `format`: the format to return a response in. Currently the only accepted value is `json`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
-- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
 - `system`: system prompt to (overrides what is defined in the `Modelfile`)
+- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
+- `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
-- `raw`: if `true` no formatting will be applied to the prompt. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API.
+- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
 
 ### JSON mode
 
@@ -58,7 +57,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur
 
 ### Examples
 
-#### Request (Prompt)
+#### Request
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
@@ -90,7 +89,7 @@ The final response in the stream also includes additional data about the generat
 - `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt
 - `eval_count`: number of tokens the response
 - `eval_duration`: time in nanoseconds spent generating the response
-- `context`: deprecated, an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory
+- `context`: an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory
 - `response`: empty if the response was streamed, if not streamed, this will contain the full response
 
 To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration`.
@@ -115,8 +114,6 @@ To calculate how fast the response is generated in tokens per second (token/s),
 
 #### Request (No streaming)
 
-A response can be recieved in one reply when streaming is off.
-
 ```shell
 curl http://localhost:11434/api/generate -d '{
   "model": "llama2",
@@ -147,9 +144,9 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
 
-#### Request (Raw Mode)
+#### Request (Raw mode)
 
-In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting.
+In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
@@ -167,7 +164,6 @@ curl http://localhost:11434/api/generate -d '{
   "model": "mistral",
   "created_at": "2023-11-03T15:36:02.583064Z",
   "response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
-  "context": [1, 2, 3],
   "done": true,
   "total_duration": 14648695333,
   "load_duration": 3302671417,
@@ -279,6 +275,7 @@ curl http://localhost:11434/api/generate -d '{
   "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "The sky is blue because it is the color of the sky.",
+  "context": [1, 2, 3],
   "done": true,
   "total_duration": 5589157167,
   "load_duration": 3013701500,
@@ -291,135 +288,6 @@ curl http://localhost:11434/api/generate -d '{
 }
 ```
 
-## Send Chat Messages
-
-```shell
-POST /api/chat
-```
-
-Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
-
-### Parameters
-
-`model` is required.
-
-- `model`: (required) the [model name](#model-names)
-- `messages`: the messages of the chat, this can be used to keep a chat memory
-
-Advanced parameters (optional):
-
-- `format`: the format to return a response in. Currently the only accepted value is `json`
-- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
-- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
-- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
-
-### Examples
-
-#### Request
-Send a chat message with a streaming response.
-
-```shell
-curl http://localhost:11434/api/generate -d '{
-  "model": "llama2",
-  "messages": [
-    {
-      "role": "user",
-      "content": "why is the sky blue?"
-    }
-  ]
-}'
-```
-
-#### Response
-
-A stream of JSON objects is returned:
-
-```json
-{
-  "model": "llama2",
-  "created_at": "2023-08-04T08:52:19.385406455-07:00",
-  "message": {
-    "role": "assisant",
-    "content": "The"
-  },
-  "done": false
-}
-```
-
-Final response:
-
-```json
-{
-  "model": "llama2",
-  "created_at": "2023-08-04T19:22:45.499127Z",
-  "done": true,
-  "total_duration": 5589157167,
-  "load_duration": 3013701500,
-  "sample_count": 114,
-  "sample_duration": 81442000,
-  "prompt_eval_count": 46,
-  "prompt_eval_duration": 1160282000,
-  "eval_count": 113,
-  "eval_duration": 1325948000
-}
-```
-
-#### Request (With History)
-Send a chat message with a conversation history.
-
-```shell
-curl http://localhost:11434/api/generate -d '{
-  "model": "llama2",
-  "messages": [
-    {
-      "role": "user",
-      "content": "why is the sky blue?"
-    },
-    {
-      "role": "assistant",
-      "content": "due to rayleigh scattering."
-    },
-    {
-      "role": "user",
-      "content": "how is that different than mie scattering?"
-    }
-  ]
-}'
-```
-
-#### Response
-
-A stream of JSON objects is returned:
-
-```json
-{
-  "model": "llama2",
-  "created_at": "2023-08-04T08:52:19.385406455-07:00",
-  "message": {
-    "role": "assisant",
-    "content": "The"
-  },
-  "done": false
-}
-```
-
-Final response:
-
-```json
-{
-  "model": "llama2",
-  "created_at": "2023-08-04T19:22:45.499127Z",
-  "done": true,
-  "total_duration": 5589157167,
-  "load_duration": 3013701500,
-  "sample_count": 114,
-  "sample_duration": 81442000,
-  "prompt_eval_count": 46,
-  "prompt_eval_duration": 1160282000,
-  "eval_count": 113,
-  "eval_duration": 1325948000
-}
-```
-
 ## Create a Model
 
 ```shell
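
Putting the restored docs together, here is a hedged sketch of a client that POSTs to `/api/generate`, reads the newline-delimited JSON stream, and passes the final `context` value back in a follow-up request to keep a short conversational memory. The field names and the `localhost:11434` address come from the docs above; the struct and helper names are illustrative only, not part of this commit.

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// Local sketches of the request/response bodies described in docs/api.md.
type generateRequest struct {
	Model   string `json:"model"`
	Prompt  string `json:"prompt"`
	Context []int  `json:"context,omitempty"`
}

type generateResponse struct {
	Response string `json:"response"`
	Context  []int  `json:"context,omitempty"`
	Done     bool   `json:"done"`
}

// generate streams one completion and returns the context for the next call.
func generate(prompt string, ctx []int) ([]int, error) {
	body, err := json.Marshal(generateRequest{Model: "llama2", Prompt: prompt, Context: ctx})
	if err != nil {
		return nil, err
	}
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Responses are JSON objects delineated by newlines, so scan line by line.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		var r generateResponse
		if err := json.Unmarshal(scanner.Bytes(), &r); err != nil {
			return nil, err
		}
		fmt.Print(r.Response)
		if r.Done {
			fmt.Println()
			return r.Context, nil // final object carries the context for follow-up requests
		}
	}
	return nil, scanner.Err()
}

func main() {
	ctx, err := generate("why is the sky blue?", nil)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := generate("how is that different than mie scattering?", ctx); err != nil {
		log.Fatal(err)
	}
}
```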
