Remove the custom prompt formatting and rely on ollama #67

Open

wants to merge 6 commits into main
48 changes: 0 additions & 48 deletions package.json
@@ -83,37 +83,6 @@
},
"inference.model": {
"type": "string",
"enum": [
"stable-code:3b-code-q4_0",
"codellama:7b-code-q4_K_S",
"codellama:7b-code-q4_K_M",
"codellama:7b-code-q6_K",
"codellama:7b-code-fp16",
"codellama:13b-code-q4_K_S",
"codellama:13b-code-q4_K_M",
"codellama:13b-code-q6_K",
"codellama:13b-code-fp16",
"codellama:34b-code-q4_K_S",
"codellama:34b-code-q4_K_M",
"codellama:34b-code-q6_K",
"codellama:70b-code-q4_K_S",
"codellama:70b-code-q4_K_M",
"codellama:70b-code-q6_K",
"codellama:70b-code-fp16",
"deepseek-coder:1.3b-base-q4_0",
"deepseek-coder:1.3b-base-q4_1",
"deepseek-coder:1.3b-base-q8_0",
"deepseek-coder:6.7b-base-q4_K_S",
"deepseek-coder:6.7b-base-q4_K_M",
"deepseek-coder:6.7b-base-q5_K_S",
"deepseek-coder:6.7b-base-q5_K_M",
"deepseek-coder:6.7b-base-q8_0",
"deepseek-coder:6.7b-base-fp16",
"deepseek-coder:33b-base-q4_K_S",
"deepseek-coder:33b-base-q4_K_M",
"deepseek-coder:33b-base-fp16",
"custom"
],
"default": "stable-code:3b-code-q4_0",
"description": "Inference model to use",
"order": 2
@@ -124,23 +93,6 @@
"description": "Temperature of the model. Increasing the temperature will make the model answer more creatively.",
"order": 3
},
"inference.custom.model": {
"type": "string",
"default": "",
"description": "Custom model name",
"order": 4
},
"inference.custom.format": {
"type": "string",
"enum": [
"stable-code",
"codellama",
"deepseek"
],
"default": "stable-code",
"description": "Custom model prompt format",
"order": 5
},
"inference.maxLines": {
"type": "number",
"default": 16,
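For reference, after removing the enum and the two custom-model settings, the contributed inference.model property reduces to a plain string, so any model tag available to the local Ollama install can be entered. A sketch of the resulting entry, reconstructed from the unchanged lines of this hunk:

"inference.model": {
    "type": "string",
    "default": "stable-code:3b-code-q4_0",
    "description": "Inference model to use",
    "order": 2
},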
13 changes: 0 additions & 13 deletions src/config.ts
@@ -1,5 +1,4 @@
import vscode from 'vscode';
import { ModelFormat } from './prompts/processors/models';

class Config {

@@ -24,17 +23,6 @@ class Config {

// Load model
let modelName = config.get('model') as string;
let modelFormat: ModelFormat = 'codellama';
if (modelName === 'custom') {
modelName = config.get('custom.model') as string;
modelFormat = config.get('cutom.format') as ModelFormat;
} else {
if (modelName.startsWith('deepseek-coder')) {
modelFormat = 'deepseek';
} else if (modelName.startsWith('stable-code')) {
modelFormat = 'stable-code';
}
}

let delay = config.get('delay') as number;

@@ -45,7 +33,6 @@
maxTokens,
temperature,
modelName,
modelFormat,
delay
};
}
54 changes: 0 additions & 54 deletions src/modules/lineGenerator.ts

This file was deleted.

21 changes: 0 additions & 21 deletions src/modules/ollamaCheckModel.ts

This file was deleted.

9 changes: 0 additions & 9 deletions src/modules/ollamaDownloadModel.ts

This file was deleted.

35 changes: 35 additions & 0 deletions src/modules/ollamaRequest.ts
@@ -0,0 +1,35 @@
export async function makeOllamaRequest(url: string, data: any, bearerToken: string): Promise<string> {
// Request
const controller = new AbortController();
let res = await fetch(url, {
method: 'POST',
body: JSON.stringify(data),
headers: bearerToken ? {
'Content-Type': 'application/json',
Authorization: `Bearer ${bearerToken}`,
} : {
'Content-Type': 'application/json',
},
signal: controller.signal,
});
if (!res.ok || !res.body) {
throw Error('Unable to connect to backend');
}

// Reading stream
let stream = res.body.getReader();
const decoder = new TextDecoder();
try {
const { value } = await stream.read();

// Append chunk
let chunk = decoder.decode(value);
return chunk;
} finally {
stream.releaseLock();
if (!stream.closed) { // Stop generation
await stream.cancel();
}
controller.abort();
}
}
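A minimal usage sketch of the new helper. The endpoint URL, model tag, and empty bearer token are illustrative placeholders; the payload mirrors the one built in autocomplete.ts, and the single JSON.parse assumes the non-streaming response arrives in the one chunk the helper reads:

import { makeOllamaRequest } from './ollamaRequest';

async function demo() {
    // Non-streaming generate request, shaped like the body autocomplete.ts sends
    const body = {
        model: 'stable-code:3b-code-q4_0',
        prompt: 'function add(a: number, b: number) {',
        suffix: '}',
        raw: true,
        stream: false,
        options: { num_predict: 64, temperature: 0.2 }
    };
    const raw = await makeOllamaRequest('http://127.0.0.1:11434/api/generate', body, '');
    const token = JSON.parse(raw); // expected shape: { model, response, ... }
    console.log(token.response);
}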
22 changes: 0 additions & 22 deletions src/modules/ollamaTokenGenerator.ts

This file was deleted.

98 changes: 22 additions & 76 deletions src/prompts/autocomplete.ts
@@ -1,13 +1,14 @@
import { ollamaTokenGenerator } from '../modules/ollamaTokenGenerator';
import { countSymbol } from '../modules/text';
import { info } from '../modules/log';
import { ModelFormat, adaptPrompt } from './processors/models';
import { makeOllamaRequest } from "../modules/ollamaRequest";

type OllamaToken = {
model: string,
response: string,
};

export async function autocomplete(args: {
endpoint: string,
bearerToken: string,
model: string,
format: ModelFormat,
prefix: string,
suffix: string,
maxLines: number,
@@ -16,88 +17,33 @@ export async function autocomplete(args: {
canceled?: () => boolean,
}): Promise<string> {

let prompt = adaptPrompt({ prefix: args.prefix, suffix: args.suffix, format: args.format });

// Calculate arguments
let data = {
model: args.model,
prompt: prompt.prompt,
prompt: args.prefix,
suffix: args.suffix,
raw: true,
stream: false,
options: {
stop: prompt.stop,
num_predict: args.maxTokens,
temperature: args.temperature
}
};

// Receiving tokens
let res = '';
let totalLines = 1;
let blockStack: ('[' | '(' | '{')[] = [];
outer: for await (let tokens of ollamaTokenGenerator(args.endpoint + '/api/generate', data, args.bearerToken)) {
const res = await makeOllamaRequest(args.endpoint + '/api/generate', data, args.bearerToken);
try {
const tokens = JSON.parse(res) as OllamaToken;
if (args.canceled && args.canceled()) {
break;
}

// Block stack
for (let c of tokens.response) {

// Open block
if (c === '[') {
blockStack.push('[');
} else if (c === '(') {
blockStack.push('(');
}
if (c === '{') {
blockStack.push('{');
}

// Close block
if (c === ']') {
if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '[') {
blockStack.pop();
} else {
info('Block stack error, breaking.');
break outer;
}
}
if (c === ')') {
if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '(') {
blockStack.pop();
} else {
info('Block stack error, breaking.');
break outer;
}
}
if (c === '}') {
if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '{') {
blockStack.pop();
} else {
info('Block stack error, breaking.');
break outer;
}
}

// Append charater
res += c;
}

// Update total lines
totalLines += countSymbol(tokens.response, '\n');
// Break if too many lines and on top level
if (totalLines > args.maxLines && blockStack.length === 0) {
info('Too many lines, breaking.');
break;
return "";
}
const response = tokens.response;

// Take only args.maxLines lines from the response
let lines = response.split('\n');
lines = lines.slice(0, args.maxLines);
return lines.join('\n');
} catch (e) {
console.warn('Received malformed response: ' + res);
return "";
}

// Remove <EOT>
if (res.endsWith('<EOT>')) {
res = res.slice(0, res.length - 5);
}

// Trim ends of all lines since sometimes the AI completion will add extra spaces
res = res.split('\n').map((v) => v.trimEnd()).join('\n');

return res;
}
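With the format argument and the custom prompt adaptation gone, a caller passes only the raw prefix and suffix and lets Ollama apply the model's own template. A hedged sketch of a call site; all values are illustrative placeholders, not taken from the extension's real call path:

import { autocomplete } from './autocomplete';

async function demoCompletion() {
    const completion = await autocomplete({
        endpoint: 'http://127.0.0.1:11434',
        bearerToken: '',
        model: 'stable-code:3b-code-q4_0',
        prefix: 'def add(a, b):\n    ',
        suffix: '\n',
        maxLines: 16,
        maxTokens: 64,
        temperature: 0.2,
        canceled: () => false,
    });
    console.log(completion);
}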