Skip to content

Commit

Permalink
wip+feat: working on language detection, detect language for filename…
Browse files Browse the repository at this point in the history
… marking
  • Loading branch information
ex3ndr committed Nov 26, 2023
1 parent 2cebf58 commit 3793194
Show file tree
Hide file tree
Showing 18 changed files with 2,104 additions and 591 deletions.
13 changes: 0 additions & 13 deletions CHANGELOG.md

This file was deleted.

18 changes: 16 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,20 @@ Currently Llama Coder supports only Codellama. Model is quantized in different w
* m - slow on MacOS
* g - slow on older NVidia cards (pre 30xx)

## License
## Changelog

MIT
## [0.0.8]
- Improved DeepSeek support and language detection

## [0.0.7]
- Added DeepSeek support
- Ability to change temperature and top p
- Fixed some bugs

## [0.0.6]
- Fix ollama links
- Added more models

## [0.0.4]

- Initial release of Llama Coder
7 changes: 7 additions & 0 deletions jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
testPathIgnorePatterns: ["/node_modules/","/out/"],
setupFiles: ['./jest.setup.js']
};
1 change: 1 addition & 0 deletions jest.setup.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Load environment variables via dotenv (by default from a local .env file) before tests run.
require('dotenv').config();
18 changes: 9 additions & 9 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "llama-coder",
"displayName": "Llama Coder",
"description": "Better and self-hosted Github Copilot replacement",
"version": "0.0.7",
"version": "0.0.8",
"icon": "icon.png",
"publisher": "ex3ndr",
"repository": {
Expand Down Expand Up @@ -93,19 +93,19 @@
"watch": "tsc -watch -p ./",
"pretest": "yarn run compile && yarn run lint",
"lint": "eslint src --ext ts",
"test": "node ./out/test/runTest.js",
"test": "jest",
"package": "vsce package"
},
"devDependencies": {
"@types/vscode": "^1.84.0",
"@types/mocha": "^10.0.3",
"@types/jest": "^29.5.10",
"@types/node": "18.x",
"@types/vscode": "^1.84.0",
"@typescript-eslint/eslint-plugin": "^6.9.0",
"@typescript-eslint/parser": "^6.9.0",
"dotenv": "^16.3.1",
"eslint": "^8.52.0",
"glob": "^10.3.10",
"mocha": "^10.2.0",
"typescript": "^5.2.2",
"@vscode/test-electron": "^2.3.6"
"jest": "^29.7.0",
"ts-jest": "^29.1.1",
"typescript": "^5.2.2"
}
}
}
22 changes: 11 additions & 11 deletions src/prompts/adaptors/adaptPrompt.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
export function adaptPrompt(args: { model: string, prefix: string, suffix: string }): { prompt: string, stop: string[] } {
export function adaptPrompt(args: { model: string, prefix: string, suffix: string | null }): { prompt: string, stop: string[] } {

// Starcoder format
if (args.model.startsWith('deepseek-coder')) {

if (args.suffix.length < 1000) {
return {
prompt: args.prefix,
stop: [`<END>`]
};
}
// Common non FIM mode
if (!args.suffix) {
return {
prompt: args.prefix,
stop: [`<END>`]
};
}

// Starcoder FIM
if (args.model.startsWith('deepseek-coder')) {
return {
prompt: `<|fim▁begin|>${args.prefix}<|fim▁hole|>${args.suffix}<|fim▁end|>`,
stop: [`<|fim▁begin|>`, `<|fim▁hole|>`, `<|fim▁end|>`, `<END>`]
};
}

// Codellama format
// Codellama FIM
return {
prompt: `<PRE> ${args.prefix} <SUF>${args.suffix} <MID>`,
stop: [`<PRE>`, `<SUF>`, `<MID>`, `<END>`]
Expand Down
2 changes: 1 addition & 1 deletion src/prompts/autocomplete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export async function autocomplete(args: {
endpoint: string,
model: string,
prefix: string,
suffix: string,
suffix: string | null,
maxLines: number,
maxTokens: number,
temperature: number,
Expand Down
25 changes: 13 additions & 12 deletions src/prompts/preparePrompt.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
import vscode from 'vscode';
import path from 'path';
import { detectLanguage } from './processors/detectLanguage';
import { fileHeaders } from './processors/fileHeaders';
import { languages } from './processors/languages';

export async function preparePrompt(document: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext) {

// Load document text
let text = document.getText();
let offset = document.offsetAt(position);
let prefix = text.slice(0, offset);
let suffix = text.slice(offset);
let suffix: string | null = text.slice(offset);

// Trim suffix
// NOTE: It seems that most neural networks are built have a focus on last characters and we therefore need to trim them to not get weird results.
// TODO: Better solution?
// TODO: Am i right here? What if we would want to generate something that uses something in the end of the file?
if (suffix.length > 256) {
suffix = suffix.slice(0, 256);
// If the suffix is too short, it is safe to assume it can be ignored, which lets us use
// the more powerful plain completion instead of fill-in-the-middle.
if (suffix.length < 256) {
suffix = null;
}

// Add filename and language to prefix
// NOTE: Most networks don't have a concept of filenames and expected language, but we expect that some files in training set has something in title that
// would indicate filename and language
// NOTE: We are building for typescript for now so we can use C-style comments to indicate filename
let filename = path.basename(document.fileName);
let language = document.languageId;
let filenamePrefix = `/* ${language}, filename: ${filename} */`;
prefix = filenamePrefix + '\n' + prefix;
// NOTE: If we can't detect language, we could ignore this since the number of languages that need detection is limited
let language = detectLanguage(document.uri.fsPath, document.languageId);
if (language) {
prefix = fileHeaders(prefix, document.uri.fsPath, languages[language]);
}

return {
prefix,
Expand Down
12 changes: 12 additions & 0 deletions src/prompts/processors/comment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { LanguageDescriptor } from "./languages";

/**
 * Wraps `text` in the comment syntax of the given language.
 *
 * @param text Text to place inside the comment.
 * @param language Descriptor whose optional `comment` entry supplies the
 *   `start` marker and, for block-style comments, an `end` marker.
 * @returns The commented text, or `null` when the language declares no
 *   comment syntax.
 */
export function comment(text: string, language: LanguageDescriptor): string | null {
    const markers = language.comment;
    if (!markers) {
        return null;
    }

    // Line-style comment; a closing marker is appended only when one is defined.
    const opened = `${markers.start} ${text}`;
    return markers.end ? `${opened} ${markers.end}` : opened;
}
7 changes: 7 additions & 0 deletions src/prompts/processors/detectLanguage.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { detectLanguage } from './detectLanguage';

// Test suite for detectLanguage.
describe('detectLanguage', () => {
// TODO: placeholder — this case contains no assertions yet, so it passes without exercising detectLanguage.
it('should detect language from happy path', () => {

});
});
42 changes: 35 additions & 7 deletions src/prompts/processors/detectLanguage.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,38 @@
// import path from 'path';
import path from 'path';
import { Language, languages } from './languages';

// let languages: { [key: string]: {} } = {
/**
 * Editor language ids that should be treated as another supported language.
 * Keys are the incoming ids; values are the language they resolve to.
 */
const aliases: Record<string, Language> = {
    typescriptreact: 'typescript',
    javascriptreact: 'javascript',
    jsx: 'javascript',
};

// };
/**
 * Resolves the language of a document from its editor language id and,
 * failing that, from its file extension.
 *
 * Resolution order:
 * 1. `languageId` is a known alias (e.g. `typescriptreact`) -> the aliased language.
 * 2. `languageId` is itself a key of `languages` -> that language.
 * 3. The lower-cased extension of `uri` equals one of a language's `extensions`
 *    (compared against `path.extname` output, which includes the leading dot).
 *
 * @param uri Path of the document; only the extension of its last segment is inspected.
 * @param languageId Language id reported by the editor, or `null` when unavailable.
 * @returns The detected language key, or `null` when nothing matches.
 */
export function detectLanguage(uri: string, languageId: string | null): Language | null {

    if (languageId) {
        // Own-property checks keep ids such as `constructor` or `toString` from
        // matching members inherited from Object.prototype.

        // Resolve aliases
        if (Object.prototype.hasOwnProperty.call(aliases, languageId)) {
            return aliases[languageId];
        }

        // Resolve using language id
        if (Object.prototype.hasOwnProperty.call(languages, languageId)) {
            return languageId as Language;
        }
    }

    // Resolve using filename and extension
    const extname = path.extname(path.basename(uri)).toLowerCase();

    // Check extensions (own keys only, in declaration order)
    for (const lang of Object.keys(languages) as Language[]) {
        for (const ex of languages[lang].extensions) {
            if (extname === ex) {
                return lang;
            }
        }
    }

    // Nothing matched
    return null;
}
21 changes: 21 additions & 0 deletions src/prompts/processors/fileHeaders.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { comment } from "./comment";
import { LanguageDescriptor } from "./languages";

/**
 * Prepends header comments describing the file to `content`.
 *
 * When the language defines a comment syntax the result is, top to bottom:
 * a `Language: <name>` comment, a `Path: <uri>` comment, then the original
 * content, separated by newlines. Without a language (or without comment
 * syntax) the content is returned unchanged.
 *
 * @param content Text to prefix.
 * @param uri Path written into the `Path:` marker.
 * @param language Descriptor of the detected language, or `null`.
 * @returns The content with any header comments prepended.
 */
export function fileHeaders(content: string, uri: string, language: LanguageDescriptor | null) {
    if (!language) {
        return content;
    }

    // Path marker sits directly above the content.
    const pathMarker = comment('Path: ' + uri, language);
    const withPath = pathMarker ? pathMarker + '\n' + content : content;

    // Language marker goes on the very first line.
    const typeMarker = comment('Language: ' + language.name, language);
    return typeMarker ? typeMarker + '\n' + withPath : withPath;
}
Loading

0 comments on commit 3793194

Please sign in to comment.