Skip to content

Commit

Permalink
implement support for notebooks
Browse files Browse the repository at this point in the history
- reads all cells (with their outputs) and uses them as the prompt
- markdown is handled as a language comment
- only text output is used (also handled as a language comment)
  • Loading branch information
sahandevs committed Feb 5, 2024
1 parent 9288c45 commit c36cdff
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/prompts/filter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type vscode from 'vscode';

/**
 * Tells whether the given document is eligible for prompt preparation.
 *
 * A document is accepted when its URI scheme is `file` (a regular file on disk)
 * or `vscode-notebook-cell` (the text document backing a notebook cell).
 *
 * @param doc Document to check.
 * @returns `true` when the document's URI scheme is one of the accepted schemes.
 */
export function isSupported(doc: vscode.TextDocument): boolean {
    const scheme = doc.uri.scheme;
    return scheme === 'file' || scheme === 'vscode-notebook-cell';
}

export function isNotNeeded(doc: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext): boolean {
Expand Down
71 changes: 70 additions & 1 deletion src/prompts/preparePrompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,84 @@ import { detectLanguage } from './processors/detectLanguage';
import { fileHeaders } from './processors/fileHeaders';
import { languages } from './processors/languages';

/** Shared UTF-8 decoder used to turn raw notebook output bytes into text. */
const decoder = new TextDecoder("utf8");

/**
 * Finds the open notebook that owns the given text document.
 *
 * Looks through `vscode.workspace.notebookDocuments` for a notebook that has a
 * cell whose backing document has exactly the same URI as `document`. The full
 * URI is compared rather than only the path, so a regular file that merely
 * shares its path with an open notebook is not mistaken for one of its cells.
 *
 * @param document Text document to look up.
 * @returns The owning notebook, or `undefined` when the document is not a cell
 *          of any open notebook.
 */
function getNotebookDocument(document: vscode.TextDocument): vscode.NotebookDocument | undefined {
    const target = document.uri.toString();
    return vscode.workspace.notebookDocuments.find((notebook) =>
        notebook.getCells().some((cell) => cell.document.uri.toString() === target)
    );
}

export async function preparePrompt(document: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext) {

// Load document text
console.log(document);
let text = document.getText();
let offset = document.offsetAt(position);
let prefix = text.slice(0, offset);
let suffix: string = text.slice(offset);

// If this is a notebook, add the surrounding cells to the prefix and suffix
let notebookDocument = getNotebookDocument(document);
let language = detectLanguage(document.uri.fsPath, document.languageId);
let commentStart: string | undefined = undefined;
if (language) {
commentStart = languages[language].comment?.start;
}

if (notebookDocument) {
let beforeCurrentCell = true;

let prefixCells = "";
let suffixCells = "";

notebookDocument.getCells().forEach((cell) => {
let out = "";

if (cell.document.uri.fragment === document.uri.fragment) {
beforeCurrentCell = false; // switch to suffix mode
return;
}

// add the markdown cell output to the prompt as a comment
if (cell.kind === vscode.NotebookCellKind.Markup && commentStart) {
for (const line of cell.document.getText().split('\n')) {
out += `\n${commentStart}${line}`;
}
} else {
out += cell.document.getText();
}

// if there is any outputs add them to the prompt as a comment
if (cell.kind === vscode.NotebookCellKind.Code && commentStart) {
console.log(cell.outputs);
let cellOutputs = cell.outputs
.map(x => x.items
.filter(x => x.mime === 'text/plain')
.map(x => decoder.decode(x.data))
.map(x => x.slice(0, 256).split('\n'))) // limit to 256 characters
.flat(3);

if (cellOutputs.length > 0) {
out += `\n${commentStart}Output:`;
for (const line of cellOutputs) {
out += `\n${commentStart}${line}`;
}
}
}

// update the prefix/suffix
if (beforeCurrentCell) {
prefixCells += out;
} else {
suffixCells += out;
}

});

prefix = prefixCells + prefix;
suffix = suffix + suffixCells;
}

// Trim suffix
// If the suffix is too small, it is safe to assume that it can be ignored, which allows us to use
// the more powerful completion instead of the in-the-middle one
Expand All @@ -22,7 +92,6 @@ export async function preparePrompt(document: vscode.TextDocument, position: vsc
// NOTE: Most networks don't have a concept of filenames and expected language, but we expect that some files in the training set have something in the title that
// would indicate the filename and language
// NOTE: If we can't detect the language, we can skip this, since the number of languages that need detection is limited
let language = detectLanguage(document.uri.fsPath, document.languageId);
if (language) {
prefix = fileHeaders(prefix, document.uri.fsPath, languages[language]);
}
Expand Down
2 changes: 1 addition & 1 deletion src/prompts/processors/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ export const languages: { [key in Language]: LanguageDescriptor } = {
},
python: {
name: 'Python',
extensions: ['.py'],
extensions: ['.py', 'ipynb'],
comment: { start: '#' }
},
c: {
Expand Down

0 comments on commit c36cdff

Please sign in to comment.