From 7068024f7f15f89d35ea27e9ad7cc3f8ddfb9ab3 Mon Sep 17 00:00:00 2001 From: hiroki osame Date: Sun, 26 Mar 2023 22:41:36 -0400 Subject: [PATCH] fix: tokenize with `gpt-3.5-turbo` model (#173) --- package.json | 2 +- pnpm-lock.yaml | 8 ++++---- src/utils/openai.ts | 4 +--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index a57c0ab0..e1fd30be 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,7 @@ "*.ts": "eslint --cache" }, "dependencies": { - "@dqbd/tiktoken": "^0.4.0" + "@dqbd/tiktoken": "^1.0.2" }, "devDependencies": { "@clack/prompts": "^0.6.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0be6c5e6..16aec88a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -7,7 +7,7 @@ patchedDependencies: specifiers: '@clack/prompts': ^0.6.1 - '@dqbd/tiktoken': ^0.4.0 + '@dqbd/tiktoken': ^1.0.2 '@pvtnbr/eslint-config': ^0.33.0 '@types/ini': ^1.3.31 '@types/inquirer': ^9.0.3 @@ -28,7 +28,7 @@ specifiers: typescript: ^4.9.5 dependencies: - '@dqbd/tiktoken': 0.4.0 + '@dqbd/tiktoken': 1.0.2 devDependencies: '@clack/prompts': 0.6.1_seqcoud6rtee7vmn7zfu7zbwcy @@ -92,8 +92,8 @@ packages: - is-unicode-supported patched: true - /@dqbd/tiktoken/0.4.0: - resolution: {integrity: sha512-iaHgmwKAOqowBFZKxelyszoeGLoNw62eOULcmyme1aA1Ymr3JgYl0V7jwpuUm7fksalycZajx3loFn9TRUaviw==} + /@dqbd/tiktoken/1.0.2: + resolution: {integrity: sha512-AjGTBRWsMoVmVeN55NLyupyM8TNamOUBl6tj5t/leLDVup3CFGO9tVagNL1jf3GyZLkWZSTmYVbPQ/M2LEcNzw==} dev: false /@esbuild-kit/cjs-loader/2.4.2: diff --git a/src/utils/openai.ts b/src/utils/openai.ts index 16966ade..17cff903 100644 --- a/src/utils/openai.ts +++ b/src/utils/openai.ts @@ -91,8 +91,6 @@ const deduplicateMessages = (array: string[]) => Array.from(new Set(array)); const getPrompt = (locale: string, diff: string) => `Write an insightful but concise Git commit message in a complete sentence in present tense for the following diff without prefacing it with anything, the response must be in the language 
${locale}:\n${diff}`; const model = 'gpt-3.5-turbo'; -// TODO: update for the new gpt-3.5 model -const encoder = encodingForModel('text-davinci-003'); export const generateCommitMessage = async ( apiKey: string, @@ -106,7 +104,7 @@ - * text-davinci-003 has a token limit of 4000 - * https://platform.openai.com/docs/models/overview#:~:text=to%20Sep%202021-,text%2Ddavinci%2D003,-Can%20do%20any + * gpt-3.5-turbo has a maximum context length of 4096 tokens; 4000 leaves headroom for the response + * https://platform.openai.com/docs/models/gpt-3-5 */ - if (encoder.encode(prompt).length > 4000) { + if (encodingForModel(model).encode(prompt).length > 4000) { throw new KnownError('The diff is too large for the OpenAI API. Try reducing the number of staged changes, or write your own commit message.'); }