ChatGPTBox-dev · wassname · Oct 29, 2024 · Oct 29, 2024 · Apr 30, 2025 · PeterDaveHello
diff --git a/src/config/index.mjs b/src/config/index.mjs
@@ -332,7 +332,7 @@ export const defaultConfig = {
 
   // advanced
 
-  maxResponseTokenLength: 1000,
+  maxResponseTokenLength: 200000,
   maxConversationContextLength: 9,
   temperature: 1,
   customChatGptWebApiUrl: 'https://chatgpt.com',

diff --git a/src/content-script/site-adapters/youtube/index.mjs b/src/content-script/site-adapters/youtube/index.mjs
@@ -48,14 +48,14 @@ export default {
       let subtitleContent = ''
       while (subtitleData.indexOf('">') !== -1) {
         subtitleData = subtitleData.substring(subtitleData.indexOf('">') + 2)
-        subtitleContent += subtitleData.substring(0, subtitleData.indexOf('<')) + ','
+        subtitleContent += subtitleData.substring(0, subtitleData.indexOf('<')) + '\n'
       }
 
       subtitleContent = replaceHtmlEntities(subtitleContent)
 
       return await cropText(
-        `Provide a structured summary of the following video in markdown format, focusing on key takeaways and crucial information, and ensuring to include the video title. The summary should be easy to read and concise, yet comprehensive.` +
-          `The video title is "${title}". The subtitle content is as follows:\n${subtitleContent}`,
+        `Provide a structured summary of the content of the following video in markdown format, focusing on key takeaways and crucial information for the viewer Gwern, and ensure to include the video title and completeness of transcript if needed. Ignore promotions, bio's, and other uninteresting parts. The summary should be easy to read and concise, yet comprehensive. You should include key text as markdown quotes after tidying them up.` +
+          `The video title is "${title}". Add a tldr and BLUF. The subtitle content is as follows:\n${subtitleContent}`,
       )
     } catch (e) {
       console.log(e)

diff --git a/src/utils/crop-text.mjs b/src/utils/crop-text.mjs
@@ -28,11 +28,12 @@ const clamp = (v, min, max) => {
   return Math.min(Math.max(v, min), max)
 }
 
+/** Crops over-long text by keeping its beginning and end and dropping the middle section. */
 export async function cropText(
   text,
-  maxLength = 4000,
-  startLength = 400,
-  endLength = 300,
+  maxLength = 200000,
+  startLength = 0,
+  endLength = 0,
   tiktoken = true,
 ) {
   const userConfig = await getUserConfig()
@@ -41,54 +42,69 @@ export async function cropText(
     null,
     userConfig.customModelName,
   ).match(/[- (]*([0-9]+)k/)?.[1]
+
+  // for maxlength prefer modelLimit > userLimit > default
   if (k) {
+    // the model description contains an "<N>k" size: use N * 1000 as the limit
     maxLength = Number(k) * 1000
-    maxLength -= 100 + clamp(userConfig.maxResponseTokenLength, 1, maxLength - 1000)
+  } else if (userConfig.maxResponseTokenLength) {
+    // no "<N>k" size was found: fall back to the user-configured maxResponseTokenLength
+    maxLength = userConfig.maxResponseTokenLength
   } else {
-    maxLength -= 100 + clamp(userConfig.maxResponseTokenLength, 1, maxLength - 1000)
+    // neither limit is available: keep the maxLength argument (or its default) unchanged
   }
 
-  const splits = text.split(/[,，。?？!！;；]/).map((s) => s.trim())
-  const splitsLength = splits.map((s) => (tiktoken ? encode(s).length : s.length))
-  const length = splitsLength.reduce((sum, length) => sum + length, 0)
+  if (userConfig.maxResponseTokenLength) {
+    maxLength = clamp(maxLength, 1, userConfig.maxResponseTokenLength)
+  }
+  maxLength -= 100 // give some buffer
 
-  const cropLength = length - startLength - endLength
+  const splits = text.split(/[,，。?？!！;；\n]/).map((s) => s.trim())
+  const splitsLength = splits.map((s) => (tiktoken ? encode(s).length : s.length))
   const cropTargetLength = maxLength - startLength - endLength
-  const cropPercentage = cropTargetLength / cropLength
-  const cropStep = Math.max(0, 1 / cropPercentage - 1)
-
-  if (cropStep === 0) return text
 
+  let firstHalfTokens = 0
+  let secondHalfTokens = 0
+  const halfTargetTokens = Math.floor(cropTargetLength / 2)
+  let middleIndex = -1
+  let endStartIndex = splits.length
+  let totalTokens = splitsLength.reduce((sum, length) => sum + length + 1, 0)
+  let croppedTokens = 0
   let croppedText = ''
   let currentLength = 0
-  let currentIndex = 0
-  let currentStep = 0
 
-  for (; currentIndex < splits.length; currentIndex++) {
-    if (currentLength + splitsLength[currentIndex] + 1 <= startLength) {
-      croppedText += splits[currentIndex] + ','
-      currentLength += splitsLength[currentIndex] + 1
-    } else if (currentLength + splitsLength[currentIndex] + 1 + endLength <= maxLength) {
-      if (currentStep < cropStep) {
-        currentStep++
-      } else {
-        croppedText += splits[currentIndex] + ','
-        currentLength += splitsLength[currentIndex] + 1
-        currentStep = currentStep - cropStep
-      }
+  // First pass: find the middle
+  for (let i = 0; i < splits.length; i++) {
+    if (firstHalfTokens < halfTargetTokens) {
+      firstHalfTokens += splitsLength[i] + 1
     } else {
+      middleIndex = i
       break
     }
   }
 
-  let endPart = ''
-  let endPartLength = 0
-  for (let i = splits.length - 1; endPartLength + splitsLength[i] <= endLength; i--) {
-    endPart = splits[i] + ',' + endPart
-    endPartLength += splitsLength[i] + 1
+  // Second pass: find the start of the end section
+  for (let i = splits.length - 1; i >= middleIndex; i--) {
+    secondHalfTokens += splitsLength[i] + 1
+    if (secondHalfTokens >= halfTargetTokens) {
+      endStartIndex = i
+      break
+    }
   }
-  currentLength += endPartLength
-  croppedText += endPart
+
+  // Calculate cropped tokens
+  croppedTokens = totalTokens - firstHalfTokens - secondHalfTokens
+
+  // Construct the cropped text
+  croppedText = splits.slice(0, middleIndex).join('\n')
+  if (croppedTokens > 0) {
+    croppedText += `\n\n**Important disclaimer**, this text is incomplete! ${croppedTokens} or ${
+      Number((croppedTokens / totalTokens).toFixed(2)) * 100
+    }% of tokens have been removed from this location in the text due to lack limited model context of ${maxLength}\n\n`
+  }
+  croppedText += splits.slice(endStartIndex).join('\n')
+
+  currentLength = firstHalfTokens + secondHalfTokens + (middleIndex !== endStartIndex ? 20 : 0) // 20 is approx the length of the disclaimer
 
   console.log(
     `input maxLength: ${maxLength}\n` +