Skip to content

Commit

Permalink
input for chunk size and chunk overlap
Browse files Browse the repository at this point in the history
  • Loading branch information
dissorial committed May 22, 2023
1 parent a6a3a80 commit ec2104d
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 16 deletions.
1 change: 0 additions & 1 deletion components/main/MessageList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ function MessageList({
userImage,
userName,
}: MessageListProps) {
console.log(messages);
return (
<>
<div className="overflow-y-auto">
Expand Down
80 changes: 80 additions & 0 deletions components/other/ChunkSizeModal.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { Fragment } from 'react';
import { Dialog, Transition } from '@headlessui/react';

/** Props accepted by the chunk-size help modal. */
interface Props {
  /** Whether the modal is currently shown. */
  open: boolean;
  /** State setter the modal calls with `false` to close itself. */
  setOpen: React.Dispatch<React.SetStateAction<boolean>>;
}

/**
 * Help dialog that explains how to pick a chunk size for document ingestion.
 *
 * The dialog is purely informational: it renders static guidance text and a
 * single button that closes it again.
 *
 * NOTE(review): the copy below talks about "tokens"; confirm that this matches
 * the unit the text splitter actually uses for `chunkSize` (a character-based
 * splitter would measure characters, not tokens).
 *
 * @param open - Whether the dialog is shown; forwarded to `Transition.Root`.
 * @param setOpen - State setter used to close the dialog.
 */
function ChunkSizeModal({ open, setOpen }: Props) {
  // Shared by the button below; the dialog itself closes through `onClose`.
  const dismiss = () => setOpen(false);

  return (
    <Transition.Root show={open} as={Fragment}>
      {/* `setOpen` is handed to `onClose` directly, so the dialog library
          decides which value it is called with when the dialog is dismissed. */}
      <Dialog as="div" className="relative z-10" onClose={setOpen}>
        {/* Backdrop: fades in and out behind the panel. */}
        <Transition.Child
          as={Fragment}
          enter="ease-out duration-300"
          enterFrom="opacity-0"
          enterTo="opacity-100"
          leave="ease-in duration-200"
          leaveFrom="opacity-100"
          leaveTo="opacity-0"
        >
          <div className="fixed inset-0 bg-gray-800 bg-opacity-75 transition-opacity" />
        </Transition.Child>

        <div className="fixed inset-0 z-10 overflow-y-auto">
          <div className="flex min-h-full items-end justify-center p-4 text-center sm:items-center sm:p-0">
            {/* Panel: fades and slides/scales into place. */}
            <Transition.Child
              as={Fragment}
              enter="ease-out duration-300"
              enterFrom="opacity-0 translate-y-4 sm:translate-y-0 sm:scale-95"
              enterTo="opacity-100 translate-y-0 sm:scale-100"
              leave="ease-in duration-200"
              leaveFrom="opacity-100 translate-y-0 sm:scale-100"
              leaveTo="opacity-0 translate-y-4 sm:translate-y-0 sm:scale-95"
            >
              <Dialog.Panel className="relative transform overflow-hidden rounded-lg bg-white px-4 pb-4 pt-5 text-left shadow-xl transition-all sm:my-8 sm:w-full sm:max-w-sm sm:p-6">
                <div>
                  <div className="mt-3 text-center sm:mt-5">
                    <Dialog.Title
                      as="h3"
                      className="text-base font-semibold leading-6 text-gray-900"
                    >
                      Chunk size
                    </Dialog.Title>
                    <div className="mt-2">
                      <p className="text-sm text-gray-500">
                        Values in the range of 500-1,200 tokens are suitable for
                        average-length documents (a few pages) and strike a
                        balance between capturing context and computational
                        efficiency. The default is set to 1,200 tokens, which is
                        a good starting point for most use cases.
                        <br />
                        <br />
                        Values in the range of 2,000-4,000 tokens are suitable
                        for long documents (10+ pages). Longer documents are
                        more computationally expensive, but the extra context
                        may result in better responses.
                      </p>
                    </div>
                  </div>
                </div>
                <div className="mt-5 sm:mt-6">
                  {/* The button only closes the dialog (no navigation), so it
                      is labelled "Dismiss" rather than "Go back to dashboard". */}
                  <button
                    type="button"
                    className="inline-flex w-full justify-center rounded-md bg-indigo-600 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-indigo-500 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
                    onClick={dismiss}
                  >
                    Dismiss
                  </button>
                </div>
              </Dialog.Panel>
            </Transition.Child>
          </div>
        </div>
      </Dialog>
    </Transition.Root>
  );
}

export default ChunkSizeModal;
79 changes: 79 additions & 0 deletions components/other/OverlapSizeModal.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { Fragment } from 'react';
import { Dialog, Transition } from '@headlessui/react';

/** Props accepted by the overlap-size help modal. */
interface Props {
  /** Whether the modal is currently shown. */
  open: boolean;
  /** State setter the modal calls with `false` to close itself. */
  setOpen: React.Dispatch<React.SetStateAction<boolean>>;
}

/**
 * Informational dialog describing how much overlap to use between chunks.
 *
 * Renders static guidance text plus a "Dismiss" button; visibility is driven
 * entirely by the `open` / `setOpen` pair supplied by the parent.
 *
 * @param open - Whether the dialog is shown; forwarded to `Transition.Root`.
 * @param setOpen - State setter used to close the dialog.
 */
function OverlapSizeModal({ open, setOpen }: Props) {
  // Transition settings for the dimmed backdrop (plain fade).
  const backdropTransition = {
    enter: 'ease-out duration-300',
    enterFrom: 'opacity-0',
    enterTo: 'opacity-100',
    leave: 'ease-in duration-200',
    leaveFrom: 'opacity-100',
    leaveTo: 'opacity-0',
  };

  // Transition settings for the panel (fade combined with slide/scale).
  const panelTransition = {
    enter: 'ease-out duration-300',
    enterFrom: 'opacity-0 translate-y-4 sm:translate-y-0 sm:scale-95',
    enterTo: 'opacity-100 translate-y-0 sm:scale-100',
    leave: 'ease-in duration-200',
    leaveFrom: 'opacity-100 translate-y-0 sm:scale-100',
    leaveTo: 'opacity-0 translate-y-4 sm:translate-y-0 sm:scale-95',
  };

  const panelClasses =
    'relative transform overflow-hidden rounded-lg bg-white px-4 pb-4 pt-5 text-left shadow-xl transition-all sm:my-8 sm:w-full sm:max-w-sm sm:p-6';
  const dismissButtonClasses =
    'inline-flex w-full justify-center rounded-md bg-indigo-600 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-indigo-500 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600';

  // Click handler for the button; ignores the click event it receives.
  const dismiss = () => setOpen(false);

  return (
    <Transition.Root show={open} as={Fragment}>
      <Dialog as="div" className="relative z-10" onClose={setOpen}>
        <Transition.Child as={Fragment} {...backdropTransition}>
          <div className="fixed inset-0 bg-gray-800 bg-opacity-75 transition-opacity" />
        </Transition.Child>

        <div className="fixed inset-0 z-10 overflow-y-auto">
          <div className="flex min-h-full items-end justify-center p-4 text-center sm:items-center sm:p-0">
            <Transition.Child as={Fragment} {...panelTransition}>
              <Dialog.Panel className={panelClasses}>
                <div>
                  <div className="mt-3 text-center sm:mt-5">
                    <Dialog.Title
                      as="h3"
                      className="text-base font-semibold leading-6 text-gray-900"
                    >
                      Overlap size
                    </Dialog.Title>
                    <div className="mt-2">
                      <p className="text-sm text-gray-500">
                        Regardless of the chunk size, an overlap of 10-20% is
                        generally recommended to capture contextual information
                        at the boundaries.
                        <br />
                        <br />
                        If your texts contain important contextual information
                        at the boundaries, or if you want to capture
                        dependencies between adjacent chunks more
                        comprehensively, you can increase the overlap size to
                        30-50% of the chunk size.
                      </p>
                    </div>
                  </div>
                </div>
                <div className="mt-5 sm:mt-6">
                  <button
                    type="button"
                    className={dismissButtonClasses}
                    onClick={dismiss}
                  >
                    Dismiss
                  </button>
                </div>
              </Dialog.Panel>
            </Transition.Child>
          </div>
        </div>
      </Dialog>
    </Transition.Root>
  );
}

export default OverlapSizeModal;
19 changes: 12 additions & 7 deletions pages/api/consume.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,24 @@ export default async function handler(
req: NextApiRequest,
res: NextApiResponse,
) {
const { namespaceName, userEmail } = req.query;
const { namespaceName, userEmail, chunkSize, overlapSize } = req.query;

const PINECONE_INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? '';

try {
await connectDB();

// Create a new namespace with the given name and user email
const newNamespace = new Namespace({
userEmail: userEmail as string,
const existingNamespace = await Namespace.findOne({
name: namespaceName as string,
});
await newNamespace.save();

if (!existingNamespace) {
const newNamespace = new Namespace({
userEmail: userEmail as string,
name: namespaceName as string,
});
await newNamespace.save();
}

// Load PDF files from the specified directory
const directoryLoader = new DirectoryLoader(filePath, {
Expand All @@ -42,8 +47,8 @@ export default async function handler(

// Split the PDF documents into smaller chunks
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1200,
chunkOverlap: 200,
chunkSize: Number(chunkSize),
chunkOverlap: Number(overlapSize),
});

const docs = await textSplitter.splitDocuments(rawDocs);
Expand Down
109 changes: 101 additions & 8 deletions pages/settings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ import React, { useState, useEffect, useCallback } from 'react';
import { useRouter } from 'next/router';
import { useSession } from 'next-auth/react';
import { useDropzone } from 'react-dropzone';
import ChunkSizeModal from '@/components/other/ChunkSizeModal';
import OverlapSizeModal from '@/components/other/OverlapSizeModal';
import { QuestionMarkCircleIcon } from '@heroicons/react/20/solid';

export default function Settings() {
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
Expand All @@ -12,6 +15,12 @@ export default function Settings() {
const [uploadMessage, setUploadMessage] = useState<string>('');
const [error, setError] = useState<string>('');
const [namespaces, setNamespaces] = useState<string[]>([]);
const [chunkSize, setChunkSize] = useState<number>(1200);
const [overlapSize, setOverlapSize] = useState<number>(20);
const [selectedNamespace, setSelectedNamespace] = useState<string>('');
const [showChunkSizeModal, setShowChunkSizeModal] = useState<boolean>(false);
const [showOverlapSizeModal, setShowOverlapSizeModal] =
useState<boolean>(false);
const router = useRouter();

const { data: session, status } = useSession({
Expand Down Expand Up @@ -104,7 +113,7 @@ export default function Settings() {
setLoading(true);

const response = await fetch(
`/api/consume?namespaceName=${namespaceName}&userEmail=${userEmail}`,
`/api/consume?namespaceName=${namespaceName}&userEmail=${userEmail}&chunkSize=${chunkSize}&overlapSize=${overlapSize}`,
{
method: 'POST',
},
Expand Down Expand Up @@ -203,7 +212,7 @@ export default function Settings() {
</span>
)}

<ul role="list" className="space-y-4">
<ul role="list" className="grid grid-cols-2 gap-4">
{namespaces.map((namespace) => (
<li
key={namespace}
Expand All @@ -215,12 +224,29 @@ export default function Settings() {
</p>
</div>
<div className="flex-shrink-0">
<button
className="inline-flex items-center px-3 py-2 border border-transparent text-sm leading-4 font-medium rounded-md shadow-sm text-white bg-red-600 hover:bg-red-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
onClick={() => handleDelete(namespace)}
>
Delete
</button>
{selectedNamespace === namespace ? (
<>
<button
className="inline-flex items-center px-3 py-2 border border-transparent text-sm leading-4 font-medium rounded-md shadow-sm text-white bg-red-600 hover:bg-red-700 focus:ring-indigo-500"
onClick={() => handleDelete(selectedNamespace)}
>
Confirm Delete
</button>
<button
className="ml-2 inline-flex items-center px-3 py-2 border border-transparent text-sm leading-4 font-medium rounded-md shadow-sm text-gray-300 hover:text-gray-400 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
onClick={() => setSelectedNamespace('')}
>
Cancel
</button>
</>
) : (
<button
className="inline-flex items-center px-3 py-2 border border-transparent text-sm leading-4 font-medium rounded-md shadow-sm text-white bg-red-600 hover:bg-red-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
onClick={() => setSelectedNamespace(namespace)}
>
Delete
</button>
)}
</div>
</li>
))}
Expand Down Expand Up @@ -252,6 +278,7 @@ export default function Settings() {
Treat namespaces like topics of conversation. You can create as
many as you like, and they can be used to organize your data.
</p>

<div
className="mt-4 sm:mt-8 flex justify-center"
{...getRootProps()}
Expand Down Expand Up @@ -294,6 +321,71 @@ export default function Settings() {
{uploadMessage ? uploadMessage : 'Upload files'}
</button>
</div>
<div>
<div className="flex items-center">
<label
htmlFor="chunkSize"
className="block text-sm font-medium leading-6 text-gray-300"
>
Chunk size
</label>
<QuestionMarkCircleIcon
className="ml-2 h-5 w-5 text-gray-300 hover:text-gray-400 cursor-pointer"
onClick={() => setShowChunkSizeModal(true)}
/>
</div>

<div className="w-full">
<input
type="range"
min={100}
max={4000}
step={100}
value={chunkSize}
onChange={(e) => setChunkSize(Number(e.target.value))}
className="w-full"
/>

<div className="text-center text-gray-100">{chunkSize}</div>
</div>
</div>

<ChunkSizeModal
open={showChunkSizeModal}
setOpen={setShowChunkSizeModal}
/>
<div>
<div className="flex items-center">
<label
htmlFor="overlapSize"
className="block text-sm font-medium leading-6 text-gray-300"
>
Overlap size
</label>
<QuestionMarkCircleIcon
className="ml-2 h-5 w-5 text-gray-300 cursor-pointer hover:text-gray-400"
onClick={() => setShowOverlapSizeModal(true)}
/>
</div>

<div className="w-full">
<input
type="range"
min={0}
max={50}
step={5}
value={overlapSize}
onChange={(e) => setOverlapSize(Number(e.target.value))}
className="w-full"
/>
<div className="text-center text-gray-100">{overlapSize}%</div>
</div>
</div>
<OverlapSizeModal
open={showOverlapSizeModal}
setOpen={setShowOverlapSizeModal}
/>

{uploadMessage && (
<div className="mt-4 sm:mt-8 grid grid-cols-1 gap-x-4 sm:gap-x-8 gap-y-4 sm:gap-y-6 sm:grid-cols-2">
<div className="sm:col-span-2">
Expand All @@ -303,6 +395,7 @@ export default function Settings() {
>
Namespace name
</label>

<div className="mt-2.5">
<input
type="text"
Expand Down

1 comment on commit ec2104d

@vercel
Copy link

@vercel vercel bot commented on ec2104d May 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.