forked from lobehub/lobe-chat
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
⚡️ perf: support latex chunking (lobehub#3592)
* 💄 style: support latex chunking
* ✅ test: fix test
- Loading branch information
Showing 6 changed files with 352 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
205 changes: 205 additions & 0 deletions
205
src/libs/langchain/loaders/latex/__tests__/__snapshots__/index.test.ts.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html | ||
|
||
exports[`LatexLoader > should run 1`] = ` | ||
[ | ||
Document { | ||
"id": undefined, | ||
"metadata": { | ||
"loc": { | ||
"lines": { | ||
"from": 1, | ||
"to": 41, | ||
}, | ||
}, | ||
}, | ||
"pageContent": "\\documentclass{article} | ||
\\usepackage{graphicx} % Required for inserting images | ||
\\usepackage{amsmath} % Required for mathematical symbols | ||
\\usepackage{hyperref} % For hyperlinks | ||
\\title{Sample LaTeX Document} | ||
\\author{Generated by ChatGPT} | ||
\\date{\\today} | ||
\\begin{document} | ||
\\maketitle | ||
\\tableofcontents | ||
\\section{Introduction} | ||
This is a sample LaTeX document that includes various common elements such as sections, lists, tables, figures, and mathematical equations. | ||
\\section{Lists} | ||
\\subsection{Itemized List} | ||
\\begin{itemize} | ||
\\item First item | ||
\\item Second item | ||
\\item Third item | ||
\\end{itemize} | ||
\\subsection{Enumerated List} | ||
\\begin{enumerate} | ||
\\item First item | ||
\\item Second item | ||
\\item Third item | ||
\\end{enumerate}", | ||
}, | ||
Document { | ||
"id": undefined, | ||
"metadata": { | ||
"loc": { | ||
"lines": { | ||
"from": 27, | ||
"to": 61, | ||
}, | ||
}, | ||
}, | ||
"pageContent": "\\section{Lists} | ||
\\subsection{Itemized List} | ||
\\begin{itemize} | ||
\\item First item | ||
\\item Second item | ||
\\item Third item | ||
\\end{itemize} | ||
\\subsection{Enumerated List} | ||
\\begin{enumerate} | ||
\\item First item | ||
\\item Second item | ||
\\item Third item | ||
\\end{enumerate} | ||
\\section{Mathematical Equations} | ||
Here are some sample mathematical equations: | ||
\\subsection{Inline Equation} | ||
This is an inline equation: \\( E = mc^2 \\). | ||
\\subsection{Displayed Equations} | ||
\\begin{equation} | ||
a^2 + b^2 = c^2 | ||
\\end{equation} | ||
\\begin{align} | ||
x &= y + z \\\\ | ||
y &= mx + b | ||
\\end{align}", | ||
}, | ||
Document { | ||
"id": undefined, | ||
"metadata": { | ||
"loc": { | ||
"lines": { | ||
"from": 44, | ||
"to": 93, | ||
}, | ||
}, | ||
}, | ||
"pageContent": "\\section{Mathematical Equations} | ||
Here are some sample mathematical equations: | ||
\\subsection{Inline Equation} | ||
This is an inline equation: \\( E = mc^2 \\). | ||
\\subsection{Displayed Equations} | ||
\\begin{equation} | ||
a^2 + b^2 = c^2 | ||
\\end{equation} | ||
\\begin{align} | ||
x &= y + z \\\\ | ||
y &= mx + b | ||
\\end{align} | ||
\\section{Tables} | ||
Here is a sample table: | ||
\\begin{table}[h!] | ||
\\centering | ||
\\begin{tabular}{|c|c|c|} | ||
\\hline | ||
Header 1 & Header 2 & Header 3 \\\\ | ||
\\hline | ||
Data 1 & Data 2 & Data 3 \\\\ | ||
Data 4 & Data 5 & Data 6 \\\\ | ||
Data 7 & Data 8 & Data 9 \\\\ | ||
\\hline | ||
\\end{tabular} | ||
\\caption{Sample Table} | ||
\\label{table:1} | ||
\\end{table} | ||
\\section{Figures} | ||
Here is a sample figure: | ||
\\begin{figure}[h!] | ||
\\centering | ||
\\includegraphics[width=0.5\\textwidth]{example-image} | ||
\\caption{Sample Figure} | ||
\\label{fig:1} | ||
\\end{figure}", | ||
}, | ||
Document { | ||
"id": undefined, | ||
"metadata": { | ||
"loc": { | ||
"lines": { | ||
"from": 84, | ||
"to": 112, | ||
}, | ||
}, | ||
}, | ||
"pageContent": "\\section{Figures} | ||
Here is a sample figure: | ||
\\begin{figure}[h!] | ||
\\centering | ||
\\includegraphics[width=0.5\\textwidth]{example-image} | ||
\\caption{Sample Figure} | ||
\\label{fig:1} | ||
\\end{figure} | ||
\\section{Sections and Subsections} | ||
This is an example of a section with subsections. | ||
\\subsection{Subsection 1} | ||
Content of subsection 1. | ||
\\subsection{Subsection 2} | ||
Content of subsection 2. | ||
\\section{References} | ||
Here is a reference to the table \\ref{table:1} and the figure \\ref{fig:1}. | ||
\\end{document}", | ||
}, | ||
] | ||
`; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
\documentclass{article} | ||
|
||
|
||
\usepackage{graphicx} % Required for inserting images | ||
\usepackage{amsmath} % Required for mathematical symbols | ||
\usepackage{hyperref} % For hyperlinks | ||
|
||
|
||
\title{Sample LaTeX Document} | ||
\author{Generated by ChatGPT} | ||
\date{\today} | ||
|
||
|
||
\begin{document} | ||
|
||
|
||
\maketitle | ||
|
||
|
||
\tableofcontents | ||
|
||
|
||
\section{Introduction} | ||
This is a sample LaTeX document that includes various common elements such as sections, lists, tables, figures, and mathematical equations. | ||
|
||
|
||
\section{Lists} | ||
\subsection{Itemized List} | ||
\begin{itemize} | ||
\item First item | ||
\item Second item | ||
\item Third item | ||
\end{itemize} | ||
|
||
|
||
\subsection{Enumerated List} | ||
\begin{enumerate} | ||
\item First item | ||
\item Second item | ||
\item Third item | ||
\end{enumerate} | ||
|
||
|
||
\section{Mathematical Equations} | ||
Here are some sample mathematical equations: | ||
|
||
|
||
\subsection{Inline Equation} | ||
This is an inline equation: \( E = mc^2 \). | ||
|
||
|
||
\subsection{Displayed Equations} | ||
\begin{equation} | ||
a^2 + b^2 = c^2 | ||
\end{equation} | ||
|
||
|
||
\begin{align} | ||
x &= y + z \\ | ||
y &= mx + b | ||
\end{align} | ||
|
||
|
||
\section{Tables} | ||
Here is a sample table: | ||
|
||
|
||
\begin{table}[h!] | ||
\centering | ||
\begin{tabular}{|c|c|c|} | ||
\hline | ||
Header 1 & Header 2 & Header 3 \\ | ||
\hline | ||
Data 1 & Data 2 & Data 3 \\ | ||
Data 4 & Data 5 & Data 6 \\ | ||
Data 7 & Data 8 & Data 9 \\ | ||
\hline | ||
\end{tabular} | ||
\caption{Sample Table} | ||
\label{table:1} | ||
\end{table} | ||
|
||
|
||
\section{Figures} | ||
Here is a sample figure: | ||
|
||
|
||
\begin{figure}[h!] | ||
\centering | ||
\includegraphics[width=0.5\textwidth]{example-image} | ||
\caption{Sample Figure} | ||
\label{fig:1} | ||
\end{figure} | ||
|
||
|
||
\section{Sections and Subsections} | ||
This is an example of a section with subsections. | ||
|
||
|
||
\subsection{Subsection 1} | ||
Content of subsection 1. | ||
|
||
|
||
\subsection{Subsection 2} | ||
Content of subsection 2. | ||
|
||
|
||
\section{References} | ||
Here is a reference to the table \ref{table:1} and the figure \ref{fig:1}. | ||
|
||
|
||
\end{document} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// @vitest-environment node | ||
import * as fs from 'node:fs'; | ||
import { join } from 'node:path'; | ||
import { expect } from 'vitest'; | ||
|
||
import { LatexLoader } from '../index'; | ||
|
||
// Snapshot test: feeds the bundled demo.tex fixture through LatexLoader and
// compares the resulting documents against the stored snapshot.
describe('LatexLoader', () => {
  it('should run', async () => {
    // The fixture sits next to this test file.
    const fixturePath = join(__dirname, `./demo.tex`);
    const latexSource = fs.readFileSync(fixturePath, 'utf-8');

    const documents = await LatexLoader(latexSource);

    expect(documents).toMatchSnapshot();
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import { LatexTextSplitter } from 'langchain/text_splitter'; | ||
|
||
import { loaderConfig } from '../config'; | ||
|
||
/**
 * Splits a LaTeX source string into documents using LangChain's
 * `LatexTextSplitter`.
 *
 * The splitter is constructed from the shared `loaderConfig`; the chunking
 * parameters live there and are not visible from this module.
 *
 * @param text - The LaTeX source to split.
 * @returns A promise resolving to the documents returned by
 *   `createDocuments` for the single input text.
 */
export const LatexLoader = async (text: string) => {
  const latexSplitter = new LatexTextSplitter(loaderConfig);
  const documents = await latexSplitter.createDocuments([text]);

  return documents;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
// NOTE(review): this line and the next appear to be the two sides of a one-line diff hunk (`@@ -1 +1 @@`), not two declarations in the same file; this one looks like the pre-change union without 'latex' — confirm against the repository.
export type LangChainLoaderType = 'code' | 'ppt' | 'pdf' | 'markdown' | 'doc' | 'text'; | ||
// String-literal union of loader kind names; this side of the hunk adds the 'latex' member.
export type LangChainLoaderType = 'code' | 'ppt' | 'pdf' | 'markdown' | 'doc' | 'text' | 'latex'; |