From dfbdd1732e034983c21e32cd3c487feac5659515 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 24 Aug 2024 01:15:27 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix:=20remove=20orphan=20chunks?= =?UTF-8?q?=20if=20there=20is=20no=20related=20file=20(#3578)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✅ test: add more tests * 🐛 fix: add `DISABLE_REMOVE_GLOBAL_FILE` in dbENV * 🐛 fix: remove orphan chunks if there is no related file * ✅ test: fix test * 👷 fix: throw error when not set `APP_URL` at server side * ✅ test: fix test --- .github/workflows/release.yml | 5 +- .github/workflows/test.yml | 3 +- src/config/app.ts | 17 +- src/config/db.ts | 4 + .../server/models/__tests__/chunk.test.ts | 61 +++- .../server/models/__tests__/file.test.ts | 277 +++++++++++++----- .../models/__tests__/knowledgeBase.test.ts | 130 +++++++- .../server/models/__tests__/user.test.ts | 37 ++- src/database/server/models/chunk.ts | 15 +- src/database/server/models/file.ts | 6 +- src/server/routers/lambda/file.ts | 5 + 11 files changed, 479 insertions(+), 81 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 877742504add..698f0187ba4f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,7 +18,7 @@ jobs: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 ports: - 5432:5432 - + steps: - uses: actions/checkout@v4 @@ -40,7 +40,8 @@ jobs: DATABASE_DRIVER: node NEXT_PUBLIC_SERVICE_MODE: server KEY_VAULTS_SECRET: LA7n9k3JdEcbSgml2sxfw+4TV1AzaaFU5+R176aQz4s= - NEXT_PUBLIC_S3_DOMAIN: https://example.com + S3_PUBLIC_DOMAIN: https://example.com + APP_URL: https://home.com - name: Test App Coverage run: bun run test-app:coverage diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6b9708bb258d..02b4c5641d23 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,7 +39,8 @@ jobs: DATABASE_DRIVER: node 
NEXT_PUBLIC_SERVICE_MODE: server KEY_VAULTS_SECRET: LA7n9k3JdEcbSgml2sxfw+4TV1AzaaFU5+R176aQz4s= - NEXT_PUBLIC_S3_DOMAIN: https://example.com + S3_PUBLIC_DOMAIN: https://example.com + APP_URL: https://home.com - name: Upload Server coverage to Codecov uses: codecov/codecov-action@v4 diff --git a/src/config/app.ts b/src/config/app.ts index 3ad2493ea336..27e86ab861e0 100644 --- a/src/config/app.ts +++ b/src/config/app.ts @@ -2,6 +2,8 @@ import { createEnv } from '@t3-oss/env-nextjs'; import { z } from 'zod'; +import { isServerMode } from '@/const/version'; + declare global { // eslint-disable-next-line @typescript-eslint/no-namespace namespace NodeJS { @@ -10,14 +12,19 @@ declare global { } } } +const isInVercel = process.env.VERCEL === '1'; -export const getAppConfig = () => { - const ACCESS_CODES = process.env.ACCESS_CODE?.split(',').filter(Boolean) || []; - const isInVercel = process.env.VERCEL === '1'; +const vercelUrl = `https://${process.env.VERCEL_URL}`; - const vercelUrl = `https://${process.env.VERCEL_URL}`; +const APP_URL = process.env.APP_URL ? process.env.APP_URL : isInVercel ? vercelUrl : undefined; - const APP_URL = process.env.APP_URL ? process.env.APP_URL : isInVercel ? 
vercelUrl : undefined; +// only throw error in server mode and server side +if (typeof window === 'undefined' && isServerMode && !APP_URL) { + throw new Error('`APP_URL` is required in server mode'); +} + +export const getAppConfig = () => { + const ACCESS_CODES = process.env.ACCESS_CODE?.split(',').filter(Boolean) || []; return createEnv({ client: { diff --git a/src/config/db.ts b/src/config/db.ts index 4c02c3bf0f43..6d6361c22f96 100644 --- a/src/config/db.ts +++ b/src/config/db.ts @@ -11,6 +11,8 @@ export const getServerDBConfig = () => { DATABASE_TEST_URL: process.env.DATABASE_TEST_URL, DATABASE_URL: process.env.DATABASE_URL, + DISABLE_REMOVE_GLOBAL_FILE: process.env.DISABLE_REMOVE_GLOBAL_FILE === '1', + KEY_VAULTS_SECRET: process.env.KEY_VAULTS_SECRET, NEXT_PUBLIC_ENABLED_SERVER_SERVICE: process.env.NEXT_PUBLIC_SERVICE_MODE === 'server', @@ -20,6 +22,8 @@ export const getServerDBConfig = () => { DATABASE_TEST_URL: z.string().optional(), DATABASE_URL: z.string().optional(), + DISABLE_REMOVE_GLOBAL_FILE: z.boolean().optional(), + KEY_VAULTS_SECRET: z.string().optional(), }, }); diff --git a/src/database/server/models/__tests__/chunk.test.ts b/src/database/server/models/__tests__/chunk.test.ts index 072f38e354cc..4e94c35a55a2 100644 --- a/src/database/server/models/__tests__/chunk.test.ts +++ b/src/database/server/models/__tests__/chunk.test.ts @@ -98,7 +98,66 @@ describe('ChunkModel', () => { }); }); - // Add more test cases for other methods... 
+ describe('deleteOrphanChunks', () => { + it('should delete orphaned chunks', async () => { + // Create orphaned chunks + await serverDB + .insert(chunks) + .values([ + { text: 'Orphan Chunk 1', userId }, + { text: 'Orphan Chunk 2', userId }, + ]) + .returning(); + + // Create a non-orphaned chunk + const [nonOrphanChunk] = await serverDB + .insert(chunks) + .values([{ text: 'Non-Orphan Chunk', userId }]) + .returning(); + + await serverDB.insert(fileChunks).values([{ fileId: '1', chunkId: nonOrphanChunk.id }]); + + // Execute the method + await chunkModel.deleteOrphanChunks(); + + // Check if orphaned chunks are deleted + const remainingChunks = await serverDB.query.chunks.findMany(); + expect(remainingChunks).toHaveLength(1); + expect(remainingChunks[0].id).toBe(nonOrphanChunk.id); + }); + + it('should not delete any chunks when there are no orphans', async () => { + // Create non-orphaned chunks + const [chunk1, chunk2] = await serverDB + .insert(chunks) + .values([ + { text: 'Chunk 1', userId }, + { text: 'Chunk 2', userId }, + ]) + .returning(); + + await serverDB.insert(fileChunks).values([ + { fileId: '1', chunkId: chunk1.id }, + { fileId: '2', chunkId: chunk2.id }, + ]); + + // Execute the method + await chunkModel.deleteOrphanChunks(); + + // Check if all chunks are still present + const remainingChunks = await serverDB.query.chunks.findMany(); + expect(remainingChunks).toHaveLength(2); + }); + + it('should not throw an error when the database is empty', async () => { + // Ensure the database is empty + await serverDB.delete(chunks); + await serverDB.delete(fileChunks); + + // Execute the method and expect it not to throw + await expect(chunkModel.deleteOrphanChunks()).resolves.not.toThrow(); + }); + }); describe('semanticSearch', () => { it('should perform semantic search and return results', async () => { diff --git a/src/database/server/models/__tests__/file.test.ts b/src/database/server/models/__tests__/file.test.ts index 705abf7c9b04..20d0f24a4354 
100644 --- a/src/database/server/models/__tests__/file.test.ts +++ b/src/database/server/models/__tests__/file.test.ts @@ -1,7 +1,8 @@ // @vitest-environment node -import { eq } from 'drizzle-orm'; +import { eq, inArray } from 'drizzle-orm'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { getServerDBConfig, serverDBEnv } from '@/config/db'; import { getTestDBInstance } from '@/database/server/core/dbForTest'; import { FilesTabs, SortType } from '@/types/files'; @@ -22,6 +23,20 @@ vi.mock('@/database/server/core/db', async () => ({ }, })); +let DISABLE_REMOVE_GLOBAL_FILE = false; + +vi.mock('@/config/db', async () => ({ + get serverDBEnv() { + return { + get DISABLE_REMOVE_GLOBAL_FILE() { + return DISABLE_REMOVE_GLOBAL_FILE; + }, + DATABASE_TEST_URL: process.env.DATABASE_TEST_URL, + DATABASE_DRIVER: 'node', + }; + }, +})); + const userId = 'file-model-test-user-id'; const fileModel = new FileModel(userId); @@ -73,18 +88,21 @@ describe('FileModel', () => { }); }); - it('should create a global file', async () => { - const globalFile = { - hashId: 'test-hash', - fileType: 'text/plain', - size: 100, - url: 'https://example.com/global-file.txt', - metadata: { key: 'value' }, - }; + describe('createGlobalFile', () => { + it('should create a global file', async () => { + const globalFile = { + hashId: 'test-hash', + fileType: 'text/plain', + size: 100, + url: 'https://example.com/global-file.txt', + metadata: { key: 'value' }, + }; - const result = await fileModel.createGlobalFile(globalFile); - expect(result[0]).toMatchObject(globalFile); + const result = await fileModel.createGlobalFile(globalFile); + expect(result[0]).toMatchObject(globalFile); + }); }); + describe('checkHash', () => { it('should return isExist: false for non-existent hash', async () => { const result = await fileModel.checkHash('non-existent-hash'); @@ -113,58 +131,183 @@ describe('FileModel', () => { }); }); - it('should delete a file by id', async () => { - const { 
id } = await fileModel.create({ - name: 'test-file.txt', - url: 'https://example.com/test-file.txt', - size: 100, - fileType: 'text/plain', - }); + describe('delete', () => { + it('should delete a file by id', async () => { + await fileModel.createGlobalFile({ + hashId: '1', + url: 'https://example.com/file1.txt', + size: 100, + fileType: 'text/plain', + }); - await fileModel.delete(id); + const { id } = await fileModel.create({ + name: 'test-file.txt', + url: 'https://example.com/test-file.txt', + size: 100, + fileType: 'text/plain', + fileHash: '1', + }); - const file = await serverDB.query.files.findFirst({ where: eq(files.id, id) }); - expect(file).toBeUndefined(); - }); + await fileModel.delete(id); - it('should delete multiple files', async () => { - const file1 = await fileModel.create({ - name: 'file1.txt', - url: 'https://example.com/file1.txt', - size: 100, - fileType: 'text/plain', - }); - const file2 = await fileModel.create({ - name: 'file2.txt', - url: 'https://example.com/file2.txt', - size: 200, - fileType: 'text/plain', + const file = await serverDB.query.files.findFirst({ where: eq(files.id, id) }); + const globalFile = await serverDB.query.globalFiles.findFirst({ + where: eq(globalFiles.hashId, '1'), + }); + + expect(file).toBeUndefined(); + expect(globalFile).toBeUndefined(); }); + it('should delete a file by id but global file not removed ', async () => { + DISABLE_REMOVE_GLOBAL_FILE = true; + await fileModel.createGlobalFile({ + hashId: '1', + url: 'https://example.com/file1.txt', + size: 100, + fileType: 'text/plain', + }); - await fileModel.deleteMany([file1.id, file2.id]); + const { id } = await fileModel.create({ + name: 'test-file.txt', + url: 'https://example.com/test-file.txt', + size: 100, + fileType: 'text/plain', + fileHash: '1', + }); + + await fileModel.delete(id); + + const file = await serverDB.query.files.findFirst({ where: eq(files.id, id) }); + const globalFile = await serverDB.query.globalFiles.findFirst({ + where: 
eq(globalFiles.hashId, '1'), + }); - const remainingFiles = await serverDB.query.files.findMany({ where: eq(files.userId, userId) }); - expect(remainingFiles).toHaveLength(0); + expect(file).toBeUndefined(); + expect(globalFile).toBeDefined(); + DISABLE_REMOVE_GLOBAL_FILE = false; + }); }); - it('should clear all files for the user', async () => { - await fileModel.create({ - name: 'test-file-1.txt', - url: 'https://example.com/test-file-1.txt', - size: 100, - fileType: 'text/plain', + describe('deleteMany', () => { + it('should delete multiple files', async () => { + await fileModel.createGlobalFile({ + hashId: '1', + url: 'https://example.com/file1.txt', + size: 100, + fileType: 'text/plain', + }); + await fileModel.createGlobalFile({ + hashId: '2', + url: 'https://example.com/file2.txt', + size: 200, + fileType: 'text/plain', + }); + + const file1 = await fileModel.create({ + name: 'file1.txt', + url: 'https://example.com/file1.txt', + size: 100, + fileHash: '1', + fileType: 'text/plain', + }); + const file2 = await fileModel.create({ + name: 'file2.txt', + url: 'https://example.com/file2.txt', + size: 200, + fileType: 'text/plain', + fileHash: '2', + }); + const globalFilesResult = await serverDB.query.globalFiles.findMany({ + where: inArray(globalFiles.hashId, ['1', '2']), + }); + expect(globalFilesResult).toHaveLength(2); + + await fileModel.deleteMany([file1.id, file2.id]); + + const remainingFiles = await serverDB.query.files.findMany({ + where: eq(files.userId, userId), + }); + const globalFilesResult2 = await serverDB.query.globalFiles.findMany({ + where: inArray( + globalFiles.hashId, + remainingFiles.map((i) => i.fileHash as string), + ), + }); + + expect(remainingFiles).toHaveLength(0); + expect(globalFilesResult2).toHaveLength(0); }); - await fileModel.create({ - name: 'test-file-2.txt', - url: 'https://example.com/test-file-2.txt', - size: 200, - fileType: 'text/plain', + it('should delete multiple files but not remove global files if 
DISABLE_REMOVE_GLOBAL_FILE=true', async () => { + DISABLE_REMOVE_GLOBAL_FILE = true; + await fileModel.createGlobalFile({ + hashId: '1', + url: 'https://example.com/file1.txt', + size: 100, + fileType: 'text/plain', + }); + await fileModel.createGlobalFile({ + hashId: '2', + url: 'https://example.com/file2.txt', + size: 200, + fileType: 'text/plain', + }); + + const file1 = await fileModel.create({ + name: 'file1.txt', + url: 'https://example.com/file1.txt', + size: 100, + fileType: 'text/plain', + fileHash: '1', + }); + const file2 = await fileModel.create({ + name: 'file2.txt', + url: 'https://example.com/file2.txt', + size: 200, + fileType: 'text/plain', + fileHash: '2', + }); + + const globalFilesResult = await serverDB.query.globalFiles.findMany({ + where: inArray(globalFiles.hashId, ['1', '2']), + }); + + expect(globalFilesResult).toHaveLength(2); + + await fileModel.deleteMany([file1.id, file2.id]); + + const remainingFiles = await serverDB.query.files.findMany({ + where: eq(files.userId, userId), + }); + const globalFilesResult2 = await serverDB.query.globalFiles.findMany({ + where: inArray(globalFiles.hashId, ['1', '2']), + }); + + expect(remainingFiles).toHaveLength(0); + expect(globalFilesResult2).toHaveLength(2); + DISABLE_REMOVE_GLOBAL_FILE = false; }); + }); - await fileModel.clear(); + describe('clear', () => { + it('should clear all files for the user', async () => { + await fileModel.create({ + name: 'test-file-1.txt', + url: 'https://example.com/test-file-1.txt', + size: 100, + fileType: 'text/plain', + }); + await fileModel.create({ + name: 'test-file-2.txt', + url: 'https://example.com/test-file-2.txt', + size: 200, + fileType: 'text/plain', + }); + + await fileModel.clear(); - const userFiles = await serverDB.query.files.findMany({ where: eq(files.userId, userId) }); - expect(userFiles).toHaveLength(0); + const userFiles = await serverDB.query.files.findMany({ where: eq(files.userId, userId) }); + expect(userFiles).toHaveLength(0); + }); }); 
describe('Query', () => { @@ -334,22 +477,24 @@ describe('FileModel', () => { }); }); - it('should find a file by id', async () => { - const { id } = await fileModel.create({ - name: 'test-file.txt', - url: 'https://example.com/test-file.txt', - size: 100, - fileType: 'text/plain', - }); + describe('findById', () => { + it('should find a file by id', async () => { + const { id } = await fileModel.create({ + name: 'test-file.txt', + url: 'https://example.com/test-file.txt', + size: 100, + fileType: 'text/plain', + }); - const file = await fileModel.findById(id); - expect(file).toMatchObject({ - id, - name: 'test-file.txt', - url: 'https://example.com/test-file.txt', - size: 100, - fileType: 'text/plain', - userId, + const file = await fileModel.findById(id); + expect(file).toMatchObject({ + id, + name: 'test-file.txt', + url: 'https://example.com/test-file.txt', + size: 100, + fileType: 'text/plain', + userId, + }); }); }); diff --git a/src/database/server/models/__tests__/knowledgeBase.test.ts b/src/database/server/models/__tests__/knowledgeBase.test.ts index 2d26a4079c98..53fa49eeec0d 100644 --- a/src/database/server/models/__tests__/knowledgeBase.test.ts +++ b/src/database/server/models/__tests__/knowledgeBase.test.ts @@ -1,11 +1,18 @@ // @vitest-environment node import { eq } from 'drizzle-orm'; -import { desc } from 'drizzle-orm/expressions'; +import { and, desc } from 'drizzle-orm/expressions'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { getTestDBInstance } from '@/database/server/core/dbForTest'; -import { NewKnowledgeBase, knowledgeBases, users } from '../../schemas/lobechat'; +import { + NewKnowledgeBase, + files, + globalFiles, + knowledgeBaseFiles, + knowledgeBases, + users, +} from '../../schemas/lobechat'; import { KnowledgeBaseModel } from '../knowledgeBase'; let serverDB = await getTestDBInstance(); @@ -21,6 +28,7 @@ const knowledgeBaseModel = new KnowledgeBaseModel(userId); beforeEach(async () => { await 
serverDB.delete(users); + await serverDB.delete(globalFiles); await serverDB.insert(users).values([{ id: userId }, { id: 'user2' }]); }); @@ -129,4 +137,122 @@ describe('KnowledgeBaseModel', () => { }); }); }); + + const fileList = [ + { + id: 'file1', + name: 'document.pdf', + url: 'https://example.com/document.pdf', + fileHash: 'hash1', + size: 1000, + fileType: 'application/pdf', + userId, + }, + { + id: 'file2', + name: 'image.jpg', + url: 'https://example.com/image.jpg', + fileHash: 'hash2', + size: 500, + fileType: 'image/jpeg', + userId, + }, + ]; + + describe('addFilesToKnowledgeBase', () => { + it('should add files to a knowledge base', async () => { + await serverDB.insert(globalFiles).values([ + { + hashId: 'hash1', + url: 'https://example.com/document.pdf', + size: 1000, + fileType: 'application/pdf', + }, + { + hashId: 'hash2', + url: 'https://example.com/image.jpg', + size: 500, + fileType: 'image/jpeg', + }, + ]); + + await serverDB.insert(files).values(fileList); + + const { id: knowledgeBaseId } = await knowledgeBaseModel.create({ name: 'Test Group' }); + const fileIds = ['file1', 'file2']; + + const result = await knowledgeBaseModel.addFilesToKnowledgeBase(knowledgeBaseId, fileIds); + + expect(result).toHaveLength(2); + expect(result).toEqual( + expect.arrayContaining( + fileIds.map((fileId) => expect.objectContaining({ fileId, knowledgeBaseId })), + ), + ); + + const addedFiles = await serverDB.query.knowledgeBaseFiles.findMany({ + where: eq(knowledgeBaseFiles.knowledgeBaseId, knowledgeBaseId), + }); + expect(addedFiles).toHaveLength(2); + }); + }); + + describe('removeFilesFromKnowledgeBase', () => { + it('should remove files from a knowledge base', async () => { + await serverDB.insert(globalFiles).values([ + { + hashId: 'hash1', + url: 'https://example.com/document.pdf', + size: 1000, + fileType: 'application/pdf', + }, + { + hashId: 'hash2', + url: 'https://example.com/image.jpg', + size: 500, + fileType: 'image/jpeg', + }, + ]); + + await 
serverDB.insert(files).values(fileList); + + const { id: knowledgeBaseId } = await knowledgeBaseModel.create({ name: 'Test Group' }); + const fileIds = ['file1', 'file2']; + await knowledgeBaseModel.addFilesToKnowledgeBase(knowledgeBaseId, fileIds); + + const filesToRemove = ['file1']; + await knowledgeBaseModel.removeFilesFromKnowledgeBase(knowledgeBaseId, filesToRemove); + + const remainingFiles = await serverDB.query.knowledgeBaseFiles.findMany({ + where: and(eq(knowledgeBaseFiles.knowledgeBaseId, knowledgeBaseId)), + }); + expect(remainingFiles).toHaveLength(1); + expect(remainingFiles[0].fileId).toBe('file2'); + }); + }); + + describe('static findById', () => { + it('should find a knowledge base by id without user restriction', async () => { + const { id } = await knowledgeBaseModel.create({ name: 'Test Group' }); + + const group = await KnowledgeBaseModel.findById(id); + expect(group).toMatchObject({ + id, + name: 'Test Group', + userId, + }); + }); + + it('should find a knowledge base created by another user', async () => { + const anotherKnowledgeBaseModel = new KnowledgeBaseModel('user2'); + const { id } = await anotherKnowledgeBaseModel.create({ name: 'Another User Group' }); + + const group = await KnowledgeBaseModel.findById(id); + expect(group).toMatchObject({ + id, + name: 'Another User Group', + userId: 'user2', + }); + }); + }); }); diff --git a/src/database/server/models/__tests__/user.test.ts b/src/database/server/models/__tests__/user.test.ts index 58f54f9f5c80..60c945d9fe20 100644 --- a/src/database/server/models/__tests__/user.test.ts +++ b/src/database/server/models/__tests__/user.test.ts @@ -4,7 +4,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { INBOX_SESSION_ID } from '@/const/session'; import { getTestDBInstance } from '@/database/server/core/dbForTest'; import { KeyVaultsGateKeeper } from '@/server/modules/KeyVaultsEncrypt'; -import { UserPreference } from '@/types/user'; +import { UserGuide, 
UserPreference } from '@/types/user'; import { UserSettings } from '@/types/user/settings'; import { userSettings, users } from '../../schemas/lobechat'; @@ -167,6 +167,24 @@ describe('UserModel', () => { const { plaintext } = await gateKeeper.decrypt(updatedSettings!.keyVaults!); expect(JSON.parse(plaintext)).toEqual(settings.keyVaults); }); + + it('should update user settings with encrypted keyVaults', async () => { + const settings = { + general: { language: 'en-US' }, + } as UserSettings; + await serverDB.insert(users).values({ id: userId }); + await serverDB.insert(userSettings).values({ ...settings, keyVaults: '', id: userId }); + + const newSettings = { + general: { fontSize: 16, language: 'zh-CN', themeMode: 'dark' }, + } as UserSettings; + await userModel.updateSetting(userId, newSettings); + + const updatedSettings = await serverDB.query.userSettings.findFirst({ + where: eq(users.id, userId), + }); + expect(updatedSettings?.general).toEqual(newSettings.general); + }); }); describe('updatePreference', () => { @@ -183,4 +201,21 @@ describe('UserModel', () => { expect(updatedUser?.preference).toEqual({ ...preference, ...newPreference }); }); }); + + describe('updateGuide', () => { + it('should update user guide', async () => { + const preference = { guide: { topic: false } } as UserGuide; + await serverDB.insert(users).values({ id: userId, preference }); + + const newGuide: Partial = { + topic: true, + moveSettingsToAvatar: true, + uploadFileInKnowledgeBase: true, + }; + await userModel.updateGuide(userId, newGuide); + + const updatedUser = await serverDB.query.users.findFirst({ where: eq(users.id, userId) }); + expect(updatedUser?.preference).toEqual({ ...preference, guide: newGuide }); + }); + }); }); diff --git a/src/database/server/models/chunk.ts b/src/database/server/models/chunk.ts index 9a785c4041d6..174f761bc2cf 100644 --- a/src/database/server/models/chunk.ts +++ b/src/database/server/models/chunk.ts @@ -1,5 +1,5 @@ import { asc, cosineDistance, 
count, eq, inArray, sql } from 'drizzle-orm'; -import { and, desc } from 'drizzle-orm/expressions'; +import { and, desc, isNull } from 'drizzle-orm/expressions'; import { serverDB } from '@/database/server'; import { ChunkMetadata, FileChunk, SemanticSearchChunk } from '@/types/chunk'; @@ -43,6 +43,19 @@ export class ChunkModel { return serverDB.delete(chunks).where(and(eq(chunks.id, id), eq(chunks.userId, this.userId))); }; + deleteOrphanChunks = async () => { + const orphanedChunks = await serverDB + .select({ chunkId: chunks.id }) + .from(chunks) + .leftJoin(fileChunks, eq(chunks.id, fileChunks.chunkId)) + .where(isNull(fileChunks.fileId)); + + const ids = orphanedChunks.map((chunk) => chunk.chunkId); + if (ids.length === 0) return; + + await serverDB.delete(chunks).where(inArray(chunks.id, ids)); + }; + findById = async (id: string) => { return serverDB.query.chunks.findFirst({ where: and(eq(chunks.id, id)), diff --git a/src/database/server/models/file.ts b/src/database/server/models/file.ts index 4239468b2b78..3237a262c356 100644 --- a/src/database/server/models/file.ts +++ b/src/database/server/models/file.ts @@ -1,6 +1,7 @@ import { asc, count, eq, ilike, inArray, notExists } from 'drizzle-orm'; import { and, desc } from 'drizzle-orm/expressions'; +import { serverDBEnv } from '@/config/db'; import { serverDB } from '@/database/server/core/db'; import { FilesTabs, QueryFileListParams, SortType } from '@/types/files'; @@ -77,7 +78,8 @@ export class FileModel { const fileCount = result[0].count; // delete the file from global file if it is not used by other files - if (fileCount === 0) { + // if `DISABLE_REMOVE_GLOBAL_FILE` is true, we will not remove the global file + if (fileCount === 0 && !serverDBEnv.DISABLE_REMOVE_GLOBAL_FILE) { await trx.delete(globalFiles).where(eq(globalFiles.hashId, fileHash)); return file; @@ -118,7 +120,7 @@ export class FileModel { const needToDeleteList = fileHashCounts.filter((item) => item.count === 0); - if 
(needToDeleteList.length === 0) return; + if (needToDeleteList.length === 0 || serverDBEnv.DISABLE_REMOVE_GLOBAL_FILE) return; // delete the file from global file if it is not used by other files await trx.delete(globalFiles).where( diff --git a/src/server/routers/lambda/file.ts b/src/server/routers/lambda/file.ts index 0725fda6a4e9..97c1083ce68f 100644 --- a/src/server/routers/lambda/file.ts +++ b/src/server/routers/lambda/file.ts @@ -150,6 +150,8 @@ export const fileRouter = router({ removeFile: fileProcedure.input(z.object({ id: z.string() })).mutation(async ({ input, ctx }) => { const file = await ctx.fileModel.delete(input.id); + // delete the orphan chunks + await ctx.chunkModel.deleteOrphanChunks(); if (!file) return; // delele the file from remove from S3 if it is not used by other files @@ -162,6 +164,9 @@ export const fileRouter = router({ .mutation(async ({ input, ctx }) => { const needToRemoveFileList = await ctx.fileModel.deleteMany(input.ids); + // delete the orphan chunks + await ctx.chunkModel.deleteOrphanChunks(); + if (!needToRemoveFileList || needToRemoveFileList.length === 0) return; // remove from S3