Skip to content

Commit 06698a8

Browse files
authored
Merge pull request #192 from import-ai/chore/tasks
Chore/tasks
2 parents 6158cc2 + 77bddd9 commit 06698a8

File tree

9 files changed

+182
-25
lines changed

9 files changed

+182
-25
lines changed

src/api-key/api-key.service.spec.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ describe('APIKeyService', () => {
2222
let applicationsRepository: jest.Mocked<Repository<Applications>>;
2323
let permissionsService: jest.Mocked<PermissionsService>;
2424
let namespacesService: jest.Mocked<NamespacesService>;
25-
let userService: jest.Mocked<UserService>;
2625

2726
const mockApiKey = {
2827
id: 'test-api-key-id',
@@ -100,7 +99,6 @@ describe('APIKeyService', () => {
10099
applicationsRepository = module.get(getRepositoryToken(Applications));
101100
permissionsService = module.get(PermissionsService);
102101
namespacesService = module.get(NamespacesService);
103-
userService = module.get(UserService);
104102
});
105103

106104
it('should be defined', () => {

src/groups/groups.service.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Injectable, NotFoundException } from '@nestjs/common';
1+
import { Injectable } from '@nestjs/common';
22
import { InjectRepository } from '@nestjs/typeorm';
33
import {
44
DataSource,

src/namespace-resources/open.resource.controller.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ export class OpenResourcesController {
4949
content: data.content,
5050
tagIds: data.tag_ids || [],
5151
attrs: data.attrs || {},
52-
resourceType: ResourceType.FILE,
52+
resourceType: ResourceType.DOC,
5353
namespaceId: apiKey.namespaceId,
5454
parentId: apiKey.attrs.root_resource_id,
5555
};

src/tasks/task-pipeline.e2e-spec.ts

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@ class MockWizardWorker {
1717
private isPolling = false;
1818
private pollingInterval: NodeJS.Timeout | null = null;
1919
private readonly pollIntervalMs = 500; // Fast polling for tests
20+
private readonly namespaceId: string;
2021

21-
constructor(private readonly client: TestClient) {}
22+
constructor(private readonly client: TestClient) {
23+
this.namespaceId = client.namespace.id;
24+
}
2225

2326
/**
2427
* Starts polling for tasks and processing them
@@ -87,7 +90,7 @@ class MockWizardWorker {
8790
private async fetchTask(): Promise<TaskDto | null> {
8891
try {
8992
const response = await this.makeRequest()
90-
.get('/internal/api/v1/wizard/task')
93+
.get(`/internal/api/v1/wizard/task?namespace_id=${this.namespaceId}`)
9194
.timeout(5000); // 5 second timeout
9295

9396
if (response.status === 204) {
@@ -282,20 +285,16 @@ describe('Task Pipeline (e2e)', () => {
282285
let client: TestClient;
283286
let mockWorker: MockWizardWorker;
284287

285-
beforeAll(async () => {
288+
beforeEach(async () => {
286289
client = await TestClient.create();
287290
mockWorker = new MockWizardWorker(client);
288291
});
289292

290-
afterAll(async () => {
291-
await mockWorker.deleteAllTasks();
293+
afterEach(async () => {
294+
mockWorker.stopPolling();
292295
await client.close();
293296
});
294297

295-
beforeEach(async () => {
296-
await mockWorker.deleteAllTasks();
297-
});
298-
299298
describe('Basic Task Processing', () => {
300299
it('create resource trigger upsertIndex', async () => {
301300
mockWorker.startPolling();

src/user/user.e2e-spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ describe('UserController (e2e)', () => {
9393
});
9494

9595
it('should return null for non-existent user', async () => {
96-
const response = await client
96+
await client
9797
.get('/api/v1/user/00000000-0000-0000-0000-000000000000')
9898
.expect(HttpStatus.NOT_FOUND);
9999
});

src/wizard/dto/collect-request.dto.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
import { IsNotEmpty, IsString } from 'class-validator';
22

3-
export class CollectRequestDto {
4-
@IsString()
5-
@IsNotEmpty()
6-
html: string;
7-
3+
export class CollectZRequestDto {
84
@IsString()
95
@IsNotEmpty()
106
url: string;
@@ -21,3 +17,9 @@ export class CollectRequestDto {
2117
@IsNotEmpty()
2218
parentId: string;
2319
}
20+
21+
export class CollectRequestDto extends CollectZRequestDto {
22+
@IsString()
23+
@IsNotEmpty()
24+
html: string;
25+
}

src/wizard/internal.wizard.e2e-spec.ts

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,72 @@
11
import { TestClient } from 'test/test-client';
22
import { HttpStatus } from '@nestjs/common';
33
import { Image } from 'omniboxd/wizard/types/wizard.types';
4+
import { gzipSync } from 'zlib';
45

56
describe('InternalWizardController (e2e)', () => {
67
let client: TestClient;
78

8-
beforeAll(async () => {
9+
beforeEach(async () => {
910
client = await TestClient.create();
1011
});
1112

12-
afterAll(async () => {
13+
afterEach(async () => {
1314
await client.close();
1415
});
1516

17+
it('gzip_collect_callback', async () => {
18+
const html: string =
19+
'<html><body><h1>Test Page</h1><p>This is test content.</p></body></html>';
20+
const collectData = {
21+
url: 'https://example.com/test-page',
22+
title: 'Test Page Title',
23+
namespace_id: client.namespace.id,
24+
parentId: client.namespace.root_resource_id,
25+
};
26+
27+
const compressedHtml = gzipSync(html);
28+
29+
const taskCreateResponse = await client
30+
.post('/api/v1/wizard/collect/gzip')
31+
.field('url', collectData.url)
32+
.field('title', collectData.title)
33+
.field('namespace_id', collectData.namespace_id)
34+
.field('parentId', collectData.parentId)
35+
.attach('html', compressedHtml, {
36+
filename: 'html.gz',
37+
contentType: 'application/gzip',
38+
})
39+
.expect(HttpStatus.CREATED);
40+
41+
const taskId = taskCreateResponse.body.task_id;
42+
43+
const task = await client
44+
.get(`/internal/api/v1/wizard/task?namespace_id=${client.namespace.id}`)
45+
.expect(HttpStatus.OK);
46+
47+
expect(task.body.id).toBe(taskId);
48+
expect(task.body.input.html).toBe(html);
49+
50+
const response = await client
51+
.post('/internal/api/v1/wizard/callback')
52+
.send({
53+
id: taskId,
54+
output: {
55+
title: 'Test Page Title',
56+
markdown: '![图片](http://example.com/image.png)',
57+
images: [
58+
{
59+
name: '图片',
60+
link: 'http://example.com/image.png',
61+
data: 'iVBORw0KGgoAAAANSUhEUgAAAAUA',
62+
mimetype: 'image/png',
63+
},
64+
] as Image[],
65+
},
66+
});
67+
expect(response.status).toBe(HttpStatus.CREATED);
68+
});
69+
1670
it('collect_callback', async () => {
1771
const collectData = {
1872
html: '<html><body><h1>Test Page</h1><p>This is test content.</p></body></html>',
@@ -29,10 +83,13 @@ describe('InternalWizardController (e2e)', () => {
2983

3084
const taskId = taskCreateResponse.body.task_id;
3185

32-
await client
86+
const task = await client
3387
.get(`/internal/api/v1/wizard/task?namespace_id=${client.namespace.id}`)
3488
.expect(HttpStatus.OK);
3589

90+
expect(task.body.id).toBe(taskId);
91+
expect(task.body.input.html).toBe(collectData.html);
92+
3693
const response = await client
3794
.post('/internal/api/v1/wizard/callback')
3895
.send({

src/wizard/wizard.controller.ts

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,22 @@
1-
import { Body, Controller, Post, Req, Sse } from '@nestjs/common';
1+
import {
2+
Body,
3+
Controller,
4+
Post,
5+
Req,
6+
Sse,
7+
UploadedFile,
8+
UseInterceptors,
9+
} from '@nestjs/common';
210
import { WizardService } from 'omniboxd/wizard/wizard.service';
3-
import { CollectRequestDto } from 'omniboxd/wizard/dto/collect-request.dto';
11+
import {
12+
CollectRequestDto,
13+
CollectZRequestDto,
14+
} from 'omniboxd/wizard/dto/collect-request.dto';
415
import { CollectResponseDto } from 'omniboxd/wizard/dto/collect-response.dto';
516
import { AgentRequestDto } from 'omniboxd/wizard/dto/agent-request.dto';
617
import { RequestId } from 'omniboxd/decorators/request-id.decorators';
718
import { UserId } from 'omniboxd/decorators/user-id.decorator';
19+
import { FileInterceptor } from '@nestjs/platform-express';
820

921
@Controller('api/v1/wizard')
1022
export class WizardController {
@@ -18,6 +30,16 @@ export class WizardController {
1830
return await this.wizardService.collect(userId, data);
1931
}
2032

33+
@Post('collect/gzip')
34+
@UseInterceptors(FileInterceptor('html'))
35+
async collectGzip(
36+
@UserId() userId: string,
37+
@Body() data: CollectZRequestDto,
38+
@UploadedFile() zHtml: Express.Multer.File,
39+
): Promise<CollectResponseDto> {
40+
return await this.wizardService.collectZ(userId, data, zHtml);
41+
}
42+
2143
@Post('ask')
2244
@Sse()
2345
async ask(

src/wizard/wizard.service.ts

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ import { Task } from 'omniboxd/tasks/tasks.entity';
33
import { NamespaceResourcesService } from 'omniboxd/namespace-resources/namespace-resources.service';
44
import { TagService } from 'omniboxd/tag/tag.service';
55
import { CreateResourceDto } from 'omniboxd/namespace-resources/dto/create-resource.dto';
6-
import { CollectRequestDto } from 'omniboxd/wizard/dto/collect-request.dto';
6+
import {
7+
CollectRequestDto,
8+
CollectZRequestDto,
9+
} from 'omniboxd/wizard/dto/collect-request.dto';
710
import { CollectResponseDto } from 'omniboxd/wizard/dto/collect-response.dto';
811
import { TaskCallbackDto } from 'omniboxd/wizard/dto/task-callback.dto';
912
import { ConfigService } from '@nestjs/config';
@@ -22,6 +25,8 @@ import { Image, ProcessedImage } from 'omniboxd/wizard/types/wizard.types';
2225
import { InternalTaskDto } from 'omniboxd/tasks/dto/task.dto';
2326
import { isEmpty } from 'omniboxd/utils/is-empty';
2427
import { FetchTaskRequest } from 'omniboxd/wizard/dto/fetch-task-request.dto';
28+
import { MinioService } from 'omniboxd/minio/minio.service';
29+
import { createGunzip } from 'zlib';
2530

2631
@Injectable()
2732
export class WizardService {
@@ -30,13 +35,16 @@ export class WizardService {
3035
readonly streamService: StreamService;
3136
readonly wizardApiService: WizardAPIService;
3237

38+
private readonly gzipHtmlFolder: string = 'collect/html/gzip';
39+
3340
constructor(
3441
private readonly wizardTaskService: WizardTaskService,
3542
private readonly namespaceResourcesService: NamespaceResourcesService,
3643
private readonly tagService: TagService,
3744
private readonly messagesService: MessagesService,
3845
private readonly configService: ConfigService,
3946
private readonly attachmentsService: AttachmentsService,
47+
private readonly minioService: MinioService,
4048
) {
4149
this.processors = {
4250
collect: new CollectProcessor(
@@ -69,6 +77,51 @@ export class WizardService {
6977
return await this.wizardTaskService.create(partialTask);
7078
}
7179

80+
async collectZ(
81+
userId: string,
82+
data: CollectZRequestDto,
83+
file: Express.Multer.File,
84+
) {
85+
if (!file) {
86+
throw new BadRequestException('Missing file');
87+
}
88+
const { url, title, namespace_id, parentId } = data;
89+
if (!namespace_id || !parentId || !url) {
90+
throw new BadRequestException('Missing required fields');
91+
}
92+
93+
const resourceDto: CreateResourceDto = {
94+
name: title || url,
95+
namespaceId: namespace_id,
96+
resourceType: ResourceType.LINK,
97+
parentId: parentId,
98+
attrs: { url },
99+
};
100+
const resource = await this.namespaceResourcesService.create(
101+
userId,
102+
resourceDto,
103+
);
104+
105+
const filename = 'html.gz';
106+
const { id } = await this.minioService.put(
107+
filename,
108+
file.buffer,
109+
file.mimetype,
110+
{
111+
folder: this.gzipHtmlFolder,
112+
metadata: { resourceId: resource.id, url },
113+
},
114+
);
115+
116+
const task = await this.wizardTaskService.createCollectTask(
117+
userId,
118+
namespace_id,
119+
resource.id,
120+
{ html: [this.gzipHtmlFolder, id].join('/'), url, title },
121+
);
122+
return { task_id: task.id, resource_id: resource.id };
123+
}
124+
72125
async collect(
73126
userId: string,
74127
data: CollectRequestDto,
@@ -305,9 +358,35 @@ export class WizardService {
305358
namespaceId: record.namespace_id,
306359
});
307360
const newTask = await this.wizardTaskService.taskRepository.save(task);
361+
// Fetch HTML content from S3 for collect tasks
362+
if (
363+
newTask.function === 'collect' &&
364+
newTask.input.html?.startsWith(this.gzipHtmlFolder) &&
365+
newTask.input.html?.length === this.gzipHtmlFolder.length + 36 // 1 + 32 + 3
366+
) {
367+
const htmlContent = await this.getHtmlFromMinioGzipFile(
368+
newTask.input.html,
369+
);
370+
newTask.input = { ...newTask.input, html: htmlContent };
371+
}
308372
return InternalTaskDto.fromEntity(newTask);
309373
}
310374

311375
return null;
312376
}
377+
378+
async getHtmlFromMinioGzipFile(path: string) {
379+
const stream = await this.minioService.getObject(path);
380+
const gunzip = createGunzip();
381+
return new Promise<string>((resolve, reject) => {
382+
const chunks: Buffer[] = [];
383+
stream
384+
.pipe(gunzip)
385+
.on('data', (chunk: Buffer) => chunks.push(chunk))
386+
.on('end', () => {
387+
resolve(Buffer.concat(chunks).toString('utf-8'));
388+
})
389+
.on('error', reject);
390+
});
391+
}
313392
}

0 commit comments

Comments
 (0)