Skip to content

Commit b0be3de

Browse files
committed
feat: add migration to clean resource names
1 parent efd4004 commit b0be3de

File tree

3 files changed

+260
-0
lines changed

3 files changed

+260
-0
lines changed

src/app/app.module.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import { Shares1753866547335 } from 'omniboxd/migrations/1753866547335-shares';
3838
import { SharesModule } from 'omniboxd/shares/shares.module';
3939
import { ApiKeys1754550165406 } from 'omniboxd/migrations/1754550165406-api-keys';
4040
import { ResourceAttachments1755059371000 } from 'omniboxd/migrations/1755059371000-resource-attachments';
41+
import { CleanResourceNames1755396702021 } from 'omniboxd/migrations/1755396702021-clean-resource-names';
4142

4243
@Module({})
4344
export class AppModule implements NestModule {
@@ -106,6 +107,7 @@ export class AppModule implements NestModule {
106107
Shares1753866547335,
107108
ApiKeys1754550165406,
108109
ResourceAttachments1755059371000,
110+
CleanResourceNames1755396702021,
109111
...extraMigrations,
110112
],
111113
migrationsRun: true,
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { MigrationInterface, QueryRunner } from 'typeorm';
2+
3+
/**
 * Reports whether `str` contains percent-escapes that `decodeURIComponent`
 * can decode, i.e. whether decoding would actually change the string.
 *
 * Plain strings without escapes (for example `"abc"` or `"Hello World"`)
 * are not considered URL-encoded, and neither are strings with malformed
 * escapes such as a literal `"100%"`.
 *
 * @param str - The candidate string.
 * @returns `true` when decoding succeeds and yields a different string.
 */
function isUrlEncoded(str: string): boolean {
  try {
    // Decoding is a no-op unless at least one valid %XX sequence is present.
    return decodeURIComponent(str) !== str;
  } catch {
    // decodeURIComponent throws URIError on malformed escapes; treat the
    // string as literal text rather than encoded text.
    return false;
  }
}
12+
13+
/**
 * Heuristically decides whether `str` looks like UTF-8 text whose bytes were
 * mis-decoded as Latin-1 ("mojibake").
 *
 * Two signals are used:
 *  1. the presence of any C1 control character (U+0080–U+009F), and
 *  2. more than one third of the characters falling in U+0080–U+00FF.
 *
 * This is only a heuristic: short, accent-heavy Latin-1 names can also
 * exceed the one-third ratio, so callers should validate any repair they
 * attempt based on this result.
 *
 * @param str - The string to inspect.
 * @returns `true` when the string is likely mojibake.
 */
function looksLikeMojibake(str: string): boolean {
  // NOTE(review): the original pattern was lost (the literal was empty, which
  // parses as a line comment). Reconstructed as the C1 control range: those
  // code points are not printable text, but they are what UTF-8 bytes
  // 0x80-0x9F turn into when read as Latin-1. Confirm against the intended
  // pattern.
  const c1Controls = /[\u0080-\u009F]/;
  if (c1Controls.test(str)) return true;

  // Mis-decoded multi-byte sequences turn into runs of U+0080–U+00FF chars.
  const highChars = str.match(/[\x80-\xFF]/g) || [];
  return highChars.length > str.length / 3;
}
20+
21+
/** Shape of the rows selected by the migration (only the columns it reads). */
interface ResourceNameRow {
  id: string;
  name: string;
}

/**
 * Re-decodes a string whose UTF-8 bytes were mis-read as Latin-1.
 *
 * @param garbled - The suspected mojibake string.
 * @returns The repaired string, or `null` when the conversion would be lossy
 *   or would not produce valid UTF-8 (in which case the input should be kept).
 */
function repairLatin1Mojibake(garbled: string): string | null {
  // The 'latin1' encoder truncates code points above U+00FF, which would
  // silently destroy real non-Latin-1 characters already in the name.
  if (/[^\u0000-\u00FF]/.test(garbled)) return null;

  const repaired = Buffer.from(garbled, 'latin1').toString('utf8');

  // Buffer#toString('utf8') substitutes U+FFFD for invalid byte sequences;
  // its presence means the input was not mis-decoded UTF-8 after all.
  return repaired.includes('\uFFFD') ? null : repaired;
}

/**
 * Data migration that normalises `resources.name` values:
 *  - names containing decodable percent-escapes are URL-decoded once, and
 *  - names that look like Latin-1 mojibake are re-decoded as UTF-8, but only
 *    when that conversion is lossless and yields valid UTF-8.
 *
 * Rows are processed independently; a failure on one row is logged and does
 * not abort the migration.
 */
export class CleanResourceNames1755396702021 implements MigrationInterface {
  /**
   * Cleans every non-empty resource name and writes back only the rows whose
   * name actually changed.
   *
   * @param queryRunner - The TypeORM query runner supplied by the migration
   *   executor.
   */
  public async up(queryRunner: QueryRunner): Promise<void> {
    const records: ResourceNameRow[] = await queryRunner.query(`
      SELECT id, name
      FROM resources
      WHERE name IS NOT NULL AND name != ''
    `);

    for (const row of records) {
      try {
        const originalName = row.name;
        let name = originalName;

        if (isUrlEncoded(name)) {
          name = decodeURIComponent(name);
        }

        if (looksLikeMojibake(name)) {
          // The detector is heuristic, so only accept a repair that is
          // provably lossless; otherwise keep the name as it is.
          const repaired = repairLatin1Mojibake(name);
          if (repaired !== null) {
            name = repaired;
          }
        }

        if (name !== originalName) {
          await queryRunner.query(
            `UPDATE resources SET name = $1 WHERE id = $2`,
            [name, row.id],
          );
        }
      } catch (e) {
        // Best effort: report the failing row and continue with the rest.
        console.error({
          message: 'Error processing resource name',
          id: row.id,
          name: row.name,
          error: e instanceof Error ? e.message : String(e),
        });
      }
    }
  }

  /**
   * Reverting is impossible because the original (dirty) names are not kept.
   *
   * @throws Error always.
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars, @typescript-eslint/require-await
  public async down(queryRunner: QueryRunner): Promise<void> {
    throw new Error('Not supported.');
  }
}
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
import { DataSource, QueryRunner } from 'typeorm';
2+
import { CleanResourceNames1755396702021 } from './1755396702021-clean-resource-names';
3+
4+
/**
 * End-to-end tests for the CleanResourceNames migration.
 *
 * Each test runs inside its own transaction against the Postgres instance at
 * OBB_POSTGRES_URL: a throw-away `resources` table is created in
 * `beforeEach` and everything is rolled back in `afterEach`.
 */
describe('CleanResourceNames Migration E2E', () => {
  let dataSource: DataSource;
  let queryRunner: QueryRunner;

  /** Runs the migration's `up` step on the current test's query runner. */
  const runMigration = async (): Promise<void> => {
    const migration = new CleanResourceNames1755396702021();
    await migration.up(queryRunner);
  };

  beforeAll(async () => {
    dataSource = new DataSource({
      type: 'postgres',
      url: process.env.OBB_POSTGRES_URL,
      entities: [],
      migrations: [],
      synchronize: false,
    });
    await dataSource.initialize();
  });

  beforeEach(async () => {
    queryRunner = dataSource.createQueryRunner();
    await queryRunner.connect();
    await queryRunner.startTransaction();

    await queryRunner.query('DROP TABLE IF EXISTS resources CASCADE');

    // Create resources table structure for testing
    await queryRunner.query(`
      CREATE TABLE IF NOT EXISTS resources (
        id character varying PRIMARY KEY,
        name character varying NOT NULL DEFAULT '',
        namespace_id character varying NOT NULL DEFAULT 'test-ns',
        user_id uuid,
        parent_id character varying,
        resource_type text NOT NULL DEFAULT 'doc',
        content text NOT NULL DEFAULT '',
        tags jsonb NOT NULL DEFAULT '[]'::jsonb,
        attrs jsonb NOT NULL DEFAULT '{}'::jsonb,
        global_permission text,
        created_at timestamp with time zone DEFAULT now(),
        updated_at timestamp with time zone DEFAULT now(),
        deleted_at timestamp with time zone
      )
    `);
  });

  afterEach(async () => {
    try {
      await queryRunner.rollbackTransaction();
    } finally {
      // Always hand the connection back, even if the rollback itself fails.
      await queryRunner.release();
    }
  });

  afterAll(async () => {
    // If initialize() failed in beforeAll, destroying would throw and hide
    // the real connection error.
    if (dataSource.isInitialized) {
      await dataSource.destroy();
    }
  });

  describe('URL-encoded names', () => {
    it('should decode URL-encoded resource names', async () => {
      // Setup: Insert URL-encoded names
      await queryRunner.query(`
        INSERT INTO resources (id, name) VALUES
        ('res1', 'Hello%20World'),
        ('res2', 'My%20Document%2Etxt'),
        ('res3', 'Normal Name'),
        ('res4', 'File%2Bwith%2Bplus')
      `);

      // Execute migration
      await runMigration();

      // Verify results
      const results = await queryRunner.query(`
        SELECT id, name FROM resources ORDER BY id
      `);

      expect(results).toEqual([
        { id: 'res1', name: 'Hello World' },
        { id: 'res2', name: 'My Document.txt' },
        { id: 'res3', name: 'Normal Name' },
        { id: 'res4', name: 'File+with+plus' },
      ]);
    });

    it('should handle double-encoded names', async () => {
      await queryRunner.query(`
        INSERT INTO resources (id, name) VALUES
        ('res1', 'Hello%2520World')
      `);

      await runMigration();

      const results = await queryRunner.query(`
        SELECT name FROM resources WHERE id = 'res1'
      `);

      // Only one layer of encoding is removed per migration run.
      expect(results[0].name).toBe('Hello%20World');
    });
  });

  describe('Mojibake names', () => {
    it('should fix mojibake text', async () => {
      // Setup: Insert mojibake names (UTF-8 bytes interpreted as Latin-1)
      const mojibakeText = Buffer.from('Café', 'utf8').toString('latin1');

      await queryRunner.query(
        `
        INSERT INTO resources (id, name) VALUES
        ('res1', $1),
        ('res2', 'Normal Text')
      `,
        [mojibakeText],
      );

      // Execute migration
      await runMigration();

      // Verify results
      const results = await queryRunner.query(`
        SELECT id, name FROM resources ORDER BY id
      `);

      expect(results[0].name).toBe('Café');
      expect(results[1].name).toBe('Normal Text');
    });
  });

  describe('Combined issues', () => {
    it('should handle URL-encoded mojibake', async () => {
      // Create a name that's both URL-encoded AND has mojibake
      const text = '我为什么给你OFFER:跟着名企HR找工作.md';
      const mojibakeText = Buffer.from(text, 'utf8').toString('latin1');
      const urlEncodedMojibake = encodeURIComponent(mojibakeText);

      await queryRunner.query(
        `
        INSERT INTO resources (id, name) VALUES
        ('res1', $1)
      `,
        [urlEncodedMojibake],
      );

      await runMigration();

      const results = await queryRunner.query(`
        SELECT name FROM resources WHERE id = 'res1'
      `);

      expect(results[0].name).toBe(text);
    });
  });

  describe('Edge cases', () => {
    it('should not modify already clean names', async () => {
      await queryRunner.query(`
        INSERT INTO resources (id, name) VALUES
        ('res1', 'Clean Name'),
        ('res2', 'Another Clean Name 123'),
        ('res3', 'With-Special_Chars.txt'),
        ('res4', '中文测试')
      `);

      await runMigration();

      const results = await queryRunner.query(`
        SELECT name FROM resources ORDER BY id
      `);

      expect(results[0].name).toBe('Clean Name');
      expect(results[1].name).toBe('Another Clean Name 123');
      expect(results[2].name).toBe('With-Special_Chars.txt');
      expect(results[3].name).toBe('中文测试');
    });

    // The fixture column is NOT NULL, so only the empty-string case can be
    // exercised here; the title says exactly that.
    it('should leave empty names untouched', async () => {
      await queryRunner.query(`
        INSERT INTO resources (id, name) VALUES
        ('res1', ''),
        ('res2', 'Valid Name')
      `);

      await runMigration();

      const results = await queryRunner.query(`
        SELECT name FROM resources ORDER BY id
      `);

      expect(results[0].name).toBe('');
      expect(results[1].name).toBe('Valid Name');
    });
  });
});

0 commit comments

Comments
 (0)