Skip to content

Commit b18704a

Browse files
committed
fix: use curl -OJ instead of -o to avoid UTF-8 filename corruption on non-UTF-8 terminals
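On Windows terminals that use a non-UTF-8 code page (e.g. CP850 or CP1252), UTF-8 characters in the -o filename argument get corrupted when the user pastes the curl command. With -OJ (-O/--remote-name combined with -J/--remote-header-name), curl instead takes the output filename from the server's Content-Disposition header. The filename is then resolved inside curl rather than typed on the command line, so it bypasses the terminal encoding entirely; in the pasted command the file name appears only in percent-encoded (ASCII) form inside the URL.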
1 parent 6ea61f9 commit b18704a

File tree

2 files changed

+24
-25
lines changed

2 files changed

+24
-25
lines changed

src/app/item-page/clarin-files-section/clarin-files-section.component.spec.ts

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ describe('ClarinFilesSectionComponent', () => {
108108
component.listOfFiles.next([createMetadataBitstream('simple.txt')]);
109109
component.generateCurlCommand();
110110
expect(component.command).toBe(
111-
`curl -o "simple.txt" "${BASE}/123456789/1/simple.txt"`
111+
`curl -OJ "${BASE}/123456789/1/simple.txt"`
112112
);
113113
});
114114

@@ -120,26 +120,26 @@ describe('ClarinFilesSectionComponent', () => {
120120
]);
121121
component.generateCurlCommand();
122122
expect(component.command).toBe(
123-
`curl -o "file1.txt" "${BASE}/123456789/2/file1.txt" ` +
124-
`-o "file2.txt" "${BASE}/123456789/2/file2.txt"`
123+
`curl -OJ "${BASE}/123456789/2/file1.txt" ` +
124+
`-OJ "${BASE}/123456789/2/file2.txt"`
125125
);
126126
});
127127

128-
it('should percent-encode spaces in URL but keep real name in -o', () => {
128+
it('should percent-encode spaces in URL', () => {
129129
component.itemHandle = '123456789/3';
130130
component.listOfFiles.next([createMetadataBitstream('my file.txt')]);
131131
component.generateCurlCommand();
132132
expect(component.command).toBe(
133-
`curl -o "my file.txt" "${BASE}/123456789/3/my%20file.txt"`
133+
`curl -OJ "${BASE}/123456789/3/my%20file.txt"`
134134
);
135135
});
136136

137-
it('should percent-encode parentheses in URL but keep real name in -o', () => {
137+
it('should percent-encode parentheses in URL', () => {
138138
component.itemHandle = '123456789/4';
139139
component.listOfFiles.next([createMetadataBitstream('logo (2).png')]);
140140
component.generateCurlCommand();
141141
expect(component.command).toBe(
142-
`curl -o "logo (2).png" "${BASE}/123456789/4/logo%20%282%29.png"`
142+
`curl -OJ "${BASE}/123456789/4/logo%20%282%29.png"`
143143
);
144144
});
145145

@@ -148,7 +148,7 @@ describe('ClarinFilesSectionComponent', () => {
148148
component.listOfFiles.next([createMetadataBitstream('dtq+logo.png')]);
149149
component.generateCurlCommand();
150150
expect(component.command).toBe(
151-
`curl -o "dtq+logo.png" "${BASE}/123456789/5/dtq%2Blogo.png"`
151+
`curl -OJ "${BASE}/123456789/5/dtq%2Blogo.png"`
152152
);
153153
});
154154

@@ -160,26 +160,26 @@ describe('ClarinFilesSectionComponent', () => {
160160
]);
161161
component.generateCurlCommand();
162162
expect(component.command).toBe(
163-
`curl -o "dtq+logo (2).png" "${BASE}/123456789/6/dtq%2Blogo%20%282%29.png" ` +
164-
`-o "Screenshot 1.png" "${BASE}/123456789/6/Screenshot%201.png"`
163+
`curl -OJ "${BASE}/123456789/6/dtq%2Blogo%20%282%29.png" ` +
164+
`-OJ "${BASE}/123456789/6/Screenshot%201.png"`
165165
);
166166
});
167167

168-
it('should preserve UTF-8 characters in -o filename and encode in URL', () => {
168+
it('should percent-encode UTF-8 characters in URL', () => {
169169
component.itemHandle = '123456789/9';
170170
component.listOfFiles.next([createMetadataBitstream('M\u00e9di\u00e1 (3).jfif')]);
171171
component.generateCurlCommand();
172172
expect(component.command).toBe(
173-
`curl -o "M\u00e9di\u00e1 (3).jfif" "${BASE}/123456789/9/M%C3%A9di%C3%A1%20%283%29.jfif"`
173+
`curl -OJ "${BASE}/123456789/9/M%C3%A9di%C3%A1%20%283%29.jfif"`
174174
);
175175
});
176176

177-
it('should escape double quotes in filenames', () => {
177+
it('should percent-encode double quotes in URL', () => {
178178
component.itemHandle = '123456789/10';
179179
component.listOfFiles.next([createMetadataBitstream('file "quoted".txt')]);
180180
component.generateCurlCommand();
181181
expect(component.command).toBe(
182-
`curl -o "file \\"quoted\\".txt" "${BASE}/123456789/10/file%20%22quoted%22.txt"`
182+
`curl -OJ "${BASE}/123456789/10/file%20%22quoted%22.txt"`
183183
);
184184
});
185185

@@ -204,7 +204,7 @@ describe('ClarinFilesSectionComponent', () => {
204204
component.listOfFiles.next([createMetadataBitstream('100% done.txt')]);
205205
component.generateCurlCommand();
206206
expect(component.command).toBe(
207-
`curl -o "100% done.txt" "${BASE}/123456789/11/100%25%20done.txt"`
207+
`curl -OJ "${BASE}/123456789/11/100%25%20done.txt"`
208208
);
209209
});
210210

@@ -213,7 +213,7 @@ describe('ClarinFilesSectionComponent', () => {
213213
component.listOfFiles.next([createMetadataBitstream('M\u00e9di\u00e1 (+)#9) ano')]);
214214
component.generateCurlCommand();
215215
expect(component.command).toBe(
216-
`curl -o "M\u00e9di\u00e1 (+)#9) ano" "${BASE}/123456789/12/M%C3%A9di%C3%A1%20%28%2B%29%239%29%20ano"`
216+
`curl -OJ "${BASE}/123456789/12/M%C3%A9di%C3%A1%20%28%2B%29%239%29%20ano"`
217217
);
218218
});
219219

@@ -228,12 +228,12 @@ describe('ClarinFilesSectionComponent', () => {
228228
expect(component.canShowCurlDownload).toBeFalse();
229229
});
230230

231-
it('should escape dollar signs and backticks in filenames for shell safety', () => {
231+
it('should percent-encode dollar signs in URL', () => {
232232
component.itemHandle = '123456789/14';
233233
component.listOfFiles.next([createMetadataBitstream('price$100.txt')]);
234234
component.generateCurlCommand();
235235
expect(component.command).toBe(
236-
`curl -o "price\\$100.txt" "${BASE}/123456789/14/price%24100.txt"`
236+
`curl -OJ "${BASE}/123456789/14/price%24100.txt"`
237237
);
238238
});
239239
});

src/app/item-page/clarin-files-section/clarin-files-section.component.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,17 +121,16 @@ export class ClarinFilesSectionComponent implements OnInit {
121121
return file.name;
122122
});
123123

124-
// Generate curl command with -o "filename" "url" pairs for each file.
125-
// Each file needs its own -o + URL pair because curl URL globbing ({})
126-
// does NOT support per-file -o flags (multiple -o with {} results in
127-
// "Got more output options than URLs" and only the first file is saved).
128-
// Using -o lets the shell pass the real filename (including UTF-8) directly.
124+
// Generate curl command with -OJ per URL. The -O flag tells curl to use the
125+
// remote name and -J (--remote-header-name) tells it to use the filename from
126+
// the server's Content-Disposition header instead. This avoids placing non-ASCII
127+
// filenames directly in the shell command, which breaks on terminals that don't
128+
// use UTF-8 encoding (e.g. Windows CP850/CP1252 corrupts the bytes).
129129
const baseUrl = `${this.halService.getRootHref()}/core/bitstreams/handle/${this.itemHandle}`;
130130
const parts = fileNames.map(name => {
131131
const encodedName = encodeURIComponent(name)
132132
.replace(/[()]/g, c => '%' + c.charCodeAt(0).toString(16).toUpperCase());
133-
const safeName = name.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\$/g, '\\$').replace(/`/g, '\\`');
134-
return `-o "${safeName}" "${baseUrl}/${encodedName}"`;
133+
return `-OJ "${baseUrl}/${encodedName}"`;
135134
});
136135
this.command = `curl ${parts.join(' ')}`;
137136
}

0 commit comments

Comments
 (0)