Skip to content

Commit 0bfded4

Browse files
committed
fix!: support other charset labels by falling back to encoding_rs
Breaking change: the public DecoderFnc type is now a struct instead of a `fn(&[u8]) -> String` type alias.
1 parent f24ddcb commit 0bfded4

File tree

5 files changed

+38
-13
lines changed

5 files changed

+38
-13
lines changed

src/decoders/charsets/map.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ use super::{
88
multi_byte::*,
99
single_byte::*,
1010
utf::{decoder_utf16, decoder_utf16_be, decoder_utf16_le, decoder_utf7},
11-
DecoderFnc,
11+
Decoder, DecoderFnc,
1212
};
1313

14-
pub fn charset_decoder(charset: &[u8]) -> Option<DecoderFnc> {
14+
fn primary_charset_decoder(charset: &[u8]) -> Option<fn(&[u8]) -> String> {
1515
let mut l_charset = [0u8; 45];
1616

1717
for (dest, src) in l_charset.iter_mut().zip(charset.iter()) {
@@ -175,6 +175,14 @@ pub fn charset_decoder(charset: &[u8]) -> Option<DecoderFnc> {
175175
)
176176
}
177177

178+
pub fn charset_decoder(charset: &[u8]) -> Option<DecoderFnc> {
179+
primary_charset_decoder(charset)
180+
.map(Decoder::Fn)
181+
// fallback labels
182+
.or_else(|| encoding_rs::Encoding::for_label(charset).map(Decoder::Encoding))
183+
.map(DecoderFnc)
184+
}
185+
178186
#[cfg(test)]
179187
mod tests {
180188
use super::charset_decoder;

src/decoders/charsets/mod.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,21 @@ pub mod multi_byte;
99
pub mod single_byte;
1010
pub mod utf;
1111

12-
pub type DecoderFnc = fn(&[u8]) -> String;
12+
pub struct DecoderFnc(Decoder);
13+
14+
enum Decoder {
15+
Encoding(&'static encoding_rs::Encoding),
16+
Fn(fn(&[u8]) -> String),
17+
}
18+
19+
impl DecoderFnc {
20+
pub(crate) fn decode(&self, bytes: &[u8]) -> String {
21+
match self.0 {
22+
Decoder::Encoding(enc) => enc.decode(bytes).0.into_owned(),
23+
Decoder::Fn(decoder) => decoder(bytes),
24+
}
25+
}
26+
}
1327

1428
#[cfg(test)]
1529
mod tests {
@@ -63,7 +77,7 @@ mod tests {
6377
let decoder = charset_decoder(input.0.as_bytes())
6478
.expect(&("Failed to find decoder for ".to_owned() + input.0));
6579

66-
assert_eq!(decoder(&input.1), input.2);
80+
assert_eq!(decoder.decode(&input.1), input.2);
6781
}
6882
}
6983
}

src/decoders/encoded_word.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ impl MessageStream<'_> {
7777

7878
if let Some(bytes) = decode_fnc.and_then(|fnc| fnc(self)) {
7979
if let Some(decoder) = charset_decoder(self.bytes(charset_start..charset_end)) {
80-
decoder(&bytes).into()
80+
decoder.decode(&bytes).into()
8181
} else {
8282
String::from_utf8(bytes)
8383
.unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
@@ -149,11 +149,6 @@ mod tests {
149149
"مرحبا بالعالم",
150150
true,
151151
),
152-
(
153-
"?iso-8859-8-i?Q?=E4=F9=E1:_Septier_-_cooperation?=",
154-
"השב: Septier - cooperation",
155-
true,
156-
),
157152
#[cfg(feature = "full_encoding")]
158153
(
159154
"?shift_jis?B?g26DjYFbgUWDj4Fbg4uDaA==?=",
@@ -166,6 +161,12 @@ mod tests {
166161
"ハロー・ワールド",
167162
true,
168163
),
164+
#[cfg(feature = "full_encoding")]
165+
(
166+
"?iso-8859-8-i?Q?=E4=F9=E1:_Septier_-_cooperation?=",
167+
"השב: Septier - cooperation",
168+
true,
169+
),
169170
] {
170171
match MessageStream::new(input.as_bytes()).decode_rfc2047() {
171172
Some(result) => {

src/parsers/fields/content_type.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ impl<'x> ContentTypeParser<'x> {
188188
.as_ref()
189189
.and_then(|c| charset_decoder(c.as_bytes()))
190190
{
191-
decoder(&decoded_bytes).into()
191+
decoder.decode(&decoded_bytes).into()
192192
} else {
193193
String::from_utf8(decoded_bytes)
194194
.unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())

src/parsers/message.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,12 +320,14 @@ impl MessageParser {
320320
.and_then(|c| charset_decoder(c.as_bytes()))
321321
}),
322322
) {
323-
(Cow::Owned(vec), Some(charset_decoder)) => charset_decoder(&vec).into(),
323+
(Cow::Owned(vec), Some(charset_decoder)) => {
324+
charset_decoder.decode(&vec).into()
325+
}
324326
(Cow::Owned(vec), None) => String::from_utf8(vec)
325327
.unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
326328
.into(),
327329
(Cow::Borrowed(bytes), Some(charset_decoder)) => {
328-
charset_decoder(bytes).into()
330+
charset_decoder.decode(bytes).into()
329331
}
330332
(Cow::Borrowed(bytes), None) => String::from_utf8_lossy(bytes),
331333
};

0 commit comments

Comments
 (0)