Skip to content

Commit 0a7f1ae

Browse files
committed
Use the unicode replacement character
instead of an empty string so that it is clear that there was some sort of encoding issue, as opposed to silently dropping data. This is only easily accomplished on Ruby 1.9 and above, so Ruby 1.8.x will keep the empty string behavior.
1 parent a217776 commit 0a7f1ae

File tree

4 files changed

+25
-15
lines changed

4 files changed

+25
-15
lines changed

CHANGELOG.rdoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
Features:
44
* #853 - `Mail::Message#set_sort_order` overrides the default message part sort order. (rafbm)
5+
* #1002 - Replace invalid encoded characters with the unicode replacement char instead of empty string. (kjg)
56

67
Compatibility:
78
* #655 - Sort attachments to the end of the parts list to work around email clients that may mistake a text attachment for the message body. (npickens)

lib/mail/version_specific/ruby_1_9.rb

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,12 @@ def Ruby19.get_constant(klass, string)
7878
end
7979

8080
def Ruby19.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8)
81-
charset_encoder.encode(str.dup, from_encoding).encode(to_encoding, :undef => :replace, :invalid => :replace, :replace => '')
81+
if to_encoding == Encoding::UTF_8
82+
replacement_char = '�'
83+
else
84+
replacement_char = '?'
85+
end
86+
charset_encoder.encode(str.dup, from_encoding).encode(to_encoding, :undef => :replace, :invalid => :replace, :replace => replacement_char)
8287
end
8388

8489
def Ruby19.b_value_encode(str, encoding = nil)
@@ -93,11 +98,7 @@ def Ruby19.b_value_decode(str)
9398
str = Ruby19.decode_base64(match[2])
9499
str = charset_encoder.encode(str, charset)
95100
end
96-
decoded = str.encode(Encoding::UTF_8, :undef => :replace, :invalid => :replace, :replace => "")
97-
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "").encode(Encoding::UTF_8)
98-
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
99-
warn "Encoding conversion failed #{$!}"
100-
str.dup.force_encoding(Encoding::UTF_8)
101+
encode_to_utf8(str)
101102
end
102103

103104
def Ruby19.q_value_encode(str, encoding = nil)
@@ -118,11 +119,7 @@ def Ruby19.q_value_decode(str)
118119
# jruby/jruby#829 which subtly changes String#encode semantics.
119120
str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT
120121
end
121-
decoded = str.encode(Encoding::UTF_8, :invalid => :replace, :replace => "")
122-
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "").encode(Encoding::UTF_8)
123-
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
124-
warn "Encoding conversion failed #{$!}"
125-
str.dup.force_encoding(Encoding::UTF_8)
122+
encode_to_utf8(str)
126123
end
127124

128125
def Ruby19.param_decode(str, encoding)
@@ -221,6 +218,14 @@ def convert_to_encoding(encoding)
221218
end
222219
end
223220
end
221+
222+
def encode_to_utf8(str)
223+
decoded = str.encode(Encoding::UTF_8, :undef => :replace, :invalid => :replace, :replace => "�")
224+
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "�").encode(Encoding::UTF_8)
225+
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
226+
warn "Encoding conversion failed #{$!}"
227+
str.dup.force_encoding(Encoding::UTF_8)
228+
end
224229
end
225230
end
226231
end

spec/mail/encoding_spec.rb

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@
186186
expect(mail.parts[0].content_type).to eq "text/html; charset=ISO-8859-1"
187187
end
188188

189-
it "should skip invalid characters" do
189+
it "should handle invalid characters" do
190190
m = Mail.new
191191
m['Subject'] = Mail::SubjectField.new("=?utf-8?Q?Hello_=96_World?=")
192192
if RUBY_VERSION > '1.9'
@@ -196,10 +196,14 @@
196196
end
197197
end
198198

199-
it "should skip characters of unknown and invalid encoding" do
199+
it "should replace characters of unknown and invalid encoding" do
200200
m = Mail.new
201201
m['Subject'] = Mail::SubjectField.new("Hello=?UNKNOWN?B?4g==?=")
202-
expect(m.subject).to eq "Hello"
202+
if RUBY_VERSION > '1.9'
203+
expect(m.subject).to eq "Hello�"
204+
else
205+
expect(m.subject).to eq "Hello"
206+
end
203207
end
204208

205209
if RUBY_VERSION > '1.9'

spec/mail/encodings_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@
316316

317317
it "should treat unrecognized charsets as binary" do
318318
if RUBY_VERSION >= "1.9"
319-
expect(Mail::Encodings.value_decode("=?ISO-FOOO?Q?Morten_R=F8verdatt=E9r?=")).to eq "Morten Rverdattr"
319+
expect(Mail::Encodings.value_decode("=?ISO-FOOO?Q?Morten_R=F8verdatt=E9r?=")).to eq "Morten R�verdatt�r"
320320
end
321321
end
322322
end

0 commit comments

Comments
 (0)