Skip to content
This repository was archived by the owner on Nov 30, 2024. It is now read-only.

Commit 5c54a6e

Browse files
committed
Fix invalid byte sequence on EncodedString#split
1 parent 2615f3d commit 5c54a6e

File tree

2 files changed

+28
-13
lines changed

2 files changed

+28
-13
lines changed

lib/rspec/support/encoded_string.rb

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ module Support
44
class EncodedString
55
# Ruby's default replacement string is U+FFFD ("\xEF\xBF\xBD") for Unicode encoding forms,
66
# else is '?' ("\x3F")
7-
MRI_UNICODE_UNKOWN_CHARACTER = "\xEF\xBF\xBD"
87
REPLACE = "\x3F"
98

109
def initialize(string, encoding=nil)
@@ -36,6 +35,24 @@ def to_s
3635

3736
private
3837

38+
ENCODING_STRATEGY = {
39+
:bad_bytes => {
40+
:invalid => :replace,
41+
# :undef => :nil,
42+
:replace => REPLACE
43+
},
44+
:cannot_convert => {
45+
# :invalid => :nil,
46+
:undef => :replace,
47+
:replace => REPLACE
48+
},
49+
:no_converter => {
50+
:invalid => :replace,
51+
# :undef => :nil,
52+
:replace => REPLACE
53+
}
54+
}
55+
3956
# Raised by Encoding and String methods:
4057
# Encoding::UndefinedConversionError:
4158
# when a transcoding operation fails
@@ -51,20 +68,19 @@ def to_s
5168
# Encoding::CompatibilityError
5269
#
5370
def matching_encoding(string)
54-
string.encode(@encoding)
55-
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
56-
normalize_missing(string.encode(@encoding, :invalid => :replace, :undef => :replace))
71+
# Converting it to a higher character set (UTF-16) and then back (to the target encoding)
72+
# ensures that we strip away invalid or undefined byte sequences
73+
# => no need to rescue Encoding::InvalidByteSequenceError, ArgumentError
74+
string.encode(::Encoding::UTF_16LE, ENCODING_STRATEGY[:bad_bytes]).
75+
encode(@encoding)
76+
rescue Encoding::UndefinedConversionError, Encoding::CompatibilityError
77+
string.encode(@encoding, ENCODING_STRATEGY[:cannot_convert])
78+
# Begin: Needed for 1.9.2
5779
rescue Encoding::ConverterNotFoundError
58-
normalize_missing(string.force_encoding(@encoding).encode(:invalid => :replace))
80+
string.force_encoding(@encoding).encode(ENCODING_STRATEGY[:no_converter])
5981
end
82+
# End: Needed for 1.9.2
6083

61-
def normalize_missing(string)
62-
if @encoding.to_s == "UTF-8"
63-
string.gsub(MRI_UNICODE_UNKOWN_CHARACTER.force_encoding(@encoding), REPLACE)
64-
else
65-
string
66-
end
67-
end
6884

6985
def detect_source_encoding(string)
7086
string.encoding

spec/rspec/support/encoded_string_spec.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,6 @@ module RSpec::Support
155155
end
156156

157157
it 'replaces invalid bytes with the REPLACE string' do
158-
pending 'but is currently failing'
159158
resulting_array = build_encoded_string(message_with_invalid_byte_sequence, utf8_encoding).split("\n")
160159
expected_array = ["? ? ? I have bad bytes"]
161160
expect(resulting_array).to eq(expected_array)

0 commit comments

Comments
 (0)