Skip to content

Commit b4d29fa

Browse files
committed
Add optional force-transcoding to RDoc::Encoding#read_file. Address Ruby Bug #4376
1 parent 99f7210 commit b4d29fa

File tree

6 files changed

+82
-5
lines changed

6 files changed

+82
-5
lines changed

History.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
=== 3.5.3
2+
3+
* Bug fixes
4+
* When including a file, lossily force-transcode it to the output encoding
5+
instead of crashing, to preserve as much content as possible. Ruby Bug
6+
#4376 by Yui NARUSE.
7+
18
=== 3.5.2 / 2010-02-04
29

310
* Deprecations

lib/rdoc.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def self.const_missing const_name # :nodoc:
9595
##
9696
# RDoc version you are using
9797

98-
VERSION = '3.5.2'
98+
VERSION = '3.5.3'
9999

100100
##
101101
# Method visibilities

lib/rdoc/encoding.rb

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ module RDoc::Encoding
1212
#
1313
# The content will be converted to the +encoding+. If the file cannot be
1414
# converted a warning will be printed and nil will be returned.
15+
#
16+
# If +force_transcode+ is true the document will be transcoded and any
17+
# character that cannot be represented in the target encoding will be replaced with '?'
1518

16-
def self.read_file filename, encoding
19+
def self.read_file filename, encoding, force_transcode = false
1720
content = open filename, "rb" do |f| f.read end
1821

1922
utf8 = content.sub!(/\A\xef\xbb\xbf/, '')
@@ -50,8 +53,14 @@ def self.read_file filename, encoding
5053
warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
5154
nil
5255
rescue Encoding::UndefinedConversionError => e
53-
warn "unable to convert #{e.message} for #{filename}, skipping"
54-
nil
56+
if force_transcode then
57+
content.force_encoding orig_encoding
58+
content.encode! encoding, :undef => :replace, :replace => '?'
59+
content
60+
else
61+
warn "unable to convert #{e.message} for #{filename}, skipping"
62+
nil
63+
end
5564
rescue Errno::EISDIR, Errno::ENOENT
5665
nil
5766
end

lib/rdoc/markup/pre_process.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def include_file name, indent, encoding
120120
return ''
121121
end
122122

123-
content = RDoc::Encoding.read_file full_name, encoding
123+
content = RDoc::Encoding.read_file full_name, encoding, true
124124

125125
# strip magic comment
126126
content = content.sub(/\A# .*coding[=:].*$/, '').lstrip

test/test_rdoc_encoding.rb

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,26 @@ def test_class_read_file_encoding_convert
4949
assert_equal "hi \u00e9verybody", contents.sub("\r", '')
5050
end
5151

52+
##
# Without force-transcoding, a file whose content cannot be converted to the
# requested encoding is skipped: read_file returns nil and prints a warning.

def test_class_read_file_encoding_fail
  skip "Encoding not implemented" unless Object.const_defined? :Encoding

  @tempfile.write "# coding: utf-8\n\317\200" # pi
  @tempfile.flush

  # No line-ending fix-up is needed here: this test never compares file
  # content.  (The fix-up line that used to live here called gsub! on an
  # undefined +expected+ and raised NameError on 1.9 + Windows.)

  # Sentinel so a nil result is distinguishable from "never assigned".
  contents = :junk

  _, err = capture_io do
    contents = RDoc::Encoding.read_file @tempfile.path, Encoding::US_ASCII
  end

  assert_nil contents

  assert_match %r%^unable to convert U\+03C0 from UTF-8 to US-ASCII for%, err
end
71+
5272
def test_class_read_file_encoding_fancy
5373
skip "Encoding not implemented" unless Object.const_defined? :Encoding
5474

@@ -66,6 +86,21 @@ def test_class_read_file_encoding_fancy
6686
assert_equal Encoding::UTF_8, contents.encoding
6787
end
6888

89+
##
# With +force_transcode+ set, content that has no representation in the
# requested encoding is replaced with '?' instead of the file being skipped.

def test_class_read_file_encoding_force_transcode
  skip "Encoding not implemented" unless Object.const_defined? :Encoding

  @tempfile.write "# coding: utf-8\n\317\200" # pi
  @tempfile.flush

  # No line-ending fix-up is needed: the expected value below contains no
  # newline.  (The fix-up line that used to live here called gsub! on an
  # undefined +expected+ and raised NameError on 1.9 + Windows.)

  contents = RDoc::Encoding.read_file @tempfile.path, Encoding::US_ASCII, true

  # pi is not representable in US-ASCII, so it becomes the replacement
  # character, and the result is tagged with the requested encoding.
  assert_equal '?', contents
  assert_equal Encoding::US_ASCII, contents.encoding
end
103+
69104
def test_class_read_file_encoding_guess
70105
skip "Encoding not implemented" unless Object.const_defined? :Encoding
71106

test/test_rdoc_markup_pre_process.rb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# coding: utf-8
2+
13
require 'tempfile'
24
require 'rubygems'
35
require 'minitest/autorun'
@@ -46,6 +48,30 @@ def test_include_file
4648
assert_equal expected, content
4749
end
4850

51+
##
# Including a UTF-8 file while asking for US-ASCII output must still yield
# content, with the unrepresentable character replaced by '?'.

def test_include_file_encoding_incompatible
  skip "Encoding not implemented" unless Object.const_defined? :Encoding

  @tempfile.write <<-INCLUDE
# -*- mode: rdoc; coding: utf-8; fill-column: 74; -*-

π
  INCLUDE

  @tempfile.flush
  @tempfile.rewind

  included = @pp.include_file @file_name, '', Encoding::US_ASCII

  expected = "?\n"

  # FIXME 1.9 fix on windoze
  # preprocessor uses binread, so line endings are \r\n
  if RUBY_VERSION =~ /^1.9/ && RUBY_PLATFORM =~ /mswin|mingw/ then
    expected.gsub!("\n", "\r\n")
  end

  assert_equal expected, included
end
74+
4975
def test_handle
5076
text = "# :x: y\n"
5177
out = @pp.handle text

0 commit comments

Comments
 (0)