Skip to content

Commit

Permalink
Add optional force-transcoding to RDoc::Encoding#read_file. Address R…
Browse files Browse the repository at this point in the history
…uby Bug #4376
  • Loading branch information
drbrain committed Feb 7, 2011
1 parent 99f7210 commit b4d29fa
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 5 deletions.
7 changes: 7 additions & 0 deletions History.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
=== 3.5.3

* Bug fixes
* When including a file, lossily force-transcode it to the output encoding
instead of crashing, in order to preserve as much content as possible.
Ruby Bug #4376 by Yui NARUSE.

=== 3.5.2 / 2011-02-04

* Deprecations
Expand Down
2 changes: 1 addition & 1 deletion lib/rdoc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def self.const_missing const_name # :nodoc:
##
# RDoc version you are using

VERSION = '3.5.2'
VERSION = '3.5.3'

##
# Method visibilities
Expand Down
15 changes: 12 additions & 3 deletions lib/rdoc/encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ module RDoc::Encoding
#
# The content will be converted to the +encoding+. If the file cannot be
# converted a warning will be printed and nil will be returned.
#
# If +force_transcode+ is true the document will be transcoded and any
# character that has no equivalent in the target encoding will be replaced
# with '?'.

def self.read_file filename, encoding
def self.read_file filename, encoding, force_transcode = false
content = open filename, "rb" do |f| f.read end

utf8 = content.sub!(/\A\xef\xbb\xbf/, '')
Expand Down Expand Up @@ -50,8 +53,14 @@ def self.read_file filename, encoding
warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
nil
rescue Encoding::UndefinedConversionError => e
warn "unable to convert #{e.message} for #{filename}, skipping"
nil
if force_transcode then
content.force_encoding orig_encoding
content.encode! encoding, :undef => :replace, :replace => '?'
content
else
warn "unable to convert #{e.message} for #{filename}, skipping"
nil
end
rescue Errno::EISDIR, Errno::ENOENT
nil
end
Expand Down
2 changes: 1 addition & 1 deletion lib/rdoc/markup/pre_process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def include_file name, indent, encoding
return ''
end

content = RDoc::Encoding.read_file full_name, encoding
content = RDoc::Encoding.read_file full_name, encoding, true

# strip magic comment
content = content.sub(/\A# .*coding[=:].*$/, '').lstrip
Expand Down
35 changes: 35 additions & 0 deletions test/test_rdoc_encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,26 @@ def test_class_read_file_encoding_convert
assert_equal "hi \u00e9verybody", contents.sub("\r", '')
end

def test_class_read_file_encoding_fail
  # Without force_transcode, content that cannot be converted to the
  # requested encoding makes read_file return nil and emit a warning.
  skip "Encoding not implemented" unless Object.const_defined? :Encoding

  # "\317\200" is the UTF-8 byte sequence for U+03C0 (pi), which US-ASCII
  # cannot represent.
  @tempfile.write "# coding: utf-8\n\317\200" # pi
  @tempfile.flush

  # Sentinel value: proves below that read_file really returned nil rather
  # than the assignment never happening.
  contents = :junk

  _, err = capture_io do
    contents = RDoc::Encoding.read_file @tempfile.path, Encoding::US_ASCII
  end

  assert_nil contents

  assert_match %r%^unable to convert U\+03C0 from UTF-8 to US-ASCII for%, err
end

def test_class_read_file_encoding_fancy
skip "Encoding not implemented" unless Object.const_defined? :Encoding

Expand All @@ -66,6 +86,21 @@ def test_class_read_file_encoding_fancy
assert_equal Encoding::UTF_8, contents.encoding
end

def test_class_read_file_encoding_force_transcode
  # With force_transcode (third argument true), a character that the target
  # encoding cannot represent is replaced with '?' and the result carries
  # the target encoding.
  skip "Encoding not implemented" unless Object.const_defined? :Encoding

  # "\317\200" is the UTF-8 byte sequence for U+03C0 (pi), which US-ASCII
  # cannot represent.
  @tempfile.write "# coding: utf-8\n\317\200" # pi
  @tempfile.flush

  contents = RDoc::Encoding.read_file @tempfile.path, Encoding::US_ASCII, true

  assert_equal '?', contents
  assert_equal Encoding::US_ASCII, contents.encoding
end

def test_class_read_file_encoding_guess
skip "Encoding not implemented" unless Object.const_defined? :Encoding

Expand Down
26 changes: 26 additions & 0 deletions test/test_rdoc_markup_pre_process.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# coding: utf-8

require 'tempfile'
require 'rubygems'
require 'minitest/autorun'
Expand Down Expand Up @@ -46,6 +48,30 @@ def test_include_file
assert_equal expected, content
end

def test_include_file_encoding_incompatible
  # Including a UTF-8 file into US-ASCII output must not fail: the
  # unrepresentable character is expected to come back as '?'.
  skip "Encoding not implemented" unless Object.const_defined? :Encoding

  # Heredoc body is kept flush-left because <<- preserves leading whitespace
  # of the content lines.
  @tempfile.write <<-INCLUDE
# -*- mode: rdoc; coding: utf-8; fill-column: 74; -*-
π
  INCLUDE

  @tempfile.flush
  @tempfile.rewind

  included = @pp.include_file @file_name, '', Encoding::US_ASCII

  # FIXME 1.9 fix on windoze
  # preprocessor uses binread, so line endings are \r\n
  crlf_platform =
    RUBY_VERSION =~ /^1.9/ && RUBY_PLATFORM =~ /mswin|mingw/

  wanted = "?\n"
  wanted.gsub!("\n", "\r\n") if crlf_platform

  assert_equal wanted, included
end

def test_handle
text = "# :x: y\n"
out = @pp.handle text
Expand Down

0 comments on commit b4d29fa

Please sign in to comment.