Skip to content

Commit

Permalink
test: add encoding coverage for all serialization methods
Browse files Browse the repository at this point in the history
See #2774 and #2798
  • Loading branch information
flavorjones committed Feb 28, 2023
1 parent 4ba43be commit 343c792
Showing 1 changed file with 198 additions and 0 deletions.
198 changes: 198 additions & 0 deletions test/test_serialization_encoding.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
# coding: utf-8
# frozen_string_literal: true

require "helper"

class TestSerializationEncoding < Nokogiri::TestCase
# Hands a freshly created temporary file to the caller's block, then returns
# everything that is in the file once the block has finished.
#
# The file only exists for the duration of the call: the block form of
# Tempfile.create closes and removes it on the way out.
def round_trip_through_file
  Tempfile.create do |tempfile|
    yield(tempfile)

    # Move back to the start so the read picks up what the block wrote.
    tempfile.tap(&:rewind).read
  end
end

describe "serialization encoding" do
# Fixture table: each entry pairs a document class with the source files
# (and the encoding each file is read with) that the specs below exercise.
xml_documents = [
  { encoding: Encoding::UTF_8, path: ADDRESS_XML_FILE },
  { encoding: Encoding::Shift_JIS, path: SHIFT_JIS_XML },
]

# Both HTML document classes are run against the same pair of fixtures.
html_documents = [
  { encoding: Encoding::UTF_8, path: HTML_FILE },
  { encoding: Encoding::Shift_JIS, path: SHIFT_JIS_HTML },
]

matrix = [
  { klass: Nokogiri::XML::Document, documents: xml_documents },
  { klass: Nokogiri::HTML4::Document, documents: html_documents },
]

# The HTML5 entry is only added when Nokogiri reports that it uses gumbo.
matrix << { klass: Nokogiri::HTML5::Document, documents: html_documents } if Nokogiri.uses_gumbo?

# Defines one spec per (document class, fixture) pair from the matrix. Each
# spec parses the fixture, then checks the encoding of the string returned by
# every serialization method and that write_to emits the same bytes as
# serialize.
matrix.each do |entry|
  describe entry[:klass] do
    let(:klass) { entry[:klass] }

    entry[:documents].each do |fixture|
      describe fixture[:encoding] do
        it "serializes with the expected encoding" do
          source = File.read(fixture[:path], encoding: fixture[:encoding])
          doc = klass.parse(source)

          html5 = defined?(Nokogiri::HTML5::Document) && klass == Nokogiri::HTML5::Document
          # FIXME: see #2801, the HTML5 case should be fixture[:encoding]
          expected_default_encoding = html5 ? Encoding::UTF_8 : fixture[:encoding]

          assert_equal(expected_default_encoding, doc.to_s.encoding)

          # Every serializer is checked with its default encoding first, then
          # with each explicitly requested encoding.
          [:to_xml, :to_xhtml, :to_html, :serialize].each do |serializer|
            assert_equal(expected_default_encoding, doc.public_send(serializer).encoding)
            assert_equal(Encoding::UTF_8, doc.public_send(serializer, encoding: "UTF-8").encoding)
            assert_equal(Encoding::Shift_JIS, doc.public_send(serializer, encoding: "SHIFT_JIS").encoding)
          end

          # Writing to an IO must produce the same bytes as serializing to a string.
          serialized = doc.serialize.bytes
          written = round_trip_through_file { |io| doc.write_to(io) }.bytes
          assert_equal(serialized, written)

          serialized = doc.serialize(encoding: "UTF-8").bytes
          written = round_trip_through_file { |io| doc.write_to(io, encoding: "UTF-8") }.bytes
          assert_equal(serialized, written)

          serialized = doc.serialize(encoding: "SHIFT_JIS").bytes
          written = round_trip_through_file { |io| doc.write_to(io, encoding: "SHIFT_JIS") }.bytes
          assert_equal(serialized, written)
        end
      end
    end
  end
end

# describe "HTML4" do
# matrix_documents = [
# {encoding: Encoding::UTF_8, path: HTML_FILE},
# {encoding: Encoding::Shift_JIS, path: SHIFT_JIS_HTML},
# ]

# matrix_documents.each do |matrix_document|
# describe matrix_document[:encoding] do
# it "serializes with the expected encoding" do
# doc = Nokogiri::HTML4::Document.parse(
# File.read(
# matrix_document[:path],
# encoding: matrix_document[:encoding],
# )
# )

# assert_equal(matrix_document[:encoding], doc.to_s.encoding)

# assert_equal(matrix_document[:encoding], doc.to_xml.encoding)
# assert_equal(Encoding::UTF_8, doc.to_xml(encoding: "UTF-8").encoding)
# assert_equal(Encoding::Shift_JIS, doc.to_xml(encoding: "SHIFT_JIS").encoding)

# assert_equal(matrix_document[:encoding], doc.to_xhtml.encoding)
# assert_equal(Encoding::UTF_8, doc.to_xhtml(encoding: "UTF-8").encoding)
# assert_equal(Encoding::Shift_JIS, doc.to_xhtml(encoding: "SHIFT_JIS").encoding)

# assert_equal(matrix_document[:encoding], doc.to_html.encoding)
# assert_equal(Encoding::UTF_8, doc.to_html(encoding: "UTF-8").encoding)
# assert_equal(Encoding::Shift_JIS, doc.to_html(encoding: "SHIFT_JIS").encoding)

# assert_equal(matrix_document[:encoding], doc.serialize.encoding)
# assert_equal(Encoding::UTF_8, doc.serialize(encoding: "UTF-8").encoding)
# assert_equal(Encoding::Shift_JIS, doc.serialize(encoding: "SHIFT_JIS").encoding)

# payload = round_trip_through_file { |io| doc.write_to(io) }
# assert_equal(doc.serialize.bytes, payload.bytes)

# payload = round_trip_through_file { |io| doc.write_to(io, encoding: "UTF-8") }
# assert_equal(doc.serialize(encoding: "UTF-8").bytes, payload.bytes)

# payload = round_trip_through_file { |io| doc.write_to(io, encoding: "SHIFT_JIS") }
# assert_equal(doc.serialize(encoding: "SHIFT_JIS").bytes, payload.bytes)
# end
# end
# end
# end

# describe "HTML5" do
# matrix_documents = [
# {encoding: Encoding::UTF_8, path: HTML_FILE},
# {encoding: Encoding::Shift_JIS, path: SHIFT_JIS_HTML},
# ]

# matrix_documents.each do |matrix_document|
# describe matrix_document[:encoding] do
# it "serializes with the expected encoding" do
# doc = Nokogiri::HTML5::Document.parse(
# File.read(
# matrix_document[:path],
# encoding: matrix_document[:encoding],
# )
# )

# expected_default_encoding = Encoding::UTF_8 # FIXME: see #2801, this should be matrix_document[:encoding]

# assert_equal(expected_default_encoding, doc.to_s.encoding)

# assert_equal(expected_default_encoding, doc.to_xml.encoding)
# assert_equal(Encoding::UTF_8, doc.to_xml(encoding: "UTF-8").encoding)
# assert_equal(Encoding::Shift_JIS, doc.to_xml(encoding: "SHIFT_JIS").encoding)

# assert_equal(expected_default_encoding, doc.to_xhtml.encoding)
# assert_equal(Encoding::UTF_8, doc.to_xhtml(encoding: "UTF-8").encoding)
# assert_equal(Encoding::Shift_JIS, doc.to_xhtml(encoding: "SHIFT_JIS").encoding)

# assert_equal(expected_default_encoding, doc.to_html.encoding)
# assert_equal(Encoding::UTF_8, doc.to_html(encoding: "UTF-8").encoding)
# assert_equal(Encoding::Shift_JIS, doc.to_html(encoding: "SHIFT_JIS").encoding)

# assert_equal(expected_default_encoding, doc.serialize.encoding)
# assert_equal(Encoding::UTF_8, doc.serialize(encoding: "UTF-8").encoding)
# assert_equal(Encoding::Shift_JIS, doc.serialize(encoding: "SHIFT_JIS").encoding)

# assert_equal(
# doc.serialize.bytes,
# round_trip_through_file { |io| doc.write_to(io) }.bytes,
# )
# assert_equal(
# doc.serialize(encoding: "UTF-8").bytes,
# round_trip_through_file { |io| doc.write_to(io, encoding: "UTF-8") }.bytes,
# )
# assert_equal(
# doc.serialize(encoding: "SHIFT_JIS").bytes,
# round_trip_through_file { |io| doc.write_to(io, encoding: "SHIFT_JIS") }.bytes,
# )
# end
# end
# end
# end if Nokogiri.uses_gumbo?
end
end

0 comments on commit 343c792

Please sign in to comment.