Skip to content

Commit

Permalink
Merge pull request #278 from welguisz/html_strings
Browse files Browse the repository at this point in the history
Added initial support for HTML formatting.
  • Loading branch information
stevendaniels committed Apr 15, 2016
2 parents b19c4ff + c9e330c commit 4294ea4
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 4 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@
.project
*.lock
.idea
.buildpath
*~
.bundle/
lbin/
Empty file modified Gemfile
100644 → 100755
Empty file.
109 changes: 106 additions & 3 deletions lib/roo/excelx/shared_strings.rb
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ def to_a
@array ||= extract_shared_strings
end

# Returns the HTML renderings produced by extract_html.
# The result is computed on first use and cached in @html afterwards.
def to_html
  return @html if @html

  @html = extract_html
end

# Reports whether the shared string at +index+ carries inline formatting,
# i.e. whether its to_html rendering contains a <b>, <i>, <u>, <sup> or
# <sub> opening tag. Use it to decide between the to_html value and the
# plain to_a value for that string.
#
# index ::: position of the shared string
# Returns ::: true or false. An index that has no shared string yields false
#             instead of raising NoMethodError on nil.
#
# NOTE(review): cell text is copied into the HTML rendering unescaped, so an
# unformatted string that literally contains e.g. "<b>" also matches here.
def use_html?(index)
  html = to_html[index]
  return false if html.nil?

  !html[/<([biu]|sup|sub)>/].nil?
end

private

def fix_invalid_shared_strings(doc)
Expand Down Expand Up @@ -42,6 +52,99 @@ def extract_shared_strings
shared_string
end
end
end
end
end

# Builds one HTML string per <si> entry of the shared strings document.
#
# Each string is wrapped in <html>...</html>. Rich-text runs (<r>) are
# rendered through extract_html_r, plain <t> children contribute their text
# as-is, and any other child is ignored.
#
# Returns ::: Array of Strings, or [] when the document does not exist.
def extract_html
  return [] unless doc_exists?

  fix_invalid_shared_strings(doc)
  doc.xpath('/sst/si').map do |shared_item|
    markup = shared_item.children.each_with_object('<html>') do |node, buffer|
      case node.name
      when 'r' then buffer << extract_html_r(node)
      when 't' then buffer << node.content
      end
    end
    markup << '</html>'
  end
end

# Converts one rich-text run (<r>) of sharedStrings.xml into an HTML fragment.
#
# r_elem ::: XML element for the run, shaped like
# {code:xml}
# <r>
#   <rPr>
#     <i/>
#     <b/>
#     <u/>
#     <vertAlign val="subscript"/>   (or val="superscript")
#   </rPr>
#   <t>TEXT</t>
# </r>
# {code}
#
# Returns ::: "<sub|sup><b><i><u>TEXT</u></i></b></sub|sup>", containing only the
# tags whose formatting is switched on. The <html> wrapper is not added here.
def extract_html_r(r_elem)
  str = ''
  # Key order matters: create_html opens the tags in this order.
  xml_elems = { sub: false, sup: false, b: false, i: false, u: false }
  r_elem.children.each do |elem|
    case elem.name
    when 'rPr'
      elem.children.each { |prop| apply_run_property(xml_elems, prop) }
    when 't'
      str << create_html(elem.content, xml_elems)
    end
  end
  str
end

# Updates +formatting+ in place from a single child element of <rPr>.
# Children other than b, i, u and vertAlign are ignored.
def apply_run_property(formatting, prop)
  tag = prop.name
  return unless %w(b i u vertAlign).include?(tag)

  # The val attribute is optional on b/i/u; tolerate it being absent anywhere.
  val_attr = prop.xpath('@val').first
  val = val_attr && val_attr.value

  case tag
  when 'b', 'i'
    # A bare <b/> means "on"; val="0" / val="false" explicitly switches it off.
    formatting[tag.to_sym] = !%w(0 false).include?(val)
  when 'u'
    # Any underline style counts as underlined except the explicit "none".
    formatting[:u] = (val != 'none')
  when 'vertAlign'
    case val
    when 'subscript'
      # Subscript and superscript are mutually exclusive.
      formatting[:sub] = true
      formatting[:sup] = false
    when 'superscript'
      formatting[:sup] = true
      formatting[:sub] = false
    end
  end
end

# Wraps +text+ in an opening/closing tag pair for every entry of
# +formatting+ whose value is truthy.
#
# text       ::: String to wrap
# formatting ::: tag name => on/off flag, e.g. { b: true, i: false }
#
# Opening tags follow the order of +formatting+; closing tags are written in
# the opposite order so the tags nest properly.
def create_html(text, formatting)
  active_tags = formatting.select { |_tag, enabled| enabled }.map(&:first)

  html = ''
  active_tags.each { |tag| html << "<#{tag}>" }
  html << text
  active_tags.reverse_each { |tag| html << "</#{tag}>" }
  html
end
end # class SharedStrings < Excelx::Extractor
end # class Excelx
end # module Roo
2 changes: 1 addition & 1 deletion lib/roo/excelx/sheet_doc.rb
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def create_cell_from_value(value_type, cell, formula, format, style, hyperlink,
# 3. formula
case value_type
when :shared
value = shared_strings[cell.content.to_i]
value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i]
Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
when :boolean, :string
value = cell.content
Expand Down
33 changes: 33 additions & 0 deletions spec/lib/roo/excelx_spec.rb
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,39 @@
end
end

describe '#html_strings' do
  let(:path) { 'test/files/html_strings_formatting.xlsx' }

  it 'returns the expected result' do
    # Expected excelx_value per [row, column] on "Sheet1", checked in this order.
    # Strings without inline formatting are expected back without the <html> wrapper.
    expected_values = {
      [1, 1] => "This has no formatting.",
      [2, 1] => "<html>This has<b> bold </b>formatting.</html>",
      [2, 2] => "<html>This has <i>italics</i> formatting.</html>",
      [2, 3] => "<html>This has <u>underline</u> format.</html>",
      [2, 4] => "<html>Superscript. x<sup>123</sup></html>",
      [2, 5] => "<html>SubScript. T<sub>j</sub></html>",

      [3, 1] => "<html>Bold, italics <b><i>together</i></b>.</html>",
      [3, 2] => "<html>Bold, Underline <b><u>together</u></b>.</html>",
      [3, 3] => "<html>Bold, Superscript. <b>x</b><sup><b>N</b></sup></html>",
      [3, 4] => "<html>Bold, Subscript. <b>T</b><sub><b>abc</b></sub></html>",
      [3, 5] => "<html>Italics, Underline <i><u>together</u></i>.</html>",
      [3, 6] => "<html>Italics, Superscript. <i>X</i><sup><i>abc</i></sup></html>",
      [3, 7] => "<html>Italics, Subscript. <i>B</i><sub><i>efg</i></sub></html>",
      [4, 1] => "<html>Bold, italics underline,<b><i><u> together</u></i></b>.</html>",
      [4, 2] => "<html>Bold, italics, superscript. <b>X</b><sup><b><i>abc</i></b></sup><b><i>123</i></b></html>",
      [4, 3] => "<html>Bold, Italics, subscript. <b><i>Mg</i></b><sub><b><i>ha</i></b></sub><b><i>2</i></b></html>",
      [4, 4] => "<html>Bold, Underline, superscript. <b><u>AB</u></b><sup><b><u>C12</u></b></sup><b><u>3</u></b></html>",
      [4, 5] => "<html>Bold, Underline, subscript. <b><u>Good</u></b><sub><b><u>XYZ</u></b></sub></html>",
      [4, 6] => "<html>Italics, Underline, superscript. <i><u>Up</u></i><sup><i><u>swing</u></i></sup></html>",
      [4, 7] => "<html>Italics, Underline, subscript. <i><u>T</u></i><sub><i><u>swing</u></i></sub></html>",
      [5, 1] => "<html>Bold, italics, underline, superscript. <b><i><u>GHJK</u></i></b><sup><b><i><u>190</u></i></b></sup><b><i><u>4</u></i></b></html>",
      [5, 2] => "<html>Bold, italics, underline, subscript. <b><i><u>Mike</u></i></b><sub><b><i><u>drop</u></i></b></sub></html>",
      [6, 1] => "See that regular html tags do not create html tags.\n<ol>\n <li> Denver Broncos </li>\n <li> Carolina Panthers </li>\n <li> New England Patriots</li>\n <li>Arizona Panthers</li>\n</ol>",
      [7, 1] => "<html>Does create html tags when formatting is used..\n<ol>\n <li> <b>Denver Broncos</b> </li>\n <li> <i>Carolina Panthers </i></li>\n <li> <u>New England Patriots</u></li>\n <li>Arizona Panthers</li>\n</ol></html>"
    }

    expected_values.each do |(row, col), value|
      expect(subject.excelx_value(row, col, "Sheet1")).to eq value
    end
  end
end


describe '_x000D_' do
let(:path) { 'test/files/x000D.xlsx' }
it 'does not contain _x000D_' do
Expand Down
Binary file added test/files/html_strings_formatting.xlsx
Binary file not shown.

0 comments on commit 4294ea4

Please sign in to comment.