Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support reading from stdin #1

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 24 additions & 7 deletions webtable_to_text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,19 @@

options = {}
OptionParser.new do |opts|
opts.banner = " Usage: webtable_to_text.rb [options]"
# Multi-line banner for the option parser: lists the direct invocation form
# (-f FILE or -u URL) and the piped forms (curl / wget -O-), then ends with an
# "options:" heading.
opts.banner = <<BANNER
usage:
webtable_to_text.rb [options] [-f FILE | -u URL]
curl [options] URL | webtable_to_text.rb
wget -O- [options] URL | webtable_to_text.rb

options:
BANNER

opts.on("-A", "--all", "Print all tables found on the specified page") { options[:all] = true }
opts.on("-a", "--asciidoc", "Output in asciidoc/asciidoctor format") { options[:asciidoc] = true }
opts.on("-c", "--csv", "Output in CSV / comma separated values format") { options[:csv] = true }
opts.on("-f", "--file FILE", "Specify HTML input file as source for extracting tables") { |v| options[:file] = v }
opts.on("-f", "--file FILE", "Specify HTML input file as source for extracting tables; use '-' to read from stdin") { |v| options[:file] = v }
opts.on("-i", "--interactive", "Interactive mode") { options[:interactive] = true }
opts.on("-m", "--markdown", "Output in markdown format") { options[:markdown] = true }
opts.on("-n", "--number NUM", "Print specific table number only; separate multiple numbers with commas") { |v| options[:number] = v }
Expand All @@ -27,8 +34,6 @@

end.parse!

source = ""

url = options[:url]
file = options[:file]

Expand All @@ -38,9 +43,21 @@
source_content = URI.open(escaped).read
elsif file
source_location = file
source_content = File.read(file)
# A file name of "-" means "read the HTML from standard input".
# Read $stdin directly rather than ARGF: ARGF prefers any file names still
# left in ARGV after option parsing, so a stray positional argument would be
# read in place of the piped data.
# h/t: https://stackoverflow.com/a/273841
source_content = if file == "-"
  $stdin.read
else
  File.read(file)
end
else
abort(" Please provide a source file or URL as input")
# Fallback when no explicit source option matched: take the HTML from a pipe.
# A terminal on stdin means nothing was piped in, so stop with a usage hint.
abort(" Please provide a source file or URL as input. See '--help'.") if $stdin.tty?
# Interactive mode is refused for piped input.
# NOTE(review): presumably because its prompts need stdin — confirm.
abort(" Interactive mode not supported when reading from a pipe.") if options[:interactive]

source_location = '(stdin)'
# Read $stdin explicitly rather than ARGF: ARGF would read files named by
# leftover positional arguments, which would contradict the '(stdin)' label.
source_content = $stdin.read
end

doc = Nokogiri::HTML(source_content)
Expand All @@ -49,7 +66,7 @@
len = tables.length

if len < 1
abort(" No tables found in page at #{source_location}")
abort(" No tables found in page at #{source_location}.")
end

numstring = "all"
Expand Down