Skip to content

Commit

Permalink
fix SciRuby#308. block handling in from_csv
Browse files Browse the repository at this point in the history
  • Loading branch information
parthm committed Sep 12, 2017
1 parent 6da569e commit b0f9bbc
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 13 deletions.
22 changes: 10 additions & 12 deletions lib/daru/io/io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,15 @@ def dataframe_write_excel dataframe, path, _opts={}
end

# Functions for loading/writing CSV files
def from_csv path, opts={}
def from_csv path, opts={}, &block
daru_options, opts = from_csv_prepare_opts opts
# Preprocess headers for detecting and correcting repetition in
# case the :headers option is not specified.
hsh =
if opts[:headers]
from_csv_hash_with_headers(path, opts)
from_csv_hash_with_headers(path, opts, &block)
else
from_csv_hash(path, opts)
from_csv_hash(path, opts, &block)
.tap { |hash| daru_options[:order] = hash.keys }
end
Daru::DataFrame.new(hsh,daru_options)
Expand Down Expand Up @@ -227,18 +227,16 @@ def from_csv_prepare_converters(converters)

# Builds a { header => column values } hash from the CSV at +path+; used by
# from_csv when opts[:headers] is set.
#
# NOTE(review): this span is a rendered diff, so two versions of the body
# appear back to back. Going by the commit's addition/deletion counts, the
# bare `::CSV ... .tap` chain looks like the removed version and the
# `hash = ...` lines like its replacement -- confirm against the repository.
# NOTE(review): the handle returned by open(path) is not closed anywhere in
# this method -- confirm whether that is intentional.
def from_csv_hash_with_headers(path, opts)
# Default the header converter to :symbol when the caller gave none.
opts[:header_converters] ||= :symbol
# Earlier form: yields the object returned by ::CSV.parse to the block.
# tap returns its receiver, so the block's return value is ignored here.
::CSV
.parse(open(path), opts)
.tap { |c| yield c if block_given? }
.by_col.map { |col_name, values| [col_name, values] }.to_h
# Later form: build the column hash first ...
hash = ::CSV
.parse(open(path), opts)
.by_col.map { |col_name, values| [col_name, values] }.to_h
# ... then hand it to the block; whatever the block returns replaces it.
hash = yield hash if block_given?
hash
end

def from_csv_hash(path, opts)
csv_as_arrays =
::CSV
.parse(open(path), opts)
.tap { |c| yield c if block_given? }
.to_a
csv_as_arrays = ::CSV.parse(open(path), opts).to_a
csv_as_arrays = csv_as_arrays.map { |i| yield i } if block_given?
headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
csv_as_arrays = csv_as_arrays.transpose
headers.each_with_index.map { |h, i| [h, csv_as_arrays[i]] }.to_h
Expand Down
9 changes: 9 additions & 0 deletions spec/fixtures/block_processing.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
date,count
2016-01-10,0
2016-01-11,1
2016-01-12,2
2016-01-13,3
2016-01-14,4
2016-01-15,5
2016-01-16,6
2016-01-17,7
26 changes: 25 additions & 1 deletion spec/io/io_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
end
end

it "allows block to process read data" do
it "allows options to process header" do
df = Daru::DataFrame.from_csv 'spec/fixtures/macd_data.csv', {
headers: true,
header_converters: CSV::HeaderConverters[:symbol]
Expand All @@ -68,6 +68,30 @@
expect(df.vectors).to include(:macd_12_26_9, :macd_6_13_4, :macdhist_12_26_9, :macdhist_6_13_4, :macdsig_12_26_9, :macdsig_6_13_4, :price)
end

# No :headers option: the block is given one row at a time, and the header
# row is among them (hence the 'date' check below). The value returned by
# the block is what ends up in the DataFrame for that row.
it "allows block to process rows" do
df = Daru::DataFrame.from_csv('spec/fixtures/block_processing.csv') do |row|
if row[0] == 'date' # header row: returned unchanged, not dropped
row
else
# Shift the date forward one day and bump the count by one.
[ Date.parse(row[0])+1, row[1].to_i + 1 ]
end
end
expect(df['date']).to all(be_a(Date))
# First data row of the fixture is 2016-01-10,0 -> expect 2016-01-11 and 1.
expect(df['date'][0]).to eq(Date.parse('2016-01-11'))
expect(df['count'][0]).to eq(1)
end

# With headers: true the block is called once with a hash keyed by symbol
# header names (:date, :count) whose values are whole columns, and the hash
# it returns is used to build the DataFrame.
# NOTE(review): despite the description string, the block here receives the
# column hash, not individual rows.
it "allows block to process rows with headers=true" do
df = Daru::DataFrame.from_csv('spec/fixtures/block_processing.csv', headers: true) do |hash|
hash[:date] = hash[:date].map { |d| Date.parse(d)+1 }
# c+1 presumes the count values are already numeric at this point
# (presumably via default converters) -- TODO confirm.
hash[:count] = hash[:count].map { |c| c+1 }
hash
end
expect(df[:date]).to all(be_a(Date))
# First data row of the fixture is 2016-01-10,0 -> expect 2016-01-11 and 1.
expect(df[:date][0]).to eq(Date.parse('2016-01-11'))
expect(df[:count][0]).to eq(1)
end

end

context "#write_csv" do
Expand Down

0 comments on commit b0f9bbc

Please sign in to comment.