Fix SciRuby#308: block handling in from_csv

parthm · Sep 12, 2017 · b0f9bbc · b0f9bbc
1 parent 6da569e
commit b0f9bbc
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 13 deletions.
diff --git a/lib/daru/io/io.rb b/lib/daru/io/io.rb
@@ -75,15 +75,15 @@ def dataframe_write_excel dataframe, path, _opts={}
       end
 
       # Functions for loading/writing CSV files
-      def from_csv path, opts={}
+      def from_csv path, opts={}, &block
         daru_options, opts = from_csv_prepare_opts opts
         # Preprocess headers for detecting and correcting repetition in
         # case the :headers option is not specified.
         hsh =
           if opts[:headers]
-            from_csv_hash_with_headers(path, opts)
+            from_csv_hash_with_headers(path, opts, &block)
           else
-            from_csv_hash(path, opts)
+            from_csv_hash(path, opts, &block)
               .tap { |hash| daru_options[:order] = hash.keys }
           end
         Daru::DataFrame.new(hsh,daru_options)
@@ -227,18 +227,16 @@ def from_csv_prepare_converters(converters)
 
       def from_csv_hash_with_headers(path, opts)
         opts[:header_converters] ||= :symbol
-        ::CSV
-          .parse(open(path), opts)
-          .tap { |c| yield c if block_given? }
-          .by_col.map { |col_name, values| [col_name, values] }.to_h
+        hash = ::CSV
+               .parse(open(path), opts)
+               .by_col.map { |col_name, values| [col_name, values] }.to_h
+        hash = yield hash if block_given?
+        hash
       end
 
       def from_csv_hash(path, opts)
-        csv_as_arrays =
-          ::CSV
-          .parse(open(path), opts)
-          .tap { |c| yield c if block_given? }
-          .to_a
+        csv_as_arrays = ::CSV.parse(open(path), opts).to_a
+        csv_as_arrays = csv_as_arrays.map { |i| yield i } if block_given?
         headers       = ArrayHelper.recode_repeated(csv_as_arrays.shift)
         csv_as_arrays = csv_as_arrays.transpose
         headers.each_with_index.map { |h, i| [h, csv_as_arrays[i]] }.to_h

diff --git a/spec/fixtures/block_processing.csv b/spec/fixtures/block_processing.csv
@@ -0,0 +1,9 @@
+date,count
+2016-01-10,0
+2016-01-11,1
+2016-01-12,2
+2016-01-13,3
+2016-01-14,4
+2016-01-15,5
+2016-01-16,6
+2016-01-17,7
diff --git a/spec/io/io_spec.rb b/spec/io/io_spec.rb
@@ -59,7 +59,7 @@
         end
       end
 
-      it "allows block to process read data" do
+      it "allows options to process header" do
         df = Daru::DataFrame.from_csv 'spec/fixtures/macd_data.csv', {
           headers: true,
           header_converters: CSV::HeaderConverters[:symbol]
@@ -68,6 +68,30 @@
         expect(df.vectors).to include(:macd_12_26_9, :macd_6_13_4, :macdhist_12_26_9, :macdhist_6_13_4, :macdsig_12_26_9, :macdsig_6_13_4, :price)
       end
 
+      it "allows block to process rows" do
+        df = Daru::DataFrame.from_csv('spec/fixtures/block_processing.csv') do |row|
+          if row[0] == 'date' # skip header
+            row
+          else
+            [ Date.parse(row[0])+1, row[1].to_i + 1 ]
+          end
+        end
+        expect(df['date']).to all(be_a(Date))
+        expect(df['date'][0]).to eq(Date.parse('2016-01-11'))
+        expect(df['count'][0]).to eq(1)
+      end
+
+      it "allows block to process rows with headers=true" do
+        df = Daru::DataFrame.from_csv('spec/fixtures/block_processing.csv', headers: true) do |hash|
+          hash[:date] = hash[:date].map { |d| Date.parse(d)+1 }
+          hash[:count] = hash[:count].map { |c| c+1 }
+          hash
+        end
+        expect(df[:date]).to all(be_a(Date))
+        expect(df[:date][0]).to eq(Date.parse('2016-01-11'))
+        expect(df[:count][0]).to eq(1)
+      end
+
     end
 
     context "#write_csv" do