From 934743a82210526ee522428c79999ca7cf390f87 Mon Sep 17 00:00:00 2001 From: Allen Fair Date: Mon, 9 Jun 2014 20:35:09 -0400 Subject: [PATCH] Adds pluck_rows and pluck_instances feature This is a light-weight version of pluck. --- README.md | 9 ++++++-- Rakefile | 7 ++++--- .../relation/cursor_iterators.rb | 21 +++++++++++++++++++ .../active_record/sql_cursor.rb | 19 ++++++++++++----- lib/postgresql_cursor/cursor.rb | 10 ++++----- 5 files changed, 51 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 47e5e33..695f8db 100644 --- a/README.md +++ b/README.md @@ -90,12 +90,17 @@ Product.select(:id, :name).each_row { |product| product.process } Pluck is a great alternative instead of using a cursor. It does not instantiate the row, and builds an array of result values, and translates the values into ruby values (numbers, Timestamps. etc.). Using the cursor would still allow you to lazy -load them in batches. +load them in batches for very large sets. + +You can also use the `pluck_rows` or `pluck_instances` if the results +won't eat up too much memory. ```ruby -Product.newly_arrived.pluck(:id) #=> [1, 2, 3, 4, etc.] +Product.newly_arrived.pluck(:id) #=> [1, 2, 3, ...] Product.newly_arrived.each_row { |hash| } Product.select(:id).each_row.map {|r| r["id"].to_i } # cursor instead of pluck +Product.pluck_rows(:id) #=> ["1", "2", ...] +Product.pluck_instances(:id, :quantity) #=> [[1, 503], [2, 932], ...] ``` ##Background: Why PostgreSQL Cursors? diff --git a/Rakefile b/Rakefile index 69b070a..13c85ca 100644 --- a/Rakefile +++ b/Rakefile @@ -11,9 +11,10 @@ end desc "Open and IRB Console with the gem loaded" task :console do - require 'irb' - ARGV.clear - IRB.start + sh "bundle exec irb -Ilib -I . 
-r postgresql_cursor -r test-app/app" + #require 'irb' + #ARGV.clear + #IRB.start end desc "Setup testing database and table" diff --git a/lib/postgresql_cursor/active_record/relation/cursor_iterators.rb b/lib/postgresql_cursor/active_record/relation/cursor_iterators.rb index 0bfa74e..eb67451 100644 --- a/lib/postgresql_cursor/active_record/relation/cursor_iterators.rb +++ b/lib/postgresql_cursor/active_record/relation/cursor_iterators.rb @@ -51,6 +51,27 @@ def each_instance(options={}, &block) end end + def pluck_rows(*cols) + pluck_method(:each_row, *cols) + end + alias :pluck_row :pluck_rows + + def pluck_instances(*cols) + pluck_method(:each_instance, *cols) + end + alias :pluck_instance :pluck_instances + + def pluck_method(method, *cols) + options = cols.last.is_a?(Hash) ? cols.pop : {} + cols = cols.map {|c| c.to_sym } + result = [] + self.send(method, options) do |row| + row = row.symbolize_keys if row.is_a?(Hash) + result << cols.map{ |c| row[c] } + end + result.flatten! if cols.size == 1 + result + end end end end diff --git a/lib/postgresql_cursor/active_record/sql_cursor.rb b/lib/postgresql_cursor/active_record/sql_cursor.rb index d702453..a8a111a 100644 --- a/lib/postgresql_cursor/active_record/sql_cursor.rb +++ b/lib/postgresql_cursor/active_record/sql_cursor.rb @@ -34,7 +34,7 @@ def each_instance(options={}, &block) end # Public: Returns each row as a hash to the given block - # + # sql - Full SQL statement, variables interpolated # options - Hash to control # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) @@ -77,12 +77,21 @@ def each_instance_by_sql(sql, options={}, &block) else PostgreSQLCursor::Cursor.new(sql, options).instance_iterator(self) end + end + + # Returns an array of the given column names. Use if you need cursors and don't expect + # this to consume too much memory. Values are strings. Like ActiveRecord's pluck. 
+ def pluck_rows(*cols) + all.pluck_row(*cols) + end + alias :pluck_row :pluck_rows - #PostgreSQLCursor::Cursor.new(sql, options).each do |row| - # model = instantiate(row) - # yield model - #end + # Returns an array of the given column names. Use if you need cursors and don't expect + # this to consume too much memory. Values are instance types. Like ActiveRecord's pluck. + def pluck_instances(*cols) + all.pluck_instance(*cols) end + alias :pluck_instance :pluck_instances end end end diff --git a/lib/postgresql_cursor/cursor.rb b/lib/postgresql_cursor/cursor.rb index 495c9d5..fa6483c 100644 --- a/lib/postgresql_cursor/cursor.rb +++ b/lib/postgresql_cursor/cursor.rb @@ -10,7 +10,7 @@ # while: value - Exits loop when block does not return this value. # until: value - Exits loop when block returns this value. # -# Exmaples: +# Examples: # PostgreSQLCursor::Cursor.new("select ...").each { |hash| ... } # ActiveRecordModel.where(...).each_row { |hash| ... } # ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... 
} @@ -69,7 +69,7 @@ def each(&block) break if row.size==0 @count += 1 if @iterate == :instances - model = if ::ActiveRecord::VERSION::MAJOR < 4 + model = if ::ActiveRecord::VERSION::MAJOR < 4 @type.send(:instantiate,row) else @type.send(:instantiate,row, column_types) @@ -108,7 +108,7 @@ def column_types types[fname] = @connection.get_type_map.fetch(ftype, fmod) { |oid, mod| warn "unknown OID: #{fname}(#{oid}) (#{sql})" OID::Identity.new - } + } end @@ -125,7 +125,7 @@ def open # Public: Returns the next row from the cursor, or empty hash if end of results # - # Returns a row as a hash of {'colname'=>value,...} + # Returns a row as a hash of {'colname'=>value,...} def fetch fetch_block if @block.size==0 @block.shift @@ -144,7 +144,7 @@ def close end # Private: Sets the PostgreSQL cursor_tuple_fraction value = 1.0 to assume all rows will be fetched - # This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0) + # This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0) # used to determine the expected fraction (percent) of result rows returned the the caller. # This value determines the access path by the query planner. def set_cursor_tuple_fraction(frac=1.0)