Skip to content

Commit

Permalink
Updates to modern gem structure
Browse files Browse the repository at this point in the history
* Removes jeweler, re-scaffolded with bundler
* gemspec and version upgraded
* Started moving code inside of ActiveRecord
  We are limited by just how much we cando from outside of AR.
  By properly integrating into AR, we can inherit all its goodness
  • Loading branch information
afair committed May 14, 2014
1 parent fdf53ab commit 7831691
Show file tree
Hide file tree
Showing 10 changed files with 289 additions and 162 deletions.
4 changes: 4 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
source 'https://rubygems.org'

# Specify your gem's dependencies in postgresql_cursor.gemspec
gemspec
41 changes: 41 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
PATH
remote: .
specs:
postgresql_cursor (0.5.0)
activerecord (> 4.0.0)
pg

GEM
remote: https://rubygems.org/
specs:
activemodel (4.1.1)
activesupport (= 4.1.1)
builder (~> 3.1)
activerecord (4.1.1)
activemodel (= 4.1.1)
activesupport (= 4.1.1)
arel (~> 5.0.0)
activesupport (4.1.1)
i18n (~> 0.6, >= 0.6.9)
json (~> 1.7, >= 1.7.7)
minitest (~> 5.1)
thread_safe (~> 0.1)
tzinfo (~> 1.1)
arel (5.0.1.20140414130214)
builder (3.2.2)
i18n (0.6.9)
json (1.8.1)
minitest (5.3.3)
pg (0.17.1)
rake (10.3.1)
thread_safe (0.3.3)
tzinfo (1.1.0)
thread_safe (~> 0.1)

PLATFORMS
ruby

DEPENDENCIES
minitest
postgresql_cursor!
rake
54 changes: 1 addition & 53 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,53 +1 @@
require 'rubygems'
require 'rake'

begin
require 'jeweler'
Jeweler::Tasks.new do |gem|
gem.name = "postgresql_cursor"
gem.summary = %Q{ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set}
gem.description = %Q{PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in "chunks" (default of 10_000 rows), buffers them, and returns the rows one at a time.}
gem.email = "allen.fair@gmail.com"
gem.homepage = "http://github.com/afair/postgresql_cursor"
gem.authors = ["Allen Fair"]
gem.add_dependency 'activerecord'
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
end
Jeweler::GemcutterTasks.new
rescue LoadError
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
test.libs << 'lib' << 'test'
test.pattern = 'test/**/test_*.rb'
test.verbose = true
end

begin
require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
test.libs << 'test'
test.pattern = 'test/**/test_*.rb'
test.verbose = true
end
rescue LoadError
task :rcov do
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
end
end

task :test => :check_dependencies

task :default => :test

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
version = File.exist?('VERSION') ? File.read('VERSION') : ""

rdoc.rdoc_dir = 'rdoc'
rdoc.title = "postgresql_cursor #{version}"
rdoc.rdoc_files.include('README*')
rdoc.rdoc_files.include('lib/**/*.rb')
end
require "bundler/gem_tasks"
137 changes: 73 additions & 64 deletions lib/postgresql_cursor.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
require 'postgresql_cursor/version'

require 'active_record'
require 'active_record/connection_adapters/postgresql_adapter'
require 'postgresql_cursor/active_record/connection_adapters/postgresql/database_statements'
ActiveRecord::Base.extend(PostgreSQLCursor::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter)

################################################################################
# PostgreSQLCursor: library class provides postgresql cursor for large result
# set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries.
# If you don't use AR, this assumes #connection and #instantiate methods are available.
Expand All @@ -15,7 +23,8 @@
# ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... }
# ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... }
#
class PostgreSQLCursor
module PostgreSQLCursor
class SQLCursor
include Enumerable
attr_reader :sql, :options, :connection, :count, :result
@@cursor_seq = 0
Expand Down Expand Up @@ -90,7 +99,7 @@ def fetch
end

# Private: Fetches the next block of rows into @block
def fetch_block(block_size=nil)
def fetch_block(block_size=nil)
block_size ||= @block_size ||= @options.fetch(:block_size) { 1000 }
@result = @connection.execute("fetch #{block_size} from cursor_#{@cursor}")
@block = @result.collect {|row| row } # Make our own
Expand All @@ -115,66 +124,66 @@ def set_cursor_tuple_fraction(frac=1.0)

end

# Defines extension to ActiveRecord to use this library
class ActiveRecord::Base
# Public: Returns each row as a hash to the given block
#
# sql - Full SQL statement, variables interpolated
# options - Hash to control
# fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
# block_size: 1..n - The number of rows to fetch per db block fetch
# while: value - Exits loop when block does not return this value.
# until: value - Exits loop when block returns this value.
#
# Returns the number of rows yielded to the block
def self.each_row_by_sql(sql, options={}, &block)
options = {:connection => self.connection}.merge(options)
PostgreSQLCursor.new(sql, options).each(&block)
end

# Public: Returns each row as a model instance to the given block
# As this instantiates a model object, it is slower than each_row_by_sql
#
# Paramaters: see each_row_by_sql
#
# Returns the number of rows yielded to the block
def self.each_instance_by_sql(sql, options={}, &block)
options = {:connection => self.connection}.merge(options)
PostgreSQLCursor.new(sql, options).each do |row|
model = instantiate(row)
yield model
end
end
end

# Defines extension to ActiveRecord/AREL to use this library
class ActiveRecord::Relation

# Public: Executes the query, returning each row as a hash
# to the given block.
#
# options - Hash to control
# fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
# block_size: 1..n - The number of rows to fetch per db block fetch
# while: value - Exits loop when block does not return this value.
# until: value - Exits loop when block returns this value.
#
# Returns the number of rows yielded to the block
def each_row(options={}, &block)
options = {:connection => self.connection}.merge(options)
PostgreSQLCursor.new(to_sql, options).each(&block)
end

# Public: Like each_row, but returns an instantiated model object to the block
#
# Paramaters: same as each_row
#
# Returns the number of rows yielded to the block
def each_instance(options={}, &block)
options = {:connection => self.connection}.merge(options)
PostgreSQLCursor.new(to_sql, options).each do |row|
model = instantiate(row)
block.call model
end
end
# # Defines extension to ActiveRecord to use this library
# class ActiveRecord::Base
# # Public: Returns each row as a hash to the given block
# #
# # sql - Full SQL statement, variables interpolated
# # options - Hash to control
# # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
# # block_size: 1..n - The number of rows to fetch per db block fetch
# # while: value - Exits loop when block does not return this value.
# # until: value - Exits loop when block returns this value.
# #
# # Returns the number of rows yielded to the block
# def self.each_row_by_sql(sql, options={}, &block)
# options = {:connection => self.connection}.merge(options)
# PostgreSQLCursor.new(sql, options).each(&block)
# end
#
# # Public: Returns each row as a model instance to the given block
# # As this instantiates a model object, it is slower than each_row_by_sql
# #
# # Paramaters: see each_row_by_sql
# #
# # Returns the number of rows yielded to the block
# def self.each_instance_by_sql(sql, options={}, &block)
# options = {:connection => self.connection}.merge(options)
# PostgreSQLCursor.new(sql, options).each do |row|
# model = instantiate(row)
# yield model
# end
# end
# end
#
# # Defines extension to ActiveRecord/AREL to use this library
# class ActiveRecord::Relation
#
# # Public: Executes the query, returning each row as a hash
# # to the given block.
# #
# # options - Hash to control
# # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
# # block_size: 1..n - The number of rows to fetch per db block fetch
# # while: value - Exits loop when block does not return this value.
# # until: value - Exits loop when block returns this value.
# #
# # Returns the number of rows yielded to the block
# def each_row(options={}, &block)
# options = {:connection => self.connection}.merge(options)
# PostgreSQLCursor.new(to_sql, options).each(&block)
# end
#
# # Public: Like each_row, but returns an instantiated model object to the block
# #
# # Paramaters: same as each_row
# #
# # Returns the number of rows yielded to the block
# def each_instance(options={}, &block)
# options = {:connection => self.connection}.merge(options)
# PostgreSQLCursor.new(to_sql, options).each do |row|
# model = instantiate(row)
# block.call model
# end
# end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module PostgreSQLCursor
module ActiveRecord
module Reletion
def each_row
end
end
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
module PostgreSQLCursor
module ActiveRecord
module ConnectionAdapters
module PostgreSQLAdapter
################################################################################
# PostgreSQLCursor: library class provides postgresql cursor for large result
# set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries.
# If you don't use AR, this assumes #connection and #instantiate methods are available.
#
# options - Hash to control operation and loop breaks
# connection: instance - ActiveRecord connection to use
# fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
# block_size: 1..n - The number of rows to fetch per db block fetch
# while: value - Exits loop when block does not return this value.
# until: value - Exits loop when block returns this value.
#
# Exmaples:
# PostgreSQLCursor.new("select ...").each { |hash| ... }
# ActiveRecordModel.where(...).each_row { |hash| ... }
# ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... }
# ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... }
#
class Cursor
include Enumerable
attr_reader :sql, :options, :connection, :count, :result
@@cursor_seq = 0

# Public: Start a new PostgreSQL cursor query
# sql - The SQL statement with interpolated values
# options - hash of processing controls
# while: value - Exits loop when block does not return this value.
# until: value - Exits loop when block returns this value.
# fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
# block_size: 1..n - The number of rows to fetch per db block fetch
# Defaults to 1000
#
# Examples
#
# PostgreSQLCursor.new("select ....")
#
# Returns the cursor object when called with new.
def initialize(sql, options={})
@sql = sql
@options = options
@connection = @options.fetch(:connection) { ActiveRecord::Base.connection }
@count = 0
end

# Public: Yields each row of the result set to the passed block
#
#
# Yields the row to the block. The row is a hash with symbolized keys.
# {colname: value, ....}
#
# Returns the count of rows processed
def each(&block)
has_do_until = @options.has_key?(:until)
has_do_while = @options.has_key?(:while)
@count = 0
@connection.transaction do
begin
open
while (row = fetch) do
break if row.size==0
@count += 1
row = row.symbolize_keys
rc = yield row
# TODO: Handle exceptions raised within block
break if has_do_until && rc == @options[:until]
break if has_do_while && rc != @options[:while]
end
rescue Exception => e
raise e
ensure
close
end
end
@count
end

# Public: Opens (actually, "declares") the cursor. Call this before fetching
def open
set_cursor_tuple_fraction
@cursor = @@cursor_seq += 1
@result = @connection.execute("declare cursor_#{@cursor} cursor for #{@sql}")
@block = []
end

# Public: Returns the next row from the cursor, or empty hash if end of results
#
# Returns a row as a hash of {'colname'=>value,...}
def fetch
fetch_block if @block.size==0
@block.shift
end

# Private: Fetches the next block of rows into @block
def fetch_block(block_size=nil)
block_size ||= @block_size ||= @options.fetch(:block_size) { 1000 }
@result = @connection.execute("fetch #{block_size} from cursor_#{@cursor}")
@block = @result.collect {|row| row } # Make our own
end

# Public: Closes the cursor
def close
@connection.execute("close cursor_#{@cursor}")
end

# Private: Sets the PostgreSQL cursor_tuple_fraction value = 1.0 to assume all rows will be fetched
# This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0)
# used to determine the expected fraction (percent) of result rows returned the the caller.
# This value determines the access path by the query planner.
def set_cursor_tuple_fraction(frac=1.0)
@cursor_tuple_fraction ||= @options.fetch(:fraction) { 1.0 }
return @cursor_tuple_fraction if frac == @cursor_tuple_fraction
@cursor_tuple_fraction = frac
@result = @connection.execute("set cursor_tuple_fraction to #{frac}")
frac
end

end
end
end
end
end
Loading

0 comments on commit 7831691

Please sign in to comment.