diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..0ace6f7 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +# Specify your gem's dependencies in postgresql_cursor.gemspec +gemspec diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..a83e4e5 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,41 @@ +PATH + remote: . + specs: + postgresql_cursor (0.5.0) + activerecord (> 4.0.0) + pg + +GEM + remote: https://rubygems.org/ + specs: + activemodel (4.1.1) + activesupport (= 4.1.1) + builder (~> 3.1) + activerecord (4.1.1) + activemodel (= 4.1.1) + activesupport (= 4.1.1) + arel (~> 5.0.0) + activesupport (4.1.1) + i18n (~> 0.6, >= 0.6.9) + json (~> 1.7, >= 1.7.7) + minitest (~> 5.1) + thread_safe (~> 0.1) + tzinfo (~> 1.1) + arel (5.0.1.20140414130214) + builder (3.2.2) + i18n (0.6.9) + json (1.8.1) + minitest (5.3.3) + pg (0.17.1) + rake (10.3.1) + thread_safe (0.3.3) + tzinfo (1.1.0) + thread_safe (~> 0.1) + +PLATFORMS + ruby + +DEPENDENCIES + minitest + postgresql_cursor! + rake diff --git a/Rakefile b/Rakefile index da20e89..2995527 100644 --- a/Rakefile +++ b/Rakefile @@ -1,53 +1 @@ -require 'rubygems' -require 'rake' - -begin - require 'jeweler' - Jeweler::Tasks.new do |gem| - gem.name = "postgresql_cursor" - gem.summary = %Q{ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set} - gem.description = %Q{PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in "chunks" (default of 10_000 rows), buffers them, and returns the rows one at a time.} - gem.email = "allen.fair@gmail.com" - gem.homepage = "http://github.com/afair/postgresql_cursor" - gem.authors = ["Allen Fair"] - gem.add_dependency 'activerecord' - # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings - end - Jeweler::GemcutterTasks.new -rescue LoadError - puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler" -end - -require 'rake/testtask' -Rake::TestTask.new(:test) do |test| - test.libs << 'lib' << 'test' - test.pattern = 'test/**/test_*.rb' - test.verbose = true -end - -begin - require 'rcov/rcovtask' - Rcov::RcovTask.new do |test| - test.libs << 'test' - test.pattern = 'test/**/test_*.rb' - test.verbose = true - end -rescue LoadError - task :rcov do - abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov" - end -end - -task :test => :check_dependencies - -task :default => :test - -require 'rdoc/task' -Rake::RDocTask.new do |rdoc| - version = File.exist?('VERSION') ? File.read('VERSION') : "" - - rdoc.rdoc_dir = 'rdoc' - rdoc.title = "postgresql_cursor #{version}" - rdoc.rdoc_files.include('README*') - rdoc.rdoc_files.include('lib/**/*.rb') -end +require "bundler/gem_tasks" diff --git a/lib/postgresql_cursor.rb b/lib/postgresql_cursor.rb index 6dbad1b..92b7465 100644 --- a/lib/postgresql_cursor.rb +++ b/lib/postgresql_cursor.rb @@ -1,3 +1,11 @@ +require 'postgresql_cursor/version' + +require 'active_record' +require 'active_record/connection_adapters/postgresql_adapter' +require 'postgresql_cursor/active_record/connection_adapters/postgresql/database_statements' +ActiveRecord::Base.extend(PostgreSQLCursor::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter) + +################################################################################ # PostgreSQLCursor: library class provides postgresql cursor for large result # set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries. # If you don't use AR, this assumes #connection and #instantiate methods are available. @@ -15,7 +23,8 @@ # ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... } # ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... } # -class PostgreSQLCursor +module PostgreSQLCursor +class SQLCursor include Enumerable attr_reader :sql, :options, :connection, :count, :result @@cursor_seq = 0 @@ -90,7 +99,7 @@ def fetch end # Private: Fetches the next block of rows into @block - def fetch_block(block_size=nil) + def fetch_block(block_size=nil) block_size ||= @block_size ||= @options.fetch(:block_size) { 1000 } @result = @connection.execute("fetch #{block_size} from cursor_#{@cursor}") @block = @result.collect {|row| row } # Make our own @@ -115,66 +124,66 @@ def set_cursor_tuple_fraction(frac=1.0) end -# Defines extension to ActiveRecord to use this library -class ActiveRecord::Base - # Public: Returns each row as a hash to the given block - # - # sql - Full SQL statement, variables interpolated - # options - Hash to control - # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) - # block_size: 1..n - The number of rows to fetch per db block fetch - # while: value - Exits loop when block does not return this value. - # until: value - Exits loop when block returns this value. - # - # Returns the number of rows yielded to the block - def self.each_row_by_sql(sql, options={}, &block) - options = {:connection => self.connection}.merge(options) - PostgreSQLCursor.new(sql, options).each(&block) - end - - # Public: Returns each row as a model instance to the given block - # As this instantiates a model object, it is slower than each_row_by_sql - # - # Paramaters: see each_row_by_sql - # - # Returns the number of rows yielded to the block - def self.each_instance_by_sql(sql, options={}, &block) - options = {:connection => self.connection}.merge(options) - PostgreSQLCursor.new(sql, options).each do |row| - model = instantiate(row) - yield model - end - end -end - -# Defines extension to ActiveRecord/AREL to use this library -class ActiveRecord::Relation - - # Public: Executes the query, returning each row as a hash - # to the given block. - # - # options - Hash to control - # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) - # block_size: 1..n - The number of rows to fetch per db block fetch - # while: value - Exits loop when block does not return this value. - # until: value - Exits loop when block returns this value. - # - # Returns the number of rows yielded to the block - def each_row(options={}, &block) - options = {:connection => self.connection}.merge(options) - PostgreSQLCursor.new(to_sql, options).each(&block) - end - - # Public: Like each_row, but returns an instantiated model object to the block - # - # Paramaters: same as each_row - # - # Returns the number of rows yielded to the block - def each_instance(options={}, &block) - options = {:connection => self.connection}.merge(options) - PostgreSQLCursor.new(to_sql, options).each do |row| - model = instantiate(row) - block.call model - end - end +# # Defines extension to ActiveRecord to use this library +# class ActiveRecord::Base +# # Public: Returns each row as a hash to the given block +# # +# # sql - Full SQL statement, variables interpolated +# # options - Hash to control +# # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) +# # block_size: 1..n - The number of rows to fetch per db block fetch +# # while: value - Exits loop when block does not return this value. +# # until: value - Exits loop when block returns this value. +# # +# # Returns the number of rows yielded to the block +# def self.each_row_by_sql(sql, options={}, &block) +# options = {:connection => self.connection}.merge(options) +# PostgreSQLCursor.new(sql, options).each(&block) +# end +# +# # Public: Returns each row as a model instance to the given block +# # As this instantiates a model object, it is slower than each_row_by_sql +# # +# # Paramaters: see each_row_by_sql +# # +# # Returns the number of rows yielded to the block +# def self.each_instance_by_sql(sql, options={}, &block) +# options = {:connection => self.connection}.merge(options) +# PostgreSQLCursor.new(sql, options).each do |row| +# model = instantiate(row) +# yield model +# end +# end +# end +# +# # Defines extension to ActiveRecord/AREL to use this library +# class ActiveRecord::Relation +# +# # Public: Executes the query, returning each row as a hash +# # to the given block. +# # +# # options - Hash to control +# # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) +# # block_size: 1..n - The number of rows to fetch per db block fetch +# # while: value - Exits loop when block does not return this value. +# # until: value - Exits loop when block returns this value. +# # +# # Returns the number of rows yielded to the block +# def each_row(options={}, &block) +# options = {:connection => self.connection}.merge(options) +# PostgreSQLCursor.new(to_sql, options).each(&block) +# end +# +# # Public: Like each_row, but returns an instantiated model object to the block +# # +# # Paramaters: same as each_row +# # +# # Returns the number of rows yielded to the block +# def each_instance(options={}, &block) +# options = {:connection => self.connection}.merge(options) +# PostgreSQLCursor.new(to_sql, options).each do |row| +# model = instantiate(row) +# block.call model +# end +# end end diff --git a/lib/postgresql_cursor/active_record/connection_adapters/postgresql/cursor.rb b/lib/postgresql_cursor/active_record/connection_adapters/postgresql/cursor.rb new file mode 100644 index 0000000..052392f --- /dev/null +++ b/lib/postgresql_cursor/active_record/connection_adapters/postgresql/cursor.rb @@ -0,0 +1,8 @@ +module PostgreSQLCursor + module ActiveRecord + module Reletion + def each_row + end + end + end +end diff --git a/lib/postgresql_cursor/active_record/connection_adapters/postgresql/database_statements.rb b/lib/postgresql_cursor/active_record/connection_adapters/postgresql/database_statements.rb new file mode 100644 index 0000000..49e1e63 --- /dev/null +++ b/lib/postgresql_cursor/active_record/connection_adapters/postgresql/database_statements.rb @@ -0,0 +1,125 @@ +module PostgreSQLCursor + module ActiveRecord + module ConnectionAdapters + module PostgreSQLAdapter + ################################################################################ + # PostgreSQLCursor: library class provides postgresql cursor for large result + # set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries. + # If you don't use AR, this assumes #connection and #instantiate methods are available. + # + # options - Hash to control operation and loop breaks + # connection: instance - ActiveRecord connection to use + # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) + # block_size: 1..n - The number of rows to fetch per db block fetch + # while: value - Exits loop when block does not return this value. + # until: value - Exits loop when block returns this value. + # + # Exmaples: + # PostgreSQLCursor.new("select ...").each { |hash| ... } + # ActiveRecordModel.where(...).each_row { |hash| ... } + # ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... } + # ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... } + # + class Cursor + include Enumerable + attr_reader :sql, :options, :connection, :count, :result + @@cursor_seq = 0 + + # Public: Start a new PostgreSQL cursor query + # sql - The SQL statement with interpolated values + # options - hash of processing controls + # while: value - Exits loop when block does not return this value. + # until: value - Exits loop when block returns this value. + # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) + # block_size: 1..n - The number of rows to fetch per db block fetch + # Defaults to 1000 + # + # Examples + # + # PostgreSQLCursor.new("select ....") + # + # Returns the cursor object when called with new. + def initialize(sql, options={}) + @sql = sql + @options = options + @connection = @options.fetch(:connection) { ActiveRecord::Base.connection } + @count = 0 + end + + # Public: Yields each row of the result set to the passed block + # + # + # Yields the row to the block. The row is a hash with symbolized keys. + # {colname: value, ....} + # + # Returns the count of rows processed + def each(&block) + has_do_until = @options.has_key?(:until) + has_do_while = @options.has_key?(:while) + @count = 0 + @connection.transaction do + begin + open + while (row = fetch) do + break if row.size==0 + @count += 1 + row = row.symbolize_keys + rc = yield row + # TODO: Handle exceptions raised within block + break if has_do_until && rc == @options[:until] + break if has_do_while && rc != @options[:while] + end + rescue Exception => e + raise e + ensure + close + end + end + @count + end + + # Public: Opens (actually, "declares") the cursor. Call this before fetching + def open + set_cursor_tuple_fraction + @cursor = @@cursor_seq += 1 + @result = @connection.execute("declare cursor_#{@cursor} cursor for #{@sql}") + @block = [] + end + + # Public: Returns the next row from the cursor, or empty hash if end of results + # + # Returns a row as a hash of {'colname'=>value,...} + def fetch + fetch_block if @block.size==0 + @block.shift + end + + # Private: Fetches the next block of rows into @block + def fetch_block(block_size=nil) + block_size ||= @block_size ||= @options.fetch(:block_size) { 1000 } + @result = @connection.execute("fetch #{block_size} from cursor_#{@cursor}") + @block = @result.collect {|row| row } # Make our own + end + + # Public: Closes the cursor + def close + @connection.execute("close cursor_#{@cursor}") + end + + # Private: Sets the PostgreSQL cursor_tuple_fraction value = 1.0 to assume all rows will be fetched + # This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0) + # used to determine the expected fraction (percent) of result rows returned the the caller. + # This value determines the access path by the query planner. + def set_cursor_tuple_fraction(frac=1.0) + @cursor_tuple_fraction ||= @options.fetch(:fraction) { 1.0 } + return @cursor_tuple_fraction if frac == @cursor_tuple_fraction + @cursor_tuple_fraction = frac + @result = @connection.execute("set cursor_tuple_fraction to #{frac}") + frac + end + + end + end + end + end +end diff --git a/lib/postgresql_cursor/active_record/relation/batches.rb b/lib/postgresql_cursor/active_record/relation/batches.rb new file mode 100644 index 0000000..f480383 --- /dev/null +++ b/lib/postgresql_cursor/active_record/relation/batches.rb @@ -0,0 +1,8 @@ +module PostgreSQLCursor + module ActiveRecord + module Batches + def find_each(options = {}) + end + end + end +end diff --git a/lib/postgresql_cursor/railtie.rb b/lib/postgresql_cursor/railtie.rb new file mode 100644 index 0000000..b916e4c --- /dev/null +++ b/lib/postgresql_cursor/railtie.rb @@ -0,0 +1,5 @@ +module PostgreSQLCursor + class Railtie < Rails::Railtie + + end +end diff --git a/lib/postgresql_cursor/version.rb b/lib/postgresql_cursor/version.rb new file mode 100644 index 0000000..51cd139 --- /dev/null +++ b/lib/postgresql_cursor/version.rb @@ -0,0 +1,3 @@ +module PostgresqlCursor + VERSION = "0.5.0" +end diff --git a/postgresql_cursor.gemspec b/postgresql_cursor.gemspec index 8784841..c994169 100644 --- a/postgresql_cursor.gemspec +++ b/postgresql_cursor.gemspec @@ -1,50 +1,26 @@ -# Generated by jeweler -# DO NOT EDIT THIS FILE DIRECTLY -# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec' -# -*- encoding: utf-8 -*- +# coding: utf-8 +lib = File.expand_path('../lib', __FILE__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'postgresql_cursor/version' -Gem::Specification.new do |s| - s.name = "postgresql_cursor" - s.version = "0.4.1" +Gem::Specification.new do |spec| + spec.name = "postgresql_cursor" + spec.version = PostgresqlCursor::VERSION + spec.authors = ["Allen Fair"] + spec.email = ["allen.fair@gmail.com"] + spec.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set" + spec.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 10_000 rows), buffers them, and returns the rows one at a time." + spec.homepage = "http://github.com/afair/postgresql_cursor" + spec.license = "MIT" - s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= - s.authors = ["Allen Fair"] - s.date = "2013-02-20" - s.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 10_000 rows), buffers them, and returns the rows one at a time." - s.email = "allen.fair@gmail.com" - s.extra_rdoc_files = [ - "LICENSE", - "README.rdoc" - ] - s.files = [ - ".document", - "LICENSE", - "README.rdoc", - "Rakefile", - "VERSION", - "lib/postgresql_cursor.rb", - "postgresql_cursor.gemspec", - "test/helper.rb", - "test/test_postgresql_cursor.rb" - ] - s.homepage = "http://github.com/afair/postgresql_cursor" - s.require_paths = ["lib"] - s.rubygems_version = "1.8.24" - s.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set" + spec.files = `git ls-files -z`.split("\x0") + spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } + spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) + spec.require_paths = ["lib"] - if s.respond_to? :specification_version then - s.specification_version = 3 + spec.add_dependency "pg" + spec.add_dependency "activerecord", "> 4.0.0" - if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then - s.add_runtime_dependency(%q, [">= 0"]) - s.add_runtime_dependency(%q, [">= 0"]) - else - s.add_dependency(%q, [">= 0"]) - s.add_dependency(%q, [">= 0"]) - end - else - s.add_dependency(%q, [">= 0"]) - s.add_dependency(%q, [">= 0"]) - end + spec.add_development_dependency "rake" + spec.add_development_dependency "minitest" end -