Skip to content

Commit

Permalink
Merge pull request #606 from yahooguntu/master
Browse files Browse the repository at this point in the history
Drastically speed up imports when bulk_size is specified
  • Loading branch information
pyromaniac authored Mar 10, 2018
2 parents eb1ad99 + d84b6cd commit f9c02dd
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
6 changes: 4 additions & 2 deletions lib/chewy/type/import/bulk_request.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,17 @@ def request_base

def request_bodies(body)
if @bulk_size
+ serializer = ::Elasticsearch::API.serializer
pieces = body.each_with_object(['']) do |piece, result|
operation, meta = piece.to_a.first
data = meta.delete(:data)
- piece = [{operation => meta}, data].compact.map(&:to_json).join("\n")
+ piece = serializer.dump(operation => meta)
+ piece << "\n" << serializer.dump(data) if data.present?

if result.last.bytesize + piece.bytesize > @bulk_size
result.push(piece)
else
- result[-1] = [result[-1], piece].reject(&:blank?).join("\n")
+ result[-1].blank? ? (result[-1] = piece) : (result[-1] << "\n" << piece)
end
end
pieces.each { |piece| piece << "\n" }
Expand Down
2 changes: 1 addition & 1 deletion lib/chewy/type/import/journal_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def entries(action, objects)
index_name: @type.index.derivable_name,
type_name: @type.type_name,
action: action,
- references: identify(objects).map(&:to_json).map(&Base64.method(:encode64)),
+ references: identify(objects).map { |item| Base64.encode64(::Elasticsearch::API.serializer.dump(item)) },
created_at: Time.now.utc
}
end
Expand Down

0 comments on commit f9c02dd

Please sign in to comment.