Skip to content

Commit

Permalink
Drastically speeds up imports when bulk_size is specified
Browse files Browse the repository at this point in the history
  • Loading branch information
Mitch Birti committed Nov 9, 2017
1 parent 1a13f75 commit e56fcd4
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
6 changes: 4 additions & 2 deletions lib/chewy/type/import/bulk_request.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,17 @@ def request_base

 def request_bodies(body)
   if @bulk_size
+    serializer = ::Elasticsearch::API.serializer
     pieces = body.each_with_object(['']) do |piece, result|
       operation, meta = piece.to_a.first
       data = meta.delete(:data)
-      piece = [{operation => meta}, data].compact.map(&:to_json).join("\n")
+      piece = serializer.dump(operation => meta)
+      piece << "\n" << serializer.dump(data) if data.present?
 
       if result.last.bytesize + piece.bytesize > @bulk_size
         result.push(piece)
       else
-        result[-1] = [result[-1], piece].reject(&:blank?).join("\n")
+        result[-1].blank? ? (result[-1] = piece) : (result[-1] << "\n" << piece)
       end
     end
     pieces.each { |piece| piece << "\n" }
Expand Down
2 changes: 1 addition & 1 deletion lib/chewy/type/import/journal_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def entries(action, objects)
     index_name: @type.index.derivable_name,
     type_name: @type.type_name,
     action: action,
-    references: identify(objects).map(&:to_json),
+    references: identify(objects).map { |item| ::Elasticsearch::API.serializer.dump(item) },
     created_at: Time.now.utc
   }
 end
Expand Down

0 comments on commit e56fcd4

Please sign in to comment.