Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
jazzido committed Feb 9, 2014
1 parent dbed4e7 commit 35f5e84
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ platform :jruby do
gem "rack"
gem "tilt"
gem "rufus-lru"
gem "tabula-extractor", '~>0.7.2', :require => "tabula"
gem "tabula-extractor", '~>0.7.2', :require => "tabula", :path => '../tabula-extractor'

group :development do
gem "rake"
Expand Down
10 changes: 10 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
PATH
remote: ../tabula-extractor
specs:
tabula-extractor (0.7.2-java)
trollop (~> 2.0)

GEM
remote: https://rubygems.org/
specs:
Expand Down Expand Up @@ -27,6 +33,10 @@ DEPENDENCIES
rack
rake
rufus-lru
tabula-extractor (~> 0.7.2)!
tilt
warbler
3 changes: 3 additions & 0 deletions webapp/static/js/pdf_view.js
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ Tabula.PDFView = Backbone.View.extend({
return [coords.y1, coords.x1, coords.y2, coords.x2].join(',');
},

debugTextChunks: function(image) {
return this._debugRectangularShapes(image, '/debug/' + this.PDF_ID + '/text_chunks');
},

/* functions for the follow-you-around bar */
total_selections: function(){
Expand Down
26 changes: 26 additions & 0 deletions webapp/tabula_debug.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,32 @@ class TabulaDebug < Cuba
}.to_json
end

on ":file_id/text_chunks" do |file_id|
par = JSON.load(req.params['coords']).first
page = par['page']

pdf_path = File.join(TabulaSettings::DOCUMENTS_BASEPATH, file_id, 'document.pdf')
extractor = Tabula::Extraction::ObjectExtractor.new(pdf_path, [page])

text_elements = extractor.extract.next.get_text([par['y1'].to_f,
par['x1'].to_f,
par['y2'].to_f,
par['x2'].to_f])

text_chunks = Tabula::TextElement.merge_words(text_elements)

puts text_chunks.inspect

res['Content-Type'] = 'application/json'
res.write text_chunks.map { |te|
{ 'left' => te.left,
'top' => te.top,
'width' => te.width,
'height' => te.height,
'text' => te.text }
}.to_json
end


on ":file_id/clipping_paths" do |file_id|
par = JSON.load(req.params['coords']).first
Expand Down

0 comments on commit 35f5e84

Please sign in to comment.