From 4eba3c37353ca3be2aa41d4990d63b489d10ac95 Mon Sep 17 00:00:00 2001 From: Alex Cummaudo Date: Sat, 30 Apr 2016 14:18:13 +1000 Subject: [PATCH 1/8] ENHANCE: Add ghostscript backup when validating a PDF --- app/helpers/file_helper.rb | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 6e90f5d2c..5a6e8cb4e 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -342,10 +342,18 @@ def convert_files_to_pdf(from_path, dest_path) def pdf_valid?(file) did_succeed = false - try_within 30, "validating PDF" do + try_within 30, "validating PDF using pdftk" do did_succeed = system "nice -n 10 pdftk #{file} output /dev/null dont_ask" + end + + unless did_succeed + logger.warn "Failed to validate PDF file. Trying with ghostscript" + try_within 30, "validating PDF using ghostscript" do + did_succeed = system "nice -n 10 gs -o /dev/null -sDEVICE=nullpage -r36x36 -dNOPAUSE -q #{file} >>/dev/null 2>>/dev/null" + end + unless did_succeed - logger.error "Failed to validate PDF file. Is pdftk installed?" + logger.error "Failed to validate pdf file. Is ghostscript installed?" end end From d570ca565a1445edefac8fddce0f89c12395f024 Mon Sep 17 00:00:00 2001 From: Alex Cummaudo Date: Sat, 30 Apr 2016 22:01:03 +1000 Subject: [PATCH 2/8] QUALITY: Replace PDFtk validation with ghostscript --- app/helpers/file_helper.rb | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 5a6e8cb4e..86f6845da 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -342,16 +342,8 @@ def convert_files_to_pdf(from_path, dest_path) def pdf_valid?(file) did_succeed = false - try_within 30, "validating PDF using pdftk" do - did_succeed = system "nice -n 10 pdftk #{file} output /dev/null dont_ask" - end - - unless did_succeed - logger.warn "Failed to validate PDF file. Trying with ghostscript" - try_within 30, "validating PDF using ghostscript" do - did_succeed = system "nice -n 10 gs -o /dev/null -sDEVICE=nullpage -r36x36 -dNOPAUSE -q #{file} >>/dev/null 2>>/dev/null" - end - + try_within 30, "validating PDF using ghostscript" do + did_succeed = system "nice -n 10 gs -o /dev/null -sDEVICE=nullpage -r36x36 -dNOPAUSE -q #{file} >>/dev/null 2>>/dev/null" unless did_succeed logger.error "Failed to validate pdf file. Is ghostscript installed?" end From 638af31a924dc57027a1c56e0eaba0c2fc6ddd04 Mon Sep 17 00:00:00 2001 From: Alex Cummaudo Date: Sat, 30 Apr 2016 22:06:16 +1000 Subject: [PATCH 3/8] QUALITY: Replace PDFtk with ghostscript for aggregation --- app/helpers/file_helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 86f6845da..8ba904fbb 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -492,7 +492,7 @@ def aggregate(pdf_paths, final_pdf_path) logger.debug "Trying to aggregate PDFs to #{final_pdf_path}" did_compile = false - exec = "nice -n 10 pdftk #{pdf_paths.join ' '} cat output '#{final_pdf_path}' dont_ask compress" + exec = "nice -n 10 gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -dPDFSETTINGS=/screen -sOutputFile='#{final_pdf_path}' #{pdf_paths.join ' '}" Terminator.terminate 180 do did_compile = system exec end From cfa68c3c03b8b3812c5f14d98c445c322a01bd22 Mon Sep 17 00:00:00 2001 From: Alex Cummaudo Date: Sat, 30 Apr 2016 22:47:48 +1000 Subject: [PATCH 4/8] ENHANCE: Add all system calls to use timeout script Extend timeout_helper with system_try_withn --- app/helpers/file_helper.rb | 69 +++++++++++++++++++++++------------ app/helpers/timeout_helper.rb | 17 +++++++++ 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 8ba904fbb..65c3fd8d7 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -184,14 +184,12 @@ def compress_image(path) logger.debug "File helper has started compressing #{path} to #{tmp_file}..." begin - exec = "#{Rails.root.join('lib', 'shell', 'timeout.sh')} -t 30 nice -n 10 convert \"#{path}\" -resize 1024x1024 \"#{tmp_file}\" >>/dev/null 2>>/dev/null" - # puts exec + exec = "convert \ + \"#{path}\" \ + -resize 1024x1024 \ + \"#{tmp_file}\" >>/dev/null 2>>/dev/null" - # try with convert - did_compress = false - try_within 40, "compressing image" do - did_compress = system exec - end + did_compress = system_try_within 40, "compressing image using convert", exec if did_compress FileUtils.mv tmp_file, path @@ -217,20 +215,30 @@ def compress_pdf(path, max_size = 2500000) tmp_file = File.join( Dir.tmpdir, 'doubtfire', 'compress', "#{File.dirname(path).split(File::Separator).last}-file.pdf" ) FileUtils.mkdir_p(File.join( Dir.tmpdir, 'doubtfire', 'compress' )) - exec = "#{Rails.root.join('lib', 'shell', 'timeout.sh')} -t 30 nice -n 10 gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.3 -dDetectDuplicateImages=true -dPDFSETTINGS=/screen -dNOPAUSE -dBATCH -dQUIET -sOutputFile=\"#{tmp_file}\" \"#{path}\" >>/dev/null 2>>/dev/null" + exec = "gs -sDEVICE=pdfwrite \ + -dCompatibilityLevel=1.3 \ + -dDetectDuplicateImages=true \ + -dPDFSETTINGS=/screen \ + -dNOPAUSE \ + -dBATCH \ + -dQUIET \ + -sOutputFile=\"#{tmp_file}\" \ + \"#{path}\" \ + >>/dev/null 2>>/dev/null" # try with ghostscript - did_compress = system exec + did_compress = system_try_within 30, "compressing PDF using ghostscript", exec if !did_compress logger.info "Failed to compress PDF #{path} using GhostScript. Trying with convert" - exec = "#{Rails.root.join('lib', 'shell', 'timeout.sh')} -t 30 nice -n 10 convert \"#{path}\" -compress Zip \"#{tmp_file}\" >>/dev/null 2>>/dev/null" + exec = "convert \"#{path}\" \ + -compress Zip \ + \"#{tmp_file}\" \ + >>/dev/null 2>>/dev/null" # try with convert - try_within 40, "compressing PDF" do - did_compress = system exec - end + did_compress = system_try_within 40, "compressing PDF using convert", exec if !did_compress logger.error "Failed to compress PDF #{path} using convert. Cannot compress this PDF. Command was:\n\t#{exec}" @@ -342,11 +350,17 @@ def convert_files_to_pdf(from_path, dest_path) def pdf_valid?(file) did_succeed = false - try_within 30, "validating PDF using ghostscript" do - did_succeed = system "nice -n 10 gs -o /dev/null -sDEVICE=nullpage -r36x36 -dNOPAUSE -q #{file} >>/dev/null 2>>/dev/null" - unless did_succeed - logger.error "Failed to validate pdf file. Is ghostscript installed?" - end + exec = "gs -o /dev/null \ + -sDEVICE=nullpage \ + -r36x36 \ + -dNOPAUSE \ + -q \ + #{file} >>/dev/null 2>>/dev/null" + + did_succeed = system_try_within 30, "validating PDF using ghostscript", exec + + unless did_succeed + logger.error "Failed to validate pdf file. Is ghostscript installed?" end did_succeed @@ -492,14 +506,21 @@ def aggregate(pdf_paths, final_pdf_path) logger.debug "Trying to aggregate PDFs to #{final_pdf_path}" did_compile = false - exec = "nice -n 10 gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -dPDFSETTINGS=/screen -sOutputFile='#{final_pdf_path}' #{pdf_paths.join ' '}" - Terminator.terminate 180 do - did_compile = system exec - end - - if !did_compile + exec = "gs \ + -dBATCH \ + -dNOPAUSE \ + -q \ + -sDEVICE=pdfwrite \ + -dPDFSETTINGS=/screen \ + -sOutputFile='#{final_pdf_path}' \ + #{pdf_paths.join ' '}" + + did_compile = system_try_within 180, "trying to aggregate PDFs", exec + + unless did_compile logger.error "Failed to aggregate PDFs to #{final_pdf_path}. Command was:\n\t#{exec}" end + did_compile end diff --git a/app/helpers/timeout_helper.rb b/app/helpers/timeout_helper.rb index 59150c979..bfdab6e92 100644 --- a/app/helpers/timeout_helper.rb +++ b/app/helpers/timeout_helper.rb @@ -16,6 +16,23 @@ def try_within(sec, timeout_message = "operation", &block) end end + # + # Timeout system call + # + # Usage: + # system_try_within 30, "doing the thing", "gs do.pdf the.pdf thing.pdf" + # + def system_try_within(sec, timeout_message, command) + # shell script to kill command after timeout + timeout_exec = Rails.root.join('lib', 'shell', 'timeout.sh') + result = false + try_within sec, timeout_message do + result = system "#{timeout_exec} -t #{sec} nice -n 10 #{command}" + end + result + end + # Export functions as module functions module_function :try_within + module_function :system_try_within end From fdb781555b78862c415df991864033deabf5fd03 Mon Sep 17 00:00:00 2001 From: Alex Cummaudo Date: Sat, 30 Apr 2016 22:57:01 +1000 Subject: [PATCH 5/8] DOC: Replace install for PDFtk with ghostscript --- README.md | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 4e7fa2518..2c2a1fb6f 100644 --- a/README.md +++ b/README.md @@ -77,14 +77,12 @@ CREATE ROLE itig WITH CREATEDB PASSWORD 'd872$dh' LOGIN; #### 4. Install native tools -Install `imagemagick` and `libmagic` using Homebrew: +Install `imagemagick`, `libmagic` and `ghostscript` using Homebrew: ``` -$ brew install imagemagick libmagic +$ brew install imagemagick libmagic ghostscript ``` -You also need to download and install PDFtk manually by downloading it [here](https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/pdftk_server-2.02-mac_osx-10.6-setup.pkg). - You will also need to install the Python `pygments` package: ``` @@ -271,7 +269,9 @@ $ rbenv install 2.0.0-p353 Install [Postgres](http://www.postgresql.org/download/linux/): ``` -$ sudo apt-get install postgresql postgresql-contrib libpq-dev +$ sudo apt-get install postgresql \ + postgresql-contrib \ + libpq-dev ``` Ensure `pg_config` is on the `PATH`, and then login to Postgres. You will need to locate where `apt-get` has installed your Postgres binary and add this to your `PATH`. You can use `whereis psql` for that, but ensure you add the directory and not the executable to the path @@ -295,18 +295,15 @@ CREATE ROLE itig WITH CREATEDB PASSWORD 'd872$dh' LOGIN; #### 4. Install native tools -Install `imagemagick`, `libmagic` and `pdftk`: - -``` -$ sudo apt-get install imagemagick libmagickwand-dev -$ sudo apt-get install libmagic-dev -$ sudo apt-get install pdftk -``` - -You will also need to install the Python `pygments` package: +Install `imagemagick`, `libmagic`, `ghostscript`. You will also need to install +the Python `pygments` package: ``` -$ sudo apt-get install python-pygments +$ sudo apt-get install ghostscript \ + imagemagick \ + libmagickwand-dev \ + libmagic-dev \ + python-pygments ``` #### 5. Install Doubtfire API dependencies From 06bfb32ef3268b45129f98b78306345dca099d7e Mon Sep 17 00:00:00 2001 From: Alex Cummaudo Date: Sat, 30 Apr 2016 22:57:41 +1000 Subject: [PATCH 6/8] CONFIG: Update Dockerfile to include ghostscript --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2c2a1fb6f..ad8cd18a9 100644 --- a/README.md +++ b/README.md @@ -77,10 +77,10 @@ CREATE ROLE itig WITH CREATEDB PASSWORD 'd872$dh' LOGIN; #### 4. Install native tools -Install `imagemagick`, `libmagic` and `ghostscript` using Homebrew: +Install `imagemagick`, `libmagic` using Homebrew: ``` -$ brew install imagemagick libmagic ghostscript +$ brew install imagemagick libmagic ``` You will also need to install the Python `pygments` package: @@ -295,12 +295,11 @@ CREATE ROLE itig WITH CREATEDB PASSWORD 'd872$dh' LOGIN; #### 4. Install native tools -Install `imagemagick`, `libmagic`, `ghostscript`. You will also need to install -the Python `pygments` package: +Install `imagemagick`, `libmagic`. You will also need to install the Python +`pygments` package: ``` -$ sudo apt-get install ghostscript \ - imagemagick \ +$ sudo apt-get install imagemagick \ libmagickwand-dev \ libmagic-dev \ python-pygments From 936d1ac181208fe494d267b7ac74e00715677d53 Mon Sep 17 00:00:00 2001 From: Alex Cummaudo Date: Tue, 5 Jul 2016 15:28:47 +1000 Subject: [PATCH 7/8] QUALITY: Clean up commented out puts debugging messages --- Dockerfile | 3 ++- README.md | 11 ++++++----- app/helpers/file_helper.rb | 1 - app/models/project.rb | 4 ---- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4963f3d2f..b5c0b0ac1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,8 @@ RUN apt-get install -y \ libmagic-dev \ pdftk \ libpq-dev \ - python-pygments + python-pygments \ + ghostscript ADD . /doubtfire-api WORKDIR /doubtfire-api diff --git a/README.md b/README.md index ad8cd18a9..2c2a1fb6f 100644 --- a/README.md +++ b/README.md @@ -77,10 +77,10 @@ CREATE ROLE itig WITH CREATEDB PASSWORD 'd872$dh' LOGIN; #### 4. Install native tools -Install `imagemagick`, `libmagic` using Homebrew: +Install `imagemagick`, `libmagic` and `ghostscript` using Homebrew: ``` -$ brew install imagemagick libmagic +$ brew install imagemagick libmagic ghostscript ``` You will also need to install the Python `pygments` package: @@ -295,11 +295,12 @@ CREATE ROLE itig WITH CREATEDB PASSWORD 'd872$dh' LOGIN; #### 4. Install native tools -Install `imagemagick`, `libmagic`. You will also need to install the Python -`pygments` package: +Install `imagemagick`, `libmagic`, `ghostscript`. You will also need to install +the Python `pygments` package: ``` -$ sudo apt-get install imagemagick \ +$ sudo apt-get install ghostscript \ + imagemagick \ libmagickwand-dev \ libmagic-dev \ python-pygments diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 65c3fd8d7..2a6e293f3 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -200,7 +200,6 @@ def compress_image(path) end end - # puts "#{did_compress}" raise "Failed to compress an image. Ensure all images are smaller than 1MB." unless did_compress return true end diff --git a/app/models/project.rb b/app/models/project.rb index 298c930b3..18add217e 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -953,8 +953,6 @@ def create_portfolio fout.puts pdf_text end - # FileHelper.compress_pdf(self.portfolio_path) - logger.info "Created portfolio at #{portfolio_path} - #{log_details()}" self.portfolio_production_date = DateTime.now @@ -964,9 +962,7 @@ def create_portfolio logger.error "Failed to convert portfolio to PDF - #{log_details()} -\nError: #{e.message}" log_file = e.message.scan(/\/.*\.log/).first - # puts "log file is ... #{log_file}" if log_file && File.exists?(log_file) - # puts "exists" begin puts "--- Latex Log ---\n" puts File.read(log_file) From 7b9cb00a5e8f35e3c15daa0e54284cba9d55a162 Mon Sep 17 00:00:00 2001 From: Andrew Cain Date: Mon, 4 Jul 2016 18:40:22 +1000 Subject: [PATCH 8/8] FIX: Remove need for PDFTK Remove PDFTK from checking PDF validity and aggregating PDFs. --- Dockerfile | 1 - README.md | 6 ++--- app/helpers/file_helper.rb | 49 +++++--------------------------------- 3 files changed, 8 insertions(+), 48 deletions(-) diff --git a/Dockerfile b/Dockerfile index b5c0b0ac1..ecfeb0e02 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,6 @@ RUN apt-get install -y \ libpq-dev imagemagick \ libmagickwand-dev \ libmagic-dev \ - pdftk \ libpq-dev \ python-pygments \ ghostscript diff --git a/README.md b/README.md index 2c2a1fb6f..bde44aaf5 100644 --- a/README.md +++ b/README.md @@ -295,8 +295,8 @@ CREATE ROLE itig WITH CREATEDB PASSWORD 'd872$dh' LOGIN; #### 4. Install native tools -Install `imagemagick`, `libmagic`, `ghostscript`. You will also need to install -the Python `pygments` package: +Install `imagemagick`, `libmagic` and `ghostscript`. You will also need to +install the Python `pygments` package: ``` $ sudo apt-get install ghostscript \ @@ -569,8 +569,6 @@ After installing LaTeX, you must ensure the following are listed on the `PATH`: ``` $ which convert /usr/local/bin/convert -$ which pdftk -/usr/local/bin/pdftk $ which pygmentize /usr/local/bin/pygmentize $ which pdflatex diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 2a6e293f3..edd6dbebc 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -346,23 +346,13 @@ def convert_files_to_pdf(from_path, dest_path) # # Tests if a PDF is valid / corrupt # - def pdf_valid?(file) - did_succeed = false - - exec = "gs -o /dev/null \ - -sDEVICE=nullpage \ - -r36x36 \ - -dNOPAUSE \ - -q \ - #{file} >>/dev/null 2>>/dev/null" - - did_succeed = system_try_within 30, "validating PDF using ghostscript", exec - - unless did_succeed - logger.error "Failed to validate pdf file. Is ghostscript installed?" + def pdf_valid? filename + # Scan last 1024 bytes for the EOF mark + return false unless File.exists? filename + File.open(filename) do |f| + f.seek -1024, IO::SEEK_END + f.read.include? '%%EOF' end - - did_succeed end # @@ -497,32 +487,6 @@ def read_file_to_str(filename) result end - # - # Aggregate a list of PDFs into a single PDF file - # - returns boolean indicating success - # - def aggregate(pdf_paths, final_pdf_path) - logger.debug "Trying to aggregate PDFs to #{final_pdf_path}" - - did_compile = false - exec = "gs \ - -dBATCH \ - -dNOPAUSE \ - -q \ - -sDEVICE=pdfwrite \ - -dPDFSETTINGS=/screen \ - -sOutputFile='#{final_pdf_path}' \ - #{pdf_paths.join ' '}" - - did_compile = system_try_within 180, "trying to aggregate PDFs", exec - - unless did_compile - logger.error "Failed to aggregate PDFs to #{final_pdf_path}. Command was:\n\t#{exec}" - end - - did_compile - end - def path_to_plagarism_html(match_link) to_dir = student_work_dir(:plagarism, match_link.task) @@ -649,7 +613,6 @@ def extract_file_from_done(task, to_path, pattern, name_fn) module_function :doc_to_pdf module_function :cover_to_pdf module_function :read_file_to_str - module_function :aggregate module_function :path_to_plagarism_html module_function :save_plagiarism_html module_function :delete_plagarism_html