Skip to content

Commit

Permalink
plotextractor: improve TeX detection
Browse files Browse the repository at this point in the history
* Made a minor change to the TeX detection step, when looking
  for possible LaTeX sources in a tarball, that allows for
  capitalized file extensions.

* Tarballs and PDFs are now only harvested when needed.
  • Loading branch information
jalavik authored and tiborsimko committed Mar 25, 2011
1 parent 55a26d5 commit c14e3fc
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion modules/miscutil/lib/plotextractor_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def untar(original_tarball, sdir):
# if neither, maybe it is TeX or an image anyway, otherwise,
# we don't care
else:
if extracted_file.split('.')[-1] == tex_file_extension:
if extracted_file.split('.')[-1].lower() == tex_file_extension:
# we might have tex source!
might_be_tex.append(extracted_file)
elif extracted_file.split('.')[-1] in ['eps', 'png', \
Expand Down
6 changes: 3 additions & 3 deletions modules/miscutil/lib/plotextractor_getter.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def tarballs_by_arXiv_id(arXiv_ids, sdir):
for arXiv_id in arXiv_ids:
if 'arXiv' not in arXiv_id:
arXiv_id = 'arXiv:' + arXiv_id
tarball, dummy_pdf = harvest_single(arXiv_id, sdir)
tarball, dummy_pdf = harvest_single(arXiv_id, sdir, ("tarball",))
if tarball != None:
tarballs.append(tarball)

Expand Down Expand Up @@ -454,13 +454,13 @@ def src_pdf_from_marc(marc_file):
if possible_match != None:
# it's listed on arXiv, hooray!
arXiv_id = possible_match.group(0)
dummy1, pdf_loc = harvest_single(arXiv_id, to_dir)
dummy1, pdf_loc = harvest_single(arXiv_id, to_dir, ("pdf",))

possible_match = re.search(DESY_match, marc_text)
if possible_match != None:
# it's listed on DESY, hooray!
desy_id = possible_match.group(0)
dummy1, pdf_loc = harvest_single(desy_id, to_dir)
dummy1, pdf_loc = harvest_single(desy_id, to_dir, ("pdf",))

return pdf_loc

Expand Down

0 comments on commit c14e3fc

Please sign in to comment.