-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Steven Lang <steven.lang.mz@gmail.com>
- Loading branch information
1 parent
b19e660
commit 4409178
Showing
6 changed files
with
149 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Copyright (c) 2021 Steven Lang | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining | ||
a copy of this software and associated documentation files (the | ||
"Software"), to deal in the Software without restriction, including | ||
without limitation the rights to use, copy, modify, merge, publish, | ||
distribute, sublicense, and/or sell copies of the Software, and to | ||
permit persons to whom the Software is furnished to do so, subject to | ||
the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be | ||
included in all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | ||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | ||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | ||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env python3 | ||
import argparse | ||
|
||
from arxiv_downloader.utils import check_out_dir, download, url_to_id | ||
|
||
def parse_args():
    """Build the command line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``url``,
        ``id``, ``directory`` and ``source``.
    """
    cli = argparse.ArgumentParser(description="ArXiv Paper Downloader.")

    # Each entry pairs the option flags with the keyword arguments that are
    # forwarded to ``add_argument``.
    option_specs = (
        (
            ("--url", "-u"),
            {"type": str, "default": None, "help": "ArXiv article URL."},
        ),
        (
            ("--id", "-i"),
            {
                "type": str,
                "default": None,
                "help": "ArXiv article ID (for https://arxiv.org/abs/2004.13316 this would be 2004.13316).",
            },
        ),
        (
            ("--directory", "-d"),
            {"default": "./", "type": str, "help": "Output directory."},
        ),
        (
            ("--source", "-s"),
            {
                "default": False,
                "action": "store_true",
                "help": "Whether to download the source tar file.",
            },
        ),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
|
||
if __name__ == "__main__":
    args = parse_args()

    # Exactly one of --url / --id must be supplied. This is checked explicitly
    # rather than with `assert`, because assertions are stripped when Python
    # runs with -O and the validation would silently disappear.
    if (args.url is None) == (args.id is None):
        raise SystemExit("Either URL or ID must be given but not both.")

    # Get ID: derive it from the URL unless it was passed directly.
    if args.id is None:
        article_id = url_to_id(args.url)
    else:
        article_id = args.id

    # Download article (and, if requested, its source archive).
    download(article_id, args.directory, source=args.source)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
[build-system] | ||
requires = [ | ||
"setuptools>=42", | ||
"wheel" | ||
] | ||
build-backend = "setuptools.build_meta" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import setuptools | ||
|
||
# The README doubles as the long description shown on the package index.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="arxiv-downloader",
    version="0.0.1",
    author="Steven Lang",
    # No trailing whitespace here: the value is copied verbatim into the
    # package metadata.
    author_email="steven.lang.mz@gmail.com",
    description="A command line interface to download PDF files from https://arxiv.org.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/steven-lang/arxiv-downloader",
    project_urls={
        "Bug Tracker": "https://github.com/steven-lang/arxiv-downloader/issues",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # Sources live under src/, and the package is listed explicitly.
    package_dir={"": "src"},
    # packages=setuptools.find_packages(where="src"),
    packages=["arxiv_downloader"],
    python_requires=">=3.6",
    scripts=["bin/arxiv-downloader"],
    # Given as a list so further requirements can be appended safely.
    install_requires=["arxiv==1.2.0"],
)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import os | ||
import argparse | ||
import arxiv | ||
|
||
|
||
def url_to_id(url: str) -> str:
    """
    Parse the given URL of the form `https://arxiv.org/abs/1907.13625` to the id `1907.13625`.

    Besides the plain abstract URL this also accepts PDF links
    (`https://arxiv.org/pdf/1907.13625.pdf`), URLs with a trailing slash, a
    query string or a fragment, and old-style identifiers that contain a
    slash (`https://arxiv.org/abs/hep-th/9901001` -> `hep-th/9901001`).

    Args:
        url: Input arxiv URL.
    Returns:
        str: ArXiv article ID.
    """
    cleaned = url.strip()

    # Drop query string and fragment; they are never part of the ID.
    for separator in ("?", "#"):
        cleaned = cleaned.partition(separator)[0]

    # A trailing slash would otherwise yield an empty last path segment.
    cleaned = cleaned.rstrip("/")

    # Strip filetype
    if cleaned.endswith(".pdf"):
        cleaned = cleaned[:-4]

    # Everything after the /abs/ or /pdf/ marker is the ID. Taking the whole
    # tail (not just the last segment) keeps old-style IDs such as
    # "hep-th/9901001" intact.
    for marker in ("/abs/", "/pdf/"):
        _, found, tail = cleaned.rpartition(marker)
        if found:
            return tail

    # Fallback for anything else (including a bare ID): last path segment.
    return cleaned.split("/")[-1]
|
||
|
||
def check_out_dir(directory: str):
    """Check if the output directory exists. If not, ask the user to mkdir.

    When ``directory`` is missing, the user is prompted on stdin. Answering
    ``y`` creates the directory (including parents); any other answer
    terminates the program.

    Args:
        directory: Path of the output directory.
    Raises:
        SystemExit: With exit status 1 if the user answers ``n`` or gives
            an unrecognised answer.
    """
    if os.path.exists(directory):
        return

    print(f"Directory {directory} does not exist. Create? [y/n] ", end="")
    ans = input().lower().strip()
    if ans == "y":
        os.makedirs(directory)
        return

    if ans == "n":
        print("Exiting now.")
    else:
        print("Invalid input. Exiting now.")

    # Raise SystemExit directly instead of calling `exit()`: that helper is
    # injected by the `site` module for interactive sessions, is not
    # guaranteed to exist, and closes stdin as a side effect.
    raise SystemExit(1)
|
||
|
||
|
||
def download(article_id, directory: str, source: bool):
    """Download the PDF of an arXiv article and, optionally, its source files.

    Args:
        article_id: ArXiv article ID, e.g. `1907.13625`.
        directory: Output directory. It is passed through `check_out_dir`
            first, which may prompt the user to create it.
        source: If true, the article's source files are downloaded as well.
    Raises:
        IndexError: If the search returns no article for `article_id`.
    """
    # TODO: add checks for valid urls
    check_out_dir(directory)

    # Look up the article. Only the first hit is needed, so the result
    # iterator is not materialised into a list.
    search = arxiv.Search(id_list=[article_id])
    result = next(iter(search.get()), None)
    if result is None:
        # IndexError is kept so the exception type matches what indexing an
        # empty result list raised before, now with a meaningful message.
        raise IndexError(f"No arXiv article found for ID {article_id!r}.")

    # Download
    print(f'Starting download of article: "{result.title}" ({article_id})')
    path = result.download_pdf(dirpath=directory)

    print(f"Download finished! Result saved at:\n{path}")

    if source:
        print(f'Starting download of article source files: "{result.title}" ({article_id})')
        source_path = result.download_source(dirpath=directory)
        # Report the location, mirroring the message printed for the PDF.
        print(f"Download finished! Result saved at:\n{source_path}")