Add initial files

Signed-off-by: Steven Lang <steven.lang.mz@gmail.com>
braun-steven · Jun 28, 2021 · 4409178 · 4409178
1 parent b19e660
commit 4409178
Show file tree

Hide file tree

Showing 6 changed files with 149 additions and 0 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2021 Steven Lang
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/bin/arxiv-downloader b/bin/arxiv-downloader
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+import argparse
+
+from arxiv_downloader.utils import check_out_dir, download, url_to_id
+
+def parse_args():
+    """Parse the CLI options; exactly one of --url / --id must be supplied."""
+    parser = argparse.ArgumentParser(description="ArXiv Paper Downloader.")
+    # A required exclusive group makes argparse itself reject zero or two identifiers.
+    target = parser.add_mutually_exclusive_group(required=True)
+    target.add_argument("--url", "-u", type=str, default=None, help="ArXiv article URL.")
+    target.add_argument("--id", "-i", type=str, default=None, help="ArXiv article ID (for https://arxiv.org/abs/2004.13316 this would be 2004.13316).")
+    parser.add_argument("--directory", "-d", default="./", type=str, help="Output directory.")
+    parser.add_argument(
+        "--source",
+        "-s",
+        default=False,
+        action="store_true",
+        help="Whether to download the source tar file.",
+    )
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    # Script entry point: resolve the article ID from the CLI, then download.
+    args = parse_args()
+
+    # Exactly one of --url / --id is required. Checked explicitly rather
+    # than with `assert`, which is stripped when Python runs with -O.
+    if (args.url is None) == (args.id is None):
+        raise SystemExit("Either URL or ID must be given but not both.")
+
+    # An explicit ID is used as-is; otherwise it is derived from the URL.
+    article_id = args.id if args.id is not None else url_to_id(args.url)
+
+    # Download article
+    download(article_id, args.directory, source=args.source)
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel"
+]
+build-backend = "setuptools.build_meta"
diff --git a/setup.py b/setup.py
@@ -0,0 +1,29 @@
+import setuptools
+
+with open("README.md", "r", encoding="utf-8") as fh:  # README doubles as the long description
+    long_description = fh.read()
+
+setuptools.setup(
+    name="arxiv-downloader",
+    version="0.0.1",
+    author="Steven Lang",
+    author_email="steven.lang.mz@gmail.com",
+    description="A command line interface to download PDF files from https://arxiv.org.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/steven-lang/arxiv-downloader",
+    project_urls={
+        "Bug Tracker": "https://github.com/steven-lang/arxiv-downloader/issues",
+    },
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    package_dir={"": "src"},
+    # Sources live under src/ (see package_dir); the package is listed explicitly.
+    packages=["arxiv_downloader"],
+    python_requires=">=3.6",
+    scripts=["bin/arxiv-downloader"],
+    install_requires=["arxiv==1.2.0"],
+)
diff --git a/src/arxiv_downloader/__init__.py b/src/arxiv_downloader/__init__.py
diff --git a/src/arxiv_downloader/utils.py b/src/arxiv_downloader/utils.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+
+import os
+import argparse
+import arxiv
+
+
+def url_to_id(url: str) -> str:
+    """
+    Parse the given URL of the form `https://arxiv.org/abs/1907.13625` to the id `1907.13625`.
+
+    Args:
+        url: Input arxiv URL; a trailing slash, `.pdf` suffix, query string or fragment is ignored.
+
+    Returns:
+        str: ArXiv article ID (the last path segment of the URL).
+    """
+    # Drop "#fragment", "?query" and trailing slashes so the ID is the last segment.
+    url = url.strip().split("#")[0].split("?")[0].rstrip("/")
+    if url.endswith(".pdf"):
+        url = url[:-4]
+    return url.split("/")[-1]
+
+
+def check_out_dir(directory: str):
+    """Ensure `directory` exists, asking to create it; raise SystemExit(1) if declined."""
+    if os.path.exists(directory):
+        return
+    print(f"Directory {directory} does not exist. Create? [y/n] ", end="")
+    ans = input().lower().strip()
+    if ans == "y":
+        os.makedirs(directory)
+        return
+    print("Exiting now." if ans == "n" else "Invalid input. Exiting now.")
+    # `exit()` is a `site` helper meant for interactive use and may be
+    # missing (e.g. `python -S`); raising SystemExit directly always works.
+    raise SystemExit(1)
+
+
+
+def download(article_id, directory: str, source: bool):
+    """Download the PDF of `article_id` into `directory`; if `source`, also its source files.
+
+    Raises IndexError when the arXiv search returns no article for `article_id`.
+    """
+    # TODO: add checks for valid urls
+    check_out_dir(directory)
+    # Only the first search hit is used, so take it without building a list.
+    result = next(iter(arxiv.Search(id_list=[article_id]).get()), None)
+    if result is None:
+        raise IndexError(f"No arXiv article found for ID {article_id!r}.")
+    print(f'Starting download of article: "{result.title}" ({article_id})')
+    path = result.download_pdf(dirpath=directory)
+    print(f"Download finished! Result saved at:\n{path}")
+
+    if source:
+        print(f'Starting download of article source files: "{result.title}" ({article_id})')
+        result.download_source(dirpath=directory)