Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement /archive #64

Merged
merged 20 commits into from
Feb 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ flask = "==1.0.2"
arxiv-base = {editable = true,git = "https://github.com/arXiv/arxiv-base.git",ref = "develop"}
validators = "*"
mypy-extensions = "*"
flask-wtf = "*"

[dev-packages]
pylama = "*"
Expand All @@ -41,7 +42,6 @@ coverage = "*"
coveralls = "*"
sphinx = "*"
sphinx-autodoc-typehints = "*"
mypy-extensions = "*"

[requires]
python_version = "3.6"
199 changes: 109 additions & 90 deletions Pipfile.lock

Large diffs are not rendered by default.

141 changes: 141 additions & 0 deletions browse/controllers/archive_page/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Archive landing page."""

from datetime import date
from typing import Dict, Any, Tuple, List, no_type_check

from flask import Response, url_for

from arxiv import status
from arxiv.taxonomy.definitions import ARCHIVES, CATEGORIES, ARCHIVES_SUBSUMED

from browse.controllers.archive_page.by_month_form import ByMonthForm
from browse.controllers.archive_page.catchup_form import CatchupForm
from browse.services.util.response_headers import abs_expires_header


def get_archive(archive_id: str) -> Response:
    """Build the response for a single archive landing page.

    ``archive_id`` may be an archive id, a category id (resolved to the
    archive named by its ``in_archive`` entry), or the literal ``"list"``,
    which renders the index of all archives.  An id that resolves to no
    archive yields the archive index with a 404 status.

    Returns a ``(data, status code, headers)`` tuple.
    """
    if archive_id == "list":
        return archive_index(archive_id, status=status.HTTP_200_OK)

    archive = ARCHIVES.get(archive_id, None)
    if not archive:
        # Not an archive id; maybe it is a category inside an archive.
        parent_id = CATEGORIES.get(archive_id, {}).get("in_archive", None)
        archive = ARCHIVES.get(parent_id, None)
        if not archive:
            return archive_index(archive_id,
                                 status=status.HTTP_404_NOT_FOUND)
        archive_id = parent_id

    response_headers: Dict[str, Any] = {}
    _write_expires_header(response_headers)

    data: Dict[str, Any] = {}
    subsumed_by = ARCHIVES_SUBSUMED.get(archive_id, None)
    if subsumed_by:
        # Record what was asked for, then show the archive that holds the
        # subsuming category instead.
        subsuming_category = CATEGORIES.get(subsumed_by, {})
        data.update(
            subsumed_id=archive_id,
            subsumed_category=CATEGORIES.get(archive_id, {}),
            subsumed_by=subsumed_by,
            subsuming_category=subsuming_category,
        )
        archive_id = subsuming_category.get("in_archive", None)
        archive = ARCHIVES.get(archive_id, None)

    years = years_operating(archive)

    data.update(
        archive_id=archive_id,
        archive=archive,
        list_form=ByMonthForm(archive_id, archive, years),
        catchup_form=CatchupForm(archive_id, archive, years),
        stats_by_year=stats_by_year(archive_id, archive, years),
        category_list=category_list(archive_id),
        template="archive/single_archive.html",
    )
    return data, status.HTTP_200_OK, response_headers  # type: ignore


def archive_index(archive_id: str, status: int) -> Response:
    """Landing page for when there is no archive specified.

    ``archive_id`` is echoed back to the template as ``bad_archive`` and
    ``status`` is returned unchanged as the response status code.

    Returns a ``(data, status, headers)`` tuple with empty headers.
    """
    # List comprehension (not a flatten): one (archive id, archive name)
    # tuple per archive that is neither subsumed nor a test archive.
    archives: List[Tuple[str, str]] = sorted(
        (
            (arch_id, ARCHIVES[arch_id]["name"])
            for arch_id in ARCHIVES.keys()
            if arch_id not in ARCHIVES_SUBSUMED
            and not arch_id.startswith("test")
        ),
        key=lambda entry: entry[0],
    )

    # Archives with an end date: (id, name, id of what subsumed it or "").
    defunct: List[Tuple[str, str, str]] = sorted(
        (
            (arch_id, ARCHIVES[arch_id]["name"],
             ARCHIVES_SUBSUMED.get(arch_id, ""))
            for arch_id in ARCHIVES.keys()
            if "end_date" in ARCHIVES[arch_id]
        ),
        key=lambda entry: entry[0],
    )

    data: Dict[str, Any] = {
        "bad_archive": archive_id,
        "archives": archives,
        "defunct": defunct,
        "template": "archive/archive_list_all.html",
    }
    return data, status, {}  # type: ignore


def subsumed_msg(archive: Dict[str, str], subsumed_by: str) -> Dict[str, str]:
    """Look up the category and archive that subsume an archive.

    ``archive`` is accepted but not read here.  ``subsumed_by`` is a
    category id; when it (or its ``in_archive`` archive) is unknown, a
    placeholder entry carrying only a ``name`` is used instead.
    """
    subsuming_cat = CATEGORIES.get(subsumed_by, {"name": "unknown category"})
    parent_archive_id = subsuming_cat.get("in_archive", None)
    subsuming_arch = ARCHIVES.get(
        parent_archive_id, {"name": "unknown archive"}
    )
    return {
        "subsumed_by_cat": subsuming_cat,
        "subsumed_by_arch": subsuming_arch,
    }


def years_operating(archive: Dict[str, Any]) -> List[int]:
    """Return the years an archive has operated, newest first.

    Example: an archive that ran from 1991 through 1993 gives
    ``[1993, 1992, 1991]``.

    Returns an empty list when ``archive`` is empty/None or has no
    ``start_date`` that is a ``date``.  The last year is taken from
    ``end_date`` when present, otherwise from today's date.
    """
    if not archive:
        return []
    start_date = archive.get("start_date")
    if not isinstance(start_date, date):
        return []

    end_date = archive.get("end_date")
    if isinstance(end_date, date):
        # end_date is a date like start_date; only its year is wanted.
        # Using the date itself would fail on the arithmetic below.
        end_year = end_date.year
    elif end_date:
        # Already a plain year number.
        end_year = end_date
    else:
        end_year = date.today().year

    return list(range(end_year, start_date.year - 1, -1))


def stats_by_year(
    archive_id: str, archive: Dict[str, Any], years: List[int]
) -> List[Tuple[str, str]]:
    """Return ``(url, year label)`` pairs linking to each year page.

    When the archive, its id, or the list of years is missing, a single
    placeholder pair is returned instead.
    """
    if archive and archive_id and years:
        links = []
        for year in years:
            links.append((_year_stats_link(archive_id, year), str(year)))
        return links
    return [("bogusURL", "NODATA")]


@no_type_check  # url_for should return str but is not typed in Flask
def _year_stats_link(archive_id: str, num: int) -> str:
    """Return the URL of the ``browse.year`` page for one archive and year."""
    # danger: 2 digit year, NG can accept 4 digit
    two_digit_year = str(num)[-2:]
    return url_for("browse.year", year=two_digit_year, archive=archive_id)


def category_list(archive_id: str) -> List[Dict[str, str]]:
    """Return the active categories of an archive, sorted by name.

    Each entry has ``id``, ``name`` and ``description`` keys.  A category
    counts as active unless its ``is_active`` entry is falsy.
    """
    matching = [
        {
            "id": cat_id,
            "name": cat.get("name", ""),
            "description": cat.get("description", ""),
        }
        for cat_id, cat in CATEGORIES.items()
        if cat.get("in_archive", "yuck") == archive_id
        and cat.get("is_active", True)
    ]
    return sorted(matching, key=lambda entry: entry["name"])


def _write_expires_header(response_headers: Dict[str, Any]) -> None:
    """Set the ``Expires`` entry of ``response_headers`` in place."""
    # The header value is the second element of abs_expires_header()'s result.
    expires = abs_expires_header()
    response_headers["Expires"] = expires[1]
49 changes: 49 additions & 0 deletions browse/controllers/archive_page/by_month_form.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Form for month selection of list controller."""
from typing import List, Any, Dict

from flask_wtf import FlaskForm
from wtforms import SelectField, SubmitField, HiddenField
from wtforms.validators import DataRequired

# Choices for the month selector: 'all' first, then one (value, label) pair
# per month.  Abbreviations are spelled out so labels never depend on locale.
MONTHS = [('all', 'all months')] + [
    ('{:02d}'.format(number), '{:02d} ({})'.format(number, abbreviation))
    for number, abbreviation in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1,
    )
]


class ByMonthForm(FlaskForm):
    """Form for browse by month input on archive pages.

    No attempt is made to restrict choices to the archive's start date,
    its end date, or to exclude future dates.  Every combination is
    accepted here; the /list controller is expected to cope with dates
    that have no articles.
    """

    year = SelectField('year', validators=[DataRequired()], choices=[])
    month = SelectField('month', validators=[DataRequired()], choices=MONTHS)
    archive = HiddenField('archive', validators=[DataRequired()])
    submit = SubmitField('Go')

    def __init__(self,
                 archive_id: str,
                 archive: Dict[str, Any],
                 years: List[int]) -> None:
        """Fill the year choices and the hidden archive id.

        ``archive`` is accepted but not read here.
        """
        super().__init__()
        year_choices = []
        for full_year in years:
            label = str(full_year)
            # The submitted value is the two-digit year; the label is full.
            year_choices.append((label[-2:], label))
        self.year.choices = year_choices
        self.archive.data = archive_id
63 changes: 63 additions & 0 deletions browse/controllers/archive_page/catchup_form.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Form for Catchup."""
from typing import List, Any, Dict
import datetime

from flask_wtf import FlaskForm
from wtforms import SelectField, HiddenField, SubmitField
from wtforms.validators import DataRequired

# Choices for the month selector: one (value, label) pair per month.
# Abbreviations are spelled out so labels never depend on locale.
MONTHS = [
    ("{:02d}".format(number), "{:02d} ({})".format(number, abbreviation))
    for number, abbreviation in enumerate(
        ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
        start=1,
    )
]

# Choices for the day selector: the value is the unpadded day number, the
# label is the same number zero-padded to two digits.
DAYS = [(str(day), str(day).zfill(2)) for day in range(1, 32)]


class CatchupForm(FlaskForm):
    """Form for catchup.

    This doesn't try to account for the start date of the
    archive, end date of the archive or dates in the future.
    It just accepts these, and expects the /list controller
    to deal with dates for which there are no articles.
    """

    day = SelectField("sday", validators=[DataRequired()], choices=DAYS)

    month = SelectField("smonth", validators=[DataRequired()], choices=MONTHS)

    year = SelectField("syear", validators=[DataRequired()])

    abstracts = SelectField(
        "method", choices=[("without", "without"), ("with", "with")]
    )
    archive = HiddenField("archive", validators=[DataRequired()])
    submit = SubmitField("Go")

    def __init__(self, archive_id: str, archive: Dict[str, Any],
                 years: List[int]) -> None:
        """Pre-select a date seven days ago and record the archive id.

        ``archive`` is accepted but not read here.
        """
        super(CatchupForm, self).__init__()

        # set date to 7 days ago, that might be in the previous month or year.
        catchup_to = datetime.date.today() - datetime.timedelta(days=7)
        self.year.default = catchup_to.strftime('%y')  # BAD two digit year
        self.month.default = catchup_to.strftime('%m')
        # DAYS values are unpadded ("1".."31"); a zero-padded '%d' such as
        # "07" would match no choice, leaving days 1-9 unselected.
        self.day.default = str(catchup_to.day)

        # BAD two digit year
        self.year.choices = [(str(yer)[-2:], str(yer)) for yer in years]

        # Apply the defaults above to the fields.
        self.process()

        # Assign after process(): processing resets each field's data to its
        # default, which would discard a value set beforehand.
        self.archive.data = archive_id



4 changes: 0 additions & 4 deletions browse/controllers/list_page/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,9 @@
import calendar
import logging
import math
from datetime import datetime
from email.utils import formatdate
from typing import Any, Dict, List, Optional, Tuple, cast, Union

from arxiv import status, taxonomy
from dateutil import parser
from dateutil.tz import tzutc
from flask import current_app, request, url_for
from werkzeug.exceptions import ServiceUnavailable, BadRequest

Expand Down
4 changes: 2 additions & 2 deletions browse/controllers/list_page/paging.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Pagination links for listing pages."""


from typing import Any, Dict, List, Optional, Tuple, cast, Union
from typing import Any, Dict, List, Union
import math
from flask import current_app, request, url_for
from flask import url_for


def paging(count: int, skipn: int, shown: int, context: str, subcontext: str) \
Expand Down
1 change: 0 additions & 1 deletion browse/controllers/tb_page/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from werkzeug import MultiDict
from werkzeug.exceptions import BadRequest
from arxiv import status
from browse.domain.metadata import DocMetadata
from browse.exceptions import TrackbackNotFound
from browse.controllers import tb_page

Expand Down
1 change: 0 additions & 1 deletion browse/domain/identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from re import RegexFlag
from typing import Match, Optional, Union, Tuple, Callable, List
from arxiv import taxonomy
from arxiv.base.config import BASE_SERVER, EXTERNAL_URL_SCHEME

# arXiv ID format used from 1991 to 2007-03
RE_ARXIV_OLD_ID = re.compile(
Expand Down
4 changes: 2 additions & 2 deletions browse/domain/listing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Return types for listing service."""

from datetime import date, datetime
from typing import List, Optional, Tuple
from datetime import date
from typing import List, Tuple

from mypy_extensions import TypedDict

Expand Down
Loading