Skip to content

Infra: Improve email and link processing and rendering in headers #2467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
3 changes: 2 additions & 1 deletion pep-0009.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ Type: Process
Content-Type: text/x-rst
Created: 14-Aug-2001
Post-History:
Resolution: https://mail.python.org/mailman/private/peps/2016-January/001165.html
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this link I get:

Private Archive Error - No such list peps

Does this link work for others?

Perhaps this later 2016 link withdrawing the PEP would be a good replacement?

https://mail.python.org/archives/list/python-dev@python.org/thread/2YMHVPRDWGQLA5A2FKXE2JMLM2HQEEGW/


(As an aside, I note the reason for not renaming .txt to .rst was tooling rather than Git churn, and PEP editors were open to someone doing it)

The PEP
editors will not be converting the legacy PEPs to reST, nor will we currently
be renaming the relevant PEP source files to end with ".rst" since there's too
much tooling that would have to change to do so. However, if either task
really interests you, please get in touch with the PEP editors.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got the same result as you. Since the link also broke the parsing (which would have required special-case workarounds), I had initially removed it; I have now gone ahead and replaced it with the much better link you suggested, both here and in #2484 (which I suggest approving and merging first once it is ready, then dropping the change here).

Resolution: https://mail.python.org/archives/list/python-dev@python.org/thread/2YMHVPRDWGQLA5A2FKXE2JMLM2HQEEGW/


::

Expand Down
137 changes: 117 additions & 20 deletions pep_sphinx_extensions/pep_processor/transforms/pep_headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,23 @@ def apply(self) -> None:
if not isinstance(node, nodes.reference):
continue
node.replace_self(_mask_email(node))
elif name in {"discussions-to", "resolution"}:
# only handle threads, email addresses in Discussions-To aren't
# masked.
elif name in {"discussions-to", "resolution", "post-history"}:
# Prettify mailing list and Discourse links
for node in para:
if not isinstance(node, nodes.reference):
if (not isinstance(node, nodes.reference)
or not node["refuri"]):
continue
# Have known mailto links link to their main list pages
if node["refuri"].lower().startswith("mailto:"):
node["refuri"] = _generate_list_url(node["refuri"])
parts = node["refuri"].lower().split("/")
if len(parts) <= 2 or parts[2] not in LINK_PRETTIFIERS:
continue
if node["refuri"].startswith("https://mail.python.org"):
node[0] = _pretty_thread(node[0])
pretty_title = _make_link_pretty(str(node["refuri"]))
if name == "post-history":
node["reftitle"] = pretty_title
else:
node[0] = nodes.Text(pretty_title)
elif name in {"replaces", "superseded-by", "requires"}:
# replace PEP numbers with normalised list of links to PEPs
new_body = []
Expand All @@ -93,25 +102,113 @@ def apply(self) -> None:
# Mark unneeded fields
fields_to_remove.append(field)

# Remove any trailing commas and whitespace in the headers
if para and isinstance(para[-1], nodes.Text):
last_node = para[-1]
if last_node.astext().strip() == ",":
last_node.parent.remove(last_node)
else:
para[-1] = last_node.rstrip().rstrip(",")

# Remove unneeded fields
for field in fields_to_remove:
field.parent.remove(field)


def _pretty_thread(text: nodes.Text) -> nodes.Text:
parts = text.title().replace("Sig", "SIG").split("/")
def _generate_list_url(mailto: str) -> str:
list_name_domain = mailto.lower().removeprefix("mailto:").strip()
list_name = list_name_domain.split("@")[0]

# mailman structure is
# https://mail.python.org/archives/list/<list name>/thread/<id>
try:
return nodes.Text(parts[parts.index("Archives") + 2].removesuffix("@Python.Org"))
except ValueError:
pass
if list_name_domain.endswith("@googlegroups.com"):
return f"https://groups.google.com/g/{list_name}"

if not list_name_domain.endswith("@python.org"):
return mailto

# Active lists not yet on Mailman3; this URL will redirect if/when they are
if list_name in {"csv", "db-sig", "doc-sig", "python-list", "web-sig"}:
return f"https://mail.python.org/mailman/listinfo/{list_name}"
# Retired lists that are closed for posting, so only the archive matters
if list_name in {"import-sig", "python-3000"}:
return f"https://mail.python.org/pipermail/{list_name}/"
# The remaining lists (and any new ones) are all on Mailman3/Hyperkitty
return f"https://mail.python.org/archives/list/{list_name}@python.org/"


def _process_list_url(parts: list[str]) -> tuple[str, str]:
item_type = "list"

# HyperKitty (Mailman3) archive structure is
# https://mail.python.org/archives/list/<list_name>/thread/<id>
if "archives" in parts:
list_name = (
parts[parts.index("archives") + 2].removesuffix("@python.org"))
if len(parts) > 6 and parts[6] in {"message", "thread"}:
item_type = parts[6]

# Mailman3 list info structure is
# https://mail.python.org/mailman3/lists/<list_name>.python.org/
elif "mailman3" in parts:
list_name = (
parts[parts.index("mailman3") + 2].removesuffix(".python.org"))

# Pipermail (Mailman) archive structure is
# https://mail.python.org/pipermail/<list_name>/<month>-<year>/<id>
elif "pipermail" in parts:
list_name = parts[parts.index("pipermail") + 1]
item_type = "message" if len(parts) > 6 else "list"

# Mailman listinfo structure is
# https://mail.python.org/mailman/listinfo/<list_name>
elif "listinfo" in parts:
list_name = parts[parts.index("listinfo") + 1]

# Not a link to a mailing list, message or thread
else:
raise ValueError(
f"{'/'.join(parts)} not a link to a list, message or thread")

# pipermail structure is
# https://mail.python.org/pipermail/<list name>/<month-year>/<id>
return list_name, item_type


def _process_discourse_url(parts: list[str]) -> tuple[str, str]:
item_name = "discourse"

if len(parts) < 5 or ("t" not in parts and "c" not in parts):
raise ValueError(
f"{'/'.join(parts)} not a link to a Discourse thread or category")

first_subpart = parts[4]
has_title = not first_subpart.isnumeric()

if "t" in parts:
item_type = "post" if len(parts) > (5 + has_title) else "thread"
elif "c" in parts:
item_type = "category"
if has_title:
item_name = f"{first_subpart.replace('-', ' ')} {item_name}"

return item_name, item_type


# Host name (the third "/"-separated part of a URL) -> function that turns the
# split URL into an (item name, item type) pair; only these domains are
# supported for pretty URL parsing
LINK_PRETTIFIERS = {
    "mail.python.org": _process_list_url,
    "discuss.python.org": _process_discourse_url,
}


def _process_pretty_url(url: str) -> tuple[str, str]:
parts = url.lower().strip().strip("/").split("/")
try:
return nodes.Text(parts[parts.index("Pipermail") + 1])
except ValueError:
# archives and pipermail not in list, e.g. PEP 245
return text
item_name, item_type = LINK_PRETTIFIERS[parts[2]](parts)
except KeyError as error:
raise ValueError(
"{url} not a link to a recognized domain to prettify") from error
item_name = item_name.title().replace("Sig", "SIG")
return item_name, item_type


def _make_link_pretty(url: str) -> str:
    """Return human-readable link text of the form ``"<name> <type>"``.

    Args:
        url: The link target, passed unchanged to ``_process_pretty_url``.

    Returns:
        The item name and item type from ``_process_pretty_url`` joined by a
        single space.

    Raises:
        ValueError: Propagated from ``_process_pretty_url`` when the URL
            cannot be prettified.
    """
    item_name, item_type = _process_pretty_url(url)
    return f"{item_name} {item_type}"