Skip to content

Updated discord invite regex to match leading http/https/www #124

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions botcore/utils/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
import re

# Pattern for Discord invite links. The invite code is captured in the named
# group ``invite``. Separators may be obfuscated: "." may also be written as
# "," or the word "dot", and "/" may also be written as the word "slash".
# A leading "http://" / "https://" and "www." are optional.
DISCORD_INVITE = re.compile(
    r"(https?://)?(www\.)?"                       # Optional http(s) and www.
    r"(discord([.,]|dot)gg|"                      # Could be discord.gg/
    r"discord([.,]|dot)com(/|slash)invite|"       # or discord.com/invite/
    r"discordapp([.,]|dot)com(/|slash)invite|"    # or discordapp.com/invite/
    r"discord([.,]|dot)me|"                       # or discord.me
    r"discord([.,]|dot)li|"                       # or discord.li
    r"discord([.,]|dot)io|"                       # or discord.io.
    r"((?<!\w)([.,]|dot))gg"                      # or .gg/ (not preceded by a word character)
    # Exactly one closing line for the domain alternation: a second ")" here
    # would leave the pattern with an unbalanced parenthesis.
    r")(/|slash)"                                 # / or 'slash'
    r"(?P<invite>\S+)",                           # the invite code itself
    flags=re.IGNORECASE,
)
Expand All @@ -32,7 +33,7 @@
r"(?P<code>.*?)" # extract all code inside the markup
r"\s*" # any more whitespace before the end of the code markup
r"(?P=delim)", # match the exact same delimiter from the start again
re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive
flags=re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive
)
"""
Regex for formatted code, using Discord's code blocks.
Expand All @@ -44,7 +45,7 @@
r"^(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code
r"(?P<code>.*?)" # extract all the rest as code
r"\s*$", # any trailing whitespace until the end of the string
re.DOTALL # "." also matches newlines
flags=re.DOTALL # "." also matches newlines
)
"""
Regex for raw code, *not* using Discord's code blocks.
Expand Down
3 changes: 3 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

Changelog
=========
- :release:`8.1.0 <16th August 2022>`
- :support:`124` Updated :obj:`botcore.utils.regex.DISCORD_INVITE` regex to optionally match leading "http[s]" and "www".


- :release:`8.0.0 <27th July 2022>`
- :breaking:`110` Bump async-rediscache to v1.0.0-rc2
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bot-core"
version = "8.0.0"
version = "8.1.0"
description = "Bot-Core provides the core functionality and utilities for the bots of the Python Discord community."
authors = ["Python Discord <info@pythondiscord.com>"]
license = "MIT"
Expand Down
69 changes: 42 additions & 27 deletions tests/botcore/utils/test_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,18 @@
from botcore.utils.regex import DISCORD_INVITE


def use_regex(s: str) -> Optional[str]:
"""Helper function to run the Regex on a string.
def match_regex(s: str) -> Optional[str]:
    """Apply ``DISCORD_INVITE.match`` to ``s``.

    Returns the text captured by the ``invite`` group when the pattern
    matches, otherwise ``None``.
    """
    found = DISCORD_INVITE.match(s)
    if found is None:
        return None
    return found.group("invite")


def search_regex(s: str) -> Optional[str]:
"""Helper function to run re.search on a string.

Return the invite capture group, if the string matches the pattern
else return None
Expand All @@ -19,32 +29,37 @@ class UtilsRegexTests(unittest.TestCase):
def test_discord_invite_positives(self):
    """Test the DISCORD_INVITE regex on a set of strings we would expect to capture."""

    # Every supported domain, with and without the optional http(s)/www prefix.
    self.assertEqual(match_regex("discord.gg/python"), "python")
    self.assertEqual(match_regex("https://discord.gg/python"), "python")
    self.assertEqual(match_regex("https://www.discord.gg/python"), "python")
    self.assertEqual(match_regex("discord.com/invite/python"), "python")
    self.assertEqual(match_regex("www.discord.com/invite/python"), "python")
    self.assertEqual(match_regex("discordapp.com/invite/python"), "python")
    self.assertEqual(match_regex("discord.me/python"), "python")
    self.assertEqual(match_regex("discord.li/python"), "python")
    self.assertEqual(match_regex("discord.io/python"), "python")
    self.assertEqual(match_regex(".gg/python"), "python")

    # The invite group runs up to the next whitespace, so trailing path,
    # query and fragment characters are all part of the capture.
    self.assertEqual(match_regex("discord.gg/python/but/extra"), "python/but/extra")
    self.assertEqual(match_regex("discord.me/this/isnt/python"), "this/isnt/python")
    self.assertEqual(match_regex(".gg/a/a/a/a/a/a/a/a/a/a/a"), "a/a/a/a/a/a/a/a/a/a/a")
    self.assertEqual(match_regex("discordapp.com/invite/python/snakescord"), "python/snakescord")
    self.assertEqual(match_regex("http://discord.gg/python/%20/notpython"), "python/%20/notpython")
    self.assertEqual(match_regex("discord.gg/python?=ts/notpython"), "python?=ts/notpython")
    self.assertEqual(match_regex("https://discord.gg/python#fragment/notpython"), "python#fragment/notpython")
    self.assertEqual(match_regex("https://discord.gg/python/~/notpython"), "python/~/notpython")

    # Inputs with surrounding whitespace go through the search-based helper.
    self.assertEqual(search_regex("https://discord.gg/python with whitespace"), "python")
    self.assertEqual(search_regex(" https://discord.gg/python "), "python")

def test_discord_invite_negatives(self):
    """Test the DISCORD_INVITE regex on a set of strings we would expect to not capture."""

    # No invite domain, or a domain with no invite code after the separator.
    self.assertEqual(match_regex("another string"), None)
    self.assertEqual(match_regex("https://pythondiscord.com"), None)
    self.assertEqual(match_regex("https://discord.com"), None)
    self.assertEqual(match_regex("https://discord.gg"), None)
    self.assertEqual(match_regex("https://discord.gg/ python"), None)

    # The same kind of non-invite link must also be rejected by the search-based helper.
    self.assertEqual(search_regex("https://discord.com with whitespace"), None)
    self.assertEqual(search_regex(" https://discord.com "), None)