Skip to content

Better emphasis #77

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ maddy uses [semver versioning](https://semver.org/).

## Upcoming

* ![**FIXED**](https://img.shields.io/badge/-FIXED-%23090) Only create emphasis tags at word boundaries, e.g. underscores inside a word such as `only_internal_underscores` no longer produce emphasis.
* ...

## version 1.5.0 2025-04-21
Expand Down
4 changes: 3 additions & 1 deletion include/maddy/emphasizedparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ class EmphasizedParser : public LineParser
*/
void Parse(std::string& line) override
{
// Modified from the previous version, with help from
// https://stackoverflow.com/questions/61346949/regex-for-markdown-emphasis
static std::regex re(
R"((?!.*`.*|.*<code>.*)_(?!.*`.*|.*<\/code>.*)([^_]*)_(?!.*`.*|.*<\/code>.*))"
R"((?!.*`.*|.*<code>.*)\b_(?![\s])(?!.*`.*|.*<\/code>.*)(.*?[^\s])_\b(?!.*`.*|.*<\/code>.*))"
);
static std::string replacement = "<em>$1</em>";

Expand Down
17 changes: 17 additions & 0 deletions include/maddy/strongparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,23 @@ class StrongParser : public LineParser
*/
void Parse(std::string& line) override
{
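// This version of the regex is changed in exactly the same way as the
// regex for the emphasized parser. It passes all of the 'disabled' tests
// in the 'strong parser' test, but it then fails general parsing: for
// some reason "__text__" translates to "<i></i>text<i></i>" even though
// there are no word boundaries at the correct places.
// NOTE(review): a likely cause is that only the first literal of each
// pattern below is a raw string. The raw literal ends at its first `)"`,
// which swallows the closing parenthesis of its last lookahead, so the
// capture group in the second literal ends up inside that negative
// lookahead and `$1` is always empty. In the second (non-raw) literal,
// `\b` is also the backspace escape rather than a regex word boundary.
// Separately, `\b` next to `*` requires an adjacent word character, which
// is the opposite of what is wanted for `**` -- confirm before re-enabling.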

// static std::vector<std::regex> res{
// std::regex{
// R"((?!.*`.*|.*<code>.*)\b\*\*(?![\s])(?!.*`.*|.*<\/code>.*)"
// "(.*?[^\s])\*\*\b(?!.*`.*|.*<\/code>.*))"
// },
// std::regex{
// R"((?!.*`.*|.*<code>.*)\b__(?![\s])(?!.*`.*|.*<\/code>.*)"
// "(.*?[^\s])__\b(?!.*`.*|.*<\/code>.*))"
// }
// };
static std::vector<std::regex> res{
std::regex{
R"((?!.*`.*|.*<code>.*)\*\*(?!.*`.*|.*<\/code>.*)([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))"
Expand Down
113 changes: 113 additions & 0 deletions tests/maddy/test_maddy_emphasizedparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,89 @@ TEST(MADDY_EMPHASIZEDPARSER, ItReplacesMarkdownWithEmphasizedHTML)
ASSERT_EQ(expected, text);
}

TEST(MADDY_EMPHASIZEDPARSER, ItReplacesUnderscoresAtStringEdges)
{
  // The emphasized span covers the whole line, so both underscores sit at
  // the very start and end of the input.
  const std::string expected = "<em>some text</em>";
  std::string line = "_some text_";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotReplaceMarkdownWithInlineUnderscores)
{
  // Underscores embedded in "text_bla_text" must be left alone, while the
  // standalone "_it_" is still converted.
  const std::string expected = "some text_bla_text testing <em>it</em> out";
  std::string line = "some text_bla_text testing _it_ out";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItOnlyReplacesUnderscoresAtWordBreaks)
{
  // The underscore in the middle of "text_bla" is expected to survive
  // inside the emphasized span; only the outer pair delimits it.
  const std::string expected =
    "some <em>text_bla</em> testing <em>it</em> out";
  std::string line = "some _text_bla_ testing _it_ out";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItReplacesUnderscoresWithMultipleWords)
{
  // An emphasized span may contain several space-separated words.
  const std::string expected = "some <em>text testing it</em> out";
  std::string line = "some _text testing it_ out";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItAllowsDoubleUnderscores)
{
  // Author's note: it is unclear whether this is standard markdown, but it
  // mirrors how the GitHub markdown parser behaves. Other implementations
  // reportedly expect this input *not* to match.
  const std::string expected = "some <em>_text testing it</em> out";
  std::string line = "some __text testing it_ out";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItDoesntReplaceUnderscoresInsideCodeBlocks)
{
  // Content between <code> and </code> must pass through unchanged, so
  // the expected output is the untouched input.
  const std::string original =
    "Stuff inside <code> blocks _shouldn't be emphasized_ </code> at all";
  std::string line = original;

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(original, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotReplaceUnderscoresInURLs)
{
  // Underscores that are part of a link target must not be turned into
  // emphasis; the line is expected to come back unchanged.
  const std::string original =
    "[Link Title](http://example.com/what_you_didn't_know)";
  std::string line = original;

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(original, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotParseInsideInlineCode)
{
std::string text = "some text `*bla*` `/**text*/` testing _it_ out";
Expand All @@ -32,3 +115,33 @@ TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotParseInsideInlineCode)

ASSERT_EQ(expected, text);
}

TEST(MADDY_EMPHASIZEDPARSER, ItParsesOutsideCodeBlocks)
{
  // The span inside <code>...</code> stays literal, while the one after
  // the closing tag is converted.
  const std::string expected =
    "Stuff inside <code> blocks _shouldn't be emphasized_ </code>"
    " but outside <em>should</em>.";
  std::string line =
    "Stuff inside <code> blocks _shouldn't be emphasized_ </code>"
    " but outside _should_.";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_EMPHASIZEDPARSER, ItParsesOutsideTickBlocks)
{
  // The span inside the backtick-delimited section stays literal, while
  // the one after the closing backtick is converted.
  const std::string expected =
    "Stuff inside `blocks _shouldn't be emphasized_ `"
    " but outside <em>should</em>.";
  std::string line =
    "Stuff inside `blocks _shouldn't be emphasized_ `"
    " but outside _should_.";

  const auto parser = std::make_shared<maddy::EmphasizedParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}
115 changes: 115 additions & 0 deletions tests/maddy/test_maddy_strongparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,118 @@ TEST(MADDY_STRONGPARSER, ItDoesNotParseInsideInlineCode)
ASSERT_EQ(test.expected, test.text);
}
}

TEST(MADDY_STRONGPARSER, ItReplacesUnderscoresAtStringEdges)
{
  // The strong span covers the whole line, so both double-underscore
  // markers sit at the very start and end of the input.
  const std::string expected = "<strong>some text</strong>";
  std::string line = "__some text__";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(DISABLED_MADDY_STRONGPARSER, ItDoesNotReplaceMarkdownWithInlineUnderscores)
{
  // Disabled via the DISABLED_ suite-name prefix. Double underscores
  // embedded in "text__bla__text" should be left alone, while the
  // standalone "__it__" should still be converted.
  const std::string expected =
    "some text__bla__text testing <strong>it</strong> out";
  std::string line = "some text__bla__text testing __it__ out";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(DISABLED_MADDY_STRONGPARSER, ItOnlyReplacesUnderscoresAtWordBreaks)
{
  // Disabled via the DISABLED_ suite-name prefix. The double underscore
  // in the middle of "text__bla" is expected to survive inside the strong
  // span; only the outer markers delimit it.
  const std::string expected =
    "some <strong>text__bla</strong> testing <strong>it</strong> out";
  std::string line = "some __text__bla__ testing __it__ out";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_STRONGPARSER, ItReplacesUnderscoresWithMultipleWords)
{
  // A strong span may contain several space-separated words.
  const std::string expected = "some <strong>text testing it</strong> out";
  std::string line = "some __text testing it__ out";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(DISABLED_MADDY_STRONGPARSER, ItAllowsTripleUnderscores)
{
  // Disabled via the DISABLED_ suite-name prefix.
  // Author's note: it is unclear whether this is standard markdown, but it
  // mirrors how the GitHub markdown parser behaves. Other implementations
  // reportedly expect this input *not* to match.
  const std::string expected = "some <strong>_text testing it</strong> out";
  std::string line = "some ___text testing it__ out";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_STRONGPARSER, ItDoesntReplaceUnderscoresInsideCodeBlocks)
{
  // Content between <code> and </code> must pass through unchanged, so
  // the expected output is the untouched input.
  const std::string original =
    "Stuff inside <code> blocks __shouldn't be strong__ </code> at all";
  std::string line = original;

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(original, line);
}

TEST(DISABLED_MADDY_STRONGPARSER, ItDoesNotReplaceUnderscoresInURLs)
{
  // Disabled via the DISABLED_ suite-name prefix. Double underscores that
  // are part of a link target should not be turned into strong tags; the
  // line is expected to come back unchanged.
  const std::string original =
    "[Link Title](http://example.com/what__you__didn't__know)";
  std::string line = original;

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(original, line);
}

TEST(MADDY_STRONGPARSER, ItParsesOutsideCodeBlocks)
{
  // The span inside <code>...</code> stays literal, while the one after
  // the closing tag is converted.
  const std::string expected =
    "Stuff inside <code> blocks __shouldn't be strong__ </code>"
    " but outside <strong>should</strong>.";
  std::string line =
    "Stuff inside <code> blocks __shouldn't be strong__ </code>"
    " but outside __should__.";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}

TEST(MADDY_STRONGPARSER, ItParsesOutsideTickBlocks)
{
  // The span inside the backtick-delimited section stays literal, while
  // the one after the closing backtick is converted.
  const std::string expected =
    "Stuff inside `blocks __shouldn't be strong__ `"
    " but outside <strong>should</strong>.";
  std::string line =
    "Stuff inside `blocks __shouldn't be strong__ `"
    " but outside __should__.";

  const auto parser = std::make_shared<maddy::StrongParser>();
  parser->Parse(line);

  ASSERT_EQ(expected, line);
}