File tree Expand file tree Collapse file tree 2 files changed +21
-7
lines changed
course_discovery/apps/course_metadata Expand file tree Collapse file tree 2 files changed +21
-7
lines changed Original file line number Diff line number Diff line change @@ -880,10 +880,21 @@ class UtilsTests(TestCase):
880880 '<p>Some text</p>\n <p>· Item 1</p>\n <ul>\n <li>Item 2</li>\n </ul>\n <p>Regular paragraph</p>\n <p>· Item 3</p>'
881881 )
882882 )
883+ @ddt .data (
884+ (
885+ '<p><em>The content of this course also forms part of the six-month online <a href="https://example.com">Example Link</a></em></p>' , # pylint: disable=line-too-long
886+ '<p><em>The content of this course also forms part of the six-month online <a href="https://example.com">Example Link</a></em></p>' # pylint: disable=line-too-long
887+ ),
888+ (
889+ '<div><p>online course.</p><p><strong>Module 1:</strong></p></div>' ,
890+ '<p>online course. <strong>Module 1:</strong></p>'
891+ )
892+ )
883893 @ddt .unpack
884894 def test_clean_html (self , content , expected ):
885895 """ Verify the method removes unnecessary HTML attributes. """
886- assert clean_html (content ) == expected
896+ result = clean_html (content )
897+ assert result == expected , f"\n Expected:\n { expected } \n Got:\n { result } "
887898
888899 def test_skill_data_transformation (self ):
889900 category_data = {
Original file line number Diff line number Diff line change @@ -774,6 +774,8 @@ def clean_html(content):
774774 (indicating right-to-left direction), this method will ensure that the 'dir' attribute is preserved
775775 or added to maintain consistency with the original content.
776776 """
777+ if not content :
778+ return ''
777779 LIST_TAGS = ['ul' , 'ol' ]
778780 is_list_with_dir_attr_present = False
779781
@@ -790,12 +792,13 @@ def clean_html(content):
790792 cleaned = cleaned .replace ('<p><b></b></p>' , '' )
791793 html_converter = HTML2TextWithLangSpans (bodywidth = None )
792794 html_converter .wrap_links = False
793- cleaned = html_converter .handle (cleaned ).strip ()
794- cleaned = markdown .markdown (cleaned )
795- for tag in LIST_TAGS :
796- cleaned = cleaned .replace (f'<{ tag } >' , f'<{ tag } dir="rtl">' ) if is_list_with_dir_attr_present else cleaned
797-
798- return cleaned
795+ markdown_text = html_converter .handle (cleaned ).strip ()
796+ cleaned = markdown .markdown (markdown_text )
797+ cleaned = re .sub (r'([^\s>])\s*(<a\b)' , r'\1 \2' , cleaned )
798+ if is_list_with_dir_attr_present :
799+ for tag in LIST_TAGS :
800+ cleaned = cleaned .replace (f'<{ tag } >' , f'<{ tag } dir="rtl">' )
801+ return cleaned .strip ()
799802
800803
801804def get_file_from_drive_link (image_url ):
You can’t perform that action at this time.
0 commit comments