@@ -679,6 +679,9 @@ def test_html_formatter_configuration(df, clean_formatter_state):
679679 max_width = 500 ,
680680 max_height = 200 ,
681681 enable_cell_expansion = False ,
682+ max_memory_bytes = 1024 * 1024 , # 1 MB
683+ min_rows_display = 15 ,
684+ repr_rows = 5 ,
682685 )
683686
684687 html_output = df ._repr_html_ ()
@@ -690,6 +693,71 @@ def test_html_formatter_configuration(df, clean_formatter_state):
690693 assert "expandable-container" not in html_output
691694
692695
def test_html_formatter_row_display_settings(clean_formatter_state):
    """Check that ``repr_rows`` / ``min_rows_display`` change the rendered HTML.

    A single-column, 30-row dataframe is rendered twice under different
    formatter configurations; each time the number of numeric ``<td>`` cells
    and the presence of the "... with N more rows" notice are checked.
    """

    def numeric_cell_count(html):
        # Every row contributes exactly one integer cell in the "value" column,
        # so counting those cells counts the rendered rows.
        return len(re.findall(r"<td[^>]*>\s*\d+\s*</td>", html))

    # Build the 30-row input frame.
    session = SessionContext()
    values = list(range(30))
    record_batch = pa.RecordBatch.from_arrays([pa.array(values)], names=["value"])
    df = session.create_dataframe([[record_batch]])

    # First configuration: the output is expected to be truncated to 7 rows,
    # with a notice covering the remaining 23.
    configure_formatter(repr_rows=7, min_rows_display=20)
    truncated_html = df._repr_html_()

    assert numeric_cell_count(truncated_html) == 7
    assert "... with 23 more rows" in truncated_html

    # Second configuration: min_rows_display (50) exceeds the 30 available
    # rows, so every row is expected and no truncation notice should appear.
    reset_formatter()
    configure_formatter(repr_rows=5, min_rows_display=50)
    complete_html = df._repr_html_()

    assert numeric_cell_count(complete_html) == 30
    assert "... with" not in complete_html
727+
728+
def test_html_formatter_memory_limit(clean_formatter_state):
    """Check that ``max_memory_bytes`` controls the memory-limit warning.

    Renders a dataframe of ten 100,000-character strings once with a
    1000-byte limit (warning expected) and once with a 10 MiB limit
    (no warning expected).
    """
    # Ten identical rows, each far larger than the small limit used below.
    big_value = "x" * 100_000
    session = SessionContext()
    record_batch = pa.RecordBatch.from_arrays(
        [pa.array([big_value] * 10)],
        names=["large_value"],
    )
    df = session.create_dataframe([[record_batch]])

    # A limit of 1000 bytes (about 1 KB) is smaller than even a single cell.
    configure_formatter(max_memory_bytes=1000)
    limited_html = df._repr_html_()
    assert "Memory usage limit reached" in limited_html

    # Start again from a reset formatter and allow 10 MiB: the ~1 MB of
    # string data should now render without the warning.
    reset_formatter()
    configure_formatter(max_memory_bytes=10 * 1024 * 1024)
    unrestricted_html = df._repr_html_()
    assert "Memory usage limit reached" not in unrestricted_html
759+
760+
693761def test_html_formatter_custom_style_provider (df , clean_formatter_state ):
694762 """Test using custom style providers with the HTML formatter."""
695763
@@ -771,74 +839,6 @@ def custom_cell_builder(value, row, col, table_id):
771839 r'<td style="background-color: #d3e9f0"[^>]*>(\d+)-low</td>' , html_output
772840 )
773841 mid_cells = re .findall (
774- r'<td style="border: 1px solid #ddd"[^>]*>(\d+)-mid</td>' , html_output
775- )
776- high_cells = re .findall (
777- r'<td style="background-color: #d9f0d3"[^>]*>(\d+)-high</td>' , html_output
778- )
779-
780- # Sort the extracted values for consistent comparison
781- low_cells = sorted (map (int , low_cells ))
782- mid_cells = sorted (map (int , mid_cells ))
783- high_cells = sorted (map (int , high_cells ))
784-
785- # Verify specific values have the correct styling applied
786- assert low_cells == [1 , 2 ] # Values < 3
787- assert mid_cells == [3 , 4 , 5 , 5 ] # Values 3-5
788- assert high_cells == [6 , 8 , 8 ] # Values > 5
789-
790- # Verify the exact content with styling appears in the output
791- assert (
792- '<td style="background-color: #d3e9f0" data-test="low">1-low</td>'
793- in html_output
794- )
795- assert (
796- '<td style="background-color: #d3e9f0" data-test="low">2-low</td>'
797- in html_output
798- )
799- assert (
800- '<td style="border: 1px solid #ddd" data-test="mid">3-mid</td>' in html_output
801- )
802- assert (
803- '<td style="border: 1px solid #ddd" data-test="mid">4-mid</td>' in html_output
804- )
805- assert (
806- '<td style="background-color: #d9f0d3" data-test="high">6-high</td>'
807- in html_output
808- )
809- assert (
810- '<td style="background-color: #d9f0d3" data-test="high">8-high</td>'
811- in html_output
812- )
813-
814- # Count occurrences to ensure all cells are properly styled
815- assert html_output .count ("-low</td>" ) == 2 # Two low values (1, 2)
816- assert html_output .count ("-mid</td>" ) == 4 # Four mid values (3, 4, 5, 5)
817- assert html_output .count ("-high</td>" ) == 3 # Three high values (6, 8, 8)
818-
819- # Create a custom cell builder that changes background color based on value
820- def custom_cell_builder (value , row , col , table_id ):
821- # Handle numeric values regardless of their exact type
822- try :
823- num_value = int (value )
824- if num_value > 5 : # Values > 5 get green background
825- return f'<td style="background-color: #d9f0d3">{ value } </td>'
826- if num_value < 3 : # Values < 3 get light blue background
827- return f'<td style="background-color: #d3e9f0">{ value } </td>'
828- except (ValueError , TypeError ):
829- pass
830-
831- # Default styling for other cells
832- return f'<td style="border: 1px solid #ddd">{ value } </td>'
833-
834- # Set our custom cell builder
835- formatter = get_formatter ()
836- formatter .set_custom_cell_builder (custom_cell_builder )
837-
838- html_output = df ._repr_html_ ()
839-
840- # Verify our custom cell styling was applied
841- assert "background-color: #d3e9f0" in html_output # For values 1,2
842842
843843
844844def test_html_formatter_custom_header_builder (df , clean_formatter_state ):
0 commit comments