From 972da1b34f069ed7a588392a9aa6240fa2c40ab8 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 7 Jun 2022 17:00:52 +0100 Subject: [PATCH] Natural Order work. --- .../src/Data/Ordering/Comparator.enso | 27 ++++---- .../src/Data/Ordering/Natural_Order.enso | 12 ++-- .../java/org/enso/base/ObjectComparator.java | 35 ++++++----- test/Table_Tests/src/Common_Table_Spec.enso | 8 +-- test/Table_Tests/src/Table_Spec.enso | 2 +- .../src/Data/Ordering/Comparator_Spec.enso | 63 +++++++++++++++++++ .../src/Data/Ordering/Natural_Order_Spec.enso | 4 ++ test/Tests/src/Data/Statistics_Spec.enso | 1 - test/Tests/src/Main.enso | 6 ++ 9 files changed, 122 insertions(+), 36 deletions(-) create mode 100644 test/Tests/src/Data/Ordering/Comparator_Spec.enso diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso index c72e46e0580c5..b1a1828bc9ee3 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso @@ -1,4 +1,5 @@ -from Standard.Base import Any, Ordering, Nothing, Vector, Case_Insensitive +from Standard.Base import all +import Standard.Base.Data.Ordering.Natural_Order from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering polyglot java import org.enso.base.ObjectComparator @@ -22,16 +23,20 @@ new custom_comparator=Nothing = Create a Java Comparator with the specified Text_Ordering Arguments: - - custom_comparator: - If `Nothing` will get a singleton instance for `.compare_to`. - Otherwise can support a custom fallback comparator. + - text_ordering: + Specifies how to compare Text values within the Comparator. 
for_text_ordering : Text_Ordering -> ObjectComparator for_text_ordering text_ordering = - comparator = case text_ordering.case_sensitive of - Case_Insensitive locale -> - here.new.withCaseInsensitivity locale.java_locale - _ -> - here.new + locale = case text_ordering.case_sensitive of + Case_Insensitive locale -> locale + _ -> Nothing - ## ToDo: sort_digits_as_numbers - comparator + case Pair text_ordering.sort_digits_as_numbers locale of + Pair True (Locale.Locale _) -> + txt_cmp a b = Natural_Order.compare a b text_ordering.case_sensitive . to_sign + here.new.withCustomTextComparator txt_cmp + Pair True Nothing -> + txt_cmp a b = Natural_Order.compare a b . to_sign + here.new.withCustomTextComparator txt_cmp + Pair False (Locale.Locale _) -> here.new.withCaseInsensitivity locale.java_locale + Pair False Nothing -> here.new diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso index a365a18a74054..8d7b5ffff7fae 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso @@ -18,8 +18,12 @@ polyglot java import com.ibm.icu.text.BreakIterator Sort a vector of texts according to the natural dictionary ordering. ["a2", "a1", "a100", "a001", "a0001"].sort by=Natural_Order.compare . 
should_equal ["a0001", "a001", "a1", "a2", "a100"] -compare : Text -> Text -> Ordering -compare text1 text2 = +compare : Text -> Text -> (True|Case_Insensitive) Ordering +compare text1 text2 case_sensitive=True = + compare_text a b = case case_sensitive of + Case_Insensitive locale -> a.compare_to_ignore_case b locale + _ -> a.compare_to b + iter1 = BreakIterator.getCharacterInstance iter1.setText text1 @@ -79,7 +83,7 @@ compare text1 text2 = if (tmp.first.not && tmp.second) then Ordering.Greater else case tmp.first.not of True -> - text_comparison = substring1.compare_to substring2 + text_comparison = compare_text substring1 substring2 if text_comparison != Ordering.Equal then text_comparison else @Tail_Call order next1 iter1.next next2 iter2.next False -> @@ -93,7 +97,7 @@ compare text1 text2 = value_comparison = value1.compare_to value2 if value_comparison != Ordering.Equal then value_comparison else - text_comparison = num_text1.compare_to num_text2 + text_comparison = compare_text num_text1 num_text2 if text_comparison != Ordering.Equal then text_comparison else @Tail_Call order (parsed1.at 2) (parsed1.at 3) (parsed2.at 2) (parsed2.at 3) diff --git a/std-bits/base/src/main/java/org/enso/base/ObjectComparator.java b/std-bits/base/src/main/java/org/enso/base/ObjectComparator.java index 17afd07864263..f7c98654e6776 100644 --- a/std-bits/base/src/main/java/org/enso/base/ObjectComparator.java +++ b/std-bits/base/src/main/java/org/enso/base/ObjectComparator.java @@ -19,14 +19,15 @@ public class ObjectComparator implements Comparator { */ public static ObjectComparator getInstance(BiFunction fallbackComparator) { if (INSTANCE == null) { - INSTANCE = new ObjectComparator((l, r) -> fallbackComparator.apply(l, r).intValue()); + INSTANCE = new ObjectComparator(fallbackComparator); } return INSTANCE; } - private final BiFunction fallbackComparator; - private final Locale caseInsensitiveLocale; + private final BiFunction fallbackComparator; + private final BiFunction 
textComparator; + public ObjectComparator() { this( @@ -35,14 +36,13 @@ public ObjectComparator() { }); } - public ObjectComparator(BiFunction fallbackComparator) { - this.fallbackComparator = fallbackComparator; - this.caseInsensitiveLocale = null; + public ObjectComparator(BiFunction fallbackComparator) { + this(fallbackComparator, (a, b) -> Long.valueOf(Text_Utils.compare_normalized(a, b))); } - private ObjectComparator(BiFunction fallbackComparator, Locale caseInsensitiveLocale) { + private ObjectComparator(BiFunction fallbackComparator, BiFunction textComparator) { this.fallbackComparator = fallbackComparator; - this.caseInsensitiveLocale = caseInsensitiveLocale; + this.textComparator = textComparator; } /** @@ -51,7 +51,16 @@ private ObjectComparator(BiFunction fallbackComparator, * @return Comparator object. */ public ObjectComparator withCaseInsensitivity(Locale locale) { - return new ObjectComparator(this.fallbackComparator, locale); + return new ObjectComparator(this.fallbackComparator, (a, b) -> Long.valueOf(Text_Utils.compare_normalized_ignoring_case(a, b, locale))); + } + + /** + * Create a copy of the ObjectComparator with a custom text comparator. + * @param textComparator custom comparator for Text. + * @return Comparator object.
+ */ + public ObjectComparator withCustomTextComparator(BiFunction textComparator) { + return new ObjectComparator(this.fallbackComparator, textComparator); } @Override @@ -109,11 +118,7 @@ public int compare(Object thisValue, Object thatValue) throws ClassCastException // Text if (thisValue instanceof String thisString && thatValue instanceof String thatString) { - if (caseInsensitiveLocale != null) { - return Text_Utils.compare_normalized_ignoring_case(thisString, thatString, caseInsensitiveLocale); - } else { - return Text_Utils.compare_normalized(thisString, thatString); - } + return textComparator.apply(thisString, thatString).intValue(); } // DateTimes @@ -142,6 +147,6 @@ public int compare(Object thisValue, Object thatValue) throws ClassCastException } // Fallback to Enso - return fallbackComparator.apply(thisValue, thatValue); + return fallbackComparator.apply(thisValue, thatValue).intValue(); } } diff --git a/test/Table_Tests/src/Common_Table_Spec.enso b/test/Table_Tests/src/Common_Table_Spec.enso index 48da1f558e7b7..b1231ea5f1a6a 100644 --- a/test/Table_Tests/src/Common_Table_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Spec.enso @@ -737,16 +737,16 @@ spec prefix table_builder test_selection pending=Nothing = Test.specify "should support natural and case insensitive ordering at the same time" pending=(if (test_selection.natural_ordering.not || test_selection.case_insensitive_ordering.not) then "Natural ordering or case sensitive ordering is not supported.") <| t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering sort_digits_as_numbers=True case_sensitive=Case_Insensitive) - t1.at "psi" . to_vector . should_equal ["c01", "C2", "c10", Nothing] + t1.at "psi" . to_vector . should_equal [Nothing, "c01", "C2", "c10"] t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering sort_digits_as_numbers=True) - t2.at "psi" . to_vector . 
should_equal ["C2", "c01", "c10", Nothing] + t2.at "psi" . to_vector . should_equal [Nothing, "C2", "c01", "c10"] t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering case_sensitive=Case_Insensitive) - t3.at "psi" . to_vector . should_equal ["c01", "c10", "C2", Nothing] + t3.at "psi" . to_vector . should_equal [Nothing, "c01", "c10", "C2"] t4 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) - t4.at "psi" . to_vector . should_equal ["C2", "c01", "c10", Nothing] + t4.at "psi" . to_vector . should_equal [Nothing, "C2", "c01", "c10"] Test.specify "text ordering settings should not affect numeric columns" <| ordering = Text_Ordering sort_digits_as_numbers=True case_sensitive=Case_Insensitive diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 0b3d1a0fbbc3c..82fbf7b9e97f5 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -635,7 +635,7 @@ spec = t_3 = Table.new [c_3_1, c_3_2, c_3_3] t_3.default_visualization.should_equal Visualization.Id.table - selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=True + selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=True order_by=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True Common_Table_Spec.spec "[In-Memory] " table_builder=Table.new test_selection=selection Test.group "Use First Row As Names" <| diff --git a/test/Tests/src/Data/Ordering/Comparator_Spec.enso b/test/Tests/src/Data/Ordering/Comparator_Spec.enso new file mode 100644 index 0000000000000..9fda1f0ec1467 --- /dev/null +++ b/test/Tests/src/Data/Ordering/Comparator_Spec.enso @@ -0,0 +1,63 @@ +from Standard.Base import all + +import Standard.Base.Data.Ordering.Comparator +from 
Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering + +import Standard.Test + +polyglot java import java.lang.ClassCastException + +# === Test Resources === + +type Ord number + +Ord.compare_to : Ord -> Ordering +Ord.compare_to that = that.number.compare_to this.number + +type No_Ord number + +# Tests + +spec = Test.group "Object Comparator" <| + handle_classcast = Panic.catch ClassCastException handler=(Error.throw Vector.Incomparable_Values_Error) + default_comparator a b = handle_classcast <| Comparator.new.compare a b + case_insensitive a b = handle_classcast <| Comparator.for_text_ordering (Text_Ordering False Case_Insensitive) . compare a b + + Test.specify "can compare numbers" <| + ((default_comparator 1 2) < 0) . should_equal True + ((default_comparator 1 1.2) < 0) . should_equal True + ((default_comparator 1 1) == 0) . should_equal True + + Test.specify "can compare booleans" <| + ((default_comparator True False) > 0) . should_equal True + ((default_comparator True True) == 0) . should_equal True + ((default_comparator False False) == 0) . should_equal True + + Test.specify "can compare Nothing and it ends up as lowest value" <| + ((default_comparator 1 Nothing) > 0) . should_equal True + ((default_comparator Nothing 1.235) < 0) . should_equal True + ((default_comparator True Nothing) > 0) . should_equal True + ((default_comparator Nothing False) < 0) . should_equal True + ((default_comparator "A" Nothing) > 0) . should_equal True + ((default_comparator Nothing "ZSA") < 0) . should_equal True + ((default_comparator Nothing Nothing) == 0) . should_equal True + + Test.specify "can compare Text with Enso standard defaults" <| + ((default_comparator "A" "a") < 0) . should_equal True + ((default_comparator "ABBA" "ABBA") == 0) . should_equal True + ((default_comparator '\u00E9' '\u0065\u{301}') == 0) . should_equal True + + Test.specify "can compare Text with case-insensitive comparisons" <| + ((case_insensitive "A" "a") == 0) . 
should_equal True + ((case_insensitive "ABBA" "abba") == 0) . should_equal True + ((case_insensitive '\u00E9' '\u0065\u{301}') == 0) . should_equal True + + Test.specify "can compare custom types" <| + ((default_comparator (Ord 1) (Ord 0)) < 0) . should_equal True + ((default_comparator (Ord 1) (Ord 1)) == 0) . should_equal True + + Test.specify "should fail gracefully for incomparable items" <| + (default_comparator 1 True).should_fail_with Vector.Incomparable_Values_Error + (default_comparator (No_Ord 1) (No_Ord 2)).should_fail_with Vector.Incomparable_Values_Error + +main = Test.Suite.run_main here.spec diff --git a/test/Tests/src/Data/Ordering/Natural_Order_Spec.enso b/test/Tests/src/Data/Ordering/Natural_Order_Spec.enso index c16e9d3e6651c..4ca85de5815d9 100644 --- a/test/Tests/src/Data/Ordering/Natural_Order_Spec.enso +++ b/test/Tests/src/Data/Ordering/Natural_Order_Spec.enso @@ -5,9 +5,13 @@ import Standard.Base.Data.Ordering.Natural_Order import Standard.Test spec = Test.group "Natural Order" <| + case_insensitive_compare a b = Natural_Order.compare a b Case_Insensitive + Test.specify "should behave as shown in examples" <| Natural_Order.compare "a2" "a100" . should_equal Ordering.Less ["a2", "a1", "a100", "a001", "a0001"].sort by=Natural_Order.compare . should_equal ["a0001", "a001", "a1", "a2", "a100"] + ["A2", "a1", "A100", "A001", "a0001"].sort by=Natural_Order.compare . should_equal ["A001", "A2", "A100", "a0001", "a1"] + ["A2", "a1", "A100", "A001", "a0001"].sort by=case_insensitive_compare . should_equal ["a0001", "A001", "a1", "A2", "A100"] Test.specify "should correctly compare values" <| Natural_Order.compare "a1" "a2" . should_equal Ordering.Less diff --git a/test/Tests/src/Data/Statistics_Spec.enso b/test/Tests/src/Data/Statistics_Spec.enso index bf08ff7c740b1..38ba14d07e22c 100644 --- a/test/Tests/src/Data/Statistics_Spec.enso +++ b/test/Tests/src/Data/Statistics_Spec.enso @@ -247,5 +247,4 @@ spec = Statistics.covariance_matrix series . 
should_fail_with Illegal_Argument_Error Statistics.pearson_correlation series . should_fail_with Illegal_Argument_Error - main = Test.Suite.run_main here.spec diff --git a/test/Tests/src/Main.enso b/test/Tests/src/Main.enso index 86785f31b011d..1fc32761354c7 100644 --- a/test/Tests/src/Main.enso +++ b/test/Tests/src/Main.enso @@ -30,6 +30,9 @@ import project.Data.Noise.Generator_Spec as Noise_Generator_Spec import project.Data.Noise_Spec import project.Data.Numbers_Spec import project.Data.Ordering_Spec +import project.Data.Ordering.Comparator_Spec +import project.Data.Ordering.Natural_Order_Spec +import project.Data.Ordering.Vector_Lexicographic_Order_Spec import project.Data.Range_Spec import project.Data.Ref_Spec import project.Data.Text_Spec @@ -88,6 +91,9 @@ main = Test.Suite.run_main <| Noise_Spec.spec Numbers_Spec.spec Ordering_Spec.spec + Comparator_Spec.spec + Natural_Order_Spec.spec + Vector_Lexicographic_Order_Spec.spec Process_Spec.spec Python_Interop_Spec.spec R_Interop_Spec.spec