0.0.26

lizmat · Aug 16, 2024 · a953daf · a953daf
1 parent 755d29d
commit a953daf
Show file tree

Hide file tree

Showing 7 changed files with 184 additions and 2 deletions.
diff --git a/Changes b/Changes
@@ -2,6 +2,9 @@ Revision history for String-Utils
 
 {{$NEXT}}
 
+0.0.26  2024-08-16T13:53:24+02:00
+    - Add support for "regexify"
+
 0.0.25  2024-08-05T16:32:56+02:00
     - Add support for "paragraphs"
     - Separate documentation into a separate file

diff --git a/META6.json b/META6.json
@@ -30,5 +30,5 @@
   ],
   "test-depends": [
   ],
-  "version": "0.0.25"
+  "version": "0.0.26"
 }
diff --git a/README.md b/README.md
@@ -69,6 +69,10 @@ say all-same("");                      # Nil
 .say for paragraphs("a\n\nb");         # 0 => a␤2 => b␤
 .say for paragraphs($path.IO.lines);   # …
 
+my $string = "foo";
+my $regex  = regexify($string, :ignorecase);
+say "FOOBAR" ~~ $regex;                # ｢FOO｣
+
 use String::Utils <before after>;  # only import "before" and "after"
 ```
 
@@ -356,6 +360,61 @@ Lazily produces a `Seq` of `Pairs` with paragraphs from a `Seq` or string in whi
 
 The optional second argument can be used to indicate the ordinal number of the first line in the string.
 
+regexify
+--------
+
+```raku
+my $string = "foo";
+my $regex  = regexify($string, :ignorecase);
+say "FOOBAR" ~~ $regex;  # ｢FOO｣
+```
+
+Produce a `Regex` object from a given string and modifiers. Note that this is similar to the `/ <$string> /` syntax. But as opposed to that syntax, which interpolates the contents of the string **each time** the regex is executed, the `Regex` object returned by `regexify` is immutable.
+
+The following modifiers are supported:
+
+### i / ignorecase
+
+```raku
+# accept haystack if "bar" is found, regardless of case
+my $regex = regexify("bar", :i);  # or :ignorecase
+```
+
+Allow characters to match even if they are of mixed case.
+
+### smartcase
+
+```raku
+# accept haystack if "bar" is found, regardless of case
+my &anycase = regexify("bar", :smartcase);
+
+# accept haystack if "Bar" is found
+my &exactcase = regexify("Bar", :smartcase);
+```
+
+If the needle is a string and does **not** contain any uppercase characters, then `ignorecase` semantics will be assumed.
+
+### m / ignoremark
+
+```raku
+# accept haystack if "bar" is found, regardless of any accents
+my &anymark = regexify("bar", :m);  # or :ignoremark
+```
+
+Allow characters to match even if they have accents (or not).
+
+### smartmark
+
+```raku
+# accept haystack if "bar" is found, regardless of any accents
+my &anymark = regexify("bar", :smartmark);
+
+# accept haystack if "bår" is found
+my &exactmark = regexify("bår", :smartmark);
+```
+
+If the needle is a string and does **not** contain any characters with accents, then `ignoremark` semantics will be assumed.
+
 AUTHOR
 ======
 

diff --git a/doc/String-Utils.rakudoc b/doc/String-Utils.rakudoc
@@ -68,6 +68,10 @@ say all-same("");                      # Nil
 .say for paragraphs("a\n\nb");         # 0 => a␤2 => b␤
 .say for paragraphs($path.IO.lines);   # …
 
+my $string = "foo";
+my $regex  = regexify($string, :ignorecase);
+say "FOOBAR" ~~ $regex;                # ｢FOO｣
+
 use String::Utils <before after>;  # only import "before" and "after"
 
 =end code
@@ -419,6 +423,75 @@ and the value is the paragraph (without trailing newline).
 The optional second argument can be used to indicate the ordinal number
 of the first line in the string.
 
+=head2 regexify
+
+=begin code :lang<raku>
+
+my $string = "foo";
+my $regex  = regexify($string, :ignorecase);
+say "FOOBAR" ~~ $regex;  # ｢FOO｣
+
+=end code
+
+Produce a C<Regex> object from a given string and modifiers.  Note that this
+is similar to the C</ <$string> /> syntax.  But as opposed to that syntax,
+which interpolates the contents of the string B<each time> the regex is
+executed, the C<Regex> object returned by C<regexify> is immutable.
+
+The following modifiers are supported:
+
+=head3 i / ignorecase
+
+=begin code :lang<raku>
+
+# accept haystack if "bar" is found, regardless of case
+my $regex = regexify("bar", :i);  # or :ignorecase
+
+=end code
+
+Allow characters to match even if they are of mixed case.
+
+=head3 smartcase
+
+=begin code :lang<raku>
+
+# accept haystack if "bar" is found, regardless of case
+my &anycase = regexify("bar", :smartcase);
+
+# accept haystack if "Bar" is found
+my &exactcase = regexify("Bar", :smartcase);
+
+=end code
+
+If the needle is a string and does B<not> contain any uppercase characters,
+then C<ignorecase> semantics will be assumed.
+
+=head3 m / ignoremark
+
+=begin code :lang<raku>
+
+# accept haystack if "bar" is found, regardless of any accents
+my &anymark = regexify("bar", :m);  # or :ignoremark
+
+=end code
+
+Allow characters to match even if they have accents (or not).
+
+=head3 smartmark
+
+=begin code :lang<raku>
+
+# accept haystack if "bar" is found, regardless of any accents
+my &anymark = regexify("bar", :smartmark);
+
+# accept haystack if "bår" is found
+my &exactmark = regexify("bår", :smartmark);
+
+=end code
+
+If the needle is a string and does B<not> contain any characters with accents,
+then C<ignoremark> semantics will be assumed.
+
 =head1 AUTHOR
 
 Elizabeth Mattijsen <liz@raku.rocks>

diff --git a/lib/String/Utils.rakumod b/lib/String/Utils.rakumod
@@ -462,6 +462,21 @@ my multi sub paragraphs(Cool:D $string, Int:D $initial = 0) {
     paragraphs $string.Str.lines, $initial
 }
 
+# Produce a Regex object from the regex source in $spec, prefixed with
+# the adverbs selected by the named arguments:
+#   :i / :ignorecase  always add ":i"
+#   :smartcase        add ":i" only if is-lowercase($spec) is true
+#   :m / :ignoremark  always add ":m"
+#   :smartmark        add ":m" only if has-marks($spec) is false
+# Each selector only affects its own adverb: :m never enables :i.
+#
+# NOTE: $spec is interpolated verbatim into Raku source that is then
+# EVALled, so it must come from a trusted source.
+my sub regexify(str $spec, *%_) {
+    my str $i = %_<i>
+      || %_<ignorecase>
+      || (%_<smartcase> && is-lowercase($spec))
+      ?? ':i '
+      !! '';
+    my str $m = %_<m>
+      || %_<ignoremark>
+      || (%_<smartmark> && !has-marks($spec))
+      ?? ':m '
+      !! '';
+
+    "/$i$m$spec/".EVAL  # until there's a better solution
+}
+
 my sub EXPORT(*@names) {
     Map.new: @names
       ?? @names.map: {

diff --git a/t/02-selective-importing.rakutest b/t/02-selective-importing.rakutest
@@ -3,7 +3,7 @@ use Test;
 my constant @subs = <
   after all-same around before between between-included chomp-needle
   consists-of has-marks is-sha1 is-lowercase is-uppercase is-whitespace
-  leading-whitespace leaf letters ngram non-word root stem
+  leading-whitespace leaf letters ngram non-word regexify root stem
   trailing-whitespace
 >;
 

diff --git a/t/03-regexify.rakutest b/t/03-regexify.rakutest
@@ -0,0 +1,32 @@
+BEGIN %*ENV<RAKU_TEST_DIE_ON_FAIL> = 1;
+
+use Test;
+use String::Utils;
+
+# One entry per test case: the arguments for regexify (as a Capture),
+# the haystack to match against, and the text the match should produce.
+my @tests =
+  [\('\w+'),              "foobar", "foobar"],
+  [\('foo', :i),          "FOOBAR", "FOO"],
+  [\('foo', :ignorecase), "FOOBAR", "FOO"],
+  [\('foo', :smartcase),  "FOOBAR", "FOO"],
+  [\('FOO', :m),          "FÖOBAR", "FÖO"],
+  [\('FOO', :ignoremark), "FÖOBAR", "FÖO"],
+  [\('FOO', :smartmark),  "FÖOBAR", "FÖO"],
+;
+
+# One subtest per test case
+plan @tests.elems;
+
+for @tests -> ($capture, $haystack, $result) {
+    my $label = "Checking '$capture.raku.substr(2,*-1)'";
+
+    subtest $label => {
+        plan 3;
+
+        # The produced object must be a genuine Regex
+        my $regex := regexify(|$capture);
+        isa-ok $regex, Regex;
+
+        # A successful smartmatch leaves the Match object in $/
+        $haystack ~~ $regex;
+        isa-ok $/, Match;
+
+        is $/.Str, $result, "did '$haystack' produce '$result'";
+    }
+}
+
+# vim: expandtab shiftwidth=4