Helper methods to cross-compile Unicode regular expressions.
Currently, all the code in this repo has been pulled out of the rxpattern rewrite.
The file/library regenerate.js was created by @mathiasbynens. Its core functionality was ported to Haxe; see
utf16/RangeUtil.hx.
lix install gh:skial/regex
- seri - Unicode blocks, scripts, classes & range information.
 - unifill - Haxe library for Unicode UTF{8/16/32} support
 
- Tested ✅
 - Untested ➖
 
| Php | Python | Java | JVM | C# | Js/Node | Interp | Neko | HashLink | Lua | CPP | Flash | 
|---|---|---|---|---|---|---|---|---|---|---|---|
| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ➖ | ➖ | ➖ | 
package ;
import be.Regex;
class Main {
    /**
        Example entry point: assembles a Unicode-aware expression from a
        lowercase-letter category and a superscript-digit class, then traces
        the generated pieces and the result of matching a sample string.
    **/
    public static function main() {
        /**
            Yields either the regular expression category `\p{Ll}` or,
            where categories are not available, the range of codepoints.
        **/
        var lowercase = Regex.category('Ll');

        /**
            Why `²-¹⁰-⁹` instead of just `⁰-⁹`?
            The superscript digits are split across two Unicode blocks:
            `²-¹` is `\u00B2-\u00B9` (home of `¹`, `²` and `³`) and `⁰-⁹` is
            `\u2070-\u2079`. With `⁰-⁹` alone you would only include
            `⁰`, `⁴`, `⁵`, `⁶`, `⁷`, `⁸`, `⁹`.
            ---
            See https://codepoints.net/search?gc=No for more info.
        **/
        var superscript = Regex.pattern('[²-¹⁰-⁹]?');

        // One lowercase letter with an optional superscript, captured as a group.
        var term = '(' + lowercase + superscript + ')';

        // Any run of spaces and plus signs that separates two terms.
        var repeat = Regex.pattern('(?:[ +]*)');

        /**
            The `u` Unicode flag is required. If you skip it, you can
            get an exception on some targets.
        **/
        var regexp = new EReg(term + repeat, 'u');

        /**
            For regexp engines that support categories, roughly:
            - (\p{Ll}[²-¹⁰-⁹]?), (?:[ +]*)

            For those that don't:
            - _skipping a few so as not to show 1900+ codepoints_:
            - [a-z\\xB5\\xDF-\\xF6\\xF8-\\xFF\\u0101\\u0103\\u0105...|\\uD83A[\\uDD22-\\uDD43]

        **/
        trace( term, repeat );

        var didMatch = regexp.match("a⁴ + b³+c²");
        trace( didMatch ); // true

        // a⁴ +
        var firstHit = regexp.matched(0);
        trace( firstHit );
    }
}