Helper methods to cross-compile Unicode regular expressions.
Currently, all the code in this repo has been pulled out of the rxpattern rewrite.
The regenerate.js library was created by @mathiasbynens. Its core functionality has been ported to Haxe; see `utf16/RangeUtil.hx`.
lix install gh:skial/regex
- seri - Unicode blocks, scripts, classes & range information.
- unifill - Haxe library for Unicode UTF{8/16/32} support
- Tested ✅
- Untested ➖
Php | Python | Java | JVM | C# | Js/Node | Interp | Neko | HashLink | Lua | CPP | Flash |
---|---|---|---|---|---|---|---|---|---|---|---|
✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ➖ | ➖ | ➖ |
package ;
import be.Regex;
class Main {

    /**
        Example entry point: assembles a Unicode-aware regular expression
        from `be.Regex` pieces, traces the pieces, then runs the compiled
        expression against a short sample string.
    **/
    public static function main() {
        /**
            Either the regular expression category `\p{Ll}` or the
            equivalent range of codepoints.
        **/
        var lowercase = Regex.category('Ll');

        /**
            Why `²-¹⁰-⁹` and not simply `⁰-⁹`?
            `²-¹` is `\u00B2-\u00B9` while `⁰-⁹` is `\u2070-\u2079`, so
            `⁰-⁹` alone would only include `⁰`, `⁴`, `⁵`, `⁶`, `⁷`, `⁸`, `⁹`
            and leave out `¹`, `²` and `³`.
            ---
            See https://codepoints.net/search?gc=No for more info.
        **/
        var superscript = Regex.pattern('[²-¹⁰-⁹]?');
        var term = '(' + lowercase + superscript + ')';

        // Optional run of spaces and plus signs that may follow a term.
        var separator = Regex.pattern('(?:[ +]*)');

        /**
            The `u` Unicode flag is required. If you skip it, you can
            get an exception on some targets.
        **/
        var matcher = new EReg(term + separator, 'u');

        /**
            For regexp engines that support categories:
                - (\p{Ll}[²-¹⁰-⁹]?), (?:[ +]*)
            For those that don't:
                - _skipping a few so as not to show 1900+ codepoints_:
                - [a-z\\xB5\\xDF-\\xF6\\xF8-\\xFF\\u0101\\u0103\\u0105...|\\uD83A[\\uDD22-\\uDD43]
        **/
        trace(term, separator);

        var sample = "a⁴ + b³+c²";
        var found = matcher.match(sample);
        trace(found); // true

        // First match, including the trailing separator: `a⁴ + `
        trace(matcher.matched(0));
    }

}