-
Notifications
You must be signed in to change notification settings - Fork 0
PHPORM-53 Fix and test like
and regex
operators
#17
Changes from all commits
c18e814
491e96d
4bb2b24
4d9d400
08330fd
6ddd29d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,8 @@ | |
*/ | ||
class Builder extends BaseBuilder | ||
{ | ||
private const REGEX_DELIMITERS = ['/', '#', '~']; | ||
|
||
/** | ||
* The database collection. | ||
* | ||
|
@@ -91,6 +93,7 @@ class Builder extends BaseBuilder | |
'all', | ||
'size', | ||
'regex', | ||
'not regex', | ||
'text', | ||
'slice', | ||
'elemmatch', | ||
|
@@ -113,13 +116,22 @@ class Builder extends BaseBuilder | |
* @var array | ||
*/ | ||
protected $conversion = [ | ||
'=' => '=', | ||
'!=' => '$ne', | ||
'<>' => '$ne', | ||
'<' => '$lt', | ||
'<=' => '$lte', | ||
'>' => '$gt', | ||
'>=' => '$gte', | ||
'!=' => 'ne', | ||
'<>' => 'ne', | ||
'<' => 'lt', | ||
'<=' => 'lte', | ||
'>' => 'gt', | ||
'>=' => 'gte', | ||
'regexp' => 'regex', | ||
'not regexp' => 'not regex', | ||
'ilike' => 'like', | ||
'elemmatch' => 'elemMatch', | ||
'geointersects' => 'geoIntersects', | ||
'geowithin' => 'geoWithin', | ||
'nearsphere' => 'nearSphere', | ||
'maxdistance' => 'maxDistance', | ||
'centersphere' => 'centerSphere', | ||
'uniquedocs' => 'uniqueDocs', | ||
]; | ||
|
||
/** | ||
|
@@ -932,20 +944,9 @@ protected function compileWheres(): array | |
if (isset($where['operator'])) { | ||
$where['operator'] = strtolower($where['operator']); | ||
|
||
// Operator conversions | ||
$convert = [ | ||
'regexp' => 'regex', | ||
'elemmatch' => 'elemMatch', | ||
'geointersects' => 'geoIntersects', | ||
'geowithin' => 'geoWithin', | ||
'nearsphere' => 'nearSphere', | ||
'maxdistance' => 'maxDistance', | ||
'centersphere' => 'centerSphere', | ||
'uniquedocs' => 'uniqueDocs', | ||
]; | ||
|
||
if (array_key_exists($where['operator'], $convert)) { | ||
$where['operator'] = $convert[$where['operator']]; | ||
// Convert aliased operators | ||
if (isset($this->conversion[$where['operator']])) { | ||
$where['operator'] = $this->conversion[$where['operator']]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We had 2 lists of converted operators. I merged them into a single in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to confirm, does this intentionally use I'm OK with the error being reported later, but wanted to ensure that actually happens without us having to explicitly handle it. I think the explicit handling was only for actual builder methods ("integerRaw" was it?), which I don't think applies here since we're using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Operators like |
||
} | ||
} | ||
|
||
|
@@ -1036,45 +1037,55 @@ protected function compileWhereBasic(array $where): array | |
|
||
// Replace like or not like with a Regex instance. | ||
if (in_array($operator, ['like', 'not like'])) { | ||
if ($operator === 'not like') { | ||
$operator = 'not'; | ||
} else { | ||
$operator = '='; | ||
} | ||
|
||
// Convert to regular expression. | ||
$regex = preg_replace('#(^|[^\\\])%#', '$1.*', preg_quote($value)); | ||
|
||
// Convert like to regular expression. | ||
if (! Str::startsWith($value, '%')) { | ||
$regex = '^'.$regex; | ||
} | ||
if (! Str::endsWith($value, '%')) { | ||
$regex .= '$'; | ||
} | ||
$regex = preg_replace( | ||
[ | ||
// Unescaped % are converted to .* | ||
// Group consecutive % | ||
'#(^|[^\\\])%+#', | ||
alcaeus marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Unescaped _ are converted to . | ||
// Use positive lookahead to replace consecutive _ | ||
'#(?<=^|[^\\\\])_#', | ||
// Escaped \% or \_ are unescaped | ||
'#\\\\\\\(%|_)#', | ||
], | ||
['$1.*', '$1.', '$1'], | ||
// Escape any regex reserved characters, so they are matched | ||
// All backslashes are converted to \\, which are needed in matching regexes. | ||
preg_quote($value), | ||
); | ||
jmikola marked this conversation as resolved.
Show resolved
Hide resolved
|
||
$value = new Regex('^'.$regex.'$', 'i'); | ||
|
||
// For inverse like operations, we can just use the $not operator with the Regex | ||
$operator = $operator === 'like' ? '=' : 'not'; | ||
} | ||
|
||
$value = new Regex($regex, 'i'); | ||
} // Manipulate regexp operations. | ||
elseif (in_array($operator, ['regexp', 'not regexp', 'regex', 'not regex'])) { | ||
// Manipulate regex operations. | ||
elseif (in_array($operator, ['regex', 'not regex'])) { | ||
// Automatically convert regular expression strings to Regex objects. | ||
if (! $value instanceof Regex) { | ||
$e = explode('/', $value); | ||
$flag = end($e); | ||
$regstr = substr($value, 1, -(strlen($flag) + 1)); | ||
$value = new Regex($regstr, $flag); | ||
if (is_string($value)) { | ||
// Detect the delimiter and validate the preg pattern | ||
$delimiter = substr($value, 0, 1); | ||
if (! in_array($delimiter, self::REGEX_DELIMITERS)) { | ||
throw new \LogicException(sprintf('Missing expected starting delimiter in regular expression "%s", supported delimiters are: %s', $value, implode(' ', self::REGEX_DELIMITERS))); | ||
} | ||
$e = explode($delimiter, $value); | ||
// We don't try to detect if the last delimiter is escaped. This would be an invalid regex. | ||
if (count($e) < 3) { | ||
throw new \LogicException(sprintf('Missing expected ending delimiter "%s" in regular expression "%s"', $delimiter, $value)); | ||
} | ||
// Flags are after the last delimiter | ||
$flags = end($e); | ||
// Extract the regex string between the delimiters | ||
$regstr = substr($value, 1, -1 - strlen($flags)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just want to confirm that this handles cases where the pattern includes escaped forward slashes. If we don't currently test that perhaps we should. I think it does, you only assert that I think there may be some edge cases where parsing might accept invalid input. Given If it helps, here are some local tests using a BSON regex object where the pattern string contains slash characters. <?php
require __DIR__ . '/vendor/autoload.php';
$client = new MongoDB\Client('mongodb://localhost:27070/?replicaSet=rs0');
$collection = $client->test->foo;
$collection->drop();
$collection->insertOne(['x' => 'foo']);
$collection->insertOne(['x' => 'foo/']);
foreach (['foo', 'foo/', 'foo\/', 'foo\\/', 'foo\\\/'] as $pattern) {
$regex = new MongoDB\BSON\Regex($pattern);
$cursor = $collection->find(['x' => $regex]);
$cursor->setTypeMap(['root' => 'bson']);
printf("Querying with: %s\n", json_encode($regex));
foreach ($cursor as $document) {
echo $document->toRelaxedExtendedJSON(), "\n";
}
echo "\n";
} Output:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @GromNaN: I think the only possible action item from the above is:
I'll defer to you. If you don't think any such test is necessary, feel free to resolve this thread after reading. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe any more regex parsing than absolutely necessary should be avoided. Looking at the example with Anything more than that would require us to specifically explode on unescaped delimiters. This means that simply matching for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Would it make sense to use consistent parsing here? So we pick up the first delimiter character and then find the next unescaped occurrence? It's unfortunate there's no If we rely on that, we're more likely to produce an exception by yielding an invalid flag string, instead of a pattern that won't match anything and could be more confusing to debug. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Quick follow-up as I mentioned in Slack while thinking about this some more:
Given that, I now understand @alcaeus' previous comment:
I think we're OK to leave this as-is, but I'd propose adding a comment somewhere around this code to note the edge cases we're choosing not to handle. |
||
$value = new Regex($regstr, $flags); | ||
jmikola marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// For inverse regexp operations, we can just use the $not operator | ||
// and pass it a Regex instence. | ||
if (Str::startsWith($operator, 'not')) { | ||
$operator = 'not'; | ||
} | ||
// For inverse regex operations, we can just use the $not operator with the Regex | ||
$operator = $operator === 'regex' ? '=' : 'not'; | ||
} | ||
|
||
if (! isset($operator) || $operator == '=') { | ||
$query = [$column => $value]; | ||
} elseif (array_key_exists($operator, $this->conversion)) { | ||
$query = [$column => [$this->conversion[$operator] => $value]]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The conversion is now done earlier in |
||
} else { | ||
$query = [$column => ['$'.$operator => $value]]; | ||
} | ||
|
@@ -1133,7 +1144,7 @@ protected function compileWhereNull(array $where): array | |
*/ | ||
protected function compileWhereNotNull(array $where): array | ||
{ | ||
$where['operator'] = '!='; | ||
$where['operator'] = 'ne'; | ||
$where['value'] = null; | ||
|
||
return $this->compileWhereBasic($where); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
regexp
andilike
operators are provided by Laravel and documented in the readme of this project.Keeping compatibility with Laravel Query Builder and with previous versions of this package is cheap. I'd like to step back and simply alias the operators.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I decided to keep the Laravel operators as aliases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm OK with this. Being pedantic about what operators we support is likely just going to cause an inconvenience to users, especially if there's no harm in these aliases. The biggest mistake I can see is the fact that
like
is case-insensitive, but that's a default we cannot easily/safely change without harming users.regex
is probably the best migration path for folks usinglike
today, and we can always revisit this down the line and carefully consider how we might deprecatelike
with its case-insensitivity (if ever, but probably not).