Skip to content

[RFC] Approximately equals operator #18214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Zend/Optimizer/block_pass.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array
ZEND_FALLTHROUGH;

case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
if (opline->op1_type == IS_CONST &&
opline->op2_type == IS_CONST) {
Expand Down
1 change: 1 addition & 0 deletions Zend/Optimizer/dce.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ static inline bool may_have_side_effects(
case ZEND_SL:
case ZEND_SR:
case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
case ZEND_IS_SMALLER:
case ZEND_IS_SMALLER_OR_EQUAL:
Expand Down
1 change: 1 addition & 0 deletions Zend/Optimizer/pass1.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ void zend_optimizer_pass1(zend_op_array *op_array, zend_optimizer_ctx *ctx)
case ZEND_BW_AND:
case ZEND_BW_XOR:
case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
case ZEND_IS_SMALLER:
case ZEND_IS_SMALLER_OR_EQUAL:
Expand Down
1 change: 1 addition & 0 deletions Zend/Optimizer/sccp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,7 @@ static void sccp_visit_instr(scdf_ctx *scdf, zend_op *opline, zend_ssa_op *ssa_o
case ZEND_CONCAT:
case ZEND_FAST_CONCAT:
case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
case ZEND_IS_SMALLER:
case ZEND_IS_SMALLER_OR_EQUAL:
Expand Down
2 changes: 2 additions & 0 deletions Zend/Optimizer/zend_inference.c
Original file line number Diff line number Diff line change
Expand Up @@ -2620,6 +2620,7 @@ static zend_always_inline zend_result _zend_update_type_info(
case ZEND_IS_IDENTICAL:
case ZEND_IS_NOT_IDENTICAL:
case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
case ZEND_IS_SMALLER:
case ZEND_IS_SMALLER_OR_EQUAL:
Expand Down Expand Up @@ -5116,6 +5117,7 @@ ZEND_API bool zend_may_throw_ex(const zend_op *opline, const zend_ssa_op *ssa_op
case ZEND_BOOL_XOR:
return (t1 & MAY_BE_OBJECT) || (t2 & MAY_BE_OBJECT);
case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
case ZEND_IS_SMALLER:
case ZEND_IS_SMALLER_OR_EQUAL:
Expand Down
2 changes: 2 additions & 0 deletions Zend/Optimizer/zend_optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,7 @@ static void zend_redo_pass_two(zend_op_array *op_array)
case ZEND_IS_IDENTICAL:
case ZEND_IS_NOT_IDENTICAL:
case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
case ZEND_IS_SMALLER:
case ZEND_IS_SMALLER_OR_EQUAL:
Expand Down Expand Up @@ -1406,6 +1407,7 @@ static void zend_redo_pass_two_ex(zend_op_array *op_array, zend_ssa *ssa)
case ZEND_IS_IDENTICAL:
case ZEND_IS_NOT_IDENTICAL:
case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
case ZEND_IS_SMALLER:
case ZEND_IS_SMALLER_OR_EQUAL:
Expand Down
52 changes: 52 additions & 0 deletions Zend/tests/approx_equals.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
--TEST--
approx equality
--FILE--
<?php

// Number compares
var_dump(2 ~= 1); // false
var_dump(1.4 ~= 1); // true
var_dump(-1.4 ~= -1); // true
var_dump(-1.5 ~= -1.8); // true
var_dump(random_int(1, 1) ~= 1.1); // true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a test for the common floating point example to make sure it is correctly handled:

Suggested change
var_dump(random_int(1, 1) ~= 1.1); // true
var_dump(random_int(1, 1) ~= 1.1); // true
var_dump(0.1 + 0.2 ~= 0.3); // true


// Array compares
var_dump([1, 2, 3] ~= [2, 3, 4]); // true
var_dump([1, 2, 3] ~= [2, 3, 4, 5]); // false

// String / string compares
var_dump("This is a tpyo" ~= "This is a typo"); // true: No more typos
var_dump("something" ~= "different"); // false: clearly completely different
var_dump("Wtf bro" ~= "Wtf sis"); // true: Abolish concept of gender

// String / different type compares
var_dump(-1.5 ~= "-1.a"); // true
var_dump(-1.5 ~= "-1.aaaaaaa"); // false
var_dump(NULL ~= "blablabla"); // false

// AST dump test
assert(function() { return 1 ~= 2; } && false);
Comment on lines +27 to +28
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// AST dump test
assert(function() { return 1 ~= 2; } && false);
try {
// AST dump test
assert(function() { return 1 ~= 2; } && false);
} catch (AssertionError $e) {
echo $e->getMessage(), PHP_EOL;
}

The stacktrace is just noise.


?>
--EXPECTF--
bool(false)
bool(true)
bool(true)
bool(true)
bool(true)
bool(true)
bool(false)
bool(true)
bool(false)
bool(true)
bool(true)
bool(false)
bool(false)

Fatal error: Uncaught AssertionError: assert(function () {
return 1 ~= 2;
} && false) in %s:%d
Stack trace:
#0 %s(%d): assert(false, 'assert(function...')
#1 {main}
thrown in %s on line %d
1 change: 1 addition & 0 deletions Zend/zend_ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -2436,6 +2436,7 @@ static ZEND_COLD void zend_ast_export_ex(smart_str *str, zend_ast *ast, int prio
case ZEND_IS_IDENTICAL: BINARY_OP(" === ", 170, 171, 171);
case ZEND_IS_NOT_IDENTICAL: BINARY_OP(" !== ", 170, 171, 171);
case ZEND_IS_EQUAL: BINARY_OP(" == ", 170, 171, 171);
case ZEND_IS_APPROX_EQUAL: BINARY_OP(" ~= ", 170, 171, 171);
case ZEND_IS_NOT_EQUAL: BINARY_OP(" != ", 170, 171, 171);
case ZEND_IS_SMALLER: BINARY_OP(" < ", 180, 181, 181);
case ZEND_IS_SMALLER_OR_EQUAL: BINARY_OP(" <= ", 180, 181, 181);
Expand Down
1 change: 1 addition & 0 deletions Zend/zend_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -2363,6 +2363,7 @@ ZEND_API bool zend_is_smart_branch(const zend_op *opline) /* {{{ */
case ZEND_IS_IDENTICAL:
case ZEND_IS_NOT_IDENTICAL:
case ZEND_IS_EQUAL:
case ZEND_IS_APPROX_EQUAL:
case ZEND_IS_NOT_EQUAL:
case ZEND_IS_SMALLER:
case ZEND_IS_SMALLER_OR_EQUAL:
Expand Down
5 changes: 4 additions & 1 deletion Zend/zend_language_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
%left '|'
%left '^'
%left T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
%nonassoc T_IS_EQUAL T_IS_NOT_EQUAL T_IS_IDENTICAL T_IS_NOT_IDENTICAL T_SPACESHIP
%nonassoc T_IS_EQUAL T_IS_APPROX_EQUAL T_IS_NOT_EQUAL T_IS_IDENTICAL T_IS_NOT_IDENTICAL T_SPACESHIP
%nonassoc '<' T_IS_SMALLER_OR_EQUAL '>' T_IS_GREATER_OR_EQUAL
%left '.'
%left T_SL T_SR
Expand Down Expand Up @@ -200,6 +200,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
%token T_BOOLEAN_OR "'||'"
%token T_BOOLEAN_AND "'&&'"
%token T_IS_EQUAL "'=='"
%token T_IS_APPROX_EQUAL "'~='"
%token T_IS_NOT_EQUAL "'!='"
%token T_IS_IDENTICAL "'==='"
%token T_IS_NOT_IDENTICAL "'!=='"
Expand Down Expand Up @@ -1278,6 +1279,8 @@ expr:
{ $$ = zend_ast_create_binary_op(ZEND_IS_NOT_IDENTICAL, $1, $3); }
| expr T_IS_EQUAL expr
{ $$ = zend_ast_create_binary_op(ZEND_IS_EQUAL, $1, $3); }
| expr T_IS_APPROX_EQUAL expr
{ $$ = zend_ast_create_binary_op(ZEND_IS_APPROX_EQUAL, $1, $3); }
| expr T_IS_NOT_EQUAL expr
{ $$ = zend_ast_create_binary_op(ZEND_IS_NOT_EQUAL, $1, $3); }
| expr '<' expr
Expand Down
4 changes: 4 additions & 0 deletions Zend/zend_language_scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -1789,6 +1789,10 @@ OPTIONAL_WHITESPACE_OR_COMMENTS ({WHITESPACE}|{MULTI_LINE_COMMENT}|{SINGLE_LINE_
RETURN_TOKEN(T_IS_EQUAL);
}

<ST_IN_SCRIPTING>"~=" {
RETURN_TOKEN(T_IS_APPROX_EQUAL);
}

<ST_IN_SCRIPTING>"!="|"<>" {
RETURN_TOKEN(T_IS_NOT_EQUAL);
}
Expand Down
2 changes: 2 additions & 0 deletions Zend/zend_opcode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,8 @@ ZEND_API binary_op_type get_binary_op(int opcode)
case ZEND_IS_EQUAL:
case ZEND_CASE:
return (binary_op_type) is_equal_function;
case ZEND_IS_APPROX_EQUAL:
return (binary_op_type) is_approx_equal_function;
case ZEND_IS_NOT_EQUAL:
return (binary_op_type) is_not_equal_function;
case ZEND_IS_SMALLER:
Expand Down
179 changes: 179 additions & 0 deletions Zend/zend_operators.c
Original file line number Diff line number Diff line change
Expand Up @@ -2389,6 +2389,178 @@ ZEND_API int ZEND_FASTCALL zend_compare(zval *op1, zval *op2) /* {{{ */
}
/* }}} */

ZEND_API int ZEND_FASTCALL zend_approx_compare(zval *op1, zval *op2)
{
ZEND_API zend_long reference_levdist(const zend_string *string1, const zend_string *string2, zend_long cost_ins, zend_long cost_rep, zend_long cost_del );

int converted = 0;
zval op1_copy, op2_copy;
zend_string *tmp;
zend_long dist;

// printf("types %d %d\n", Z_TYPE_P(op1), Z_TYPE_P(op2));

while (1) {
switch (TYPE_PAIR(Z_TYPE_P(op1), Z_TYPE_P(op2))) {
case TYPE_PAIR(IS_LONG, IS_LONG):
return Z_LVAL_P(op1)>Z_LVAL_P(op2)?1:(Z_LVAL_P(op1)<Z_LVAL_P(op2)?-1:0);

case TYPE_PAIR(IS_DOUBLE, IS_LONG):
if (!zend_finite(Z_DVAL_P(op1))) return ZEND_UNCOMPARABLE;
return ZEND_THREEWAY_COMPARE(round(Z_DVAL_P(op1)), (double) Z_LVAL_P(op2));

case TYPE_PAIR(IS_LONG, IS_DOUBLE):
if (!zend_finite(Z_DVAL_P(op2))) return ZEND_UNCOMPARABLE;
return ZEND_THREEWAY_COMPARE((double) Z_LVAL_P(op1), round(Z_DVAL_P(op2)));

case TYPE_PAIR(IS_DOUBLE, IS_DOUBLE):
if (!zend_finite(Z_DVAL_P(op1)) || !zend_finite(Z_DVAL_P(op2))) return ZEND_UNCOMPARABLE;
return ZEND_THREEWAY_COMPARE(round(Z_DVAL_P(op1)), round(Z_DVAL_P(op2)));

case TYPE_PAIR(IS_ARRAY, IS_ARRAY):
return ZEND_THREEWAY_COMPARE(zend_hash_num_elements(Z_ARR_P(op1)), zend_hash_num_elements(Z_ARR_P(op2)));

case TYPE_PAIR(IS_NULL, IS_NULL):
case TYPE_PAIR(IS_NULL, IS_FALSE):
case TYPE_PAIR(IS_FALSE, IS_NULL):
case TYPE_PAIR(IS_FALSE, IS_FALSE):
case TYPE_PAIR(IS_TRUE, IS_TRUE):
return 0;

case TYPE_PAIR(IS_NULL, IS_TRUE):
return -1;

case TYPE_PAIR(IS_TRUE, IS_NULL):
return 1;

case TYPE_PAIR(IS_STRING, IS_STRING): {
levenshtein:
if (Z_STR_P(op1) == Z_STR_P(op2)) {
return 0;
}
dist = reference_levdist(Z_STR_P(op1), Z_STR_P(op2), 1, 1, 1);
float ratio = (float) dist / MAX(Z_STRLEN_P(op1), Z_STRLEN_P(op2));
// printf("dist %ld %g\n", dist, ratio);
if (ratio <= .5f) return 0;
if (Z_STRLEN_P(op1) < Z_STRLEN_P(op2)) return -1;
return 1;
}

case TYPE_PAIR(IS_NULL, IS_STRING): {
op1 = &op1_copy;
ZVAL_EMPTY_STRING(op1);
goto levenshtein;
}

case TYPE_PAIR(IS_STRING, IS_NULL): {
op2 = &op2_copy;
ZVAL_EMPTY_STRING(op2);
goto levenshtein;
}

case TYPE_PAIR(IS_LONG, IS_STRING): {
tmp = zend_long_to_str(Z_LVAL_P(op1));
op1_tmp_str_compare:
dist = reference_levdist(tmp, Z_STR_P(op2), 1, 1, 1);
size_t len = ZSTR_LEN(tmp);
float ratio = (float) dist / MAX(len, Z_STRLEN_P(op2));
zend_string_release_ex(tmp, false);
if (ratio <= .5f) return 0;
if (len < Z_STRLEN_P(op2)) return -1;
return 1;
}

case TYPE_PAIR(IS_STRING, IS_LONG): {
tmp = zend_long_to_str(Z_LVAL_P(op2));
op2_tmp_str_compare:
dist = reference_levdist(tmp, Z_STR_P(op1), 1, 1, 1);
size_t len = ZSTR_LEN(tmp);
float ratio = (float) dist / MAX(Z_STRLEN_P(op1), len);
zend_string_release_ex(tmp, false);
if (ratio <= .5f) return 0;
if (Z_STRLEN_P(op1) < len) return -1;
return 1;
}

case TYPE_PAIR(IS_DOUBLE, IS_STRING):
tmp = zend_double_to_str(Z_DVAL_P(op1));
goto op1_tmp_str_compare;

case TYPE_PAIR(IS_STRING, IS_DOUBLE):
tmp = zend_double_to_str(Z_DVAL_P(op2));
goto op2_tmp_str_compare;

case TYPE_PAIR(IS_OBJECT, IS_NULL):
return 1;

case TYPE_PAIR(IS_NULL, IS_OBJECT):
return -1;

default:
if (Z_ISREF_P(op1)) {
op1 = Z_REFVAL_P(op1);
continue;
} else if (Z_ISREF_P(op2)) {
op2 = Z_REFVAL_P(op2);
continue;
}

if (Z_TYPE_P(op1) == IS_OBJECT
|| Z_TYPE_P(op2) == IS_OBJECT) {
zval *object, *other;
if (Z_TYPE_P(op1) == IS_OBJECT) {
object = op1;
other = op2;
} else {
object = op2;
other = op1;
}
if (EXPECTED(Z_TYPE_P(other) == IS_OBJECT)) {
if (Z_OBJ_P(object) == Z_OBJ_P(other)) {
return 0;
}
} else if (Z_TYPE_P(other) == IS_TRUE || Z_TYPE_P(other) == IS_FALSE) {
zval casted;
if (Z_OBJ_HANDLER_P(object, cast_object)(Z_OBJ_P(object), &casted, _IS_BOOL) == FAILURE) {
return object == op1 ? 1 : -1;
}
int ret = object == op1 ? zend_approx_compare(&casted, other) : zend_approx_compare(other, &casted);
ZEND_ASSERT(!Z_REFCOUNTED_P(&casted));
return ret;
}
return Z_OBJ_HANDLER_P(object, compare)(op1, op2);
}

if (!converted) {
if (Z_TYPE_P(op1) < IS_TRUE) {
return zval_is_true(op2) ? -1 : 0;
} else if (Z_TYPE_P(op1) == IS_TRUE) {
return zval_is_true(op2) ? 0 : 1;
} else if (Z_TYPE_P(op2) < IS_TRUE) {
return zval_is_true(op1) ? 1 : 0;
} else if (Z_TYPE_P(op2) == IS_TRUE) {
return zval_is_true(op1) ? 0 : -1;
} else {
op1 = _zendi_convert_scalar_to_number_silent(op1, &op1_copy);
op2 = _zendi_convert_scalar_to_number_silent(op2, &op2_copy);
if (EG(exception)) {
return 1; /* to stop comparison of arrays */
}
converted = 1;
}
} else if (Z_TYPE_P(op1)==IS_ARRAY) {
return 1;
} else if (Z_TYPE_P(op2)==IS_ARRAY) {
return -1;
} else {
ZEND_UNREACHABLE();
zend_throw_error(NULL, "Unsupported operand types");
return 1;
}
}
}
}

/* return int to be compatible with compare_func_t */
static int hash_zval_identical_function(zval *z1, zval *z2) /* {{{ */
{
Expand Down Expand Up @@ -2453,6 +2625,13 @@ ZEND_API zend_result ZEND_FASTCALL is_equal_function(zval *result, zval *op1, zv
}
/* }}} */

/* Implements the `~=` operator: stores true in `result` when
 * zend_approx_compare() reports the operands as equal (returns 0),
 * false otherwise. Always returns SUCCESS. */
ZEND_API zend_result ZEND_FASTCALL is_approx_equal_function(zval *result, zval *op1, zval *op2) /* {{{ */
{
	const int cmp = zend_approx_compare(op1, op2);

	if (cmp == 0) {
		ZVAL_BOOL(result, 1);
	} else {
		ZVAL_BOOL(result, 0);
	}

	return SUCCESS;
}
/* }}} */

ZEND_API zend_result ZEND_FASTCALL is_not_equal_function(zval *result, zval *op1, zval *op2) /* {{{ */
{
ZVAL_BOOL(result, (zend_compare(op1, op2) != 0));
Expand Down
2 changes: 2 additions & 0 deletions Zend/zend_operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval
ZEND_API bool ZEND_FASTCALL zend_is_identical(const zval *op1, const zval *op2);

ZEND_API zend_result ZEND_FASTCALL is_equal_function(zval *result, zval *op1, zval *op2);
ZEND_API zend_result ZEND_FASTCALL is_approx_equal_function(zval *result, zval *op1, zval *op2);
ZEND_API zend_result ZEND_FASTCALL is_identical_function(zval *result, zval *op1, zval *op2);
ZEND_API zend_result ZEND_FASTCALL is_not_identical_function(zval *result, zval *op1, zval *op2);
ZEND_API zend_result ZEND_FASTCALL is_not_equal_function(zval *result, zval *op1, zval *op2);
Expand Down Expand Up @@ -449,6 +450,7 @@ static zend_always_inline bool i_zend_is_true(const zval *op)
#define ZEND_UNCOMPARABLE 1

ZEND_API int ZEND_FASTCALL zend_compare(zval *op1, zval *op2);
ZEND_API int ZEND_FASTCALL zend_approx_compare(zval *op1, zval *op2);

ZEND_API zend_result ZEND_FASTCALL compare_function(zval *result, zval *op1, zval *op2);

Expand Down
Loading
Loading