Skip to content

Commit 4fbf54a

Browse files
fixes to regex membership and edit updates
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
1 parent 1ab0962 commit 4fbf54a

File tree

2 files changed

+64
-14
lines changed

2 files changed

+64
-14
lines changed

src/ast/sls/sls_seq_plugin.cpp

+63-13
Original file line number | Diff line number | Diff line change
@@ -600,7 +600,7 @@ namespace sls {
600600
VERIFY(m.is_eq(e, x, y));
601601
IF_VERBOSE(3, verbose_stream() << is_true << ": " << mk_bounded_pp(e, m, 3) << "\n");
602602
if (ctx.is_true(e)) {
603-
//return repair_down_str_eq_edit_distance(e);
603+
return repair_down_str_eq_edit_distance(e);
604604
if (ctx.rand(2) != 0)
605605
return repair_down_str_eq_unify(e);
606606
if (!is_value(x))
@@ -651,7 +651,7 @@ namespace sls {
651651
return d[n][m];
652652
}
653653

654-
void seq_plugin::add_edit_updates(ptr_vector<expr> const& w, uint_set const& chars) {
654+
void seq_plugin::add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
655655
for (auto x : w) {
656656
if (is_value(x))
657657
continue;
@@ -671,6 +671,50 @@ namespace sls {
671671
m_str_updates.push_back({ x, zstring(ch) + b, 1 }); // replace first character in a by ch
672672
}
673673
}
674+
unsigned first_diff = UINT_MAX;
675+
for (unsigned i = 0; i < val.length() && i < val_other.length(); ++i) {
676+
if (val[i] != val_other[i]) {
677+
first_diff = i;
678+
break;
679+
}
680+
}
681+
if (first_diff != UINT_MAX) {
682+
unsigned index = first_diff;
683+
for (auto x : w) {
684+
auto const & val_x = strval0(x);
685+
auto len_x = val_x.length();
686+
if (index < len_x) {
687+
if (is_value(x))
688+
break;
689+
auto new_val = val_x.extract(0, first_diff) + zstring(val_other[first_diff]) + val_x.extract(first_diff + 1, val_x.length());
690+
m_str_updates.push_back({ x, new_val, 1 });
691+
break;
692+
}
693+
index -= len_x;
694+
}
695+
}
696+
unsigned last_diff = 0;
697+
for (unsigned i = 1; i <= val.length() && i <= val_other.length(); ++i) {
698+
if (val[val.length() - i] != val_other[val_other.length() - i]) {
699+
last_diff = i;
700+
break;
701+
}
702+
}
703+
if (last_diff != 0) {
704+
unsigned index = last_diff;
705+
for (auto x : w) {
706+
auto const& val_x = strval0(x);
707+
auto len_x = val_x.length();
708+
if (index < len_x) {
709+
if (is_value(x))
710+
break;
711+
auto new_val = val_x.extract(0, len_x - last_diff) + zstring(val_other[val_other.length() - last_diff]) + val_x.extract(len_x - last_diff + 1, len_x);
712+
m_str_updates.push_back({ x, new_val, 1 });
713+
break;
714+
}
715+
index -= len_x;
716+
}
717+
}
674718
}
675719

676720
bool seq_plugin::repair_down_str_eq_edit_distance(app* eq) {
@@ -692,10 +736,12 @@ namespace sls {
692736
if (a == b)
693737
return update(eq->get_arg(0), a) && update(eq->get_arg(1), b);
694738

695-
unsigned diff = a.length() + b.length() + L.size() + R.size();
739+
unsigned diff = edit_distance(a, b);
696740

697-
add_edit_updates(L, b_chars);
698-
add_edit_updates(R, a_chars);
741+
//verbose_stream() << "solve: " << diff << " " << a << " " << b << "\n";
742+
743+
add_edit_updates(L, a, b, b_chars);
744+
add_edit_updates(R, b, a, a_chars);
699745

700746
for (auto& [x, s, score] : m_str_updates) {
701747
a.reset();
@@ -713,10 +759,14 @@ namespace sls {
713759
b += strval0(z);
714760
}
715761
unsigned local_diff = edit_distance(a, b);
716-
if (local_diff >= diff)
762+
763+
//verbose_stream() << local_diff << " " << a << " " << b << "\n";
764+
if (local_diff > diff)
765+
score = 0.01;
766+
else if (local_diff == diff)
717767
score = 0.1;
718768
else
719-
score = (diff - local_diff) * (diff - local_diff);
769+
score = 2 * (diff - local_diff) * (diff - local_diff);
720770
}
721771
return apply_update();
722772
}
@@ -1197,7 +1247,7 @@ namespace sls {
11971247
for (auto ch : value0)
11981248
chars.insert(ch);
11991249

1200-
add_edit_updates(es, chars);
1250+
add_edit_updates(es, value, value0, chars);
12011251

12021252
unsigned diff = edit_distance(value, value0);
12031253
for (auto& [x, s, score] : m_str_updates) {
@@ -1462,7 +1512,7 @@ namespace sls {
14621512
expr_ref d_r(y, m);
14631513
seq_rewriter seqrw(m);
14641514
for (unsigned i = 0; i < s.length(); ++i) {
1465-
verbose_stream() << "Derivative " << s.extract(0, i) << ": " << d_r << "\n";
1515+
IF_VERBOSE(3, verbose_stream() << "Derivative " << s.extract(0, i) << ": " << d_r << "\n");
14661516
if (seq.re.is_empty(d_r))
14671517
break;
14681518
zstring prefix = s.extract(0, i);
@@ -1478,10 +1528,10 @@ namespace sls {
14781528

14791529
unsigned global_min_length = UINT_MAX;
14801530
for (auto& [str, min_length] : lookaheads)
1481-
global_min_length = std::max(min_length, global_min_length);
1531+
global_min_length = std::min(min_length, global_min_length);
14821532

1483-
verbose_stream() << "repair in_re " << current_min_length << " "
1484-
<< global_min_length << " " << mk_pp(e, m) << " " << s << "\n";
1533+
IF_VERBOSE(3, verbose_stream() << "repair in_re " << current_min_length << " "
1534+
<< global_min_length << " " << mk_pp(e, m) << " " << s << "\n");
14851535

14861536

14871537
// TODO: do some length analysis to prune out short candidates when there are longer ones.
@@ -1499,7 +1549,7 @@ namespace sls {
14991549
score = 1 << (current_min_length - min_length);
15001550
score /= ((double)abs((int)s.length() - (int)str.length()) + 1);
15011551
}
1502-
verbose_stream() << "prefix " << score << " " << min_length << ": " << str << "\n";
1552+
IF_VERBOSE(3, verbose_stream() << "prefix " << score << " " << min_length << ": " << str << "\n");
15031553
m_str_updates.push_back({ x, str, score });
15041554
}
15051555
}

src/ast/sls/sls_seq_plugin.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ namespace sls {
9292
void repair_up_str_stoi(app* e);
9393

9494
unsigned edit_distance(zstring const& a, zstring const& b);
95-
void add_edit_updates(ptr_vector<expr> const& w, uint_set const& chars);
95+
void add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
9696

9797
// regex functionality
9898

0 commit comments

Comments
 (0)