Skip to content

Commit b529a58

Browse files
add unit test for incremental equation edit distance with repair
1 parent 31ee56c commit b529a58

File tree

5 files changed

+469
-109
lines changed

5 files changed

+469
-109
lines changed

src/ast/sls/sls_seq_plugin.cpp

+101-108
Original file line numberDiff line numberDiff line change
@@ -662,67 +662,6 @@ namespace sls {
662662
return d[n][m];
663663
}
664664

665-
/**
666-
* \brief edit distance with update calculation
667-
*/
668-
unsigned seq_plugin::edit_distance_with_updates(zstring const& a, bool_vector const& a_is_value, zstring const& b, bool_vector const& b_is_value) {
669-
unsigned n = a.length();
670-
unsigned m = b.length();
671-
vector<unsigned_vector> d(n + 1); // edit distance
672-
vector<unsigned_vector> u(n + 1); // edit distance with updates.
673-
m_string_updates.reset();
674-
for (unsigned i = 0; i <= n; ++i) {
675-
d[i].resize(m + 1, 0);
676-
u[i].resize(m + 1, 0);
677-
}
678-
for (unsigned i = 0; i <= n; ++i)
679-
d[i][0] = i, u[i][0] = i;
680-
for (unsigned j = 0; j <= m; ++j)
681-
d[0][j] = j, u[0][j] = j;
682-
for (unsigned j = 1; j <= m; ++j) {
683-
for (unsigned i = 1; i <= n; ++i) {
684-
if (a[i - 1] == b[j - 1]) {
685-
d[i][j] = d[i - 1][j - 1];
686-
u[i][j] = u[i - 1][j - 1];
687-
}
688-
else {
689-
u[i][j] = 1 + std::min(u[i - 1][j], std::min(u[i][j - 1], u[i - 1][j - 1]));
690-
d[i][j] = 1 + std::min(d[i - 1][j], std::min(d[i][j - 1], d[i - 1][j - 1]));
691-
692-
// TODO: take into account for a_is_value[i - 1] and b_is_value[j - 1]
693-
// and whether index i-1, j-1 is at the boundary of an empty string variable.
694-
695-
if (d[i - 1][j] < u[i][j] && !a_is_value[i - 1]) {
696-
m_string_updates.reset();
697-
u[i][j] = d[i - 1][j];
698-
}
699-
if (d[i][j - 1] < u[i][j] && !b_is_value[i - 1]) {
700-
m_string_updates.reset();
701-
u[i][j] = d[i][j - 1];
702-
}
703-
if (d[i - 1][j - 1] < u[i][j] && (!a_is_value[i - 1] || !b_is_value[j - 1])) {
704-
m_string_updates.reset();
705-
u[i][j] = d[i - 1][j - 1];
706-
}
707-
if (d[i - 1][j] == u[i][j] && !a_is_value[i - 1]) {
708-
add_string_update(side_t::left, op_t::del, i - 1, 0);
709-
add_string_update(side_t::left, op_t::add, j - 1, i - 1);
710-
}
711-
if (d[i][j - 1] == u[i][j] && !b_is_value[j - 1]) {
712-
add_string_update(side_t::right, op_t::del, j - 1, 0);
713-
add_string_update(side_t::right, op_t::add, i - 1, j - 1);
714-
}
715-
if (d[i - 1][j - 1] == u[i][j] && !a_is_value[i - 1])
716-
add_string_update(side_t::left, op_t::copy, j - 1, i - 1);
717-
718-
if (d[i - 1][j - 1] == u[i][j] && !b_is_value[j - 1])
719-
add_string_update(side_t::right, op_t::copy, i - 1, j - 1);
720-
721-
}
722-
}
723-
}
724-
return u[n][m];
725-
}
726665

727666
void seq_plugin::add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
728667
for (auto x : w) {
@@ -793,67 +732,124 @@ namespace sls {
793732
#endif
794733
}
795734

735+
void seq_plugin::init_string_instance(ptr_vector<expr> const& es, string_instance& a) {
736+
bool prev_is_var = false;
737+
for (auto x : es) {
738+
auto const& val = strval0(x);
739+
auto len = val.length();
740+
bool is_val = is_value(x);
741+
a.s += val;
742+
if (!prev_is_var && !is_val && !a.next_is_var.empty())
743+
a.next_is_var.back() = true;
744+
for (unsigned i = 0; i < len; ++i) {
745+
a.is_value.push_back(is_val);
746+
a.prev_is_var.push_back(false);
747+
a.next_is_var.push_back(false);
748+
}
749+
if (len > 0 && is_val && prev_is_var && !a.is_value.empty())
750+
a.prev_is_var[a.prev_is_var.size() - len] = true;
751+
prev_is_var = !is_val;
752+
}
753+
}
754+
755+
756+
/**
757+
* \brief edit distance with update calculation
758+
*/
759+
unsigned seq_plugin::edit_distance_with_updates(string_instance const& a, string_instance const& b) {
760+
unsigned n = a.s.length();
761+
unsigned m = b.s.length();
762+
vector<unsigned_vector> d(n + 1); // edit distance
763+
vector<unsigned_vector> u(n + 1); // edit distance with updates.
764+
m_string_updates.reset();
765+
for (unsigned i = 0; i <= n; ++i) {
766+
d[i].resize(m + 1, 0);
767+
u[i].resize(m + 1, 0);
768+
}
769+
for (unsigned i = 0; i <= n; ++i)
770+
d[i][0] = i, u[i][0] = i;
771+
for (unsigned j = 0; j <= m; ++j)
772+
d[0][j] = j, u[0][j] = j;
773+
for (unsigned j = 1; j <= m; ++j) {
774+
for (unsigned i = 1; i <= n; ++i) {
775+
if (a.s[i - 1] == b.s[j - 1]) {
776+
d[i][j] = d[i - 1][j - 1];
777+
u[i][j] = u[i - 1][j - 1];
778+
}
779+
else {
780+
u[i][j] = 1 + std::min(u[i - 1][j], std::min(u[i][j - 1], u[i - 1][j - 1]));
781+
d[i][j] = 1 + std::min(d[i - 1][j], std::min(d[i][j - 1], d[i - 1][j - 1]));
782+
783+
if (d[i - 1][j] < u[i][j] && a.can_add(i - 1)) {
784+
m_string_updates.reset();
785+
u[i][j] = d[i - 1][j];
786+
}
787+
if (d[i][j - 1] < u[i][j] && b.can_add(i - 1)) {
788+
m_string_updates.reset();
789+
u[i][j] = d[i][j - 1];
790+
}
791+
if (d[i - 1][j - 1] < u[i][j] && (a.can_add(i - 1) || b.can_add(j - 1))) {
792+
m_string_updates.reset();
793+
u[i][j] = d[i - 1][j - 1];
794+
}
795+
if (d[i - 1][j] == u[i][j] && a.can_add(i - 1))
796+
add_string_update(side_t::left, op_t::add, j - 1, i - 1);
797+
798+
if (d[i][j - 1] == u[i][j] && b.can_add(j - 1))
799+
add_string_update(side_t::right, op_t::add, i - 1, j - 1);
800+
801+
if (d[i - 1][j] == u[i][j] && !a.is_value[i - 1])
802+
add_string_update(side_t::left, op_t::del, i - 1, 0);
803+
804+
if (d[i][j - 1] == u[i][j] && !b.is_value[j - 1])
805+
add_string_update(side_t::right, op_t::del, j - 1, 0);
806+
807+
if (d[i - 1][j - 1] == u[i][j] && !a.is_value[i - 1])
808+
add_string_update(side_t::left, op_t::copy, j - 1, i - 1);
809+
810+
if (d[i - 1][j - 1] == u[i][j] && !b.is_value[j - 1])
811+
add_string_update(side_t::right, op_t::copy, i - 1, j - 1);
812+
}
813+
}
814+
}
815+
return u[n][m];
816+
}
817+
796818

797819
bool seq_plugin::repair_down_str_eq_edit_distance_incremental(app* eq) {
798820
auto const& L = lhs(eq);
799821
auto const& R = rhs(eq);
800-
zstring a, b;
801-
bool_vector a_is_value, b_is_value;
822+
string_instance a, b;
823+
init_string_instance(L, a);
824+
init_string_instance(R, b);
802825

803-
for (auto x : L) {
804-
auto const& val = strval0(x);
805-
auto len = val.length();
806-
auto is_val = is_value(x);
807-
a += val;
808-
for (unsigned i = 0; i < len; ++i)
809-
a_is_value.push_back(is_val);
810-
}
811-
812-
for (auto y : R) {
813-
auto const& val = strval0(y);
814-
auto len = val.length();
815-
auto is_val = is_value(y);
816-
b += val;
817-
for (unsigned i = 0; i < len; ++i)
818-
b_is_value.push_back(is_val);
819-
}
820-
821-
if (a == b)
822-
return update(eq->get_arg(0), a) && update(eq->get_arg(1), b);
826+
if (a.s == b.s)
827+
return update(eq->get_arg(0), a.s) && update(eq->get_arg(1), b.s);
828+
829+
unsigned diff = edit_distance_with_updates(a, b);
823830

824-
unsigned diff = edit_distance_with_updates(a, a_is_value, b, b_is_value);
825-
if (a.length() == 0) {
826-
m_str_updates.push_back({ eq->get_arg(1), zstring(), 1 });
827-
m_str_updates.push_back({ eq->get_arg(0), zstring(b[0]), 1});
828-
m_str_updates.push_back({ eq->get_arg(0), zstring(b[b.length() - 1]), 1});
829-
}
830-
if (b.length() == 0) {
831-
m_str_updates.push_back({ eq->get_arg(0), zstring(), 1 });
832-
m_str_updates.push_back({ eq->get_arg(1), zstring(a[0]), 1 });
833-
m_str_updates.push_back({ eq->get_arg(1), zstring(a[a.length() - 1]), 1 });
834-
}
835831

836-
verbose_stream() << "diff \"" << a << "\" \"" << b << "\" diff " << diff << " updates " << m_string_updates.size() << "\n";
832+
verbose_stream() << "diff \"" << a.s << "\" \"" << b.s << "\" diff " << diff << " updates " << m_string_updates.size() << "\n";
837833
#if 1
838834
for (auto const& [side, op, i, j] : m_string_updates) {
839835
switch (op) {
840836
case op_t::del:
841837
if (side == side_t::left)
842-
verbose_stream() << "del " << a[i] << " @ " << i << " left\n";
838+
verbose_stream() << "del " << a.s[i] << " @ " << i << " left\n";
843839
else
844-
verbose_stream() << "del " << b[i] << " @ " << i << " right\n";
840+
verbose_stream() << "del " << b.s[i] << " @ " << i << " right\n";
845841
break;
846842
case op_t::add:
847843
if (side == side_t::left)
848-
verbose_stream() << "add " << b[i] << " @ " << j << " left\n";
844+
verbose_stream() << "add " << b.s[i] << " @ " << j << " left\n";
849845
else
850-
verbose_stream() << "add " << a[i] << " @ " << j << " right\n";
846+
verbose_stream() << "add " << a.s[i] << " @ " << j << " right\n";
851847
break;
852848
case op_t::copy:
853849
if (side == side_t::left)
854-
verbose_stream() << "copy " << b[i] << " @ " << j << " left\n";
850+
verbose_stream() << "copy " << b.s[i] << " @ " << j << " left\n";
855851
else
856-
verbose_stream() << "copy " << a[i] << " @ " << j << " right\n";
852+
verbose_stream() << "copy " << a.s[i] << " @ " << j << " right\n";
857853
break;
858854
}
859855
}
@@ -905,13 +901,13 @@ namespace sls {
905901
else if (op == op_t::del && side == side_t::right)
906902
delete_char(R, i);
907903
else if (op == op_t::add && side == side_t::left)
908-
add_char(L, j, b[i]);
904+
add_char(L, j, b.s[i]);
909905
else if (op == op_t::add && side == side_t::right)
910-
add_char(R, j, a[i]);
906+
add_char(R, j, a.s[i]);
911907
else if (op == op_t::copy && side == side_t::left)
912-
copy_char(L, j, b[i]);
908+
copy_char(L, j, b.s[i]);
913909
else if (op == op_t::copy && side == side_t::right)
914-
copy_char(R, j, a[i]);
910+
copy_char(R, j, a.s[i]);
915911
}
916912
verbose_stream() << "num updates " << m_str_updates.size() << "\n";
917913
bool r = apply_update();
@@ -939,9 +935,6 @@ namespace sls {
939935
if (a == b)
940936
return update(eq->get_arg(0), a) && update(eq->get_arg(1), b);
941937

942-
943-
944-
945938
unsigned diff = edit_distance(a, b);
946939

947940
//verbose_stream() << "solve: " << diff << " " << a << " " << b << "\n";

src/ast/sls/sls_seq_plugin.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,20 @@ namespace sls {
103103
op_t op;
104104
unsigned i, j;
105105
};
106+
struct string_instance {
107+
zstring s;
108+
bool_vector is_value;
109+
bool_vector prev_is_var;
110+
bool_vector next_is_var;
111+
112+
bool can_add(unsigned i) const {
113+
return !is_value[i] || prev_is_var[i];
114+
}
115+
};
106116
svector<string_update> m_string_updates;
107117
void add_string_update(side_t side, op_t op, unsigned i, unsigned j) { m_string_updates.push_back({ side, op, i, j }); }
108-
unsigned edit_distance_with_updates(zstring const& a, bool_vector const& a_is_value, zstring const& b, bool_vector const& b_is_value);
118+
void init_string_instance(ptr_vector<expr> const& es, string_instance& a);
119+
unsigned edit_distance_with_updates(string_instance const& a, string_instance const& b);
109120
unsigned edit_distance(zstring const& a, zstring const& b);
110121
void add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
111122

src/test/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ add_executable(test-z3
111111
simplex.cpp
112112
simplifier.cpp
113113
sls_test.cpp
114+
sls_seq_plugin.cpp
114115
small_object_allocator.cpp
115116
smt2print_parse.cpp
116117
smt_context.cpp

src/test/main.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -270,4 +270,5 @@ int main(int argc, char ** argv) {
270270
TST(euf_arith_plugin);
271271
TST(sls_test);
272272
TST(scoped_vector);
273+
TST(sls_seq_plugin);
273274
}

0 commit comments

Comments
 (0)