Skip to content

Commit 31ee56c

Browse files
wip - incremental edit distance algorithm
1 parent 538f74d commit 31ee56c

File tree

1 file changed

+55
-75
lines changed

1 file changed

+55
-75
lines changed

src/ast/sls/sls_seq_plugin.cpp

+55-75
Original file line number | Diff line number | Diff line change
@@ -807,8 +807,6 @@ namespace sls {
807807
a += val;
808808
for (unsigned i = 0; i < len; ++i)
809809
a_is_value.push_back(is_val);
810-
if (!is_val && len == 0 && !a_is_value.empty())
811-
a_is_value.back() = false;
812810
}
813811

814812
for (auto y : R) {
@@ -818,8 +816,6 @@ namespace sls {
818816
b += val;
819817
for (unsigned i = 0; i < len; ++i)
820818
b_is_value.push_back(is_val);
821-
if (!is_val && len == 0 && !b_is_value.empty())
822-
b_is_value.back() = false;
823819
}
824820

825821
if (a == b)
@@ -862,84 +858,60 @@ namespace sls {
862858
}
863859
}
864860
#endif
865-
for (auto& [side, op, i, j] : m_string_updates) {
866-
if (op == op_t::del && side == side_t::left) {
867-
for (auto x : L) {
868-
869-
auto const& value = strval0(x);
870-
if (i >= value.length())
871-
i -= value.length();
872-
else {
873-
if (!is_value(x))
874-
m_str_updates.push_back({ x, value.extract(0, i) + value.extract(i + 1, value.length()), 1 });
875-
break;
876-
}
877-
}
878-
}
879-
else if (op == op_t::del && side == side_t::right) {
880-
for (auto x : R) {
881-
auto const& value = strval0(x);
882-
if (i >= value.length())
883-
i -= value.length();
884-
else {
885-
if (!is_value(x))
886-
m_str_updates.push_back({ x, value.extract(0, i) + value.extract(i + 1, value.length()), 1 });
887-
break;
888-
}
889-
}
890-
}
891-
else if (op == op_t::add && side == side_t::left) {
892-
for (auto x : L) {
893-
auto const& value = strval0(x);
894-
//verbose_stream() << "add " << j << " " << value << " " << value.length() << " " << is_value(x) << "\n";
895-
if (j > value.length() || (j == value.length() && j > 0)) {
896-
j -= value.length();
897-
continue;
898-
}
899-
if (!is_value(x))
900-
m_str_updates.push_back({ x, value.extract(0, j) + zstring(b[i]) + value.extract(j, value.length()), 1 });
901-
if (j < value.length())
902-
break;
903-
}
904-
}
905-
else if (op == op_t::add && side == side_t::right) {
906-
for (auto x : R) {
907-
auto const& value = strval0(x);
908-
//verbose_stream() << "add " << j << " " << value << " " << value.length() << " " << is_value(x) << "\n";
909-
if (j > value.length() || (j == value.length() && j > 0)) {
910-
j -= value.length();
911-
continue;
912-
}
861+
auto delete_char = [&](auto const& es, unsigned i) {
862+
for (auto x : es) {
863+
auto const& value = strval0(x);
864+
if (i >= value.length())
865+
i -= value.length();
866+
else {
913867
if (!is_value(x))
914-
m_str_updates.push_back({ x, value.extract(0, j) + zstring(a[i]) + value.extract(j, value.length()), 1 });
915-
if (j < value.length())
916-
break;
868+
m_str_updates.push_back({ x, value.extract(0, i) + value.extract(i + 1, value.length()), 1 });
869+
break;
917870
}
918871
}
919-
else if (op == op_t::copy && side == side_t::left) {
920-
for (auto x : L) {
921-
auto const& value = strval0(x);
922-
if (j >= value.length())
923-
j -= value.length();
924-
else {
925-
if (!is_value(x))
926-
m_str_updates.push_back({ x, value.extract(0, j) + zstring(b[i]) + value.extract(j + 1, value.length()), 1 });
927-
break;
928-
}
872+
};
873+
874+
auto add_char = [&](auto const& es, unsigned j, uint32_t ch) {
875+
for (auto x : es) {
876+
auto const& value = strval0(x);
877+
//verbose_stream() << "add " << j << " " << value << " " << value.length() << " " << is_value(x) << "\n";
878+
if (j > value.length() || (j == value.length() && j > 0)) {
879+
j -= value.length();
880+
continue;
929881
}
882+
if (!is_value(x))
883+
m_str_updates.push_back({ x, value.extract(0, j) + zstring(ch) + value.extract(j, value.length()), 1 });
884+
if (j < value.length())
885+
break;
930886
}
931-
else if (op == op_t::copy && side == side_t::right) {
932-
for (auto x : R) {
933-
auto const& value = strval0(x);
934-
if (j >= value.length())
935-
j -= value.length();
936-
else {
937-
if (!is_value(x))
938-
m_str_updates.push_back({ x, value.extract(0, j) + zstring(a[i]) + value.extract(j + 1, value.length()), 1 });
939-
break;
940-
}
887+
};
888+
889+
auto copy_char = [&](auto const& es, unsigned j, uint32_t ch) {
890+
for (auto x : es) {
891+
auto const& value = strval0(x);
892+
if (j >= value.length())
893+
j -= value.length();
894+
else {
895+
if (!is_value(x))
896+
m_str_updates.push_back({ x, value.extract(0, j) + zstring(ch) + value.extract(j + 1, value.length()), 1 });
897+
break;
941898
}
942899
}
900+
};
901+
902+
for (auto& [side, op, i, j] : m_string_updates) {
903+
if (op == op_t::del && side == side_t::left)
904+
delete_char(L, i);
905+
else if (op == op_t::del && side == side_t::right)
906+
delete_char(R, i);
907+
else if (op == op_t::add && side == side_t::left)
908+
add_char(L, j, b[i]);
909+
else if (op == op_t::add && side == side_t::right)
910+
add_char(R, j, a[i]);
911+
else if (op == op_t::copy && side == side_t::left)
912+
copy_char(L, j, b[i]);
913+
else if (op == op_t::copy && side == side_t::right)
914+
copy_char(R, j, a[i]);
943915
}
944916
verbose_stream() << "num updates " << m_str_updates.size() << "\n";
945917
bool r = apply_update();
@@ -1200,7 +1172,15 @@ namespace sls {
12001172
return true;
12011173
if (!is_value(x))
12021174
m_str_updates.push_back({ x, r, 1 });
1175+
if (!is_value(y))
1176+
m_str_updates.push_back({ y, zstring(), 1});
1177+
if (!is_value(z))
1178+
m_str_updates.push_back({ z, zstring(), 1 });
1179+
12031180
// TODO: add more candidate updates; for non-value y and z the only candidate proposed so far is a default-constructed zstring().
1181+
// apply the reverse substitution to r (replace z by y) and update x to the resulting value
1182+
// update x using an edit distance reducing update based on the reverse substitution.
1183+
// reverse substitution isn't unique, so take into account different possibilities (randomly).
12041184
return apply_update();
12051185
}
12061186

0 commit comments

Comments
 (0)