@@ -662,67 +662,6 @@ namespace sls {
662
662
return d[n][m];
663
663
}
664
664
665
- /* *
666
- * \brief edit distance with update calculation
667
- */
668
- unsigned seq_plugin::edit_distance_with_updates (zstring const & a, bool_vector const & a_is_value, zstring const & b, bool_vector const & b_is_value) {
669
- unsigned n = a.length ();
670
- unsigned m = b.length ();
671
- vector<unsigned_vector> d (n + 1 ); // edit distance
672
- vector<unsigned_vector> u (n + 1 ); // edit distance with updates.
673
- m_string_updates.reset ();
674
- for (unsigned i = 0 ; i <= n; ++i) {
675
- d[i].resize (m + 1 , 0 );
676
- u[i].resize (m + 1 , 0 );
677
- }
678
- for (unsigned i = 0 ; i <= n; ++i)
679
- d[i][0 ] = i, u[i][0 ] = i;
680
- for (unsigned j = 0 ; j <= m; ++j)
681
- d[0 ][j] = j, u[0 ][j] = j;
682
- for (unsigned j = 1 ; j <= m; ++j) {
683
- for (unsigned i = 1 ; i <= n; ++i) {
684
- if (a[i - 1 ] == b[j - 1 ]) {
685
- d[i][j] = d[i - 1 ][j - 1 ];
686
- u[i][j] = u[i - 1 ][j - 1 ];
687
- }
688
- else {
689
- u[i][j] = 1 + std::min (u[i - 1 ][j], std::min (u[i][j - 1 ], u[i - 1 ][j - 1 ]));
690
- d[i][j] = 1 + std::min (d[i - 1 ][j], std::min (d[i][j - 1 ], d[i - 1 ][j - 1 ]));
691
-
692
- // TODO: take into account for a_is_value[i - 1] and b_is_value[j - 1]
693
- // and whether index i-1, j-1 is at the boundary of an empty string variable.
694
-
695
- if (d[i - 1 ][j] < u[i][j] && !a_is_value[i - 1 ]) {
696
- m_string_updates.reset ();
697
- u[i][j] = d[i - 1 ][j];
698
- }
699
- if (d[i][j - 1 ] < u[i][j] && !b_is_value[i - 1 ]) {
700
- m_string_updates.reset ();
701
- u[i][j] = d[i][j - 1 ];
702
- }
703
- if (d[i - 1 ][j - 1 ] < u[i][j] && (!a_is_value[i - 1 ] || !b_is_value[j - 1 ])) {
704
- m_string_updates.reset ();
705
- u[i][j] = d[i - 1 ][j - 1 ];
706
- }
707
- if (d[i - 1 ][j] == u[i][j] && !a_is_value[i - 1 ]) {
708
- add_string_update (side_t ::left, op_t ::del, i - 1 , 0 );
709
- add_string_update (side_t ::left, op_t ::add, j - 1 , i - 1 );
710
- }
711
- if (d[i][j - 1 ] == u[i][j] && !b_is_value[j - 1 ]) {
712
- add_string_update (side_t ::right, op_t ::del, j - 1 , 0 );
713
- add_string_update (side_t ::right, op_t ::add, i - 1 , j - 1 );
714
- }
715
- if (d[i - 1 ][j - 1 ] == u[i][j] && !a_is_value[i - 1 ])
716
- add_string_update (side_t ::left, op_t ::copy, j - 1 , i - 1 );
717
-
718
- if (d[i - 1 ][j - 1 ] == u[i][j] && !b_is_value[j - 1 ])
719
- add_string_update (side_t ::right, op_t ::copy, i - 1 , j - 1 );
720
-
721
- }
722
- }
723
- }
724
- return u[n][m];
725
- }
726
665
727
666
void seq_plugin::add_edit_updates (ptr_vector<expr> const & w, zstring const & val, zstring const & val_other, uint_set const & chars) {
728
667
for (auto x : w) {
@@ -793,67 +732,124 @@ namespace sls {
793
732
#endif
794
733
}
795
734
735
+ void seq_plugin::init_string_instance (ptr_vector<expr> const & es, string_instance& a) {
736
+ bool prev_is_var = false ;
737
+ for (auto x : es) {
738
+ auto const & val = strval0 (x);
739
+ auto len = val.length ();
740
+ bool is_val = is_value (x);
741
+ a.s += val;
742
+ if (!prev_is_var && !is_val && !a.next_is_var .empty ())
743
+ a.next_is_var .back () = true ;
744
+ for (unsigned i = 0 ; i < len; ++i) {
745
+ a.is_value .push_back (is_val);
746
+ a.prev_is_var .push_back (false );
747
+ a.next_is_var .push_back (false );
748
+ }
749
+ if (len > 0 && is_val && prev_is_var && !a.is_value .empty ())
750
+ a.prev_is_var [a.prev_is_var .size () - len] = true ;
751
+ prev_is_var = !is_val;
752
+ }
753
+ }
754
+
755
+
756
+ /* *
757
+ * \brief edit distance with update calculation
758
+ */
759
+ unsigned seq_plugin::edit_distance_with_updates (string_instance const & a, string_instance const & b) {
760
+ unsigned n = a.s .length ();
761
+ unsigned m = b.s .length ();
762
+ vector<unsigned_vector> d (n + 1 ); // edit distance
763
+ vector<unsigned_vector> u (n + 1 ); // edit distance with updates.
764
+ m_string_updates.reset ();
765
+ for (unsigned i = 0 ; i <= n; ++i) {
766
+ d[i].resize (m + 1 , 0 );
767
+ u[i].resize (m + 1 , 0 );
768
+ }
769
+ for (unsigned i = 0 ; i <= n; ++i)
770
+ d[i][0 ] = i, u[i][0 ] = i;
771
+ for (unsigned j = 0 ; j <= m; ++j)
772
+ d[0 ][j] = j, u[0 ][j] = j;
773
+ for (unsigned j = 1 ; j <= m; ++j) {
774
+ for (unsigned i = 1 ; i <= n; ++i) {
775
+ if (a.s [i - 1 ] == b.s [j - 1 ]) {
776
+ d[i][j] = d[i - 1 ][j - 1 ];
777
+ u[i][j] = u[i - 1 ][j - 1 ];
778
+ }
779
+ else {
780
+ u[i][j] = 1 + std::min (u[i - 1 ][j], std::min (u[i][j - 1 ], u[i - 1 ][j - 1 ]));
781
+ d[i][j] = 1 + std::min (d[i - 1 ][j], std::min (d[i][j - 1 ], d[i - 1 ][j - 1 ]));
782
+
783
+ if (d[i - 1 ][j] < u[i][j] && a.can_add (i - 1 )) {
784
+ m_string_updates.reset ();
785
+ u[i][j] = d[i - 1 ][j];
786
+ }
787
+ if (d[i][j - 1 ] < u[i][j] && b.can_add (i - 1 )) {
788
+ m_string_updates.reset ();
789
+ u[i][j] = d[i][j - 1 ];
790
+ }
791
+ if (d[i - 1 ][j - 1 ] < u[i][j] && (a.can_add (i - 1 ) || b.can_add (j - 1 ))) {
792
+ m_string_updates.reset ();
793
+ u[i][j] = d[i - 1 ][j - 1 ];
794
+ }
795
+ if (d[i - 1 ][j] == u[i][j] && a.can_add (i - 1 ))
796
+ add_string_update (side_t ::left, op_t ::add, j - 1 , i - 1 );
797
+
798
+ if (d[i][j - 1 ] == u[i][j] && b.can_add (j - 1 ))
799
+ add_string_update (side_t ::right, op_t ::add, i - 1 , j - 1 );
800
+
801
+ if (d[i - 1 ][j] == u[i][j] && !a.is_value [i - 1 ])
802
+ add_string_update (side_t ::left, op_t ::del, i - 1 , 0 );
803
+
804
+ if (d[i][j - 1 ] == u[i][j] && !b.is_value [j - 1 ])
805
+ add_string_update (side_t ::right, op_t ::del, j - 1 , 0 );
806
+
807
+ if (d[i - 1 ][j - 1 ] == u[i][j] && !a.is_value [i - 1 ])
808
+ add_string_update (side_t ::left, op_t ::copy, j - 1 , i - 1 );
809
+
810
+ if (d[i - 1 ][j - 1 ] == u[i][j] && !b.is_value [j - 1 ])
811
+ add_string_update (side_t ::right, op_t ::copy, i - 1 , j - 1 );
812
+ }
813
+ }
814
+ }
815
+ return u[n][m];
816
+ }
817
+
796
818
797
819
bool seq_plugin::repair_down_str_eq_edit_distance_incremental (app* eq) {
798
820
auto const & L = lhs (eq);
799
821
auto const & R = rhs (eq);
800
- zstring a, b;
801
- bool_vector a_is_value, b_is_value;
822
+ string_instance a, b;
823
+ init_string_instance (L, a);
824
+ init_string_instance (R, b);
802
825
803
- for (auto x : L) {
804
- auto const & val = strval0 (x);
805
- auto len = val.length ();
806
- auto is_val = is_value (x);
807
- a += val;
808
- for (unsigned i = 0 ; i < len; ++i)
809
- a_is_value.push_back (is_val);
810
- }
811
-
812
- for (auto y : R) {
813
- auto const & val = strval0 (y);
814
- auto len = val.length ();
815
- auto is_val = is_value (y);
816
- b += val;
817
- for (unsigned i = 0 ; i < len; ++i)
818
- b_is_value.push_back (is_val);
819
- }
820
-
821
- if (a == b)
822
- return update (eq->get_arg (0 ), a) && update (eq->get_arg (1 ), b);
826
+ if (a.s == b.s )
827
+ return update (eq->get_arg (0 ), a.s ) && update (eq->get_arg (1 ), b.s );
828
+
829
+ unsigned diff = edit_distance_with_updates (a, b);
823
830
824
- unsigned diff = edit_distance_with_updates (a, a_is_value, b, b_is_value);
825
- if (a.length () == 0 ) {
826
- m_str_updates.push_back ({ eq->get_arg (1 ), zstring (), 1 });
827
- m_str_updates.push_back ({ eq->get_arg (0 ), zstring (b[0 ]), 1 });
828
- m_str_updates.push_back ({ eq->get_arg (0 ), zstring (b[b.length () - 1 ]), 1 });
829
- }
830
- if (b.length () == 0 ) {
831
- m_str_updates.push_back ({ eq->get_arg (0 ), zstring (), 1 });
832
- m_str_updates.push_back ({ eq->get_arg (1 ), zstring (a[0 ]), 1 });
833
- m_str_updates.push_back ({ eq->get_arg (1 ), zstring (a[a.length () - 1 ]), 1 });
834
- }
835
831
836
- verbose_stream () << " diff \" " << a << " \" \" " << b << " \" diff " << diff << " updates " << m_string_updates.size () << " \n " ;
832
+ verbose_stream () << " diff \" " << a. s << " \" \" " << b. s << " \" diff " << diff << " updates " << m_string_updates.size () << " \n " ;
837
833
#if 1
838
834
for (auto const & [side, op, i, j] : m_string_updates) {
839
835
switch (op) {
840
836
case op_t ::del:
841
837
if (side == side_t ::left)
842
- verbose_stream () << " del " << a[i] << " @ " << i << " left\n " ;
838
+ verbose_stream () << " del " << a. s [i] << " @ " << i << " left\n " ;
843
839
else
844
- verbose_stream () << " del " << b[i] << " @ " << i << " right\n " ;
840
+ verbose_stream () << " del " << b. s [i] << " @ " << i << " right\n " ;
845
841
break ;
846
842
case op_t ::add:
847
843
if (side == side_t ::left)
848
- verbose_stream () << " add " << b[i] << " @ " << j << " left\n " ;
844
+ verbose_stream () << " add " << b. s [i] << " @ " << j << " left\n " ;
849
845
else
850
- verbose_stream () << " add " << a[i] << " @ " << j << " right\n " ;
846
+ verbose_stream () << " add " << a. s [i] << " @ " << j << " right\n " ;
851
847
break ;
852
848
case op_t ::copy:
853
849
if (side == side_t ::left)
854
- verbose_stream () << " copy " << b[i] << " @ " << j << " left\n " ;
850
+ verbose_stream () << " copy " << b. s [i] << " @ " << j << " left\n " ;
855
851
else
856
- verbose_stream () << " copy " << a[i] << " @ " << j << " right\n " ;
852
+ verbose_stream () << " copy " << a. s [i] << " @ " << j << " right\n " ;
857
853
break ;
858
854
}
859
855
}
@@ -905,13 +901,13 @@ namespace sls {
905
901
else if (op == op_t ::del && side == side_t ::right)
906
902
delete_char (R, i);
907
903
else if (op == op_t ::add && side == side_t ::left)
908
- add_char (L, j, b[i]);
904
+ add_char (L, j, b. s [i]);
909
905
else if (op == op_t ::add && side == side_t ::right)
910
- add_char (R, j, a[i]);
906
+ add_char (R, j, a. s [i]);
911
907
else if (op == op_t ::copy && side == side_t ::left)
912
- copy_char (L, j, b[i]);
908
+ copy_char (L, j, b. s [i]);
913
909
else if (op == op_t ::copy && side == side_t ::right)
914
- copy_char (R, j, a[i]);
910
+ copy_char (R, j, a. s [i]);
915
911
}
916
912
verbose_stream () << " num updates " << m_str_updates.size () << " \n " ;
917
913
bool r = apply_update ();
@@ -939,9 +935,6 @@ namespace sls {
939
935
if (a == b)
940
936
return update (eq->get_arg (0 ), a) && update (eq->get_arg (1 ), b);
941
937
942
-
943
-
944
-
945
938
unsigned diff = edit_distance (a, b);
946
939
947
940
// verbose_stream() << "solve: " << diff << " " << a << " " << b << "\n";
0 commit comments