From 8f5658b77d8c728c58e958f28f4f1cdf5f9af48a Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Mon, 9 Dec 2024 15:55:11 -0800 Subject: [PATCH] add another baseline heuristic for string equalities, add cases for array axioms. Signed-off-by: Nikolaj Bjorner --- src/ast/sls/sls_array_plugin.cpp | 104 +++++++++++--- src/ast/sls/sls_array_plugin.h | 7 +- src/ast/sls/sls_euf_plugin.cpp | 1 - src/ast/sls/sls_seq_plugin.cpp | 236 ++++++++++++++++++++++++++++++- src/ast/sls/sls_seq_plugin.h | 15 ++ src/math/lp/lar_solver.cpp | 6 +- 6 files changed, 340 insertions(+), 29 deletions(-) diff --git a/src/ast/sls/sls_array_plugin.cpp b/src/ast/sls/sls_array_plugin.cpp index 930e8f9d8d7..37e36e609b3 100644 --- a/src/ast/sls/sls_array_plugin.cpp +++ b/src/ast/sls/sls_array_plugin.cpp @@ -35,7 +35,7 @@ namespace sls { m_g = alloc(euf::egraph, m); m_kv = nullptr; init_egraph(*m_g); - saturate_store(*m_g); + saturate(*m_g); return true; } @@ -43,28 +43,73 @@ namespace sls { // ensure b[i] ~ v // ensure b[j] ~ a[j] for j != i - void array_plugin::saturate_store(euf::egraph& g) { + void array_plugin::saturate(euf::egraph& g) { unsigned sz = 0; while (sz < g.nodes().size()) { sz = g.nodes().size(); for (unsigned i = 0; i < sz; ++i) { auto n = g.nodes()[i]; - if (!a.is_store(n->get_expr())) - continue; - - force_store_axiom1(g, n); - - for (auto p : euf::enode_parents(n->get_root())) - if (a.is_select(p->get_expr())) - force_store_axiom2_down(g, n, p); + if (a.is_store(n->get_expr())) + saturate_store(g, n); + else if (a.is_const(n->get_expr())) + saturate_const(g, n); + else if (a.is_map(n->get_expr())) + saturate_map(g, n); - auto arr = n->get_arg(0); - for (auto p : euf::enode_parents(arr->get_root())) - if (a.is_select(p->get_expr())) - force_store_axiom2_up(g, n, p); } } - display(verbose_stream() << "saturated\n"); + IF_VERBOSE(2, display(verbose_stream() << "saturated\n")); + } + + void array_plugin::saturate_store(euf::egraph& g, euf::enode* n) { + force_store_axiom1(g, n); + for (auto p : euf::enode_parents(n->get_root())) + if (a.is_select(p->get_expr())) + force_store_axiom2_down(g, n, p); + auto arr = n->get_arg(0); + for (auto p : euf::enode_parents(arr->get_root())) + if (a.is_select(p->get_expr())) + force_store_axiom2_up(g, n, p); + } + + void array_plugin::saturate_const(euf::egraph& g, euf::enode* n) { + for (auto p : euf::enode_parents(n->get_root())) + if (a.is_select(p->get_expr())) + force_const_axiom(g, n, p); + } + + void array_plugin::saturate_map(euf::egraph& g, euf::enode* n) { + for (auto p : euf::enode_parents(n->get_root())) + if (a.is_select(p->get_expr())) + add_map_axiom(g, n, p); + } + + void array_plugin::add_map_axiom(euf::egraph& g, euf::enode * n, euf::enode* sel) { + func_decl* f = nullptr; + VERIFY(sel->get_arg(0)->get_root() == n->get_root()); + SASSERT(a.is_map(n->get_expr())); + VERIFY(a.is_map(n->get_decl(), f)); + expr_ref apply_map(m); + expr_ref_vector args(m); + euf::enode_vector eargs; + for (auto arg : euf::enode_args(n)) { + auto nsel = mk_select(g, arg, sel); + eargs.push_back(nsel); + args.push_back(nsel->get_expr()); + } + expr_ref f_map(m.mk_app(f, args), m); + auto nsel = mk_select(g, n, sel); + auto nmap = g.find(f_map); + if (!nmap) + nmap = g.mk(f_map, 0, eargs.size(), eargs.data()); + if (are_distinct(nsel, nmap)) { + expr_ref eq(m.mk_eq(nmap->get_expr(), nsel->get_expr()), m); + ctx.add_clause(eq); + } + else { + g.merge(nmap, nsel, nullptr); + VERIFY(g.propagate()); + } } euf::enode* array_plugin::mk_select(euf::egraph& g, euf::enode* b, euf::enode* sel) { @@ -130,6 +175,24 @@ namespace sls { } } + // const(v) ~ b, b[j] occurs -> v = (const v)[j] + void array_plugin::force_const_axiom(euf::egraph& g, euf::enode* cn, euf::enode* sel) { + SASSERT(a.is_const(cn->get_expr())); + SASSERT(a.is_select(sel->get_expr())); + if (sel->get_arg(0)->get_root() != cn->get_root()) + return; + auto val = cn->get_arg(0); + auto nsel = mk_select(g, cn, sel); + if (are_distinct(nsel, sel)) { + expr_ref eq(m.mk_eq(val->get_expr(), nsel->get_expr()), m); + ctx.add_clause(eq); + } + else { + g.merge(nsel, sel, nullptr); + VERIFY(g.propagate()); + } + } + bool array_plugin::are_distinct(euf::enode* a, euf::enode* b) { a = a->get_root(); b = b->get_root(); @@ -156,7 +219,7 @@ namespace sls { args.push_back(sto->get_arg(i)); expr_ref sel(a.mk_select(args), m); expr_ref eq(m.mk_eq(sel, to_app(sto)->get_arg(sto->get_num_args() - 1)), m); - verbose_stream() << "add store axiom 1 " << mk_bounded_pp(sto, m) << "\n"; + IF_VERBOSE(3, verbose_stream() << "add store axiom 1 " << mk_bounded_pp(sto, m) << "\n"); ctx.add_clause(eq); } @@ -177,7 +240,7 @@ namespace sls { ors.push_back(eq); for (unsigned i = 1; i < sel->get_num_args() - 1; ++i) ors.push_back(m.mk_eq(sel->get_arg(i), sto->get_arg(i))); - verbose_stream() << "add store axiom 2 " << mk_bounded_pp(sto, m) << " " << mk_bounded_pp(sel, m) << "\n"; + IF_VERBOSE(3, verbose_stream() << "add store axiom 2 " << mk_bounded_pp(sto, m) << " " << mk_bounded_pp(sel, m) << "\n"); ctx.add_clause(m.mk_or(ors)); } @@ -195,7 +258,7 @@ namespace sls { if (a.is_array(t)) continue; auto v = ctx.get_value(t); - verbose_stream() << "init " << mk_bounded_pp(t, m) << " := " << mk_bounded_pp(v, m) << "\n"; + IF_VERBOSE(3, verbose_stream() << "init " << mk_bounded_pp(t, m) << " := " << mk_bounded_pp(v, m) << "\n"); n2 = g.find(v); n2 = n2 ? n2: g.mk(v, 0, 0, nullptr); g.merge(n1, n2, nullptr); @@ -209,7 +272,7 @@ namespace sls { g.merge(g.find(x), g.find(y), nullptr); } - display(verbose_stream()); + IF_VERBOSE(3, display(verbose_stream())); } @@ -236,12 +299,13 @@ namespace sls { m_g = alloc(euf::egraph, m); init_egraph(*m_g); flet _strong(m_add_conflicts, false); - saturate_store(*m_g); + saturate(*m_g); } if (!m_kv) { m_kv = alloc(kv); init_kv(*m_g, *m_kv); } + // TODO: adapt to handle "const" arrays and multi-dimensional arrays. auto& kv = *m_kv; auto n = m_g->find(e)->get_root(); expr_ref r(n->get_expr(), m); diff --git a/src/ast/sls/sls_array_plugin.h b/src/ast/sls/sls_array_plugin.h index 4f8f051f4ce..01d177f4c3d 100644 --- a/src/ast/sls/sls_array_plugin.h +++ b/src/ast/sls/sls_array_plugin.h @@ -56,10 +56,15 @@ namespace sls { void init_egraph(euf::egraph& g); void init_kv(euf::egraph& g, kv& kv); - void saturate_store(euf::egraph& g); + void saturate(euf::egraph& g); + void saturate_store(euf::egraph& g, euf::enode* n); + void saturate_const(euf::egraph& g, euf::enode* n); + void saturate_map(euf::egraph& g, euf::enode* n); void force_store_axiom1(euf::egraph& g, euf::enode* n); void force_store_axiom2_down(euf::egraph& g, euf::enode* sto, euf::enode* sel); void force_store_axiom2_up(euf::egraph& g, euf::enode* sto, euf::enode* sel); + void force_const_axiom(euf::egraph& g, euf::enode* cn, euf::enode* sel); + void add_map_axiom(euf::egraph& g, euf::enode* n, euf::enode* sel); void add_store_axiom1(app* sto); void add_store_axiom2(app* sto, app* sel); bool are_distinct(euf::enode* a, euf::enode* b); diff --git a/src/ast/sls/sls_euf_plugin.cpp b/src/ast/sls/sls_euf_plugin.cpp index fa00cf4db72..3bb90aa0a06 100644 --- a/src/ast/sls/sls_euf_plugin.cpp +++ b/src/ast/sls/sls_euf_plugin.cpp @@ -15,7 +15,6 @@ Module Name: Todo: -- try incremental CC with backtracking for changing assignments - try determining plateau moves. - try generally a model rotation move. diff --git a/src/ast/sls/sls_seq_plugin.cpp b/src/ast/sls/sls_seq_plugin.cpp index c8715d4452e..89118cae2fc 100644 --- a/src/ast/sls/sls_seq_plugin.cpp +++ b/src/ast/sls/sls_seq_plugin.cpp @@ -600,9 +600,12 @@ namespace sls { VERIFY(m.is_eq(e, x, y)); IF_VERBOSE(3, verbose_stream() << is_true << ": " << mk_bounded_pp(e, m, 3) << "\n"); if (ctx.is_true(e)) { - return repair_down_str_eq_edit_distance(e); - if (ctx.rand(2) != 0) - return repair_down_str_eq_unify(e); + if (false && ctx.rand(2) != 0 && repair_down_str_eq_edit_distance_incremental(e)) + return true; + if (ctx.rand(2) != 0 && repair_down_str_eq_edit_distance(e)) + return true; + if (ctx.rand(2) != 0 && repair_down_str_eq_unify(e)) + return true; if (!is_value(x)) m_str_updates.push_back({ x, strval1(y), 1 }); if (!is_value(y)) @@ -645,12 +648,74 @@ namespace sls { if (a[i - 1] == b[j - 1]) d[i][j] = d[i - 1][j - 1]; else - d[i][j] = std::min(std::min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + 1); + d[i][j] = 1 + std::min(std::min(d[i - 1][j], d[i][j - 1]), d[i - 1][j - 1]); } } return d[n][m]; } + /** + * \brief edit distance with update calculation + */ + unsigned seq_plugin::edit_distance_with_updates(zstring const& a, bool_vector const& a_is_value, zstring const& b, bool_vector const& b_is_value) { + unsigned n = a.length(); + unsigned m = b.length(); + vector d(n + 1); // edit distance + vector u(n + 1); // edit distance with updates. + m_string_updates.reset(); + for (unsigned i = 0; i <= n; ++i) { + d[i].resize(m + 1, 0); + u[i].resize(m + 1, 0); + } + for (unsigned i = 0; i <= n; ++i) + d[i][0] = i, u[i][0] = i; + for (unsigned j = 0; j <= m; ++j) + d[0][j] = j, u[0][j] = j; + for (unsigned j = 1; j <= m; ++j) { + for (unsigned i = 1; i <= n; ++i) { + if (a[i - 1] == b[j - 1]) { + d[i][j] = d[i - 1][j - 1]; + u[i][j] = u[i - 1][j - 1]; + } + else { + u[i][j] = 1 + std::min(u[i - 1][j], std::min(u[i][j - 1], u[i - 1][j - 1])); + d[i][j] = 1 + std::min(d[i - 1][j], std::min(d[i][j - 1], d[i - 1][j - 1])); + + // TODO: take into account for a_is_value[i - 1] and b_is_value[j - 1] + // and whether index i-1, j-1 is at the boundary of an empty string variable. + + if (d[i - 1][j] < u[i][j] && !a_is_value[i - 1]) { + m_string_updates.reset(); + u[i][j] = d[i - 1][j]; + } + if (d[i][j - 1] < u[i][j] && !b_is_value[i - 1]) { + m_string_updates.reset(); + u[i][j] = d[i][j - 1]; + } + if (d[i - 1][j - 1] < u[i][j] && (!a_is_value[i - 1] || !b_is_value[j - 1])) { + m_string_updates.reset(); + u[i][j] = d[i - 1][j - 1]; + } + if (d[i - 1][j] == u[i][j] && !a_is_value[i - 1]) { + add_string_update(side::left, op::del, i - 1, 0); + add_string_update(side::left, op::add, j - 1, i - 1); + } + if (d[i][j - 1] == u[i][j] && !b_is_value[j - 1]) { + add_string_update(side::right, op::del, j - 1, 0); + add_string_update(side::right, op::add, i - 1, j - 1); + } + if (d[i - 1][j - 1] == u[i][j] && !a_is_value[i - 1]) + add_string_update(side::left, op::copy, j - 1, i - 1); + + if (d[i - 1][j - 1] == u[i][j] && !b_is_value[j - 1]) + add_string_update(side::right, op::copy, i - 1, j - 1); + + } + } + } + return u[n][m]; + } + void seq_plugin::add_edit_updates(ptr_vector const& w, zstring const& val, zstring const& val_other, uint_set const& chars) { for (auto x : w) { if (is_value(x)) @@ -700,6 +765,8 @@ namespace sls { break; } } + +#if 0 if (last_diff != 0) { unsigned index = last_diff; for (auto x : w) { @@ -715,6 +782,162 @@ namespace sls { index -= len_x; } } +#endif + } + + + bool seq_plugin::repair_down_str_eq_edit_distance_incremental(app* eq) { + auto const& L = lhs(eq); + auto const& R = rhs(eq); + zstring a, b; + bool_vector a_is_value, b_is_value; + + for (auto x : L) { + auto const& val = strval0(x); + auto len = val.length(); + auto is_val = is_value(x); + a += val; + for (unsigned i = 0; i < len; ++i) + a_is_value.push_back(is_val); + if (!is_val && len == 0 && !a_is_value.empty()) + a_is_value.back() = false; + } + + for (auto y : R) { + auto const& val = strval0(y); + auto len = val.length(); + auto is_val = is_value(y); + b += val; + for (unsigned i = 0; i < len; ++i) + b_is_value.push_back(is_val); + if (!is_val && len == 0 && !b_is_value.empty()) + b_is_value.back() = false; + } + + if (a == b) + return update(eq->get_arg(0), a) && update(eq->get_arg(1), b); + + unsigned diff = edit_distance_with_updates(a, a_is_value, b, b_is_value); + if (a.length() == 0) { + m_str_updates.push_back({ eq->get_arg(1), zstring(), 1 }); + m_str_updates.push_back({ eq->get_arg(0), zstring(b[0]), 1}); + m_str_updates.push_back({ eq->get_arg(0), zstring(b[b.length() - 1]), 1}); + } + if (b.length() == 0) { + m_str_updates.push_back({ eq->get_arg(0), zstring(), 1 }); + m_str_updates.push_back({ eq->get_arg(1), zstring(a[0]), 1 }); + m_str_updates.push_back({ eq->get_arg(1), zstring(a[a.length() - 1]), 1 }); + } + + verbose_stream() << "diff \"" << a << "\" \"" << b << "\" diff " << diff << " updates " << m_string_updates.size() << "\n"; +#if 1 + for (auto const& [side, op, i, j] : m_string_updates) { + switch (op) { + case op::del: + if (side == side::left) + verbose_stream() << "del " << a[i] << " @ " << i << " left\n"; + else + verbose_stream() << "del " << b[i] << " @ " << i << " right\n"; + break; + case op::add: + if (side == side::left) + verbose_stream() << "add " << b[i] << " @ " << j << " left\n"; + else + verbose_stream() << "add " << a[i] << " @ " << j << " right\n"; + break; + case op::copy: + if (side == side::left) + verbose_stream() << "copy " << b[i] << " @ " << j << " left\n"; + else + verbose_stream() << "copy " << a[i] << " @ " << j << " right\n"; + break; + } + } +#endif + for (auto& [side, op, i, j] : m_string_updates) { + if (op == op::del && side == side::left) { + for (auto x : L) { + + auto const& value = strval0(x); + if (i >= value.length()) + i -= value.length(); + else { + if (!is_value(x)) + m_str_updates.push_back({ x, value.extract(0, i) + value.extract(i + 1, value.length()), 1 }); + break; + } + } + } + else if (op == op::del && side == side::right) { + for (auto x : R) { + auto const& value = strval0(x); + if (i >= value.length()) + i -= value.length(); + else { + if (!is_value(x)) + m_str_updates.push_back({ x, value.extract(0, i) + value.extract(i + 1, value.length()), 1 }); + break; + } + } + } + else if (op == op::add && side == side::left) { + for (auto x : L) { + auto const& value = strval0(x); + //verbose_stream() << "add " << j << " " << value << " " << value.length() << " " << is_value(x) << "\n"; + if (j > value.length() || (j == value.length() && j > 0)) { + j -= value.length(); + continue; + } + if (!is_value(x)) + m_str_updates.push_back({ x, value.extract(0, j) + zstring(b[i]) + value.extract(j, value.length()), 1 }); + if (j < value.length()) + break; + } + } + else if (op == op::add && side == side::right) { + for (auto x : R) { + auto const& value = strval0(x); + //verbose_stream() << "add " << j << " " << value << " " << value.length() << " " << is_value(x) << "\n"; + if (j > value.length() || (j == value.length() && j > 0)) { + j -= value.length(); + continue; + } + if (!is_value(x)) + m_str_updates.push_back({ x, value.extract(0, j) + zstring(a[i]) + value.extract(j, value.length()), 1 }); + if (j < value.length()) + break; + } + } + else if (op == op::copy && side == side::left) { + for (auto x : L) { + auto const& value = strval0(x); + if (j >= value.length()) + j -= value.length(); + else { + if (!is_value(x)) + m_str_updates.push_back({ x, value.extract(0, j) + zstring(b[i]) + value.extract(j + 1, value.length()), 1 }); + break; + } + } + } + else if (op == op::copy && side == side::right) { + for (auto x : R) { + auto const& value = strval0(x); + if (j >= value.length()) + j -= value.length(); + else { + if (!is_value(x)) + m_str_updates.push_back({ x, value.extract(0, j) + zstring(a[i]) + value.extract(j + 1, value.length()), 1 }); + break; + } + } + } + } + verbose_stream() << "num updates " << m_str_updates.size() << "\n"; + bool r = apply_update(); + verbose_stream() << "apply update " << r << "\n"; + //VERIFY(r); + return r; } bool seq_plugin::repair_down_str_eq_edit_distance(app* eq) { @@ -734,7 +957,10 @@ namespace sls { b += strval0(y); } if (a == b) - return update(eq->get_arg(0), a) && update(eq->get_arg(1), b); + return update(eq->get_arg(0), a) && update(eq->get_arg(1), b); + + + unsigned diff = edit_distance(a, b); diff --git a/src/ast/sls/sls_seq_plugin.h b/src/ast/sls/sls_seq_plugin.h index ea021a2ce9e..7de3e888acf 100644 --- a/src/ast/sls/sls_seq_plugin.h +++ b/src/ast/sls/sls_seq_plugin.h @@ -72,6 +72,7 @@ namespace sls { bool repair_down_eq(app* e); bool repair_down_str_eq_unify(app* e); bool repair_down_str_eq_edit_distance(app* e); + bool repair_down_str_eq_edit_distance_incremental(app* e); bool repair_down_str_eq(app* e); bool repair_down_str_extract(app* e); bool repair_down_str_contains(expr* e); @@ -91,6 +92,20 @@ namespace sls { void repair_up_str_itos(app* e); void repair_up_str_stoi(app* e); + enum op { + add, del, copy + }; + enum side { + left, right + }; + struct string_update { + side side; + op op; + unsigned i, j; + }; + svector m_string_updates; + void add_string_update(side side, op op, unsigned i, unsigned j) { m_string_updates.push_back({ side, op, i, j }); } + unsigned edit_distance_with_updates(zstring const& a, bool_vector const& a_is_value, zstring const& b, bool_vector const& b_is_value); unsigned edit_distance(zstring const& a, zstring const& b); void add_edit_updates(ptr_vector const& w, zstring const& val, zstring const& val_other, uint_set const& chars); diff --git a/src/math/lp/lar_solver.cpp b/src/math/lp/lar_solver.cpp index 90c1e953762..eb4e16806d9 100644 --- a/src/math/lp/lar_solver.cpp +++ b/src/math/lp/lar_solver.cpp @@ -619,7 +619,7 @@ namespace lp { bool lar_solver::solve_for(unsigned j, lar_term& t, mpq& coeff) { t.clear(); - IF_VERBOSE(10, verbose_stream() << column_is_fixed(j) << " " << is_base(j) << "\n"); + IF_VERBOSE(10, verbose_stream() << "j " << j << " is fixed " << column_is_fixed(j) << " is-base " << is_base(j) << "\n"); if (column_is_fixed(j)) { coeff = get_value(j); return true; @@ -627,8 +627,8 @@ namespace lp { if (!is_base(j)) { for (const auto & c : A_r().m_columns[j]) { lpvar basic_in_row = r_basis()[c.var()]; - IF_VERBOSE(10, verbose_stream() << "c.var() = " << c.var() << " basic_in_row = " << basic_in_row << "\n"); pivot(j, basic_in_row); + IF_VERBOSE(10, verbose_stream() << "is base " << is_base(j) << " c.var() = " << c.var() << " basic_in_row = " << basic_in_row << "\n"); break; } } @@ -643,6 +643,8 @@ namespace lp { else t.add_monomial(-c.coeff(), c.var()); } + IF_VERBOSE(10, verbose_stream() << "j = " << j << " t = "; + print_term(t, verbose_stream()) << " coeff = " << coeff << "\n"); return true; }