From 0cc17c4fe1a4d1f445536ae15d079ee264829785 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 27 Mar 2026 22:33:25 +0000 Subject: [PATCH] Produce annotated pointer constants in incremental SMT2 traces When the incremental SMT2 decision procedure returns pointer values via get(), it now creates annotated_pointer_constant_exprt instead of plain constant_exprt. This enables the trace printer to display symbolic pointer expressions (like string literals and address-of expressions) instead of raw integer addresses. The pointer bit-vector is decomposed into object ID (high bits) and offset (low bits) using config.bv_encoding.object_bits. The object ID is looked up in the decision procedure's object_map to find the base expression, and get_subexpression_at_offset is used to construct the symbolic pointer, following the same approach as the SAT backend's pointer_logict::pointer_expr. A new overload of construct_value_expr_from_smt accepts the smt_object_mapt. The original overload without the object map continues to work unchanged for backward compatibility. Removes the no-new-smt tag from three regression tests that now pass with the incremental SMT2 backend: trace-strings, trace_address_arithmetic1, and xml-trace2. When string constants or other expressions are substituted with symbol_exprt identifiers during SMT conversion, the object_map tracks the substituted symbol rather than the original expression. This caused pointer values in traces to display as 'array_0' instead of the original string literal like '"abc"'. Add expression_identifiers parameter to construct_value_expr_from_smt to enable reverse lookup from substituted symbol names back to original expressions. In build_annotated_pointer, when the base expression from the object_map is a symbol_exprt, check if it matches any identifier in expression_identifiers and use the original expression for the symbolic pointer display. Passes all 170 smt2_incremental unit tests (1040 assertions) and the trace-strings, trace_address_arithmetic1, and xml-trace2 regression tests with both SAT and incremental SMT2 backends. When get_subexpression_at_offset fails, return nil_exprt{} instead of a default-constructed exprt{} so that the is_nil() check in build_annotated_pointer correctly detects the failure and falls back to a plain constant_exprt. Fixes: #8067 --- regression/cbmc/trace-strings/test.desc | 2 +- .../cbmc/trace_address_arithmetic1/test.desc | 2 +- regression/cbmc/xml-trace2/test.desc | 2 +- .../construct_value_expr_from_smt.cpp | 229 +++++++++++++++++- .../construct_value_expr_from_smt.h | 48 +++- .../smt2_incremental_decision_procedure.cpp | 6 +- .../construct_value_expr_from_smt.cpp | 169 +++++++++++++ 7 files changed, 451 insertions(+), 7 deletions(-) diff --git a/regression/cbmc/trace-strings/test.desc b/regression/cbmc/trace-strings/test.desc index 1abee971520..59766b4c193 100644 --- a/regression/cbmc/trace-strings/test.desc +++ b/regression/cbmc/trace-strings/test.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE main.c --trace ^EXIT=10$ diff --git a/regression/cbmc/trace_address_arithmetic1/test.desc b/regression/cbmc/trace_address_arithmetic1/test.desc index 1f934bfacaa..e66bc70c19f 100644 --- a/regression/cbmc/trace_address_arithmetic1/test.desc +++ b/regression/cbmc/trace_address_arithmetic1/test.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE main.c --trace ^EXIT=10$ diff --git a/regression/cbmc/xml-trace2/test.desc b/regression/cbmc/xml-trace2/test.desc index ce9def1a804..18273276e8f 100644 --- a/regression/cbmc/xml-trace2/test.desc +++ b/regression/cbmc/xml-trace2/test.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE main.c --xml-ui \{ 14, 0 \} diff --git a/src/solvers/smt2_incremental/construct_value_expr_from_smt.cpp b/src/solvers/smt2_incremental/construct_value_expr_from_smt.cpp index d9d3b7a1ce0..8aac0b1d000 100644 --- a/src/solvers/smt2_incremental/construct_value_expr_from_smt.cpp +++ b/src/solvers/smt2_incremental/construct_value_expr_from_smt.cpp @@ -1,9 +1,13 @@ // Author: Diffblue Ltd. #include +#include #include +#include #include #include +#include +#include #include #include #include @@ -11,17 +15,96 @@ #include #include +/// \brief Build a symbolic pointer expression from the given +/// \p object_expr, \p offset, and target pointer \p type, following the +/// same approach as pointer_logict::pointer_expr in pointer_logic.cpp. +static exprt build_pointer_expr( + const exprt &object_expr, + const mp_integer &offset, + const pointer_typet &type, + const namespacet &ns) +{ + typet subtype = type.base_type(); + // Void pointers with an offset are treated as char pointers, matching + // GCC behavior for pointer arithmetic on void pointers. + if(subtype.id() == ID_empty) + subtype = char_type(); + if(object_expr.id() == ID_string_constant) + { + subtype = object_expr.type(); + // A string constant must be array-typed with fixed size. + const array_typet &array_type = to_array_type(object_expr.type()); + mp_integer array_size = + numeric_cast_v(to_constant_expr(array_type.size())); + if(array_size > offset) + { + to_array_type(subtype).size() = + from_integer(array_size - offset, array_type.size().type()); + } + } + auto deep_object_opt = + get_subexpression_at_offset(object_expr, offset, subtype, ns); + if(!deep_object_opt.has_value()) + return nil_exprt{}; + exprt deep_object = deep_object_opt.value(); + simplify(deep_object, ns); + if( + deep_object.id() != ID_byte_extract_little_endian && + deep_object.id() != ID_byte_extract_big_endian) + { + return typecast_exprt::conditional_cast( + address_of_exprt(deep_object), type); + } + + const byte_extract_exprt &be = to_byte_extract_expr(deep_object); + const address_of_exprt base(be.op()); + if(be.offset() == 0) + return typecast_exprt::conditional_cast(base, type); + + const auto object_size = pointer_offset_size(be.op().type(), ns); + if(object_size.has_value() && *object_size <= 1) + { + return typecast_exprt::conditional_cast( + plus_exprt(base, from_integer(offset, pointer_diff_type())), type); + } + else if(object_size.has_value() && offset % *object_size == 0) + { + return typecast_exprt::conditional_cast( + plus_exprt( + base, from_integer(offset / *object_size, pointer_diff_type())), + type); + } + else + { + return typecast_exprt::conditional_cast( + plus_exprt( + typecast_exprt(base, pointer_type(char_type())), + from_integer(offset, pointer_diff_type())), + type); + } +} + class value_expr_from_smt_factoryt : public smt_term_const_downcast_visitort { private: const typet &type_to_construct; const namespacet &ns; + const smt_object_mapt *object_map; + const std::unordered_map + *expression_identifiers; std::optional result; explicit value_expr_from_smt_factoryt( const typet &type_to_construct, - const namespacet &ns) - : type_to_construct{type_to_construct}, ns{ns}, result{} + const namespacet &ns, + const smt_object_mapt *object_map = nullptr, + const std::unordered_map + *expression_identifiers = nullptr) + : type_to_construct{type_to_construct}, + ns{ns}, + object_map{object_map}, + expression_identifiers{expression_identifiers}, + result{} { } @@ -53,6 +136,10 @@ class value_expr_from_smt_factoryt : public smt_term_const_downcast_visitort { result = null_pointer_exprt{*pointer_type}; } + else if(object_map) + { + result = build_annotated_pointer(bit_vector_constant, *pointer_type); + } else { // The reason we are manually constructing a constant_exprt here is a @@ -97,6 +184,91 @@ class value_expr_from_smt_factoryt : public smt_term_const_downcast_visitort type_to_construct.pretty()); } + /// \brief Build an annotated pointer constant from a bit-vector value + /// using the object map to reconstruct the symbolic pointer expression. + exprt build_annotated_pointer( + const smt_bit_vector_constant_termt &bit_vector_constant, + const pointer_typet &pointer_type) + { + const auto sort_width = bit_vector_constant.get_sort().bit_width(); + const irep_idt bvrep = + integer2bvrep(bit_vector_constant.value(), sort_width); + const std::size_t object_bits = config.bv_encoding.object_bits; + const std::size_t pointer_width = pointer_type.get_width(); + INVARIANT( + pointer_width > object_bits, + "Pointer width should be wider than object bits."); + const std::size_t offset_bits = pointer_width - object_bits; + + // Extract object ID from high bits. + const mp_integer object_id = bit_vector_constant.value() >> offset_bits; + + // Extract offset from low bits, sign-extending. + mp_integer offset = bit_vector_constant.value() % power(2, offset_bits); + if(offset >= power(2, offset_bits - 1)) + offset -= power(2, offset_bits); + + // Null object with nonzero offset. + if(object_id == 0) + { + null_pointer_exprt null_ptr(pointer_type); + exprt symbolic = + plus_exprt(null_ptr, from_integer(offset, pointer_diff_type())); + return annotated_pointer_constant_exprt{bvrep, symbolic}; + } + + // Build reverse map from unique_id to base_expression. + std::unordered_map + id_to_object; + for(const auto &entry : *object_map) + id_to_object.emplace(entry.second.unique_id, &entry.second); + + const auto it = id_to_object.find(numeric_cast_v(object_id)); + if(it == id_to_object.end()) + { + // Unknown object - fall back to plain constant. + return constant_exprt{bvrep, pointer_type}; + } + + const decision_procedure_objectt &object = *it->second; + + // Invalid pointer object. + if(object.base_expression == make_invalid_pointer_expr()) + return annotated_pointer_constant_exprt{ + bvrep, constant_exprt("INVALID", pointer_type)}; + + // Resolve the base expression through the expression_identifiers + // reverse map. When string constants or other expressions are + // substituted with symbol_exprt identifiers during SMT conversion, + // the object_map tracks the substituted symbol rather than the + // original expression. We reverse that lookup here to recover the + // original expression for display in traces. + exprt resolved_expr = object.base_expression; + if( + expression_identifiers && + expr_try_dynamic_cast(object.base_expression)) + { + const auto &sym = to_symbol_expr(object.base_expression); + for(const auto &entry : *expression_identifiers) + { + if(entry.second.identifier() == sym.get_identifier()) + { + resolved_expr = entry.first; + break; + } + } + } + + exprt symbolic = + build_pointer_expr(resolved_expr, offset, pointer_type, ns); + if(symbolic.is_nil()) + { + // Could not build symbolic expression - fall back. + return constant_exprt{bvrep, pointer_type}; + } + return annotated_pointer_constant_exprt{bvrep, symbolic}; + } + void visit(const smt_function_application_termt &function_application) override { @@ -131,6 +303,37 @@ class value_expr_from_smt_factoryt : public smt_term_const_downcast_visitort INVARIANT(factory.result.has_value(), "Factory must result in expr value."); return *factory.result; } + + /// \brief Overload accepting an object map for annotated pointer + /// construction. + static exprt make( + const smt_termt &value_term, + const typet &type_to_construct, + const namespacet &ns, + const smt_object_mapt &object_map) + { + value_expr_from_smt_factoryt factory{type_to_construct, ns, &object_map}; + value_term.accept(factory); + INVARIANT(factory.result.has_value(), "Factory must result in expr value."); + return *factory.result; + } + + /// \brief Overload accepting both an object map and expression identifiers + /// for resolving substituted expressions in pointer annotation. + static exprt make( + const smt_termt &value_term, + const typet &type_to_construct, + const namespacet &ns, + const smt_object_mapt &object_map, + const std::unordered_map + &expression_identifiers) + { + value_expr_from_smt_factoryt factory{ + type_to_construct, ns, &object_map, &expression_identifiers}; + value_term.accept(factory); + INVARIANT(factory.result.has_value(), "Factory must result in expr value."); + return *factory.result; + } }; exprt construct_value_expr_from_smt( @@ -140,3 +343,25 @@ exprt construct_value_expr_from_smt( { return value_expr_from_smt_factoryt::make(value_term, type_to_construct, ns); } + +exprt construct_value_expr_from_smt( + const smt_termt &value_term, + const typet &type_to_construct, + const namespacet &ns, + const smt_object_mapt &object_map) +{ + return value_expr_from_smt_factoryt::make( + value_term, type_to_construct, ns, object_map); +} + +exprt construct_value_expr_from_smt( + const smt_termt &value_term, + const typet &type_to_construct, + const namespacet &ns, + const smt_object_mapt &object_map, + const std::unordered_map + &expression_identifiers) +{ + return value_expr_from_smt_factoryt::make( + value_term, type_to_construct, ns, object_map, expression_identifiers); +} diff --git a/src/solvers/smt2_incremental/construct_value_expr_from_smt.h b/src/solvers/smt2_incremental/construct_value_expr_from_smt.h index 1dfa442665e..43f07a2fb50 100644 --- a/src/solvers/smt2_incremental/construct_value_expr_from_smt.h +++ b/src/solvers/smt2_incremental/construct_value_expr_from_smt.h @@ -5,7 +5,9 @@ #include -class smt_termt; +#include +#include + class typet; /// \brief Given a \p value_term and a \p type_to_construct, this function @@ -32,4 +34,48 @@ exprt construct_value_expr_from_smt( const typet &type_to_construct, const namespacet &ns); +/// \brief Overload that accepts an \p object_map for constructing annotated +/// pointer constants. When the type to construct is a pointer type, the +/// object map is used to reconstruct a symbolic pointer expression from the +/// encoded object ID and offset in the bit-vector value. +/// \param value_term +/// The SMT term encoding the value. +/// \param type_to_construct +/// The type which the constructed expr returned is expected to have. +/// \param ns +/// The namespace for type lookups. +/// \param object_map +/// Map from base expressions to tracked object information, used to +/// reconstruct symbolic pointer expressions for trace display. +exprt construct_value_expr_from_smt( + const smt_termt &value_term, + const typet &type_to_construct, + const namespacet &ns, + const smt_object_mapt &object_map); + +/// \brief Overload that additionally accepts \p expression_identifiers to +/// reverse identifier substitutions for pointer annotation. When the object +/// map contains a symbol_exprt that was introduced by identifier substitution +/// (e.g., for string constants mapped to array functions), this overload +/// recovers the original expression for symbolic pointer display. +/// \param value_term +/// The SMT term encoding the value. +/// \param type_to_construct +/// The type which the constructed expr returned is expected to have. +/// \param ns +/// The namespace for type lookups. +/// \param object_map +/// Map from base expressions to tracked object information. +/// \param expression_identifiers +/// Map from original expressions to their SMT identifier terms, as used +/// during expression-to-SMT conversion. This is used to reverse the +/// substitution and recover the original expression for trace display. +exprt construct_value_expr_from_smt( + const smt_termt &value_term, + const typet &type_to_construct, + const namespacet &ns, + const smt_object_mapt &object_map, + const std::unordered_map + &expression_identifiers); + #endif // CPROVER_SOLVERS_SMT2_INCREMENTAL_CONSTRUCT_VALUE_EXPR_FROM_SMT_H diff --git a/src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp b/src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp index 0d8a91a6f89..1d42f69114c 100644 --- a/src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp +++ b/src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp @@ -516,7 +516,11 @@ std::optional smt2_incremental_decision_proceduret::get_expr( response.pretty()}; } return construct_value_expr_from_smt( - get_value_response->pairs()[0].get().value(), type, ns); + get_value_response->pairs()[0].get().value(), + type, + ns, + object_map, + expression_identifiers); } // This is a fall back which builds resulting expression based on getting the diff --git a/unit/solvers/smt2_incremental/construct_value_expr_from_smt.cpp b/unit/solvers/smt2_incremental/construct_value_expr_from_smt.cpp index 76cedc3115e..b3773d487d4 100644 --- a/unit/solvers/smt2_incremental/construct_value_expr_from_smt.cpp +++ b/unit/solvers/smt2_incremental/construct_value_expr_from_smt.cpp @@ -3,13 +3,16 @@ #include #include #include +#include #include +#include #include #include #include #include #include +#include #include #include #include @@ -206,3 +209,169 @@ TEST_CASE( invariant_failure_containing(invariant_reason)); } } + +TEST_CASE( + "Annotated pointer constant from smt with object map.", + "[core][smt2_incremental]") +{ + // Use default object_bits (8) with 64-bit pointers. + // Encoding: [object_id (8 bits) | offset (56 bits)] + const std::size_t object_bits = config.bv_encoding.object_bits; + const std::size_t pointer_width = 64; + const std::size_t offset_bits = pointer_width - object_bits; + const pointer_typet ptr_type{empty_typet{}, pointer_width}; + symbol_tablet symbol_table; + const namespacet ns{symbol_table}; + + SECTION("Null pointer with object map produces null_pointer_exprt") + { + const smt_object_mapt object_map = initial_smt_object_map(); + const smt_bit_vector_constant_termt zero_term{0, pointer_width}; + const exprt result = + construct_value_expr_from_smt(zero_term, ptr_type, ns, object_map); + REQUIRE(result == null_pointer_exprt{ptr_type}); + } + + SECTION("Known symbol object produces annotated_pointer_constant_exprt") + { + smt_object_mapt object_map = initial_smt_object_map(); + const symbol_exprt sym{"my_var", signedbv_typet{32}}; + decision_procedure_objectt obj; + obj.base_expression = sym; + obj.unique_id = 2; + obj.size = from_integer(4, size_type()); + obj.is_dynamic = false; + object_map.emplace(sym, obj); + + // Encode pointer: object_id=2 in high bits, offset=0 in low bits. + const mp_integer encoded_value = mp_integer{2} << offset_bits; + const smt_bit_vector_constant_termt term{encoded_value, pointer_width}; + const pointer_typet sym_ptr_type{signedbv_typet{32}, pointer_width}; + const exprt result = + construct_value_expr_from_smt(term, sym_ptr_type, ns, object_map); + REQUIRE(can_cast_expr(result)); + const auto &annotated = to_annotated_pointer_constant_expr(result); + // The symbolic pointer is address_of(my_var) possibly wrapped in a + // typecast to the correct pointer width. + const exprt expected_symbolic = + typecast_exprt::conditional_cast(address_of_exprt(sym), sym_ptr_type); + REQUIRE(annotated.symbolic_pointer() == expected_symbolic); + } + + SECTION("Invalid pointer object produces annotated constant with INVALID") + { + const smt_object_mapt object_map = initial_smt_object_map(); + // Invalid object has unique_id=1. + const mp_integer encoded_value = mp_integer{1} << offset_bits; + const smt_bit_vector_constant_termt term{encoded_value, pointer_width}; + const exprt result = + construct_value_expr_from_smt(term, ptr_type, ns, object_map); + REQUIRE(can_cast_expr(result)); + const auto &annotated = to_annotated_pointer_constant_expr(result); + REQUIRE( + annotated.symbolic_pointer() == constant_exprt("INVALID", ptr_type)); + } + + SECTION("Unknown object ID falls back to plain constant_exprt") + { + const smt_object_mapt object_map = initial_smt_object_map(); + // Use object_id=99 which does not exist in the map. + const mp_integer encoded_value = mp_integer{99} << offset_bits; + const smt_bit_vector_constant_termt term{encoded_value, pointer_width}; + const exprt result = + construct_value_expr_from_smt(term, ptr_type, ns, object_map); + REQUIRE(result.id() == ID_constant); + REQUIRE(result.type() == ptr_type); + } + + SECTION("Original overload without object map still works") + { + // Non-zero pointer without object_map returns plain constant_exprt. + const smt_bit_vector_constant_termt term{12, pointer_width}; + const exprt result = construct_value_expr_from_smt(term, ptr_type, ns); + REQUIRE( + result == constant_exprt(integer2bvrep(12, pointer_width), ptr_type)); + } +} + +TEST_CASE( + "Reverse expression_identifiers lookup for substituted expressions.", + "[core][smt2_incremental]") +{ + // Simulates the scenario where an expression (e.g., a string constant) + // was substituted with a symbol_exprt during SMT conversion, and the + // object_map tracks the substituted symbol_exprt as the base expression. + // The reverse lookup through expression_identifiers should recover the + // original expression for symbolic pointer display in traces. + const std::size_t object_bits = config.bv_encoding.object_bits; + const std::size_t pointer_width = 64; + const std::size_t offset_bits = pointer_width - object_bits; + symbol_tablet symbol_table; + const namespacet ns{symbol_table}; + + // Use a symbol_exprt as the "original" expression that was substituted. + // In practice this would be a string_constantt, but we use a symbol + // with a known type to avoid depending on config for type widths. + const signedbv_typet value_type{32}; + const symbol_exprt original_expr{"my_original_sym", value_type}; + const symbol_exprt substituted_sym{"array_0", value_type}; + + // Build expression_identifiers: original -> smt_identifier_termt + const smt_identifier_termt smt_id{ + "array_0", smt_bit_vector_sortt{pointer_width}}; + std::unordered_map + expression_identifiers; + expression_identifiers.emplace(original_expr, smt_id); + + // Build object_map with the substituted symbol as base expression. + smt_object_mapt object_map = initial_smt_object_map(); + decision_procedure_objectt obj; + obj.base_expression = substituted_sym; + obj.unique_id = 2; + obj.size = from_integer(4, signedbv_typet{64}); + obj.is_dynamic = false; + object_map.emplace(substituted_sym, obj); + + // Encode pointer: object_id=2 in high bits, offset=0 in low bits. + const mp_integer encoded_value = mp_integer{2} << offset_bits; + const smt_bit_vector_constant_termt term{encoded_value, pointer_width}; + const pointer_typet ptr_type{value_type, pointer_width}; + + SECTION("With expression_identifiers, resolves original expression") + { + const exprt result = construct_value_expr_from_smt( + term, ptr_type, ns, object_map, expression_identifiers); + REQUIRE(can_cast_expr(result)); + const auto &annotated = to_annotated_pointer_constant_expr(result); + // The symbolic pointer should reference the original expression, + // not the substituted symbol_exprt("array_0"). + const exprt &sym_ptr = annotated.symbolic_pointer(); + bool has_original = false; + sym_ptr.visit_pre( + [&](const exprt &node) + { + if(node == original_expr) + has_original = true; + }); + REQUIRE(has_original); + } + + SECTION("Without expression_identifiers, uses substituted symbol") + { + const exprt result = + construct_value_expr_from_smt(term, ptr_type, ns, object_map); + REQUIRE(can_cast_expr(result)); + const auto &annotated = to_annotated_pointer_constant_expr(result); + // Without the reverse map, the result references the substituted + // symbol_exprt("array_0"). + const exprt &sym_ptr = annotated.symbolic_pointer(); + bool has_original = false; + sym_ptr.visit_pre( + [&](const exprt &node) + { + if(node == original_expr) + has_original = true; + }); + REQUIRE_FALSE(has_original); + } +}