From 260ca61d94134b6743807e29f64b5ce4f6918d73 Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Tue, 10 Mar 2026 12:27:34 -0400 Subject: [PATCH 1/3] Rewrite GNU3 demangler for performance using DemangledTypeNode Replace the TypeBuilder-based demangling path with a lightweight DemangledTypeNode representation that defers type object construction until the symbol is fully parsed. This avoids repeated heap allocation and ref-count churn during recursive descent. Key changes: - Add DemangledTypeNode / demangled_type_node.{h,cpp}: a compact IR that mirrors the type grammar without allocating BN Type objects - Use a thread_local demangler instance to amortize vector allocations across calls - Also commonize some of the demangled string length calculations. Result: ~3x throughput improvement on a 180K-symbol corpus with 97.7% success rate (matching the previous implementation). Co-Authored-By: Claude Opus 4.6 --- demangler/gnu3/demangle_gnu3.cpp | 821 +++++++++++++------------ demangler/gnu3/demangle_gnu3.h | 136 ++-- demangler/gnu3/demangled_type_node.cpp | 531 ++++++++++++++++ demangler/gnu3/demangled_type_node.h | 171 +++++ 4 files changed, 1210 insertions(+), 449 deletions(-) create mode 100644 demangler/gnu3/demangled_type_node.cpp create mode 100644 demangler/gnu3/demangled_type_node.h diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index d36fcedf1..c8fec64fb 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -49,8 +49,7 @@ void MyLogDebug(string fmt, ...) #else #define indent() #define dedent() -void MyLogDebug(string fmt, ...) -{ (void)fmt; } +#define MyLogDebug(...) 
do {} while(0) #endif static inline void rtrim(string &s) @@ -59,78 +58,218 @@ static inline void rtrim(string &s) } -static string GetTemplateString(vector args) +static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) +{ + size_t n = 0; + for (const auto& s : v) + n += s.size(); + return n; +} + + +static string GetTemplateString(const vector& args) { - string name = "<"; + // Pre-calculate total length to avoid reallocations + size_t total = 2; // "<" + ">" for (size_t i = 0; i < args.size(); i++) { if (i != 0) - { - name += ", "; - } + total += 2; // ", " + total += args[i].size(); + } + total += 1; // possible " " before ">" - name += args[i].name; + string name; + name.reserve(total); + name += '<'; + for (size_t i = 0; i < args.size(); i++) + { + if (i != 0) + name += ", "; + name += args[i]; } rtrim(name); if (name.back() == '>') name += " "; //Be c++03 compliant where we can - name += ">"; + name += '>'; return name; } -static void ExtendTypeName(TypeBuilder& type, const string& extend) +static string GetOperator(char elm1, char elm2) { - QualifiedName qn = type.GetTypeName(); - if (qn.StringSize() + extend.size() > MAX_DEMANGLE_LENGTH) - throw DemangleException("Detected adversarial mangled string"); - if (qn.size() > 0) - qn.back() += extend; - else - qn.push_back(extend); - - // This type might not be an NTR (Vector35/binaryninja-api#6261) - if (type.GetClass() == NamedTypeReferenceClass) + switch (hash(elm1, elm2)) { - type.SetNamedTypeReference( - NamedTypeReference::GenerateAutoDemangledTypeReference(type.GetNamedTypeReference()->GetTypeReferenceClass(), qn) - ); + case hash('d','c'): return "dynamic_cast"; + case hash('s','c'): return "static_cast"; + case hash('c','c'): return "const_cast"; + case hash('r','c'): return "reinterpret_cast"; + case hash('t','i'): return "typeid"; + case hash('t','e'): return "typeid"; + case hash('s','t'): return "sizeof"; + case hash('s','z'): return "sizeof"; + case hash('a','t'): return "alignof"; + case 
hash('a','z'): return "alignof"; + case hash('n','x'): return "noexcept"; + case hash('s','Z'): return "sizeof..."; + case hash('s','P'): return "sizeof..."; + case hash('s','p'): return ""; + case hash('t','w'): return "throw"; + case hash('t','r'): return "throw"; + case hash('l','s'): return "<<"; // << + case hash('r','s'): return ">>"; // >> + case hash('a','S'): return "="; // = + case hash('n','t'): return "!"; // ! + case hash('e','q'): return "=="; // == + case hash('n','e'): return "!="; // != + case hash('i','x'): return "[]"; // [] + case hash('d','t'): return "."; // . + case hash('p','t'): return "->"; // -> + case hash('m','l'): return "*"; // * + case hash('p','p'): return "++"; // ++ (postfix in context) + case hash('m','m'): return "--"; // -- (postfix in context) + case hash('n','g'): return "-"; // - (unary) + case hash('m','i'): return "-"; // - + case hash('p','s'): return "+"; // + (unary) + case hash('p','l'): return "+"; // + + case hash('a','d'): return "&"; // & (unary) + case hash('a','n'): return "&"; // & + case hash('p','m'): return "->*"; // ->* + case hash('d','v'): return "/"; // / + case hash('r','m'): return "%"; // % + case hash('l','t'): return "<"; // < + case hash('l','e'): return "<="; // <= + case hash('g','t'): return ">"; // > + case hash('g','e'): return ">="; // >= + case hash('c','m'): return ","; // , + case hash('c','l'): return "()"; // () + case hash('c','o'): return "~"; // ~ + case hash('e','o'): return "^"; // ^ + case hash('o','r'): return "|"; // | + case hash('a','a'): return "&&"; // && + case hash('o','o'): return "||"; // || + case hash('d','e'): return "*"; // * (unary) + case hash('m','L'): return "*="; // *= + case hash('p','L'): return "+="; // += + case hash('m','I'): return "-="; // -= + case hash('d','V'): return "/="; // /= + case hash('r','M'): return "%="; // %= + case hash('r','S'): return ">>="; // >>= + case hash('l','S'): return "<<="; // <<= + case hash('a','N'): return "&="; // &= + case 
hash('o','R'): return "|="; // |= + case hash('e','O'): return "^="; // ^= + case hash('d','l'): return "delete"; // delete + case hash('d','a'): return "delete[]"; // delete[] + case hash('n','w'): return "new"; // new + case hash('n','a'): return "new[]"; // new [] + default: return ""; } } - -static TypeBuilder CreateUnknownType(const QualifiedName& s) +static BNNameType GetNameType(char elm1, char elm2) { - return TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, s)); + switch (hash(elm1, elm2)) + { + case hash('n','t'): return OperatorNotNameType; // ! + case hash('n','g'): return OperatorMinusNameType; // - (unary) + case hash('p','s'): return OperatorPlusNameType; // + (unary) + case hash('a','d'): return OperatorBitAndNameType; // & (unary) + case hash('d','e'): return OperatorStarNameType; // * (unary) + case hash('i','x'): return OperatorArrayNameType; // [] + case hash('p','p'): return OperatorIncrementNameType; // ++ (postfix in context) + case hash('m','m'): return OperatorDecrementNameType; // -- (postfix in context) + case hash('l','s'): return OperatorLeftShiftNameType; // << + case hash('r','s'): return OperatorRightShiftNameType; // >> + case hash('a','S'): return OperatorAssignNameType; // = + case hash('e','q'): return OperatorEqualNameType; // == + case hash('n','e'): return OperatorNotEqualNameType; // != + case hash('p','t'): return OperatorArrowNameType; // -> + case hash('m','l'): return OperatorStarNameType; // * + case hash('m','i'): return OperatorMinusNameType; // - + case hash('p','l'): return OperatorPlusNameType; // + + case hash('a','n'): return OperatorBitAndNameType; // & + case hash('p','m'): return OperatorArrowStarNameType; // ->* + case hash('d','v'): return OperatorDivideNameType; // / + case hash('r','m'): return OperatorModulusNameType; // % + case hash('l','t'): return OperatorLessThanNameType; // < + case hash('l','e'): return OperatorLessThanEqualNameType; // <= + case 
hash('g','t'): return OperatorGreaterThanNameType; // > + case hash('g','e'): return OperatorGreaterThanEqualNameType; // >= + case hash('c','m'): return OperatorCommaNameType; // , + case hash('c','l'): return OperatorParenthesesNameType; // () + case hash('c','o'): return OperatorTildeNameType; // ~ + case hash('e','o'): return OperatorXorNameType; // ^ + case hash('o','r'): return OperatorBitOrNameType; // | + case hash('a','a'): return OperatorLogicalAndNameType; // && + case hash('o','o'): return OperatorLogicalOrNameType; // || + case hash('m','L'): return OperatorStarEqualNameType; // *= + case hash('p','L'): return OperatorPlusEqualNameType; // += + case hash('m','I'): return OperatorMinusEqualNameType; // -= + case hash('d','V'): return OperatorDivideEqualNameType; // /= + case hash('r','M'): return OperatorModulusEqualNameType; // %= + case hash('r','S'): return OperatorRightShiftEqualNameType; // >>= + case hash('l','S'): return OperatorLeftShiftEqualNameType; // <<= + case hash('a','N'): return OperatorAndEqualNameType; // &= + case hash('o','R'): return OperatorOrEqualNameType; // |= + case hash('e','O'): return OperatorXorEqualNameType; // ^= + case hash('d','l'): return OperatorDeleteNameType; // delete + case hash('d','a'): return OperatorDeleteArrayNameType; // delete[] + case hash('n','w'): return OperatorNewNameType; // new + case hash('n','a'): return OperatorNewArrayNameType; // new [] + case hash('C','1'): return ConstructorNameType; + case hash('C','2'): return ConstructorNameType; + case hash('C','3'): return ConstructorNameType; + case hash('C','4'): return ConstructorNameType; + case hash('C','5'): return ConstructorNameType; + case hash('D','0'): return DestructorNameType; + case hash('D','1'): return DestructorNameType; + case hash('D','2'): return DestructorNameType; + case hash('D','3'): return DestructorNameType; + case hash('D','4'): return DestructorNameType; + case hash('D','5'): return DestructorNameType; + default: + return 
NoNameType; + } } - -static TypeBuilder CreateUnknownType(const string& s) +static int8_t HexToDec(char c) { - return TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, {s})); + if (isdigit(c)) + { + return c - '0'; + } + else if(islower(c) && c <= 'f') + { + return c - 'a' + 10; + } + return -1; } -DemangleGNU3::Reader::Reader(const string& data): m_data(data), m_offset(0) + +// ===== Reader implementation (non-templated) ===== + +DemangleGNU3Reader::DemangleGNU3Reader(const string& data): m_data(data), m_offset(0) {} -string DemangleGNU3::Reader::PeekString(size_t count) +void DemangleGNU3Reader::Reset(const string& data) { - if (count > Length()) - return "\0"; - return m_data.substr(m_offset, count); + m_data = data; + m_offset = 0; } -char DemangleGNU3::Reader::Peek() +string DemangleGNU3Reader::PeekString(size_t count) { - if (1 > Length()) - return '\0'; - return (char)m_data[m_offset]; + if (count > Length()) + return "\0"; + return m_data.substr(m_offset, count); } -bool DemangleGNU3::Reader::NextIsOneOf(const string& list) + +bool DemangleGNU3Reader::NextIsOneOf(const string& list) { char elm = Peek(); for (auto a : list) @@ -142,21 +281,14 @@ bool DemangleGNU3::Reader::NextIsOneOf(const string& list) } -string DemangleGNU3::Reader::GetRaw() +string DemangleGNU3Reader::GetRaw() { return m_data.substr(m_offset); } -char DemangleGNU3::Reader::Read() -{ - if (1 > Length()) - throw DemangleException(); - return m_data[m_offset++]; -} - -string DemangleGNU3::Reader::ReadString(size_t count) +string DemangleGNU3Reader::ReadString(size_t count) { if (count > Length()) throw DemangleException(); @@ -167,7 +299,7 @@ string DemangleGNU3::Reader::ReadString(size_t count) } -string DemangleGNU3::Reader::ReadUntil(char sentinal) +string DemangleGNU3Reader::ReadUntil(char sentinal) { size_t pos = m_data.find_first_of(sentinal, m_offset); if (pos == string::npos) @@ -176,46 +308,74 @@ string 
DemangleGNU3::Reader::ReadUntil(char sentinal) } -void DemangleGNU3::Reader::UnRead(size_t count) + +// ===== DemangleGNU3 implementation ===== + +DemangleGNU3::DemangleGNU3(Architecture* arch, const string& mangledName) : + m_reader(mangledName), + m_arch(arch), + m_isParameter(false), + m_shouldDeleteReader(true), + m_topLevel(true), + m_isOperatorOverload(false) { - if (count <= m_offset) - m_offset -= count; + MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); } -void DemangleGNU3::Reader::Consume(size_t count) +void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) { - if (count > Length()) - throw DemangleException(); - m_offset += count; + m_reader.Reset(mangledName); + m_arch = arch; + m_varName.clear(); + m_substitute.clear(); + m_templateSubstitute.clear(); + m_functionSubstitute.clear(); + m_lastName.clear(); + m_nameType = {}; + m_localType = {}; + m_hasReturnType = {}; + m_isParameter = false; + m_shouldDeleteReader = true; + m_topLevel = true; + m_isOperatorOverload = false; } -size_t DemangleGNU3::Reader::Length() const +DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) { - return m_data.length() - m_offset; + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, s); } -DemangleGNU3::DemangleGNU3(Architecture* arch, const string& mangledName) : - m_reader(mangledName), - m_arch(arch), - m_isParameter(false), - m_shouldDeleteReader(true), - m_topLevel(true), - m_isOperatorOverload(false) +DemangledTypeNode DemangleGNU3::CreateUnknownType(const string& s) { - MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{s}); +} + + +void DemangleGNU3::ExtendTypeName(DemangledTypeNode& type, const string& extend) +{ + if (type.NameStringSize() + extend.size() > MAX_DEMANGLE_LENGTH) + throw DemangleException("Detected adversarial mangled string"); + + { + auto& qn = type.GetMutableTypeName(); + if 
(qn.size() > 0) + qn.back() += extend; + else + qn.push_back(extend); + } } -void DemangleGNU3::PushTemplateType(TypeBuilder type) +void DemangleGNU3::PushTemplateType(const DemangledTypeNode& type) { m_templateSubstitute.push_back(type); } -const TypeBuilder& DemangleGNU3::GetTemplateType(size_t ref) +const DemangledTypeNode& DemangleGNU3::GetTemplateType(size_t ref) { if (ref >= m_templateSubstitute.size()) { @@ -226,13 +386,13 @@ const TypeBuilder& DemangleGNU3::GetTemplateType(size_t ref) } -void DemangleGNU3::PushType(TypeBuilder type) +void DemangleGNU3::PushType(const DemangledTypeNode& type) { m_substitute.push_back(type); } -const TypeBuilder& DemangleGNU3::GetType(size_t ref) +const DemangledTypeNode& DemangleGNU3::GetType(size_t ref) { if (ref >= m_substitute.size()) { @@ -287,7 +447,7 @@ string DemangleGNU3::DemangleSourceName() } -TypeBuilder DemangleGNU3::DemangleFunction(bool cnst, bool vltl) +DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) { indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); @@ -298,26 +458,26 @@ TypeBuilder DemangleGNU3::DemangleFunction(bool cnst, bool vltl) m_reader.Consume(); } - TypeBuilder retType = DemangleType(); + DemangledTypeNode retType = DemangleType(); - vector params; + ParamList params; old_isparam = m_isParameter; m_isParameter = true; m_functionSubstitute.push_back({}); int i = 0; while (m_reader.Peek() != 'E') { - TypeBuilder param = DemangleType(); + DemangledTypeNode param = DemangleType(); if (param.GetClass() == VoidTypeClass) continue; MyLogDebug("Var_%d - %s\n", i++, param.GetString().c_str()); m_functionSubstitute.back().push_back(param); - params.push_back({"", param.Finalize(), true, Variable()}); + params.push_back({"", std::make_shared(std::move(param))}); } m_reader.Consume(); m_functionSubstitute.pop_back(); m_isParameter = old_isparam; - TypeBuilder newType = TypeBuilder::FunctionType(retType.Finalize(), nullptr, params); + DemangledTypeNode newType = 
DemangledTypeNode::FunctionType(std::move(retType), nullptr, std::move(params)); PushType(newType); newType.SetConst(cnst); @@ -331,7 +491,7 @@ TypeBuilder DemangleGNU3::DemangleFunction(bool cnst, bool vltl) } -const TypeBuilder& DemangleGNU3::DemangleTemplateSubstitution() +const DemangledTypeNode& DemangleGNU3::DemangleTemplateSubstitution() { indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); @@ -365,11 +525,11 @@ const TypeBuilder& DemangleGNU3::DemangleTemplateSubstitution() } -TypeBuilder DemangleGNU3::DemangleType() +DemangledTypeNode DemangleGNU3::DemangleType() { indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); - TypeBuilder type; + DemangledTypeNode type; bool cnst = false, vltl = false, rstrct = false; bool substitute = false; QualifiedName name; @@ -399,7 +559,7 @@ TypeBuilder DemangleGNU3::DemangleType() if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + vector args; DemangleTemplateArgs(args); ExtendTypeName(type, GetTemplateString(args)); type.SetHasTemplateArguments(true); @@ -412,9 +572,9 @@ TypeBuilder DemangleGNU3::DemangleType() { m_reader.Consume(1); type = DemangleUnqualifiedName(); - QualifiedName qn = type.GetTypeName(); + auto qn = type.GetTypeName(); qn.insert(qn.begin(), "std"); - type.SetTypeName(qn); + type.SetTypeName(std::move(qn)); substitute = true; } else @@ -426,7 +586,7 @@ TypeBuilder DemangleGNU3::DemangleType() m_reader.Consume(); if (substitute) PushType(type); - vector args; + vector args; DemangleTemplateArgs(args); ExtendTypeName(type, GetTemplateString(args)); type.SetHasTemplateArguments(true); @@ -445,22 +605,19 @@ TypeBuilder DemangleGNU3::DemangleType() if (m_reader.Peek() == 's') { m_reader.Consume(); - type = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, {DemangleSourceName()})); + type = DemangledTypeNode::NamedType(StructNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); break; 
} else if (m_reader.Peek() == 'u') { m_reader.Consume(); - type = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnionNamedTypeClass, {DemangleSourceName()})); + type = DemangledTypeNode::NamedType(UnionNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'e') { m_reader.Consume(); - type = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - EnumNamedTypeClass, {DemangleSourceName()}), + type = DemangledTypeNode::NamedType(EnumNamedTypeClass, QualifiedName({DemangleSourceName()}), m_arch->GetDefaultIntegerSize(), m_arch->GetDefaultIntegerSize()); break; } @@ -473,7 +630,7 @@ TypeBuilder DemangleGNU3::DemangleType() m_reader.Consume(); if (substitute) PushType(type); - vector args; + vector args; DemangleTemplateArgs(args); ExtendTypeName(type, GetTemplateString(args)); type.SetHasTemplateArguments(true); @@ -481,50 +638,59 @@ TypeBuilder DemangleGNU3::DemangleType() break; } case 'P': - type = TypeBuilder::PointerType(m_arch, DemangleType().Finalize(), cnst, vltl, PointerReferenceType); + { + DemangledTypeNode child = DemangleType(); + type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, PointerReferenceType); substitute = true; break; + } case 'R': - type = TypeBuilder::PointerType(m_arch, DemangleType().Finalize(), cnst, vltl, ReferenceReferenceType); + { + DemangledTypeNode child = DemangleType(); + type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, ReferenceReferenceType); substitute = true; break; + } case 'O': - type = TypeBuilder::PointerType(m_arch, DemangleType().Finalize(), cnst, vltl, RValueReferenceType); + { + DemangledTypeNode child = DemangleType(); + type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, RValueReferenceType); substitute = true; break; + } case 'C': //TODO:complex case 'G': //TODO:imaginary case 'U': //TODO:vendor extended type throw DemangleException(); - 
case 'v': type = TypeBuilder::VoidType(); break; - case 'w': type = TypeBuilder::IntegerType(4, false, "wchar_t"); break; //TODO: verify - case 'b': type = TypeBuilder::BoolType(); break; - case 'c': type = TypeBuilder::IntegerType(1, true); break; - case 'a': type = TypeBuilder::IntegerType(1, true); break; - case 'h': type = TypeBuilder::IntegerType(1, false); break; - case 's': type = TypeBuilder::IntegerType(2, true); break; - case 't': type = TypeBuilder::IntegerType(2, false); break; - case 'i': type = TypeBuilder::IntegerType(4, true); break; - case 'j': type = TypeBuilder::IntegerType(4, false); break; - case 'l': type = TypeBuilder::IntegerType(m_arch->GetAddressSize(), true); break; //long - case 'm': type = TypeBuilder::IntegerType(m_arch->GetAddressSize(), false); break; //ulong - case 'x': type = TypeBuilder::IntegerType(8, true); break; - case 'y': type = TypeBuilder::IntegerType(8, false); break; - case 'n': type = TypeBuilder::IntegerType(16, true); break; - case 'o': type = TypeBuilder::IntegerType(16, false); break; - case 'f': type = TypeBuilder::FloatType(4); break; - case 'd': type = TypeBuilder::FloatType(8); break; - case 'e': type = TypeBuilder::FloatType(10); break; - case 'g': type = TypeBuilder::FloatType(16); break; - case 'z': type = TypeBuilder::VarArgsType(); break; + case 'v': type = DemangledTypeNode::VoidType(); break; + case 'w': type = DemangledTypeNode::IntegerType(4, false, "wchar_t"); break; //TODO: verify + case 'b': type = DemangledTypeNode::BoolType(); break; + case 'c': type = DemangledTypeNode::IntegerType(1, true); break; + case 'a': type = DemangledTypeNode::IntegerType(1, true); break; + case 'h': type = DemangledTypeNode::IntegerType(1, false); break; + case 's': type = DemangledTypeNode::IntegerType(2, true); break; + case 't': type = DemangledTypeNode::IntegerType(2, false); break; + case 'i': type = DemangledTypeNode::IntegerType(4, true); break; + case 'j': type = DemangledTypeNode::IntegerType(4, false); break; + 
case 'l': type = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), true); break; //long + case 'm': type = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), false); break; //ulong + case 'x': type = DemangledTypeNode::IntegerType(8, true); break; + case 'y': type = DemangledTypeNode::IntegerType(8, false); break; + case 'n': type = DemangledTypeNode::IntegerType(16, true); break; + case 'o': type = DemangledTypeNode::IntegerType(16, false); break; + case 'f': type = DemangledTypeNode::FloatType(4); break; + case 'd': type = DemangledTypeNode::FloatType(8); break; + case 'e': type = DemangledTypeNode::FloatType(10); break; + case 'g': type = DemangledTypeNode::FloatType(16); break; + case 'z': type = DemangledTypeNode::VarArgsType(); break; case 'M': // TODO: Make into pointer to function member { - TypeBuilder name = DemangleType(); - TypeBuilder member = DemangleType(); - string fullName = member.GetStringBeforeName() + "(" + name.GetString() + "::*)" + member.GetStringAfterName(); + DemangledTypeNode memberName = DemangleType(); + DemangledTypeNode member = DemangleType(); + string fullName = member.GetStringBeforeName() + "(" + memberName.GetString() + "::*)" + member.GetStringAfterName(); //member.SetScope(NonStaticScope); - //TypeBuilder ptr = TypeBuilder::PointerType(m_arch, member, cnst, vltl); - //QualifiedName qn({name.GetString(), "*"}); + //DemangledTypeNode ptr = DemangledTypeNode::PointerType(m_arch, member, cnst, vltl); + //QualifiedName qn({memberName.GetString(), "*"}); type = CreateUnknownType(fullName); break; } @@ -532,12 +698,12 @@ TypeBuilder DemangleGNU3::DemangleType() case 'D': switch (m_reader.Read()) { - case 'd': type = TypeBuilder::FloatType(8); break; - case 'e': type = TypeBuilder::FloatType(16); break; - case 'f': type = TypeBuilder::FloatType(4); break; - case 'h': type = TypeBuilder::FloatType(2); break; - case 'i': type = TypeBuilder::IntegerType(4, true, "char32_t"); break; - case 's': type = 
TypeBuilder::IntegerType(2, true, "char16_t"); break; + case 'd': type = DemangledTypeNode::FloatType(8); break; + case 'e': type = DemangledTypeNode::FloatType(16); break; + case 'f': type = DemangledTypeNode::FloatType(4); break; + case 'h': type = DemangledTypeNode::FloatType(2); break; + case 'i': type = DemangledTypeNode::IntegerType(4, true, "char32_t"); break; + case 's': type = DemangledTypeNode::IntegerType(2, true, "char16_t"); break; case 'a': type = CreateUnknownType("auto"); break; //auto type case 'c': type = CreateUnknownType("decltype(auto)"); break; //decltype(auto) case 'n': @@ -559,7 +725,8 @@ TypeBuilder DemangleGNU3::DemangleType() uint64_t size = DemangleNumber(); if (m_reader.Read() != '_') throw DemangleException(); - type = TypeBuilder::ArrayType(DemangleType().Finalize(), size); + DemangledTypeNode child = DemangleType(); + type = DemangledTypeNode::ArrayType(std::move(child), size); break; } default: @@ -580,7 +747,8 @@ TypeBuilder DemangleGNU3::DemangleType() uint64_t size = DemangleNumber(); if (m_reader.Read() != '_') throw DemangleException(); - type = TypeBuilder::ArrayType(DemangleType().Finalize(), size); + DemangledTypeNode child = DemangleType(); + type = DemangledTypeNode::ArrayType(std::move(child), size); } else { @@ -616,7 +784,7 @@ TypeBuilder DemangleGNU3::DemangleType() substitute = false; m_reader.Consume(); PushType(type); - vector args; + vector args; DemangleTemplateArgs(args); ExtendTypeName(type, GetTemplateString(args)); type.SetHasTemplateArguments(true); @@ -633,7 +801,7 @@ TypeBuilder DemangleGNU3::DemangleType() } -TypeBuilder DemangleGNU3::DemangleSubstitution() +DemangledTypeNode DemangleGNU3::DemangleSubstitution() { static const QualifiedName stdAllocatorName(vector{"std", "allocator"}); static const QualifiedName stdBasicStringName(vector{"std", "basic_string"}); @@ -694,7 +862,7 @@ TypeBuilder DemangleGNU3::DemangleSubstitution() string DemangleGNU3::DemangleNumberAsString() { bool negativeFactor = false; 
- if ( m_reader.Peek() == 'n') + if (m_reader.Peek() == 'n') { negativeFactor = true; m_reader.Consume(); @@ -703,15 +871,32 @@ string DemangleGNU3::DemangleNumberAsString() string number; while (isdigit(m_reader.Peek())) { - number += m_reader.ReadString(1); + number += m_reader.Read(); } - return (negativeFactor?"-":"") + number; + if (negativeFactor) + return "-" + number; + return number; } // number ::= [n] int64_t DemangleGNU3::DemangleNumber() { - return std::stol(DemangleNumberAsString().c_str()); + bool negative = false; + if (m_reader.Peek() == 'n') + { + negative = true; + m_reader.Consume(); + } + + if (!isdigit(m_reader.Peek())) + throw DemangleException(); + + int64_t result = 0; + do + { + result = result * 10 + (m_reader.Read() - '0'); + } while (isdigit(m_reader.Peek())); + return negative ? -result : result; } @@ -728,18 +913,6 @@ string DemangleGNU3::DemangleInitializer() return out; } -static int8_t HexToDec(char c) -{ - if (isdigit(c)) - { - return c - '0'; - } - else if(islower(c) && c <= 'f') - { - return c - 'a' + 10; - } - return -1; -} string DemangleGNU3::DemanglePrimaryExpression() { @@ -757,7 +930,7 @@ string DemangleGNU3::DemanglePrimaryExpression() m_reader.Consume(2); oldTopLevel = m_topLevel; m_topLevel = false; - TypeBuilder t = DemangleSymbol(tmpList); + DemangledTypeNode t = DemangleSymbol(tmpList); m_topLevel = oldTopLevel; out += t.GetStringBeforeName(); out += tmpList.GetString(); @@ -894,149 +1067,13 @@ string DemangleGNU3::DemangleExpressionList() return expr; } -static string GetOperator(char elm1, char elm2) -{ - switch (hash(elm1, elm2)) - { - case hash('d','c'): return "dynamic_cast"; - case hash('s','c'): return "static_cast"; - case hash('c','c'): return "const_cast"; - case hash('r','c'): return "reinterpret_cast"; - case hash('t','i'): return "typeid"; - case hash('t','e'): return "typeid"; - case hash('s','t'): return "sizeof"; - case hash('s','z'): return "sizeof"; - case hash('a','t'): return "alignof"; - case 
hash('a','z'): return "alignof"; - case hash('n','x'): return "noexcept"; - case hash('s','Z'): return "sizeof..."; - case hash('s','P'): return "sizeof..."; - case hash('s','p'): return ""; - case hash('t','w'): return "throw"; - case hash('t','r'): return "throw"; - case hash('l','s'): return "<<"; // << - case hash('r','s'): return ">>"; // >> - case hash('a','S'): return "="; // = - case hash('n','t'): return "!"; // ! - case hash('e','q'): return "=="; // == - case hash('n','e'): return "!="; // != - case hash('i','x'): return "[]"; // [] - case hash('d','t'): return "."; // . - case hash('p','t'): return "->"; // -> - case hash('m','l'): return "*"; // * - case hash('p','p'): return "++"; // ++ (postfix in context) - case hash('m','m'): return "--"; // -- (postfix in context) - case hash('n','g'): return "-"; // - (unary) - case hash('m','i'): return "-"; // - - case hash('p','s'): return "+"; // + (unary) - case hash('p','l'): return "+"; // + - case hash('a','d'): return "&"; // & (unary) - case hash('a','n'): return "&"; // & - case hash('p','m'): return "->*"; // ->* - case hash('d','v'): return "/"; // / - case hash('r','m'): return "%"; // % - case hash('l','t'): return "<"; // < - case hash('l','e'): return "<="; // <= - case hash('g','t'): return ">"; // > - case hash('g','e'): return ">="; // >= - case hash('c','m'): return ","; // , - case hash('c','l'): return "()"; // () - case hash('c','o'): return "~"; // ~ - case hash('e','o'): return "^"; // ^ - case hash('o','r'): return "|"; // | - case hash('a','a'): return "&&"; // && - case hash('o','o'): return "||"; // || - case hash('d','e'): return "*"; // * (unary) - case hash('m','L'): return "*="; // *= - case hash('p','L'): return "+="; // += - case hash('m','I'): return "-="; // -= - case hash('d','V'): return "/="; // /= - case hash('r','M'): return "%="; // %= - case hash('r','S'): return ">>="; // >>= - case hash('l','S'): return "<<="; // <<= - case hash('a','N'): return "&="; // &= - case 
hash('o','R'): return "|="; // |= - case hash('e','O'): return "^="; // ^= - case hash('d','l'): return "delete"; // delete - case hash('d','a'): return "delete[]"; // delete[] - case hash('n','w'): return "new"; // new - case hash('n','a'): return "new[]"; // new [] - default: return ""; - } -} -static BNNameType GetNameType(char elm1, char elm2) -{ - switch (hash(elm1, elm2)) - { - case hash('n','t'): return OperatorNotNameType; // ! - case hash('n','g'): return OperatorMinusNameType; // - (unary) - case hash('p','s'): return OperatorPlusNameType; // + (unary) - case hash('a','d'): return OperatorBitAndNameType; // & (unary) - case hash('d','e'): return OperatorStarNameType; // * (unary) - case hash('i','x'): return OperatorArrayNameType; // [] - case hash('p','p'): return OperatorIncrementNameType; // ++ (postfix in context) - case hash('m','m'): return OperatorDecrementNameType; // -- (postfix in context) - case hash('l','s'): return OperatorLeftShiftNameType; // << - case hash('r','s'): return OperatorRightShiftNameType; // >> - case hash('a','S'): return OperatorAssignNameType; // = - case hash('e','q'): return OperatorEqualNameType; // == - case hash('n','e'): return OperatorNotEqualNameType; // != - case hash('p','t'): return OperatorArrowNameType; // -> - case hash('m','l'): return OperatorStarNameType; // * - case hash('m','i'): return OperatorMinusNameType; // - - case hash('p','l'): return OperatorPlusNameType; // + - case hash('a','n'): return OperatorBitAndNameType; // & - case hash('p','m'): return OperatorArrowStarNameType; // ->* - case hash('d','v'): return OperatorDivideNameType; // / - case hash('r','m'): return OperatorModulusNameType; // % - case hash('l','t'): return OperatorLessThanNameType; // < - case hash('l','e'): return OperatorLessThanEqualNameType; // <= - case hash('g','t'): return OperatorGreaterThanNameType; // > - case hash('g','e'): return OperatorGreaterThanEqualNameType; // >= - case hash('c','m'): return OperatorCommaNameType; 
// , - case hash('c','l'): return OperatorParenthesesNameType; // () - case hash('c','o'): return OperatorTildeNameType; // ~ - case hash('e','o'): return OperatorXorNameType; // ^ - case hash('o','r'): return OperatorBitOrNameType; // | - case hash('a','a'): return OperatorLogicalAndNameType; // && - case hash('o','o'): return OperatorLogicalOrNameType; // || - case hash('m','L'): return OperatorStarEqualNameType; // *= - case hash('p','L'): return OperatorPlusEqualNameType; // += - case hash('m','I'): return OperatorMinusEqualNameType; // -= - case hash('d','V'): return OperatorDivideEqualNameType; // /= - case hash('r','M'): return OperatorModulusEqualNameType; // %= - case hash('r','S'): return OperatorRightShiftEqualNameType; // >>= - case hash('l','S'): return OperatorLeftShiftEqualNameType; // <<= - case hash('a','N'): return OperatorAndEqualNameType; // &= - case hash('o','R'): return OperatorOrEqualNameType; // |= - case hash('e','O'): return OperatorXorEqualNameType; // ^= - case hash('d','l'): return OperatorDeleteNameType; // delete - case hash('d','a'): return OperatorDeleteArrayNameType; // delete[] - case hash('n','w'): return OperatorNewNameType; // new - case hash('n','a'): return OperatorNewArrayNameType; // new [] - case hash('C','1'): return ConstructorNameType; - case hash('C','2'): return ConstructorNameType; - case hash('C','3'): return ConstructorNameType; - case hash('C','4'): return ConstructorNameType; - case hash('C','5'): return ConstructorNameType; - case hash('D','0'): return DestructorNameType; - case hash('D','1'): return DestructorNameType; - case hash('D','2'): return DestructorNameType; - case hash('D','3'): return DestructorNameType; - case hash('D','4'): return DestructorNameType; - case hash('D','5'): return DestructorNameType; - default: - return NoNameType; - } -} - -TypeBuilder DemangleGNU3::DemangleUnqualifiedName() +DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() { indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, 
m_reader.GetRaw().c_str()); - TypeBuilder outType; + DemangledTypeNode outType; char elm1 = m_reader.Read(); char elm2 = m_reader.Read(); switch (hash(elm1, elm2)) @@ -1143,13 +1180,13 @@ TypeBuilder DemangleGNU3::DemangleUnqualifiedName() { string name; name = "'lambda"; - vector params; + vector lambdaParams; do { - TypeBuilder param = DemangleType(); + DemangledTypeNode param = DemangleType(); if (param.GetClass() == VoidTypeClass) break; - params.push_back(std::move(param)); + lambdaParams.push_back(std::move(param)); }while (m_reader.Peek() != 'E'); m_reader.Consume(); @@ -1161,11 +1198,11 @@ TypeBuilder DemangleGNU3::DemangleUnqualifiedName() throw DemangleException(); name += "'("; - for (size_t i = 0; i < params.size(); i++) + for (size_t i = 0; i < lambdaParams.size(); i++) { if (i != 0) name += ", "; - name += params[i].GetString(); + name += lambdaParams[i].GetString(); } name += ")"; m_lastName = name; @@ -1232,7 +1269,7 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + vector args; DemangleTemplateArgs(args); out.back() += GetTemplateString(args); PushType(CreateUnknownType(out)); @@ -1253,7 +1290,7 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + vector args; DemangleTemplateArgs(args); out.back() += GetTemplateString(args); } @@ -1264,14 +1301,14 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() } -TypeBuilder DemangleGNU3::DemangleUnresolvedType() +DemangledTypeNode DemangleGNU3::DemangleUnresolvedType() { indent(); MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); // ::= [ ] # T:: or T:: // ::= # decltype(p):: // ::= - TypeBuilder type; + DemangledTypeNode type; if (m_reader.Peek() == 'T') { m_reader.Consume(); @@ -1280,7 +1317,7 @@ TypeBuilder DemangleGNU3::DemangleUnresolvedType() { PushType(type); m_reader.Consume(); - vector args; + vector args; 
DemangleTemplateArgs(args); ExtendTypeName(type, GetTemplateString(args)); type.SetHasTemplateArguments(true); @@ -1403,7 +1440,7 @@ string DemangleGNU3::DemangleExpression() return GetOperator(elm1, elm2) + "(" + DemangleTypeString() + ")"; case hash('s','P'): { - vector args; + vector args; DemangleTemplateArgs(args); return "sizeof...(" + GetTemplateString(args) + ")..."; } @@ -1471,7 +1508,7 @@ string DemangleGNU3::DemangleExpression() return "(" + DemangleExpressionList() + ")"; case hash('c','v'): //type (expression) { - TypeBuilder type = DemangleType(); + DemangledTypeNode type = DemangleType(); out = type.GetString(); if (m_reader.Peek() == '_') out += " (" + DemangleExpressionList() + ")"; @@ -1495,7 +1532,7 @@ string DemangleGNU3::DemangleExpression() // ::= fL p _ # L > 0, second and later parameters bool cnst = false, vltl = false, rstrct = false; - TypeBuilder type; + DemangledTypeNode type; int64_t listNumber = 0; int64_t elementNum = 0; char elm; @@ -1566,11 +1603,10 @@ string DemangleGNU3::DemangleExpression() do { out += DemangleSourceName(); - PushType(TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnknownNamedTypeClass, out))); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); if (m_reader.Peek() == 'I') { - vector args; + vector args; m_reader.Consume(); // DemangleTemplateArgs(args); @@ -1590,7 +1626,7 @@ string DemangleGNU3::DemangleExpression() out += DemangleSourceName(); if (m_reader.Peek() == 'I') { - vector args; + vector args; m_reader.Consume(); // DemangleTemplateArgs(args); @@ -1613,7 +1649,7 @@ string DemangleGNU3::DemangleExpression() out = DemangleSourceName(); if (m_reader.Peek() == 'I') { - vector args; + vector args; m_reader.Consume(); // DemangleTemplateArgs(args); @@ -1625,11 +1661,11 @@ string DemangleGNU3::DemangleExpression() } -void DemangleGNU3::DemangleTemplateArgs(vector& args) +void DemangleGNU3::DemangleTemplateArgs(vector& args) { 
indent(); MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - TypeBuilder tmp; + DemangledTypeNode tmp; bool tmpValid = false; string expr; bool topLevel; @@ -1640,12 +1676,12 @@ void DemangleGNU3::DemangleTemplateArgs(vector& args) { case 'L': expr = DemanglePrimaryExpression(); - args.push_back({expr, nullptr, true, Variable()}); + args.push_back(expr); tmp = CreateUnknownType(expr); tmpValid = true; break; case 'X': - args.push_back({DemangleExpression(), nullptr, true, Variable()}); + args.push_back(DemangleExpression()); if (m_reader.Read() != 'E') throw DemangleException(); break; @@ -1658,7 +1694,7 @@ void DemangleGNU3::DemangleTemplateArgs(vector& args) m_topLevel = false; tmp = DemangleType(); m_topLevel = topLevel; - args.push_back({tmp.GetString(), nullptr, true, Variable()}); + args.push_back(tmp.GetString()); tmpValid = true; } if (m_topLevel && tmpValid) @@ -1674,7 +1710,7 @@ void DemangleGNU3::DemangleTemplateArgs(vector& args) } -TypeBuilder DemangleGNU3::DemangleNestedName() +DemangledTypeNode DemangleGNU3::DemangleNestedName() { /* This can be either a qualified name like: "foo::bar::bas" @@ -1708,13 +1744,12 @@ TypeBuilder DemangleGNU3::DemangleNestedName() indent(); MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - TypeBuilder type = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnknownNamedTypeClass, QualifiedName())); + DemangledTypeNode type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, QualifiedName()); bool cnst = false, vltl = false, rstrct = false; bool ref = false; bool rvalueRef = false; bool substitute = true; - TypeBuilder newType; + DemangledTypeNode newType; bool base = false; bool isTemplate = false; bool hasB = false; @@ -1757,7 +1792,7 @@ TypeBuilder DemangleGNU3::DemangleNestedName() { if (!base) throw DemangleException(); - vector args; + vector args; DemangleTemplateArgs(args); ExtendTypeName(type, GetTemplateString(args)); 
type.SetHasTemplateArguments(true); @@ -1774,12 +1809,15 @@ TypeBuilder DemangleGNU3::DemangleNestedName() if (!isTemplate) { type.SetNameType(newType.GetNameType()); - QualifiedName newName = type.GetTypeName() + newType.GetTypeName(); - if (newName.StringSize() > MAX_DEMANGLE_LENGTH) + auto aNames = type.GetTypeName(); + auto bNames = newType.GetTypeName(); + _STD_VECTOR<_STD_STRING> newName; + newName.reserve(aNames.size() + bNames.size()); + newName.insert(newName.end(), aNames.begin(), aNames.end()); + newName.insert(newName.end(), bNames.begin(), bNames.end()); + if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) throw DemangleException("Detected adversarial mangled string"); - type.SetNamedTypeReference( - NamedTypeReference::GenerateAutoDemangledTypeReference(type.GetNamedTypeReference()->GetTypeReferenceClass(), newName) - ); + type.SetNTR(type.GetNTRClass(), newName); type.SetHasTemplateArguments(false); } if (substitute && m_reader.Peek() != 'E') @@ -1814,11 +1852,11 @@ TypeBuilder DemangleGNU3::DemangleNestedName() } -TypeBuilder DemangleGNU3::DemangleLocalName() +DemangledTypeNode DemangleGNU3::DemangleLocalName() { indent(); MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - TypeBuilder type; + DemangledTypeNode type; QualifiedName varName; bool oldTopLevel = m_topLevel; m_topLevel = false; @@ -1833,12 +1871,17 @@ TypeBuilder DemangleGNU3::DemangleLocalName() if (m_reader.Peek() != 's') { // - TypeBuilder tmpType = DemangleName(); - type = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, varName)); - QualifiedName newName = type.GetTypeName() + tmpType.GetTypeName(); - if (newName.StringSize() > MAX_DEMANGLE_LENGTH) + DemangledTypeNode tmpType = DemangleName(); + type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); + auto aNames = type.GetTypeName(); + auto bNames = tmpType.GetTypeName(); + _STD_VECTOR<_STD_STRING> newName; + newName.reserve(aNames.size() + 
bNames.size()); + newName.insert(newName.end(), aNames.begin(), aNames.end()); + newName.insert(newName.end(), bNames.begin(), bNames.end()); + if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) throw DemangleException("Detected adversarial mangled string"); - type.SetTypeName(newName); + type.SetTypeName(std::move(newName)); type.SetConst(tmpType.IsConst()); type.SetVolatile(tmpType.IsVolatile()); type.SetPointerSuffix(tmpType.GetPointerSuffix()); @@ -1846,7 +1889,7 @@ TypeBuilder DemangleGNU3::DemangleLocalName() else { m_reader.Consume(); - type = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, varName)); + type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); } // [] //TODO: What do we do with discriminators? @@ -1870,7 +1913,7 @@ TypeBuilder DemangleGNU3::DemangleLocalName() } -TypeBuilder DemangleGNU3::DemangleName() +DemangledTypeNode DemangleGNU3::DemangleName() { indent(); MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); @@ -1886,7 +1929,7 @@ TypeBuilder DemangleGNU3::DemangleName() ::= ::= */ - TypeBuilder type; + DemangledTypeNode type; bool substitute = false; switch (m_reader.Read()) { @@ -1895,9 +1938,9 @@ TypeBuilder DemangleGNU3::DemangleName() { m_reader.Consume(1); type = DemangleUnqualifiedName(); - QualifiedName qn = type.GetTypeName(); + auto qn = type.GetTypeName(); qn.insert(qn.begin(), "std"); - type.SetTypeName(qn); + type.SetTypeName(std::move(qn)); substitute = true; } else @@ -1910,7 +1953,7 @@ TypeBuilder DemangleGNU3::DemangleName() m_reader.Consume(); if (substitute) PushType(type); - vector args; + vector args; DemangleTemplateArgs(args); ExtendTypeName(type, GetTemplateString(args)); type.SetHasTemplateArguments(true); @@ -1937,7 +1980,7 @@ TypeBuilder DemangleGNU3::DemangleName() { PushType(type); // - vector args; + vector args; m_reader.Consume(); // DemangleTemplateArgs(args); @@ -1952,15 +1995,15 @@ TypeBuilder DemangleGNU3::DemangleName() } 
-TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) +DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) { indent(); MyLogDebug("%s: %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); - TypeBuilder returnType; + DemangledTypeNode returnType; bool isReturnTypeUnknown = false; - TypeBuilder type; - vector params; - Confidence cnst = false, vltl = false, rstrct = false; + DemangledTypeNode type; + ParamList params; + bool cnst = false, vltl = false, rstrct = false; bool oldTopLevel; QualifiedName name; @@ -1987,9 +2030,9 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) throw DemangleException(); case 'V': { - TypeBuilder t = DemangleSymbol(name); + DemangledTypeNode t = DemangleSymbol(name); varName.push_back("guard_variable_for_" + t.GetTypeAndName(name)); - type = TypeBuilder::IntegerType(1, false); + type = DemangledTypeNode::IntegerType(1, false); if (m_reader.Length() == 0) return type; //function parameters @@ -2037,13 +2080,13 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) throw DemangleException(); case 'C': { - TypeBuilder t = DemangleType(); + DemangledTypeNode t = DemangleType(); DemangleNumberAsString(); if (m_reader.Read() != '_') throw DemangleException(); - return TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, - {"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()})); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); } case 'D': LogWarn("Unsupported: 'typeinfo common proxy'\n"); @@ -2058,32 +2101,32 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) throw DemangleException(); oldTopLevel = m_topLevel; m_topLevel = false; - TypeBuilder t = DemangleSymbol(name); + DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; - return TypeBuilder::NamedType( - 
NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, - {"non-virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()})); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"non-virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); } case 'H': LogWarn("Unsupported: 'TLS init function'\n"); throw DemangleException(); case 'I': - return TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(StructNamedTypeClass, - {"typeinfo_for_" + DemangleTypeString()})); + return DemangledTypeNode::NamedType(StructNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"typeinfo_for_" + DemangleTypeString()}); case 'J': LogWarn("Unsupported: 'java class'\n"); throw DemangleException(); case 'S': { - TypeBuilder t = DemangleType(); + DemangledTypeNode t = DemangleType(); varName = vector{"typeinfo_name_for_" + t.GetString()}; - return TypeBuilder::ArrayType(Type::IntegerType(1, true), 0); + DemangledTypeNode elemType = DemangledTypeNode::IntegerType(1, true); + return DemangledTypeNode::ArrayType(std::move(elemType), 0); } case 'T': //VTT { - TypeBuilder t = DemangleType(); - return TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(StructNamedTypeClass, - {"VTT_for_" + t.GetString()})); + DemangledTypeNode t = DemangleType(); + return DemangledTypeNode::NamedType(StructNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"VTT_for_" + t.GetString()}); } case 'v': //TODO: Convert to whatever the actual type is! 
{ @@ -2095,16 +2138,14 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) throw DemangleException(); oldTopLevel = m_topLevel; m_topLevel = false; - TypeBuilder t = DemangleSymbol(name); + DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; - return TypeBuilder::NamedType( - NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, - {"virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()})); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); } case 'V': //Vtable - return TypeBuilder::NamedType( - NamedTypeReference::GenerateAutoDemangledTypeReference(StructNamedTypeClass, - {"vtable_for_" + DemangleTypeString()})); + return DemangledTypeNode::NamedType(StructNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"vtable_for_" + DemangleTypeString()}); case 'W': MyLogDebug("Unsupported: 'TLS wrapper function'\n"); throw DemangleException(); @@ -2130,7 +2171,7 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) varName = type.GetTypeName(); cnst = type.IsConst(); vltl = type.IsVolatile(); - set suffix = type.GetPointerSuffix(); + auto suffix = type.GetPointerSuffix(); if (m_reader.Peek() == 'J') { m_reader.Consume(); @@ -2139,7 +2180,7 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) if (m_reader.Peek() == 'B') { m_reader.Consume(); - TypeBuilder t = DemangleUnqualifiedName(); + DemangledTypeNode t = DemangleUnqualifiedName(); if (t.GetString() == "cxx11") { @@ -2151,7 +2192,7 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) type.GetNameType() == ConstructorNameType || type.GetNameType() == DestructorNameType) { - returnType = TypeBuilder::VoidType(); + returnType = DemangledTypeNode::VoidType(); } else if (m_isParameter || type.HasTemplateArguments()) { @@ -2160,11 +2201,11 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) else { 
isReturnTypeUnknown = true; - returnType = TypeBuilder::IntegerType(m_arch->GetAddressSize(), true); + returnType = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), true); } m_functionSubstitute.push_back({}); - for (size_t i = 0; m_reader.Length() > 0; i++) + while (m_reader.Length() > 0) { if (m_reader.Peek() == 'E') { @@ -2191,7 +2232,7 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) MyLogDebug("Var_%d: %s\n", i, m_reader.GetRaw().c_str()); if (m_reader.PeekString(2) == "@@") break; - TypeBuilder param = DemangleType(); + DemangledTypeNode param = DemangleType(); if (param.GetClass() == VoidTypeClass) { if (m_reader.Peek() == 'E') @@ -2202,8 +2243,9 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) break; } m_functionSubstitute.back().push_back(param); - params.push_back({"", param.Finalize(), true, Variable()}); - if (param.GetClass() == VarArgsTypeClass) + bool isVarArgs = param.GetClass() == VarArgsTypeClass; + params.push_back({"", std::make_shared(std::move(param))}); + if (isVarArgs) { if (m_reader.Peek() == 'E') { @@ -2216,13 +2258,14 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) m_functionSubstitute.pop_back(); m_isParameter = false; - type = TypeBuilder::FunctionType(returnType.Finalize()-> - WithConfidence(isReturnTypeUnknown ? 
BN_MINIMUM_CONFIDENCE : BN_DEFAULT_CONFIDENCE), nullptr, params); + type = DemangledTypeNode::FunctionType(std::move(returnType), nullptr, std::move(params)); + if (isReturnTypeUnknown) + type.SetReturnTypeConfidence(BN_MINIMUM_CONFIDENCE); type.SetPointerSuffix(suffix); type.SetConst(cnst); type.SetVolatile(vltl); - if (rstrct.GetValue()) + if (rstrct) type.SetPointerSuffix({RestrictSuffix}); // PrintTables(); @@ -2234,7 +2277,9 @@ TypeBuilder DemangleGNU3::DemangleSymbol(QualifiedName& varName) } -bool DemangleGNU3::IsGNU3MangledString(const string& name) +// ===== Non-templated static methods ===== + +bool DemangleGNU3Static::IsGNU3MangledString(const string& name) { string headerless = name; string header; @@ -2248,7 +2293,7 @@ bool DemangleGNU3::IsGNU3MangledString(const string& name) } -bool DemangleGNU3::DemangleGlobalHeader(string& name, string& header) +bool DemangleGNU3Static::DemangleGlobalHeader(string& name, string& header) { size_t strippedCount = 0; string encoded = name; @@ -2281,19 +2326,7 @@ bool DemangleGNU3::DemangleGlobalHeader(string& name, string& header) } -bool DemangleGNU3::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName, const Ref& view) -{ - return DemangleStringGNU3(arch, name, outType, outVarName); -} - - -bool DemangleGNU3::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName, BinaryView* view) -{ - return DemangleStringGNU3(arch, name, outType, outVarName); -} - - -bool DemangleGNU3::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) +bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) { string encoding = name; string header; @@ -2305,18 +2338,17 @@ bool DemangleGNU3::DemangleStringGNU3(Architecture* arch, const string& name, Re encoding = encoding.substr(3); else if (foundHeader && !header.empty()) { - // Some 
variable constructors/destructors are __GLOBAL__I_name - // And there are even __GLOBAL__sub_I_file_name.cpp outVarName.clear(); outVarName.push_back(header); outVarName.push_back(encoding); - outType = CreateUnknownType(outVarName).Finalize(); + outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(); return true; } else return false; - DemangleGNU3 demangle(arch, encoding); + thread_local DemangleGNU3 demangle(arch, encoding); + demangle.Reset(arch, encoding); try { outType = demangle.DemangleSymbol(outVarName).Finalize(); @@ -2349,6 +2381,11 @@ bool DemangleGNU3::DemangleStringGNU3(Architecture* arch, const string& name, Re } +// ===== Explicit template instantiation ===== + + +// ===== Demangler plugin registration ===== + class GNU3Demangler: public Demangler { public: @@ -2359,7 +2396,7 @@ class GNU3Demangler: public Demangler virtual bool IsMangledString(const string& name) override { - return DemangleGNU3::IsGNU3MangledString(name); + return DemangleGNU3Static::IsGNU3MangledString(name); } #ifdef BINARYNINJACORE_LIBRARY @@ -2370,9 +2407,7 @@ class GNU3Demangler: public Demangler Ref view) override #endif { - if (view) - return DemangleGNU3::DemangleStringGNU3(arch, name, outType, outVarName, view); - return DemangleGNU3::DemangleStringGNU3(arch, name, outType, outVarName); + return DemangleGNU3Static::DemangleStringGNU3(arch, name, outType, outVarName); } }; diff --git a/demangler/gnu3/demangle_gnu3.h b/demangler/gnu3/demangle_gnu3.h index e26f3305c..2e46e05d5 100644 --- a/demangler/gnu3/demangle_gnu3.h +++ b/demangler/gnu3/demangle_gnu3.h @@ -35,6 +35,8 @@ #define _STD_VECTOR std::vector #endif +#include "demangled_type_node.h" + class DemangleException: public std::exception { _STD_STRING m_message; @@ -43,47 +45,62 @@ class DemangleException: public std::exception virtual const char* what() const noexcept { return m_message.c_str(); } }; -class DemangleGNU3 +class DemangleGNU3Reader { - class Reader +public: + 
DemangleGNU3Reader(const _STD_STRING& data); + void Reset(const _STD_STRING& data); + _STD_STRING PeekString(size_t count=1); + bool NextIsOneOf(const _STD_STRING& list); + _STD_STRING GetRaw(); + _STD_STRING ReadString(size_t count=1); + _STD_STRING ReadUntil(char sentinal); + + size_t Length() const { return m_data.length() - m_offset; } + + char Peek() { - public: - Reader(const _STD_STRING& data); - _STD_STRING PeekString(size_t count=1); - char Peek(); - bool NextIsOneOf(const _STD_STRING& list); - _STD_STRING GetRaw(); - char Read(); - _STD_STRING ReadString(size_t count=1); - _STD_STRING ReadUntil(char sentinal); - void Consume(size_t count=1); - size_t Length() const; - void UnRead(size_t count=1); - private: - _STD_STRING m_data; - size_t m_offset; - }; - - class SubstitutionList + if (1 > Length()) + return '\0'; + return (char)m_data[m_offset]; + } + + char Read() { - _STD_VECTOR m_typeList; - public: - SubstitutionList(); - ~SubstitutionList(); - void PushType(BN::TypeBuilder t); - void PopType(); - const BN::TypeBuilder& GetType(size_t reference) const; - void PrintSubstitutionTable() const; - size_t Size() const { return m_typeList.size(); } - void Clear() { m_typeList.clear(); } - }; + if (1 > Length()) + throw DemangleException(); + return m_data[m_offset++]; + } + + void Consume(size_t count=1) + { + if (count > Length()) + throw DemangleException(); + m_offset += count; + } + + void UnRead(size_t count=1) + { + if (count <= m_offset) + m_offset -= count; + } + +private: + _STD_STRING m_data; + size_t m_offset; +}; + + +class DemangleGNU3 +{ + using ParamList = _STD_VECTOR; BN::QualifiedName m_varName; - Reader m_reader; + DemangleGNU3Reader m_reader; BN::Architecture* m_arch; - _STD_VECTOR m_substitute; - _STD_VECTOR m_templateSubstitute; - _STD_VECTOR<_STD_VECTOR> m_functionSubstitute; + _STD_VECTOR m_substitute; + _STD_VECTOR m_templateSubstitute; + _STD_VECTOR<_STD_VECTOR> m_functionSubstitute; _STD_STRING m_lastName; BNNameType m_nameType; 
bool m_localType; @@ -94,46 +111,53 @@ class DemangleGNU3 bool m_isOperatorOverload; enum SymbolType { Function, FunctionWithReturn, Data, VTable, Rtti, Name}; BN::QualifiedName DemangleBaseUnresolvedName(); - BN::TypeBuilder DemangleUnresolvedType(); + DemangledTypeNode DemangleUnresolvedType(); _STD_STRING DemangleUnarySuffixExpression(const _STD_STRING& op); _STD_STRING DemangleUnaryPrefixExpression(const _STD_STRING& op); _STD_STRING DemangleBinaryExpression(const _STD_STRING& op); _STD_STRING DemangleUnaryPrefixType(const _STD_STRING& op); _STD_STRING DemangleTypeString(); _STD_STRING DemangleExpressionList(); - BN::TypeBuilder DemangleUnqualifiedName(); + DemangledTypeNode DemangleUnqualifiedName(); _STD_STRING DemangleSourceName(); _STD_STRING DemangleNumberAsString(); _STD_STRING DemangleInitializer(); _STD_STRING DemangleExpression(); _STD_STRING DemanglePrimaryExpression(); - BN::TypeBuilder DemangleName(); - BN::TypeBuilder DemangleLocalName(); + DemangledTypeNode DemangleName(); + DemangledTypeNode DemangleLocalName(); void DemangleCVQualifiers(bool& cnst, bool& vltl, bool& rstrct); - BN::TypeBuilder DemangleSubstitution(); - const BN::TypeBuilder& DemangleTemplateSubstitution(); - void DemangleTemplateArgs(_STD_VECTOR& args); - bool DemangleEncoding(BN::Type** type, BN::QualifiedName& outName); - BN::TypeBuilder DemangleFunction(bool cnst, bool vltl); - BN::TypeBuilder DemangleType(); + DemangledTypeNode DemangleSubstitution(); + const DemangledTypeNode& DemangleTemplateSubstitution(); + void DemangleTemplateArgs(_STD_VECTOR<_STD_STRING>& args); + DemangledTypeNode DemangleFunction(bool cnst, bool vltl); + DemangledTypeNode DemangleType(); int64_t DemangleNumber(); - BN::TypeBuilder DemangleNestedName(); - void PushTemplateType(BN::TypeBuilder type); - const BN::TypeBuilder& GetTemplateType(size_t ref); - void PushType(BN::TypeBuilder type); - const BN::TypeBuilder& GetType(size_t ref); - static bool DemangleGlobalHeader(_STD_STRING& name, _STD_STRING& 
header); + DemangledTypeNode DemangleNestedName(); + void PushTemplateType(const DemangledTypeNode& type); + const DemangledTypeNode& GetTemplateType(size_t ref); + void PushType(const DemangledTypeNode& type); + const DemangledTypeNode& GetType(size_t ref); + + DemangledTypeNode CreateUnknownType(const BN::QualifiedName& s); + DemangledTypeNode CreateUnknownType(const _STD_STRING& s); + static void ExtendTypeName(DemangledTypeNode& type, const _STD_STRING& extend); public: DemangleGNU3(BN::Architecture* arch, const _STD_STRING& mangledName); - BN::TypeBuilder DemangleSymbol(BN::QualifiedName& varName); + void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); + DemangledTypeNode DemangleSymbol(BN::QualifiedName& varName); BN::QualifiedName GetVarName() const { return m_varName; } + void PrintTables(); +}; + + +class DemangleGNU3Static +{ +public: static bool IsGNU3MangledString(const _STD_STRING& name); + static bool DemangleGlobalHeader(_STD_STRING& name, _STD_STRING& header); - // Tread lightly on this landmine; a BinaryView* will be converted to a bool; use an explicit (BN::Ref)view cast - static bool DemangleStringGNU3(BN::Architecture* arch, const _STD_STRING& name, BN::Ref& outType, BN::QualifiedName& outVarName, const BN::Ref& view); - static bool DemangleStringGNU3(BN::Architecture* arch, const _STD_STRING& name, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); static bool DemangleStringGNU3(BN::Architecture* arch, const _STD_STRING& name, BN::Ref& outType, BN::QualifiedName& outVarName); - void PrintTables(); }; diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/gnu3/demangled_type_node.cpp new file mode 100644 index 000000000..d8a98e169 --- /dev/null +++ b/demangler/gnu3/demangled_type_node.cpp @@ -0,0 +1,531 @@ +// Copyright 2016-2026 Vector 35 Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "demangled_type_node.h" +#include + +#ifdef BINARYNINJACORE_LIBRARY +using namespace BinaryNinjaCore; +#define GetClass GetTypeClass +#else +using namespace BinaryNinja; +using namespace std; +#endif + + +DemangledTypeNode::DemangledTypeNode() + : m_typeClass(VoidTypeClass), m_width(0), m_alignment(0), + m_const(false), m_volatile(false), m_signed(false), + m_hasTemplateArgs(false), m_nameType(NoNameType), + m_ntrClass(UnknownNamedTypeClass), + m_pointerReference(PointerReferenceType), m_elements(0), + m_returnTypeConfidence(BN_DEFAULT_CONFIDENCE) +{ +} + + +DemangledTypeNode DemangledTypeNode::VoidType() +{ + DemangledTypeNode n; + n.m_typeClass = VoidTypeClass; + return n; +} + + +DemangledTypeNode DemangledTypeNode::BoolType() +{ + DemangledTypeNode n; + n.m_typeClass = BoolTypeClass; + n.m_width = 1; + return n; +} + + +DemangledTypeNode DemangledTypeNode::IntegerType(size_t width, bool isSigned, const string& altName) +{ + DemangledTypeNode n; + n.m_typeClass = IntegerTypeClass; + n.m_width = width; + n.m_signed = isSigned; + n.m_altName = altName; + return n; +} + + +DemangledTypeNode DemangledTypeNode::FloatType(size_t width) +{ + DemangledTypeNode n; + n.m_typeClass = FloatTypeClass; + n.m_width = width; + return n; +} + + +DemangledTypeNode DemangledTypeNode::VarArgsType() +{ + DemangledTypeNode n; + n.m_typeClass = VarArgsTypeClass; + return n; +} + + +DemangledTypeNode DemangledTypeNode::PointerType(Architecture* arch, DemangledTypeNode child, + bool cnst, bool vltl, BNReferenceType refType) +{ + DemangledTypeNode n; + 
n.m_typeClass = PointerTypeClass; + n.m_width = arch->GetAddressSize(); + n.m_childType = std::make_shared(std::move(child)); + n.m_const = cnst; + n.m_volatile = vltl; + n.m_pointerReference = refType; + return n; +} + + +DemangledTypeNode DemangledTypeNode::ArrayType(DemangledTypeNode child, uint64_t count) +{ + DemangledTypeNode n; + n.m_typeClass = ArrayTypeClass; + n.m_childType = std::make_shared(std::move(child)); + n.m_elements = count; + return n; +} + + +DemangledTypeNode DemangledTypeNode::FunctionType(DemangledTypeNode retType, + std::nullptr_t, vector params) +{ + DemangledTypeNode n; + n.m_typeClass = FunctionTypeClass; + n.m_childType = std::make_shared(std::move(retType)); + n.m_params = std::move(params); + return n; +} + + +DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, + vector nameSegments, size_t width, size_t align) +{ + DemangledTypeNode n; + n.m_typeClass = NamedTypeReferenceClass; + n.m_ntrClass = cls; + n.m_nameSegments = std::make_shared>(std::move(nameSegments)); + n.m_width = width; + n.m_alignment = align; + return n; +} + + +DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, + const QualifiedName& name, size_t width, size_t align) +{ + return NamedType(cls, vector(name.begin(), name.end()), width, align); +} + + +void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, vector nameSegments) +{ + m_ntrClass = cls; + m_nameSegments = std::make_shared>(std::move(nameSegments)); +} + + +void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, const QualifiedName& name) +{ + SetNTR(cls, vector(name.begin(), name.end())); +} + + +string DemangledTypeNode::GetTypeNameString() const +{ + if (!m_nameSegments) + return {}; + const auto& segs = *m_nameSegments; + size_t n = segs.size(); + if (n == 0) + return {}; + if (n == 1) + return segs[0]; + + // Pre-reserve: sum of segments + (n-1) * 2 for "::" separators + size_t total = (n - 1) * 2; + for (const auto& s : segs) + total += 
s.size(); + + string result; + result.reserve(total); + result += segs[0]; + for (size_t i = 1; i < n; i++) + { + result += "::"; + result += segs[i]; + } + return result; +} + + +size_t DemangledTypeNode::NameStringSize() const +{ + if (!m_nameSegments) + return 0; + size_t total = 0; + for (const auto& s : *m_nameSegments) + total += s.size(); + return total; +} + + +string DemangledTypeNode::GetModifierString() const +{ + if (m_const && m_volatile) + return "const volatile"; + if (m_const) + return "const"; + if (m_volatile) + return "volatile"; + return ""; +} + + +string DemangledTypeNode::GetPointerSuffixString() const +{ + static const char* suffixStrings[] = { + "__ptr64", + "__unaligned", + "__restrict", + "&", + "&&" + }; + + string out; + for (auto& s : m_pointerSuffix) + { + if (!out.empty() && out.back() != ' ') + out += ' '; + out += suffixStrings[s]; + } + return out; +} + + +string DemangledTypeNode::GetStringBeforeName() const +{ + string out; + AppendBeforeName(out); + return out; +} + + +string DemangledTypeNode::GetStringAfterName() const +{ + string out; + AppendAfterName(out); + return out; +} + + +void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* parentType) const +{ + string modifiers = GetModifierString(); + string ptrSuffix = GetPointerSuffixString(); + + switch (m_typeClass) + { + case FunctionTypeClass: + // Return type before name + if (m_childType) + { + if (!out.empty() && out.back() != ' ') + out += " "; + m_childType->AppendBeforeName(out, this); + } + // If parent is a pointer, add "(" for function pointer syntax + if (parentType && parentType->m_typeClass == PointerTypeClass) + { + if (!out.empty() && out.back() != ' ') + out += " "; + out += "("; + } + break; + + case IntegerTypeClass: + if (!m_altName.empty()) + out += m_altName; + else if (m_signed && m_width == 1) + out += "char"; + else if (m_signed) + out += "int" + to_string(m_width * 8) + "_t"; + else + out += "uint" + to_string(m_width * 8) + 
"_t"; + if (!modifiers.empty()) + out += " " + modifiers; + break; + + case FloatTypeClass: + switch (m_width) + { + case 2: out += "float16"; break; + case 4: out += "float"; break; + case 8: out += "double"; break; + case 10: out += "long double"; break; + default: out += "float" + to_string(m_width * 8); break; + } + if (!modifiers.empty()) + out += " " + modifiers; + break; + + case BoolTypeClass: + out += "bool"; + if (!modifiers.empty()) + out += " " + modifiers; + break; + + case VoidTypeClass: + out += "void"; + if (!modifiers.empty()) + out += " " + modifiers; + break; + + case VarArgsTypeClass: + out += "..."; + break; + + case PointerTypeClass: + if (m_childType) + m_childType->AppendBeforeName(out, this); + switch (m_pointerReference) + { + case ReferenceReferenceType: out += "&"; break; + case PointerReferenceType: out += "*"; break; + case RValueReferenceType: out += "&&"; break; + default: break; + } + if (!ptrSuffix.empty()) + out += " " + ptrSuffix; + if (!modifiers.empty()) + out += " " + modifiers; + break; + + case ArrayTypeClass: + if (m_childType) + m_childType->AppendBeforeName(out, this); + if (parentType && parentType->m_typeClass == PointerTypeClass) + { + out += " ("; + } + break; + + case NamedTypeReferenceClass: + switch (m_ntrClass) + { + case ClassNamedTypeClass: out += "class "; break; + case StructNamedTypeClass: out += "struct "; break; + case UnionNamedTypeClass: out += "union "; break; + case EnumNamedTypeClass: out += "enum "; break; + default: break; + } + out += GetTypeNameString(); + if (!modifiers.empty()) + out += " " + modifiers; + break; + + default: + break; + } +} + + +static string FormatArrayCount(uint64_t elements) +{ + return string(fmt::format("{:#x}", elements)); +} + + +void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* parentType) const +{ + string modifiers = GetModifierString(); + string ptrSuffix = GetPointerSuffixString(); + + switch (m_typeClass) + { + case FunctionTypeClass: + { 
+ // Close the "(" from before-name if parent is pointer + if (parentType && parentType->m_typeClass == PointerTypeClass) + out += ")"; + + out += "("; + for (size_t i = 0; i < m_params.size(); i++) + { + if (i != 0) + out += ", "; + if (m_params[i].type) + out += m_params[i].type->GetString(); + } + out += ")"; + if (!modifiers.empty()) + out += " " + modifiers; + if (!ptrSuffix.empty()) + out += ptrSuffix; + // Return type's after-name tokens + if (m_childType) + m_childType->AppendAfterName(out, this); + break; + } + case PointerTypeClass: + if (m_childType) + m_childType->AppendAfterName(out, this); + break; + case ArrayTypeClass: + if (parentType && parentType->m_typeClass == PointerTypeClass) + out += ")"; + out += "[" + FormatArrayCount(m_elements) + "]"; + if (m_childType) + m_childType->AppendAfterName(out, this); + break; + default: + break; + } +} + + +string DemangledTypeNode::GetString() const +{ + const string before = GetStringBeforeName(); + const string after = GetStringAfterName(); + if (!before.empty() && !after.empty() && before.back() != ' ' && before.back() != '*' + && before.back() != '&' && after.front() != ' ' && after.front() != '[' + && m_childType && m_childType->m_typeClass != FunctionTypeClass) + { + return before + " " + after; + } + return before + after; +} + + +string DemangledTypeNode::GetTypeAndName(const QualifiedName& name) const +{ + const string before = GetStringBeforeName(); + const string qName = name.GetString(); + const string after = GetStringAfterName(); + if ((!before.empty() && !qName.empty() && before.back() != ' ' && qName.front() != ' ') + || (!before.empty() && !after.empty() && before.back() != ' ' && after.front() != ' ')) + return before + " " + qName + after; + return before + qName + after; +} + + +Ref DemangledTypeNode::Finalize() const +{ + switch (m_typeClass) + { + case VoidTypeClass: + { + if (!m_const && !m_volatile) + return Type::VoidType(); + TypeBuilder tb = TypeBuilder::VoidType(); + 
tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + return tb.Finalize(); + } + + case BoolTypeClass: + { + if (!m_const && !m_volatile) + return Type::BoolType(); + TypeBuilder tb = TypeBuilder::BoolType(); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + return tb.Finalize(); + } + + case IntegerTypeClass: + { + if (!m_const && !m_volatile) + return Type::IntegerType(m_width, m_signed, m_altName); + TypeBuilder tb = TypeBuilder::IntegerType(m_width, m_signed, m_altName); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + return tb.Finalize(); + } + + case FloatTypeClass: + { + if (!m_const && !m_volatile) + return Type::FloatType(m_width); + TypeBuilder tb = TypeBuilder::FloatType(m_width); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + return tb.Finalize(); + } + + case VarArgsTypeClass: + return TypeBuilder::VarArgsType().Finalize(); + + case PointerTypeClass: + { + Ref child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); + return TypeBuilder::PointerType(m_width, child, m_const, m_volatile, m_pointerReference).Finalize(); + } + + case ArrayTypeClass: + { + Ref child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); + TypeBuilder tb = TypeBuilder::ArrayType(child, m_elements); + if (m_const) + tb.SetConst(m_const); + if (m_volatile) + tb.SetVolatile(m_volatile); + return tb.Finalize(); + } + + case FunctionTypeClass: + { + Ref retType = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); + vector finalParams; + finalParams.reserve(m_params.size()); + for (auto& p : m_params) + { + Ref pType = p.type ? 
p.type->Finalize() : Ref(Type::VoidType()); + finalParams.push_back({p.name, pType, true, Variable()}); + } + TypeBuilder tb = TypeBuilder::FunctionType(retType->WithConfidence(m_returnTypeConfidence), nullptr, finalParams); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + for (auto ps : m_pointerSuffix) + tb.AddPointerSuffix(ps); + tb.SetNameType(m_nameType); + return tb.Finalize(); + } + + case NamedTypeReferenceClass: + { + TypeBuilder tb = TypeBuilder::NamedType( + NamedTypeReference::GenerateAutoDemangledTypeReference( + m_ntrClass, QualifiedName(m_nameSegments ? *m_nameSegments : vector{})), + m_width, m_alignment > 0 ? m_alignment : 1); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + for (auto ps : m_pointerSuffix) + tb.AddPointerSuffix(ps); + tb.SetNameType(m_nameType); + tb.SetHasTemplateArguments(m_hasTemplateArgs); + return tb.Finalize(); + } + + default: + return Type::VoidType(); + } +} diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/gnu3/demangled_type_node.h new file mode 100644 index 000000000..f272166cc --- /dev/null +++ b/demangler/gnu3/demangled_type_node.h @@ -0,0 +1,171 @@ +// Copyright 2016-2026 Vector 35 Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#ifdef BINARYNINJACORE_LIBRARY +#include "qualifiedname.h" +#include "type.h" +#include "architecture.h" +#ifndef BN +#define BN BinaryNinjaCore +#endif +#ifndef _STD_STRING +#define _STD_STRING BinaryNinjaCore::string +#endif +#ifndef _STD_VECTOR +#define _STD_VECTOR BinaryNinjaCore::vector +#endif +#else +#include "binaryninjaapi.h" +#ifndef BN +#define BN BinaryNinja +#endif +#ifndef _STD_STRING +#define _STD_STRING std::string +#endif +#ifndef _STD_VECTOR +#define _STD_VECTOR std::vector +#endif +#endif + +#include +#ifdef BINARYNINJACORE_LIBRARY +#include "binaryninjacore_global.h" +#define _STD_SET BinaryNinjaCore::set +#else +#include +#define _STD_SET std::set +#endif + +// Lightweight type representation for the GNU3 demangler. +// This object serves as an abstraction layer between C++'s type system and our own. +// It also removes a source of a lot of reallocation of NamedTypeReference BinaryNinja::Type objects +// and only creates real Type objects when Finalize() is called. 
+class DemangledTypeNode +{ +public: + struct Param + { + _STD_STRING name; + std::shared_ptr type; + }; + + DemangledTypeNode(); + DemangledTypeNode(const DemangledTypeNode&) = default; + DemangledTypeNode(DemangledTypeNode&&) = default; + DemangledTypeNode& operator=(const DemangledTypeNode&) = default; + DemangledTypeNode& operator=(DemangledTypeNode&&) = default; + + // Static factory methods matching TypeBuilder's interface + static DemangledTypeNode VoidType(); + static DemangledTypeNode BoolType(); + static DemangledTypeNode IntegerType(size_t width, bool isSigned, const _STD_STRING& altName = ""); + static DemangledTypeNode FloatType(size_t width); + static DemangledTypeNode VarArgsType(); + static DemangledTypeNode PointerType(BN::Architecture* arch, DemangledTypeNode child, + bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode ArrayType(DemangledTypeNode child, uint64_t count); + static DemangledTypeNode FunctionType(DemangledTypeNode retType, + std::nullptr_t, _STD_VECTOR params); + static DemangledTypeNode NamedType(BNNamedTypeReferenceClass cls, + _STD_VECTOR<_STD_STRING> nameSegments, size_t width = 0, size_t align = 0); + static DemangledTypeNode NamedType(BNNamedTypeReferenceClass cls, + const BN::QualifiedName& name, size_t width = 0, size_t align = 0); + + // Getters + BNTypeClass GetClass() const { return m_typeClass; } +#ifdef BINARYNINJACORE_LIBRARY + BNTypeClass GetTypeClass() const { return m_typeClass; } +#endif + const _STD_VECTOR<_STD_STRING>& GetTypeName() const + { + if (!m_nameSegments) + { + static const _STD_VECTOR<_STD_STRING> empty; + return empty; + } + return *m_nameSegments; + } + _STD_VECTOR<_STD_STRING>& GetMutableTypeName() + { + if (!m_nameSegments) + m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(); + else if (m_nameSegments.use_count() > 1) + m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(*m_nameSegments); + return *m_nameSegments; + } + _STD_STRING GetTypeNameString() const; 
+ size_t NameStringSize() const; + bool IsConst() const { return m_const; } + bool IsVolatile() const { return m_volatile; } + BNNameType GetNameType() const { return m_nameType; } + bool HasTemplateArguments() const { return m_hasTemplateArgs; } + const _STD_SET& GetPointerSuffix() const { return m_pointerSuffix; } + BNNamedTypeReferenceClass GetNTRClass() const { return m_ntrClass; } + + // Setters + void SetTypeName(_STD_VECTOR<_STD_STRING> name) { m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(std::move(name)); } + void SetConst(bool c) { m_const = c; } + void SetVolatile(bool v) { m_volatile = v; } + void SetNameType(BNNameType nt) { m_nameType = nt; } + void SetHasTemplateArguments(bool t) { m_hasTemplateArgs = t; } + void SetPointerSuffix(const _STD_SET& s) { m_pointerSuffix = s; } + void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffix.insert(ps); } + void SetReturnTypeConfidence(uint8_t c) { m_returnTypeConfidence = c; } + + // Named type reference operations + void SetNTR(BNNamedTypeReferenceClass cls, _STD_VECTOR<_STD_STRING> nameSegments); + void SetNTR(BNNamedTypeReferenceClass cls, const BN::QualifiedName& name); + + // String formatting + _STD_STRING GetString() const; + _STD_STRING GetStringBeforeName() const; + _STD_STRING GetStringAfterName() const; + _STD_STRING GetTypeAndName(const BN::QualifiedName& name) const; + + // Conversion to real Type + BN::Ref Finalize() const; + +private: + BNTypeClass m_typeClass; + size_t m_width; + size_t m_alignment; + bool m_const; + bool m_volatile; + bool m_signed; + bool m_hasTemplateArgs; + BNNameType m_nameType; + _STD_SET m_pointerSuffix; + _STD_STRING m_altName; + + // Named type ref data + BNNamedTypeReferenceClass m_ntrClass; + std::shared_ptr<_STD_VECTOR<_STD_STRING>> m_nameSegments; + + // Child type (for pointer/array/function return) + std::shared_ptr m_childType; + BNReferenceType m_pointerReference; + uint64_t m_elements; + + // Function params + _STD_VECTOR m_params; + uint8_t 
m_returnTypeConfidence; + + // Helpers for string formatting + _STD_STRING GetModifierString() const; + _STD_STRING GetPointerSuffixString() const; + void AppendBeforeName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; + void AppendAfterName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; +}; From 9d6b64e8697ec85e8111b3d847b3e6d5853cf599 Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Tue, 10 Mar 2026 14:28:03 -0400 Subject: [PATCH 2/3] GNU3 Demangler: implement new construct support New special-name and type demangling: - GTt/GTn: GCC transaction clone symbols - TH/TW: TLS init and wrapper functions - Tc: covariant return thunks with dual call-offset parsing - Demangle _block_invoke symbols (Clang/Apple block invocations) - Intel Vector Function ABI (_ZGV) with ISA, mask, vlen, parameter kind parsing; z/Z guard variable disambiguation - Vendor-extended type (U ) for Objective-C block pointers - Lowercase vendor type (u ) for __remove_cvref etc. 
- GR reference temporary demangling - macOS $tlv$init thread-local variable initializer suffix - Fix typeinfo rendering to avoid duplicated name in output New operator and constructor support: - ABI tag (B ) consumption in DemangleUnqualifiedName, DemangleNestedName, and DemangleFunction - CI1/CI2 inheriting constructors with m_lastName save/restore - M data-member-prefix in DemangleNestedName for lambdas in data member initializers - operator<=> (spaceship) in GetOperator and DemangleUnqualifiedName Co-Authored-By: Claude Opus 4.6 (1M context) --- demangler/gnu3/demangle_gnu3.cpp | 494 +++++++++++++++++++++++++++---- 1 file changed, 433 insertions(+), 61 deletions(-) diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index c8fec64fb..8889c60b2 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -159,6 +159,7 @@ static string GetOperator(char elm1, char elm2) case hash('a','N'): return "&="; // &= case hash('o','R'): return "|="; // |= case hash('e','O'): return "^="; // ^= + case hash('s','s'): return "<=>"; // <=> case hash('d','l'): return "delete"; // delete case hash('d','a'): return "delete[]"; // delete[] case hash('n','w'): return "new"; // new @@ -503,8 +504,10 @@ const DemangledTypeNode& DemangleGNU3::DemangleTemplateSubstitution() } else if (isdigit(elm)) { - m_reader.Consume(); - number = elm - '0' + 1; + size_t n = 0; + while (isdigit(m_reader.Peek())) + n = n * 10 + (m_reader.Read() - '0'); + number = n + 1; } else if (isupper(elm)) { @@ -660,8 +663,43 @@ DemangledTypeNode DemangleGNU3::DemangleType() } case 'C': //TODO:complex case 'G': //TODO:imaginary - case 'U': //TODO:vendor extended type throw DemangleException(); + case 'U': + { + // Vendor-extended type: U [] + // Commonly used for Objective-C block pointers: + // U13block_pointer -> "void (params...) 
block_pointer" + string extName = DemangleSourceName(); + if (m_reader.Peek() == 'I') + { + m_reader.Consume(); + vector targs; + DemangleTemplateArgs(targs); + if (!targs.empty()) + extName += GetTemplateString(targs); + } + DemangledTypeNode inner = DemangleType(); + type = CreateUnknownType(inner.GetString() + " " + extName); + substitute = true; + break; + } + case 'u': + { + // Vendor extended type: u [] + // e.g. u14__remove_cvref, u20__remove_reference_t + string extName = DemangleSourceName(); + if (m_reader.Peek() == 'I') + { + m_reader.Consume(); + vector targs; + DemangleTemplateArgs(targs); + if (!targs.empty()) + extName += GetTemplateString(targs); + } + type = CreateUnknownType(extName); + substitute = true; + break; + } case 'v': type = DemangledTypeNode::VoidType(); break; case 'w': type = DemangledTypeNode::IntegerType(4, false, "wchar_t"); break; //TODO: verify case 'b': type = DemangledTypeNode::BoolType(); break; @@ -715,7 +753,7 @@ DemangledTypeNode DemangleGNU3::DemangleType() case 'p': type = DemangleType(); break; case 't': case 'T': - type = CreateUnknownType(DemangleExpression()); + type = CreateUnknownType("decltype(" + DemangleExpression() + ")"); if (m_reader.Read() != 'E') throw DemangleException(); break; @@ -1121,6 +1159,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() case hash('a','N'): // &= case hash('o','R'): // |= case hash('e','O'): // ^= + case hash('s','s'): // <=> outType = CreateUnknownType("operator" + GetOperator(elm1, elm2)); outType.SetNameType(GetNameType(elm1, elm2)); break; @@ -1161,6 +1200,19 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() outType = CreateUnknownType(m_lastName); outType.SetNameType(ConstructorNameType); break; + case hash('C','I'): // Inheriting constructor: CI1 or CI2 + { + char kind = m_reader.Read(); // '1' or '2' + if (kind != '1' && kind != '2') + throw DemangleException(); + // Save m_lastName: parsing the inherited-class type will overwrite it + string 
savedLastName = m_lastName; + DemangleType(); + m_lastName = savedLastName; + outType = CreateUnknownType(m_lastName); + outType.SetNameType(ConstructorNameType); + break; + } case hash('D','0'): //Destructor case hash('D','1'): case hash('D','2'): @@ -1237,6 +1289,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() string name = DemangleSourceName(); if (name.size() > 11 && name.substr(0, 11) == "_GLOBAL__N_") name = "(anonymous namespace)"; + m_lastName = name; outType = CreateUnknownType(name); } else @@ -1244,6 +1297,18 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() throw DemangleException(); } } + // Consume ABI tags: B => [abi:tagname] + // Applies to source names, operator names, and unnamed types. + while (m_reader.Peek() == 'B') + { + m_reader.Consume(); + string tag = "[abi:" + DemangleSourceName() + "]"; + auto qn = outType.GetTypeName(); + if (!qn.empty()) + qn.back() += tag; + m_lastName = qn.empty() ? tag : qn.back(); // read qn before it is moved-from below + outType.SetTypeName(std::move(qn)); + } dedent(); return outType; } @@ -1511,7 +1576,10 @@ string DemangleGNU3::DemangleExpression() DemangledTypeNode type = DemangleType(); out = type.GetString(); if (m_reader.Peek() == '_') + { + m_reader.Consume(); // consume '_' delimiter before expression list out += " (" + DemangleExpressionList() + ")"; + } else out += " (" + DemangleExpression() + ")"; return out; @@ -1541,10 +1609,12 @@ string DemangleGNU3::DemangleExpression() if (elm2 == 'L') { + // fL p [] _ + // When listNumber is out of range (e.g. fL used inside a decltype return + // type before function params are known), the fallback paths below produce + // a placeholder string "fp" / "fpN". 
listNumber = DemangleNumber() + 1; - if (listNumber < 0 || - (uint64_t)listNumber >= (uint64_t)m_functionSubstitute.size() || - m_reader.Read() != 'p') + if (listNumber < 0 || m_reader.Read() != 'p') throw DemangleException(); } DemangleCVQualifiers(cnst, vltl, rstrct); @@ -1752,7 +1822,6 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName() DemangledTypeNode newType; bool base = false; bool isTemplate = false; - bool hasB = false; //[] DemangleCVQualifiers(cnst, vltl, rstrct); @@ -1771,16 +1840,15 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName() while (m_reader.Peek() != 'E') { - if (m_reader.Peek() == 'B') - { - hasB = true; - break; - } isTemplate = false; substitute = true; size_t startSize = m_templateSubstitute.size(); switch (m_reader.Read()) { + case 'M': // : closure/lambda inside a data member initializer + // 'M' follows the member name and marks that subsequent components are + // scoped inside that data member. Just consume it; the name is already captured. + continue; case 'S': // newType = DemangleSubstitution(); substitute = false; @@ -1820,6 +1888,23 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName() type.SetNTR(type.GetNTRClass(), newName); type.SetHasTemplateArguments(false); } + // Consume any ABI tags (B ) following this name component. + // These appear as suffixes on in the Itanium ABI: + // ::= [] + // ::= B + // We append them as "[abi:tag]" to the last name segment for display. + // Save/restore m_lastName so that a following C1/D1 ctor/dtor name + // still resolves to the class name, not the ABI tag string. 
+ while (m_reader.Peek() == 'B') + { + m_reader.Consume(); + string savedLastName = m_lastName; + string abiTag = DemangleSourceName(); + m_lastName = savedLastName; + auto& segs = type.GetMutableTypeName(); + if (!segs.empty()) + segs.back() += "[abi:" + abiTag + "]"; + } if (substitute && m_reader.Peek() != 'E') { //Those template arguments were not the primary arguments so clear them from the sub listType @@ -1831,8 +1916,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName() } MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); } - if (!hasB) - m_reader.Consume(); + m_reader.Consume(); if (cnst || vltl || rstrct) { @@ -2022,38 +2106,223 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) case 'A': //TODO hidden alias LogWarn("Unsupported demangle type: hidden alias\n"); throw DemangleException(); - case 'R': //TODO reference temporaries - LogWarn("Unsupported demangle type: reference temporary\n"); - throw DemangleException(); - case 'T': //TODO transaction clones - LogWarn("Unsupported demangle type: transaction clone\n"); - throw DemangleException(); + case 'R': // GR [] _ # reference temporary + { + // is a production (nested, local, or unscoped). + // For local names (Z prefix), DemangleLocalName consumes the trailing '_' + // as a zero-discriminator, so we only consume '_' if it's still present. + DemangledTypeNode nameNode = DemangleName(); + // Consume optional base-36 seq-id (digits + uppercase A-Z) before '_'. 
+ string seqId; + while (m_reader.Length() > 0 && m_reader.Peek() != '_') + seqId += m_reader.Read(); + if (m_reader.Length() > 0) + m_reader.Consume(); // consume '_' + string result = "reference_temporary_for_" + nameNode.GetString(); + if (!seqId.empty()) + result += "[" + seqId + "]"; + varName.push_back(result); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); + } + case 'T': // transaction clone: GTt (safe) or GTn (non-safe) + { + // consume the 't' (transaction-safe) or 'n' (non-transaction-safe) qualifier + char kind = m_reader.Read(); + if (kind != 't' && kind != 'n') + throw DemangleException(); + oldTopLevel = m_topLevel; + m_topLevel = false; + DemangledTypeNode t = DemangleSymbol(name); + m_topLevel = oldTopLevel; + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, + _STD_VECTOR<_STD_STRING>{name.GetString() + " [transaction clone]" + t.GetStringAfterName()}); + } case 'V': { - DemangledTypeNode t = DemangleSymbol(name); - varName.push_back("guard_variable_for_" + t.GetTypeAndName(name)); - type = DemangledTypeNode::IntegerType(1, false); - if (m_reader.Length() == 0) + // Disambiguate: Intel Vector Function ABI (_ZGV...) vs guard variable (_ZGV). + // Intel Vector ABI isa codes: b c d e x y Y z Z + // Guard variable encoding starts with: N (nested), L (local), S (substitution), digit, etc. + char peekChar = m_reader.Peek(); + bool isVectorABI = (peekChar == 'b' || peekChar == 'c' || peekChar == 'd' || peekChar == 'e' || + peekChar == 'x' || peekChar == 'y' || peekChar == 'Y'); + // 'z'/'Z' are ambiguous: also used as Z-local-name prefix in guard variables + // (e.g. _ZGVZN1A1BEvE1A = guard variable for A::B()::A). + // Disambiguate by verifying the full Vector ABI parameter pattern: + // <'_'> where vparams are only + // from {v, l, u, R, L, s, 0-9} and are immediately followed by '_'. + // A guard variable's inner symbol would have source-name chars (e.g. 'm', 'a', etc.) 
+ // that don't appear in valid vparameter sequences. + if (!isVectorABI && (peekChar == 'z' || peekChar == 'Z')) + { + _STD_STRING ahead = m_reader.PeekString(std::min((size_t)32, m_reader.Length())); + if (ahead.size() >= 3 && (ahead[1] == 'M' || ahead[1] == 'N')) + { + size_t pos = 2; + while (pos < ahead.size() && isdigit((unsigned char)ahead[pos])) + pos++; + if (pos > 2) // had at least one vlen digit + { + // Scan through vparameter chars; valid ones are v/l/u/R/L and + // optional stride digits/'s'. Anything else means guard variable. + bool allVparam = true; + while (pos < ahead.size() && ahead[pos] != '_') + { + char c = ahead[pos]; + if (c == 'v' || c == 'l' || c == 'u' || c == 'R' || + c == 'L' || c == 's' || isdigit((unsigned char)c)) + pos++; + else + { + allVparam = false; + break; + } + } + isVectorABI = allVparam && pos < ahead.size() && ahead[pos] == '_'; + } + } + } + if (!isVectorABI) + { + // Guard variable (original behavior) + DemangledTypeNode t = DemangleSymbol(name); + varName.push_back("guard_variable_for_" + t.GetTypeAndName(name)); + type = DemangledTypeNode::IntegerType(1, false); + if (m_reader.Length() == 0) + return type; + //function parameters + string paramList; + paramList += "("; + bool first = true; + do + { + if (m_reader.Peek() == 'v') + { + m_reader.Consume(); + break; + } + if (!first) + paramList += ", "; + paramList += DemangleTypeString(); + }while (m_reader.Peek() != 'E'); + m_reader.Consume(); + varName.back() += paramList + ")"; + varName.push_back(DemangleSourceName()); + return type; - //function parameters - string paramList; - paramList += "("; - bool first = true; - do + } + + // Intel Vector Function ABI: + // GV '_' + + // Parse ISA + char isa = m_reader.Read(); + const char* isaName; + switch (isa) + { + case 'b': isaName = "SSE2"; break; + case 'c': isaName = "SSE4.2"; break; + case 'd': isaName = "AVX"; break; + case 'e': isaName = "AVX512"; break; + case 'x': isaName = "SSE2"; break; + case 'y': isaName = 
"AVX"; break; + case 'Y': isaName = "AVX2"; break; + case 'z': isaName = "MIC"; break; + case 'Z': isaName = "AVX512"; break; + default: isaName = "unknown"; break; + } + + // Parse mask: 'M' (mask) or 'N' (nomask) + char maskChar = m_reader.Read(); + if (maskChar != 'M' && maskChar != 'N') + throw DemangleException(); + const char* maskName = (maskChar == 'M') ? "mask" : "nomask"; + + // Parse vlen: non-negative decimal integer + if (!isdigit(m_reader.Peek())) + throw DemangleException(); + string vlenStr; + while (isdigit(m_reader.Peek())) + vlenStr += m_reader.Read(); + + // Parse vparameters until '_' separator + // + // ::= ('l'|'R'|'U'|'L') | 'u' | 'v' + // ::= empty | 's' | + // ::= empty | 'a' + string paramsStr; + bool firstParam = true; + while (m_reader.Length() > 0 && m_reader.Peek() != '_') { - if (m_reader.Peek() == 'v') + if (!firstParam) + paramsStr += ','; + firstParam = false; + + char pc = m_reader.Read(); + bool hasStride = false; + switch (pc) + { + case 'l': paramsStr += "linear"; hasStride = true; break; + case 'R': paramsStr += "linear(ref)"; hasStride = true; break; + case 'U': paramsStr += "linear(uval)"; hasStride = true; break; + case 'L': paramsStr += "linear(val)"; hasStride = true; break; + case 'u': paramsStr += "uniform"; break; + case 'v': paramsStr += "vector"; break; + default: throw DemangleException(); + } + + if (hasStride) + { + if (m_reader.Peek() == 's') + { + // linear_step passed as another argument at given 0-based position + m_reader.Consume(); + string argPos; + while (isdigit(m_reader.Peek())) + argPos += m_reader.Read(); + paramsStr += "(step=arg" + argPos + ")"; + } + else if (isdigit(m_reader.Peek()) || m_reader.Peek() == 'n') + { + // Literal stride; 'n' prefix means negative + string stride = DemangleNumberAsString(); + paramsStr += "(step=" + stride + ")"; + } + // else: empty stride means step of 1 + } + + // Optional alignment: 'a' + if (m_reader.Peek() == 'a') { m_reader.Consume(); - break; + while 
(isdigit(m_reader.Peek())) + m_reader.Read(); } - if (!first) - paramList += ", "; - paramList += DemangleTypeString(); - }while (m_reader.Peek() != 'E'); - m_reader.Consume(); - varName.back() += paramList + ")"; - varName.push_back(DemangleSourceName()); - return type; + } + + // Consume the '_' separator between parameters and routine name + if (m_reader.Length() == 0 || m_reader.Read() != '_') + throw DemangleException(); + + // Remainder is the scalar routine name (may be a plain C name or a _Z mangled name) + string routineName = m_reader.ReadString(m_reader.Length()); + + // Build the human-readable annotation + string annotation = " [SIMD:"; + annotation += isaName; + annotation += ','; + annotation += maskName; + annotation += ",N="; + annotation += vlenStr; + if (!paramsStr.empty()) + { + annotation += ",("; + annotation += paramsStr; + annotation += ')'; + } + annotation += ']'; + + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, + _STD_VECTOR<_STD_STRING>{routineName + annotation}); } default: throw DemangleException(); @@ -2075,9 +2344,38 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) m_reader.Consume(); switch (m_reader.Read()) { - case 'c': - LogWarn("Unsupported: 'virtual function covariant override thunk'\n"); - throw DemangleException(); + case 'c': // covariant return thunk: Tc + { + // consume a call-offset: h _ or v _ _ + auto consumeCallOffset = [&]() { + char kind = m_reader.Read(); + if (kind == 'h') + { + DemangleNumberAsString(); + if (m_reader.Read() != '_') + throw DemangleException(); + } + else if (kind == 'v') + { + DemangleNumberAsString(); + if (m_reader.Read() != '_') + throw DemangleException(); + DemangleNumberAsString(); + if (m_reader.Read() != '_') + throw DemangleException(); + } + else + throw DemangleException(); + }; + consumeCallOffset(); // this-pointer adjustment + consumeCallOffset(); // return-value adjustment + oldTopLevel = m_topLevel; + m_topLevel = false; + DemangledTypeNode t 
= DemangleSymbol(name); + m_topLevel = oldTopLevel; + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"covariant_return_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + } case 'C': { DemangledTypeNode t = DemangleType(); @@ -2106,11 +2404,17 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) return DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{"non-virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); } - case 'H': - LogWarn("Unsupported: 'TLS init function'\n"); - throw DemangleException(); + case 'H': // TLS init function + { + oldTopLevel = m_topLevel; + m_topLevel = false; + DemangledTypeNode t = DemangleSymbol(name); + m_topLevel = oldTopLevel; + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"tls_init_function_for_" + t.GetTypeAndName(name)}); + } case 'I': - return DemangledTypeNode::NamedType(StructNamedTypeClass, + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{"typeinfo_for_" + DemangleTypeString()}); case 'J': LogWarn("Unsupported: 'java class'\n"); @@ -2128,7 +2432,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) return DemangledTypeNode::NamedType(StructNamedTypeClass, _STD_VECTOR<_STD_STRING>{"VTT_for_" + t.GetString()}); } - case 'v': //TODO: Convert to whatever the actual type is! 
+ case 'v': // virtual thunk { DemangleNumberAsString(); if (m_reader.Read() != '_') @@ -2146,9 +2450,15 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) case 'V': //Vtable return DemangledTypeNode::NamedType(StructNamedTypeClass, _STD_VECTOR<_STD_STRING>{"vtable_for_" + DemangleTypeString()}); - case 'W': - MyLogDebug("Unsupported: 'TLS wrapper function'\n"); - throw DemangleException(); + case 'W': // TLS wrapper function + { + oldTopLevel = m_topLevel; + m_topLevel = false; + DemangledTypeNode t = DemangleSymbol(name); + m_topLevel = oldTopLevel; + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, + _STD_VECTOR<_STD_STRING>{"tls_wrapper_function_for_" + t.GetTypeAndName(name)}); + } default: throw DemangleException(); } @@ -2177,18 +2487,20 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) m_reader.Consume(); // TODO: If we get here we have a return type. What can we do with this info? } - if (m_reader.Peek() == 'B') + // Consume any ABI tags on the function/data name (e.g. B5cxx11). + // For nested names these are already consumed inside DemangleNestedName(); + // this handles the global-scope case. 
+ while (m_reader.Peek() == 'B') { m_reader.Consume(); - DemangledTypeNode t = DemangleUnqualifiedName(); - - if (t.GetString() == "cxx11") - { - static const QualifiedName stdCxx11StringName(vector{"std", "cxx11", "string"}); - returnType = CreateUnknownType(stdCxx11StringName); - } - } - else if (m_isOperatorOverload || + string savedLastName = m_lastName; + string abiTag = DemangleSourceName(); + m_lastName = savedLastName; + auto& segs = type.GetMutableTypeName(); + if (!segs.empty()) + segs.back() += "[abi:" + abiTag + "]"; + } + if (m_isOperatorOverload || type.GetNameType() == ConstructorNameType || type.GetNameType() == DestructorNameType) { @@ -2328,6 +2640,66 @@ bool DemangleGNU3Static::DemangleGlobalHeader(string& name, string& header) bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) { + // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). + // E.g. ____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 + // -> "invocation_function_for_block_in_dyld::_main(...)" + static const string blockInvokeSuffix = "_block_invoke"; + size_t blockPos = name.rfind(blockInvokeSuffix); + if (blockPos != string::npos) + { + // Verify the suffix is _block_invoke optionally followed by [._] only + string tail = name.substr(blockPos + blockInvokeSuffix.size()); + bool validSuffix = tail.empty(); + if (!validSuffix && (tail[0] == '.' 
|| tail[0] == '_')) + { + size_t i = 1; + while (i < tail.size() && isdigit((unsigned char)tail[i])) + i++; + validSuffix = (i == tail.size() && i > 1); + } + if (validSuffix) + { + // Extract the base symbol: everything before _block_invoke + string base = name.substr(0, blockPos); + // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it + size_t zPos = base.find_first_not_of('_'); + if (zPos != string::npos && base[zPos] == 'Z') + { + string normalized = "_" + base.substr(zPos); + Ref baseType; + QualifiedName baseName; + if (DemangleStringGNU3(arch, normalized, baseType, baseName)) + { + outVarName.clear(); + outVarName.push_back("invocation_function_for_block_in_" + baseName.GetString()); + outType = baseType; + return true; + } + } + } + } + + // Handle macOS thread-local variable initializer suffix: $tlv$init + // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. + static const string tlvInitSuffix = "$tlv$init"; + if (name.size() > tlvInitSuffix.size() && + name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) + { + string base = name.substr(0, name.size() - tlvInitSuffix.size()); + Ref baseType; + QualifiedName baseName; + if (DemangleStringGNU3(arch, base, baseType, baseName)) + { + outVarName = baseName; + if (outVarName.size() > 0) + outVarName[outVarName.size() - 1] += "$tlv$init"; + else + outVarName.push_back("$tlv$init"); + outType = baseType; + return true; + } + } + string encoding = name; string header; bool foundHeader = DemangleGlobalHeader(encoding, header); From af93db2cbfd24d5da40ed2951f5b9b05bf3d565c Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Tue, 10 Mar 2026 15:35:26 -0400 Subject: [PATCH 3/3] GNU3 Demangler: fix substitution tracking, expressions, and type names Substitution table and scoping: - Fix generic lambda auto parameters: save/replace m_templateSubstitute with 'auto' placeholders for lambda-local T_/T0_/T1_ resolution - Fix 
local-name template scoping with m_inLocalName flag - Fix substitution table off-by-one for all-type template args - Fix pointer-to-member type (case 'M'): set substitute=true per ABI - Fix multi-character seq-id parsing (S10_, S11_, etc.) - Fix DemangleUnresolvedType decltype (DT/Dt) prefix consumption - Add forward template ref support for cv conversion operators - Fix sr qualifier-list PushType and conditional E handling - Fix multi-level sr scope resolution without N prefix - Fix sr + unscoped template substitution table tracking - Fix I...E argument pack handling (GCC I...E vs J...E) - Fix Dp pack expansion to show '...' for concrete types Expression formatting: - Fix L_Z embedded mangled name template scope isolation - Handle LZ<encoding>E function address template args - cl expression: callable(args) format with function substitute tracking - dt/pt expressions: obj.member and obj->member directly - fp_/fp<N>_ placeholders in decltype return types - Fix fL function-param scope validation - Fix DemangleBaseUnresolvedName 'on' prefix consumption Type names (Itanium ABI spec compliance): - x/y: "long long"/"unsigned long long" - a: "signed char" (distinct from 'c' = "char") - n/o: "__int128"/"unsigned __int128" - g: "__float128" via FloatType altName - Dd/Df/De: "decimal64"/"decimal32"/"decimal128" - Dh: "_Float16" - Dp: always show "..." for pack expansion - Fix 'n' literal: __int128 not __uint128 - Fix dot extension spacing Cleanup: - Remove unused NextIsOneOf, ReadUntil, DemangleInitializer - Guard GetRaw/GetTemplateType/PrintTables behind GNUDEMANGLE_DEBUG - Remove unreachable GetNameType C/D cases - Remove dead BinaryExpression "."
check Co-Authored-By: Claude Opus 4.6 (1M context) --- demangler/gnu3/demangle_gnu3.cpp | 440 +++++++++++++++++++------ demangler/gnu3/demangle_gnu3.h | 29 +- demangler/gnu3/demangled_type_node.cpp | 11 +- demangler/gnu3/demangled_type_node.h | 2 +- 4 files changed, 365 insertions(+), 117 deletions(-) diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index 8889c60b2..5e598b4f0 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -30,7 +30,7 @@ using namespace std; #endif -#define MAX_DEMANGLE_LENGTH 4096 +#define MAX_DEMANGLE_LENGTH 262144 #define hash(x,y) (64 * x + y) #undef GNUDEMANGLE_DEBUG @@ -39,11 +39,11 @@ static string _indent = ""; #define indent() _indent += " "; #define dedent() do {if (_indent.size() > 0) _indent = _indent.substr(1);}while(0); -void MyLogDebug(string fmt, ...) +void MyLogDebug(const char* fmt, ...) { va_list args; va_start(args, fmt); - PerformLog(0, DebugLog, (_indent + fmt).c_str(), args); + PerformLog(0, DebugLog, "", 0, (_indent + fmt).c_str(), args); va_end(args); } #else @@ -218,17 +218,9 @@ static BNNameType GetNameType(char elm1, char elm2) case hash('d','a'): return OperatorDeleteArrayNameType; // delete[] case hash('n','w'): return OperatorNewNameType; // new case hash('n','a'): return OperatorNewArrayNameType; // new [] - case hash('C','1'): return ConstructorNameType; - case hash('C','2'): return ConstructorNameType; - case hash('C','3'): return ConstructorNameType; - case hash('C','4'): return ConstructorNameType; - case hash('C','5'): return ConstructorNameType; - case hash('D','0'): return DestructorNameType; - case hash('D','1'): return DestructorNameType; - case hash('D','2'): return DestructorNameType; - case hash('D','3'): return DestructorNameType; - case hash('D','4'): return DestructorNameType; - case hash('D','5'): return DestructorNameType; + // Note: C1-C5 (constructor) and D0-D5 (destructor) are handled directly + // by DemangleUnqualifiedName 
with their own case blocks, so they never + // reach GetNameType. default: return NoNameType; } @@ -270,22 +262,12 @@ string DemangleGNU3Reader::PeekString(size_t count) -bool DemangleGNU3Reader::NextIsOneOf(const string& list) -{ - char elm = Peek(); - for (auto a : list) - { - if (a == elm) - return true; - } - return false; -} - - +#ifdef GNUDEMANGLE_DEBUG string DemangleGNU3Reader::GetRaw() { return m_data.substr(m_offset); } +#endif @@ -300,14 +282,6 @@ string DemangleGNU3Reader::ReadString(size_t count) } -string DemangleGNU3Reader::ReadUntil(char sentinal) -{ - size_t pos = m_data.find_first_of(sentinal, m_offset); - if (pos == string::npos) - throw DemangleException(); - return ReadString(pos); -} - // ===== DemangleGNU3 implementation ===== @@ -318,7 +292,8 @@ DemangleGNU3::DemangleGNU3(Architecture* arch, const string& mangledName) : m_isParameter(false), m_shouldDeleteReader(true), m_topLevel(true), - m_isOperatorOverload(false) + m_isOperatorOverload(false), + m_permitForwardTemplateRefs(false) { MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); } @@ -340,6 +315,9 @@ void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) m_shouldDeleteReader = true; m_topLevel = true; m_isOperatorOverload = false; + m_permitForwardTemplateRefs = false; + m_pendingForwardRefs.clear(); + m_inLocalName = false; } @@ -376,15 +354,14 @@ void DemangleGNU3::PushTemplateType(const DemangledTypeNode& type) } +#ifdef GNUDEMANGLE_DEBUG const DemangledTypeNode& DemangleGNU3::GetTemplateType(size_t ref) { if (ref >= m_templateSubstitute.size()) - { - // PrintTables(); throw DemangleException(); - } return m_templateSubstitute[ref]; } +#endif void DemangleGNU3::PushType(const DemangledTypeNode& type) @@ -396,14 +373,12 @@ void DemangleGNU3::PushType(const DemangledTypeNode& type) const DemangledTypeNode& DemangleGNU3::GetType(size_t ref) { if (ref >= m_substitute.size()) - { - // PrintTables(); throw DemangleException(); - } return m_substitute[ref]; 
} +#ifdef GNUDEMANGLE_DEBUG void DemangleGNU3::PrintTables() { LogDebug("Substitution Table\n"); @@ -418,6 +393,7 @@ void DemangleGNU3::PrintTables() LogDebug("[%d] %s\n", i-1, GetTemplateType(i).GetString().c_str()); } } +#endif void DemangleGNU3::DemangleCVQualifiers(bool& cnst, bool& vltl, bool& rstrct) @@ -492,7 +468,43 @@ DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) } -const DemangledTypeNode& DemangleGNU3::DemangleTemplateSubstitution() +string DemangleGNU3::ForwardRefPlaceholder(size_t index) +{ + return "\x01FWDREF:" + to_string(index) + "\x01"; +} + + +void DemangleGNU3::ResolveForwardTemplateRefs(DemangledTypeNode& type, const vector& args) +{ + if (m_pendingForwardRefs.empty()) + return; + auto& segs = type.GetMutableTypeName(); + bool resolved = false; + for (const auto& fr : m_pendingForwardRefs) + { + string placeholder = ForwardRefPlaceholder(fr.index); + string replacement = (fr.index < args.size()) ? args[fr.index] : "auto"; + for (auto& seg : segs) + { + size_t pos; + while ((pos = seg.find(placeholder)) != string::npos) + { + seg.replace(pos, placeholder.size(), replacement); + resolved = true; + } + } + } + // Only clear the pending list when we actually resolved something. Inner + // nested-name 'I' handlers (e.g. template args of types nested inside the + // cv-operator result type) may call here with a type that does not contain + // the placeholder; we must not discard the pending entry in that case so + // that the correct outer 'I' handler can still resolve it. 
+ if (resolved) + m_pendingForwardRefs.clear(); +} + + +DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution() { indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); @@ -524,7 +536,20 @@ const DemangledTypeNode& DemangleGNU3::DemangleTemplateSubstitution() throw DemangleException(); } dedent(); - return GetTemplateType(number); + + if (number < m_templateSubstitute.size()) + return m_templateSubstitute[number]; + + // If forward template references are permitted (e.g. inside a cv conversion + // operator type), return a placeholder that will be resolved once the outer + // template args are known. + if (m_permitForwardTemplateRefs) + { + m_pendingForwardRefs.push_back({number}); + return CreateUnknownType(ForwardRefPlaceholder(number)); + } + + throw DemangleException(); } @@ -627,8 +652,11 @@ DemangledTypeNode DemangleGNU3::DemangleType() //Template Substitution type = DemangleTemplateSubstitution(); - substitute = true; - if (m_reader.Peek() == 'I') + // In forward-ref mode (cv conversion operator type parsing), do not consume + // trailing IE — it belongs to the enclosing nested-name and will be + // processed by DemangleNestedName's 'I' case, which resolves forward refs. 
+ substitute = !m_permitForwardTemplateRefs; + if (!m_permitForwardTemplateRefs && m_reader.Peek() == 'I') { m_reader.Consume(); if (substitute) @@ -703,8 +731,8 @@ DemangledTypeNode DemangleGNU3::DemangleType() case 'v': type = DemangledTypeNode::VoidType(); break; case 'w': type = DemangledTypeNode::IntegerType(4, false, "wchar_t"); break; //TODO: verify case 'b': type = DemangledTypeNode::BoolType(); break; - case 'c': type = DemangledTypeNode::IntegerType(1, true); break; - case 'a': type = DemangledTypeNode::IntegerType(1, true); break; + case 'c': type = DemangledTypeNode::IntegerType(1, true, "char"); break; + case 'a': type = DemangledTypeNode::IntegerType(1, true, "signed char"); break; case 'h': type = DemangledTypeNode::IntegerType(1, false); break; case 's': type = DemangledTypeNode::IntegerType(2, true); break; case 't': type = DemangledTypeNode::IntegerType(2, false); break; @@ -730,15 +758,16 @@ DemangledTypeNode DemangleGNU3::DemangleType() //DemangledTypeNode ptr = DemangledTypeNode::PointerType(m_arch, member, cnst, vltl); //QualifiedName qn({memberName.GetString(), "*"}); type = CreateUnknownType(fullName); + substitute = true; break; } case 'F': type = DemangleFunction(cnst, vltl); break; case 'D': switch (m_reader.Read()) { - case 'd': type = DemangledTypeNode::FloatType(8); break; - case 'e': type = DemangledTypeNode::FloatType(16); break; - case 'f': type = DemangledTypeNode::FloatType(4); break; + case 'd': type = DemangledTypeNode::FloatType(8, "decimal64"); break; + case 'e': type = DemangledTypeNode::FloatType(16, "decimal128"); break; + case 'f': type = DemangledTypeNode::FloatType(4, "decimal32"); break; case 'h': type = DemangledTypeNode::FloatType(2); break; case 'i': type = DemangledTypeNode::IntegerType(4, true, "char32_t"); break; case 's': type = DemangledTypeNode::IntegerType(2, true, "char16_t"); break; @@ -750,7 +779,12 @@ DemangledTypeNode DemangleGNU3::DemangleType() type = CreateUnknownType(stdNullptrTName); break; } - case 
'p': type = DemangleType(); break; + case 'p': + { + DemangledTypeNode inner = DemangleType(); + type = CreateUnknownType(inner.GetString() + "..."); + break; + } case 't': case 'T': type = CreateUnknownType("decltype(" + DemangleExpression() + ")"); @@ -870,13 +904,24 @@ DemangledTypeNode DemangleGNU3::DemangleSubstitution() m_reader.UnRead(1); number = 0; } - else if (isdigit(elm)) - { - number = elm - '0' + 1; - } - else if (isupper(elm)) + else if (isdigit(elm) || isupper(elm)) { - number = elm - 'A' + 11; + // Seq-id is encoded in base 36 using 0-9 A-Z. + // The actual substitution index = base36_value + 1. + // This handles both single-char (S0_ ... SZ_) and + // multi-char (S10_, S11_, ...) seq-ids. + size_t base36 = isdigit(elm) ? (size_t)(elm - '0') : (size_t)(elm - 'A' + 10); + while (m_reader.Peek() != '_') + { + char c = m_reader.Read(); + if (isdigit(c)) + base36 = base36 * 36 + (size_t)(c - '0'); + else if (isupper(c)) + base36 = base36 * 36 + (size_t)(c - 'A' + 10); + else + throw DemangleException(); + } + number = base36 + 1; } else { @@ -890,7 +935,11 @@ DemangledTypeNode DemangleGNU3::DemangleSubstitution() } dedent(); - return GetType(number); + const DemangledTypeNode& resolved = GetType(number); + const auto& segs = resolved.GetTypeName(); + if (!segs.empty()) + m_lastName = segs.back(); + return resolved; } m_lastName = name.back(); dedent(); @@ -938,20 +987,6 @@ int64_t DemangleGNU3::DemangleNumber() } -string DemangleGNU3::DemangleInitializer() -{ - string out; - if (m_reader.ReadString(2) != "pi") - throw DemangleException(); - out += "("; - while (m_reader.Peek() != 'E') - out += DemangleExpression(); - m_reader.Consume(); - out += ")"; - return out; -} - - string DemangleGNU3::DemanglePrimaryExpression() { indent(); @@ -966,16 +1001,36 @@ string DemangleGNU3::DemanglePrimaryExpression() if (m_reader.PeekString(2) == "_Z") { m_reader.Consume(2); + // The embedded _Z... is an independent mangled name with its own + // template scope. 
Save and clear the template substitution table + // so inner T_ / T0_ etc. resolve within this symbol, not the outer + // one. Set m_topLevel = true so template args get pushed properly. + auto savedTemplateSubstitute = m_templateSubstitute; + m_templateSubstitute.clear(); oldTopLevel = m_topLevel; - m_topLevel = false; + m_topLevel = true; DemangledTypeNode t = DemangleSymbol(tmpList); m_topLevel = oldTopLevel; - out += t.GetStringBeforeName(); - out += tmpList.GetString(); - out += t.GetStringAfterName(); + m_templateSubstitute = std::move(savedTemplateSubstitute); + out += t.GetTypeAndName(tmpList); dedent() return out; } + // LZE: function address template arg (GCC/Clang, without leading underscore) + if (m_reader.Peek() == 'Z') + { + m_reader.Consume(); // 'Z' + auto savedTemplateSubstitute2 = m_templateSubstitute; + m_templateSubstitute.clear(); + oldTopLevel = m_topLevel; + m_topLevel = true; + DemangledTypeNode t2 = DemangleSymbol(tmpList); + m_topLevel = oldTopLevel; + m_templateSubstitute = std::move(savedTemplateSubstitute2); + out += t2.GetTypeAndName(tmpList); + dedent(); + return out; + } switch (m_reader.Read()) { case 'b': @@ -1026,7 +1081,7 @@ string DemangleGNU3::DemanglePrimaryExpression() case 'l': out = DemangleNumberAsString() + "l"; break; //long case 'x': out = DemangleNumberAsString() + "ll"; break; //long long case 's': out = "(short)" + DemangleNumberAsString(); break; //short - case 'n': out = "(__uint128)" + DemangleNumberAsString() + "ull"; break; //__int128 + case 'n': out = "(__int128)" + DemangleNumberAsString(); break; //__int128 case 'i': out = DemangleNumberAsString(); break; // int case 'm': out = DemangleNumberAsString() + "ul"; break; //unsigned long case 't': out = "(unsigned short)" + DemangleNumberAsString(); break; //unsigned short @@ -1065,8 +1120,6 @@ string DemangleGNU3::DemangleBinaryExpression(const string& op) const string lhs = "(" + DemangleExpression() + ")"; const string rhs = "(" + DemangleExpression() + ")"; 
dedent(); - if (op == ".") - return lhs + op + rhs; return lhs + " " + op + " " + rhs; } @@ -1233,6 +1286,13 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() string name; name = "'lambda"; vector lambdaParams; + // Generic lambdas encode 'auto' params as T_, T0_, T1_... which reference + // the lambda's own operator() template params, not any outer template scope. + // Save and replace the template substitution table with 'auto' placeholders. + auto savedTemplateSubstitute = m_templateSubstitute; + m_templateSubstitute.clear(); + for (int autoIdx = 0; autoIdx < 16; autoIdx++) + m_templateSubstitute.push_back(CreateUnknownType("auto")); do { DemangledTypeNode param = DemangleType(); @@ -1241,6 +1301,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() lambdaParams.push_back(std::move(param)); }while (m_reader.Peek() != 'E'); m_reader.Consume(); + m_templateSubstitute = std::move(savedTemplateSubstitute); if (isdigit(m_reader.Peek())) { @@ -1280,8 +1341,22 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() break; } case hash('c','v'): //type (expression) - outType = CreateUnknownType("operator " + DemangleType().GetString()); + { + // The conversion operator type may reference template params (T_, T0_, ...) + // that aren't yet in m_templateSubstitute (they're defined by a following + // IE in the enclosing nested name). Set m_permitForwardTemplateRefs so + // that DemangleTemplateSubstitution() returns a placeholder instead of + // throwing, and don't consume trailing IE in the T case of DemangleType. + // The outer DemangleNestedName case 'I' will parse those args and call + // ResolveForwardTemplateRefs() to patch the placeholders. 
+ bool savedPermit = m_permitForwardTemplateRefs; + m_pendingForwardRefs.clear(); + m_permitForwardTemplateRefs = true; + DemangledTypeNode cvType = DemangleType(); + m_permitForwardTemplateRefs = savedPermit; + outType = CreateUnknownType("operator " + cvType.GetString()); break; + } default: m_reader.UnRead(2); if (isdigit(m_reader.Peek()) || m_reader.Read() == 'L') @@ -1330,6 +1405,7 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() const string str = m_reader.PeekString(2); if (str == "on") { + m_reader.Consume(); m_reader.Consume(); // skip 'o','n' prefix out.push_back(GetOperator(m_reader.Read(), m_reader.Read())); if (m_reader.Peek() == 'I') { @@ -1388,10 +1464,21 @@ DemangledTypeNode DemangleGNU3::DemangleUnresolvedType() type.SetHasTemplateArguments(true); PushType(type); } + else + { + // Template param used as scope qualifier (e.g. sr T_ name) is a substitution + // candidate: the compiler adds it to the main sub table so subsequent + // occurrences can use Sn_ instead of T_. + PushType(type); + } } else if (m_reader.Length() > 2 && (m_reader.PeekString(2) == "Dt" || m_reader.PeekString(2) == "DT")) { + m_reader.Consume(); // 'D' + m_reader.Consume(); // 't' or 'T' const string name = "decltype(" + DemangleExpression() + ")"; + if (m_reader.Read() != 'E') + throw DemangleException(); type = CreateUnknownType(name); } else if (m_reader.Peek() == 'S') @@ -1525,13 +1612,23 @@ string DemangleGNU3::DemangleExpression() case hash('p','p'): // ++ (postfix in context) case hash('m','m'): // -- (postfix in context) return DemangleUnarySuffixExpression(GetOperator(elm1, elm2)); + case hash('d','t'): // . + { + const string dtObj = DemangleExpression(); + const string dtMem = DemangleExpression(); + return dtObj + "." 
+ dtMem; + } + case hash('p','t'): // -> + { + const string ptObj = DemangleExpression(); + const string ptMem = DemangleExpression(); + return ptObj + "->" + ptMem; + } case hash('l','s'): // << case hash('r','s'): // >> case hash('a','S'): // = case hash('e','q'): // == case hash('n','e'): // != - case hash('d','t'): // . - case hash('p','t'): // -> case hash('m','l'): // * case hash('m','i'): // - case hash('p','l'): // + @@ -1570,7 +1667,23 @@ string DemangleGNU3::DemangleExpression() DemangleExpression() + ":" + DemangleExpression(); case hash('c','l'): // () - return "(" + DemangleExpressionList() + ")"; + { + const string callable = DemangleExpression(); + string args; + bool firstArg = true; + m_functionSubstitute.push_back({}); + while (m_reader.Peek() != 'E') + { + if (!firstArg) args += ", "; + const string e = DemangleExpression(); + args += e; + m_functionSubstitute.back().push_back(CreateUnknownType(e)); + firstArg = false; + } + m_functionSubstitute.pop_back(); + m_reader.Consume(); // 'E' + return callable + "(" + args + ")"; + } case hash('c','v'): //type (expression) { DemangledTypeNode type = DemangleType(); @@ -1604,9 +1717,6 @@ string DemangleGNU3::DemangleExpression() int64_t listNumber = 0; int64_t elementNum = 0; char elm; - if (m_functionSubstitute.size() == 0) - throw DemangleException(); - if (elm2 == 'L') { // fL p [] _ @@ -1622,20 +1732,27 @@ string DemangleGNU3::DemangleExpression() if (elm == '_') { m_reader.Consume(1); - if ((size_t)elementNum >= m_functionSubstitute[listNumber].size()) + if ((uint64_t)listNumber >= (uint64_t)m_functionSubstitute.size() || + (size_t)elementNum >= m_functionSubstitute[listNumber].size()) { - throw DemangleException(); + // fp_ used before params are known (e.g., in decltype return type) + out = (elementNum == 0) ? 
"fp" : "fp" + std::to_string(elementNum - 1); + break; } type = m_functionSubstitute[listNumber][elementNum]; } else if (isdigit(elm) || isupper(elm)) { elementNum = DemangleNumber() + 1; - if (m_reader.Read() != '_' || - elementNum < 0 || + if (m_reader.Read() != '_') + throw DemangleException(); + if (elementNum < 0 || + (uint64_t)listNumber >= (uint64_t)m_functionSubstitute.size() || (size_t)elementNum >= m_functionSubstitute[listNumber].size()) { - throw DemangleException(); + // fpN_ used before params are known + out = "fp" + std::to_string(elementNum - 1); + break; } type = m_functionSubstitute[listNumber][elementNum]; } @@ -1669,10 +1786,17 @@ string DemangleGNU3::DemangleExpression() if (m_reader.Peek() == 'N') { m_reader.Consume(); - out += DemangleUnresolvedType().GetString() + "::"; + // Standard form: N + E + // where is T_, Dt, or S. + // GCC extension: N + E + // When the first component is a digit (source name), skip the + // unresolved-type and let the loop below handle all qualifiers. + if (!isdigit(m_reader.Peek())) + out += DemangleUnresolvedType().GetString() + "::"; do { out += DemangleSourceName(); + // Push bare name (before template args) to substitution table. PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); if (m_reader.Peek() == 'I') { @@ -1681,6 +1805,8 @@ string DemangleGNU3::DemangleExpression() // DemangleTemplateArgs(args); out += GetTemplateString(args); + // Also push the template instantiation (name+args). + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); } out += "::"; }while (m_reader.Peek() != 'E'); @@ -1691,26 +1817,83 @@ string DemangleGNU3::DemangleExpression() } if (isdigit(m_reader.Peek())) { + // + E + // GCC sometimes omits the explicit qualifier-list 'E' when the last + // qualifier ends with template-args (the template-args 'E' serves double + // duty). 
Break out of the loop immediately after any qualifier with + // template-args rather than waiting for a standalone 'E'. + // + // Each qualifier level adds to the substitution table: + // - the bare name (before template-args) as a substitution candidate + // - the template instantiation (name + args) as another candidate + // This mirrors how the compiler builds the substitution table during encoding. + bool hadTemplateArgs = false; do { - out += DemangleSourceName(); + hadTemplateArgs = false; + const string segName = DemangleSourceName(); + out += segName; + // Push bare name to substitution table. + PushType(CreateUnknownType(out)); if (m_reader.Peek() == 'I') { vector args; m_reader.Consume(); - // - DemangleTemplateArgs(args); + DemangleTemplateArgs(args); // consumes the trailing 'E' out += GetTemplateString(args); + // Also push the template instantiation. + PushType(CreateUnknownType(out)); + hadTemplateArgs = true; } out += "::"; - }while (m_reader.Peek() != 'E'); - m_reader.Consume(); + }while (!hadTemplateArgs && m_reader.Peek() != 'E'); + // Consume qualifier-list 'E' if present. GCC sometimes omits it when + // the last qualifier had template-args whose 'E' served double duty, + // so check rather than unconditionally consuming. + if (m_reader.Peek() == 'E') + m_reader.Consume(); out += DemangleBaseUnresolvedName().GetString(); return out; } else { out += DemangleUnresolvedType().GetString() + "::"; + // GCC may encode multi-level scoped names without the 'N' qualifier + // prefix, e.g. "sr St 6__and_IE 5value" for std::__and_::value. + // Process any digit-started names: if a name has template args AND + // another source name follows, it is an intermediate qualifier level; + // otherwise it is the final base-unresolved-name. 
+ while (isdigit(m_reader.Peek())) + { + const string segName = DemangleSourceName(); + if (m_reader.Peek() == 'I') + { + vector args; + m_reader.Consume(); + DemangleTemplateArgs(args); + if (isdigit(m_reader.Peek())) + { + // Another source name follows — intermediate qualifier. + // Push to the substitution table, mirroring what the + // N-prefix sr branch does for each nested qualifier. + PushType(CreateUnknownType(out + segName + GetTemplateString(args))); + out += segName + GetTemplateString(args) + "::"; + } + else + { + // No more source names — this template-id is the final name. + out += segName + GetTemplateString(args); + return out; + } + } + else + { + // Plain source name with no template args — final base name. + out += segName; + return out; + } + } + // peek is not a digit: fall back for operator-names ("on") / destructor-names ("dn"). out += DemangleBaseUnresolvedName().GetString(); } return out; @@ -1731,7 +1914,7 @@ string DemangleGNU3::DemangleExpression() } -void DemangleGNU3::DemangleTemplateArgs(vector& args) +void DemangleGNU3::DemangleTemplateArgs(vector& args, bool* hadNonTypeArg) { indent(); MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); @@ -1749,15 +1932,23 @@ void DemangleGNU3::DemangleTemplateArgs(vector& args) args.push_back(expr); tmp = CreateUnknownType(expr); tmpValid = true; + if (hadNonTypeArg) *hadNonTypeArg = true; break; case 'X': args.push_back(DemangleExpression()); if (m_reader.Read() != 'E') throw DemangleException(); + if (hadNonTypeArg) *hadNonTypeArg = true; break; + case 'I': // GCC sometimes uses I...E for argument packs instead of J...E case 'J': + { + size_t prevTemplateSize = m_templateSubstitute.size(); DemangleTemplateArgs(args); + if (m_topLevel && m_templateSubstitute.size() == prevTemplateSize) + PushTemplateType(CreateUnknownType("auto")); break; + } default: m_reader.UnRead(); topLevel = m_topLevel; @@ -1780,7 +1971,7 @@ void DemangleGNU3::DemangleTemplateArgs(vector& args) } 
-DemangledTypeNode DemangleGNU3::DemangleNestedName() +DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) { /* This can be either a qualified name like: "foo::bar::bas" @@ -1861,7 +2052,16 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName() if (!base) throw DemangleException(); vector args; - DemangleTemplateArgs(args); + bool hadNonType = false; + DemangleTemplateArgs(args, allTypeTemplateArgs ? &hadNonType : nullptr); + if (allTypeTemplateArgs) + *allTypeTemplateArgs = !hadNonType; + // Resolve any forward template refs created while parsing a cv + // conversion operator type (e.g. cv T_ where T_ wasn't yet known). + // Only do this in the outer context (not while still inside the cv + // type parsing itself where m_permitForwardTemplateRefs is true). + if (!m_permitForwardTemplateRefs) + ResolveForwardTemplateRefs(type, args); ExtendTypeName(type, GetTemplateString(args)); type.SetHasTemplateArguments(true); isTemplate = true; @@ -1942,10 +2142,21 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode type; QualifiedName varName; + // The local function has its own template scope. Save the outer template + // substitution table and set m_topLevel = true so that when the local + // function's template args are parsed (e.g. handleMessageDelayed), + // they populate m_templateSubstitute and are available for T_/T0_/T1_ + // references in the function's parameter types. 
+ auto savedTemplateSubstitute = m_templateSubstitute; + m_templateSubstitute.clear(); bool oldTopLevel = m_topLevel; - m_topLevel = false; + m_topLevel = true; + bool savedInLocalName = m_inLocalName; + m_inLocalName = true; type = DemangleSymbol(varName); + m_inLocalName = savedInLocalName; m_topLevel = oldTopLevel; + m_templateSubstitute = std::move(savedTemplateSubstitute); if (varName.size() > 0) varName.back() += (type.GetStringAfterName()); @@ -1954,6 +2165,15 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() if (m_reader.Peek() != 's') { + // Handle default argument context: d [] _ + if (m_reader.Peek() == 'd') + { + m_reader.Consume(); + if (isdigit(m_reader.Peek())) + DemangleNumber(); + if (m_reader.Peek() == '_') + m_reader.Consume(); + } // DemangledTypeNode tmpType = DemangleName(); type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); @@ -2041,11 +2261,21 @@ DemangledTypeNode DemangleGNU3::DemangleName() DemangleTemplateArgs(args); ExtendTypeName(type, GetTemplateString(args)); type.SetHasTemplateArguments(true); + // Push the template instantiation (e.g. std::swap) so that the + // substitution table matches what the encoder built. The encoder adds + // both the unscoped-template-name (prefix, already pushed above) and + // the full template-id (instantiation). + PushType(type); } break; case 'N': // - type = DemangleNestedName(); + { + bool allTypeArgs = false; + type = DemangleNestedName(&allTypeArgs); + if (!m_inLocalName && allTypeArgs) + PushType(type); break; + } case 'Z': // type = DemangleLocalName(); break; @@ -2211,6 +2441,8 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) return type; } + + // Intel Vector Function ABI: // GV '_' @@ -2536,12 +2768,12 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) // On the off chance some invalid mangled string is passed in. 
if (varName.size() > 0) - varName.back() += ext; + varName.back() += " " + ext; break; } m_isParameter = true; - MyLogDebug("Var_%d: %s\n", i, m_reader.GetRaw().c_str()); + MyLogDebug("Var: %s\n", m_reader.GetRaw().c_str()); if (m_reader.PeekString(2) == "@@") break; DemangledTypeNode param = DemangleType(); diff --git a/demangler/gnu3/demangle_gnu3.h b/demangler/gnu3/demangle_gnu3.h index 2e46e05d5..8ee0fca62 100644 --- a/demangler/gnu3/demangle_gnu3.h +++ b/demangler/gnu3/demangle_gnu3.h @@ -51,10 +51,10 @@ class DemangleGNU3Reader DemangleGNU3Reader(const _STD_STRING& data); void Reset(const _STD_STRING& data); _STD_STRING PeekString(size_t count=1); - bool NextIsOneOf(const _STD_STRING& list); +#ifdef GNUDEMANGLE_DEBUG _STD_STRING GetRaw(); +#endif _STD_STRING ReadString(size_t count=1); - _STD_STRING ReadUntil(char sentinal); size_t Length() const { return m_data.length() - m_offset; } @@ -109,6 +109,17 @@ class DemangleGNU3 bool m_shouldDeleteReader; bool m_topLevel; bool m_isOperatorOverload; + // Forward template reference support (for cv conversion operator types). + // When m_permitForwardTemplateRefs is true, DemangleTemplateSubstitution() + // returns a placeholder instead of throwing for out-of-bounds template params. + // m_pendingForwardRefs records which param indices have placeholders so that + // ResolveForwardTemplateRefs() can patch them once template args are known. 
+ bool m_permitForwardTemplateRefs; + bool m_inLocalName; + struct ForwardRef { size_t index; }; + _STD_VECTOR m_pendingForwardRefs; + void ResolveForwardTemplateRefs(DemangledTypeNode& type, const _STD_VECTOR<_STD_STRING>& args); + static _STD_STRING ForwardRefPlaceholder(size_t index); enum SymbolType { Function, FunctionWithReturn, Data, VTable, Rtti, Name}; BN::QualifiedName DemangleBaseUnresolvedName(); DemangledTypeNode DemangleUnresolvedType(); @@ -121,7 +132,6 @@ class DemangleGNU3 DemangledTypeNode DemangleUnqualifiedName(); _STD_STRING DemangleSourceName(); _STD_STRING DemangleNumberAsString(); - _STD_STRING DemangleInitializer(); _STD_STRING DemangleExpression(); _STD_STRING DemanglePrimaryExpression(); DemangledTypeNode DemangleName(); @@ -129,14 +139,13 @@ class DemangleGNU3 void DemangleCVQualifiers(bool& cnst, bool& vltl, bool& rstrct); DemangledTypeNode DemangleSubstitution(); - const DemangledTypeNode& DemangleTemplateSubstitution(); - void DemangleTemplateArgs(_STD_VECTOR<_STD_STRING>& args); + DemangledTypeNode DemangleTemplateSubstitution(); + void DemangleTemplateArgs(_STD_VECTOR<_STD_STRING>& args, bool* hadNonTypeArg = nullptr); DemangledTypeNode DemangleFunction(bool cnst, bool vltl); DemangledTypeNode DemangleType(); int64_t DemangleNumber(); - DemangledTypeNode DemangleNestedName(); + DemangledTypeNode DemangleNestedName(bool* allTypeTemplateArgs = nullptr); void PushTemplateType(const DemangledTypeNode& type); - const DemangledTypeNode& GetTemplateType(size_t ref); void PushType(const DemangledTypeNode& type); const DemangledTypeNode& GetType(size_t ref); @@ -144,12 +153,16 @@ class DemangleGNU3 DemangledTypeNode CreateUnknownType(const _STD_STRING& s); static void ExtendTypeName(DemangledTypeNode& type, const _STD_STRING& extend); +#ifdef GNUDEMANGLE_DEBUG + const DemangledTypeNode& GetTemplateType(size_t ref); + void PrintTables(); +#endif + public: DemangleGNU3(BN::Architecture* arch, const _STD_STRING& mangledName); void 
Reset(BN::Architecture* arch, const _STD_STRING& mangledName); DemangledTypeNode DemangleSymbol(BN::QualifiedName& varName); BN::QualifiedName GetVarName() const { return m_varName; } - void PrintTables(); }; diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/gnu3/demangled_type_node.cpp index d8a98e169..89bfb2fad 100644 --- a/demangler/gnu3/demangled_type_node.cpp +++ b/demangler/gnu3/demangled_type_node.cpp @@ -63,11 +63,12 @@ DemangledTypeNode DemangledTypeNode::IntegerType(size_t width, bool isSigned, co } -DemangledTypeNode DemangledTypeNode::FloatType(size_t width) +DemangledTypeNode DemangledTypeNode::FloatType(size_t width, const string& altName) { DemangledTypeNode n; n.m_typeClass = FloatTypeClass; n.m_width = width; + n.m_altName = altName; return n; } @@ -274,7 +275,9 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p break; case FloatTypeClass: - switch (m_width) + if (!m_altName.empty()) + out += m_altName; + else switch (m_width) { case 2: out += "float16"; break; case 4: out += "float"; break; @@ -464,8 +467,8 @@ Ref DemangledTypeNode::Finalize() const case FloatTypeClass: { if (!m_const && !m_volatile) - return Type::FloatType(m_width); - TypeBuilder tb = TypeBuilder::FloatType(m_width); + return Type::FloatType(m_width, m_altName); + TypeBuilder tb = TypeBuilder::FloatType(m_width, m_altName); tb.SetConst(m_const); tb.SetVolatile(m_volatile); return tb.Finalize(); diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/gnu3/demangled_type_node.h index f272166cc..62ad9004a 100644 --- a/demangler/gnu3/demangled_type_node.h +++ b/demangler/gnu3/demangled_type_node.h @@ -72,7 +72,7 @@ class DemangledTypeNode static DemangledTypeNode VoidType(); static DemangledTypeNode BoolType(); static DemangledTypeNode IntegerType(size_t width, bool isSigned, const _STD_STRING& altName = ""); - static DemangledTypeNode FloatType(size_t width); + static DemangledTypeNode FloatType(size_t width, const _STD_STRING& 
altName = ""); static DemangledTypeNode VarArgsType(); static DemangledTypeNode PointerType(BN::Architecture* arch, DemangledTypeNode child, bool cnst, bool vltl, BNReferenceType refType);