From 7b9d4bae93246270d3412cba505c6200811b754e Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Fri, 3 Oct 2025 16:43:24 -0700 Subject: [PATCH 1/7] Working on COFF parsing --- include/orc/coff.hpp | 23 ++++++ include/orc/parse_file.hpp | 23 +++++- src/coff.cpp | 156 +++++++++++++++++++++++++++++++++++++ src/dwarf.cpp | 12 +-- src/parse_file.cpp | 13 ++++ 5 files changed, 218 insertions(+), 9 deletions(-) create mode 100644 include/orc/coff.hpp create mode 100644 src/coff.cpp diff --git a/include/orc/coff.hpp b/include/orc/coff.hpp new file mode 100644 index 0000000..da4ad63 --- /dev/null +++ b/include/orc/coff.hpp @@ -0,0 +1,23 @@ +// Copyright 2021 Adobe +// All Rights Reserved. +// +// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms +// of the Adobe license agreement accompanying it. + +#pragma once + +// stdc++ +#include + +// application +#include "orc/parse_file.hpp" + +//-------------------------------------------------------------------------------------------------- + +void read_coff(object_ancestry&& ancestry, + freader& s, + std::istream::pos_type end_pos, + file_details details, + macho_params params); + +//-------------------------------------------------------------------------------------------------- diff --git a/include/orc/parse_file.hpp b/include/orc/parse_file.hpp index c8bb0d4..1fad85d 100644 --- a/include/orc/parse_file.hpp +++ b/include/orc/parse_file.hpp @@ -44,6 +44,11 @@ struct freader { return _p - _f; } + std::size_t leftovers() const { + ADOBE_INVARIANT(*this); + return _l - _p; + } + void seekg(std::istream::off_type offset) { _p = _f + offset; ADOBE_INVARIANT(*this); @@ -70,13 +75,24 @@ struct freader { ADOBE_INVARIANT(*this); } - void read(char* p, std::size_t n) { + // Read a exactly `n` bytes into `p`. + // Assumes the bytes are plain old data. + void read(void* p, std::size_t n) { + ADOBE_INVARIANT(leftovers() > n); std::memcpy(p, _p, n); _p += n; ADOBE_INVARIANT(*this); } + // Read exactly `sizeof(T)` bytes into `x` + // Assumes the value is plain old data. + template + void read(T& x) { + read(&x, sizeof(T)); + } + char get() { + ADOBE_INVARIANT(leftovers() > 0); char result = *_p++; ADOBE_INVARIANT(*this); return result; @@ -153,6 +169,7 @@ struct file_details { macho, ar, fat, + coff, }; std::size_t _offset{0}; format _format{format::unknown}; @@ -181,7 +198,7 @@ void endian_swap(T& c) { template T read_pod(freader& s) { T x; - s.read(reinterpret_cast(&x), sizeof(T)); + s.read(x); return x; } @@ -193,7 +210,7 @@ inline bool read_pod(freader& s) { template T read_pod(freader& s, bool byteswap) { T x; - s.read(reinterpret_cast(&x), sizeof(T)); + s.read(&x, sizeof(T)); if (byteswap) { endian_swap(x); } diff --git a/src/coff.cpp b/src/coff.cpp new file mode 100644 index 0000000..aca0166 --- /dev/null +++ b/src/coff.cpp @@ -0,0 +1,156 @@ +// Copyright 2021 Adobe +// All Rights Reserved. +// +// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms +// of the Adobe license agreement accompanying it. + +// identity +#include "orc/coff.hpp" + +// stdc++ +#include + +// adobe contract checks +#include "adobe/contract_checks.hpp" + +// application +#include "orc/dwarf.hpp" +#include "orc/object_file_registry.hpp" + +//-------------------------------------------------------------------------------------------------- + +namespace { + +//-------------------------------------------------------------------------------------------------- +// +// Relevant documentation: +// - Portable Executable (PE) format: https://learn.microsoft.com/en-us/windows/win32/debug/pe-format +// - image_file_header: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_file_header +// - image_section_header: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header +// + +struct image_file_header { + std::int16_t machine{0}; + std::int16_t section_count{0}; + std::int32_t datetimestamp{0}; + std::int32_t symbol_table_pointer{0}; + std::int32_t symbol_count{0}; + std::int16_t optional_header_size{0}; + std::int16_t characteristics{0}; +}; + +static_assert(sizeof(image_file_header) == 20); + +struct image_section_header { + std::int8_t name[8]{0}; + union { + std::int32_t physical_address{0}; + std::int32_t virtual_size; + } misc; + std::int32_t virtual_address{0}; + std::int32_t raw_data_size{0}; + std::int32_t raw_data_pointer{0}; + std::int32_t relocations_pointer{0}; + std::int32_t line_numbers_pointer{0}; + std::int16_t relocations_count{0}; + std::int16_t line_numbers_count{0}; + std::int32_t characteristics{0}; +}; + +static_assert(sizeof(image_section_header) == 40); + +struct section { + image_section_header header; + std::string actual_name; +}; + +//-------------------------------------------------------------------------------------------------- +#if 0 +/// Similar to strlen, except with an upper limit as to the size of the string. +/// APPARENTLY this is already available in some POSIX extensions \ macOS. +/// Keeping this around just in case. +std::size_t strnlen(const char* s, std::size_t n) { + std::size_t result{0}; + for (; *s; ++s) { + if (++result == n) { + break; + } + } + return result; +} +#endif +//-------------------------------------------------------------------------------------------------- + +} // namespace + +//-------------------------------------------------------------------------------------------------- + +void read_coff(object_ancestry&& ancestry, + freader& s, + std::istream::pos_type end_pos, + file_details details, + macho_params params) { + std::uint32_t ofd_index = + static_cast(object_file_register(std::move(ancestry), copy(details))); + dwarf dwarf(ofd_index, copy(s), copy(details)); + + // If you hit this, you're running ORC in a mode not supported by COFF. + ADOBE_INVARIANT(params._mode == macho_reader_mode::register_dies || + params._mode == macho_reader_mode::odrv_reporting); + + // The general format of COFF is: + // header + // section headers + // section data + // symbols + // strings + // In our case, we're just looking for the DWARF data, which is housed + // in one of the "raw data" blocks in COFF sections whose names are + // the DWARF segments we are interested in (debug_info, debug_abbrev, + // etc.) So we don't need to read anything beyond the section headers. + + const auto header = read_pod(s); + + // According to the PE format docs there should be no optional header for object files. + ADOBE_INVARIANT(header.optional_header_size == 0); + + // Grab the string table offset and size, which we'll need when deriving + // the name of some of the sections we read below. + const auto string_table_offset = header.symbol_table_pointer + header.symbol_count * 18; + const auto string_table_size = temp_seek(s, string_table_offset, [&]{ + return read_pod(s); + }); + + // Read the section headers. As we go, derive the actual section header + // name, which may be in the string table. If the name is a DWARF segment, + // add it to the DWARF processor. + std::vector
sections(header.section_count); + for (auto& section : sections) { + s.read(section.header); + const char* name = reinterpret_cast(§ion.header.name[0]); + if (*name != '/') { + // strnlen is the same as strlen but with a string length upper limit. + // Apparently its available via POSIX extension? Who knew. Not this guy. + std::size_t len = strnlen(name, 8); + section.actual_name = std::string(name, len); + } else { + ++name; + int section_name_offset = std::atoi(name); + ADOBE_INVARIANT(section_name_offset < string_table_size); + section.actual_name = temp_seek(s, string_table_offset + section_name_offset, [&]{ + return s.read_c_string_view(); + }); + } + + if (section.actual_name.starts_with(".debug")) { + std::cout << section.actual_name << '\n'; + dwarf.register_section(section.actual_name, + section.header.raw_data_pointer, + section.header.raw_data_size); + } + } + + dwarf.process_all_dies(); +} + +//-------------------------------------------------------------------------------------------------- diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 10dbd95..b6bbce0 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -793,17 +793,17 @@ void dwarf::implementation::register_section(const std::string& name, // Instead, the section registration must be complete and cannot be revisited. ADOBE_PRECONDITION(!_ready); - if (name == "__debug_str") { + if (name == "__debug_str" || name == ".debug_str") { _debug_str = section{offset, size}; - } else if (name == "__debug_info") { + } else if (name == "__debug_info" || name == ".debug_info") { _debug_info = section{offset, size}; - } else if (name == "__debug_abbrev") { + } else if (name == "__debug_abbrev" || name == ".debug_abbrev") { _debug_abbrev = section{offset, size}; - } else if (name == "__debug_line") { + } else if (name == "__debug_line" || name == ".debug_line") { _debug_line = section{offset, size}; - } else if (name == "__debug_line_str__DWARF") { + } else if (name == "__debug_line_str__DWARF" || name == ".debug_line_str__DWARF") { _debug_line_str = section{offset, size}; - } else if (name == "__debug_str_offs__DWARF") { + } else if (name == "__debug_str_offs__DWARF" || name == ".debug_str_offs__DWARF") { _debug_str_offsets = section{offset, size}; } else { // save for debugging. diff --git a/src/parse_file.cpp b/src/parse_file.cpp index 37433eb..0b8912c 100644 --- a/src/parse_file.cpp +++ b/src/parse_file.cpp @@ -26,6 +26,7 @@ // application #include "orc/ar.hpp" +#include "orc/coff.hpp" #include "orc/fat.hpp" #include "orc/macho.hpp" #include "orc/orc.hpp" @@ -53,6 +54,15 @@ file_details detect_file(freader& s) { } else if (header == FAT_MAGIC || header == FAT_CIGAM || header == FAT_MAGIC_64 || header == FAT_CIGAM_64) { result._format = file_details::format::fat; + } else if ((header & 0xffff) == 0x8664) { + // In COFF, the first 16 bits are a machine type code, + // which we are treating here like a magic number. We + // only check for one (0x8664) which means x64. This + // will likely need updating as other machine types are + // observed. + // See https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types + result._format = file_details::format::coff; + result._is_64_bit = true; } result._is_64_bit = header == MH_MAGIC_64 || header == MH_CIGAM_64 || @@ -175,6 +185,9 @@ void parse_file(std::string_view object_name, case file_details::format::fat: return read_fat(std::move(new_ancestry), s, end_pos, std::move(detection), std::move(params)); + case file_details::format::coff: + return read_coff(std::move(new_ancestry), s, end_pos, std::move(detection), + std::move(params)); } } From 20021eadbe9672c68b8b25f04c25a43f5a28de96 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Sat, 4 Oct 2025 11:12:12 -0700 Subject: [PATCH 2/7] renaming a section --- src/dwarf.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dwarf.cpp b/src/dwarf.cpp index b6bbce0..bd8a689 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -801,9 +801,9 @@ void dwarf::implementation::register_section(const std::string& name, _debug_abbrev = section{offset, size}; } else if (name == "__debug_line" || name == ".debug_line") { _debug_line = section{offset, size}; - } else if (name == "__debug_line_str__DWARF" || name == ".debug_line_str__DWARF") { + } else if (name == "__debug_line_str__DWARF" || name == ".debug_line_str") { _debug_line_str = section{offset, size}; - } else if (name == "__debug_str_offs__DWARF" || name == ".debug_str_offs__DWARF") { + } else if (name == "__debug_str_offs__DWARF" || name == ".debug_str_offsets") { _debug_str_offsets = section{offset, size}; } else { // save for debugging. From f00bed54105b61178321382f20a26cc3f7d11f34 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Mon, 6 Oct 2025 09:01:43 -0700 Subject: [PATCH 3/7] coff is crawling --- include/orc/ar.hpp | 2 +- include/orc/coff.hpp | 6 +- include/orc/dwarf_constants.hpp | 32 +- include/orc/dwarf_structs.hpp | 110 +-- include/orc/fat.hpp | 2 +- include/orc/features.hpp | 14 +- include/orc/fixed_vector.hpp | 84 +- include/orc/macho.hpp | 7 +- include/orc/orc.hpp | 4 +- include/orc/parse_file.hpp | 13 +- include/orc/settings.hpp | 6 +- include/orc/str.hpp | 4 +- include/orc/string_pool.hpp | 51 +- include/orc/task_system.hpp | 34 +- include/orc/tracy.hpp | 12 +- justfile | 5 + src/ar.cpp | 2 +- src/coff.cpp | 61 +- src/dwarf.cpp | 282 +++--- src/dwarf_constants.cpp | 1574 ++++++++++++++++++++----------- src/fat.cpp | 4 +- src/macho.cpp | 32 +- src/main.cpp | 46 +- src/orc.cpp | 48 +- src/parse_file.cpp | 4 +- src/str.cpp | 21 +- src/string_pool.cpp | 4 +- 27 files changed, 1502 insertions(+), 962 deletions(-) diff --git a/include/orc/ar.hpp b/include/orc/ar.hpp index 2108586..4040c13 100644 --- a/include/orc/ar.hpp +++ b/include/orc/ar.hpp @@ -18,6 +18,6 @@ void read_ar(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params); + reader_params params); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/coff.hpp b/include/orc/coff.hpp index da4ad63..a8e22de 100644 --- a/include/orc/coff.hpp +++ b/include/orc/coff.hpp @@ -18,6 +18,10 @@ void read_coff(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params); + reader_params params); + +//-------------------------------------------------------------------------------------------------- + +struct dwarf dwarf_from_coff(std::uint32_t ofd_index, reader_params params); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/dwarf_constants.hpp b/include/orc/dwarf_constants.hpp index bab92e7..342c2b5 100644 --- a/include/orc/dwarf_constants.hpp +++ b/include/orc/dwarf_constants.hpp @@ -106,8 +106,9 @@ enum class at : std::uint16_t { abstract_origin = 0x31, accessibility = 0x32, address_class = 0x33, - // DW_AT_artificial attribute indicates that the associated entity (e.g., a function, variable, or parameter) - // is compiler-generated rather than explicitly written by the programmer in the source code. + // DW_AT_artificial attribute indicates that the associated entity (e.g., a function, variable, + // or parameter) is compiler-generated rather than explicitly written by the programmer in the + // source code. artificial = 0x34, base_types = 0x35, calling_convention = 0x36, @@ -116,27 +117,26 @@ enum class at : std::uint16_t { decl_column = 0x39, decl_file = 0x3a, decl_line = 0x3b, - // DW_AT_declaration indicates that the associated entity is a declaration rather than a definition. - // A function declaration is typically represented as a DW_TAG_subprogram entry with the attribute - // DW_AT_declaration set to true (or 1). - // It does not have attributes like DW_AT_low_pc or DW_AT_high_pc, as it does not correspond to actual code. - // Example: + // DW_AT_declaration indicates that the associated entity is a declaration rather than a + // definition. A function declaration is typically represented as a DW_TAG_subprogram entry with + // the attribute DW_AT_declaration set to true (or 1). It does not have attributes like + // DW_AT_low_pc or DW_AT_high_pc, as it does not correspond to actual code. Example: // <1><0x0000003a> DW_TAG_subprogram // DW_AT_name ("myFunction") // DW_AT_declaration (true) - // A function implementation is also represented as a DW_TAG_subprogram entry but does not have the DW_AT_declaration attribute. - // Instead, it includes attributes like DW_AT_low_pc and DW_AT_high_pc (or DW_AT_ranges), - // which specify the address range of the function's code in memory. - // Example: - // <1><0x0000003a> DW_TAG_subprogram + // A function implementation is also represented as a DW_TAG_subprogram entry but does not have + // the DW_AT_declaration attribute. Instead, it includes attributes like DW_AT_low_pc and + // DW_AT_high_pc (or DW_AT_ranges), which specify the address range of the function's code in + // memory. Example: <1><0x0000003a> DW_TAG_subprogram // DW_AT_name ("myFunction") // DW_AT_low_pc (0x0000000000401000) // DW_AT_high_pc (0x0000000000401020) declaration = 0x3c, discr_list = 0x3d, encoding = 0x3e, - // DW_AT_external attribute indicates that the corresponding entity (e.g., a variable, function, or type) has external linkage. + // DW_AT_external attribute indicates that the corresponding entity (e.g., a variable, function, + // or type) has external linkage. external = 0x3f, frame_base = 0x40, friend_ = 0x41, @@ -551,15 +551,15 @@ const char* to_string(tag t); /** * @brief Determines if a given DWARF tag represents a type - * + * * This function classifies whether a given DWARF tag represents a type definition * or declaration. This is used to identify type-related DIEs in the DWARF debug * information. * * @param t The DWARF tag to check - * + * * @return true if the tag represents a type, false otherwise - * + * * @pre The tag must be a valid DWARF tag * @post The return value will be true for all type-related tags and false for all others */ diff --git a/include/orc/dwarf_structs.hpp b/include/orc/dwarf_structs.hpp index 2984872..97eeaaa 100644 --- a/include/orc/dwarf_structs.hpp +++ b/include/orc/dwarf_structs.hpp @@ -84,7 +84,7 @@ struct attribute_value { int number() const { return has(type::sint) ? static_cast(sint()) : static_cast(uint()); } - + void string(pool_string x) { _type |= type::string; _string = x; @@ -188,9 +188,7 @@ struct attribute_sequence { using iterator = typename attributes_type::iterator; using const_iterator = typename attributes_type::const_iterator; - void reserve(std::size_t size) { - _attributes.reserve(size); - } + void reserve(std::size_t size) { _attributes.reserve(size); } bool has(dw::at name) const { auto [valid, iterator] = find(name); @@ -202,17 +200,11 @@ struct attribute_sequence { return valid && iterator->has(t); } - bool has_uint(dw::at name) const { - return has(name, attribute_value::type::uint); - } + bool has_uint(dw::at name) const { return has(name, attribute_value::type::uint); } - bool has_string(dw::at name) const { - return has(name, attribute_value::type::string); - } + bool has_string(dw::at name) const { return has(name, attribute_value::type::string); } - bool has_reference(dw::at name) const { - return has(name, attribute_value::type::reference); - } + bool has_reference(dw::at name) const { return has(name, attribute_value::type::reference); } auto& get(dw::at name) { auto [valid, iterator] = find(name); @@ -226,33 +218,19 @@ struct attribute_sequence { return *iterator; } - std::size_t hash(dw::at name) const { - return get(name)._value.hash(); - } + std::size_t hash(dw::at name) const { return get(name)._value.hash(); } - std::uint64_t uint(dw::at name) const { - return get(name).uint(); - } + std::uint64_t uint(dw::at name) const { return get(name).uint(); } - int number(dw::at name) const { - return get(name)._value.number(); - } + int number(dw::at name) const { return get(name)._value.number(); } - std::int64_t sint(dw::at name) const { - return get(name).sint(); - } + std::int64_t sint(dw::at name) const { return get(name).sint(); } - pool_string string(dw::at name) const { - return get(name).string(); - } + pool_string string(dw::at name) const { return get(name).string(); } - std::uint64_t reference(dw::at name) const { - return get(name).reference(); - } + std::uint64_t reference(dw::at name) const { return get(name).reference(); } - void push_back(const value_type& x) { - _attributes.push_back(x); - } + void push_back(const value_type& x) { _attributes.push_back(x); } bool empty() const { return _attributes.empty(); } @@ -270,23 +248,22 @@ struct attribute_sequence { } void move_append(attribute_sequence&& rhs) { - _attributes.insert(_attributes.end(), std::move_iterator(rhs.begin()), std::move_iterator(rhs.end())); + _attributes.insert(_attributes.end(), std::move_iterator(rhs.begin()), + std::move_iterator(rhs.end())); } private: /// NOTE: Consider sorting these attribues by `dw::at` to improve performance. std::tuple find(dw::at name) { - auto result = std::find_if(_attributes.begin(), _attributes.end(), [&](const auto& attr){ - return attr._name == name; - }); + auto result = std::find_if(_attributes.begin(), _attributes.end(), + [&](const auto& attr) { return attr._name == name; }); return std::make_tuple(result != _attributes.end(), result); } /// NOTE: Consider sorting these attribues by `dw::at` to improve performance. std::tuple find(dw::at name) const { - auto result = std::find_if(_attributes.begin(), _attributes.end(), [&](const auto& attr){ - return attr._name == name; - }); + auto result = std::find_if(_attributes.begin(), _attributes.end(), + [&](const auto& attr) { return attr._name == name; }); return std::make_tuple(result != _attributes.end(), result); } @@ -303,16 +280,14 @@ std::ostream& operator<<(std::ostream& s, const attribute_sequence& x); * typically used to identify where a symbol is defined or declared in DWARF debug info. */ struct location { - pool_string file; /// The source file path or name + pool_string file; /// The source file path or name std::uint64_t loc{0}; /// The 1-indexed line number within the file }; inline bool operator==(const location& x, const location& y) { return x.file == y.file && x.loc == y.loc; } -inline bool operator!=(const location& x, const location& y) { - return !(x == y); -} +inline bool operator!=(const location& x, const location& y) { return !(x == y); } inline bool operator<(const location& x, const location& y) { return x.file.hash() < y.file.hash() || (x.file == y.file && x.loc < y.loc); } @@ -364,8 +339,8 @@ const char* to_string(arch arch); * @brief Represents the ancestry of an object file * * Object files can be stored within an arbitrarily nested set of archive formats. For example, - * the `.o` file may be stored within an archive (`.a`) file, which itself may be stored within another - * archive, etc. This structure keeps track of the file(s) that contain the object file in + * the `.o` file may be stored within an archive (`.a`) file, which itself may be stored within + * another archive, etc. This structure keeps track of the file(s) that contain the object file in * question. This facilitates reporting when ODRVs are found, giving the user a breadcrumb as * to how the ODRV is being introduced. For efficiency purposes, we fix the max number of ancestors * at compile time, but this can be adjusted if necessary. @@ -393,18 +368,14 @@ struct object_ancestry { } bool operator<(const object_ancestry& rhs) const { - if (_ancestors.size() < rhs._ancestors.size()) - return true; + if (_ancestors.size() < rhs._ancestors.size()) return true; - if (_ancestors.size() > rhs._ancestors.size()) - return false; + if (_ancestors.size() > rhs._ancestors.size()) return false; for (size_t i = 0; i < _ancestors.size(); ++i) { - if (_ancestors[i].view() < rhs._ancestors[i].view()) - return true; + if (_ancestors[i].view() < rhs._ancestors[i].view()) return true; - if (_ancestors[i].view() > rhs._ancestors[i].view()) - return false; + if (_ancestors[i].view() > rhs._ancestors[i].view()) return false; } return false; @@ -431,21 +402,26 @@ std::ostream& operator<<(std::ostream& s, const object_ancestry& x); // // During an ORC scan, multiple translation units worth of DIEs are brought together to determine // if any of them violate the One Definition Rule. DIEs across those units that are "the same" will -// have the same `_hash` value, and will be linked together via the `_next_die` pointer. The top-level -// ORC scan will then have a collection of singly-linked lists, one per unique symbol / `_hash`. -// Once all these lists are constructed, each are checked individually for ODRVs. +// have the same `_hash` value, and will be linked together via the `_next_die` pointer. The +// top-level ORC scan will then have a collection of singly-linked lists, one per unique symbol / +// `_hash`. Once all these lists are constructed, each are checked individually for ODRVs. struct die { // Because the quantity of these created at runtime can beon the order of millions of instances, // these are ordered for optimal alignment. If you change the ordering, or add/remove items // here, please consider alignment issues. - pool_string _path; // the user-readable symbol name, "pathed"/namespaced by containing DIEs. May be mangled. - die* _next_die{nullptr}; // pointer to the next DIE that has the same `_hash` value. + pool_string _path; // the user-readable symbol name, "pathed"/namespaced by containing DIEs. May + // be mangled. + die* _next_die{nullptr}; // pointer to the next DIE that has the same `_hash` value. std::optional _location; // file_decl and file_line, if they exist for the DIE. - std::size_t _hash{0}; // uniquely identifies the DIE across differing targets (e.g., the same symbol in a FAT binary.) - std::size_t _fatal_attribute_hash{0}; // within a target, a hash of attributes that contribute to ODRVs. + std::size_t _hash{0}; // uniquely identifies the DIE across differing targets (e.g., the same + // symbol in a FAT binary.) + std::size_t _fatal_attribute_hash{ + 0}; // within a target, a hash of attributes that contribute to ODRVs. std::uint32_t _ofd_index{0}; // object file descriptor index - std::size_t _cu_header_offset{0}; // offset to the compilation unit that contains this DIE; relative to `__debug_info` - std::size_t _cu_die_offset{0}; // offset to the associated compilation unit DIE entry; relative to `__debug_info` + std::size_t _cu_header_offset{ + 0}; // offset to the compilation unit that contains this DIE; relative to `__debug_info` + std::size_t _cu_die_offset{ + 0}; // offset to the associated compilation unit DIE entry; relative to `__debug_info` std::size_t _offset{0}; // offset of this DIE; relative to `__debug_info` dw::tag _tag{dw::tag::none}; arch _arch{arch::unknown}; @@ -464,17 +440,17 @@ using dies = std::vector; /** * @brief Determines if a DWARF attribute is considered non-fatal for ODRV purposes - * + * * This function identifies attributes that can be safely ignored when checking for * One Definition Rule Violations (ODRVs). These attributes typically contain * information that doesn't affect the actual definition of a symbol, such as * debug-specific metadata or compiler-specific extensions. * * @param at The DWARF attribute to check - * + * * @return true if the attribute is non-fatal and can be ignored for ODRV checks, * false if the attribute must be considered when checking for ODRVs - * + * * @pre The attribute must be a valid DWARF attribute * @post The return value will be consistent with the internal list of nonfatal attributes */ diff --git a/include/orc/fat.hpp b/include/orc/fat.hpp index 6df85fd..096858c 100644 --- a/include/orc/fat.hpp +++ b/include/orc/fat.hpp @@ -18,6 +18,6 @@ void read_fat(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params); + reader_params params); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/features.hpp b/include/orc/features.hpp index 1eda5d3..7c317bb 100644 --- a/include/orc/features.hpp +++ b/include/orc/features.hpp @@ -8,20 +8,20 @@ //-------------------------------------------------------------------------------------------------- -#define ORC_FEATURE(X) (ORC_PRIVATE_FEATURE_ ## X()) +#define ORC_FEATURE(X) (ORC_PRIVATE_FEATURE_##X()) #ifndef NDEBUG - #define ORC_PRIVATE_FEATURE_DEBUG() 1 - #define ORC_PRIVATE_FEATURE_RELEASE() 0 +#define ORC_PRIVATE_FEATURE_DEBUG() 1 +#define ORC_PRIVATE_FEATURE_RELEASE() 0 #else - #define ORC_PRIVATE_FEATURE_DEBUG() 0 - #define ORC_PRIVATE_FEATURE_RELEASE() 1 +#define ORC_PRIVATE_FEATURE_DEBUG() 0 +#define ORC_PRIVATE_FEATURE_RELEASE() 1 #endif // !defined(NDEBUG) #if defined(TRACY_ENABLE) - #define ORC_PRIVATE_FEATURE_TRACY() 1 +#define ORC_PRIVATE_FEATURE_TRACY() 1 #else - #define ORC_PRIVATE_FEATURE_TRACY() 0 +#define ORC_PRIVATE_FEATURE_TRACY() 0 #endif //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/fixed_vector.hpp b/include/orc/fixed_vector.hpp index 3a8650b..6448654 100644 --- a/include/orc/fixed_vector.hpp +++ b/include/orc/fixed_vector.hpp @@ -7,9 +7,9 @@ #pragma once // stdc++ +#include #include #include -#include // adobe contract checks #include "adobe/contract_checks.hpp" @@ -22,10 +22,10 @@ namespace orc { /** * @brief A fixed-size vector container that provides a subset of `std::vector` functionality - * + * * @tparam T The type of elements stored in the vector * @tparam N The maximum number of elements the vector can hold - * + * * This container provides a fixed-size alternative to `std::vector` with similar interface. * It guarantees that memory is allocated on the stack and never reallocates. * Operations that would exceed the fixed capacity `N` will terminate the program. @@ -47,21 +47,19 @@ struct fixed_vector { /** * @brief Move constructor - * + * * @param rhs The `fixed_vector` to move from - * + * * @post `rhs` is left in an empty state */ - fixed_vector(fixed_vector&& rhs) : _a(std::move(rhs._a)), _n(rhs._n) { - rhs._n = 0; - } + fixed_vector(fixed_vector&& rhs) : _a(std::move(rhs._a)), _n(rhs._n) { rhs._n = 0; } /** * @brief Move assignment operator - * + * * @param rhs The `fixed_vector` to move from * @return Reference to this `fixed_vector` - * + * * @post `rhs` is left in an empty state */ fixed_vector& operator=(fixed_vector&& rhs) { @@ -73,10 +71,10 @@ struct fixed_vector { /** * @brief Constructs a fixed_vector with count copies of value - * + * * @param count Number of elements to create * @param value Value to initialize elements with - * + * * @pre count <= N * @note If count > N, the program will terminate. */ @@ -89,10 +87,10 @@ struct fixed_vector { /** * @brief Access element at specified position with bounds checking - * + * * @param pos Position of the element to return * @return Reference to the requested element - * + * * @throw std::out_of_range if pos >= size() */ T& at(size_type pos) { @@ -104,10 +102,10 @@ struct fixed_vector { /** * @brief Access element at specified position with bounds checking (const version) - * + * * @param pos Position of the element to return * @return Const reference to the requested element - * + * * @throw std::out_of_range if pos >= size() */ const T& at(size_type pos) const { @@ -119,41 +117,41 @@ struct fixed_vector { /** * @brief Access element at specified position without bounds checking - * + * * @param pos Position of the element to return * @return Reference to the requested element - * + * * @pre pos < size() */ T& operator[](size_type pos) { return _a[pos]; } /** * @brief Access element at specified position without bounds checking (const version) - * + * * @param pos Position of the element to return * @return Const reference to the requested element - * + * * @pre pos < size() */ const T& operator[](size_type pos) const { return _a[pos]; } /** * @brief Returns reference to the first element - * + * * @return Reference to the first element - * + * * @pre !empty() */ - T& front() { + T& front() { ADOBE_PRECONDITION(!empty(), "fixed_vector is empty"); return _a[0]; } /** * @brief Returns const reference to the first element - * + * * @return Const reference to the first element - * + * * @pre !empty() */ const T& front() const { @@ -163,24 +161,24 @@ struct fixed_vector { /** * @brief Returns reference to the last element - * + * * @return Reference to the last element - * + * * @pre !empty() */ - T& back() { + T& back() { ADOBE_PRECONDITION(!empty(), "fixed_vector is empty"); return _a[_n - 1]; } /** * @brief Returns const reference to the last element - * + * * @return Const reference to the last element - * + * * @pre !empty() */ - const T& back() const { + const T& back() const { ADOBE_PRECONDITION(!empty(), "fixed_vector is empty"); return _a[_n - 1]; } @@ -193,9 +191,9 @@ struct fixed_vector { /** * @brief Adds an element to the end - * + * * @param x Value to append - * + * * @pre size() < N, otherwise the program will terminate. */ void push_back(const T& x) { @@ -205,7 +203,7 @@ struct fixed_vector { /** * @brief Removes the last element - * + * * @pre !empty(), otherwise the program will terminate. * @post The last element is destroyed and size() is decremented by 1 */ @@ -217,7 +215,7 @@ struct fixed_vector { /** * @brief Removes all elements - * + * * @post size() == 0 */ void clear() { @@ -228,11 +226,11 @@ struct fixed_vector { /** * @brief Inserts value before pos - * + * * @param pos Iterator before which the content will be inserted * @param value Element value to insert * @return Iterator pointing to the inserted value - * + * * @pre size() < N, otherwise the program will terminate. */ iterator insert(iterator pos, const T& value) { @@ -244,12 +242,12 @@ struct fixed_vector { /** * @brief Inserts elements from range [first, last) before pos - * + * * @param pos Iterator before which the content will be inserted * @param first Iterator to the first element to insert * @param last Iterator past the last element to insert * @return Iterator pointing to the first inserted element - * + * * @pre size() + std::distance(first, last) <= N, otherwise the program will terminate. */ template @@ -264,10 +262,10 @@ struct fixed_vector { /** * @brief Removes element at pos - * + * * @param pos Iterator to the element to remove * @return Iterator following the last removed element - * + * * @pre !empty(), otherwise the program will terminate. * @post size() is decremented by 1 */ @@ -308,7 +306,7 @@ struct fixed_vector { /** * @brief Equality comparison operator - * + * * @param lhs First fixed_vector to compare * @param rhs Second fixed_vector to compare * @return true if the vectors have the same size and elements, false otherwise @@ -320,7 +318,7 @@ bool operator==(const fixed_vector& lhs, const fixed_vector& rhs) { /** * @brief Inequality comparison operator - * + * * @param lhs First fixed_vector to compare * @param rhs Second fixed_vector to compare * @return true if the vectors are not equal, false otherwise diff --git a/include/orc/macho.hpp b/include/orc/macho.hpp index 0c90ace..037fb7d 100644 --- a/include/orc/macho.hpp +++ b/include/orc/macho.hpp @@ -18,14 +18,15 @@ void read_macho(object_ancestry&& ancestry, freader s, std::istream::pos_type end_pos, file_details details, - macho_params params); + reader_params params); //-------------------------------------------------------------------------------------------------- -struct dwarf dwarf_from_macho(std::uint32_t ofd_index, macho_params params); +struct dwarf dwarf_from_macho(std::uint32_t ofd_index, reader_params params); //-------------------------------------------------------------------------------------------------- -std::vector macho_derive_dylibs(const std::vector& root_binaries); +std::vector macho_derive_dylibs( + const std::vector& root_binaries); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/orc.hpp b/include/orc/orc.hpp index 643178e..1030c60 100644 --- a/include/orc/orc.hpp +++ b/include/orc/orc.hpp @@ -8,10 +8,10 @@ // stdc++ #include +#include +#include #include #include -#include -#include // application #include "orc/dwarf_structs.hpp" diff --git a/include/orc/parse_file.hpp b/include/orc/parse_file.hpp index 1fad85d..e5aa0bf 100644 --- a/include/orc/parse_file.hpp +++ b/include/orc/parse_file.hpp @@ -244,18 +244,19 @@ constexpr std::decay_t copy(T&& value) noexcept(noexcept(std::decay_t{ //-------------------------------------------------------------------------------------------------- -enum class macho_reader_mode { +enum class reader_mode { invalid, register_dies, derive_dylibs, odrv_reporting, }; -struct macho_params { - using register_dependencies_callback = std::function&&)>; +struct reader_params { + using register_dependencies_callback = + std::function&&)>; - macho_reader_mode _mode{macho_reader_mode::invalid}; - std::filesystem::path _executable_path; // only required if mode == derive_dylibs + reader_mode _mode{reader_mode::invalid}; + std::filesystem::path _executable_path; // only required if mode == derive_dylibs register_dependencies_callback _register_dependencies; // only required if mode == derive_dylibs }; @@ -263,6 +264,6 @@ void parse_file(std::string_view object_name, const object_ancestry& ancestry, freader& s, std::istream::pos_type end_pos, - macho_params params); + reader_params params); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/settings.hpp b/include/orc/settings.hpp index a4fc87b..d640003 100644 --- a/include/orc/settings.hpp +++ b/include/orc/settings.hpp @@ -9,9 +9,9 @@ // stdc++ #include #include +#include #include #include -#include // application #include "orc/features.hpp" @@ -20,9 +20,9 @@ struct settings { enum class log_level { - silent, // emit nothing but ODRVs + silent, // emit nothing but ODRVs warning, // emit issues that need to be fixed - info, // emit brief, informative status + info, // emit brief, informative status verbose, // emit as much as possible }; diff --git a/include/orc/str.hpp b/include/orc/str.hpp index 6e0cbac..8d5ee4b 100644 --- a/include/orc/str.hpp +++ b/include/orc/str.hpp @@ -29,9 +29,7 @@ std::string format_size(std::size_t x, format_mode mode = format_mode::binary); // e.g., .123 -> "12.3%", or 1.23456 -> "123.46%" std::string format_pct(float x); -inline std::string format_pct(float x, float total) { - return format_pct(total ? x / total : 0); -} +inline std::string format_pct(float x, float total) { return format_pct(total ? x / total : 0); } std::string toupper(std::string&& s); diff --git a/include/orc/string_pool.hpp b/include/orc/string_pool.hpp index 56138b8..761b341 100644 --- a/include/orc/string_pool.hpp +++ b/include/orc/string_pool.hpp @@ -8,10 +8,10 @@ // stdc++ #include +#include #include #include #include -#include #include "orc/features.hpp" @@ -20,7 +20,7 @@ struct pool_string; /* - Stores interned strings. Thread safe in that the pool resources are per thread. + Stores interned strings. Thread safe in that the pool resources are per thread. A string pool per thread reduces the total memory usage from 83GB to 53GB. It also significantly improves performance. (This is the result for the application as a whole. That @@ -35,14 +35,15 @@ pool_string empool(std::string_view src); * A `pool_string` is one pointer in size * A `pool_string` is thread safe - * Two `pool_strings` pointing to the same `_data` are always equal, and if the `_data` is different, - they are not equal.< + * Two `pool_strings` pointing to the same `_data` are always equal, and if the `_data` is + different, they are not equal.< * `_data` is a char* to null terminated data, which is just a c string. Useful for debugging. - * if `_data` is null, it is intepreted as an empty string (""). If `_data` is not-null, it always is size() > 0 - - When empooled, the hash (64 bits) and size/length (32 bits) are stored before the char* to the data. - Note there is no memory alignment in the pool - it is fully packed - so data needs to be memcpy'd in - and out, just in case the processor doesn't like un-aligned reads. + * if `_data` is null, it is intepreted as an empty string (""). If `_data` is not-null, it + always is size() > 0 + + When empooled, the hash (64 bits) and size/length (32 bits) are stored before the char* to the + data. Note there is no memory alignment in the pool - it is fully packed - so data needs to be + memcpy'd in and out, just in case the processor doesn't like un-aligned reads. */ struct pool_string { pool_string() = default; @@ -57,14 +58,10 @@ struct pool_string { if (!_data) return default_view; return std::string_view(_data, get_size(_data)); } - - std::string allocate_string() const { - return std::string(view()); - } - - std::filesystem::path allocate_path() const { - return std::filesystem::path(view()); - } + + std::string allocate_string() const { return std::string(view()); } + + std::filesystem::path allocate_path() const { return std::filesystem::path(view()); } std::size_t hash() const { if (!_data) return 0; @@ -82,13 +79,9 @@ struct pool_string { return equal; } - friend inline bool operator!=(const pool_string& x, const pool_string& y) { - return !(x == y); - } + friend inline bool operator!=(const pool_string& x, const pool_string& y) { return !(x == y); } - friend inline auto& operator<<(std::ostream& x, const pool_string& y) { - return x << y.view(); - } + friend inline auto& operator<<(std::ostream& x, const pool_string& y) { return x << y.view(); } private: static std::size_t get_size(const char* d); @@ -98,14 +91,16 @@ struct pool_string { static std::string_view default_view; // an empty string return if the _data pointer is null explicit pool_string(const char* data) : _data(data) {} - + const char* _data{nullptr}; }; // pool_string is just a pointer with methods. It needs to be small as strings are a large part -// of ORC's considerable memory usage. pool_string doesn't have a copy constructor or move semantics. -// Copying and low memory usage depend on pool_string being really a pointer, so double check that here, -// and don't remove this unless you are careful about performance of large projects. -static_assert(sizeof(pool_string) <= sizeof(intptr_t), "pool_string is design to be as small and fast to copy as a pointer."); +// of ORC's considerable memory usage. pool_string doesn't have a copy constructor or move +// semantics. Copying and low memory usage depend on pool_string being really a pointer, so double +// check that here, and don't remove this unless you are careful about performance of large +// projects. +static_assert(sizeof(pool_string) <= sizeof(intptr_t), + "pool_string is design to be as small and fast to copy as a pointer."); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/task_system.hpp b/include/orc/task_system.hpp index 65e8499..54cd6a8 100644 --- a/include/orc/task_system.hpp +++ b/include/orc/task_system.hpp @@ -16,9 +16,7 @@ namespace orc { using stlab::task; -inline auto queue_size() { - return std::max(1u, std::thread::hardware_concurrency()); -} +inline auto queue_size() { return std::max(1u, std::thread::hardware_concurrency()); } class notification_queue { using lock_t = std::unique_lock; @@ -28,7 +26,7 @@ class notification_queue { task _task; template - element_t(F&& f, unsigned priority) : _priority{priority}, _task{std::forward(f)} { } + element_t(F&& f, unsigned priority) : _priority{priority}, _task{std::forward(f)} {} struct greater { bool operator()(const element_t& a, const element_t& b) const { @@ -60,7 +58,8 @@ class notification_queue { bool pop(task& x) { lock_t lock{_mutex}; - while (_q.empty() && !_done) _ready.wait(lock); + while (_q.empty() && !_done) + _ready.wait(lock); if (_q.empty()) return false; x = pop_not_empty(); return true; @@ -110,11 +109,11 @@ class priority_task_system { std::atomic_bool _done{false}; void run(unsigned i) { - #if STLAB_FEATURE(THREAD_NAME_POSIX) +#if STLAB_FEATURE(THREAD_NAME_POSIX) pthread_setname_np(pthread_self(), "adobe.orc.worker"); - #elif STLAB_FEATURE(THREAD_NAME_APPLE) +#elif STLAB_FEATURE(THREAD_NAME_APPLE) pthread_setname_np("adobe.orc.worker"); - #endif +#endif while (true) { task f; @@ -131,17 +130,18 @@ class priority_task_system { priority_task_system() { _threads.reserve(_count); for (unsigned n = 0; n != _count; ++n) { - _threads.emplace_back([&, n]{ run(n); }); + _threads.emplace_back([&, n] { run(n); }); } } ~priority_task_system() { - for (auto& e : _q) e.done(); - for (auto& e : _threads) e.join(); + for (auto& e : _q) + e.done(); + for (auto& e : _threads) + e.join(); } - template void execute(F&& f) { static_assert(P < 3, "More than 3 priorities are not known!"); @@ -173,16 +173,10 @@ inline priority_task_system& pts() { return only_task_system; } -enum class executor_priority -{ - high, - medium, - low -}; +enum class executor_priority { high, medium, low }; template -struct task_system -{ +struct task_system { using result_type = void; void operator()(task f) const { diff --git a/include/orc/tracy.hpp b/include/orc/tracy.hpp index 7ef6168..2647d7c 100644 --- a/include/orc/tracy.hpp +++ b/include/orc/tracy.hpp @@ -12,9 +12,9 @@ // By default the macros unwrap to __FUNCTION__, __FILE__ and __LINE__ respectively. #if defined(__clang__) || defined(__GNUC__) - #define TracyFunction __PRETTY_FUNCTION__ +#define TracyFunction __PRETTY_FUNCTION__ #elif defined(_MSC_VER) - #define TracyFunction __FUNCSIG__ +#define TracyFunction __FUNCSIG__ #endif #include @@ -24,11 +24,11 @@ // These `L` suffixes are wrong; they're not compile-time literals #if ORC_FEATURE(TRACY) - #define ZoneTextL(msg) ZoneText((msg), std::strlen(msg)); - #define ZoneNameL(msg) ZoneName((msg), std::strlen(msg)); +#define ZoneTextL(msg) ZoneText((msg), std::strlen(msg)); +#define ZoneNameL(msg) ZoneName((msg), std::strlen(msg)); #else - #define ZoneTextL(msg) - #define ZoneNameL(msg) +#define ZoneTextL(msg) +#define ZoneNameL(msg) #endif //================================================================================================== diff --git a/justfile b/justfile index 9b66ca0..51e20dd 100644 --- a/justfile +++ b/justfile @@ -21,3 +21,8 @@ nuke: && gen # Generate the cmake project (Tracy enabled) tracy: cmake -B build -GXcode -DTRACY_ENABLE=ON + +# Reformat the dialector sources via clang-format +fmt: + find src -name '*.cpp' | xargs clang-format -i + find include -name '*.hpp' | xargs clang-format -i diff --git a/src/ar.cpp b/src/ar.cpp index c1edc83..00b2bf1 100644 --- a/src/ar.cpp +++ b/src/ar.cpp @@ -34,7 +34,7 @@ void read_ar(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params) { + reader_params params) { std::string magic = read_fixed_string<8>(s); assert(magic == "!\n"); diff --git a/src/coff.cpp b/src/coff.cpp index aca0166..330a362 100644 --- a/src/coff.cpp +++ b/src/coff.cpp @@ -24,9 +24,12 @@ namespace { //-------------------------------------------------------------------------------------------------- // // Relevant documentation: -// - Portable Executable (PE) format: https://learn.microsoft.com/en-us/windows/win32/debug/pe-format -// - image_file_header: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_file_header -// - image_section_header: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header +// - Portable Executable (PE) format: +// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format +// - image_file_header: +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_file_header +// - image_section_header: +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header // struct image_file_header { @@ -89,15 +92,26 @@ void read_coff(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params) { + reader_params params) { std::uint32_t ofd_index = - static_cast(object_file_register(std::move(ancestry), copy(details))); - dwarf dwarf(ofd_index, copy(s), copy(details)); - + static_cast(object_file_register(std::move(ancestry), copy(details))); + + dwarf_from_coff(ofd_index, std::move(params)).process_all_dies(); +} + +//-------------------------------------------------------------------------------------------------- + +dwarf dwarf_from_coff(std::uint32_t ofd_index, reader_params params) { + const auto& entry = object_file_fetch(ofd_index); + freader s(entry._ancestry.begin()->allocate_path()); + dwarf dwarf(ofd_index, copy(s), copy(entry._details)); + + s.seekg(entry._details._offset); + // If you hit this, you're running ORC in a mode not supported by COFF. - ADOBE_INVARIANT(params._mode == macho_reader_mode::register_dies || - params._mode == macho_reader_mode::odrv_reporting); - + ADOBE_INVARIANT(params._mode == reader_mode::register_dies || + params._mode == reader_mode::odrv_reporting); + // The general format of COFF is: // header // section headers @@ -108,19 +122,18 @@ void read_coff(object_ancestry&& ancestry, // in one of the "raw data" blocks in COFF sections whose names are // the DWARF segments we are interested in (debug_info, debug_abbrev, // etc.) So we don't need to read anything beyond the section headers. - + const auto header = read_pod(s); - + // According to the PE format docs there should be no optional header for object files. ADOBE_INVARIANT(header.optional_header_size == 0); - + // Grab the string table offset and size, which we'll need when deriving // the name of some of the sections we read below. const auto string_table_offset = header.symbol_table_pointer + header.symbol_count * 18; - const auto string_table_size = temp_seek(s, string_table_offset, [&]{ - return read_pod(s); - }); - + const auto string_table_size = + temp_seek(s, string_table_offset, [&] { return read_pod(s); }); + // Read the section headers. As we go, derive the actual section header // name, which may be in the string table. If the name is a DWARF segment, // add it to the DWARF processor. @@ -137,20 +150,18 @@ void read_coff(object_ancestry&& ancestry, ++name; int section_name_offset = std::atoi(name); ADOBE_INVARIANT(section_name_offset < string_table_size); - section.actual_name = temp_seek(s, string_table_offset + section_name_offset, [&]{ - return s.read_c_string_view(); - }); + section.actual_name = temp_seek(s, string_table_offset + section_name_offset, + [&] { return s.read_c_string_view(); }); } - + if (section.actual_name.starts_with(".debug")) { - std::cout << section.actual_name << '\n'; - dwarf.register_section(section.actual_name, - section.header.raw_data_pointer, + // std::cout << section.actual_name << '\n'; + dwarf.register_section(section.actual_name, section.header.raw_data_pointer, section.header.raw_data_size); } } - dwarf.process_all_dies(); + return dwarf; } //-------------------------------------------------------------------------------------------------- diff --git a/src/dwarf.cpp b/src/dwarf.cpp index bd8a689..d7bb6b8 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -91,14 +91,14 @@ std::uint32_t form_length(dw::form f, freader& s, std::uint16_t version) { // this is different than `leb_block` in // that the value of the ULEB is immaterial. auto leb_length = [&] { - return temp_seek(s, [&]{ + return temp_seek(s, [&] { const std::size_t beginning = s.tellg(); (void)uleb128(s); // do the uleb read to find out how much was read. const std::size_t end = s.tellg(); return static_cast(end - beginning); }); }; - + switch (f) { case dw::form::addr: return 8; @@ -223,7 +223,7 @@ std::uint32_t form_length(dw::form f, freader& s, std::uint16_t version) { /** * @brief Represents a DWARF section in the object file - * + * * This struct stores information about a DWARF section, including its offset * and size within the object file. It is used to track the location of various * DWARF debug sections like .debug_info, .debug_abbrev, etc. @@ -234,9 +234,9 @@ struct section { /** * @brief Checks if the section is valid - * + * * A section is considered valid if it has both a non-zero offset and size. - * + * * @return true if the section is valid, false otherwise */ bool valid() const { return _offset != 0 && _size != 0; } @@ -247,20 +247,21 @@ struct section { // Think of it like a cookie cutter that needs to get stamped on some dough to make an actual // cookie. Only in this case instead of a cookie, it'll make a DIE (DWARF Information Entry.) struct abbrev { - std::size_t _g{0}; /// the offset of the abbreviation entry in the debug_abbrev section (relative to the start of the section) - std::uint32_t _code{0}; /// the abbreviation code - dw::tag _tag{0}; /// the tag of the DIE + std::size_t _g{0}; /// the offset of the abbreviation entry in the debug_abbrev section + /// (relative to the start of the section) + std::uint32_t _code{0}; /// the abbreviation code + dw::tag _tag{0}; /// the tag of the DIE bool _has_children{false}; /// whether the DIE has children std::vector _attributes; /// the attributes of the DIE /** * @brief Reads an abbreviation entry from the file - * + * * This function reads the abbreviation entry from the file reader, * parsing its code, tag, children flag, and attributes. - * + * * @param s The file reader to read from - * + * * @pre The file reader must be positioned at the start of an abbreviation entry * @post The file reader will be positioned after the abbreviation entry */ @@ -301,18 +302,18 @@ using md5_hash = std::array; //-------------------------------------------------------------------------------------------------- /** * @brief Represents a source file entry in DWARF debug information - * + * * This struct stores information about a source file referenced in the DWARF * debug information, including its name, directory index, modification time, * and length. Used to represent both directories and file names in the * compilation header. */ struct file_name { - pool_string _name; ///< The name of the source file + pool_string _name; ///< The name of the source file std::uint32_t _directory_index{0}; ///< Index into the include directories list - std::uint32_t _mod_time{0}; ///< File modification time - std::uint32_t _file_length{0}; ///< Length of the file in bytes - md5_hash _md5{0}; ///< MD5 hash of the source file + std::uint32_t _mod_time{0}; ///< File modification time + std::uint32_t _file_length{0}; ///< Length of the file in bytes + md5_hash _md5{0}; ///< MD5 hash of the source file }; //-------------------------------------------------------------------------------------------------- @@ -334,7 +335,7 @@ bool has_flag_attribute(const attribute_sequence& attributes, dw::at name) { * * @param d The DIE to hash * @param attributes The attribute sequence associated with the DIE - * + * * @return A size_t hash value that uniquely identifies the DIE * * @note Struct and class tags are treated as equivalent @@ -386,22 +387,24 @@ std::size_t die_hash(const die& d, const attribute_sequence& attributes) { * version, unit type, and the offset of the debug_abbrev section. */ struct cu_header { - std::uint64_t _length{0}; ///< Length of the compilation unit (4 or 12 bytes (if extended length is used)) - bool _is_64_bit{false}; ///< Whether the DWARF is 64-bit + std::uint64_t _length{ + 0}; ///< Length of the compilation unit (4 or 12 bytes (if extended length is used)) + bool _is_64_bit{false}; ///< Whether the DWARF is 64-bit std::uint16_t _version{0}; ///< DWARF spec version (DWARF4, DWARF5, etc.) - std::uint8_t _unit_type{0}; ///< Type of compilation unit (SPECREF: DWARF5 page 218 (200) line 15) + std::uint8_t _unit_type{ + 0}; ///< Type of compilation unit (SPECREF: DWARF5 page 218 (200) line 15) std::uint64_t _debug_abbrev_offset{0}; ///< Offset to debug abbreviations section - std::uint32_t _address_size{0}; ///< Size of an address in bytes + std::uint32_t _address_size{0}; ///< Size of an address in bytes /** * @brief Reads a compilation unit header from the file - * + * * This function reads the compilation unit header from the file reader, * parsing its length, version, and other metadata. - * + * * @param s The file reader to read from * @param needs_byteswap Whether the data needs byte swapping - * + * * @pre The file reader must be positioned at the start of a compilation unit header * @post The file reader will be positioned after the header */ @@ -469,7 +472,7 @@ void cu_header::read(freader& s, bool needs_byteswap) { /** * @brief Represents a DWARF line number program header - * + * * This struct stores information from a DWARF line number program header, * which contains metadata about how line number information is encoded * in the debug information. For ORC's purposes, this is largely ignored @@ -482,36 +485,36 @@ struct line_header { // Note this will change for DWARF5, so we need to look out // for DWARF data that uses the new version number and // account for it differently. - std::uint64_t _length{0}; ///< Length of the header (4 or 8 bytes) - std::uint16_t _version{0}; ///< DWARF version - std::int8_t _address_size{0}; ///< Size of an address in bytes (DWARF5) + std::uint64_t _length{0}; ///< Length of the header (4 or 8 bytes) + std::uint16_t _version{0}; ///< DWARF version + std::int8_t _address_size{0}; ///< Size of an address in bytes (DWARF5) std::int8_t _segment_selector_size{0}; ///< Size of segment selector (DWARF5) std::uint32_t _header_length{0}; ///< Length of the header (4 (DWARF) or 8 (DWARF64) bytes) - std::uint32_t _min_instruction_length{0}; ///< Minimum instruction length + std::uint32_t _min_instruction_length{0}; ///< Minimum instruction length std::uint32_t _max_ops_per_instruction{0}; ///< Maximum operations per instruction (DWARF4+) - std::uint32_t _default_is_statement{0}; ///< Default is_statement value - std::int32_t _line_base{0}; ///< Base value for line number calculations - std::uint32_t _line_range{0}; ///< Range of line numbers - std::uint32_t _opcode_base{0}; ///< Base value for opcodes - std::vector _standard_opcode_lengths; ///< Lengths of standard opcodes - std::uint8_t _directory_entry_format_count{0}; // DWARF5 + std::uint32_t _default_is_statement{0}; ///< Default is_statement value + std::int32_t _line_base{0}; ///< Base value for line number calculations + std::uint32_t _line_range{0}; ///< Range of line numbers + std::uint32_t _opcode_base{0}; ///< Base value for opcodes + std::vector _standard_opcode_lengths; ///< Lengths of standard opcodes + std::uint8_t _directory_entry_format_count{0}; // DWARF5 std::vector _directory_entry_format; // DWARF5 - std::uint32_t _directories_count{0}; // DWARF5 - std::vector _directories; ///< Include directories - std::uint8_t _file_name_entry_format_count{0}; // DWARF5 + std::uint32_t _directories_count{0}; // DWARF5 + std::vector _directories; ///< Include directories + std::uint8_t _file_name_entry_format_count{0}; // DWARF5 std::vector _file_name_entry_format; // DWARF5 - std::uint32_t _file_names_count{0}; // DWARF5 - std::vector _file_names; ///< Source file names + std::uint32_t _file_names_count{0}; // DWARF5 + std::vector _file_names; ///< Source file names /** * @brief Reads a line number program header from the file - * + * * This function reads the line number program header from the file reader, * parsing its version, opcode information, and file/directory lists. - * + * * @param s The file reader to read from * @param needs_byteswap Whether the data needs byte swapping - * + * * @pre The file reader must be positioned at the start of a line number program header * @post The file reader will be positioned after the header */ @@ -527,7 +530,8 @@ struct line_header { const std::vector& formats); pool_string read_one_path_content_path(dwarf::implementation& dwarf, dw::form form); - std::uint32_t read_one_path_content_directory_index(dwarf::implementation& dwarf, dw::form form); + std::uint32_t read_one_path_content_directory_index(dwarf::implementation& dwarf, + dw::form form); md5_hash read_one_path_content_md5(dwarf::implementation& dwarf, dw::form form); }; @@ -543,13 +547,13 @@ using fixed_attribute_array = orc::fixed_vector; * attributes by value and returns them in a fixed-size array. * * @param attributes The attribute sequence to filter for fatal attributes - * + * * @return A fixed-size array containing the fatal attributes, sorted by value * * @pre The attributes parameter must be a valid `attribute_sequence` * @post The returned array contains only fatal attributes, sorted by value, with any unused * elements set to dw::at::none - * + * * @note The function is limited to processing `max_names_k` fatal attributes. */ fixed_attribute_array fatal_attributes_within(const attribute_sequence& attributes) { @@ -575,16 +579,16 @@ fixed_attribute_array fatal_attributes_within(const attribute_sequence& attribut * This function generates a hash value based on the attributes of a DIE that could contribute * to an ODR violation (One Definition Rule violation). It filters out non-fatal attributes, * sorts them by name for consistent traversal, and returns the subset that are considered fatal. - * Note this is not the same as `die_hash`. + * Note this is not the same as `die_hash`. * * @param attributes The attribute sequence to filter for fatal attributes - * + * * @return A fixed-size array containing the fatal attributes, sorted by name * * @pre The attributes parameter must be a valid attribute_sequence * @post The returned array contains only fatal attributes, sorted by name, with any unused * elements set to dw::at::none - * + * * @note The function is limited to processing max_names_k attributes and will throw * an exception if more fatal attributes are found * @note This function is used as part of ODR violation detection @@ -646,14 +650,14 @@ enum class process_mode { //-------------------------------------------------------------------------------------------------- struct dwarf::implementation { - implementation(std::uint32_t ofd_index, - freader&& s, - file_details&& details) + implementation(std::uint32_t ofd_index, freader&& s, file_details&& details) : _s(std::move(s)), _details(std::move(details)), _ofd_index(ofd_index) {} void register_section(const std::string& name, std::size_t offset, std::size_t size); - bool register_sections_done(); + // Called in between on-disk DWARF section discovery and die processing + // to set up the state machine to process dies. + void finalize_section_registration(); void report_die_processing_failure(std::size_t die_absolute_offset, std::string&& error); void process_all_dies(); @@ -719,10 +723,13 @@ struct dwarf::implementation { std::unordered_map _debug_str_cache; std::unordered_map _debug_line_str_cache; std::unordered_map _debug_str_offs_cache; - pool_string _last_typedef_name; // for unnamed structs - see https://github.com/adobe/orc/issues/84 + pool_string + _last_typedef_name; // for unnamed structs - see https://github.com/adobe/orc/issues/84 cu_header _cu_header; - std::size_t _cu_header_offset{0}; // offset of the compilation unit header. Relative to __debug_info. - std::size_t _cu_die_offset{0}; // offset of the `compile_unit` die. Relative to start of `debug_info` + std::size_t _cu_header_offset{ + 0}; // offset of the compilation unit header. Relative to __debug_info. + std::size_t _cu_die_offset{ + 0}; // offset of the `compile_unit` die. Relative to start of `debug_info` pool_string _cu_compilation_directory; std::uint32_t _ofd_index{0}; // index to the obj_registry in macho.cpp section _debug_abbrev; @@ -844,7 +851,8 @@ void dwarf::implementation::read_lines(std::size_t header_offset) { for (const auto& name : header._file_names) { if (name._directory_index > 0) { ADOBE_INVARIANT(name._directory_index - 1 < header._directories.size()); - std::string path = header._directories[name._directory_index - 1]._name.allocate_string(); + std::string path = + header._directories[name._directory_index - 1]._name.allocate_string(); path += '/'; path += name._name.allocate_string(); _decl_files.push_back(empool(path)); @@ -918,18 +926,21 @@ pool_string dwarf::implementation::read_debug_str(std::size_t offset) { //-------------------------------------------------------------------------------------------------- pool_string dwarf::implementation::read_debug_line_str(std::size_t offset) { - if (const auto found = _debug_line_str_cache.find(offset); found != _debug_line_str_cache.end()) { + if (const auto found = _debug_line_str_cache.find(offset); + found != _debug_line_str_cache.end()) { return found->second; } - return _debug_line_str_cache[offset] = temp_seek(_s, _debug_line_str._offset + offset, - [&] { return empool(_s.read_c_string_view()); }); + return _debug_line_str_cache[offset] = temp_seek(_s, _debug_line_str._offset + offset, [&] { + return empool(_s.read_c_string_view()); + }); } //-------------------------------------------------------------------------------------------------- // SPECREF: DWARF5 page 26 (8) line 28 -- v4 -> v5 changes pool_string dwarf::implementation::read_debug_str_offs(std::size_t entry) { - if (const auto found = _debug_str_offs_cache.find(entry); found != _debug_str_offs_cache.end()) { + if (const auto found = _debug_str_offs_cache.find(entry); + found != _debug_str_offs_cache.end()) { return found->second; } @@ -964,16 +975,16 @@ pool_string dwarf::implementation::read_debug_str_offs(std::size_t entry) { // temp seek to its location and read 4 bytes. (Note that // all of this assumes 32-bit DWARF.) std::size_t entry_offset = 4 * entry; - const std::uint32_t entry_offset_value = temp_seek(_s, entry_offset, std::ios::cur, [&]{ - return read32(); - }); + const std::uint32_t entry_offset_value = + temp_seek(_s, entry_offset, std::ios::cur, [&] { return read32(); }); // This result is relative to `_debug_str_offsets._offset`. return header_size + entry_offsets_size + entry_offset_value; }); - return _debug_str_offs_cache[entry] = temp_seek(_s, _debug_str_offsets._offset + entry_offset, - [&] { return empool(_s.read_c_string_view()); }); + return _debug_str_offs_cache[entry] = + temp_seek(_s, _debug_str_offsets._offset + entry_offset, + [&] { return empool(_s.read_c_string_view()); }); } //-------------------------------------------------------------------------------------------------- @@ -1007,7 +1018,7 @@ std::string dwarf::implementation::qualified_symbol_name( // those that inherit a linkage name through their specification attribute, whose attributes // `post_process_die_attributes` should copy into this die. However, this routine may be called // before `post_process_die_attributes` is, so we look to that attribute directly here, too. - // + // const dw::at qualified_attributes[] = { dw::at::linkage_name, @@ -1815,7 +1826,8 @@ pool_string dwarf::implementation::resolve_type(attribute type) { // After this call has happened, the current identifier stack entry will be updated from // information in this die's attributes, and the `path` of the die will be set to something // user-readable for ODRV reporting purposes. `die` is an out-arg for performance reasons. -void dwarf::implementation::update_die_identifier_and_path(die& die, const attribute_sequence& attributes) { +void dwarf::implementation::update_die_identifier_and_path(die& die, + const attribute_sequence& attributes) { path_identifier_set(die_identifier(die, attributes)); die._path = empool(std::string_view(qualified_symbol_name(die, attributes))); } @@ -1875,11 +1887,21 @@ die_pair dwarf::implementation::abbreviation_to_die(std::size_t die_address, pro //-------------------------------------------------------------------------------------------------- -bool dwarf::implementation::register_sections_done() { - ADOBE_PRECONDITION(!_ready); +void dwarf::implementation::finalize_section_registration() { + if (_ready) { + return; + } // Houston, we have a problem. - if (!(_debug_info.valid() && _debug_abbrev.valid() && _debug_line.valid())) return false; + if (!_debug_info.valid()) { + throw std::runtime_error("Unread section: debug_info"); + } + if (!_debug_abbrev.valid()) { + throw std::runtime_error("Unread section: debug_abbrev"); + } + if (!_debug_line.valid()) { + throw std::runtime_error("Unread section: debug_line"); + } // the declaration files are 1-indexed. The 0th index is reserved for the compilation unit / // partial unit name. We need to prime this here because in single process mode we don't get @@ -1897,14 +1919,12 @@ bool dwarf::implementation::register_sections_done() { read_abbreviations(); _ready = true; - - return true; } //-------------------------------------------------------------------------------------------------- /** * @brief Determines if a DIE should be skipped during processing - * + * * This function applies a series of filters to determine if a DIE should be skipped * during processing. It checks for various conditions that would make a DIE unsuitable * for registration, such as: @@ -1918,16 +1938,16 @@ bool dwarf::implementation::register_sections_done() { * - Objective-C based DIEs * - Symbols listed in the ignore list * - Self-referential types - * + * * @param d The DIE to check * @param attributes The attribute sequence associated with the DIE - * + * * @pre The DIE and its attributes must be properly initialized * @pre The DIE's tag and path must be set * @pre The attributes sequence must contain all relevant attributes for the DIE - * + * * @return true if the DIE should be skipped, false if it should be processed - * + * * @note Some filters are architecture-specific (e.g., handling of Objective-C) */ bool dwarf::implementation::is_skippable_die(const die& d, const attribute_sequence& attributes) { @@ -1951,7 +1971,7 @@ bool dwarf::implementation::is_skippable_die(const die& d, const attribute_seque // incomplete or non-defining, so cannot contribute to an ODRV. if (has_flag_attribute(attributes, dw::at::declaration)) { #if ORC_FEATURE(PROFILE_DIE_DETAILS) - ZoneTextL("skipping: declaration flag"); + ZoneTextL("skipping: declaration flag"); #endif // ORC_FEATURE(PROFILE_DIE_DETAILS) return true; } @@ -1968,7 +1988,7 @@ bool dwarf::implementation::is_skippable_die(const die& d, const attribute_seque // SPECREF DWARF5 251 (233) line 1 -- value of 0 -> "not inlined" if (attributes.has(dw::at::inline_) && attributes.number(dw::at::inline_) != 0) { #if ORC_FEATURE(PROFILE_DIE_DETAILS) - ZoneTextL("skipping: abstract instance root / tree"); + ZoneTextL("skipping: abstract instance root / tree"); #endif // ORC_FEATURE(PROFILE_DIE_DETAILS) return true; } @@ -2105,7 +2125,8 @@ void dwarf::implementation::report_die_processing_failure(std::size_t die_addres * It traverses the DIE tree and processes the attributes of each, collecting some metadata * along the way. After processing all DIEs, it registers them with the global DIE registry. * - * @pre The DWARF implementation must be ready to process DIEs, meaning that required DWARF sections (like `debug_info`) have been registered + * @pre The DWARF implementation must be ready to process DIEs, meaning that required DWARF sections + * (like `debug_info`) have been registered * * @post All DIEs in the debug_info section are processed and registered * @post The path identifier stack is restored to its original state @@ -2113,7 +2134,8 @@ void dwarf::implementation::report_die_processing_failure(std::size_t die_addres * @throws std::runtime_error If DIE processing fails and cannot be recovered */ void dwarf::implementation::process_all_dies() { - if (!_ready && !register_sections_done()) return; + finalize_section_registration(); + ADOBE_PRECONDITION(_ready); auto section_begin = _debug_info._offset; @@ -2135,7 +2157,7 @@ void dwarf::implementation::process_all_dies() { while (true) { #if ORC_FEATURE(PROFILE_DIE_DETAILS) ZoneScopedN("process_one_die"); // name matters for stats tracking -#endif // ORC_FEATURE(PROFILE_DIE_DETAILS) +#endif // ORC_FEATURE(PROFILE_DIE_DETAILS) const std::size_t die_address = _s.tellg(); die die; @@ -2208,8 +2230,7 @@ void dwarf::implementation::process_all_dies() { // we find a typedef, and use it if an ensuing structure_type has no name. if (die._tag == dw::tag::typedef_ && attributes.has(dw::at::name)) { _last_typedef_name = attributes.get(dw::at::name).string(); - } else if (die._tag == dw::tag::structure_type && - !attributes.has(dw::at::name) && + } else if (die._tag == dw::tag::structure_type && !attributes.has(dw::at::name) && _last_typedef_name) { attribute name; name._name = dw::at::name; @@ -2229,8 +2250,10 @@ void dwarf::implementation::process_all_dies() { // collect some metadata about this DIE for later ODR processing die._skippable = is_skippable_die(die, attributes); die._ofd_index = _ofd_index; - die._hash = die_hash(die, attributes); // DIE "thumbprint" to determine if two DIEs are "equal" - die._fatal_attribute_hash = fatal_attribute_hash(attributes); // If the thumbprints are equal but this is not, it's an ODRV. + die._hash = + die_hash(die, attributes); // DIE "thumbprint" to determine if two DIEs are "equal" + die._fatal_attribute_hash = fatal_attribute_hash( + attributes); // If the thumbprints are equal but this is not, it's an ODRV. die._location = derive_definition_location(attributes); #if ORC_FEATURE(PROFILE_DIE_DETAILS) @@ -2326,11 +2349,11 @@ void dwarf::implementation::post_process_compilation_unit_die( * * @pre The attributes parameter must be a valid attribute_sequence * @pre The DWARF information must be properly loaded to resolve type references - * + * * @post The `DW_AT_type` and `DW_AT_containing_type` attributes in the sequence * will have their values replaced with resolved type names * - * @note `die` and `attributes` are out-args for performance reasons. + * @note `die` and `attributes` are out-args for performance reasons. */ void dwarf::implementation::post_process_die_attributes(die& die, attribute_sequence& attributes) { if (attributes.has(dw::at::type)) { @@ -2357,10 +2380,8 @@ void dwarf::implementation::post_process_die_attributes(die& die, attribute_sequ // // Passing `0` here as the offset is okay because `fetch_one_die` will seek to the correct // offset before calling `abbreviation_to_die`. - auto original_die_pair = temp_seek(_s, 0, [&](){ - return fetch_one_die(original_die_offset, - die._cu_header_offset, - die._cu_die_offset); + auto original_die_pair = temp_seek(_s, 0, [&]() { + return fetch_one_die(original_die_offset, die._cu_header_offset, die._cu_die_offset); }); // Smash the original and this die's attributes together into an aggregate, well-defined set @@ -2382,9 +2403,9 @@ void dwarf::implementation::post_process_die_attributes(die& die, attribute_sequ /** * @brief Fetches a single DIE (Debug Information Entry) from the DWARF data * - * This function retrieves a DIE and its attributes from a specific offset in the `debug_info` section. - * The function first processes the compilation unit DIE to establish necessary context before - * fetching the requested DIE. + * This function retrieves a DIE and its attributes from a specific offset in the `debug_info` + * section. The function first processes the compilation unit DIE to establish necessary context + * before fetching the requested DIE. * * @param die_offset The offset of the DIE to fetch within the `debug_info` section * @param cu_header_offset The offset of the compilation unit header containing this DIE @@ -2405,7 +2426,7 @@ die_pair dwarf::implementation::fetch_one_die(std::size_t die_offset, ZoneScoped; #endif // ORC_FEATURE(PROFILE_DIE_DETAILS) - if (!_ready && !register_sections_done()) throw std::runtime_error("dwarf setup failed"); + finalize_section_registration(); _cu_header_offset = cu_header_offset; @@ -2418,9 +2439,8 @@ die_pair dwarf::implementation::fetch_one_die(std::size_t die_offset, // Read the compilation unit header. We need this to know what version of // DWARF we are processing, which can affect how dies are processed (e.g., // `form_length`'s `dw::form::addrx` passover.) - temp_seek(_s, _debug_info._offset + _cu_header_offset, [&]{ - _cu_header.read(_s, _details._needs_byteswap); - }); + temp_seek(_s, _debug_info._offset + _cu_header_offset, + [&] { _cu_header.read(_s, _details._needs_byteswap); }); // Now grab the compilation unit die itself to fill in additional state details. die_pair cu_pair = fetch_one_die(cu_die_offset, cu_header_offset, cu_die_offset); @@ -2498,7 +2518,8 @@ pool_string line_header::read_one_path_content_path(dwarf::implementation& dwarf * @post The file reader will be positioned after the directory index * @post The returned value will contain the directory index */ -std::uint32_t line_header::read_one_path_content_directory_index(dwarf::implementation& dwarf, dw::form form) { +std::uint32_t line_header::read_one_path_content_directory_index(dwarf::implementation& dwarf, + dw::form form) { // SPECREF DWARF5 177 (159) lines 1-11 switch (form) { case dw::form::data1: { @@ -2551,14 +2572,16 @@ md5_hash line_header::read_one_path_content_md5(dwarf::implementation& dwarf, dw * type (path, directory index, MD5 hash) according to its corresponding form. * * @param dwarf The DWARF implementation providing access to the file reader - * @param formats A vector of content type and form pairs that define the structure of the path entry + * @param formats A vector of content type and form pairs that define the structure of the path + * entry * * @return A file_name structure containing the parsed path information * * @pre The file reader must be positioned at the start of a path content entry * @pre The formats vector must contain valid content type and form pairs * @post The file reader will be positioned after the path content entry - * @post The returned file_name structure will contain the path information as specified by the formats + * @post The returned file_name structure will contain the path information as specified by the + * formats */ file_name line_header::read_one_path_content(dwarf::implementation& dwarf, const std::vector& formats) { @@ -2569,7 +2592,8 @@ file_name line_header::read_one_path_content(dwarf::implementation& dwarf, result._name = read_one_path_content_path(dwarf, format.second); } break; case dw::lnct::directory_index: { - result._directory_index = read_one_path_content_directory_index(dwarf, format.second); + result._directory_index = + read_one_path_content_directory_index(dwarf, format.second); } break; case dw::lnct::md5: { result._md5 = read_one_path_content_md5(dwarf, format.second); @@ -2584,7 +2608,8 @@ file_name line_header::read_one_path_content(dwarf::implementation& dwarf, //-------------------------------------------------------------------------------------------------- /** - * @brief Reads all path content entries (directories or file names) from the DWARF line number program + * @brief Reads all path content entries (directories or file names) from the DWARF line number + * program * * This function reads a collection of path content entries from the DWARF line number program. * It first reads the format specifications that define the structure of each path entry, @@ -2601,7 +2626,8 @@ file_name line_header::read_one_path_content(dwarf::implementation& dwarf, * @pre The file reader must be positioned at the start of a path content section * @pre The output parameters must be valid references * @post format_count will contain the number of format entries read - * @post formats will contain the content type and form pairs that define the structure of each path entry + * @post formats will contain the content type and form pairs that define the structure of each path + * entry * @post path_count will contain the number of path entries read * @post paths will contain the parsed path information for all entries * @post The file reader will be positioned after the path content section @@ -2664,7 +2690,7 @@ void line_header::read(dwarf::implementation& dwarf) { // so the baseline implementation should match that. } else if (_version == 5) { // SPECREF: DWARF5 page 26 (8) line 11 -- changes from DWARF4 to DWARF5 - + // SPECREF: DWARF5 page 172 (154) line 10 _address_size = dwarf.read8(); @@ -2685,11 +2711,11 @@ void line_header::read(dwarf::implementation& dwarf) { _line_base = dwarf.read8(); _line_range = dwarf.read8(); _opcode_base = dwarf.read8(); - + for (std::size_t i{0}; i < (_opcode_base - 1); ++i) { _standard_opcode_lengths.push_back(dwarf.read8()); } - + if (_version < 5) { while (true) { auto cur_directory = dwarf._s.read_c_string_view(); @@ -2698,15 +2724,15 @@ void line_header::read(dwarf::implementation& dwarf) { cur_name._name = empool(cur_directory); _directories.push_back(std::move(cur_name)); } - - // REVIST (fosterbrereton): The reading here isn't entirely accurate. The current code stops the - // first time an empty name is found, and interprets that as the end of the file names (and thus - // the `line_header`). However, the spec (as the end of section 6.2.4) states "A compiler may - // generate a single null byte for the file names field and define file names using the - // extended opcode DW_LNE_define_file." This loop, then, should iterate through the end of the - // defined size of `_header_length` instead of using an empty name as a sentry. Any additional - // null bytes should be interpreted as a placeholder file name description. (Admittedly, I - // haven't seen one of these in the wild yet.) + + // REVIST (fosterbrereton): The reading here isn't entirely accurate. The current code stops + // the first time an empty name is found, and interprets that as the end of the file names + // (and thus the `line_header`). However, the spec (as the end of section 6.2.4) states "A + // compiler may generate a single null byte for the file names field and define file names + // using the extended opcode DW_LNE_define_file." This loop, then, should iterate through + // the end of the defined size of `_header_length` instead of using an empty name as a + // sentry. Any additional null bytes should be interpreted as a placeholder file name + // description. (Admittedly, I haven't seen one of these in the wild yet.) while (true) { file_name cur_file_name; cur_file_name._name = empool(dwarf._s.read_c_string_view()); @@ -2717,17 +2743,11 @@ void line_header::read(dwarf::implementation& dwarf) { _file_names.push_back(std::move(cur_file_name)); } } else { - read_all_path_contents(dwarf, - _directory_entry_format_count, - _directory_entry_format, - _directories_count, - _directories); - - read_all_path_contents(dwarf, - _file_name_entry_format_count, - _file_name_entry_format, - _file_names_count, - _file_names); + read_all_path_contents(dwarf, _directory_entry_format_count, _directory_entry_format, + _directories_count, _directories); + + read_all_path_contents(dwarf, _file_name_entry_format_count, _file_name_entry_format, + _file_names_count, _file_names); } } @@ -2737,9 +2757,7 @@ void line_header::read(dwarf::implementation& dwarf) { //-------------------------------------------------------------------------------------------------- -dwarf::dwarf(std::uint32_t ofd_index, - freader&& s, - file_details&& details) +dwarf::dwarf(std::uint32_t ofd_index, freader&& s, file_details&& details) : _impl(new implementation(ofd_index, std::move(s), std::move(details)), [](auto x) { delete x; }) {} diff --git a/src/dwarf_constants.cpp b/src/dwarf_constants.cpp index c37ed84..f5a821d 100644 --- a/src/dwarf_constants.cpp +++ b/src/dwarf_constants.cpp @@ -19,327 +19,632 @@ namespace dw { const char* to_string(at attr) { switch (attr) { - case at::none: return "none"; - case at::sibling: return "sibling"; - case at::location: return "location"; - case at::name: return "name"; - case at::ordering: return "ordering"; - case at::subscr_data: return "subscr_data"; - case at::byte_size: return "byte_size"; - case at::bit_offset: return "bit_offset"; - case at::bit_size: return "bit_size"; - case at::element_list: return "element_list"; - case at::stmt_list: return "stmt_list"; - case at::low_pc: return "low_pc"; - case at::high_pc: return "high_pc"; - case at::language: return "language"; - case at::member: return "member"; - case at::discr: return "discr"; - case at::discr_value: return "discr_value"; - case at::visibility: return "visibility"; - case at::import_: return "import"; - case at::string_length: return "string_length"; - case at::common_reference: return "common_reference"; - case at::comp_dir: return "comp_dir"; - case at::const_value: return "const_value"; - case at::containing_type: return "containing_type"; - case at::default_value: return "default_value"; - case at::inline_: return "inline_"; - case at::is_optional: return "is_optional"; - case at::lower_bound: return "lower_bound"; - case at::producer: return "producer"; - case at::prototyped: return "prototyped"; - case at::return_addr: return "return_addr"; - case at::start_scope: return "start_scope"; - case at::bit_stride: return "bit_stride"; - // case at::stride_size: return "stride_size"; - case at::upper_bound: return "upper_bound"; - case at::abstract_origin: return "abstract_origin"; - case at::accessibility: return "accessibility"; - case at::address_class: return "address_class"; - case at::artificial: return "artificial"; - case at::base_types: return "base_types"; - case at::calling_convention: return "calling_convention"; - case at::count: return "count"; - case at::data_member_location: return "data_member_location"; - case at::decl_column: return "decl_column"; - case at::decl_file: return "decl_file"; - case at::decl_line: return "decl_line"; - case at::declaration: return "declaration"; - case at::discr_list: return "discr_list"; - case at::encoding: return "encoding"; - case at::external: return "external"; - case at::frame_base: return "frame_base"; - case at::friend_: return "friend_"; - case at::identifier_case: return "identifier_case"; - case at::macro_info: return "macro_info"; - case at::namelist_item: return "namelist_item"; - case at::priority: return "priority"; - case at::segment: return "segment"; - case at::specification: return "specification"; - case at::static_link: return "static_link"; - case at::type: return "type"; - case at::use_location: return "use_location"; - case at::variable_parameter: return "variable_parameter"; - case at::virtuality: return "virtuality"; - case at::vtable_elem_location: return "vtable_elem_location"; - case at::allocated: return "allocated"; - case at::associated: return "associated"; - case at::data_location: return "data_location"; - case at::byte_stride: return "byte_stride"; - // case at::stride: return "stride"; - case at::entry_pc: return "entry_pc"; - case at::use_utf8: return "use_utf8"; - case at::extension: return "extension"; - case at::ranges: return "ranges"; - case at::trampoline: return "trampoline"; - case at::call_column: return "call_column"; - case at::call_file: return "call_file"; - case at::call_line: return "call_line"; - case at::description: return "description"; - case at::binary_scale: return "binary_scale"; - case at::decimal_scale: return "decimal_scale"; - case at::small: return "small"; - case at::decimal_sign: return "decimal_sign"; - case at::digit_count: return "digit_count"; - case at::picture_string: return "picture_string"; - case at::mutable_: return "mutable_"; - case at::threads_scaled: return "threads_scaled"; - case at::explicit_: return "explicit_"; - case at::object_pointer: return "object_pointer"; - case at::endianity: return "endianity"; - case at::elemental: return "elemental"; - case at::pure: return "pure"; - case at::recursive: return "recursive"; - case at::signature: return "signature"; - case at::main_subprogram: return "main_subprogram"; - case at::data_bit_offset: return "data_bit_offset"; - case at::const_expr: return "const_expr"; - case at::enum_class: return "enum_class"; - case at::linkage_name: return "linkage_name"; - case at::string_length_bit_size: return "string_length_bit_size"; - case at::string_length_byte_size: return "string_length_byte_size"; - case at::rank: return "rank"; - case at::str_offsets_base: return "str_offsets_base"; - case at::addr_base: return "addr_base"; - case at::rnglists_base: return "rnglists_base"; - case at::dwo_id: return "dwo_id"; - case at::dwo_name: return "dwo_name"; - case at::reference: return "reference"; - case at::rvalue_reference: return "rvalue_reference"; - case at::macros: return "macros"; - case at::call_all_calls: return "call_all_calls"; - case at::call_all_source_calls: return "call_all_source_calls"; - case at::call_all_tail_calls: return "call_all_tail_calls"; - case at::call_return_pc: return "call_return_pc"; - case at::call_value: return "call_value"; - case at::call_origin: return "call_origin"; - case at::call_parameter: return "call_parameter"; - case at::call_pc: return "call_pc"; - case at::call_tail_call: return "call_tail_call"; - case at::call_target: return "call_target"; - case at::call_target_clobbered: return "call_target_clobbered"; - case at::call_data_location: return "call_data_location"; - case at::call_data_value: return "call_data_value"; - case at::noreturn: return "noreturn"; - case at::alignment: return "alignment"; - case at::export_symbols: return "export_symbols"; - case at::deleted: return "deleted"; - case at::defaulted: return "defaulted"; - case at::loclists_base: return "loclists_base"; - case at::ghs_namespace_alias: return "ghs_namespace_alias"; - case at::ghs_using_namespace: return "ghs_using_namespace"; - case at::ghs_using_declaration: return "ghs_using_declaration"; - case at::hp_block_index: return "hp_block_index"; - // case at::lo_user: return "lo_user"; - case at::mips_fde: return "mips_fde"; - case at::mips_loop_begin: return "mips_loop_begin"; - case at::mips_tail_loop_begin: return "mips_tail_loop_begin"; - case at::mips_epilog_begin: return "mips_epilog_begin"; - case at::mips_loop_unroll_factor: return "mips_loop_unroll_factor"; - case at::mips_software_pipeline_depth: return "mips_software_pipeline_depth"; - case at::mips_linkage_name: return "mips_linkage_name"; - case at::mips_stride: return "mips_stride"; - case at::mips_abstract_name: return "mips_abstract_name"; - case at::mips_clone_origin: return "mips_clone_origin"; - case at::mips_has_inlines: return "mips_has_inlines"; - case at::mips_stride_byte: return "mips_stride_byte"; - case at::mips_stride_elem: return "mips_stride_elem"; - case at::mips_ptr_dopetype: return "mips_ptr_dopetype"; - case at::mips_allocatable_dopetype: return "mips_allocatable_dopetype"; - case at::mips_assumed_shape_dopetype: return "mips_assumed_shape_dopetype"; - case at::mips_assumed_size: return "mips_assumed_size"; - // case at::hp_unmodifiable: return "hp_unmodifiable"; - // case at::hp_prologue: return "hp_prologue"; - // case at::hp_epilogue: return "hp_epilogue"; - // case at::hp_actuals_stmt_list: return "hp_actuals_stmt_list"; - // case at::hp_proc_per_section: return "hp_proc_per_section"; - case at::hp_raw_data_ptr: return "hp_raw_data_ptr"; - case at::hp_pass_by_reference: return "hp_pass_by_reference"; - case at::hp_opt_level: return "hp_opt_level"; - case at::hp_prof_version_id: return "hp_prof_version_id"; - case at::hp_opt_flags: return "hp_opt_flags"; - case at::hp_cold_region_low_pc: return "hp_cold_region_low_pc"; - case at::hp_cold_region_high_pc: return "hp_cold_region_high_pc"; - case at::hp_all_variables_modifiable: return "hp_all_variables_modifiable"; - case at::hp_linkage_name: return "hp_linkage_name"; - case at::hp_prof_flags: return "hp_prof_flags"; - case at::hp_unit_name: return "hp_unit_name"; - case at::hp_unit_size: return "hp_unit_size"; - case at::hp_widened_byte_size: return "hp_widened_byte_size"; - case at::hp_definition_points: return "hp_definition_points"; - case at::hp_default_location: return "hp_default_location"; - case at::hp_is_result_param: return "hp_is_result_param"; - // case at::cpq_discontig_ranges: return "cpq_discontig_ranges"; - // case at::cpq_semantic_events: return "cpq_semantic_events"; - // case at::cpq_split_lifetimes_var: return "cpq_split_lifetimes_var"; - // case at::cpq_split_lifetimes_rtn: return "cpq_split_lifetimes_rtn"; - // case at::cpq_prologue_length: return "cpq_prologue_length"; - // case at::ghs_mangled: return "ghs_mangled"; - case at::ghs_rsm: return "ghs_rsm"; - case at::ghs_frsm: return "ghs_frsm"; - case at::ghs_frames: return "ghs_frames"; - case at::ghs_rso: return "ghs_rso"; - case at::ghs_subcpu: return "ghs_subcpu"; - case at::ghs_lbrace_line: return "ghs_lbrace_line"; - case at::intel_other_endian: return "intel_other_endian"; - case at::sf_names: return "sf_names"; - case at::src_info: return "src_info"; - case at::mac_info: return "mac_info"; - case at::src_coords: return "src_coords"; - case at::body_begin: return "body_begin"; - case at::body_end: return "body_end"; - case at::gnu_vector: return "gnu_vector"; - case at::gnu_guarded_by: return "gnu_guarded_by"; - case at::gnu_pt_guarded_by: return "gnu_pt_guarded_by"; - case at::gnu_guarded: return "gnu_guarded"; - case at::gnu_pt_guarded: return "gnu_pt_guarded"; - case at::gnu_locks_excluded: return "gnu_locks_excluded"; - case at::gnu_exclusive_locks_required: return "gnu_exclusive_locks_required"; - case at::gnu_shared_locks_required: return "gnu_shared_locks_required"; - case at::gnu_odr_signature: return "gnu_odr_signature"; - case at::gnu_template_name: return "gnu_template_name"; - case at::gnu_call_site_value: return "gnu_call_site_value"; - case at::gnu_call_site_data_value: return "gnu_call_site_data_value"; - case at::gnu_call_site_target: return "gnu_call_site_target"; - case at::gnu_call_site_target_clobbered: return "gnu_call_site_target_clobbered"; - case at::gnu_tail_call: return "gnu_tail_call"; - case at::gnu_all_tail_call_sites: return "gnu_all_tail_call_sites"; - case at::gnu_all_call_sites: return "gnu_all_call_sites"; - case at::gnu_all_source_call_sites: return "gnu_all_source_call_sites"; - case at::gnu_macros: return "gnu_macros"; - case at::gnu_deleted: return "gnu_deleted"; - case at::gnu_dwo_name: return "gnu_dwo_name"; - case at::gnu_dwo_id: return "gnu_dwo_id"; - case at::gnu_ranges_base: return "gnu_ranges_base"; - case at::gnu_addr_base: return "gnu_addr_base"; - case at::gnu_pubnames: return "gnu_pubnames"; - case at::gnu_pubtypes: return "gnu_pubtypes"; - case at::gnu_discriminator: return "gnu_discriminator"; - case at::gnu_locviews: return "gnu_locviews"; - case at::gnu_entry_view: return "gnu_entry_view"; - case at::gnu_bias: return "gnu_bias"; - case at::sun_template: return "sun_template"; - // case at::vms_rtnbeg_pd_address: return "vms_rtnbeg_pd_address"; - case at::sun_alignment: return "sun_alignment"; - case at::sun_vtable: return "sun_vtable"; - case at::sun_count_guarantee: return "sun_count_guarantee"; - case at::sun_command_line: return "sun_command_line"; - case at::sun_vbase: return "sun_vbase"; - case at::sun_compile_options: return "sun_compile_options"; - case at::sun_language: return "sun_language"; - case at::sun_browser_file: return "sun_browser_file"; - case at::sun_vtable_abi: return "sun_vtable_abi"; - case at::sun_func_offsets: return "sun_func_offsets"; - case at::sun_cf_kind: return "sun_cf_kind"; - case at::sun_vtable_index: return "sun_vtable_index"; - case at::sun_omp_tpriv_addr: return "sun_omp_tpriv_addr"; - case at::sun_omp_child_func: return "sun_omp_child_func"; - case at::sun_func_offset: return "sun_func_offset"; - case at::sun_memop_type_ref: return "sun_memop_type_ref"; - case at::sun_profile_id: return "sun_profile_id"; - case at::sun_memop_signature: return "sun_memop_signature"; - case at::sun_obj_dir: return "sun_obj_dir"; - case at::sun_obj_file: return "sun_obj_file"; - case at::sun_original_name: return "sun_original_name"; - case at::sun_hwcprof_signature: return "sun_hwcprof_signature"; - case at::sun_amd64_parmdump: return "sun_amd64_parmdump"; - case at::sun_part_link_name: return "sun_part_link_name"; - case at::sun_link_name: return "sun_link_name"; - case at::sun_pass_with_const: return "sun_pass_with_const"; - case at::sun_return_with_const: return "sun_return_with_const"; - case at::sun_import_by_name: return "sun_import_by_name"; - case at::sun_f90_pointer: return "sun_f90_pointer"; - case at::sun_pass_by_ref: return "sun_pass_by_ref"; - case at::sun_f90_allocatable: return "sun_f90_allocatable"; - case at::sun_f90_assumed_shape_array: return "sun_f90_assumed_shape_array"; - case at::sun_c_vla: return "sun_c_vla"; - case at::sun_return_value_ptr: return "sun_return_value_ptr"; - case at::sun_dtor_start: return "sun_dtor_start"; - case at::sun_dtor_length: return "sun_dtor_length"; - case at::sun_dtor_state_initial: return "sun_dtor_state_initial"; - case at::sun_dtor_state_final: return "sun_dtor_state_final"; - case at::sun_dtor_state_deltas: return "sun_dtor_state_deltas"; - case at::sun_import_by_lname: return "sun_import_by_lname"; - case at::sun_f90_use_only: return "sun_f90_use_only"; - case at::sun_namelist_spec: return "sun_namelist_spec"; - case at::sun_is_omp_child_func: return "sun_is_omp_child_func"; - case at::sun_fortran_main_alias: return "sun_fortran_main_alias"; - case at::sun_fortran_based: return "sun_fortran_based"; - case at::altium_loclist: return "altium_loclist"; - case at::use_gnat_descriptive_type: return "use_gnat_descriptive_type"; - case at::gnat_descriptive_type: return "gnat_descriptive_type"; - case at::gnu_numerator: return "gnu_numerator"; - case at::gnu_denominator: return "gnu_denominator"; - case at::go_kind: return "go_kind"; - case at::go_key: return "go_key"; - case at::go_elem: return "go_elem"; - case at::go_embedded_field: return "go_embedded_field"; - case at::go_runtime_type: return "go_runtime_type"; - case at::upc_threads_scaled: return "upc_threads_scaled"; - case at::ibm_wsa_addr: return "ibm_wsa_addr"; - case at::ibm_home_location: return "ibm_home_location"; - case at::ibm_alt_srcview: return "ibm_alt_srcview"; - case at::pgi_lbase: return "pgi_lbase"; - case at::pgi_soffset: return "pgi_soffset"; - case at::pgi_lstride: return "pgi_lstride"; - case at::borland_property_read: return "borland_property_read"; - case at::borland_property_write: return "borland_property_write"; - case at::borland_property_implements: return "borland_property_implements"; - case at::borland_property_index: return "borland_property_index"; - case at::borland_property_default: return "borland_property_default"; - case at::borland_delphi_unit: return "borland_delphi_unit"; - case at::borland_delphi_class: return "borland_delphi_class"; - case at::borland_delphi_record: return "borland_delphi_record"; - case at::borland_delphi_metaclass: return "borland_delphi_metaclass"; - case at::borland_delphi_constructor: return "borland_delphi_constructor"; - case at::borland_delphi_destructor: return "borland_delphi_destructor"; - case at::borland_delphi_anonymous_method: return "borland_delphi_anonymous_method"; - case at::borland_delphi_interface: return "borland_delphi_interface"; - case at::borland_delphi_abi: return "borland_delphi_abi"; - case at::borland_delphi_frameptr: return "borland_delphi_frameptr"; - case at::borland_closure: return "borland_closure"; - case at::llvm_include_path: return "llvm_include_path"; - case at::llvm_config_macros: return "llvm_config_macros"; - case at::llvm_sysroot: return "llvm_sysroot"; - case at::llvm_tag_offset: return "llvm_tag_offset"; - // case at::llvm_apinotes: return "llvm_apinotes"; - case at::apple_optimized: return "apple_optimized"; - case at::apple_flags: return "apple_flags"; - case at::apple_isa: return "apple_isa"; - case at::apple_block: return "apple_block"; - case at::apple_major_runtime_vers: return "apple_major_runtime_vers"; - case at::apple_runtime_class: return "apple_runtime_class"; - case at::apple_omit_frame_ptr: return "apple_omit_frame_ptr"; - case at::apple_property_name: return "apple_property_name"; - case at::apple_property_getter: return "apple_property_getter"; - case at::apple_property_setter: return "apple_property_setter"; - case at::apple_property_attribute: return "apple_property_attribute"; - case at::apple_objc_complete_type: return "apple_objc_complete_type"; - case at::apple_property: return "apple_property"; - case at::apple_objc_direct: return "apple_objc_direct"; - case at::apple_sdk: return "apple_sdk"; - case at::hi_user: return "hi_user"; + case at::none: + return "none"; + case at::sibling: + return "sibling"; + case at::location: + return "location"; + case at::name: + return "name"; + case at::ordering: + return "ordering"; + case at::subscr_data: + return "subscr_data"; + case at::byte_size: + return "byte_size"; + case at::bit_offset: + return "bit_offset"; + case at::bit_size: + return "bit_size"; + case at::element_list: + return "element_list"; + case at::stmt_list: + return "stmt_list"; + case at::low_pc: + return "low_pc"; + case at::high_pc: + return "high_pc"; + case at::language: + return "language"; + case at::member: + return "member"; + case at::discr: + return "discr"; + case at::discr_value: + return "discr_value"; + case at::visibility: + return "visibility"; + case at::import_: + return "import"; + case at::string_length: + return "string_length"; + case at::common_reference: + return "common_reference"; + case at::comp_dir: + return "comp_dir"; + case at::const_value: + return "const_value"; + case at::containing_type: + return "containing_type"; + case at::default_value: + return "default_value"; + case at::inline_: + return "inline_"; + case at::is_optional: + return "is_optional"; + case at::lower_bound: + return "lower_bound"; + case at::producer: + return "producer"; + case at::prototyped: + return "prototyped"; + case at::return_addr: + return "return_addr"; + case at::start_scope: + return "start_scope"; + case at::bit_stride: + return "bit_stride"; + // case at::stride_size: return "stride_size"; + case at::upper_bound: + return "upper_bound"; + case at::abstract_origin: + return "abstract_origin"; + case at::accessibility: + return "accessibility"; + case at::address_class: + return "address_class"; + case at::artificial: + return "artificial"; + case at::base_types: + return "base_types"; + case at::calling_convention: + return "calling_convention"; + case at::count: + return "count"; + case at::data_member_location: + return "data_member_location"; + case at::decl_column: + return "decl_column"; + case at::decl_file: + return "decl_file"; + case at::decl_line: + return "decl_line"; + case at::declaration: + return "declaration"; + case at::discr_list: + return "discr_list"; + case at::encoding: + return "encoding"; + case at::external: + return "external"; + case at::frame_base: + return "frame_base"; + case at::friend_: + return "friend_"; + case at::identifier_case: + return "identifier_case"; + case at::macro_info: + return "macro_info"; + case at::namelist_item: + return "namelist_item"; + case at::priority: + return "priority"; + case at::segment: + return "segment"; + case at::specification: + return "specification"; + case at::static_link: + return "static_link"; + case at::type: + return "type"; + case at::use_location: + return "use_location"; + case at::variable_parameter: + return "variable_parameter"; + case at::virtuality: + return "virtuality"; + case at::vtable_elem_location: + return "vtable_elem_location"; + case at::allocated: + return "allocated"; + case at::associated: + return "associated"; + case at::data_location: + return "data_location"; + case at::byte_stride: + return "byte_stride"; + // case at::stride: return "stride"; + case at::entry_pc: + return "entry_pc"; + case at::use_utf8: + return "use_utf8"; + case at::extension: + return "extension"; + case at::ranges: + return "ranges"; + case at::trampoline: + return "trampoline"; + case at::call_column: + return "call_column"; + case at::call_file: + return "call_file"; + case at::call_line: + return "call_line"; + case at::description: + return "description"; + case at::binary_scale: + return "binary_scale"; + case at::decimal_scale: + return "decimal_scale"; + case at::small: + return "small"; + case at::decimal_sign: + return "decimal_sign"; + case at::digit_count: + return "digit_count"; + case at::picture_string: + return "picture_string"; + case at::mutable_: + return "mutable_"; + case at::threads_scaled: + return "threads_scaled"; + case at::explicit_: + return "explicit_"; + case at::object_pointer: + return "object_pointer"; + case at::endianity: + return "endianity"; + case at::elemental: + return "elemental"; + case at::pure: + return "pure"; + case at::recursive: + return "recursive"; + case at::signature: + return "signature"; + case at::main_subprogram: + return "main_subprogram"; + case at::data_bit_offset: + return "data_bit_offset"; + case at::const_expr: + return "const_expr"; + case at::enum_class: + return "enum_class"; + case at::linkage_name: + return "linkage_name"; + case at::string_length_bit_size: + return "string_length_bit_size"; + case at::string_length_byte_size: + return "string_length_byte_size"; + case at::rank: + return "rank"; + case at::str_offsets_base: + return "str_offsets_base"; + case at::addr_base: + return "addr_base"; + case at::rnglists_base: + return "rnglists_base"; + case at::dwo_id: + return "dwo_id"; + case at::dwo_name: + return "dwo_name"; + case at::reference: + return "reference"; + case at::rvalue_reference: + return "rvalue_reference"; + case at::macros: + return "macros"; + case at::call_all_calls: + return "call_all_calls"; + case at::call_all_source_calls: + return "call_all_source_calls"; + case at::call_all_tail_calls: + return "call_all_tail_calls"; + case at::call_return_pc: + return "call_return_pc"; + case at::call_value: + return "call_value"; + case at::call_origin: + return "call_origin"; + case at::call_parameter: + return "call_parameter"; + case at::call_pc: + return "call_pc"; + case at::call_tail_call: + return "call_tail_call"; + case at::call_target: + return "call_target"; + case at::call_target_clobbered: + return "call_target_clobbered"; + case at::call_data_location: + return "call_data_location"; + case at::call_data_value: + return "call_data_value"; + case at::noreturn: + return "noreturn"; + case at::alignment: + return "alignment"; + case at::export_symbols: + return "export_symbols"; + case at::deleted: + return "deleted"; + case at::defaulted: + return "defaulted"; + case at::loclists_base: + return "loclists_base"; + case at::ghs_namespace_alias: + return "ghs_namespace_alias"; + case at::ghs_using_namespace: + return "ghs_using_namespace"; + case at::ghs_using_declaration: + return "ghs_using_declaration"; + case at::hp_block_index: + return "hp_block_index"; + // case at::lo_user: return "lo_user"; + case at::mips_fde: + return "mips_fde"; + case at::mips_loop_begin: + return "mips_loop_begin"; + case at::mips_tail_loop_begin: + return "mips_tail_loop_begin"; + case at::mips_epilog_begin: + return "mips_epilog_begin"; + case at::mips_loop_unroll_factor: + return "mips_loop_unroll_factor"; + case at::mips_software_pipeline_depth: + return "mips_software_pipeline_depth"; + case at::mips_linkage_name: + return "mips_linkage_name"; + case at::mips_stride: + return "mips_stride"; + case at::mips_abstract_name: + return "mips_abstract_name"; + case at::mips_clone_origin: + return "mips_clone_origin"; + case at::mips_has_inlines: + return "mips_has_inlines"; + case at::mips_stride_byte: + return "mips_stride_byte"; + case at::mips_stride_elem: + return "mips_stride_elem"; + case at::mips_ptr_dopetype: + return "mips_ptr_dopetype"; + case at::mips_allocatable_dopetype: + return "mips_allocatable_dopetype"; + case at::mips_assumed_shape_dopetype: + return "mips_assumed_shape_dopetype"; + case at::mips_assumed_size: + return "mips_assumed_size"; + // case at::hp_unmodifiable: return "hp_unmodifiable"; + // case at::hp_prologue: return "hp_prologue"; + // case at::hp_epilogue: return "hp_epilogue"; + // case at::hp_actuals_stmt_list: return "hp_actuals_stmt_list"; + // case at::hp_proc_per_section: return "hp_proc_per_section"; + case at::hp_raw_data_ptr: + return "hp_raw_data_ptr"; + case at::hp_pass_by_reference: + return "hp_pass_by_reference"; + case at::hp_opt_level: + return "hp_opt_level"; + case at::hp_prof_version_id: + return "hp_prof_version_id"; + case at::hp_opt_flags: + return "hp_opt_flags"; + case at::hp_cold_region_low_pc: + return "hp_cold_region_low_pc"; + case at::hp_cold_region_high_pc: + return "hp_cold_region_high_pc"; + case at::hp_all_variables_modifiable: + return "hp_all_variables_modifiable"; + case at::hp_linkage_name: + return "hp_linkage_name"; + case at::hp_prof_flags: + return "hp_prof_flags"; + case at::hp_unit_name: + return "hp_unit_name"; + case at::hp_unit_size: + return "hp_unit_size"; + case at::hp_widened_byte_size: + return "hp_widened_byte_size"; + case at::hp_definition_points: + return "hp_definition_points"; + case at::hp_default_location: + return "hp_default_location"; + case at::hp_is_result_param: + return "hp_is_result_param"; + // case at::cpq_discontig_ranges: return "cpq_discontig_ranges"; + // case at::cpq_semantic_events: return "cpq_semantic_events"; + // case at::cpq_split_lifetimes_var: return "cpq_split_lifetimes_var"; + // case at::cpq_split_lifetimes_rtn: return "cpq_split_lifetimes_rtn"; + // case at::cpq_prologue_length: return "cpq_prologue_length"; + // case at::ghs_mangled: return "ghs_mangled"; + case at::ghs_rsm: + return "ghs_rsm"; + case at::ghs_frsm: + return "ghs_frsm"; + case at::ghs_frames: + return "ghs_frames"; + case at::ghs_rso: + return "ghs_rso"; + case at::ghs_subcpu: + return "ghs_subcpu"; + case at::ghs_lbrace_line: + return "ghs_lbrace_line"; + case at::intel_other_endian: + return "intel_other_endian"; + case at::sf_names: + return "sf_names"; + case at::src_info: + return "src_info"; + case at::mac_info: + return "mac_info"; + case at::src_coords: + return "src_coords"; + case at::body_begin: + return "body_begin"; + case at::body_end: + return "body_end"; + case at::gnu_vector: + return "gnu_vector"; + case at::gnu_guarded_by: + return "gnu_guarded_by"; + case at::gnu_pt_guarded_by: + return "gnu_pt_guarded_by"; + case at::gnu_guarded: + return "gnu_guarded"; + case at::gnu_pt_guarded: + return "gnu_pt_guarded"; + case at::gnu_locks_excluded: + return "gnu_locks_excluded"; + case at::gnu_exclusive_locks_required: + return "gnu_exclusive_locks_required"; + case at::gnu_shared_locks_required: + return "gnu_shared_locks_required"; + case at::gnu_odr_signature: + return "gnu_odr_signature"; + case at::gnu_template_name: + return "gnu_template_name"; + case at::gnu_call_site_value: + return "gnu_call_site_value"; + case at::gnu_call_site_data_value: + return "gnu_call_site_data_value"; + case at::gnu_call_site_target: + return "gnu_call_site_target"; + case at::gnu_call_site_target_clobbered: + return "gnu_call_site_target_clobbered"; + case at::gnu_tail_call: + return "gnu_tail_call"; + case at::gnu_all_tail_call_sites: + return "gnu_all_tail_call_sites"; + case at::gnu_all_call_sites: + return "gnu_all_call_sites"; + case at::gnu_all_source_call_sites: + return "gnu_all_source_call_sites"; + case at::gnu_macros: + return "gnu_macros"; + case at::gnu_deleted: + return "gnu_deleted"; + case at::gnu_dwo_name: + return "gnu_dwo_name"; + case at::gnu_dwo_id: + return "gnu_dwo_id"; + case at::gnu_ranges_base: + return "gnu_ranges_base"; + case at::gnu_addr_base: + return "gnu_addr_base"; + case at::gnu_pubnames: + return "gnu_pubnames"; + case at::gnu_pubtypes: + return "gnu_pubtypes"; + case at::gnu_discriminator: + return "gnu_discriminator"; + case at::gnu_locviews: + return "gnu_locviews"; + case at::gnu_entry_view: + return "gnu_entry_view"; + case at::gnu_bias: + return "gnu_bias"; + case at::sun_template: + return "sun_template"; + // case at::vms_rtnbeg_pd_address: return "vms_rtnbeg_pd_address"; + case at::sun_alignment: + return "sun_alignment"; + case at::sun_vtable: + return "sun_vtable"; + case at::sun_count_guarantee: + return "sun_count_guarantee"; + case at::sun_command_line: + return "sun_command_line"; + case at::sun_vbase: + return "sun_vbase"; + case at::sun_compile_options: + return "sun_compile_options"; + case at::sun_language: + return "sun_language"; + case at::sun_browser_file: + return "sun_browser_file"; + case at::sun_vtable_abi: + return "sun_vtable_abi"; + case at::sun_func_offsets: + return "sun_func_offsets"; + case at::sun_cf_kind: + return "sun_cf_kind"; + case at::sun_vtable_index: + return "sun_vtable_index"; + case at::sun_omp_tpriv_addr: + return "sun_omp_tpriv_addr"; + case at::sun_omp_child_func: + return "sun_omp_child_func"; + case at::sun_func_offset: + return "sun_func_offset"; + case at::sun_memop_type_ref: + return "sun_memop_type_ref"; + case at::sun_profile_id: + return "sun_profile_id"; + case at::sun_memop_signature: + return "sun_memop_signature"; + case at::sun_obj_dir: + return "sun_obj_dir"; + case at::sun_obj_file: + return "sun_obj_file"; + case at::sun_original_name: + return "sun_original_name"; + case at::sun_hwcprof_signature: + return "sun_hwcprof_signature"; + case at::sun_amd64_parmdump: + return "sun_amd64_parmdump"; + case at::sun_part_link_name: + return "sun_part_link_name"; + case at::sun_link_name: + return "sun_link_name"; + case at::sun_pass_with_const: + return "sun_pass_with_const"; + case at::sun_return_with_const: + return "sun_return_with_const"; + case at::sun_import_by_name: + return "sun_import_by_name"; + case at::sun_f90_pointer: + return "sun_f90_pointer"; + case at::sun_pass_by_ref: + return "sun_pass_by_ref"; + case at::sun_f90_allocatable: + return "sun_f90_allocatable"; + case at::sun_f90_assumed_shape_array: + return "sun_f90_assumed_shape_array"; + case at::sun_c_vla: + return "sun_c_vla"; + case at::sun_return_value_ptr: + return "sun_return_value_ptr"; + case at::sun_dtor_start: + return "sun_dtor_start"; + case at::sun_dtor_length: + return "sun_dtor_length"; + case at::sun_dtor_state_initial: + return "sun_dtor_state_initial"; + case at::sun_dtor_state_final: + return "sun_dtor_state_final"; + case at::sun_dtor_state_deltas: + return "sun_dtor_state_deltas"; + case at::sun_import_by_lname: + return "sun_import_by_lname"; + case at::sun_f90_use_only: + return "sun_f90_use_only"; + case at::sun_namelist_spec: + return "sun_namelist_spec"; + case at::sun_is_omp_child_func: + return "sun_is_omp_child_func"; + case at::sun_fortran_main_alias: + return "sun_fortran_main_alias"; + case at::sun_fortran_based: + return "sun_fortran_based"; + case at::altium_loclist: + return "altium_loclist"; + case at::use_gnat_descriptive_type: + return "use_gnat_descriptive_type"; + case at::gnat_descriptive_type: + return "gnat_descriptive_type"; + case at::gnu_numerator: + return "gnu_numerator"; + case at::gnu_denominator: + return "gnu_denominator"; + case at::go_kind: + return "go_kind"; + case at::go_key: + return "go_key"; + case at::go_elem: + return "go_elem"; + case at::go_embedded_field: + return "go_embedded_field"; + case at::go_runtime_type: + return "go_runtime_type"; + case at::upc_threads_scaled: + return "upc_threads_scaled"; + case at::ibm_wsa_addr: + return "ibm_wsa_addr"; + case at::ibm_home_location: + return "ibm_home_location"; + case at::ibm_alt_srcview: + return "ibm_alt_srcview"; + case at::pgi_lbase: + return "pgi_lbase"; + case at::pgi_soffset: + return "pgi_soffset"; + case at::pgi_lstride: + return "pgi_lstride"; + case at::borland_property_read: + return "borland_property_read"; + case at::borland_property_write: + return "borland_property_write"; + case at::borland_property_implements: + return "borland_property_implements"; + case at::borland_property_index: + return "borland_property_index"; + case at::borland_property_default: + return "borland_property_default"; + case at::borland_delphi_unit: + return "borland_delphi_unit"; + case at::borland_delphi_class: + return "borland_delphi_class"; + case at::borland_delphi_record: + return "borland_delphi_record"; + case at::borland_delphi_metaclass: + return "borland_delphi_metaclass"; + case at::borland_delphi_constructor: + return "borland_delphi_constructor"; + case at::borland_delphi_destructor: + return "borland_delphi_destructor"; + case at::borland_delphi_anonymous_method: + return "borland_delphi_anonymous_method"; + case at::borland_delphi_interface: + return "borland_delphi_interface"; + case at::borland_delphi_abi: + return "borland_delphi_abi"; + case at::borland_delphi_frameptr: + return "borland_delphi_frameptr"; + case at::borland_closure: + return "borland_closure"; + case at::llvm_include_path: + return "llvm_include_path"; + case at::llvm_config_macros: + return "llvm_config_macros"; + case at::llvm_sysroot: + return "llvm_sysroot"; + case at::llvm_tag_offset: + return "llvm_tag_offset"; + // case at::llvm_apinotes: return "llvm_apinotes"; + case at::apple_optimized: + return "apple_optimized"; + case at::apple_flags: + return "apple_flags"; + case at::apple_isa: + return "apple_isa"; + case at::apple_block: + return "apple_block"; + case at::apple_major_runtime_vers: + return "apple_major_runtime_vers"; + case at::apple_runtime_class: + return "apple_runtime_class"; + case at::apple_omit_frame_ptr: + return "apple_omit_frame_ptr"; + case at::apple_property_name: + return "apple_property_name"; + case at::apple_property_getter: + return "apple_property_getter"; + case at::apple_property_setter: + return "apple_property_setter"; + case at::apple_property_attribute: + return "apple_property_attribute"; + case at::apple_objc_complete_type: + return "apple_objc_complete_type"; + case at::apple_property: + return "apple_property"; + case at::apple_objc_direct: + return "apple_objc_direct"; + case at::apple_sdk: + return "apple_sdk"; + case at::hi_user: + return "hi_user"; } } @@ -347,127 +652,244 @@ const char* to_string(at attr) { const char* to_string(tag t) { switch (t) { - case tag::none: return "none"; - case tag::array_type: return "array"; - case tag::class_type: return "class"; - case tag::entry_point: return "entry point"; - case tag::enumeration_type: return "enumeration"; - case tag::formal_parameter: return "formal parameter"; - case tag::imported_declaration: return "imported declaration"; - case tag::label: return "label"; - case tag::lexical_block: return "lexical block"; - case tag::member: return "member"; - case tag::pointer_type: return "pointer"; - case tag::reference_type: return "reference"; - case tag::compile_unit: return "compile unit"; - case tag::string_type: return "string"; - case tag::structure_type: return "structure"; - case tag::subroutine_type: return "subroutine"; - case tag::typedef_: return "typedef"; - case tag::union_type: return "union"; - case tag::unspecified_parameters: return "unspecified parameters"; - case tag::variant: return "variant"; - case tag::common_block: return "common block"; - case tag::common_inclusion: return "common inclusion"; - case tag::inheritance: return "inheritance"; - case tag::inlined_subroutine: return "inlined subroutine"; - case tag::module: return "module"; - case tag::ptr_to_member_type: return "ptr to member"; - case tag::set_type: return "set"; - case tag::subrange_type: return "subrange"; - case tag::with_stmt: return "with stmt"; - case tag::access_declaration: return "access declaration"; - case tag::base_type: return "base"; - case tag::catch_block: return "catch block"; - case tag::const_type: return "const"; - case tag::constant: return "constant"; - case tag::enumerator: return "enumerator"; - case tag::file_type: return "file"; - case tag::friend_: return "friend"; - case tag::namelist: return "namelist"; - case tag::namelist_item: return "namelist item"; + case tag::none: + return "none"; + case tag::array_type: + return "array"; + case tag::class_type: + return "class"; + case tag::entry_point: + return "entry point"; + case tag::enumeration_type: + return "enumeration"; + case tag::formal_parameter: + return "formal parameter"; + case tag::imported_declaration: + return "imported declaration"; + case tag::label: + return "label"; + case tag::lexical_block: + return "lexical block"; + case tag::member: + return "member"; + case tag::pointer_type: + return "pointer"; + case tag::reference_type: + return "reference"; + case tag::compile_unit: + return "compile unit"; + case tag::string_type: + return "string"; + case tag::structure_type: + return "structure"; + case tag::subroutine_type: + return "subroutine"; + case tag::typedef_: + return "typedef"; + case tag::union_type: + return "union"; + case tag::unspecified_parameters: + return "unspecified parameters"; + case tag::variant: + return "variant"; + case tag::common_block: + return "common block"; + case tag::common_inclusion: + return "common inclusion"; + case tag::inheritance: + return "inheritance"; + case tag::inlined_subroutine: + return "inlined subroutine"; + case tag::module: + return "module"; + case tag::ptr_to_member_type: + return "ptr to member"; + case tag::set_type: + return "set"; + case tag::subrange_type: + return "subrange"; + case tag::with_stmt: + return "with stmt"; + case tag::access_declaration: + return "access declaration"; + case tag::base_type: + return "base"; + case tag::catch_block: + return "catch block"; + case tag::const_type: + return "const"; + case tag::constant: + return "constant"; + case tag::enumerator: + return "enumerator"; + case tag::file_type: + return "file"; + case tag::friend_: + return "friend"; + case tag::namelist: + return "namelist"; + case tag::namelist_item: + return "namelist item"; // case tag::namelist_items: return "namelist items"; - case tag::packed_type: return "packed"; - case tag::subprogram: return "subprogram"; - case tag::template_type_parameter: return "template type parameter"; + case tag::packed_type: + return "packed"; + case tag::subprogram: + return "subprogram"; + case tag::template_type_parameter: + return "template type parameter"; // case tag::template_type_param: return "template type param"; - case tag::template_value_parameter: return "template value parameter"; + case tag::template_value_parameter: + return "template value parameter"; // case tag::template_value_param: return "template value param"; - case tag::thrown_type: return "thrown"; - case tag::try_block: return "try block"; - case tag::variant_part: return "variant part"; - case tag::variable: return "variable"; - case tag::volatile_type: return "volatile"; - case tag::dwarf_procedure: return "dwarf procedure"; - case tag::restrict_type: return "restrict"; - case tag::interface_type: return "interface"; - case tag::namespace_: return "namespace"; - case tag::imported_module: return "imported module"; - case tag::unspecified_type: return "unspecified"; - case tag::partial_unit: return "partial unit"; - case tag::imported_unit: return "imported unit"; - case tag::mutable_type: return "mutable"; - case tag::condition: return "condition"; - case tag::shared_type: return "shared"; - case tag::type_unit: return "type unit"; - case tag::rvalue_reference_type: return "rvalue reference"; - case tag::template_alias: return "template alias"; - case tag::coarray_type: return "coarray"; - case tag::generic_subrange: return "generic subrange"; - case tag::dynamic_type: return "dynamic"; - case tag::atomic_type: return "atomic"; - case tag::call_site: return "call site"; - case tag::call_site_parameter: return "call site parameter"; - case tag::skeleton_unit: return "skeleton unit"; - case tag::immutable_type: return "immutable"; - case tag::lo_user: return "lo user"; - case tag::mips_loop: return "mips loop"; - case tag::hp_array_descriptor: return "hp array descriptor"; - case tag::format_label: return "format label"; - case tag::function_template: return "function template"; - case tag::class_template: return "class template"; - case tag::gnu_bincl: return "gnu bincl"; - case tag::gnu_eincl: return "gnu eincl"; - case tag::gnu_template_template_parameter: return "gnu template template parameter"; + case tag::thrown_type: + return "thrown"; + case tag::try_block: + return "try block"; + case tag::variant_part: + return "variant part"; + case tag::variable: + return "variable"; + case tag::volatile_type: + return "volatile"; + case tag::dwarf_procedure: + return "dwarf procedure"; + case tag::restrict_type: + return "restrict"; + case tag::interface_type: + return "interface"; + case tag::namespace_: + return "namespace"; + case tag::imported_module: + return "imported module"; + case tag::unspecified_type: + return "unspecified"; + case tag::partial_unit: + return "partial unit"; + case tag::imported_unit: + return "imported unit"; + case tag::mutable_type: + return "mutable"; + case tag::condition: + return "condition"; + case tag::shared_type: + return "shared"; + case tag::type_unit: + return "type unit"; + case tag::rvalue_reference_type: + return "rvalue reference"; + case tag::template_alias: + return "template alias"; + case tag::coarray_type: + return "coarray"; + case tag::generic_subrange: + return "generic subrange"; + case tag::dynamic_type: + return "dynamic"; + case tag::atomic_type: + return "atomic"; + case tag::call_site: + return "call site"; + case tag::call_site_parameter: + return "call site parameter"; + case tag::skeleton_unit: + return "skeleton unit"; + case tag::immutable_type: + return "immutable"; + case tag::lo_user: + return "lo user"; + case tag::mips_loop: + return "mips loop"; + case tag::hp_array_descriptor: + return "hp array descriptor"; + case tag::format_label: + return "format label"; + case tag::function_template: + return "function template"; + case tag::class_template: + return "class template"; + case tag::gnu_bincl: + return "gnu bincl"; + case tag::gnu_eincl: + return "gnu eincl"; + case tag::gnu_template_template_parameter: + return "gnu template template parameter"; // case tag::gnu_template_template_param: return "gnu template template param"; - case tag::gnu_template_parameter_pack: return "gnu template parameter pack"; - case tag::gnu_formal_parameter_pack: return "gnu formal parameter pack"; - case tag::gnu_call_site: return "gnu call site"; - case tag::gnu_call_site_parameter: return "gnu call site parameter"; - case tag::altium_circ_type: return "altium circ"; - case tag::altium_mwa_circ_type: return "altium mwa circ"; - case tag::altium_rev_carry_type: return "altium rev carry"; - case tag::altium_rom: return "altium rom"; - case tag::upc_shared_type: return "upc shared"; - case tag::upc_strict_type: return "upc strict"; - case tag::upc_relaxed_type: return "upc relaxed"; - case tag::apple_property: return "apple property"; - case tag::sun_function_template: return "sun function template"; - case tag::sun_class_template: return "sun class template"; - case tag::sun_struct_template: return "sun struct template"; - case tag::sun_union_template: return "sun union template"; - case tag::sun_indirect_inheritance: return "sun indirect inheritance"; - case tag::sun_codeflags: return "sun codeflags"; - case tag::sun_memop_info: return "sun memop info"; - case tag::sun_omp_child_func: return "sun omp child func"; - case tag::sun_rtti_descriptor: return "sun rtti descriptor"; - case tag::sun_dtor_info: return "sun dtor info"; - case tag::sun_dtor: return "sun dtor"; - case tag::sun_f90_interface: return "sun f90 interface"; - case tag::sun_fortran_vax_structure: return "sun fortran vax structure"; - case tag::sun_hi: return "sun hi"; - case tag::ghs_namespace: return "ghs namespace"; - case tag::ghs_using_namespace: return "ghs using namespace"; - case tag::ghs_using_declaration: return "ghs using declaration"; - case tag::ghs_template_templ_param: return "ghs template templ param"; - case tag::pgi_kanji_type: return "pgi kanji"; - case tag::pgi_interface_block: return "pgi interface block"; - case tag::borland_property: return "borland property"; - case tag::borland_delphi_string: return "borland delphi string"; - case tag::borland_delphi_dynamic_array: return "borland delphi dynamic array"; - case tag::borland_delphi_set: return "borland delphi set"; - case tag::borland_delphi_variant: return "borland delphi variant"; - case tag::hi_user: return "hi user"; + case tag::gnu_template_parameter_pack: + return "gnu template parameter pack"; + case tag::gnu_formal_parameter_pack: + return "gnu formal parameter pack"; + case tag::gnu_call_site: + return "gnu call site"; + case tag::gnu_call_site_parameter: + return "gnu call site parameter"; + case tag::altium_circ_type: + return "altium circ"; + case tag::altium_mwa_circ_type: + return "altium mwa circ"; + case tag::altium_rev_carry_type: + return "altium rev carry"; + case tag::altium_rom: + return "altium rom"; + case tag::upc_shared_type: + return "upc shared"; + case tag::upc_strict_type: + return "upc strict"; + case tag::upc_relaxed_type: + return "upc relaxed"; + case tag::apple_property: + return "apple property"; + case tag::sun_function_template: + return "sun function template"; + case tag::sun_class_template: + return "sun class template"; + case tag::sun_struct_template: + return "sun struct template"; + case tag::sun_union_template: + return "sun union template"; + case tag::sun_indirect_inheritance: + return "sun indirect inheritance"; + case tag::sun_codeflags: + return "sun codeflags"; + case tag::sun_memop_info: + return "sun memop info"; + case tag::sun_omp_child_func: + return "sun omp child func"; + case tag::sun_rtti_descriptor: + return "sun rtti descriptor"; + case tag::sun_dtor_info: + return "sun dtor info"; + case tag::sun_dtor: + return "sun dtor"; + case tag::sun_f90_interface: + return "sun f90 interface"; + case tag::sun_fortran_vax_structure: + return "sun fortran vax structure"; + case tag::sun_hi: + return "sun hi"; + case tag::ghs_namespace: + return "ghs namespace"; + case tag::ghs_using_namespace: + return "ghs using namespace"; + case tag::ghs_using_declaration: + return "ghs using declaration"; + case tag::ghs_template_templ_param: + return "ghs template templ param"; + case tag::pgi_kanji_type: + return "pgi kanji"; + case tag::pgi_interface_block: + return "pgi interface block"; + case tag::borland_property: + return "borland property"; + case tag::borland_delphi_string: + return "borland delphi string"; + case tag::borland_delphi_dynamic_array: + return "borland delphi dynamic array"; + case tag::borland_delphi_set: + return "borland delphi set"; + case tag::borland_delphi_variant: + return "borland delphi variant"; + case tag::hi_user: + return "hi user"; } } @@ -503,98 +925,190 @@ bool is_type(tag t) { encoding_class attribute_encoding_class(at attribute) { switch (attribute) { - case dw::at::sibling: return encoding_class::reference; - case dw::at::location: return encoding_class::exprloc; // loclistptr - case dw::at::name: return encoding_class::string; - case dw::at::ordering: return encoding_class::constant; - case dw::at::byte_size: return encoding_class::constant; // exprloc, reference - case dw::at::bit_offset: return encoding_class::constant; // exprloc, reference - case dw::at::bit_size: return encoding_class::constant; // exprloc, reference - case dw::at::stmt_list: return encoding_class::lineptr; - case dw::at::low_pc: return encoding_class::address; - case dw::at::high_pc: return encoding_class::address; // constant - case dw::at::language: return encoding_class::constant; - case dw::at::discr: return encoding_class::reference; - case dw::at::discr_value: return encoding_class::constant; - case dw::at::visibility: return encoding_class::constant; - case dw::at::import_: return encoding_class::reference; - case dw::at::string_length: return encoding_class::exprloc; // loclistptr - case dw::at::common_reference: return encoding_class::reference; - case dw::at::comp_dir: return encoding_class::string; - case dw::at::const_value: return encoding_class::block; // constant, string - case dw::at::containing_type: return encoding_class::reference; - case dw::at::default_value: return encoding_class::reference; - case dw::at::inline_: return encoding_class::constant; - case dw::at::is_optional: return encoding_class::flag; - case dw::at::lower_bound: return encoding_class::constant; // exprloc, reference - case dw::at::producer: return encoding_class::string; - case dw::at::prototyped: return encoding_class::flag; - case dw::at::return_addr: return encoding_class::exprloc; // loclistptr - case dw::at::start_scope: return encoding_class::constant; // rangelistptr - case dw::at::bit_stride: return encoding_class::constant; // exprloc, reference - case dw::at::upper_bound: return encoding_class::constant; // exprloc, reference - case dw::at::abstract_origin: return encoding_class::reference; - case dw::at::accessibility: return encoding_class::constant; - case dw::at::address_class: return encoding_class::constant; - case dw::at::artificial: return encoding_class::flag; - case dw::at::base_types: return encoding_class::reference; - case dw::at::calling_convention: return encoding_class::constant; - case dw::at::count: return encoding_class::constant; // exprloc, reference - case dw::at::data_member_location: return encoding_class::exprloc; // constant, loclistptr - case dw::at::decl_column: return encoding_class::constant; - case dw::at::decl_file: return encoding_class::constant; - case dw::at::decl_line: return encoding_class::constant; - case dw::at::declaration: return encoding_class::flag; - case dw::at::discr_list: return encoding_class::block; - case dw::at::encoding: return encoding_class::constant; - case dw::at::external: return encoding_class::flag; - case dw::at::frame_base: return encoding_class::exprloc; // loclistptr - case dw::at::friend_: return encoding_class::reference; - case dw::at::identifier_case: return encoding_class::constant; - case dw::at::macro_info: return encoding_class::macptr; - case dw::at::namelist_item: return encoding_class::reference; - case dw::at::priority: return encoding_class::reference; - case dw::at::segment: return encoding_class::exprloc; // loclistptr - case dw::at::specification: return encoding_class::reference; - case dw::at::static_link: return encoding_class::exprloc; // loclistptr - case dw::at::type: return encoding_class::reference; - case dw::at::use_location: return encoding_class::exprloc; // loclistptr - case dw::at::variable_parameter: return encoding_class::flag; - case dw::at::virtuality: return encoding_class::constant; - case dw::at::vtable_elem_location: return encoding_class::exprloc; // loclistptr - case dw::at::allocated: return encoding_class::constant; // exprloc, reference - case dw::at::associated: return encoding_class::constant; // exprloc, reference - case dw::at::data_location: return encoding_class::exprloc; - case dw::at::byte_stride: return encoding_class::constant; // exprloc, reference - case dw::at::entry_pc: return encoding_class::address; - case dw::at::use_utf8: return encoding_class::flag; - case dw::at::extension: return encoding_class::reference; - case dw::at::ranges: return encoding_class::rangelistptr; - case dw::at::trampoline: return encoding_class::address; // flag, reference, string - case dw::at::call_column: return encoding_class::constant; - case dw::at::call_file: return encoding_class::constant; - case dw::at::call_line: return encoding_class::constant; - case dw::at::description: return encoding_class::string; - case dw::at::binary_scale: return encoding_class::constant; - case dw::at::decimal_scale: return encoding_class::constant; - case dw::at::small: return encoding_class::reference; - case dw::at::decimal_sign: return encoding_class::constant; - case dw::at::digit_count: return encoding_class::constant; - case dw::at::picture_string: return encoding_class::string; - case dw::at::mutable_: return encoding_class::flag; - case dw::at::threads_scaled: return encoding_class::flag; - case dw::at::explicit_: return encoding_class::flag; - case dw::at::object_pointer: return encoding_class::reference; - case dw::at::endianity: return encoding_class::constant; - case dw::at::elemental: return encoding_class::flag; - case dw::at::pure: return encoding_class::flag; - case dw::at::recursive: return encoding_class::flag; - case dw::at::signature: return encoding_class::reference; - case dw::at::main_subprogram: return encoding_class::flag; - case dw::at::data_bit_offset: return encoding_class::constant; - case dw::at::const_expr: return encoding_class::flag; - case dw::at::enum_class: return encoding_class::flag; - case dw::at::linkage_name: return encoding_class::string; + case dw::at::sibling: + return encoding_class::reference; + case dw::at::location: + return encoding_class::exprloc; // loclistptr + case dw::at::name: + return encoding_class::string; + case dw::at::ordering: + return encoding_class::constant; + case dw::at::byte_size: + return encoding_class::constant; // exprloc, reference + case dw::at::bit_offset: + return encoding_class::constant; // exprloc, reference + case dw::at::bit_size: + return encoding_class::constant; // exprloc, reference + case dw::at::stmt_list: + return encoding_class::lineptr; + case dw::at::low_pc: + return encoding_class::address; + case dw::at::high_pc: + return encoding_class::address; // constant + case dw::at::language: + return encoding_class::constant; + case dw::at::discr: + return encoding_class::reference; + case dw::at::discr_value: + return encoding_class::constant; + case dw::at::visibility: + return encoding_class::constant; + case dw::at::import_: + return encoding_class::reference; + case dw::at::string_length: + return encoding_class::exprloc; // loclistptr + case dw::at::common_reference: + return encoding_class::reference; + case dw::at::comp_dir: + return encoding_class::string; + case dw::at::const_value: + return encoding_class::block; // constant, string + case dw::at::containing_type: + return encoding_class::reference; + case dw::at::default_value: + return encoding_class::reference; + case dw::at::inline_: + return encoding_class::constant; + case dw::at::is_optional: + return encoding_class::flag; + case dw::at::lower_bound: + return encoding_class::constant; // exprloc, reference + case dw::at::producer: + return encoding_class::string; + case dw::at::prototyped: + return encoding_class::flag; + case dw::at::return_addr: + return encoding_class::exprloc; // loclistptr + case dw::at::start_scope: + return encoding_class::constant; // rangelistptr + case dw::at::bit_stride: + return encoding_class::constant; // exprloc, reference + case dw::at::upper_bound: + return encoding_class::constant; // exprloc, reference + case dw::at::abstract_origin: + return encoding_class::reference; + case dw::at::accessibility: + return encoding_class::constant; + case dw::at::address_class: + return encoding_class::constant; + case dw::at::artificial: + return encoding_class::flag; + case dw::at::base_types: + return encoding_class::reference; + case dw::at::calling_convention: + return encoding_class::constant; + case dw::at::count: + return encoding_class::constant; // exprloc, reference + case dw::at::data_member_location: + return encoding_class::exprloc; // constant, loclistptr + case dw::at::decl_column: + return encoding_class::constant; + case dw::at::decl_file: + return encoding_class::constant; + case dw::at::decl_line: + return encoding_class::constant; + case dw::at::declaration: + return encoding_class::flag; + case dw::at::discr_list: + return encoding_class::block; + case dw::at::encoding: + return encoding_class::constant; + case dw::at::external: + return encoding_class::flag; + case dw::at::frame_base: + return encoding_class::exprloc; // loclistptr + case dw::at::friend_: + return encoding_class::reference; + case dw::at::identifier_case: + return encoding_class::constant; + case dw::at::macro_info: + return encoding_class::macptr; + case dw::at::namelist_item: + return encoding_class::reference; + case dw::at::priority: + return encoding_class::reference; + case dw::at::segment: + return encoding_class::exprloc; // loclistptr + case dw::at::specification: + return encoding_class::reference; + case dw::at::static_link: + return encoding_class::exprloc; // loclistptr + case dw::at::type: + return encoding_class::reference; + case dw::at::use_location: + return encoding_class::exprloc; // loclistptr + case dw::at::variable_parameter: + return encoding_class::flag; + case dw::at::virtuality: + return encoding_class::constant; + case dw::at::vtable_elem_location: + return encoding_class::exprloc; // loclistptr + case dw::at::allocated: + return encoding_class::constant; // exprloc, reference + case dw::at::associated: + return encoding_class::constant; // exprloc, reference + case dw::at::data_location: + return encoding_class::exprloc; + case dw::at::byte_stride: + return encoding_class::constant; // exprloc, reference + case dw::at::entry_pc: + return encoding_class::address; + case dw::at::use_utf8: + return encoding_class::flag; + case dw::at::extension: + return encoding_class::reference; + case dw::at::ranges: + return encoding_class::rangelistptr; + case dw::at::trampoline: + return encoding_class::address; // flag, reference, string + case dw::at::call_column: + return encoding_class::constant; + case dw::at::call_file: + return encoding_class::constant; + case dw::at::call_line: + return encoding_class::constant; + case dw::at::description: + return encoding_class::string; + case dw::at::binary_scale: + return encoding_class::constant; + case dw::at::decimal_scale: + return encoding_class::constant; + case dw::at::small: + return encoding_class::reference; + case dw::at::decimal_sign: + return encoding_class::constant; + case dw::at::digit_count: + return encoding_class::constant; + case dw::at::picture_string: + return encoding_class::string; + case dw::at::mutable_: + return encoding_class::flag; + case dw::at::threads_scaled: + return encoding_class::flag; + case dw::at::explicit_: + return encoding_class::flag; + case dw::at::object_pointer: + return encoding_class::reference; + case dw::at::endianity: + return encoding_class::constant; + case dw::at::elemental: + return encoding_class::flag; + case dw::at::pure: + return encoding_class::flag; + case dw::at::recursive: + return encoding_class::flag; + case dw::at::signature: + return encoding_class::reference; + case dw::at::main_subprogram: + return encoding_class::flag; + case dw::at::data_bit_offset: + return encoding_class::constant; + case dw::at::const_expr: + return encoding_class::flag; + case dw::at::enum_class: + return encoding_class::flag; + case dw::at::linkage_name: + return encoding_class::string; default: throw std::runtime_error("uknown encoding class for attribute"); } diff --git a/src/fat.cpp b/src/fat.cpp index 1121ad9..cad400c 100644 --- a/src/fat.cpp +++ b/src/fat.cpp @@ -8,8 +8,8 @@ #include "orc/fat.hpp" // mach-o -#include #include +#include //-------------------------------------------------------------------------------------------------- @@ -44,7 +44,7 @@ void read_fat(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params) { + reader_params params) { auto header = read_pod(s); if (details._needs_byteswap) { endian_swap(header.magic); diff --git a/src/macho.cpp b/src/macho.cpp index 3ea9350..3a55434 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -46,26 +46,22 @@ struct macho_reader { macho_reader(std::uint32_t ofd_index, freader&& s, file_details&& details, - macho_params&& params) + reader_params&& params) : _ofd_index(ofd_index), _s(std::move(s)), _details(std::move(details)), _params(std::move(params)), _dwarf(ofd_index, copy(_s), copy(_details)) { - if (params._mode == macho_reader_mode::invalid) { + if (params._mode == reader_mode::invalid) { cerr_safe([&](auto& s) { s << "Invalid reader mode.\n"; }); std::terminate(); } populate_dwarf(); } - struct dwarf& dwarf() & { - return _dwarf; - } - struct dwarf&& dwarf() && { - return std::move(_dwarf); - } + struct dwarf& dwarf() & { return _dwarf; } + struct dwarf&& dwarf() && { return std::move(_dwarf); } - bool register_dies_mode() const { return _params._mode == macho_reader_mode::register_dies; } - bool derive_dylibs_mode() const { return _params._mode == macho_reader_mode::derive_dylibs; } - // bool odrv_reporting_mode() const { return _params._mode == macho_reader_mode::odrv_reporting; } + bool register_dies_mode() const { return _params._mode == reader_mode::register_dies; } + bool derive_dylibs_mode() const { return _params._mode == reader_mode::derive_dylibs; } + // bool odrv_reporting_mode() const { return _params._mode == reader_mode::odrv_reporting; } void derive_dependencies(); @@ -82,7 +78,7 @@ struct macho_reader { const std::uint32_t _ofd_index{0}; freader _s; const file_details _details; - const macho_params _params; + const reader_params _params; std::vector _unresolved_dylibs; std::vector _rpaths; struct dwarf _dwarf; // must be last @@ -386,7 +382,7 @@ void read_macho(object_ancestry&& ancestry, freader s, std::istream::pos_type end_pos, file_details details, - macho_params params) { + reader_params params) { orc::do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), _params = std::move(params)]() mutable { ZoneScopedN("read_macho"); @@ -415,7 +411,7 @@ void read_macho(object_ancestry&& ancestry, //-------------------------------------------------------------------------------------------------- -dwarf dwarf_from_macho(std::uint32_t ofd_index, macho_params params) { +dwarf dwarf_from_macho(std::uint32_t ofd_index, reader_params params) { const auto& entry = object_file_fetch(ofd_index); freader s(entry._ancestry.begin()->allocate_path()); @@ -478,8 +474,8 @@ std::vector derive_immediate_dylibs( TracyLockable(std::mutex, dylib_result_mutex); std::vector result; freader input(input_path); - macho_params params; - params._mode = macho_reader_mode::derive_dylibs; + reader_params params; + params._mode = reader_mode::derive_dylibs; params._executable_path = executable_path; params._register_dependencies = [&](std::vector&& p) { ZoneScopedN("register_dependencies"); @@ -529,8 +525,8 @@ std::vector derive_all_dylibs(const std::filesystem::path // If that set of files is empty, then we have found all our // dependencies, and can stop. pass = std::vector(); // ensure `pass` is valid and empty. - std::set_difference(pass_dependencies.begin(), pass_dependencies.end(), - scanned.begin(), scanned.end(), std::back_inserter(pass)); + std::set_difference(pass_dependencies.begin(), pass_dependencies.end(), scanned.begin(), + scanned.end(), std::back_inserter(pass)); if (pass.empty()) { break; diff --git a/src/main.cpp b/src/main.cpp index 7949c6d..16bbdb6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -107,9 +107,7 @@ std::size_t parse_enval(std::string&& x) { } template -T derive_configuration(const char* key, - const toml::parse_result& settings, - T&& fallback) { +T derive_configuration(const char* key, const toml::parse_result& settings, T&& fallback) { T result = settings[key].value_or(fallback); std::string envar = toupper(std::string("ORC_") + key); if (const char* enval = std::getenv(envar.c_str())) { @@ -166,18 +164,23 @@ void process_orc_configuration(const char* bin_path_string) { auto& app_settings = settings::instance(); app_settings._graceful_exit = derive_configuration("graceful_exit", settings, false); - app_settings._max_violation_count = derive_configuration("max_error_count", settings, std::size_t(0)); + app_settings._max_violation_count = + derive_configuration("max_error_count", settings, std::size_t(0)); app_settings._forward_to_linker = derive_configuration("forward_to_linker", settings, true); app_settings._standalone_mode = derive_configuration("standalone_mode", settings, false); app_settings._dylib_scan_mode = derive_configuration("dylib_scan_mode", settings, false); app_settings._parallel_processing = derive_configuration("parallel_processing", settings, true); app_settings._filter_redundant = derive_configuration("filter_redundant", settings, true); - app_settings._print_object_file_list = derive_configuration("print_object_file_list", settings, false); - app_settings._relative_output_file = derive_configuration("relative_output_file", settings, std::string()); + app_settings._print_object_file_list = + derive_configuration("print_object_file_list", settings, false); + app_settings._relative_output_file = + derive_configuration("relative_output_file", settings, std::string()); - const std::string log_level = derive_configuration("log_level", settings, std::string("warning")); + const std::string log_level = + derive_configuration("log_level", settings, std::string("warning")); const std::string output_file = derive_configuration("output_file", settings, std::string()); - const std::string output_file_mode = derive_configuration("output_file_mode", settings, std::string("text")); + const std::string output_file_mode = + derive_configuration("output_file_mode", settings, std::string("text")); // Do this early so we can log the ensuing output if it happens. if (!output_file.empty()) { @@ -206,13 +209,13 @@ void process_orc_configuration(const char* bin_path_string) { } else { // not a known value. Switch to verbose! app_settings._log_level = settings::log_level::verbose; - cout_safe( - [&](auto& s) { s << "warning: unknown log_level '" << log_level << "'; using verbose\n"; }); + cout_safe([&](auto& s) { + s << "warning: unknown log_level '" << log_level << "'; using verbose\n"; + }); } if (app_settings._standalone_mode && app_settings._dylib_scan_mode) { - throw std::logic_error( - "Both standalone and dylib scanning mode are enabled. Pick one."); + throw std::logic_error("Both standalone and dylib scanning mode are enabled. Pick one."); } if (app_settings._dylib_scan_mode) { @@ -236,8 +239,7 @@ void process_orc_configuration(const char* bin_path_string) { app_settings._violation_report = read_string_list("violation_report"); app_settings._violation_ignore = read_string_list("violation_ignore"); - if (!app_settings._violation_report.empty() && - !app_settings._violation_ignore.empty()) { + if (!app_settings._violation_report.empty() && !app_settings._violation_ignore.empty()) { if (log_level_at_least(settings::log_level::warning)) { cout_safe([&](auto& s) { s << "warning: Both `violation_report` and `violation_ignore` lists found\n"; @@ -247,9 +249,7 @@ void process_orc_configuration(const char* bin_path_string) { } if (log_level_at_least(settings::log_level::info)) { - cout_safe([&](auto& s) { - s << "info: ORC config file: " << config_path.string() << "\n"; - }); + cout_safe([&](auto& s) { s << "info: ORC config file: " << config_path.string() << "\n"; }); } } @@ -337,8 +337,7 @@ cmdline_results process_command_line(int argc, char** argv) { result._file_object_list.push_back(argv[i]); } - if (settings::instance()._dylib_scan_mode && - result._file_object_list.size() != 1 && + if (settings::instance()._dylib_scan_mode && result._file_object_list.size() != 1 && log_level_at_least(settings::log_level::warning)) { cout_safe([&](auto& s) { s << "warning: dylib scanning with more than one top-level artifact may yield false positives.\n"; @@ -452,13 +451,14 @@ auto epilogue(bool exception) { if (g._object_file_count == 0) { if (settings::instance()._output_file_mode == settings::output_file_mode::json) { - cout_safe([](auto& s){ - s << orc::version_json() << '\n'; - }); + cout_safe([](auto& s) { s << orc::version_json() << '\n'; }); } else { cout_safe([&](auto& s) { const auto local_build = ORC_VERSION_STR() == std::string("local"); - const std::string tag_url = local_build ? "" : std::string(" (https://github.com/adobe/orc/releases/tag/") + ORC_VERSION_STR() + ")"; + const std::string tag_url = + local_build ? "" : + std::string(" (https://github.com/adobe/orc/releases/tag/") + + ORC_VERSION_STR() + ")"; s << "ORC (https://github.com/adobe/orc)\n"; s << " version: " << ORC_VERSION_STR() << tag_url << '\n'; s << " sha: " << ORC_SHA_STR() << '\n'; diff --git a/src/orc.cpp b/src/orc.cpp index f957d76..e98886c 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -39,6 +39,7 @@ // application #include "orc/async.hpp" +#include "orc/coff.hpp" #include "orc/dwarf.hpp" #include "orc/features.hpp" #include "orc/macho.hpp" @@ -180,11 +181,32 @@ const char* problem_prefix() { return settings::instance()._graceful_exit ? "war //-------------------------------------------------------------------------------------------------- +dwarf dwarf_from_object_file(std::uint32_t ofd_index, reader_params params) { + const object_file_descriptor& descriptor = object_file_fetch(ofd_index); + + switch (descriptor._details._format) { + case file_details::format::macho: { + return dwarf_from_macho(ofd_index, std::move(params)); + } break; + case file_details::format::coff: { + return dwarf_from_coff(ofd_index, std::move(params)); + } break; + default: { + // If you get here, the object file format is either new and + // unaccounted for, or the format is a container type (ar, fat) + // and not a low-level variant where actual DWARF data is found. + throw std::runtime_error("dwarf_from_object_file: unknown / bad object file"); + } + } +} + +//-------------------------------------------------------------------------------------------------- + attribute_sequence fetch_attributes_for_die(const die& d) { // Too verbose for larger projects, but keep around for debugging/smaller projects. // ZoneScoped; - auto dwarf = dwarf_from_macho(d._ofd_index, macho_params{macho_reader_mode::odrv_reporting}); + auto dwarf = dwarf_from_object_file(d._ofd_index, reader_params{reader_mode::odrv_reporting}); auto [die, attributes] = dwarf.fetch_one_die(d._offset, d._cu_header_offset, d._cu_die_offset); ADOBE_INVARIANT(die._tag == d._tag); @@ -451,14 +473,15 @@ void parse_dsym(const std::filesystem::path& dsym) { // // For now, assume the symbol data is stored within file(s) inside the directory below, and // requires no additional data in order to grok it for the purpose of ODRV scanning. - for (const auto& entry : std::filesystem::directory_iterator(dsym / "Contents" / "Resources" / "DWARF")) { + for (const auto& entry : + std::filesystem::directory_iterator(dsym / "Contents" / "Resources" / "DWARF")) { const auto path = entry.path(); if (!is_regular_file(path)) continue; - orc::do_work([_input_path = std::move(path)]{ + orc::do_work([_input_path = std::move(path)] { freader input(_input_path); parse_file(_input_path.string(), object_ancestry(), input, input.size(), - macho_params{macho_reader_mode::register_dies}); + reader_params{reader_mode::register_dies}); }); } } @@ -502,7 +525,7 @@ std::vector orc_process(std::vector&& file_l freader input(_input_path); parse_file(_input_path.string(), object_ancestry(), input, input.size(), - macho_params{macho_reader_mode::register_dies}); + reader_params{reader_mode::register_dies}); } }); } @@ -585,7 +608,8 @@ void to_json(nlohmann::json& j, const odrv_report::conflict_details& c) { const auto& locations = c._locations; auto& instances = j["locations"]; for (const auto& location : sorted_keys(locations)) { - const std::string location_str = location.file.allocate_string() + ":" + std::to_string(location.loc); + const std::string location_str = + location.file.allocate_string() + ":" + std::to_string(location.loc); auto& location_json = instances[location_str]; for (const auto& ancestry : locations.at(location)) { auto* node = &location_json; @@ -627,7 +651,8 @@ void register_dies(dies die_vector) { // Erase the skippable dies and shrink the vector to fit, which will preserve only the necessary // dies in a vector whose memory consumption is exactly what's needed. - globals::instance()._die_skipped_count += std::erase_if(die_vector, std::mem_fn(&die::_skippable)); + globals::instance()._die_skipped_count += + std::erase_if(die_vector, std::mem_fn(&die::_skippable)); die_vector.shrink_to_fit(); // This is a list so the die vectors don't move about. The dies become pretty entangled as they @@ -685,12 +710,13 @@ std::string to_json(const std::vector& reports) { synopsis["object_files_scanned"] = g._object_file_count.load(); synopsis["dies_processed"] = g._die_processed_count.load(); synopsis["dies_skipped"] = g._die_skipped_count.load(); - synopsis["dies_skipped_pct"] = g._die_processed_count ? (g._die_skipped_count * 100. / g._die_processed_count) : 0; + synopsis["dies_skipped_pct"] = + g._die_processed_count ? (g._die_skipped_count * 100. / g._die_processed_count) : 0; synopsis["unique_symbols"] = g._unique_symbol_count.load(); - nlohmann::json result = nlohmann::json::object_t { - { "violations", std::move(violations) }, - { "synopsis", std::move(synopsis) }, + nlohmann::json result = nlohmann::json::object_t{ + {"violations", std::move(violations)}, + {"synopsis", std::move(synopsis)}, }; return result.dump(spaces_k); diff --git a/src/parse_file.cpp b/src/parse_file.cpp index 0b8912c..351d94f 100644 --- a/src/parse_file.cpp +++ b/src/parse_file.cpp @@ -21,8 +21,8 @@ #include // close // mach-o -#include #include +#include // application #include "orc/ar.hpp" @@ -166,7 +166,7 @@ void parse_file(std::string_view object_name, const object_ancestry& ancestry, freader& s, std::istream::pos_type end_pos, - macho_params params) { + reader_params params) { auto detection = detect_file(s); // append this object name to the ancestry diff --git a/src/str.cpp b/src/str.cpp index d05e25e..be06315 100644 --- a/src/str.cpp +++ b/src/str.cpp @@ -60,13 +60,18 @@ std::string format_size(std::size_t x, format_mode mode) { ++exponent; } - const char* label = [&]{ + const char* label = [&] { switch (exponent) { - case 0: return "bytes"; - case 1: return mode == format_mode::binary ? "KiB" : "KB"; - case 2: return mode == format_mode::binary ? "MiB" : "MB"; - case 3: return mode == format_mode::binary ? "GiB" : "GB"; - default: return mode == format_mode::binary ? "TiB" : "TB"; + case 0: + return "bytes"; + case 1: + return mode == format_mode::binary ? "KiB" : "KB"; + case 2: + return mode == format_mode::binary ? "MiB" : "MB"; + case 3: + return mode == format_mode::binary ? "GiB" : "GB"; + default: + return mode == format_mode::binary ? "TiB" : "TB"; } }(); @@ -92,9 +97,7 @@ std::string format_pct(float x) { //-------------------------------------------------------------------------------------------------- std::string toupper(std::string&& s) { - std::transform(s.begin(), s.end(), s.begin(), [](auto c){ - return std::toupper(c); - }); + std::transform(s.begin(), s.end(), s.begin(), [](auto c) { return std::toupper(c); }); return s; } diff --git a/src/string_pool.cpp b/src/string_pool.cpp index a6bd6fc..8e75172 100644 --- a/src/string_pool.cpp +++ b/src/string_pool.cpp @@ -216,7 +216,7 @@ pool_string empool(std::string_view src) { if (const char* c = find_key(h)) { #if ORC_FEATURE(PROFILE_EMPOOL) ZoneColor(tracy::Color::ColorType::Orange); // cache "half-hit" -#endif // ORC_FEATURE(PROFILE_EMPOOL) +#endif // ORC_FEATURE(PROFILE_EMPOOL) pool_string ps(c); assert(ps.view() == src); @@ -231,7 +231,7 @@ pool_string empool(std::string_view src) { #if ORC_FEATURE(PROFILE_EMPOOL) ZoneColor(tracy::Color::ColorType::Red); // cache miss -#endif // ORC_FEATURE(PROFILE_EMPOOL) +#endif // ORC_FEATURE(PROFILE_EMPOOL) return pool_string(ptr); } From 7e21bc544c36f0532ba7aaed771cf448da5b239c Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Mon, 6 Oct 2025 09:04:15 -0700 Subject: [PATCH 4/7] build break --- include/orc/parse_file.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/orc/parse_file.hpp b/include/orc/parse_file.hpp index 9a4f915..e5aa0bf 100644 --- a/include/orc/parse_file.hpp +++ b/include/orc/parse_file.hpp @@ -244,6 +244,7 @@ constexpr std::decay_t copy(T&& value) noexcept(noexcept(std::decay_t{ //-------------------------------------------------------------------------------------------------- +enum class reader_mode { invalid, register_dies, derive_dylibs, From 49d007c6f299a612ab3a4bc30e533638fd0f4969 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Mon, 6 Oct 2025 09:08:08 -0700 Subject: [PATCH 5/7] better reporting --- src/coff.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coff.cpp b/src/coff.cpp index 330a362..b3e250d 100644 --- a/src/coff.cpp +++ b/src/coff.cpp @@ -16,6 +16,7 @@ // application #include "orc/dwarf.hpp" #include "orc/object_file_registry.hpp" +#include "orc/settings.hpp" // for globals //-------------------------------------------------------------------------------------------------- @@ -161,6 +162,10 @@ dwarf dwarf_from_coff(std::uint32_t ofd_index, reader_params params) { } } + if (params._mode == reader_mode::register_dies) { + ++globals::instance()._object_file_count; + } + return dwarf; } From 62a7e86beed9816e864f54df079a2f52843ea61a Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Mon, 6 Oct 2025 17:19:25 -0700 Subject: [PATCH 6/7] proper `debug_str_offs` reading --- src/dwarf.cpp | 123 +++++++++++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 51 deletions(-) diff --git a/src/dwarf.cpp b/src/dwarf.cpp index d7bb6b8..3143bf6 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -731,6 +731,7 @@ struct dwarf::implementation { std::size_t _cu_die_offset{ 0}; // offset of the `compile_unit` die. Relative to start of `debug_info` pool_string _cu_compilation_directory; + std::optional _cu_str_offsets_base; std::uint32_t _ofd_index{0}; // index to the obj_registry in macho.cpp section _debug_abbrev; section _debug_info; @@ -938,53 +939,30 @@ pool_string dwarf::implementation::read_debug_line_str(std::size_t offset) { //-------------------------------------------------------------------------------------------------- // SPECREF: DWARF5 page 26 (8) line 28 -- v4 -> v5 changes -pool_string dwarf::implementation::read_debug_str_offs(std::size_t entry) { - if (const auto found = _debug_str_offs_cache.find(entry); +pool_string dwarf::implementation::read_debug_str_offs(std::size_t index) { + if (const auto found = _debug_str_offs_cache.find(index); found != _debug_str_offs_cache.end()) { return found->second; } - // SPECREF: DWARF5 page 259 (241) line 6 -- - // Apparently `DW_AT_str_offsets_base` points to the first entry in this table, but I am not - // sure where that attribute lives. So we'll take the time to derive that offset every time. - // If that becomes too expensive we can revisit hunting down `DW_AT_str_offsets_base` and - // caching it. - - // This section contains a header, then a series of offsets stored as 4- or 8-byte - // values, then a series of strings. So we have to jump twice: first to get - // the offset, then to get the string. The 0th string immediately follows the last - // entry offset. - - const std::size_t entry_offset = temp_seek(_s, _debug_str_offsets._offset, [&] { - const std::size_t startoff = _s.tellg(); - // SPECREF: DWARF5 page 258 (240) line 9 -- string offsets table details - const std::uint64_t length = read_initial_length(); - const std::uint16_t version = read16(); - ADOBE_INVARIANT(version == 5); - const std::uint16_t padding = read16(); - ADOBE_INVARIANT(padding == 0); - const std::size_t endoff = _s.tellg(); - const std::size_t header_size = endoff - startoff; - - // length does not include itself. So the on-disk size taken - // up by the entry offsets is the length minus version and padding. - const std::size_t entry_offsets_size = length - 4; - - // At this point tellg() is at the 0th entry offset value. - // To get the entry offset value we are interested in, we - // temp seek to its location and read 4 bytes. (Note that - // all of this assumes 32-bit DWARF.) - std::size_t entry_offset = 4 * entry; - const std::uint32_t entry_offset_value = - temp_seek(_s, entry_offset, std::ios::cur, [&] { return read32(); }); - - // This result is relative to `_debug_str_offsets._offset`. - return header_size + entry_offsets_size + entry_offset_value; - }); + // It is possible for the compilation unit header itself to + // want to use debug_str_offsets _before_ + // `DW_AT_str_offsets_base` has been encountered. In + // such case we punt on the resolved value, and hope + // we don't actually need it while processing dies. + if (!_cu_str_offsets_base) { + static const auto no_resolution_k(empool("read_debug_str_offs_FIXME")); + return no_resolution_k; + } + + const auto base = _debug_str_offsets._offset + *_cu_str_offsets_base; + const auto offset = index * 4; // 8 on DWARF64? + + const auto debug_str_offset = temp_seek(_s, base + offset, [&] { return read32(); }); - return _debug_str_offs_cache[entry] = - temp_seek(_s, _debug_str_offsets._offset + entry_offset, - [&] { return empool(_s.read_c_string_view()); }); + // SPECREF: DWARF5 page 204 (186) line 23 -- these are offsets into + // the `.debug_str` section + return read_debug_str(debug_str_offset); } //-------------------------------------------------------------------------------------------------- @@ -1720,27 +1698,37 @@ attribute_value dwarf::implementation::process_form(const attribute& attr, case dw::form::strx: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read_uleb())); + // (cache the uint value for possible use later.) + result.uint(read_uleb()); + result.string(read_debug_str_offs(result.uint())); } break; case dw::form::strx1: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read8())); + // (cache the uint value for possible use later.) + result.uint(read8()); + result.string(read_debug_str_offs(result.uint())); } break; case dw::form::strx2: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read16())); + // (cache the uint value for possible use later.) + result.uint(read16()); + result.string(read_debug_str_offs(result.uint())); } break; case dw::form::strx3: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read24())); + // (cache the uint value for possible use later.) + result.uint(read24()); + result.string(read_debug_str_offs(result.uint())); } break; case dw::form::strx4: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read32())); + // (cache the uint value for possible use later.) + result.uint(read32()); + result.string(read_debug_str_offs(result.uint())); } break; default: { handle_passover(); @@ -2308,6 +2296,18 @@ void dwarf::implementation::post_process_compilation_unit_die( const die& die, const attribute_sequence& attributes) { _cu_die_offset = die._offset; + // SPECREF DWARF5 84 (66) line 1 -- + // The compilation unit header may (should?) have `DW_AT_str_offsets_base`. + // This is used to figure out where strings are coming from out of the + // debug_str_offsets section. Save this for later. + // + // SPECREF DWARF5 237 (219) line 18 -- + // This value is apparently of type `stroffsetsptr` which is a + // 4- or 8-byte unsigned value. + if (attributes.has(dw::at::str_offsets_base)) { + _cu_str_offsets_base = attributes.uint(dw::at::str_offsets_base); + } + // Spec (section 3.1.1) says that compilation and partial units may specify which // __debug_line subsection they want to draw their decl_files list from. This also // means we need to clear our current decl_files list (from index 1 to the end) @@ -2324,16 +2324,23 @@ void dwarf::implementation::post_process_compilation_unit_die( } // Grab the comp_dir value here, and apply it to relative paths so we can - // display the full path whenever necessary. - if (attributes.has_string(dw::at::comp_dir)) { - _cu_compilation_directory = attributes.string(dw::at::comp_dir); + // display the full path whenever necessary. We don't read the string + // directly, as the resolution of the string may have happened before + // `DW_AT_str_offsets_base` was found, and thus would be an invalid + // value. For this value specifically, then, we re-grab the string based + // on the offset. + if (attributes.has_uint(dw::at::comp_dir)) { + _cu_compilation_directory = read_debug_str_offs(attributes.uint(dw::at::comp_dir)); } // REVISIT (fosterbrereton): If the name is a relative path, there may be a // DW_AT_comp_dir attribute that specifies the path it is relative from. // Is it worth making this path absolute? + // + // (This string suffers from the same `DW_AT_str_offsets_base` issue as + // comp_dir, hence the call to `read_debug_str_offs`.) - _decl_files[0] = attributes.string(dw::at::name); + _decl_files[0] = read_debug_str_offs(attributes.uint(dw::at::name)); } //-------------------------------------------------------------------------------------------------- @@ -2397,6 +2404,20 @@ void dwarf::implementation::post_process_die_attributes(die& die, attribute_sequ // a little bit of identifier/path housekeeping. update_die_identifier_and_path(die, attributes); } + + // COFF: COFF restricts the name of a symbol to be 8 characters. Longer symbol + // names are made by reference e.g., ("\214") and denote a byte offset into the + // COFF string table. + if (_details._format == file_details::format::coff) { + if (attributes.has(dw::at::name)) { + auto name = attributes.string(dw::at::name); + if (name.size() && (name.view()[0] == '\\')) { + // look up the string table name at the offset. + int x(42); + (void)x; + } + } + } } //-------------------------------------------------------------------------------------------------- From 1674f7416a7b2fea6b0fa1550e0a832053f93ee4 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Mon, 6 Oct 2025 19:01:37 -0700 Subject: [PATCH 7/7] removing dead code --- src/dwarf.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 3143bf6..fd1f138 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -2404,20 +2404,6 @@ void dwarf::implementation::post_process_die_attributes(die& die, attribute_sequ // a little bit of identifier/path housekeeping. update_die_identifier_and_path(die, attributes); } - - // COFF: COFF restricts the name of a symbol to be 8 characters. Longer symbol - // names are made by reference e.g., ("\214") and denote a byte offset into the - // COFF string table. - if (_details._format == file_details::format::coff) { - if (attributes.has(dw::at::name)) { - auto name = attributes.string(dw::at::name); - if (name.size() && (name.view()[0] == '\\')) { - // look up the string table name at the offset. - int x(42); - (void)x; - } - } - } } //--------------------------------------------------------------------------------------------------