[PATCH 2/2] covoar: Store address-to-line info outside of DWARF

Chris Johns chrisj at rtems.org
Wed May 26 00:24:23 UTC 2021



On 1/5/21 8:01 am, Alex White wrote:
> This adds the AddressToLineMapper class and supporting classes to
> assume responsibility of tracking address-to-line information.
> 
> This allows the DWARF library to properly cleanup all of its resources
> and leads to significant memory savings.
> 
> Closes #4383
> ---
>  rtemstoolkit/rld-dwarf.cpp           |   5 +
>  rtemstoolkit/rld-dwarf.h             |   5 +
>  tester/covoar/AddressToLineMapper.cc | 105 +++++++++++++++
>  tester/covoar/AddressToLineMapper.h  | 193 +++++++++++++++++++++++++++
>  tester/covoar/ExecutableInfo.cc      |  73 +++++-----
>  tester/covoar/ExecutableInfo.h       |  11 +-
>  tester/covoar/wscript                |   1 +
>  7 files changed, 351 insertions(+), 42 deletions(-)
>  create mode 100644 tester/covoar/AddressToLineMapper.cc
>  create mode 100644 tester/covoar/AddressToLineMapper.h
> 
> diff --git a/rtemstoolkit/rld-dwarf.cpp b/rtemstoolkit/rld-dwarf.cpp
> index 2fce0e4..1eae50c 100644
> --- a/rtemstoolkit/rld-dwarf.cpp
> +++ b/rtemstoolkit/rld-dwarf.cpp
> @@ -1893,6 +1893,11 @@ namespace rld
>        return false;
>      }
>  
> +    const addresses& compilation_unit::get_addresses () const
> +    {
> +      return addr_lines_;
> +    }
> +
>      functions&
>      compilation_unit::get_functions ()
>      {
> diff --git a/rtemstoolkit/rld-dwarf.h b/rtemstoolkit/rld-dwarf.h
> index 1210813..eadb50d 100644
> --- a/rtemstoolkit/rld-dwarf.h
> +++ b/rtemstoolkit/rld-dwarf.h
> @@ -707,6 +707,11 @@ namespace rld
>         */
>        unsigned int pc_high () const;
>  
> +      /**
> +       * The addresses associated with this compilation unit.
> +       */
> +      const addresses& get_addresses () const;
> +
>        /**
>         * Get the source and line for an address. If the address does not match
>         * false is returned the file is set to 'unknown' and the line is set to
> diff --git a/tester/covoar/AddressToLineMapper.cc b/tester/covoar/AddressToLineMapper.cc
> new file mode 100644
> index 0000000..1062fd7
> --- /dev/null
> +++ b/tester/covoar/AddressToLineMapper.cc
> @@ -0,0 +1,105 @@
> +/*! @file AddressToLineMapper.cc
> + *  @brief AddressToLineMapper Implementation
> + *
> + *  This file contains the implementation of the functionality
> + *  of the AddressToLineMapper class.
> + */
> +
> +#include "AddressToLineMapper.h"
> +
> +namespace Coverage {
> +
> +  uint64_t SourceLine::location() const
> +  {
> +    return address;
> +  }
> +
> +  bool SourceLine::is_an_end_sequence() const
> +  {
> +    return is_end_sequence;
> +  }
> +
> +  std::string SourceLine::path() const
> +  {
> +    return *sourceIterator;
> +  }
> +
> +  uint64_t SourceLine::line() const
> +  {
> +    return line_num;
> +  }
> +
> +  void AddressLineRange::addSourceLine(const rld::dwarf::address& address)
> +  {
> +    auto insertResult = sourcePaths.insert(address.path());
> +
> +    sourceLines.emplace_back(
> +      SourceLine (
> +        address.location(),
> +        insertResult.first,
> +        address.line(),
> +        address.is_an_end_sequence()
> +      )
> +    );
> +  }
> +
> +  const SourceLine* AddressLineRange::getSourceLine(uint32_t address) const
> +  {
> +    if (address < lowAddress || address > highAddress) {
> +      return nullptr;
> +    }
> +
> +    const SourceLine* last_line = nullptr;
> +    for (const auto &line : sourceLines) {
> +      if (address <= line.location())
> +      {
> +        if (address == line.location())
> +          last_line = &line;
> +        break;
> +      }
> +      last_line = &line;
> +    }
> +
> +    return last_line;
> +  }
> +
> +  void AddressToLineMapper::getSource(
> +    uint32_t address,
> +    std::string& sourceFile,
> +    int& sourceLine
> +  ) const {
> +    sourceFile = "unknown";
> +    sourceLine = -1;
> +
> +    const SourceLine* match = nullptr;
> +    for (const auto &range : addressLineRanges) {
> +      const SourceLine* potential_match = range.getSourceLine(address);
> +
> +      if (potential_match != nullptr) {
> +        if (match == nullptr) {
> +          match = potential_match;
> +        } else if (match->is_an_end_sequence() || !potential_match->is_an_end_sequence()) {
> +          match = potential_match;
> +        }
> +      }
> +    }
> +
> +    if (match != nullptr) {
> +      sourceFile = match->path();
> +      sourceLine = match->line();
> +    }
> +  }
> +
> +  AddressLineRange& AddressToLineMapper::makeRange(
> +    uint32_t low,
> +    uint32_t high
> +  )
> +  {
> +    addressLineRanges.emplace_back(
> +      AddressLineRange(low, high)
> +    );
> +
> +    return addressLineRanges.back();
> +  }
> +
> +}
> diff --git a/tester/covoar/AddressToLineMapper.h b/tester/covoar/AddressToLineMapper.h
> new file mode 100644
> index 0000000..ee0a633
> --- /dev/null
> +++ b/tester/covoar/AddressToLineMapper.h
> @@ -0,0 +1,193 @@
> +/*! @file AddressToLineMapper.h
> + *  @brief AddressToLineMapper Specification
> + *
> + *  This file contains the specification of the AddressToLineMapper class.
> + */
> +
> +#ifndef __ADDRESS_TO_LINE_MAPPER_H__
> +#define __ADDRESS_TO_LINE_MAPPER_H__
> +
> +#include <cstdint>
> +#include <set>
> +#include <string>
> +#include <vector>
> +
> +#include <rld-dwarf.h>
> +
> +namespace Coverage {
> +
> +  /*! @class SourceLine
> +   *
> +   *  This class stores source information for a specific address.
> +   */
> +  class SourceLine {
> +
> +  public:
> +
> +    SourceLine(
> +      uint64_t addr,> +      std::set<std::string>::const_iterator src,

Why not just pass in a reference to the object the iterator references? You do
not touch the iterator which is a good thing because the iterator held in the
object is a copy of a copy of the outside iterator.

> +      int64_t ln,
> +      bool end_sequence
> +    ) : address(addr),
> +        sourceIterator(src),
> +        line_num(ln),
> +        is_end_sequence(end_sequence)
> +    {
> +    }
> +
> +    /*!
> +     *  This method gets the address of this source information.
> +     *
> +     *  @return Returns the address of this source information
> +     */
> +    uint64_t location() const;
> +
> +    /*!
> +     *  This method gets a value indicating whether this address represents an 
> +     *  end sequence.
> +     *
> +     *  @return Returns whether this address represents an end sequence or not
> +     */
> +    bool is_an_end_sequence() const;
> +
> +    /*!
> +     *  This method gets the source file path of this address.
> +     *
> +     *  @return Returns the source file path of this address
> +     */
> +    std::string path() const;
> +
> +    /*!
> +     *  This method gets the source line number of this address.
> +     *
> +     *  @return Returns the source line number of this address
> +     */
> +    uint64_t line() const;

Why not `unsigned int` or `size_t`?

> +
> +  private:
> +
> +    /*!
> +     *  The address of this source information.
> +     */
> +    uint64_t address;
> +
> +    /*!
> +     *  An iterator pointing to the location in the set that contains the
> +     *  source file path of the address.
> +     */
> +    std::set<std::string>::const_iterator sourceIterator;

Would `const std::string& path_;` work ?

> +
> +    /*!
> +     *  The source line number of the address.
> +     */
> +    int64_t line_num;
> +
> +    /*!
> +     *  Whether the address represents an end sequence or not.
> +     */
> +    bool is_end_sequence;
> +
> +  };
> +
> +  /*! @class AddressLineRange
> +   *
> +   *  This class stores source information for an address range.
> +   */
> +  class AddressLineRange {
> +
> +  public:
> +
> +    AddressLineRange(
> +      uint32_t low,
> +      uint32_t high
> +    ) : lowAddress(low), highAddress(high)
> +    {
> +    }
> +
> +    /*!
> +     *  This method adds source and line information for a specified address.
> +     *
> +     *  @param[in] address specifies the DWARF address information
> +     */
> +    void addSourceLine(const rld::dwarf::address& address);
> +
> +    /*!
> +     *  This method gets the source file name and line number for a given 
> +     *  address.
> +     *
> +     *  @param[in] address specifies the address to look up
> +     * 
> +     *  @return Returns the source information for the specified address or
> +     *          nullptr if not found.
> +     */
> +    const SourceLine* getSourceLine(uint32_t address) const;

Why a pointer? Returning `nullptr` is a potential source of bugs where the
pointer is not checked. All you have done is encoded an address and a state in
the one variable, something that is done in C.

An `operator[](uint32_t address)` could be used here that throws an exception on
an invalid address. You just need to catch the exception in
`AddressToLineMapper::getSource()` and make `match` be in instance. Just add a
default constructor to SourceLine with the path set to "uknown" and line to
"-1". This however means you need to make the change to not use an iterator as a
reference. It seems the choice to use the iterator in the class has cascaded out
into other areas.

> +
> +  private:
> +
> +    /*!
> +     *  The low address for this range.
> +     */
> +    uint32_t lowAddress;
> +
> +    /*!
> +     *  The high address for this range.
> +     */
> +    uint32_t highAddress;
> +
> +    /*!
> +     *  The source information for addresses in this range.
> +     */
> +    std::vector<SourceLine> sourceLines;
> +
> +    /*!
> +     *  The set of source file names for this range.
> +     */
> +    std::set<std::string> sourcePaths;

I always add a local class/struct typedef for containers.

> +
> +  };
> +
> +  /*! @class AddressToLineMapper
> +   *
> +   *  This class provides address-to-line capabilities.
> +   */
> +  class AddressToLineMapper {
> +
> +  public:
> +
> +    /*!
> +     *  This method gets the source file name and line number for a given 
> +     *  address.
> +     *
> +     *  @param[in] address specifies the address to look up
> +     *  @param[out] sourceFile specifies the name of the source file
> +     *  @param[out] sourceLine specifies the line number in the source file
> +     */
> +    void getSource(
> +      uint32_t address,
> +      std::string& sourceFile,
> +      int& sourceLine
> +    ) const;
> +
> +    /*!
> +     *  This method creates a new range with the specified addresses.
> +     *
> +     *  @param[in] low specifies the low address of the range
> +     *  @param[in] high specifies the high address of the range
> +     * 
> +     *  @return Returns a reference to the newly created range
> +     */
> +    AddressLineRange& makeRange(uint32_t low, uint32_t high);
> +
> +  private:
> +
> +    /*!
> +     *  The address and line information ranges.
> +     */
> +    std::vector<AddressLineRange> addressLineRanges;

Why is this private? :)

Chris

> +
> +  };
> +
> +}
> +
> +#endif
> diff --git a/tester/covoar/ExecutableInfo.cc b/tester/covoar/ExecutableInfo.cc
> index 861e60d..a6184e7 100644
> --- a/tester/covoar/ExecutableInfo.cc
> +++ b/tester/covoar/ExecutableInfo.cc
> @@ -40,61 +40,60 @@ namespace Coverage {
>      executable.begin();
>      executable.load_symbols(symbols);
>  
> +    rld::dwarf::file debug;
> +
>      debug.begin(executable.elf());
>      debug.load_debug();
>      debug.load_functions();
>  
> -    try {
> -      for (auto& cu : debug.get_cus()) {
> -        for (auto& func : cu.get_functions()) {
> -          if (!func.has_machine_code()) {
> -            continue;
> -          }
> +    for (auto& cu : debug.get_cus()) {
> +      AddressLineRange& range = mapper.makeRange(cu.pc_low(), cu.pc_high());
> +      // Does not filter on desired symbols under the assumption that the test
> +      // code and any support code is small relative to what is being tested.
> +      for (const auto &address : cu.get_addresses()) {
> +        range.addSourceLine(address);
> +      }
>  
> -          if (!SymbolsToAnalyze->isDesired(func.name())) {
> -            continue;
> +      for (auto& func : cu.get_functions()) {
> +        if (!func.has_machine_code()) {
> +          continue;
> +        }
> +
> +        if (!SymbolsToAnalyze->isDesired(func.name())) {
> +          continue;
> +        }
> +
> +        if (func.is_inlined()) {
> +          if (func.is_external()) {
> +            // Flag it
> +            std::cerr << "Function is both external and inlined: "
> +                      << func.name() << std::endl;
>            }
>  
> -          if (func.is_inlined()) {
> -            if (func.is_external()) {
> -              // Flag it
> -              std::cerr << "Function is both external and inlined: "
> -                        << func.name() << std::endl;
> -            }
> -
> -            if (func.has_entry_pc()) {
> -              continue;
> -            }
> -
> -            // If the low PC address is zero, the symbol does not appear in
> -            // this executable.
> -            if (func.pc_low() == 0) {
> -              continue;
> -            }
> +          if (func.has_entry_pc()) {
> +            continue;
>            }
>  
> -          // We can't process a zero size function.
> -          if (func.pc_high() == 0) {
> +          // If the low PC address is zero, the symbol does not appear in
> +          // this executable.
> +          if (func.pc_low() == 0) {
>              continue;
>            }
> +        }
>  
> -          createCoverageMap (cu.name(), func.name(),
> -                              func.pc_low(), func.pc_high() - 1);
> +        // We can't process a zero size function.
> +        if (func.pc_high() == 0) {
> +          continue;
>          }
> +
> +        createCoverageMap (cu.name(), func.name(),
> +                            func.pc_low(), func.pc_high() - 1);
>        }
> -    } catch (...) {
> -      debug.end();
> -      throw;
>      }
> -
> -    // Can't cleanup handles until the destructor because the information is
> -    // referenced elsewhere. NOTE: This could cause problems from too many open
> -    // file descriptors.
>    }
>  
>    ExecutableInfo::~ExecutableInfo()
>    {
> -    debug.end();
>    }
>  
>    void ExecutableInfo::dumpCoverageMaps( void ) {
> @@ -197,7 +196,7 @@ namespace Coverage {
>    {
>      std::string file;
>      int         lno;
> -    debug.get_source (address, file, lno);
> +    mapper.getSource (address, file, lno);
>      std::ostringstream ss;
>      ss << file << ':' << lno;
>      line = ss.str ();
> diff --git a/tester/covoar/ExecutableInfo.h b/tester/covoar/ExecutableInfo.h
> index 5d5a595..dfc71aa 100644
> --- a/tester/covoar/ExecutableInfo.h
> +++ b/tester/covoar/ExecutableInfo.h
> @@ -15,6 +15,7 @@
>  #include <rld-files.h>
>  #include <rld-symbols.h>
>  
> +#include "AddressToLineMapper.h"
>  #include "CoverageMapBase.h"
>  #include "SymbolTable.h"
>  
> @@ -157,11 +158,6 @@ namespace Coverage {
>        uint32_t           highAddress
>      );
>  
> -    /*!
> -     *  The DWARF data to the ELF executable.
> -     */
> -    rld::dwarf::file debug;
> -
>      /*!
>       *  The executable's file name.
>       */
> @@ -172,6 +168,11 @@ namespace Coverage {
>       */
>      rld::symbols::table symbols;
>  
> +    /*!
> +     *  The address-to-line mapper for this executable.
> +     */
> +    AddressToLineMapper mapper;
> +
>      /*!
>       *  This map associates a symbol with its coverage map.
>       */
> diff --git a/tester/covoar/wscript b/tester/covoar/wscript
> index 82599b0..a928b1b 100644
> --- a/tester/covoar/wscript
> +++ b/tester/covoar/wscript
> @@ -83,6 +83,7 @@ def build(bld):
>  
>      bld.stlib(target = 'ccovoar',
>                source = ['app_common.cc',
> +                        'AddressToLineMapper.cc',
>                          'CoverageFactory.cc',
>                          'CoverageMap.cc',
>                          'CoverageMapBase.cc',
> 


More information about the devel mailing list