From aa146013a1d8c2d9f567ec23f12927f5d838780f Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Wed, 15 Nov 2023 08:34:44 -0800 Subject: Speed-up absl::Symbolize by ~6x via faster file reads. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit absl::Symbolize does tons of tiny reads. Speed this up by switching from lseek+read to a pread, and by reading more data than requested into a buffer. A faster absl::Symbolize will be helpful in tests and when printing stack traces on /threadz etc. Results for absl::Symbolize benchmark that exercises uncached behavior of absl::Symbolize: ``` name old time/op new time/op delta BM_Symbolize 16.4ms ±12% 2.6ms ± 0% -84.06% (p=0.001 n=5+9) ``` PiperOrigin-RevId: 582687566 Change-Id: I44caf189d81867f3fd8c050a3100a4b9a8e744d7 --- absl/debugging/symbolize_elf.inc | 152 +++++++++++++++++++++++++++------------ 1 file changed, 108 insertions(+), 44 deletions(-) (limited to 'absl/debugging/symbolize_elf.inc') diff --git a/absl/debugging/symbolize_elf.inc b/absl/debugging/symbolize_elf.inc index 30638cb2..ae75cd41 100644 --- a/absl/debugging/symbolize_elf.inc +++ b/absl/debugging/symbolize_elf.inc @@ -289,6 +289,30 @@ ObjFile *AddrMap::Add() { return new (&obj_[size_++]) ObjFile; } +class CachingFile { + public: + // Setup reader for fd that uses buf[0, buf_size-1] as a cache. + CachingFile(int fd, char *buf, size_t buf_size) + : fd_(fd), + cache_(buf), + cache_size_(buf_size), + cache_start_(0), + cache_limit_(0) {} + + int fd() const { return fd_; } + ssize_t ReadFromOffset(void *buf, size_t count, off_t offset); + bool ReadFromOffsetExact(void *buf, size_t count, off_t offset); + + private: + // Bytes [cache_start_, cache_limit_-1] from fd_ are stored in + // a prefix of cache_[0, cache_size_-1]. + int fd_; + char *cache_; + size_t cache_size_; + off_t cache_start_; + off_t cache_limit_; +}; + // --------------------------------------------------------------- enum FindSymbolResult { SYMBOL_NOT_FOUND = 1, SYMBOL_TRUNCATED, SYMBOL_FOUND }; @@ -330,6 +354,7 @@ class Symbolizer { SYMBOL_BUF_SIZE = 3072, TMP_BUF_SIZE = 1024, SYMBOL_CACHE_LINES = 128, + FILE_CACHE_SIZE = 8192, }; AddrMap addr_map_; @@ -338,6 +363,7 @@ class Symbolizer { bool addr_map_read_; char symbol_buf_[SYMBOL_BUF_SIZE]; + char file_cache_[FILE_CACHE_SIZE]; // tmp_buf_ will be used to store arrays of ElfW(Shdr) and ElfW(Sym) // so we ensure that tmp_buf_ is properly aligned to store either. @@ -436,34 +462,58 @@ static ssize_t ReadPersistent(int fd, void *buf, size_t count) { return static_cast(num_bytes); } -// Read up to "count" bytes from "offset" in the file pointed by file -// descriptor "fd" into the buffer starting at "buf". On success, -// return the number of bytes read. Otherwise, return -1. -static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count, - const off_t offset) { - off_t off = lseek(fd, offset, SEEK_SET); - if (off == (off_t)-1) { - ABSL_RAW_LOG(WARNING, "lseek(%d, %jd, SEEK_SET) failed: errno=%d", fd, - static_cast(offset), errno); - return -1; +// Read up to "count" bytes from "offset" into the buffer starting at "buf", +// while handling short reads and EINTR. On success, return the number of bytes +// read. Otherwise, return -1. +ssize_t CachingFile::ReadFromOffset(void *buf, size_t count, off_t offset) { + char *dst = static_cast(buf); + size_t read = 0; + while (read < count) { + // Look in cache first. + if (offset >= cache_start_ && offset < cache_limit_) { + const char *hit_start = &cache_[offset - cache_start_]; + const size_t n = + std::min(count - read, static_cast(cache_limit_ - offset)); + memcpy(dst, hit_start, n); + dst += n; + read += static_cast(n); + offset += static_cast(n); + continue; + } + + cache_start_ = 0; + cache_limit_ = 0; + ssize_t n = pread(fd_, cache_, cache_size_, offset); + if (n < 0) { + if (errno == EINTR) { + continue; + } + ABSL_RAW_LOG(WARNING, "read failed: errno=%d", errno); + return -1; + } + if (n == 0) { // Reached EOF. + break; + } + + cache_start_ = offset; + cache_limit_ = offset + static_cast(n); + // Next iteration will copy from cache into dst. } - return ReadPersistent(fd, buf, count); + return static_cast(read); } -// Try reading exactly "count" bytes from "offset" bytes in a file -// pointed by "fd" into the buffer starting at "buf" while handling -// short reads and EINTR. On success, return true. Otherwise, return -// false. -static bool ReadFromOffsetExact(const int fd, void *buf, const size_t count, - const off_t offset) { - ssize_t len = ReadFromOffset(fd, buf, count, offset); +// Try reading exactly "count" bytes from "offset" bytes into the buffer +// starting at "buf" while handling short reads and EINTR. On success, return +// true. Otherwise, return false. +bool CachingFile::ReadFromOffsetExact(void *buf, size_t count, off_t offset) { + ssize_t len = ReadFromOffset(buf, count, offset); return len >= 0 && static_cast(len) == count; } // Returns elf_header.e_type if the file pointed by fd is an ELF binary. -static int FileGetElfType(const int fd) { +static int FileGetElfType(CachingFile *file) { ElfW(Ehdr) elf_header; - if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { + if (!file->ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) { return -1; } if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) { @@ -478,8 +528,8 @@ static int FileGetElfType(const int fd) { // To keep stack consumption low, we would like this function to not get // inlined. static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( - const int fd, ElfW(Half) sh_num, const off_t sh_offset, ElfW(Word) type, - ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) { + CachingFile *file, ElfW(Half) sh_num, const off_t sh_offset, + ElfW(Word) type, ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) { ElfW(Shdr) *buf = reinterpret_cast(tmp_buf); const size_t buf_entries = tmp_buf_size / sizeof(buf[0]); const size_t buf_bytes = buf_entries * sizeof(buf[0]); @@ -490,7 +540,7 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( const size_t num_bytes_to_read = (buf_bytes > num_bytes_left) ? num_bytes_left : buf_bytes; const off_t offset = sh_offset + static_cast(i * sizeof(buf[0])); - const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, offset); + const ssize_t len = file->ReadFromOffset(buf, num_bytes_to_read, offset); if (len < 0) { ABSL_RAW_LOG( WARNING, @@ -524,11 +574,17 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( // but there has (as yet) been no need for anything longer either. const int kMaxSectionNameLen = 64; +// Small cache to use for miscellaneous file reads. +const int kSmallFileCacheSize = 100; + bool ForEachSection(int fd, const std::function &callback) { + char buf[kSmallFileCacheSize]; + CachingFile file(fd, buf, sizeof(buf)); + ElfW(Ehdr) elf_header; - if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { + if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) { return false; } @@ -540,7 +596,7 @@ bool ForEachSection(int fd, ElfW(Shdr) shstrtab; off_t shstrtab_offset = static_cast(elf_header.e_shoff) + elf_header.e_shentsize * elf_header.e_shstrndx; - if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { + if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) { return false; } @@ -548,13 +604,13 @@ bool ForEachSection(int fd, ElfW(Shdr) out; off_t section_header_offset = static_cast(elf_header.e_shoff) + elf_header.e_shentsize * i; - if (!ReadFromOffsetExact(fd, &out, sizeof(out), section_header_offset)) { + if (!file.ReadFromOffsetExact(&out, sizeof(out), section_header_offset)) { return false; } off_t name_offset = static_cast(shstrtab.sh_offset) + out.sh_name; char header_name[kMaxSectionNameLen]; ssize_t n_read = - ReadFromOffset(fd, &header_name, kMaxSectionNameLen, name_offset); + file.ReadFromOffset(&header_name, kMaxSectionNameLen, name_offset); if (n_read < 0) { return false; } else if (n_read > kMaxSectionNameLen) { @@ -584,8 +640,10 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, return false; } + char buf[kSmallFileCacheSize]; + CachingFile file(fd, buf, sizeof(buf)); ElfW(Ehdr) elf_header; - if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { + if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) { return false; } @@ -597,18 +655,18 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, ElfW(Shdr) shstrtab; off_t shstrtab_offset = static_cast(elf_header.e_shoff) + elf_header.e_shentsize * elf_header.e_shstrndx; - if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { + if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) { return false; } for (int i = 0; i < elf_header.e_shnum; ++i) { off_t section_header_offset = static_cast(elf_header.e_shoff) + elf_header.e_shentsize * i; - if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) { + if (!file.ReadFromOffsetExact(out, sizeof(*out), section_header_offset)) { return false; } off_t name_offset = static_cast(shstrtab.sh_offset) + out->sh_name; - ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset); + ssize_t n_read = file.ReadFromOffset(&header_name, name_len, name_offset); if (n_read < 0) { return false; } else if (static_cast(n_read) != name_len) { @@ -683,7 +741,7 @@ static const char *ComputeOffset(const char *base, ptrdiff_t offset) { // To keep stack consumption low, we would like this function to not get // inlined. static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( - const void *const pc, const int fd, char *out, size_t out_size, + const void *const pc, CachingFile *file, char *out, size_t out_size, ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab, const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) { if (symtab == nullptr) { @@ -716,7 +774,7 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( const size_t entries_in_chunk = std::min(num_remaining_symbols, buf_entries); const size_t bytes_in_chunk = entries_in_chunk * sizeof(buf[0]); - const ssize_t len = ReadFromOffset(fd, buf, bytes_in_chunk, offset); + const ssize_t len = file->ReadFromOffset(buf, bytes_in_chunk, offset); SAFE_ASSERT(len >= 0); SAFE_ASSERT(static_cast(len) % sizeof(buf[0]) == 0); const size_t num_symbols_in_buf = static_cast(len) / sizeof(buf[0]); @@ -772,12 +830,12 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( if (found_match) { const off_t off = static_cast(strtab->sh_offset) + best_match.st_name; - const ssize_t n_read = ReadFromOffset(fd, out, out_size, off); + const ssize_t n_read = file->ReadFromOffset(out, out_size, off); if (n_read <= 0) { // This should never happen. ABSL_RAW_LOG(WARNING, - "Unable to read from fd %d at offset %lld: n_read = %zd", fd, - static_cast(off), n_read); + "Unable to read from fd %d at offset %lld: n_read = %zd", + file->fd(), static_cast(off), n_read); return SYMBOL_NOT_FOUND; } ABSL_RAW_CHECK(static_cast(n_read) <= out_size, @@ -827,22 +885,24 @@ FindSymbolResult Symbolizer::GetSymbolFromObjectFile( } } + CachingFile file(obj.fd, file_cache_, sizeof(file_cache_)); + // Consult a regular symbol table, then fall back to the dynamic symbol table. for (const auto symbol_table_type : {SHT_SYMTAB, SHT_DYNSYM}) { - if (!GetSectionHeaderByType(obj.fd, obj.elf_header.e_shnum, + if (!GetSectionHeaderByType(&file, obj.elf_header.e_shnum, static_cast(obj.elf_header.e_shoff), static_cast(symbol_table_type), &symtab, tmp_buf, tmp_buf_size)) { continue; } - if (!ReadFromOffsetExact( - obj.fd, &strtab, sizeof(strtab), + if (!file.ReadFromOffsetExact( + &strtab, sizeof(strtab), static_cast(obj.elf_header.e_shoff + symtab.sh_link * sizeof(symtab)))) { continue; } const FindSymbolResult rc = - FindSymbol(pc, obj.fd, out, out_size, relocation, &strtab, &symtab, + FindSymbol(pc, &file, out, out_size, relocation, &strtab, &symtab, opd_ptr, tmp_buf, tmp_buf_size); if (rc != SYMBOL_NOT_FOUND) { return rc; @@ -1323,15 +1383,19 @@ static bool MaybeInitializeObjFile(ObjFile *obj) { ABSL_RAW_LOG(WARNING, "%s: open failed: errno=%d", obj->filename, errno); return false; } - obj->elf_type = FileGetElfType(obj->fd); + + char buf[kSmallFileCacheSize]; + CachingFile file(obj->fd, buf, sizeof(buf)); + + obj->elf_type = FileGetElfType(&file); if (obj->elf_type < 0) { ABSL_RAW_LOG(WARNING, "%s: wrong elf type: %d", obj->filename, obj->elf_type); return false; } - if (!ReadFromOffsetExact(obj->fd, &obj->elf_header, sizeof(obj->elf_header), - 0)) { + if (!file.ReadFromOffsetExact(&obj->elf_header, sizeof(obj->elf_header), + 0)) { ABSL_RAW_LOG(WARNING, "%s: failed to read elf header", obj->filename); return false; } @@ -1341,7 +1405,7 @@ static bool MaybeInitializeObjFile(ObjFile *obj) { size_t num_interesting_load_segments = 0; for (int j = 0; j < phnum; j++) { ElfW(Phdr) phdr; - if (!ReadFromOffsetExact(obj->fd, &phdr, sizeof(phdr), phoff)) { + if (!file.ReadFromOffsetExact(&phdr, sizeof(phdr), phoff)) { ABSL_RAW_LOG(WARNING, "%s: failed to read program header %d", obj->filename, j); return false; -- cgit v1.2.3