summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Abseil Team <absl-team@google.com> 2023-11-15 08:34:44 -0800
committer: Copybara-Service <copybara-worker@google.com> 2023-11-15 08:35:20 -0800
commit: aa146013a1d8c2d9f567ec23f12927f5d838780f (patch)
tree: 3d2f10ece8031e3640da3d0a723074bd45231122
parent: 61ea5d253f152b3be61fcd836b9eaeda669a56a7 (diff)
Speed-up absl::Symbolize by ~6x via faster file reads.
absl::Symbolize does tons of tiny reads. Speed this up by switching from
lseek+read to a pread, and by reading more data than requested into a buffer.

A faster absl::Symbolize will be helpful in tests and when printing stack
traces on /threadz etc.

Results for absl::Symbolize benchmark that exercises uncached behavior of
absl::Symbolize:

```
name          old time/op  new time/op  delta
BM_Symbolize  16.4ms ±12%  2.6ms ± 0%   -84.06%  (p=0.001 n=5+9)
```

PiperOrigin-RevId: 582687566
Change-Id: I44caf189d81867f3fd8c050a3100a4b9a8e744d7
-rw-r--r-- absl/debugging/symbolize_elf.inc 152
1 file changed, 108 insertions, 44 deletions
diff --git a/absl/debugging/symbolize_elf.inc b/absl/debugging/symbolize_elf.inc
index 30638cb2..ae75cd41 100644
--- a/absl/debugging/symbolize_elf.inc
+++ b/absl/debugging/symbolize_elf.inc
@@ -289,6 +289,30 @@ ObjFile *AddrMap::Add() {
return new (&obj_[size_++]) ObjFile;
}
+class CachingFile {
+ public:
+ // Setup reader for fd that uses buf[0, buf_size-1] as a cache.
+ CachingFile(int fd, char *buf, size_t buf_size)
+ : fd_(fd),
+ cache_(buf),
+ cache_size_(buf_size),
+ cache_start_(0),
+ cache_limit_(0) {}
+
+ int fd() const { return fd_; }
+ ssize_t ReadFromOffset(void *buf, size_t count, off_t offset);
+ bool ReadFromOffsetExact(void *buf, size_t count, off_t offset);
+
+ private:
+ // Bytes [cache_start_, cache_limit_-1] from fd_ are stored in
+ // a prefix of cache_[0, cache_size_-1].
+ int fd_;
+ char *cache_;
+ size_t cache_size_;
+ off_t cache_start_;
+ off_t cache_limit_;
+};
+
// ---------------------------------------------------------------
enum FindSymbolResult { SYMBOL_NOT_FOUND = 1, SYMBOL_TRUNCATED, SYMBOL_FOUND };
@@ -330,6 +354,7 @@ class Symbolizer {
SYMBOL_BUF_SIZE = 3072,
TMP_BUF_SIZE = 1024,
SYMBOL_CACHE_LINES = 128,
+ FILE_CACHE_SIZE = 8192,
};
AddrMap addr_map_;
@@ -338,6 +363,7 @@ class Symbolizer {
bool addr_map_read_;
char symbol_buf_[SYMBOL_BUF_SIZE];
+ char file_cache_[FILE_CACHE_SIZE];
// tmp_buf_ will be used to store arrays of ElfW(Shdr) and ElfW(Sym)
// so we ensure that tmp_buf_ is properly aligned to store either.
@@ -436,34 +462,58 @@ static ssize_t ReadPersistent(int fd, void *buf, size_t count) {
return static_cast<ssize_t>(num_bytes);
}
-// Read up to "count" bytes from "offset" in the file pointed by file
-// descriptor "fd" into the buffer starting at "buf". On success,
-// return the number of bytes read. Otherwise, return -1.
-static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count,
- const off_t offset) {
- off_t off = lseek(fd, offset, SEEK_SET);
- if (off == (off_t)-1) {
- ABSL_RAW_LOG(WARNING, "lseek(%d, %jd, SEEK_SET) failed: errno=%d", fd,
- static_cast<intmax_t>(offset), errno);
- return -1;
+// Read up to "count" bytes from "offset" into the buffer starting at "buf",
+// while handling short reads and EINTR. On success, return the number of bytes
+// read. Otherwise, return -1.
+ssize_t CachingFile::ReadFromOffset(void *buf, size_t count, off_t offset) {
+ char *dst = static_cast<char *>(buf);
+ size_t read = 0;
+ while (read < count) {
+ // Look in cache first.
+ if (offset >= cache_start_ && offset < cache_limit_) {
+ const char *hit_start = &cache_[offset - cache_start_];
+ const size_t n =
+ std::min(count - read, static_cast<size_t>(cache_limit_ - offset));
+ memcpy(dst, hit_start, n);
+ dst += n;
+ read += static_cast<size_t>(n);
+ offset += static_cast<off_t>(n);
+ continue;
+ }
+
+ cache_start_ = 0;
+ cache_limit_ = 0;
+ ssize_t n = pread(fd_, cache_, cache_size_, offset);
+ if (n < 0) {
+ if (errno == EINTR) {
+ continue;
+ }
+ ABSL_RAW_LOG(WARNING, "read failed: errno=%d", errno);
+ return -1;
+ }
+ if (n == 0) { // Reached EOF.
+ break;
+ }
+
+ cache_start_ = offset;
+ cache_limit_ = offset + static_cast<off_t>(n);
+ // Next iteration will copy from cache into dst.
}
- return ReadPersistent(fd, buf, count);
+ return static_cast<ssize_t>(read);
}
-// Try reading exactly "count" bytes from "offset" bytes in a file
-// pointed by "fd" into the buffer starting at "buf" while handling
-// short reads and EINTR. On success, return true. Otherwise, return
-// false.
-static bool ReadFromOffsetExact(const int fd, void *buf, const size_t count,
- const off_t offset) {
- ssize_t len = ReadFromOffset(fd, buf, count, offset);
+// Try reading exactly "count" bytes from "offset" bytes into the buffer
+// starting at "buf" while handling short reads and EINTR. On success, return
+// true. Otherwise, return false.
+bool CachingFile::ReadFromOffsetExact(void *buf, size_t count, off_t offset) {
+ ssize_t len = ReadFromOffset(buf, count, offset);
return len >= 0 && static_cast<size_t>(len) == count;
}
// Returns elf_header.e_type if the file pointed by fd is an ELF binary.
-static int FileGetElfType(const int fd) {
+static int FileGetElfType(CachingFile *file) {
ElfW(Ehdr) elf_header;
- if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
+ if (!file->ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return -1;
}
if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
@@ -478,8 +528,8 @@ static int FileGetElfType(const int fd) {
// To keep stack consumption low, we would like this function to not get
// inlined.
static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
- const int fd, ElfW(Half) sh_num, const off_t sh_offset, ElfW(Word) type,
- ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) {
+ CachingFile *file, ElfW(Half) sh_num, const off_t sh_offset,
+ ElfW(Word) type, ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) {
ElfW(Shdr) *buf = reinterpret_cast<ElfW(Shdr) *>(tmp_buf);
const size_t buf_entries = tmp_buf_size / sizeof(buf[0]);
const size_t buf_bytes = buf_entries * sizeof(buf[0]);
@@ -490,7 +540,7 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
const size_t num_bytes_to_read =
(buf_bytes > num_bytes_left) ? num_bytes_left : buf_bytes;
const off_t offset = sh_offset + static_cast<off_t>(i * sizeof(buf[0]));
- const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, offset);
+ const ssize_t len = file->ReadFromOffset(buf, num_bytes_to_read, offset);
if (len < 0) {
ABSL_RAW_LOG(
WARNING,
@@ -524,11 +574,17 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
// but there has (as yet) been no need for anything longer either.
const int kMaxSectionNameLen = 64;
+// Small cache to use for miscellaneous file reads.
+const int kSmallFileCacheSize = 100;
+
bool ForEachSection(int fd,
const std::function<bool(absl::string_view name,
const ElfW(Shdr) &)> &callback) {
+ char buf[kSmallFileCacheSize];
+ CachingFile file(fd, buf, sizeof(buf));
+
ElfW(Ehdr) elf_header;
- if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
+ if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return false;
}
@@ -540,7 +596,7 @@ bool ForEachSection(int fd,
ElfW(Shdr) shstrtab;
off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) +
elf_header.e_shentsize * elf_header.e_shstrndx;
- if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
+ if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) {
return false;
}
@@ -548,13 +604,13 @@ bool ForEachSection(int fd,
ElfW(Shdr) out;
off_t section_header_offset =
static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i;
- if (!ReadFromOffsetExact(fd, &out, sizeof(out), section_header_offset)) {
+ if (!file.ReadFromOffsetExact(&out, sizeof(out), section_header_offset)) {
return false;
}
off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out.sh_name;
char header_name[kMaxSectionNameLen];
ssize_t n_read =
- ReadFromOffset(fd, &header_name, kMaxSectionNameLen, name_offset);
+ file.ReadFromOffset(&header_name, kMaxSectionNameLen, name_offset);
if (n_read < 0) {
return false;
} else if (n_read > kMaxSectionNameLen) {
@@ -584,8 +640,10 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
return false;
}
+ char buf[kSmallFileCacheSize];
+ CachingFile file(fd, buf, sizeof(buf));
ElfW(Ehdr) elf_header;
- if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
+ if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return false;
}
@@ -597,18 +655,18 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
ElfW(Shdr) shstrtab;
off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) +
elf_header.e_shentsize * elf_header.e_shstrndx;
- if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
+ if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) {
return false;
}
for (int i = 0; i < elf_header.e_shnum; ++i) {
off_t section_header_offset =
static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i;
- if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) {
+ if (!file.ReadFromOffsetExact(out, sizeof(*out), section_header_offset)) {
return false;
}
off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out->sh_name;
- ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset);
+ ssize_t n_read = file.ReadFromOffset(&header_name, name_len, name_offset);
if (n_read < 0) {
return false;
} else if (static_cast<size_t>(n_read) != name_len) {
@@ -683,7 +741,7 @@ static const char *ComputeOffset(const char *base, ptrdiff_t offset) {
// To keep stack consumption low, we would like this function to not get
// inlined.
static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
- const void *const pc, const int fd, char *out, size_t out_size,
+ const void *const pc, CachingFile *file, char *out, size_t out_size,
ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab,
const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) {
if (symtab == nullptr) {
@@ -716,7 +774,7 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
const size_t entries_in_chunk =
std::min(num_remaining_symbols, buf_entries);
const size_t bytes_in_chunk = entries_in_chunk * sizeof(buf[0]);
- const ssize_t len = ReadFromOffset(fd, buf, bytes_in_chunk, offset);
+ const ssize_t len = file->ReadFromOffset(buf, bytes_in_chunk, offset);
SAFE_ASSERT(len >= 0);
SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0);
const size_t num_symbols_in_buf = static_cast<size_t>(len) / sizeof(buf[0]);
@@ -772,12 +830,12 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
if (found_match) {
const off_t off =
static_cast<off_t>(strtab->sh_offset) + best_match.st_name;
- const ssize_t n_read = ReadFromOffset(fd, out, out_size, off);
+ const ssize_t n_read = file->ReadFromOffset(out, out_size, off);
if (n_read <= 0) {
// This should never happen.
ABSL_RAW_LOG(WARNING,
- "Unable to read from fd %d at offset %lld: n_read = %zd", fd,
- static_cast<long long>(off), n_read);
+ "Unable to read from fd %d at offset %lld: n_read = %zd",
+ file->fd(), static_cast<long long>(off), n_read);
return SYMBOL_NOT_FOUND;
}
ABSL_RAW_CHECK(static_cast<size_t>(n_read) <= out_size,
@@ -827,22 +885,24 @@ FindSymbolResult Symbolizer::GetSymbolFromObjectFile(
}
}
+ CachingFile file(obj.fd, file_cache_, sizeof(file_cache_));
+
// Consult a regular symbol table, then fall back to the dynamic symbol table.
for (const auto symbol_table_type : {SHT_SYMTAB, SHT_DYNSYM}) {
- if (!GetSectionHeaderByType(obj.fd, obj.elf_header.e_shnum,
+ if (!GetSectionHeaderByType(&file, obj.elf_header.e_shnum,
static_cast<off_t>(obj.elf_header.e_shoff),
static_cast<ElfW(Word)>(symbol_table_type),
&symtab, tmp_buf, tmp_buf_size)) {
continue;
}
- if (!ReadFromOffsetExact(
- obj.fd, &strtab, sizeof(strtab),
+ if (!file.ReadFromOffsetExact(
+ &strtab, sizeof(strtab),
static_cast<off_t>(obj.elf_header.e_shoff +
symtab.sh_link * sizeof(symtab)))) {
continue;
}
const FindSymbolResult rc =
- FindSymbol(pc, obj.fd, out, out_size, relocation, &strtab, &symtab,
+ FindSymbol(pc, &file, out, out_size, relocation, &strtab, &symtab,
opd_ptr, tmp_buf, tmp_buf_size);
if (rc != SYMBOL_NOT_FOUND) {
return rc;
@@ -1323,15 +1383,19 @@ static bool MaybeInitializeObjFile(ObjFile *obj) {
ABSL_RAW_LOG(WARNING, "%s: open failed: errno=%d", obj->filename, errno);
return false;
}
- obj->elf_type = FileGetElfType(obj->fd);
+
+ char buf[kSmallFileCacheSize];
+ CachingFile file(obj->fd, buf, sizeof(buf));
+
+ obj->elf_type = FileGetElfType(&file);
if (obj->elf_type < 0) {
ABSL_RAW_LOG(WARNING, "%s: wrong elf type: %d", obj->filename,
obj->elf_type);
return false;
}
- if (!ReadFromOffsetExact(obj->fd, &obj->elf_header, sizeof(obj->elf_header),
- 0)) {
+ if (!file.ReadFromOffsetExact(&obj->elf_header, sizeof(obj->elf_header),
+ 0)) {
ABSL_RAW_LOG(WARNING, "%s: failed to read elf header", obj->filename);
return false;
}
@@ -1341,7 +1405,7 @@ static bool MaybeInitializeObjFile(ObjFile *obj) {
size_t num_interesting_load_segments = 0;
for (int j = 0; j < phnum; j++) {
ElfW(Phdr) phdr;
- if (!ReadFromOffsetExact(obj->fd, &phdr, sizeof(phdr), phoff)) {
+ if (!file.ReadFromOffsetExact(&phdr, sizeof(phdr), phoff)) {
ABSL_RAW_LOG(WARNING, "%s: failed to read program header %d",
obj->filename, j);
return false;