diff options
author | Catena cyber <35799796+catenacyber@users.noreply.github.com> | 2021-10-11 18:52:33 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-11 12:52:33 -0400 |
commit | 7bda69cbeb3c58d117e2e13b06da16606870303e (patch) | |
tree | bfe72c1d68959d55935f3474880a6bd9a17f89ba /infra/base-images | |
parent | da4ea33901d7660d59be500d6419878a119088f6 (diff) |
profraw: move from golang to python (#6565)
Diffstat (limited to 'infra/base-images')
-rwxr-xr-x | infra/base-images/base-runner/Dockerfile | 1 | ||||
-rwxr-xr-x | infra/base-images/base-runner/coverage | 2 | ||||
-rw-r--r-- | infra/base-images/base-runner/gocoverage/llvm-cov-rel/llvm-profraw-relative.go | 192 | ||||
-rw-r--r-- | infra/base-images/base-runner/profraw_update.py | 123 |
4 files changed, 125 insertions, 193 deletions
diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile index 3ac07158..fadd00ac 100755 --- a/infra/base-images/base-runner/Dockerfile +++ b/infra/base-images/base-runner/Dockerfile @@ -115,6 +115,7 @@ COPY bad_build_check \ reproduce \ run_fuzzer \ parse_options.py \ + profraw_update.py \ targets_list \ test_all.py \ test_one.py \ diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage index 40c31e07..3c7b274e 100755 --- a/infra/base-images/base-runner/coverage +++ b/infra/base-images/base-runner/coverage @@ -100,7 +100,7 @@ function run_fuzz_target { fi # If necessary translate to latest profraw version. - llvm-cov-rel $OUT/$target $profraw_file_mask tmp.profraw + profraw_update.py $OUT/$target $profraw_file_mask tmp.profraw mv tmp.profraw $profraw_file_mask llvm-profdata merge -j=1 -sparse $profraw_file_mask -o $profdata_file diff --git a/infra/base-images/base-runner/gocoverage/llvm-cov-rel/llvm-profraw-relative.go b/infra/base-images/base-runner/gocoverage/llvm-cov-rel/llvm-profraw-relative.go deleted file mode 100644 index 6106e520..00000000 --- a/infra/base-images/base-runner/gocoverage/llvm-cov-rel/llvm-profraw-relative.go +++ /dev/null @@ -1,192 +0,0 @@ -package main - -import ( - "debug/elf" - "encoding/binary" - "flag" - "fmt" - "io" - "io/ioutil" - "log" -) - -type ProfrawHeaderVersion7 struct { - ProfrawHeaderGeneric - BinaryIdsSize uint64 - DataSize uint64 - PaddingBytesBeforeCounters uint64 - CountersSize uint64 - PaddingBytesAfterCounters uint64 - NamesSize uint64 - CountersDelta uint64 - NamesDelta uint64 - ValueKindLast uint64 -} - -type ProfrawHeaderGeneric struct { - Magic uint64 - Version uint64 -} - -type ProfrawData struct { - NameRef uint64 - FuncHash uint64 - CounterPtr uint64 - FunctionPointer uint64 - Values uint64 - NumCounters uint32 - NumValueSites []uint16 -} - -const PROFRAW_HEADER_GENERIC_LEN = 16 -const PROFRAW_HEADER_7_LEN = 88 - -func 
parseProfrawHeaderGeneric(data []byte) (ProfrawHeaderGeneric, error) { - r := ProfrawHeaderGeneric{} - if len(data) < PROFRAW_HEADER_GENERIC_LEN { - return r, io.EOF - } - r.Magic = binary.LittleEndian.Uint64(data[:8]) - r.Version = binary.LittleEndian.Uint64(data[8:16]) - if r.Magic != 0xff6c70726f667281 { - return r, fmt.Errorf("Invalid magic %x", r.Magic) - } - return r, nil -} - -func relativizeAddress(data []byte, offset int, databegin uint64, sectPrfCnts uint64, sectPrfData uint64) { - value := binary.LittleEndian.Uint64(data[offset : offset+8]) - if value >= sectPrfCnts && value < sectPrfData { - // If the value is an address in the right section, - // Make it relative. - value = value - databegin - binary.LittleEndian.PutUint64(data[offset:offset+8], value) - } - -} - -func profrawDataLen(ipvklast uint64) int { - return 44 + 2*(int(ipvklast)+1) -} - -func relativizeProfraw(data []byte, sectPrfCnts uint64, sectPrfData uint64) (error, []byte) { - h := ProfrawHeaderVersion7{} - var err error - h.ProfrawHeaderGeneric, err = parseProfrawHeaderGeneric(data) - if err != nil { - return err, data - } - if h.Version == 5 { - // Upgrade from 5 to 7 by adding a zero binaryids in the header. - binary.LittleEndian.PutUint64(data[8:16], 7) - h.Version = 7 - data2 := make([]byte, len(data)+8) - copy(data2, data[0:16]) - copy(data2[24:], data[16:]) - data = data2 - } - if h.Version < 7 { - return fmt.Errorf("Invalid version for profraw file: %v", h.Version), data - } - // At one point clang-14 will update to 8, and more work will be needed. 
- if len(data) < PROFRAW_HEADER_7_LEN { - return io.EOF, data - } - h.BinaryIdsSize = binary.LittleEndian.Uint64(data[16:24]) - h.DataSize = binary.LittleEndian.Uint64(data[24:32]) - h.PaddingBytesBeforeCounters = binary.LittleEndian.Uint64(data[32:40]) - h.CountersSize = binary.LittleEndian.Uint64(data[40:48]) - h.PaddingBytesAfterCounters = binary.LittleEndian.Uint64(data[48:56]) - h.NamesSize = binary.LittleEndian.Uint64(data[56:64]) - h.CountersDelta = binary.LittleEndian.Uint64(data[64:72]) - h.NamesDelta = binary.LittleEndian.Uint64(data[72:80]) - h.ValueKindLast = binary.LittleEndian.Uint64(data[80:88]) - - if h.BinaryIdsSize%8 != 0 { - // adds padding for binary ids - // cf commit b9f547e8e51182d32f1912f97a3e53f4899ea6be https://reviews.llvm.org/D110365 - padlen := 8 - (h.BinaryIdsSize % 8) - data2 := make([]byte, len(data)+int(padlen)) - copy(data2, data[0:88+h.BinaryIdsSize]) - copy(data2[88+h.BinaryIdsSize+padlen:], data[88+h.BinaryIdsSize:]) - data = data2 - h.BinaryIdsSize += padlen - binary.LittleEndian.PutUint64(data[16:24], h.BinaryIdsSize) - } - - if h.CountersDelta != sectPrfCnts-sectPrfData { - // Rust linking adds an offset ? not seen in readelf. 
- sectPrfData = h.CountersDelta - sectPrfCnts + sectPrfData - sectPrfCnts = h.CountersDelta - } - dataref := sectPrfData - relativizeAddress(data, 64, dataref, sectPrfCnts, sectPrfData) - - offset := PROFRAW_HEADER_7_LEN + int(h.BinaryIdsSize) - for i := uint64(0); i < h.DataSize; i++ { - if len(data) < offset+profrawDataLen(h.ValueKindLast) { - return io.EOF, data - } - d := ProfrawData{} - d.NameRef = binary.LittleEndian.Uint64(data[offset : offset+8]) - d.FuncHash = binary.LittleEndian.Uint64(data[offset+8 : offset+16]) - d.CounterPtr = binary.LittleEndian.Uint64(data[offset+16 : offset+24]) - d.FunctionPointer = binary.LittleEndian.Uint64(data[offset+24 : offset+32]) - d.Values = binary.LittleEndian.Uint64(data[offset+32 : offset+40]) - d.NumCounters = binary.LittleEndian.Uint32(data[offset+40 : offset+44]) - d.NumValueSites = make([]uint16, h.ValueKindLast+1) - for j := 0; j <= int(h.ValueKindLast); j++ { - d.NumValueSites[j] = binary.LittleEndian.Uint16(data[offset+44+2*j : offset+46+2*j]) - } - - relativizeAddress(data, offset+16, dataref, sectPrfCnts, sectPrfData) - // We need this because of CountersDelta -= sizeof(*SrcData); in __llvm_profile_merge_from_buffer. - dataref += uint64(profrawDataLen(h.ValueKindLast)) - - offset += profrawDataLen(h.ValueKindLast) - } - return nil, data -} - -func main() { - flag.Parse() - - if len(flag.Args()) != 3 { - log.Fatalf("needs exactly three arguments : binary, profraw, output") - } - - // First find llvm profile sections addresses in the elf. - f, err := elf.Open(flag.Args()[0]) - if err != nil { - log.Fatalf("failed to read elf: %v", err) - } - sectPrfCnts := uint64(0) - sectPrfData := uint64(0) - for i := range f.Sections { - if f.Sections[i].Name == "__llvm_prf_cnts" { - sectPrfCnts = f.Sections[i].Addr - } else if f.Sections[i].Name == "__llvm_prf_data" { - sectPrfData = f.Sections[i].Addr - // Maybe rather sectPrfCntsEnd as f.Sections[i].Addr + f.Sections[i].Size for __llvm_prf_cnts. 
- } - } - if sectPrfCnts == 0 || sectPrfData == 0 { - log.Fatalf("Elf has not __llvm_prf_cnts and __llvm_prf_data sections") - } - - // Process profraw file. - data, err := ioutil.ReadFile(flag.Args()[1]) - if err != nil { - log.Fatalf("failed to read file: %v", err) - } - err, data = relativizeProfraw(data, sectPrfCnts, sectPrfData) - if err != nil { - log.Fatalf("failed to process file: %v", err) - } - - // Write output file. - err = ioutil.WriteFile(flag.Args()[2], data, 0644) - if err != nil { - log.Fatalf("failed to write file: %v", err) - } -} diff --git a/infra/base-images/base-runner/profraw_update.py b/infra/base-images/base-runner/profraw_update.py new file mode 100644 index 00000000..408b5fb9 --- /dev/null +++ b/infra/base-images/base-runner/profraw_update.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#
################################################################################
"""Helper script for upgrading a profraw file to latest version."""

from collections import namedtuple
import struct
import subprocess
import sys

HeaderGeneric = namedtuple('HeaderGeneric', 'magic version')
HeaderVersion7 = namedtuple(
    'HeaderVersion7',
    'BinaryIdsSize DataSize PaddingBytesBeforeCounters CountersSize '
    'PaddingBytesAfterCounters NamesSize CountersDelta NamesDelta '
    'ValueKindLast')

PROFRAW_MAGIC = 0xff6c70726f667281

# All header fields and pointers are 64-bit; Python ints are unbounded, so
# arithmetic that is expected to wrap must be masked explicitly.
_UINT64_MASK = 0xffffffffffffffff
# Size of the (magic, version) prefix.
_GENERIC_HEADER_LEN = 16
# Size of the complete version 7 header, generic prefix included.
_V7_HEADER_LEN = 88
# Offset of the CountersDelta field inside the version 7 header.
_COUNTERS_DELTA_OFFSET = 64
# Offset of the CounterPtr field inside one per-function data record.
_COUNTER_PTR_OFFSET = 16


def relativize_address(data, offset, databegin, sect_prf_cnts, sect_prf_data):
  """Turns an absolute address stored in data into a relative one, in place.

  Args:
    data: Mutable buffer (bytearray) holding the profraw content.
    offset: Position in data of the 8-byte value to patch.
    databegin: Reference address subtracted from the stored value.
    sect_prf_cnts: Inclusive lower bound of the addresses to rewrite.
    sect_prf_data: Exclusive upper bound of the addresses to rewrite.
  """
  value = struct.unpack('Q', data[offset:offset + 8])[0]
  if sect_prf_cnts <= value < sect_prf_data:
    # If the value is an address in the right section, make it relative.
    # The subtraction may go negative, hence the 64-bit wrap-around.
    data[offset:offset + 8] = struct.pack('Q',
                                          (value - databegin) & _UINT64_MASK)


def upgrade(data, sect_prf_cnts, sect_prf_data):
  """Upgrades profraw data, knowing the sections addresses.

  Args:
    data: Content of the profraw file. A bytearray is patched in place where
      possible; any other bytes-like object is copied first.
    sect_prf_cnts: Address of the __llvm_prf_cnts section.
    sect_prf_data: Address of the __llvm_prf_data section.

  Returns:
    A bytearray holding the upgraded profraw content.

  Raises:
    Exception: If the data is truncated, has a bad magic or has a version
      that is not handled.
  """
  if not isinstance(data, bytearray):
    data = bytearray(data)
  if len(data) < _GENERIC_HEADER_LEN:
    raise Exception('Truncated profraw header.')

  generic_header = HeaderGeneric._make(
      struct.unpack('QQ', data[:_GENERIC_HEADER_LEN]))
  if generic_header.magic != PROFRAW_MAGIC:
    raise Exception('Bad magic.')
  if generic_header.version == 5:
    generic_header = generic_header._replace(version=7)
    # Upgrade from version 5 to 7 by adding binaryids field.
    # The result must stay a bytearray: it is patched in place below.
    data = bytearray(
        struct.pack('QQ', *generic_header) + struct.pack('Q', 0) +
        data[_GENERIC_HEADER_LEN:])
  if generic_header.version < 7:
    raise Exception('Unhandled version.')
  if len(data) < _V7_HEADER_LEN:
    raise Exception('Truncated profraw header.')
  v7_header = HeaderVersion7._make(
      struct.unpack('QQQQQQQQQ', data[_GENERIC_HEADER_LEN:_V7_HEADER_LEN]))

  if v7_header.BinaryIdsSize % 8 != 0:
    # Adds padding for binary ids.
    # cf commit b9f547e8e51182d32f1912f97a3e53f4899ea6be
    # cf https://reviews.llvm.org/D110365
    padlen = 8 - (v7_header.BinaryIdsSize % 8)
    # The padding goes right after the existing binary ids, so the insertion
    # point is computed from the size before it is incremented.
    ids_end = _V7_HEADER_LEN + v7_header.BinaryIdsSize
    v7_header = v7_header._replace(BinaryIdsSize=v7_header.BinaryIdsSize +
                                   padlen)
    data[16:24] = struct.pack('Q', v7_header.BinaryIdsSize)
    data[ids_end:ids_end] = bytes(padlen)

  # Sections addresses are unsigned 64-bit values: compare modulo 2**64 so a
  # profile whose CountersDelta is already relative is left untouched.
  if v7_header.CountersDelta != (sect_prf_cnts - sect_prf_data) & _UINT64_MASK:
    # Rust linking seems to add an offset...
    sect_prf_data = (v7_header.CountersDelta - sect_prf_cnts +
                     sect_prf_data) & _UINT64_MASK
    sect_prf_cnts = v7_header.CountersDelta

  dataref = sect_prf_data
  relativize_address(data, _COUNTERS_DELTA_OFFSET, dataref, sect_prf_cnts,
                     sect_prf_data)

  # This is the size of one ProfrawData structure.
  record_len = 44 + 2 * (v7_header.ValueKindLast + 1)
  offset = _V7_HEADER_LEN + v7_header.BinaryIdsSize
  # This also works for C+Rust binaries compiled with
  # clang-14/rust-nightly-clang-13.
  for _ in range(v7_header.DataSize):
    if len(data) < offset + record_len:
      raise Exception('Truncated profraw data.')
    relativize_address(data, offset + _COUNTER_PTR_OFFSET, dataref,
                       sect_prf_cnts, sect_prf_data)
    # We need this because of CountersDelta -= sizeof(*SrcData);
    # seen in __llvm_profile_merge_from_buffer.
    dataref += record_len
    offset += record_len

  return data


def _find_section_address(readelf_output, name):
  """Returns the address of section name from `readelf -S` output, or None.

  The address is the second column after the section name. Locating it from
  the name rather than from the start of the line gives the same column
  whether the index is printed as '[ 9]' (two tokens) or '[26]' (one token).
  """
  for line in readelf_output.split(b'\n'):
    tokens = line.split()
    if name not in tokens:
      continue
    address_index = tokens.index(name) + 2
    if address_index >= len(tokens):
      continue
    try:
      return int(tokens[address_index], 16)
    except ValueError:
      continue
  return None


def main():
  """Helper script for upgrading a profraw file to latest version."""
  if len(sys.argv) != 4:
    sys.stderr.write('Usage: %s <binary> <profraw> <output>\n' % sys.argv[0])
    return 1

  # First find llvm profile sections addresses in the elf, quick and dirty.
  process = subprocess.Popen(['readelf', '-S', sys.argv[1]],
                             stdout=subprocess.PIPE)
  output, _ = process.communicate()
  # stderr is not captured, so the exit status is the only failure signal.
  if process.returncode != 0:
    print('readelf failed')
    return 2
  sect_prf_cnts = _find_section_address(output, b'__llvm_prf_cnts')
  sect_prf_data = _find_section_address(output, b'__llvm_prf_data')
  if sect_prf_cnts is None or sect_prf_data is None:
    sys.stderr.write(
        'Elf has no __llvm_prf_cnts and __llvm_prf_data sections\n')
    return 3

  # Then open and read the input profraw file.
  with open(sys.argv[2], 'rb') as input_file:
    profraw_base = bytearray(input_file.read())
  # Do the upgrade, returning a bytes-like object.
  profraw_latest = upgrade(profraw_base, sect_prf_cnts, sect_prf_data)
  # Write the output to the file given to the command line.
  with open(sys.argv[3], 'wb') as output_file:
    output_file.write(profraw_latest)

  return 0


if __name__ == '__main__':
  sys.exit(main())