profraw: move from golang to python (#6565)

author: Catena cyber <35799796+catenacyber@users.noreply.github.com> 2021-10-11 18:52:33 +0200
committer: GitHub <noreply@github.com> 2021-10-11 12:52:33 -0400
commit: 7bda69cbeb3c58d117e2e13b06da16606870303e (patch)
tree: bfe72c1d68959d55935f3474880a6bd9a17f89ba /infra/base-images
parent: da4ea33901d7660d59be500d6419878a119088f6 (diff)
4 files changed, 125 insertions, 193 deletions
diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile
index 3ac07158..fadd00ac 100755
--- a/infra/base-images/base-runner/Dockerfile
+++ b/infra/base-images/base-runner/Dockerfile
@@ -115,6 +115,7 @@ COPY bad_build_check \
     reproduce \
     run_fuzzer \
     parse_options.py \
+    profraw_update.py \
     targets_list \
     test_all.py \
     test_one.py \
diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage
index 40c31e07..3c7b274e 100755
--- a/infra/base-images/base-runner/coverage
+++ b/infra/base-images/base-runner/coverage
@@ -100,7 +100,7 @@ function run_fuzz_target {
   fi
 
   # If necessary translate to latest profraw version.
-  llvm-cov-rel $OUT/$target $profraw_file_mask tmp.profraw
+  profraw_update.py $OUT/$target $profraw_file_mask tmp.profraw
   mv tmp.profraw $profraw_file_mask
   llvm-profdata merge -j=1 -sparse $profraw_file_mask -o $profdata_file
 
diff --git a/infra/base-images/base-runner/gocoverage/llvm-cov-rel/llvm-profraw-relative.go b/infra/base-images/base-runner/gocoverage/llvm-cov-rel/llvm-profraw-relative.go
deleted file mode 100644
index 6106e520..00000000
--- a/infra/base-images/base-runner/gocoverage/llvm-cov-rel/llvm-profraw-relative.go
+++ /dev/null
@@ -1,192 +0,0 @@
-package main
-
-import (
-	"debug/elf"
-	"encoding/binary"
-	"flag"
-	"fmt"
-	"io"
-	"io/ioutil"
-	"log"
-)
-
-type ProfrawHeaderVersion7 struct {
-	ProfrawHeaderGeneric
-	BinaryIdsSize              uint64
-	DataSize                   uint64
-	PaddingBytesBeforeCounters uint64
-	CountersSize               uint64
-	PaddingBytesAfterCounters  uint64
-	NamesSize                  uint64
-	CountersDelta              uint64
-	NamesDelta                 uint64
-	ValueKindLast              uint64
-}
-
-type ProfrawHeaderGeneric struct {
-	Magic   uint64
-	Version uint64
-}
-
-type ProfrawData struct {
-	NameRef         uint64
-	FuncHash        uint64
-	CounterPtr      uint64
-	FunctionPointer uint64
-	Values          uint64
-	NumCounters     uint32
-	NumValueSites   []uint16
-}
-
-const PROFRAW_HEADER_GENERIC_LEN = 16
-const PROFRAW_HEADER_7_LEN = 88
-
-func parseProfrawHeaderGeneric(data []byte) (ProfrawHeaderGeneric, error) {
-	r := ProfrawHeaderGeneric{}
-	if len(data) < PROFRAW_HEADER_GENERIC_LEN {
-		return r, io.EOF
-	}
-	r.Magic = binary.LittleEndian.Uint64(data[:8])
-	r.Version = binary.LittleEndian.Uint64(data[8:16])
-	if r.Magic != 0xff6c70726f667281 {
-		return r, fmt.Errorf("Invalid magic %x", r.Magic)
-	}
-	return r, nil
-}
-
-func relativizeAddress(data []byte, offset int, databegin uint64, sectPrfCnts uint64, sectPrfData uint64) {
-	value := binary.LittleEndian.Uint64(data[offset : offset+8])
-	if value >= sectPrfCnts && value < sectPrfData {
-		// If the value is an address in the right section,
-		// Make it relative.
-		value = value - databegin
-		binary.LittleEndian.PutUint64(data[offset:offset+8], value)
-	}
-
-}
-
-func profrawDataLen(ipvklast uint64) int {
-	return 44 + 2*(int(ipvklast)+1)
-}
-
-func relativizeProfraw(data []byte, sectPrfCnts uint64, sectPrfData uint64) (error, []byte) {
-	h := ProfrawHeaderVersion7{}
-	var err error
-	h.ProfrawHeaderGeneric, err = parseProfrawHeaderGeneric(data)
-	if err != nil {
-		return err, data
-	}
-	if h.Version == 5 {
-		// Upgrade from 5 to 7 by adding a zero binaryids in the header.
-		binary.LittleEndian.PutUint64(data[8:16], 7)
-		h.Version = 7
-		data2 := make([]byte, len(data)+8)
-		copy(data2, data[0:16])
-		copy(data2[24:], data[16:])
-		data = data2
-	}
-	if h.Version < 7 {
-		return fmt.Errorf("Invalid version for profraw file: %v", h.Version), data
-	}
-	// At one point clang-14 will update to 8, and more work will be needed.
-	if len(data) < PROFRAW_HEADER_7_LEN {
-		return io.EOF, data
-	}
-	h.BinaryIdsSize = binary.LittleEndian.Uint64(data[16:24])
-	h.DataSize = binary.LittleEndian.Uint64(data[24:32])
-	h.PaddingBytesBeforeCounters = binary.LittleEndian.Uint64(data[32:40])
-	h.CountersSize = binary.LittleEndian.Uint64(data[40:48])
-	h.PaddingBytesAfterCounters = binary.LittleEndian.Uint64(data[48:56])
-	h.NamesSize = binary.LittleEndian.Uint64(data[56:64])
-	h.CountersDelta = binary.LittleEndian.Uint64(data[64:72])
-	h.NamesDelta = binary.LittleEndian.Uint64(data[72:80])
-	h.ValueKindLast = binary.LittleEndian.Uint64(data[80:88])
-
-	if h.BinaryIdsSize%8 != 0 {
-		// adds padding for binary ids
-		// cf commit b9f547e8e51182d32f1912f97a3e53f4899ea6be https://reviews.llvm.org/D110365
-		padlen := 8 - (h.BinaryIdsSize % 8)
-		data2 := make([]byte, len(data)+int(padlen))
-		copy(data2, data[0:88+h.BinaryIdsSize])
-		copy(data2[88+h.BinaryIdsSize+padlen:], data[88+h.BinaryIdsSize:])
-		data = data2
-		h.BinaryIdsSize += padlen
-		binary.LittleEndian.PutUint64(data[16:24], h.BinaryIdsSize)
-	}
-
-	if h.CountersDelta != sectPrfCnts-sectPrfData {
-		// Rust linking adds an offset ? not seen in readelf.
-		sectPrfData = h.CountersDelta - sectPrfCnts + sectPrfData
-		sectPrfCnts = h.CountersDelta
-	}
-	dataref := sectPrfData
-	relativizeAddress(data, 64, dataref, sectPrfCnts, sectPrfData)
-
-	offset := PROFRAW_HEADER_7_LEN + int(h.BinaryIdsSize)
-	for i := uint64(0); i < h.DataSize; i++ {
-		if len(data) < offset+profrawDataLen(h.ValueKindLast) {
-			return io.EOF, data
-		}
-		d := ProfrawData{}
-		d.NameRef = binary.LittleEndian.Uint64(data[offset : offset+8])
-		d.FuncHash = binary.LittleEndian.Uint64(data[offset+8 : offset+16])
-		d.CounterPtr = binary.LittleEndian.Uint64(data[offset+16 : offset+24])
-		d.FunctionPointer = binary.LittleEndian.Uint64(data[offset+24 : offset+32])
-		d.Values = binary.LittleEndian.Uint64(data[offset+32 : offset+40])
-		d.NumCounters = binary.LittleEndian.Uint32(data[offset+40 : offset+44])
-		d.NumValueSites = make([]uint16, h.ValueKindLast+1)
-		for j := 0; j <= int(h.ValueKindLast); j++ {
-			d.NumValueSites[j] = binary.LittleEndian.Uint16(data[offset+44+2*j : offset+46+2*j])
-		}
-
-		relativizeAddress(data, offset+16, dataref, sectPrfCnts, sectPrfData)
-		// We need this because of CountersDelta -= sizeof(*SrcData); in __llvm_profile_merge_from_buffer.
-		dataref += uint64(profrawDataLen(h.ValueKindLast))
-
-		offset += profrawDataLen(h.ValueKindLast)
-	}
-	return nil, data
-}
-
-func main() {
-	flag.Parse()
-
-	if len(flag.Args()) != 3 {
-		log.Fatalf("needs exactly three arguments : binary, profraw, output")
-	}
-
-	// First find llvm profile sections addresses in the elf.
-	f, err := elf.Open(flag.Args()[0])
-	if err != nil {
-		log.Fatalf("failed to read elf: %v", err)
-	}
-	sectPrfCnts := uint64(0)
-	sectPrfData := uint64(0)
-	for i := range f.Sections {
-		if f.Sections[i].Name == "__llvm_prf_cnts" {
-			sectPrfCnts = f.Sections[i].Addr
-		} else if f.Sections[i].Name == "__llvm_prf_data" {
-			sectPrfData = f.Sections[i].Addr
-			// Maybe rather sectPrfCntsEnd as f.Sections[i].Addr + f.Sections[i].Size for __llvm_prf_cnts.
-		}
-	}
-	if sectPrfCnts == 0 || sectPrfData == 0 {
-		log.Fatalf("Elf has not __llvm_prf_cnts and __llvm_prf_data sections")
-	}
-
-	// Process profraw file.
-	data, err := ioutil.ReadFile(flag.Args()[1])
-	if err != nil {
-		log.Fatalf("failed to read file: %v", err)
-	}
-	err, data = relativizeProfraw(data, sectPrfCnts, sectPrfData)
-	if err != nil {
-		log.Fatalf("failed to process file: %v", err)
-	}
-
-	// Write output file.
-	err = ioutil.WriteFile(flag.Args()[2], data, 0644)
-	if err != nil {
-		log.Fatalf("failed to write file: %v", err)
-	}
-}
diff --git a/infra/base-images/base-runner/profraw_update.py b/infra/base-images/base-runner/profraw_update.py
new file mode 100755
index 00000000..408b5fb9
--- /dev/null
+++ b/infra/base-images/base-runner/profraw_update.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Helper script for upgrading a profraw file to latest version."""
+
+from collections import namedtuple
+import struct
+import subprocess
+import sys
+
+HeaderGeneric = namedtuple('HeaderGeneric', 'magic version')
+HeaderVersion7 = namedtuple(
+    'HeaderVersion7',
+    'BinaryIdsSize DataSize PaddingBytesBeforeCounters CountersSize \
+    PaddingBytesAfterCounters NamesSize CountersDelta NamesDelta ValueKindLast')
+
+PROFRAW_MAGIC = 0xff6c70726f667281
+# Addresses are unsigned 64-bit values: arithmetic on them wraps around.
+U64_MASK = 0xffffffffffffffff
+# Size in bytes of the version 7 header; the binary ids follow it directly.
+HEADER_V7_LEN = 88
+
+
+def relativize_address(data, offset, databegin, sect_prf_cnts, sect_prf_data):
+  """Turns the absolute address stored at data[offset] into a relative one.
+
+  The 64-bit value is only rewritten when it lies in the range
+  [sect_prf_cnts, sect_prf_data); any other value is left untouched.
+
+  Args:
+    data: bytearray holding the profraw content, patched in place.
+    offset: position in data of the 64-bit value to examine.
+    databegin: address subtracted from the value to make it relative.
+    sect_prf_cnts: inclusive lower bound of the addresses to rewrite.
+    sect_prf_data: exclusive upper bound of the addresses to rewrite.
+  """
+  value = struct.unpack('Q', data[offset:offset + 8])[0]
+  if sect_prf_cnts <= value < sect_prf_data:
+    # The subtraction may go below zero, so wrap it like a 64-bit unsigned.
+    data[offset:offset + 8] = struct.pack('Q', (value - databegin) & U64_MASK)
+
+
+def upgrade(data, sect_prf_cnts, sect_prf_data):
+  """Upgrades profraw data, knowing the sections addresses.
+
+  Version 5 content gets the BinaryIdsSize header field of version 7, binary
+  ids are padded to a multiple of 8 bytes, and the counter addresses found in
+  [sect_prf_cnts, sect_prf_data) are made relative.
+
+  Args:
+    data: profraw content as bytes or bytearray; it is not modified.
+    sect_prf_cnts: address of the __llvm_prf_cnts section.
+    sect_prf_data: address of the __llvm_prf_data section.
+
+  Returns:
+    A bytearray with the upgraded profraw content.
+
+  Raises:
+    Exception: if the magic is wrong or the version is neither 5 nor >= 7.
+    struct.error: if data is too short for the header or a data record.
+  """
+  # Work on a mutable copy: relativize_address patches the buffer in place.
+  data = bytearray(data)
+  generic_header = HeaderGeneric._make(struct.unpack('QQ', data[:16]))
+  if generic_header.magic != PROFRAW_MAGIC:
+    raise Exception('Bad magic.')
+  if generic_header.version == 5:
+    generic_header = generic_header._replace(version=7)
+    # Upgrade from version 5 to 7 by adding a zero BinaryIdsSize field.
+    data = bytearray(struct.pack('QQQ', *generic_header, 0)) + data[16:]
+  if generic_header.version < 7:
+    raise Exception('Unhandled version.')
+  v7_header = HeaderVersion7._make(
+      struct.unpack('QQQQQQQQQ', data[16:HEADER_V7_LEN]))
+
+  if v7_header.BinaryIdsSize % 8 != 0:
+    # Adds padding for binary ids.
+    # cf commit b9f547e8e51182d32f1912f97a3e53f4899ea6be
+    # cf https://reviews.llvm.org/D110365
+    padlen = 8 - (v7_header.BinaryIdsSize % 8)
+    # The zeros go right after the original ids, before the data records.
+    ids_end = HEADER_V7_LEN + v7_header.BinaryIdsSize
+    data[ids_end:ids_end] = bytes(padlen)
+    v7_header = v7_header._replace(BinaryIdsSize=v7_header.BinaryIdsSize +
+                                   padlen)
+    data[16:24] = struct.pack('Q', v7_header.BinaryIdsSize)
+
+  if v7_header.CountersDelta != ((sect_prf_cnts - sect_prf_data) & U64_MASK):
+    # Rust linking seems to add an offset...
+    sect_prf_data = (v7_header.CountersDelta - sect_prf_cnts +
+                     sect_prf_data) & U64_MASK
+    sect_prf_cnts = v7_header.CountersDelta
+
+  dataref = sect_prf_data
+  relativize_address(data, 64, dataref, sect_prf_cnts, sect_prf_data)
+
+  # This is the size of one ProfrawData structure.
+  record_len = 44 + 2 * (v7_header.ValueKindLast + 1)
+  offset = HEADER_V7_LEN + v7_header.BinaryIdsSize
+  # This also works for C+Rust binaries compiled with
+  # clang-14/rust-nightly-clang-13.
+  for _ in range(v7_header.DataSize):
+    # 16 is the offset of CounterPtr in ProfrawData structure.
+    relativize_address(data, offset + 16, dataref, sect_prf_cnts, sect_prf_data)
+    # We need this because of CountersDelta -= sizeof(*SrcData);
+    # seen in __llvm_profile_merge_from_buffer.
+    dataref += record_len
+    offset += record_len
+
+  return data
+
+
+def parse_section_addresses(readelf_output):
+  """Finds the profile sections addresses in `readelf -S` output.
+
+  Args:
+    readelf_output: bytes printed by `readelf -S` for the binary.
+
+  Returns:
+    A (sect_prf_cnts, sect_prf_data) tuple; an element is None when the
+    matching section is not listed.
+  """
+  addresses = {b'__llvm_prf_cnts': None, b'__llvm_prf_data': None}
+  for line in readelf_output.split(b'\n'):
+    fields = line.split()
+    for name in addresses:
+      if name in fields:
+        # The section type and then its address follow the name. Counting
+        # from the name works for both "[ 9]" and "[27]" section numbers,
+        # which split into a different number of fields.
+        addresses[name] = int(fields[fields.index(name) + 2], 16)
+  return addresses[b'__llvm_prf_cnts'], addresses[b'__llvm_prf_data']
+
+
+def main():
+  """Helper script for upgrading a profraw file to latest version.
+
+  Returns:
+    The exit code: 0 on success, 1 on bad usage, 2 if readelf failed, 3 if
+    the binary lacks the profile sections.
+  """
+  if len(sys.argv) != 4:
+    sys.stderr.write('Usage: %s <binary> <profraw> <output>\n' % sys.argv[0])
+    return 1
+
+  # First find llvm profile sections addresses in the elf, quick and dirty.
+  readelf = subprocess.run(['readelf', '-S', sys.argv[1]],
+                           stdout=subprocess.PIPE,
+                           check=False)
+  # stderr is not captured, so the exit status is what reports a failure.
+  if readelf.returncode != 0:
+    print('readelf failed')
+    return 2
+  sect_prf_cnts, sect_prf_data = parse_section_addresses(readelf.stdout)
+  if sect_prf_cnts is None or sect_prf_data is None:
+    sys.stderr.write(
+        'ELF has no __llvm_prf_cnts or __llvm_prf_data section\n')
+    return 3
+
+  # Then open and read the input profraw file.
+  with open(sys.argv[2], 'rb') as input_file:
+    profraw_base = input_file.read()
+  # Do the upgrade, returning a bytearray.
+  profraw_latest = upgrade(profraw_base, sect_prf_cnts, sect_prf_data)
+  # Write the output to the file given to the command line.
+  with open(sys.argv[3], 'wb') as output_file:
+    output_file.write(profraw_latest)
+
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
author	Catena cyber <35799796+catenacyber@users.noreply.github.com>	2021-10-11 18:52:33 +0200
committer	GitHub <noreply@github.com>	2021-10-11 12:52:33 -0400
commit	7bda69cbeb3c58d117e2e13b06da16606870303e (patch)
tree	bfe72c1d68959d55935f3474880a6bd9a17f89ba /infra/base-images
parent	da4ea33901d7660d59be500d6419878a119088f6 (diff)