author | 2016-08-03 08:23:10 -0700
---|---
committer | 2016-08-03 08:23:10 -0700
commit | 1436a09e1fc3be4655af960d4ffb31066bfe4cdd (patch)
tree | e0f37f3938b2bf1dcd7a432943343839e994714c /infra/bots/recipe_modules/core/resources
parent | 7a9f3766aa07f69323f37670a7aeb13605f266bb (diff)
Re-organize Skia recipes
Break Skia recipe module into:
- skia_vars: defines and stores variables (e.g. paths)
- skia_step: utilities for running Skia steps
- skia_flavor: flavor-specific stuff
- skia: top-level setup, checkout, test/perf steps, etc.
This establishes a saner dependency structure for the recipes: skia_vars
sits at the bottom; skia_step depends on it; skia_flavor depends on both
of them; skia depends on all of the above; and the recipes themselves may
depend on any or all of them.
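Concretely, a recipe picks these layers up through its DEPS list. A hypothetical sketch (the module names are from this change; the file path and exact list are illustrative):

```python
# infra/bots/recipes/swarm_test.py (illustrative path)
DEPS = [
  'skia_vars',    # bottom layer: paths and other shared variables
  'skia_step',    # step-running utilities; depends on skia_vars
  'skia_flavor',  # flavor-specific logic; depends on both of the above
  'skia',         # top-level setup, checkout, and test/perf steps
]
```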
Next steps:
- Merge buildbot_spec into skia_vars
- Move test_steps and perf_steps from skia recipe_module into
swarm_test and swarm_perf recipes
- Cleaner checkout_steps process
BUG=skia:5578
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2198173002
Review-Url: https://codereview.chromium.org/2198173002
Diffstat (limited to 'infra/bots/recipe_modules/core/resources')
6 files changed, 1609 insertions, 0 deletions
diff --git a/infra/bots/recipe_modules/core/resources/binary_size_utils.py b/infra/bots/recipe_modules/core/resources/binary_size_utils.py new file mode 100644 index 0000000000..c09a65dccd --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/binary_size_utils.py @@ -0,0 +1,67 @@ +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Common utilities for tools that deal with binary size information. + +Copied from chromium/src/build/android/pylib/symbols/binary_size_tools.py. +""" + +import logging +import re + + +def ParseNm(nm_lines): + """Parse nm output, returning data for all relevant (to binary size) + symbols and ignoring the rest. + + Args: + nm_lines: an iterable over lines of nm output. + + Yields: + (symbol name, symbol type, symbol size, source file path). + + Path may be None if nm couldn't figure out the source file. + """ + + # Match lines with size, symbol, optional location, optional discriminator + sym_re = re.compile(r'^[0-9a-f]{8,} ' # address (8+ hex digits) + '([0-9a-f]{8,}) ' # size (8+ hex digits) + '(.) ' # symbol type, one character + '([^\t]+)' # symbol name, separated from next by tab + '(?:\t(.*):[\d\?]+)?.*$') # location + # Match lines with addr but no size. + addr_re = re.compile(r'^[0-9a-f]{8,} (.) ([^\t]+)(?:\t.*)?$') + # Match lines that don't have an address at all -- typically external symbols. + noaddr_re = re.compile(r'^ {8,} (.) (.*)$') + # Match lines with no symbol name, only addr and type + addr_only_re = re.compile(r'^[0-9a-f]{8,} (.)$') + + for line in nm_lines: + line = line.rstrip() + match = sym_re.match(line) + if match: + size, sym_type, sym = match.groups()[0:3] + size = int(size, 16) + if sym_type in ('B', 'b'): + continue # skip all BSS for now. + path = match.group(4) + yield sym, sym_type, size, path + continue + match = addr_re.match(line) + if match: + # sym_type, sym = match.groups()[0:2] + continue # No size == we don't care. + match = noaddr_re.match(line) + if match: + sym_type, sym = match.groups() + if sym_type in ('U', 'w'): + continue # external or weak symbol + match = addr_only_re.match(line) + if match: + continue # Nothing to do. + + + # If we reach this part of the loop, there was something in the + # line that we didn't expect or recognize. + logging.warning('nm output parser failed to parse: %s', repr(line)) diff --git a/infra/bots/recipe_modules/core/resources/elf_symbolizer.py b/infra/bots/recipe_modules/core/resources/elf_symbolizer.py new file mode 100644 index 0000000000..de9c141219 --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/elf_symbolizer.py @@ -0,0 +1,477 @@ +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""The ElfSymbolizer class for symbolizing Executable and Linkable Files. + +Adapted for Skia's use from +chromium/src/build/android/pylib/symbols/elf_symbolizer.py. + +Main changes: +-- Added prefix_to_remove param to remove path prefix from tree data. +""" + +import collections +import datetime +import logging +import multiprocessing +import os +import posixpath +import Queue +import re +import subprocess +import sys +import threading + + +# addr2line builds a possibly infinite memory cache that can exhaust +# the computer's memory if allowed to grow for too long. This constant +# controls how many lookups we do before restarting the process. 
4000 +# gives near peak performance without extreme memory usage. +ADDR2LINE_RECYCLE_LIMIT = 4000 + + +class ELFSymbolizer(object): + """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer. + + This class is a frontend for addr2line (part of GNU binutils), designed to + symbolize batches of large numbers of symbols for a given ELF file. It + supports sharding symbolization against many addr2line instances and + pipelining of multiple requests per each instance (in order to hide addr2line + internals and OS pipe latencies). + + The interface exhibited by this class is a very simple asynchronous interface, + which is based on the following three methods: + - SymbolizeAsync(): used to request (enqueue) resolution of a given address. + - The |callback| method: used to communicated back the symbol information. + - Join(): called to conclude the batch to gather the last outstanding results. + In essence, before the Join method returns, this class will have issued as + many callbacks as the number of SymbolizeAsync() calls. In this regard, note + that due to multiprocess sharding, callbacks can be delivered out of order. + + Some background about addr2line: + - it is invoked passing the elf path in the cmdline, piping the addresses in + its stdin and getting results on its stdout. + - it has pretty large response times for the first requests, but it + works very well in streaming mode once it has been warmed up. + - it doesn't scale by itself (on more cores). However, spawning multiple + instances at the same time on the same file is pretty efficient as they + keep hitting the pagecache and become mostly CPU bound. + - it might hang or crash, mostly for OOM. This class deals with both of these + problems. + + Despite the "scary" imports and the multi* words above, (almost) no multi- + threading/processing is involved from the python viewpoint. Concurrency + here is achieved by spawning several addr2line subprocesses and handling their + output pipes asynchronously. Therefore, all the code here (with the exception + of the Queue instance in Addr2Line) should be free from mind-blowing + thread-safety concerns. + + The multiprocess sharding works as follows: + The symbolizer tries to use the lowest number of addr2line instances as + possible (with respect of |max_concurrent_jobs|) and enqueue all the requests + in a single addr2line instance. For few symbols (i.e. dozens) sharding isn't + worth the startup cost. + The multiprocess logic kicks in as soon as the queues for the existing + instances grow. Specifically, once all the existing instances reach the + |max_queue_size| bound, a new addr2line instance is kicked in. + In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances + have a backlog of |max_queue_size|), back-pressure is applied on the caller by + blocking the SymbolizeAsync method. + + This module has been deliberately designed to be dependency free (w.r.t. of + other modules in this project), to allow easy reuse in external projects. + """ + + def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, + max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50, + source_root_path=None, strip_base_path=None, prefix_to_remove=None): + """Args: + elf_file_path: path of the elf file to be symbolized. + addr2line_path: path of the toolchain's addr2line binary. + callback: a callback which will be invoked for each resolved symbol with + the two args (sym_info, callback_arg). 
The former is an instance of + |ELFSymbolInfo| and contains the symbol information. The latter is an + embedder-provided argument which is passed to SymbolizeAsync(). + inlines: when True, the ELFSymbolInfo will contain also the details about + the outer inlining functions. When False, only the innermost function + will be provided. + max_concurrent_jobs: Max number of addr2line instances spawned. + Parallelize responsibly, addr2line is a memory and I/O monster. + max_queue_size: Max number of outstanding requests per addr2line instance. + addr2line_timeout: Max time (in seconds) to wait for a addr2line response. + After the timeout, the instance will be considered hung and respawned. + source_root_path: In some toolchains only the name of the source file is + is output, without any path information; disambiguation searches + through the source directory specified by |source_root_path| argument + for files whose name matches, adding the full path information to the + output. For example, if the toolchain outputs "unicode.cc" and there + is a file called "unicode.cc" located under |source_root_path|/foo, + the tool will replace "unicode.cc" with + "|source_root_path|/foo/unicode.cc". If there are multiple files with + the same name, disambiguation will fail because the tool cannot + determine which of the files was the source of the symbol. + strip_base_path: Rebases the symbols source paths onto |source_root_path| + (i.e replace |strip_base_path| with |source_root_path). + prefix_to_remove: Removes the prefix from ElfSymbolInfo output. Skia added + """ + assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path + self.elf_file_path = elf_file_path + self.addr2line_path = addr2line_path + self.callback = callback + self.inlines = inlines + self.max_concurrent_jobs = (max_concurrent_jobs or + min(multiprocessing.cpu_count(), 4)) + self.max_queue_size = max_queue_size + self.addr2line_timeout = addr2line_timeout + self.requests_counter = 0 # For generating monotonic request IDs. + self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. + + # Skia addition: remove the given prefix from tree paths. + self.prefix_to_remove = prefix_to_remove + + # If necessary, create disambiguation lookup table + self.disambiguate = source_root_path is not None + self.disambiguation_table = {} + self.strip_base_path = strip_base_path + if(self.disambiguate): + self.source_root_path = os.path.abspath(source_root_path) + self._CreateDisambiguationTable() + + # Create one addr2line instance. More instances will be created on demand + # (up to |max_concurrent_jobs|) depending on the rate of the requests. + self._CreateNewA2LInstance() + + def SymbolizeAsync(self, addr, callback_arg=None): + """Requests symbolization of a given address. + + This method is not guaranteed to return immediately. It generally does, but + in some scenarios (e.g. all addr2line instances have full queues) it can + block to create back-pressure. + + Args: + addr: address to symbolize. + callback_arg: optional argument which will be passed to the |callback|.""" + assert(isinstance(addr, int)) + + # Process all the symbols that have been resolved in the meanwhile. + # Essentially, this drains all the addr2line(s) out queues. + for a2l_to_purge in self._a2l_instances: + a2l_to_purge.ProcessAllResolvedSymbolsInQueue() + a2l_to_purge.RecycleIfNecessary() + + # Find the best instance according to this logic: + # 1. Find an existing instance with the shortest queue. + # 2. 
If all of instances' queues are full, but there is room in the pool, + # (i.e. < |max_concurrent_jobs|) create a new instance. + # 3. If there were already |max_concurrent_jobs| instances and all of them + # had full queues, make back-pressure. + + # 1. + def _SortByQueueSizeAndReqID(a2l): + return (a2l.queue_size, a2l.first_request_id) + a2l = min(self._a2l_instances, key=_SortByQueueSizeAndReqID) + + # 2. + if (a2l.queue_size >= self.max_queue_size and + len(self._a2l_instances) < self.max_concurrent_jobs): + a2l = self._CreateNewA2LInstance() + + # 3. + if a2l.queue_size >= self.max_queue_size: + a2l.WaitForNextSymbolInQueue() + + a2l.EnqueueRequest(addr, callback_arg) + + def Join(self): + """Waits for all the outstanding requests to complete and terminates.""" + for a2l in self._a2l_instances: + a2l.WaitForIdle() + a2l.Terminate() + + def _CreateNewA2LInstance(self): + assert(len(self._a2l_instances) < self.max_concurrent_jobs) + a2l = ELFSymbolizer.Addr2Line(self) + self._a2l_instances.append(a2l) + return a2l + + def _CreateDisambiguationTable(self): + """ Non-unique file names will result in None entries""" + self.disambiguation_table = {} + + for root, _, filenames in os.walk(self.source_root_path): + for f in filenames: + self.disambiguation_table[f] = os.path.join(root, f) if (f not in + self.disambiguation_table) else None + + + class Addr2Line(object): + """A python wrapper around an addr2line instance. + + The communication with the addr2line process looks as follows: + [STDIN] [STDOUT] (from addr2line's viewpoint) + > f001111 + > f002222 + < Symbol::Name(foo, bar) for f001111 + < /path/to/source/file.c:line_number + > f003333 + < Symbol::Name2() for f002222 + < /path/to/source/file.c:line_number + < Symbol::Name3() for f003333 + < /path/to/source/file.c:line_number + """ + + SYM_ADDR_RE = re.compile(r'([^:]+):(\?|\d+).*') + + def __init__(self, symbolizer): + self._symbolizer = symbolizer + self._lib_file_name = posixpath.basename(symbolizer.elf_file_path) + + # The request queue (i.e. addresses pushed to addr2line's stdin and not + # yet retrieved on stdout) + self._request_queue = collections.deque() + + # This is essentially len(self._request_queue). It has been optimized to a + # separate field because turned out to be a perf hot-spot. + self.queue_size = 0 + + # Keep track of the number of symbols a process has processed to + # avoid a single process growing too big and using all the memory. + self._processed_symbols_count = 0 + + # Objects required to handle the addr2line subprocess. + self._proc = None # Subprocess.Popen(...) instance. + self._thread = None # Threading.thread instance. + self._out_queue = None # Queue.Queue instance (for buffering a2l stdout). + self._RestartAddr2LineProcess() + + def EnqueueRequest(self, addr, callback_arg): + """Pushes an address to addr2line's stdin (and keeps track of it).""" + self._symbolizer.requests_counter += 1 # For global "age" of requests. + req_idx = self._symbolizer.requests_counter + self._request_queue.append((addr, callback_arg, req_idx)) + self.queue_size += 1 + self._WriteToA2lStdin(addr) + + def WaitForIdle(self): + """Waits until all the pending requests have been symbolized.""" + while self.queue_size > 0: + self.WaitForNextSymbolInQueue() + + def WaitForNextSymbolInQueue(self): + """Waits for the next pending request to be symbolized.""" + if not self.queue_size: + return + + # This outer loop guards against a2l hanging (detecting stdout timeout). 
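+      # In both the hang and the crash case the process is respawned, and
+      # _RestartAddr2LineProcess replays any still-pending requests on the
+      # new instance, so no enqueued address is lost.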
+ while True: + start_time = datetime.datetime.now() + timeout = datetime.timedelta(seconds=self._symbolizer.addr2line_timeout) + + # The inner loop guards against a2l crashing (checking if it exited). + while (datetime.datetime.now() - start_time < timeout): + # poll() returns !None if the process exited. a2l should never exit. + if self._proc.poll(): + logging.warning('addr2line crashed, respawning (lib: %s).' % + self._lib_file_name) + self._RestartAddr2LineProcess() + # TODO(primiano): the best thing to do in this case would be + # shrinking the pool size as, very likely, addr2line is crashed + # due to low memory (and the respawned one will die again soon). + + try: + lines = self._out_queue.get(block=True, timeout=0.25) + except Queue.Empty: + # On timeout (1/4 s.) repeat the inner loop and check if either the + # addr2line process did crash or we waited its output for too long. + continue + + # In nominal conditions, we get straight to this point. + self._ProcessSymbolOutput(lines) + return + + # If this point is reached, we waited more than |addr2line_timeout|. + logging.warning('Hung addr2line process, respawning (lib: %s).' % + self._lib_file_name) + self._RestartAddr2LineProcess() + + def ProcessAllResolvedSymbolsInQueue(self): + """Consumes all the addr2line output lines produced (without blocking).""" + if not self.queue_size: + return + while True: + try: + lines = self._out_queue.get_nowait() + except Queue.Empty: + break + self._ProcessSymbolOutput(lines) + + def RecycleIfNecessary(self): + """Restarts the process if it has been used for too long. + + A long running addr2line process will consume excessive amounts + of memory without any gain in performance.""" + if self._processed_symbols_count >= ADDR2LINE_RECYCLE_LIMIT: + self._RestartAddr2LineProcess() + + + def Terminate(self): + """Kills the underlying addr2line process. + + The poller |_thread| will terminate as well due to the broken pipe.""" + try: + self._proc.kill() + self._proc.communicate() # Essentially wait() without risking deadlock. + except Exception: # An exception while terminating? How interesting. + pass + self._proc = None + + def _WriteToA2lStdin(self, addr): + self._proc.stdin.write('%s\n' % hex(addr)) + if self._symbolizer.inlines: + # In the case of inlines we output an extra blank line, which causes + # addr2line to emit a (??,??:0) tuple that we use as a boundary marker. 
+ self._proc.stdin.write('\n') + self._proc.stdin.flush() + + def _ProcessSymbolOutput(self, lines): + """Parses an addr2line symbol output and triggers the client callback.""" + (_, callback_arg, _) = self._request_queue.popleft() + self.queue_size -= 1 + + innermost_sym_info = None + sym_info = None + for (line1, line2) in lines: + prev_sym_info = sym_info + name = line1 if not line1.startswith('?') else None + source_path = None + source_line = None + m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2) + if m: + if not m.group(1).startswith('?'): + source_path = m.group(1) + if not m.group(2).startswith('?'): + source_line = int(m.group(2)) + else: + logging.warning('Got invalid symbol path from addr2line: %s' % line2) + + # In case disambiguation is on, and needed + was_ambiguous = False + disambiguated = False + if self._symbolizer.disambiguate: + if source_path and not posixpath.isabs(source_path): + path = self._symbolizer.disambiguation_table.get(source_path) + was_ambiguous = True + disambiguated = path is not None + source_path = path if disambiguated else source_path + + # Use absolute paths (so that paths are consistent, as disambiguation + # uses absolute paths) + if source_path and not was_ambiguous: + source_path = os.path.abspath(source_path) + + if source_path and self._symbolizer.strip_base_path: + # Strip the base path + source_path = re.sub('^' + self._symbolizer.strip_base_path, + self._symbolizer.source_root_path or '', source_path) + + sym_info = ELFSymbolInfo(name, source_path, source_line, was_ambiguous, + disambiguated, + self._symbolizer.prefix_to_remove) + if prev_sym_info: + prev_sym_info.inlined_by = sym_info + if not innermost_sym_info: + innermost_sym_info = sym_info + + self._processed_symbols_count += 1 + self._symbolizer.callback(innermost_sym_info, callback_arg) + + def _RestartAddr2LineProcess(self): + if self._proc: + self.Terminate() + + # The only reason of existence of this Queue (and the corresponding + # Thread below) is the lack of a subprocess.stdout.poll_avail_lines(). + # Essentially this is a pipe able to extract a couple of lines atomically. + self._out_queue = Queue.Queue() + + # Start the underlying addr2line process in line buffered mode. + + cmd = [self._symbolizer.addr2line_path, '--functions', '--demangle', + '--exe=' + self._symbolizer.elf_file_path] + if self._symbolizer.inlines: + cmd += ['--inlines'] + self._proc = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE, + stdin=subprocess.PIPE, stderr=sys.stderr, close_fds=True) + + # Start the poller thread, which simply moves atomically the lines read + # from the addr2line's stdout to the |_out_queue|. + self._thread = threading.Thread( + target=ELFSymbolizer.Addr2Line.StdoutReaderThread, + args=(self._proc.stdout, self._out_queue, self._symbolizer.inlines)) + self._thread.daemon = True # Don't prevent early process exit. + self._thread.start() + + self._processed_symbols_count = 0 + + # Replay the pending requests on the new process (only for the case + # of a hung addr2line timing out during the game). + for (addr, _, _) in self._request_queue: + self._WriteToA2lStdin(addr) + + @staticmethod + def StdoutReaderThread(process_pipe, queue, inlines): + """The poller thread fn, which moves the addr2line stdout to the |queue|. + + This is the only piece of code not running on the main thread. It merely + writes to a Queue, which is thread-safe. 
In the case of inlines, it + detects the ??,??:0 marker and sends the lines atomically, such that the + main thread always receives all the lines corresponding to one symbol in + one shot.""" + try: + lines_for_one_symbol = [] + while True: + line1 = process_pipe.readline().rstrip('\r\n') + line2 = process_pipe.readline().rstrip('\r\n') + if not line1 or not line2: + break + inline_has_more_lines = inlines and (len(lines_for_one_symbol) == 0 or + (line1 != '??' and line2 != '??:0')) + if not inlines or inline_has_more_lines: + lines_for_one_symbol += [(line1, line2)] + if inline_has_more_lines: + continue + queue.put(lines_for_one_symbol) + lines_for_one_symbol = [] + process_pipe.close() + + # Every addr2line processes will die at some point, please die silently. + except (IOError, OSError): + pass + + @property + def first_request_id(self): + """Returns the request_id of the oldest pending request in the queue.""" + return self._request_queue[0][2] if self._request_queue else 0 + + +class ELFSymbolInfo(object): + """The result of the symbolization passed as first arg. of each callback.""" + + def __init__(self, name, source_path, source_line, was_ambiguous=False, + disambiguated=False, prefix_to_remove=None): + """All the fields here can be None (if addr2line replies with '??').""" + self.name = name + if source_path and source_path.startswith(prefix_to_remove): + source_path = source_path[len(prefix_to_remove) : ] + self.source_path = source_path + self.source_line = source_line + # In the case of |inlines|=True, the |inlined_by| points to the outer + # function inlining the current one (and so on, to form a chain). + self.inlined_by = None + self.disambiguated = disambiguated + self.was_ambiguous = was_ambiguous + + def __str__(self): + return '%s [%s:%d]' % ( + self.name or '??', self.source_path or '??', self.source_line or 0) diff --git a/infra/bots/recipe_modules/core/resources/generate_and_upload_doxygen.py b/infra/bots/recipe_modules/core/resources/generate_and_upload_doxygen.py new file mode 100755 index 0000000000..f06ea96bc7 --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/generate_and_upload_doxygen.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ + +"""Generate Doxygen documentation.""" + + +import datetime +import os +import shutil +import subprocess +import sys + + +DOXYFILE_BASENAME = 'Doxyfile' # must match name of Doxyfile in skia root +DOXYGEN_BINARY = 'doxygen' +WORKDIR = os.path.join(os.pardir, 'doxygen_workdir') +DOXYGEN_CONFIG_DIR = os.path.join(WORKDIR, 'doxygen-config') +DOXYGEN_WORKING_DIR = os.path.join(WORKDIR, 'doxygen') +DOXYGEN_GS_PATH = '/'.join(['gs://chromium-skia-gm', 'doxygen']) + +IFRAME_FOOTER_TEMPLATE = """ +<html><body><address style="text-align: right;"><small> +Generated at %s for skia +by <a href="http://www.doxygen.org/index.html">doxygen</a> +%s </small></address></body></html> +""" + + +def recreate_dir(path): + """Delete and recreate the directory.""" + try: + shutil.rmtree(path) + except OSError: + if os.path.exists(path): + raise Exception('Could not remove %s' % path) + os.makedirs(path) + + +def generate_and_upload_doxygen(gsutil_path): + """Generate Doxygen.""" + # Create empty dir and add static_footer.txt + recreate_dir(DOXYGEN_WORKING_DIR) + static_footer_path = os.path.join(DOXYGEN_WORKING_DIR, 'static_footer.txt') + shutil.copyfile(os.path.join('tools', 'doxygen_footer.txt'), + static_footer_path) + + # Make copy of doxygen config file, overriding any necessary configs, + # and run doxygen. + recreate_dir(DOXYGEN_CONFIG_DIR) + modified_doxyfile = os.path.join(DOXYGEN_CONFIG_DIR, DOXYFILE_BASENAME) + with open(DOXYFILE_BASENAME, 'r') as reader: + with open(modified_doxyfile, 'w') as writer: + shutil.copyfileobj(reader, writer) + writer.write('OUTPUT_DIRECTORY = %s\n' % DOXYGEN_WORKING_DIR) + writer.write('HTML_FOOTER = %s\n' % static_footer_path) + subprocess.check_call([DOXYGEN_BINARY, modified_doxyfile]) + + # Create iframe_footer.html + with open(os.path.join(DOXYGEN_WORKING_DIR, 'iframe_footer.html'), 'w') as f: + f.write(IFRAME_FOOTER_TEMPLATE % ( + datetime.datetime.now().isoformat(' '), + subprocess.check_output([DOXYGEN_BINARY, '--version']).rstrip())) + + # Upload. + cmd = [gsutil_path, 'cp', '-a', 'public-read', '-R', + DOXYGEN_WORKING_DIR, DOXYGEN_GS_PATH] + subprocess.check_call(cmd) + + +if '__main__' == __name__: + generate_and_upload_doxygen(*sys.argv[1:]) + diff --git a/infra/bots/recipe_modules/core/resources/run_binary_size_analysis.py b/infra/bots/recipe_modules/core/resources/run_binary_size_analysis.py new file mode 100755 index 0000000000..5cb24d967a --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/run_binary_size_analysis.py @@ -0,0 +1,817 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Generate a spatial analysis against an arbitrary library. + +Adapted for Skia's use case from +chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes: + +-- Cleans up some deprecated codes. +-- Always use relative code path so the tree root is Skia repo's root. +-- Instead of outputting the standalone HTML/CSS/JS filesets, writes the + TreeMap JSON data into a Google Storage bucket. +-- Adds githash and total_size to the JSON data. +-- Outputs another summary data in JSON Bench format for skiaperf ingestion. 
+ +The output JSON data for visualization is in the following format: + +{ + "githash": 123abc, + "commit_ts": 1234567890, + "total_size": 1234567, + "key": {"source_type": "binary_size"}, + "tree_data": { + "maxDepth": 9, + "k": "p", "children":[ + {"k":"p","children":[ + {"k":"p","children":[ + {"k":"p","lastPathElement":true,"children":[ + {"k":"b","t":"t","children":[ + {"k":"s", "t":"t", "value":4029, + "n":"etc_encode_subblock_helper(unsigned char const*, ...)" + }, + ...... + } +} + +Another JSON file is generated for size summaries to be used in skiaperf. The +JSON format details can be found at: + https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54 +and: + https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go + +In the binary size case, outputs look like: + +{ + "gitHash": "123abc", + "key": { + "source_type": "binarysize" + } + "results: { + "src_lazy_global_weak_symbol": { + "memory": { + "bytes": 41, + "options": { + "path": "src_lazy", + "symbol": "global_weak_symbol" + } + } + }, + "src_lazy_global_read_only_data": { + "memory": { + "bytes": 13476, + "options": { + "path": "src_lazy", + "symbol": "global_read_only_data" + } + } + }, + ... + } +} + +""" + +import collections +import datetime +import json +import logging +import multiprocessing +import optparse +import os +import re +import shutil +import struct +import subprocess +import sys +import tempfile +import time +import urllib2 + +import binary_size_utils +import elf_symbolizer + +from recipe_engine.types import freeze + +# Node dictionary keys. These are output in json read by the webapp so +# keep them short to save file size. +# Note: If these change, the webapp must also change. +NODE_TYPE_KEY = 'k' +NODE_NAME_KEY = 'n' +NODE_CHILDREN_KEY = 'children' +NODE_SYMBOL_TYPE_KEY = 't' +NODE_SYMBOL_SIZE_KEY = 'value' +NODE_MAX_DEPTH_KEY = 'maxDepth' +NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement' + +# The display name of the bucket where we put symbols without path. +NAME_NO_PATH_BUCKET = '(No Path)' + +# Try to keep data buckets smaller than this to avoid killing the +# graphing lib. +BIG_BUCKET_LIMIT = 3000 + +# Skia addition: relative dir for libskia.so from code base. +LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib') + +# Skia addition: dictionary mapping symbol type code to symbol name. 
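+# Most of the one-letter codes are nm(1) symbol-type codes; '@' is assigned
+# by this script to vtable entries.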
+# See +# https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/template/D3SymbolTreeMap.js&l=74 +SYMBOL_MAP = freeze({ + 'A': 'global_absolute', + 'B': 'global_uninitialized_data', + 'b': 'local_uninitialized_data', + 'C': 'global_uninitialized_common', + 'D': 'global_initialized_data', + 'd': 'local_initialized_data', + 'G': 'global_small initialized_data', + 'g': 'local_small_initialized_data', + 'i': 'indirect_function', + 'N': 'debugging', + 'p': 'stack_unwind', + 'R': 'global_read_only_data', + 'r': 'local_read_only_data', + 'S': 'global_small_uninitialized_data', + 's': 'local_small_uninitialized_data', + 'T': 'global_code', + 't': 'local_code', + 'U': 'undefined', + 'u': 'unique', + 'V': 'global_weak_object', + 'v': 'local_weak_object', + 'W': 'global_weak_symbol', + 'w': 'local_weak_symbol', + '@': 'vtable_entry', + '-': 'stabs_debugging', + '?': 'unrecognized', +}) + + +def _MkChild(node, name): + child = node[NODE_CHILDREN_KEY].get(name) + if child is None: + child = {NODE_NAME_KEY: name, + NODE_CHILDREN_KEY: {}} + node[NODE_CHILDREN_KEY][name] = child + return child + + +def SplitNoPathBucket(node): + """NAME_NO_PATH_BUCKET can be too large for the graphing lib to + handle. Split it into sub-buckets in that case.""" + root_children = node[NODE_CHILDREN_KEY] + if NAME_NO_PATH_BUCKET in root_children: + no_path_bucket = root_children[NAME_NO_PATH_BUCKET] + old_children = no_path_bucket[NODE_CHILDREN_KEY] + count = 0 + for symbol_type, symbol_bucket in old_children.iteritems(): + count += len(symbol_bucket[NODE_CHILDREN_KEY]) + if count > BIG_BUCKET_LIMIT: + new_children = {} + no_path_bucket[NODE_CHILDREN_KEY] = new_children + current_bucket = None + index = 0 + for symbol_type, symbol_bucket in old_children.iteritems(): + for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems(): + if index % BIG_BUCKET_LIMIT == 0: + group_no = (index / BIG_BUCKET_LIMIT) + 1 + current_bucket = _MkChild(no_path_bucket, + '%s subgroup %d' % (NAME_NO_PATH_BUCKET, + group_no)) + assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p' + node[NODE_TYPE_KEY] = 'p' # p for path + index += 1 + symbol_size = value[NODE_SYMBOL_SIZE_KEY] + AddSymbolIntoFileNode(current_bucket, symbol_type, + symbol_name, symbol_size) + + +def MakeChildrenDictsIntoLists(node): + largest_list_len = 0 + if NODE_CHILDREN_KEY in node: + largest_list_len = len(node[NODE_CHILDREN_KEY]) + child_list = [] + for child in node[NODE_CHILDREN_KEY].itervalues(): + child_largest_list_len = MakeChildrenDictsIntoLists(child) + if child_largest_list_len > largest_list_len: + largest_list_len = child_largest_list_len + child_list.append(child) + node[NODE_CHILDREN_KEY] = child_list + + return largest_list_len + + +def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size): + """Puts symbol into the file path node |node|. + Returns the number of added levels in tree. I.e. returns 2.""" + + # 'node' is the file node and first step is to find its symbol-type bucket. + node[NODE_LAST_PATH_ELEMENT_KEY] = True + node = _MkChild(node, symbol_type) + assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b' + node[NODE_SYMBOL_TYPE_KEY] = symbol_type + node[NODE_TYPE_KEY] = 'b' # b for bucket + + # 'node' is now the symbol-type bucket. Make the child entry. + node = _MkChild(node, symbol_name) + if NODE_CHILDREN_KEY in node: + if node[NODE_CHILDREN_KEY]: + logging.warning('A container node used as symbol for %s.' % symbol_name) + # This is going to be used as a leaf so no use for child list. 
+ del node[NODE_CHILDREN_KEY] + node[NODE_SYMBOL_SIZE_KEY] = symbol_size + node[NODE_SYMBOL_TYPE_KEY] = symbol_type + node[NODE_TYPE_KEY] = 's' # s for symbol + + return 2 # Depth of the added subtree. + + +def MakeCompactTree(symbols, symbol_path_origin_dir): + result = {NODE_NAME_KEY: '/', + NODE_CHILDREN_KEY: {}, + NODE_TYPE_KEY: 'p', + NODE_MAX_DEPTH_KEY: 0} + seen_symbol_with_path = False + for symbol_name, symbol_type, symbol_size, file_path in symbols: + + if 'vtable for ' in symbol_name: + symbol_type = '@' # hack to categorize these separately + if file_path and file_path != "??": + seen_symbol_with_path = True + else: + file_path = NAME_NO_PATH_BUCKET + + path_parts = file_path.split('/') + + # Find pre-existing node in tree, or update if it already exists + node = result + depth = 0 + while len(path_parts) > 0: + path_part = path_parts.pop(0) + if len(path_part) == 0: + continue + depth += 1 + node = _MkChild(node, path_part) + assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p' + node[NODE_TYPE_KEY] = 'p' # p for path + + depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size) + result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth) + + if not seen_symbol_with_path: + logging.warning('Symbols lack paths. Data will not be structured.') + + # The (no path) bucket can be extremely large if we failed to get + # path information. Split it into subgroups if needed. + SplitNoPathBucket(result) + + largest_list_len = MakeChildrenDictsIntoLists(result) + + if largest_list_len > BIG_BUCKET_LIMIT: + logging.warning('There are sections with %d nodes. ' + 'Results might be unusable.' % largest_list_len) + return result + + +# Skia added: summarizes tree size by symbol type for the given root node. +# Returns a dict keyed by symbol type, and value the type's overall size. +# e.g., {"t": 12345, "W": 543}. +def GetTreeSizes(node): + if 'children' not in node or not node['children']: + return {node['t']: node['value']} + dic = {} + for i in node['children']: + for k, v in GetTreeSizes(i).items(): + dic.setdefault(k, 0) + dic[k] += v + + return dic + + +# Skia added: creates dict to be converted to JSON in bench format. +# See top of file for the structure description. +def GetBenchDict(githash, tree_root): + dic = {'gitHash': githash, + 'key': {'source_type': 'binarysize'}, + 'results': {},} + for i in tree_root['children']: + if '(No Path)' == i['n']: # Already at symbol summary level. + for k, v in GetTreeSizes(i).items(): + dic['results']['no_path_' + SYMBOL_MAP[k]] = { + 'memory': { + 'bytes': v, + 'options': {'path': 'no_path', + 'symbol': SYMBOL_MAP[k],},}} + else: # We need to go deeper. + for c in i['children']: + path = i['n'] + '_' + c['n'] + for k, v in GetTreeSizes(c).items(): + dic['results'][path + '_' + SYMBOL_MAP[k]] = { + 'memory': { + 'bytes': v, + 'options': {'path': path, + 'symbol': SYMBOL_MAP[k],}}} + + return dic + + +# Skia added: constructs 'gsutil cp' subprocess command list. 
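+# For example, GetGsCopyCommandList('gsutil', 'a.json', 'gs://bucket/a.json')
+# returns ['gsutil', '-h', 'Content-Type:application/json', 'cp', '-a',
+# 'public-read', 'a.json', 'gs://bucket/a.json'].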
+def GetGsCopyCommandList(gsutil, src, dst): + return [gsutil, '-h', 'Content-Type:application/json', 'cp', '-a', + 'public-read', src, dst] + + +def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil): + tree_root = MakeCompactTree(symbols, symbol_path_origin_dir) + json_data = {'tree_data': tree_root, + 'githash': ha, + 'commit_ts': ts, + 'key': {'source_type': 'binary_size'}, + 'total_size': sum(GetTreeSizes(tree_root).values()),} + tmpfile = tempfile.NamedTemporaryFile(delete=False).name + with open(tmpfile, 'w') as out: + # Use separators without whitespace to get a smaller file. + json.dump(json_data, out, separators=(',', ':')) + + GS_PREFIX = 'gs://chromium-skia-gm/' + # Writes to Google Storage for visualization. + subprocess.check_call(GetGsCopyCommandList( + gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json')) + # Updates the latest data. + if not issue: + subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile, + GS_PREFIX + 'size/latest.json')) + # Writes an extra copy using year/month/day/hour path for easy ingestion. + with open(tmpfile, 'w') as out: + json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':')) + now = datetime.datetime.utcnow() + ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4), + str(now.month).zfill(2), str(now.day).zfill(2), + str(now.hour).zfill(2))) + if issue: + ingest_path = '/'.join('trybot', ingest_path, issue) + subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile, + GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json')) + + +def MakeSourceMap(symbols): + sources = {} + for _sym, _symbol_type, size, path in symbols: + key = None + if path: + key = os.path.normpath(path) + else: + key = '[no path]' + if key not in sources: + sources[key] = {'path': path, 'symbol_count': 0, 'size': 0} + record = sources[key] + record['size'] += size + record['symbol_count'] += 1 + return sources + + +# Regex for parsing "nm" output. A sample line looks like this: +# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95 +# +# The fields are: address, size, type, name, source location +# Regular expression explained ( see also: https://xkcd.com/208 ): +# ([0-9a-f]{8,}+) The address +# [\s]+ Whitespace separator +# ([0-9a-f]{8,}+) The size. From here on out it's all optional. +# [\s]+ Whitespace separator +# (\S?) The symbol type, which is any non-whitespace char +# [\s*] Whitespace separator +# ([^\t]*) Symbol name, any non-tab character (spaces ok!) +# [\t]? Tab separator +# (.*) The location (filename[:linennum|?][ (discriminator n)] +sNmPattern = re.compile( + r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)') + +class Progress(): + def __init__(self): + self.count = 0 + self.skip_count = 0 + self.collisions = 0 + self.time_last_output = time.time() + self.count_last_output = 0 + self.disambiguations = 0 + self.was_ambiguous = 0 + + +def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs, + disambiguate, src_path): + nm_output = RunNm(library, nm_binary) + nm_output_lines = nm_output.splitlines() + nm_output_lines_len = len(nm_output_lines) + address_symbol = {} + progress = Progress() + def map_address_symbol(symbol, addr): + progress.count += 1 + if addr in address_symbol: + # 'Collision between %s and %s.' 
% (str(symbol.name), + # str(address_symbol[addr].name)) + progress.collisions += 1 + else: + if symbol.disambiguated: + progress.disambiguations += 1 + if symbol.was_ambiguous: + progress.was_ambiguous += 1 + + address_symbol[addr] = symbol + + progress_output() + + def progress_output(): + progress_chunk = 100 + if progress.count % progress_chunk == 0: + time_now = time.time() + time_spent = time_now - progress.time_last_output + if time_spent > 1.0: + # Only output at most once per second. + progress.time_last_output = time_now + chunk_size = progress.count - progress.count_last_output + progress.count_last_output = progress.count + if time_spent > 0: + speed = chunk_size / time_spent + else: + speed = 0 + progress_percent = (100.0 * (progress.count + progress.skip_count) / + nm_output_lines_len) + disambiguation_percent = 0 + if progress.disambiguations != 0: + disambiguation_percent = (100.0 * progress.disambiguations / + progress.was_ambiguous) + + sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, ' + '%d disambiguations where %.1f%% succeeded)' + ' - %.1f lookups/s.' % + (progress_percent, progress.count, progress.collisions, + progress.disambiguations, disambiguation_percent, speed)) + + # In case disambiguation was disabled, we remove the source path (which upon + # being set signals the symbolizer to enable disambiguation) + if not disambiguate: + src_path = None + symbol_path_origin_dir = os.path.dirname(library) + # Skia specific. + symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '') + symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary, + map_address_symbol, + max_concurrent_jobs=jobs, + source_root_path=src_path, + prefix_to_remove=symbol_path_prefix) + user_interrupted = False + try: + for line in nm_output_lines: + match = sNmPattern.match(line) + if match: + location = match.group(5) + if not location: + addr = int(match.group(1), 16) + size = int(match.group(2), 16) + if addr in address_symbol: # Already looked up, shortcut + # ELFSymbolizer. + map_address_symbol(address_symbol[addr], addr) + continue + elif size == 0: + # Save time by not looking up empty symbols (do they even exist?) + print('Empty symbol: ' + line) + else: + symbolizer.SymbolizeAsync(addr, addr) + continue + + progress.skip_count += 1 + except KeyboardInterrupt: + user_interrupted = True + print('Interrupting - killing subprocesses. Please wait.') + + try: + symbolizer.Join() + except KeyboardInterrupt: + # Don't want to abort here since we will be finished in a few seconds. + user_interrupted = True + print('Patience you must have my young padawan.') + + print '' + + if user_interrupted: + print('Skipping the rest of the file mapping. ' + 'Output will not be fully classified.') + + symbol_path_origin_dir = os.path.dirname(library) + # Skia specific: path prefix to strip. + symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '') + + with open(outfile, 'w') as out: + for line in nm_output_lines: + match = sNmPattern.match(line) + if match: + location = match.group(5) + if not location: + addr = int(match.group(1), 16) + symbol = address_symbol.get(addr) + if symbol is not None: + path = '??' + if symbol.source_path is not None: + path = symbol.source_path.replace(symbol_path_prefix, '') + line_number = 0 + if symbol.source_line is not None: + line_number = symbol.source_line + out.write('%s\t%s:%d\n' % (line, path, line_number)) + continue + + out.write('%s\n' % line) + + print('%d symbols in the results.' 
% len(address_symbol)) + + +def RunNm(binary, nm_binary): + cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort', + binary] + nm_process = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (process_output, err_output) = nm_process.communicate() + + if nm_process.returncode != 0: + if err_output: + raise Exception, err_output + else: + raise Exception, process_output + + return process_output + + +def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, + addr2line_binary, nm_binary, disambiguate, src_path): + if nm_infile is None: + if outfile is None: + outfile = tempfile.NamedTemporaryFile(delete=False).name + + if verbose: + print 'Running parallel addr2line, dumping symbols to ' + outfile + RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs, + disambiguate, src_path) + + nm_infile = outfile + + elif verbose: + print 'Using nm input from ' + nm_infile + with file(nm_infile, 'r') as infile: + return list(binary_size_utils.ParseNm(infile)) + + +PAK_RESOURCE_ID_TO_STRING = { "inited": False } + +def LoadPakIdsFromResourceFile(filename): + """Given a file name, it loads everything that looks like a resource id + into PAK_RESOURCE_ID_TO_STRING.""" + with open(filename) as resource_header: + for line in resource_header: + if line.startswith("#define "): + line_data = line.split() + if len(line_data) == 3: + try: + resource_number = int(line_data[2]) + resource_name = line_data[1] + PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name + except ValueError: + pass + +def GetReadablePakResourceName(pak_file, resource_id): + """Pak resources have a numeric identifier. It is not helpful when + trying to locate where footprint is generated. This does its best to + map the number to a usable string.""" + if not PAK_RESOURCE_ID_TO_STRING['inited']: + # Try to find resource header files generated by grit when + # building the pak file. We'll look for files named *resources.h" + # and lines of the type: + # #define MY_RESOURCE_JS 1234 + PAK_RESOURCE_ID_TO_STRING['inited'] = True + gen_dir = os.path.join(os.path.dirname(pak_file), 'gen') + if os.path.isdir(gen_dir): + for dirname, _dirs, files in os.walk(gen_dir): + for filename in files: + if filename.endswith('resources.h'): + LoadPakIdsFromResourceFile(os.path.join(dirname, filename)) + return PAK_RESOURCE_ID_TO_STRING.get(resource_id, + 'Pak Resource %d' % resource_id) + +def AddPakData(symbols, pak_file): + """Adds pseudo-symbols from a pak file.""" + pak_file = os.path.abspath(pak_file) + with open(pak_file, 'rb') as pak: + data = pak.read() + + PAK_FILE_VERSION = 4 + HEADER_LENGTH = 2 * 4 + 1 # Two uint32s. (file version, number of entries) + # and one uint8 (encoding of text resources) + INDEX_ENTRY_SIZE = 2 + 4 # Each entry is a uint16 and a uint32. + version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH]) + assert version == PAK_FILE_VERSION, ('Unsupported pak file ' + 'version (%d) in %s. Only ' + 'support version %d' % + (version, pak_file, PAK_FILE_VERSION)) + if num_entries > 0: + # Read the index and data. + data = data[HEADER_LENGTH:] + for _ in range(num_entries): + resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE]) + data = data[INDEX_ENTRY_SIZE:] + _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE]) + resource_size = next_offset - offset + + symbol_name = GetReadablePakResourceName(pak_file, resource_id) + symbol_path = pak_file + symbol_type = 'd' # Data. Approximation. 
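+      # resource_size above is the gap to the next index entry; the pak
+      # index stores one extra terminator entry, so that peek is valid even
+      # for the last resource.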
+ symbol_size = resource_size + symbols.append((symbol_name, symbol_type, symbol_size, symbol_path)) + +def _find_in_system_path(binary): + """Locate the full path to binary in the system path or return None + if not found.""" + system_path = os.environ["PATH"].split(os.pathsep) + for path in system_path: + binary_path = os.path.join(path, binary) + if os.path.isfile(binary_path): + return binary_path + return None + +def CheckDebugFormatSupport(library, addr2line_binary): + """Kills the program if debug data is in an unsupported format. + + There are two common versions of the DWARF debug formats and + since we are right now transitioning from DWARF2 to newer formats, + it's possible to have a mix of tools that are not compatible. Detect + that and abort rather than produce meaningless output.""" + tool_output = subprocess.check_output([addr2line_binary, '--version']) + version_re = re.compile(r'^GNU [^ ]+ .* (\d+).(\d+).*?$', re.M) + parsed_output = version_re.match(tool_output) + major = int(parsed_output.group(1)) + minor = int(parsed_output.group(2)) + supports_dwarf4 = major > 2 or major == 2 and minor > 22 + + if supports_dwarf4: + return + + print('Checking version of debug information in %s.' % library) + debug_info = subprocess.check_output(['readelf', '--debug-dump=info', + '--dwarf-depth=1', library]) + dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M) + parsed_dwarf_format_output = dwarf_version_re.search(debug_info) + version = int(parsed_dwarf_format_output.group(1)) + if version > 2: + print('The supplied tools only support DWARF2 debug data but the binary\n' + + 'uses DWARF%d. Update the tools or compile the binary\n' % version + + 'with -gdwarf-2.') + sys.exit(1) + + +def main(): + usage = """%prog [options] + + Runs a spatial analysis on a given library, looking up the source locations + of its symbols and calculating how much space each directory, source file, + and so on is taking. The result is a report that can be used to pinpoint + sources of large portions of the binary, etceteras. + + Under normal circumstances, you only need to pass two arguments, thusly: + + %prog --library /path/to/library --destdir /path/to/output + + In this mode, the program will dump the symbols from the specified library + and map those symbols back to source locations, producing a web-based + report in the specified output directory. + + Other options are available via '--help'. + """ + parser = optparse.OptionParser(usage=usage) + parser.add_option('--nm-in', metavar='PATH', + help='if specified, use nm input from <path> instead of ' + 'generating it. Note that source locations should be ' + 'present in the file; i.e., no addr2line symbol lookups ' + 'will be performed when this option is specified. ' + 'Mutually exclusive with --library.') + parser.add_option('--destdir', metavar='PATH', + help='write output to the specified directory. An HTML ' + 'report is generated here along with supporting files; ' + 'any existing report will be overwritten. Not used in ' + 'Skia.') + parser.add_option('--library', metavar='PATH', + help='if specified, process symbols in the library at ' + 'the specified path. Mutually exclusive with --nm-in.') + parser.add_option('--pak', metavar='PATH', + help='if specified, includes the contents of the ' + 'specified *.pak file in the output.') + parser.add_option('--nm-binary', + help='use the specified nm binary to analyze library. 
' + 'This is to be used when the nm in the path is not for ' + 'the right architecture or of the right version.') + parser.add_option('--addr2line-binary', + help='use the specified addr2line binary to analyze ' + 'library. This is to be used when the addr2line in ' + 'the path is not for the right architecture or ' + 'of the right version.') + parser.add_option('--jobs', type='int', + help='number of jobs to use for the parallel ' + 'addr2line processing pool; defaults to 1. More ' + 'jobs greatly improve throughput but eat RAM like ' + 'popcorn, and take several gigabytes each. Start low ' + 'and ramp this number up until your machine begins to ' + 'struggle with RAM. ' + 'This argument is only valid when using --library.') + parser.add_option('-v', dest='verbose', action='store_true', + help='be verbose, printing lots of status information.') + parser.add_option('--nm-out', metavar='PATH', + help='keep the nm output file, and store it at the ' + 'specified path. This is useful if you want to see the ' + 'fully processed nm output after the symbols have been ' + 'mapped to source locations. By default, a tempfile is ' + 'used and is deleted when the program terminates.' + 'This argument is only valid when using --library.') + parser.add_option('--legacy', action='store_true', + help='emit legacy binary size report instead of modern') + parser.add_option('--disable-disambiguation', action='store_true', + help='disables the disambiguation process altogether,' + ' NOTE: this may, depending on your toolchain, produce' + ' output with some symbols at the top layer if addr2line' + ' could not get the entire source path.') + parser.add_option('--source-path', default='./', + help='the path to the source code of the output binary, ' + 'default set to current directory. Used in the' + ' disambiguation process.') + parser.add_option('--githash', default='latest', + help='Git hash for the binary version. Added by Skia.') + parser.add_option('--commit_ts', type='int', default=-1, + help='Timestamp for the commit. Added by Skia.') + parser.add_option('--issue_number', default='', + help='The trybot issue number in string. Added by Skia.') + parser.add_option('--gsutil_path', default='gsutil', + help='Path to gsutil binary. Added by Skia.') + opts, _args = parser.parse_args() + + if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in): + parser.error('exactly one of --library or --nm-in is required') + if (opts.nm_in): + if opts.jobs: + print >> sys.stderr, ('WARNING: --jobs has no effect ' + 'when used with --nm-in') + if not opts.jobs: + # Use the number of processors but cap between 2 and 4 since raw + # CPU power isn't the limiting factor. It's I/O limited, memory + # bus limited and available-memory-limited. Too many processes and + # the computer will run out of memory and it will be slow. + opts.jobs = max(2, min(4, str(multiprocessing.cpu_count()))) + + if opts.addr2line_binary: + assert os.path.isfile(opts.addr2line_binary) + addr2line_binary = opts.addr2line_binary + else: + addr2line_binary = _find_in_system_path('addr2line') + assert addr2line_binary, 'Unable to find addr2line in the path. '\ + 'Use --addr2line-binary to specify location.' + + if opts.nm_binary: + assert os.path.isfile(opts.nm_binary) + nm_binary = opts.nm_binary + else: + nm_binary = _find_in_system_path('nm') + assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\ + 'to specify location.' 
+ + if opts.pak: + assert os.path.isfile(opts.pak), 'Could not find ' % opts.pak + + print('addr2line: %s' % addr2line_binary) + print('nm: %s' % nm_binary) + + if opts.library: + CheckDebugFormatSupport(opts.library, addr2line_binary) + + symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, + opts.jobs, opts.verbose is True, + addr2line_binary, nm_binary, + opts.disable_disambiguation is None, + opts.source_path) + + if opts.pak: + AddPakData(symbols, opts.pak) + + if opts.legacy: # legacy report + print 'Do Not set legacy flag.' + + else: # modern report + if opts.library: + symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library)) + else: + # Just a guess. Hopefully all paths in the input file are absolute. + symbol_path_origin_dir = os.path.abspath(os.getcwd()) + DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash, + opts.commit_ts, opts.issue_number, opts.gsutil_path) + print 'Report data uploaded to GS.' + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/infra/bots/recipe_modules/core/resources/upload_bench_results.py b/infra/bots/recipe_modules/core/resources/upload_bench_results.py new file mode 100755 index 0000000000..25cfcc2631 --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/upload_bench_results.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +""" Upload benchmark performance data results. """ + +import gzip +import os +import os.path +import re +import subprocess +import sys +import tempfile + +from datetime import datetime + + +def _UploadJSONResults(builder_name, build_number, dest_gsbase, gs_subdir, + full_json_path, gzipped=True, gsutil_path='gsutil', + issue_number=None): + now = datetime.utcnow() + gs_json_path = '/'.join((str(now.year).zfill(4), str(now.month).zfill(2), + str(now.day).zfill(2), str(now.hour).zfill(2))) + gs_dir = '/'.join((gs_subdir, gs_json_path, builder_name)) + if builder_name.endswith('-Trybot'): + if not issue_number: + raise Exception('issue_number build property is missing!') + gs_dir = '/'.join(('trybot', gs_dir, build_number, issue_number)) + full_path_to_upload = full_json_path + file_to_upload = os.path.basename(full_path_to_upload) + http_header = ['Content-Type:application/json'] + if gzipped: + http_header.append('Content-Encoding:gzip') + gzipped_file = os.path.join(tempfile.gettempdir(), file_to_upload) + # Apply gzip. 
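+    # Content-Encoding:gzip was added to the HTTP headers above, so Google
+    # Storage serves the object in a form HTTP clients decompress
+    # transparently.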
+ with open(full_path_to_upload, 'rb') as f_in: + with gzip.open(gzipped_file, 'wb') as f_out: + f_out.writelines(f_in) + full_path_to_upload = gzipped_file + cmd = ['python', gsutil_path] + for header in http_header: + cmd.extend(['-h', header]) + cmd.extend(['cp', '-a', 'public-read', full_path_to_upload, + '/'.join((dest_gsbase, gs_dir, file_to_upload))]) + print ' '.join(cmd) + subprocess.check_call(cmd) + + +def main(builder_name, build_number, perf_data_dir, got_revision, gsutil_path, + issue_number=None): + """Uploads gzipped nanobench JSON data.""" + # Find the nanobench JSON + file_list = os.listdir(perf_data_dir) + RE_FILE_SEARCH = re.compile( + 'nanobench_({})_[0-9]+\.json'.format(got_revision)) + nanobench_name = None + + for file_name in file_list: + if RE_FILE_SEARCH.search(file_name): + nanobench_name = file_name + break + + if nanobench_name: + dest_gsbase = 'gs://skia-perf' + nanobench_json_file = os.path.join(perf_data_dir, + nanobench_name) + _UploadJSONResults(builder_name, build_number, dest_gsbase, 'nano-json-v1', + nanobench_json_file, gsutil_path=gsutil_path, + issue_number=issue_number) + + +if __name__ == '__main__': + main(*sys.argv[1:]) + diff --git a/infra/bots/recipe_modules/core/resources/upload_dm_results.py b/infra/bots/recipe_modules/core/resources/upload_dm_results.py new file mode 100755 index 0000000000..1bee64fb78 --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/upload_dm_results.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Upload DM output PNG files and JSON summary to Google Storage.""" + +import datetime +import json +import os +import shutil +import sys +import tempfile + +def main(dm_dir, git_hash, builder_name, build_number, try_issue, import_path): + """Upload DM output PNG files and JSON summary to Google Storage. + + dm_dir: path to PNG files and JSON summary (str) + git_hash: this build's Git hash (str) + builder_name: name of this builder (str) + build_number: nth build on this builder (str or int) + try_issue: Rietveld issue if this is a try job (str, int, or None) + import_path: Path to import the gs_utils package (str) + """ + # import gs_utils + sys.path.insert(0, import_path) + import gs_utils + + # Private, but Google-readable. + ACL = gs_utils.GSUtils.PredefinedACL.PRIVATE + FINE_ACLS = [( + gs_utils.GSUtils.IdType.GROUP_BY_DOMAIN, + 'google.com', + gs_utils.GSUtils.Permission.READ + )] + + # Move dm.json and verbose.log to their own directory for easy upload. + tmp = tempfile.mkdtemp() + shutil.move(os.path.join(dm_dir, 'dm.json'), + os.path.join(tmp, 'dm.json')) + shutil.move(os.path.join(dm_dir, 'verbose.log'), + os.path.join(tmp, 'verbose.log')) + + # Make sure the JSON file parses correctly. + json_file_name = os.path.join(tmp, 'dm.json') + with open(json_file_name) as jsonFile: + try: + json.load(jsonFile) + except ValueError: + json_content = open(json_file_name).read() + print >> sys.stderr, "Invalid JSON: \n\n%s\n" % json_content + raise + + # Only images are left in dm_dir. Upload any new ones. 
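+  # dm.json and verbose.log were moved into |tmp| above, so a recursive
+  # upload of dm_dir now covers images only.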
+ gs = gs_utils.GSUtils() + bucket, image_dest_dir = 'chromium-skia-gm', 'dm-images-v1' + print 'Uploading images to gs://' + bucket + '/' + image_dest_dir + gs.upload_dir_contents(dm_dir, + bucket, + image_dest_dir, + upload_if = gs.UploadIf.ALWAYS, + predefined_acl = ACL, + fine_grained_acl_list = FINE_ACLS) + + + # /dm-json-v1/year/month/day/hour/git-hash/builder/build-number/dm.json + now = datetime.datetime.utcnow() + summary_dest_dir = '/'.join(['dm-json-v1', + str(now.year ).zfill(4), + str(now.month).zfill(2), + str(now.day ).zfill(2), + str(now.hour ).zfill(2), + git_hash, + builder_name, + str(build_number)]) + + # Trybot results are further siloed by CL. + if try_issue: + summary_dest_dir = '/'.join(['trybot', summary_dest_dir, str(try_issue)]) + + # Upload the JSON summary and verbose.log. + print 'Uploading logs to gs://' + bucket + '/' + summary_dest_dir + gs.upload_dir_contents(tmp, + bucket, + summary_dest_dir, + predefined_acl = ACL, + fine_grained_acl_list = FINE_ACLS) + + + # Just for hygiene, put dm.json and verbose.log back. + shutil.move(os.path.join(tmp, 'dm.json'), + os.path.join(dm_dir, 'dm.json')) + shutil.move(os.path.join(tmp, 'verbose.log'), + os.path.join(dm_dir, 'verbose.log')) + os.rmdir(tmp) + +if '__main__' == __name__: + main(*sys.argv[1:]) |
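For orientation, here is a minimal, hypothetical sketch of driving the asynchronous ELFSymbolizer interface added in elf_symbolizer.py above: SymbolizeAsync() enqueues an address, the callback receives each result (possibly out of order), and Join() drains the batch. The library path, addr2line path, and addresses are placeholders.

```python
import elf_symbolizer

def on_symbol(sym_info, addr):
    # sym_info is an ELFSymbolInfo; addr is the callback_arg passed below.
    print '0x%x -> %s' % (addr, sym_info)

symbolizer = elf_symbolizer.ELFSymbolizer(
    'out/Release/lib/libskia.so',  # placeholder ELF path
    '/usr/bin/addr2line',          # placeholder addr2line path
    on_symbol,
    max_concurrent_jobs=2,
    prefix_to_remove='')           # keep source paths untouched
for addr in (0x1234, 0x5678):
    symbolizer.SymbolizeAsync(addr, callback_arg=addr)
symbolizer.Join()  # Returns once every callback has fired.
```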