1 files changed, 542 insertions, 0 deletions
diff --git a/infra/bots/recipe_modules/flavor/android.py b/infra/bots/recipe_modules/flavor/android.py
new file mode 100644
index 0000000000..ed8d9af2d4
--- /dev/null
+++ b/infra/bots/recipe_modules/flavor/android.py
@@ -0,0 +1,542 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+from recipe_engine import recipe_api
+
+from . import default
+import subprocess  # TODO(borenet): No! Remove this.
+
+
+"""Android flavor, used for running code on Android."""
+
+
+class AndroidFlavor(default.DefaultFlavor):
+  def __init__(self, m):
+    """Configure adb paths, on-device directories and per-model tuning tables.
+
+    Args:
+      m: recipe module API, handed to DefaultFlavor.__init__; this method
+          then reads the builder's 'model' through self.m.
+    """
+    super(AndroidFlavor, self).__init__(m)
+    # Set by _adb() and the CPU helpers; cleanup_steps() checks it to decide
+    # whether there is a device log to dump and an adb server to kill.
+    self._ever_ran_adb = False
+
+    # Models listed in _golo_devices use a different adb binary and key file.
+    self._golo_devices = ['Nexus5x']
+    if self.m.vars.builder_cfg.get('model') in self._golo_devices:
+      self.ADB_BINARY = '/opt/infra-android/tools/adb'
+      self.ADB_PUB_KEY = ('/home/chrome-bot/.android/'
+                          'chrome_infrastructure_adbkey')
+    else:
+      self.ADB_BINARY = '/usr/bin/adb.1.0.35'
+      self.ADB_PUB_KEY = '/home/chrome-bot/.android/adbkey'
+
+    # Everything except binaries lives under data_root, which may be
+    # preserved across runs.
+    data_root = '/sdcard/revenge_of_the_skiabot/'
+    self.device_dirs = default.DeviceDirs(
+        bin_dir='/data/local/tmp/',
+        dm_dir=data_root + 'dm_out',
+        perf_data_dir=data_root + 'perf',
+        resource_dir=data_root + 'resources',
+        images_dir=data_root + 'images',
+        skp_dir=data_root + 'skps',
+        svg_dir=data_root + 'svgs',
+        tmp_dir=data_root)
+
+    # Devices we can't root.  If rooting fails on a device that is not
+    # listed here, the task fails to avoid perf inconsistencies.
+    self.rootable_blacklist = [
+      'GalaxyS6',
+      'GalaxyS7_G930A',
+      'GalaxyS7_G930FD',
+      'MotoG4',
+      'NVIDIA_Shield',
+    ]
+
+    # Maps device type -> CPU ids that should be scaled for nanobench.
+    # Many devices carry two (or more) different CPUs (e.g. big.LITTLE on
+    # Nexus5x); the ids listed are the biggest cpus on the device.  CPUs are
+    # grouped, so scaling the one listed scales its whole group.
+    # E.g. Nexus5x has cpu0-3 as one chip and cpu4-5 as the other, so for a
+    # single-threaded application (e.g. nanobench) one can disable cpu0-3
+    # and scale cpu4 to leave only cpu4 and cpu5 running at the same
+    # frequency.  See also disable_for_nanobench.
+    self.cpus_to_scale = {
+      'Nexus5x': [4],
+      'NexusPlayer': [0, 2], # has 2 identical chips, so scale them both.
+      'Pixel': [2],
+      'Pixel2XL': [4],
+    }
+
+    # Maps device type -> CPU ids that should be turned off when running
+    # single-threaded applications like nanobench.  The devices listed have
+    # multiple, different CPUs.  A lot of noise seems to come from nanobench
+    # running on the slow CPU and then on the big CPU; taking these CPUs
+    # offline forces the same CPU to be used for every perf run.
+    self.disable_for_nanobench = {
+      'Nexus5x': range(4),
+      'Pixel': range(2),
+      'Pixel2XL': range(4),
+      'PixelC': range(2),
+    }
+
+    # Maps device type -> value _scale_for_nanobench() writes to the kgsl
+    # gpuclk node to lock the GPU clock.
+    self.gpu_scaling = {
+      'Nexus5': 450000000,
+      'Nexus5x': 600000000,
+    }
+
+  def _run(self, title, *cmd, **kwargs):
+    """Run cmd as a step named title with start_dir/skia as the cwd.
+
+    Extra keyword arguments are forwarded to self.m.run, whose result is
+    returned.
+    """
+    skia_dir = self.m.path['start_dir'].join('skia')
+    with self.m.context(cwd=skia_dir):
+      return self.m.run(self.m.step, title, cmd=list(cmd), **kwargs)
+
+  def _adb(self, title, *cmd, **kwargs):
+    """Run an adb command as a step, retrying on models known to be flaky.
+
+    Args:
+      title: step name.
+      *cmd: arguments placed after the adb binary on the command line.
+      **kwargs: forwarded to self.m.run.with_retry; 'infra_step' is set to
+          True unless the caller supplies it.
+
+    Returns:
+      Whatever self.m.run.with_retry returns.
+    """
+    # The only non-infra adb steps (dm / nanobench) happen to not use _adb().
+    kwargs.setdefault('infra_step', True)
+
+    self._ever_ran_adb = True
+    flaky_devices = ['NexusPlayer', 'PixelC']
+    is_flaky = self.m.vars.builder_cfg.get('model') in flaky_devices
+    attempts = 3 if is_flaky else 1
+
+    def wait_for_device(attempt):
+      # Between attempts: kill the adb server, then wait for the device to
+      # come back.  Neither recovery step is allowed to fail the build.
+      recovery = [
+        ("kill adb server after failure of '%s' (attempt %d)",
+         'kill-server', 30),
+        ("wait for device after failure of '%s' (attempt %d)",
+         'wait-for-device', 180),
+      ]
+      for title_fmt, adb_cmd, limit in recovery:
+        self.m.run(self.m.step,
+                   title_fmt % (title, attempt),
+                   cmd=[self.ADB_BINARY, adb_cmd],
+                   infra_step=True, timeout=limit, abort_on_failure=False,
+                   fail_build_on_failure=False)
+
+    skia_dir = self.m.path['start_dir'].join('skia')
+    with self.m.context(cwd=skia_dir):
+      with self.m.env({'ADB_VENDOR_KEYS': self.ADB_PUB_KEY}):
+        return self.m.run.with_retry(self.m.step, title, attempts,
+                                     cmd=[self.ADB_BINARY] + list(cmd),
+                                     between_attempts_fn=wait_for_device,
+                                     **kwargs)
+
+  def _scale_for_dm(self):
+    """Bring CPUs back online and onto a demand-driven governor.
+
+    step() calls this for every binary other than nanobench.  Nothing is
+    done for devices in rootable_blacklist or when
+    self.m.vars.internal_hardware_label is set.
+    """
+    device = self.m.vars.builder_cfg.get('model')
+    if (device in self.rootable_blacklist or
+        self.m.vars.internal_hardware_label):
+      return
+
+    # This is paranoia... any CPUs we disabled while running nanobench
+    # ought to be back online now that we've restarted the device.
+    for i in self.disable_for_nanobench.get(device, []):
+      self._set_cpu_online(i, 1) # enable
+
+    # Copy before appending: .get() returns the very list stored in
+    # self.cpus_to_scale, and appending to it in place would permanently add
+    # cpu0 to that device's entry, so a later _scale_for_nanobench() would
+    # scale cpu0 as well.
+    scale_up = list(self.cpus_to_scale.get(device, [0]))
+    # For big.LITTLE devices, make sure we scale the LITTLE cores up;
+    # there is a chance they are still in powersave mode from when
+    # swarming slows things down for cooling down and charging.
+    if 0 not in scale_up:
+      scale_up.append(0)
+
+    # AndroidOne doesn't support ondemand governor. hotplug is similar.
+    governor = 'hotplug' if device == 'AndroidOne' else 'ondemand'
+    for i in scale_up:
+      self._set_governor(i, governor)
+
+  def _scale_for_nanobench(self):
+    """Pin CPU clocks (and the GPU clock, where configured) for nanobench.
+
+    Nothing is done for devices in rootable_blacklist or when
+    self.m.vars.internal_hardware_label is set.  Otherwise the CPUs from
+    cpus_to_scale go to the userspace governor at 0.6 of their max
+    frequency, the CPUs from disable_for_nanobench are taken offline, and
+    devices present in gpu_scaling get their GPU clock locked.
+    """
+    device = self.m.vars.builder_cfg.get('model')
+    if device in self.rootable_blacklist:
+      return
+    if self.m.vars.internal_hardware_label:
+      return
+
+    for cpu in self.cpus_to_scale.get(device, [0]):
+      self._set_governor(cpu, 'userspace')
+      self._scale_cpu(cpu, 0.6)
+
+    for cpu in self.disable_for_nanobench.get(device, []):
+      self._set_cpu_online(cpu, 0) # disable
+
+    if device not in self.gpu_scaling:
+      return
+
+    #https://developer.qualcomm.com/qfile/28823/lm80-p0436-11_adb_commands.pdf
+    # Section 3.2.1 Commands to put the GPU in performance mode
+    # Nexus 5 is  320000000 by default
+    # Nexus 5x is 180000000 by default
+    gpu_freq = self.gpu_scaling[device]
+    step_title = "Lock GPU to %d (and other perf tweaks)" % gpu_freq
+    attempts = 3
+    # The inline script stops thermald, writes the kgsl gpuclk, idle_timer
+    # and force_* nodes, and reads each one back to confirm the write took.
+    self.m.run.with_retry(self.m.python.inline, step_title, attempts,
+        program="""
+import os
+import subprocess
+import sys
+import time
+ADB = sys.argv[1]
+freq = sys.argv[2]
+idle_timer = "10000"
+
+log = subprocess.check_output([ADB, 'root'])
+# check for message like 'adbd cannot run as root in production builds'
+print log
+if 'cannot' in log:
+  raise Exception('adb root failed')
+
+subprocess.check_output([ADB, 'shell', 'stop', 'thermald'])
+
+subprocess.check_output([ADB, 'shell', 'echo "%s" > '
+    '/sys/class/kgsl/kgsl-3d0/gpuclk' % freq])
+
+actual_freq = subprocess.check_output([ADB, 'shell', 'cat '
+    '/sys/class/kgsl/kgsl-3d0/gpuclk']).strip()
+if actual_freq != freq:
+  raise Exception('Frequency (actual, expected) (%s, %s)'
+                  % (actual_freq, freq))
+
+subprocess.check_output([ADB, 'shell', 'echo "%s" > '
+    '/sys/class/kgsl/kgsl-3d0/idle_timer' % idle_timer])
+
+actual_timer = subprocess.check_output([ADB, 'shell', 'cat '
+    '/sys/class/kgsl/kgsl-3d0/idle_timer']).strip()
+if actual_timer != idle_timer:
+  raise Exception('idle_timer (actual, expected) (%s, %s)'
+                  % (actual_timer, idle_timer))
+
+for s in ['force_bus_on', 'force_rail_on', 'force_clk_on']:
+  subprocess.check_output([ADB, 'shell', 'echo "1" > '
+      '/sys/class/kgsl/kgsl-3d0/%s' % s])
+  actual_set = subprocess.check_output([ADB, 'shell', 'cat '
+      '/sys/class/kgsl/kgsl-3d0/%s' % s]).strip()
+  if actual_set != "1":
+    raise Exception('%s (actual, expected) (%s, 1)'
+                    % (s, actual_set))
+""",
+        args=[self.ADB_BINARY, gpu_freq],
+        infra_step=True,
+        timeout=30)
+
+  def _set_governor(self, cpu, gov):
+    """Set the cpufreq scaling governor of one CPU and verify it.
+
+    Args:
+      cpu: integer CPU id (N in /sys/devices/system/cpu/cpuN).
+      gov: governor name written to scaling_governor.
+
+    The inline script runs `adb root`, writes the governor, reads it back
+    and raises when the value differs; the step is tried up to 3 times.
+    """
+    self._ever_ran_adb = True
+    step_title = "Set CPU %d's governor to %s" % (cpu, gov)
+    attempts = 3
+    self.m.run.with_retry(self.m.python.inline, step_title, attempts,
+        program="""
+import os
+import subprocess
+import sys
+import time
+ADB = sys.argv[1]
+cpu = int(sys.argv[2])
+gov = sys.argv[3]
+
+log = subprocess.check_output([ADB, 'root'])
+# check for message like 'adbd cannot run as root in production builds'
+print log
+if 'cannot' in log:
+  raise Exception('adb root failed')
+
+subprocess.check_output([ADB, 'shell', 'echo "%s" > '
+    '/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor' % (gov, cpu)])
+actual_gov = subprocess.check_output([ADB, 'shell', 'cat '
+    '/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor' % cpu]).strip()
+if actual_gov != gov:
+  raise Exception('(actual, expected) (%s, %s)'
+                  % (actual_gov, gov))
+""",
+        args=[self.ADB_BINARY, cpu, gov],
+        infra_step=True,
+        timeout=30)
+
+
+  def _set_cpu_online(self, cpu, value):
+    """Set /sys/devices/system/cpu/cpu{N}/online to value (0 or 1).
+
+    Args:
+      cpu: integer CPU id N.
+      value: 1 to enable the CPU, 0 to disable it.
+
+    The inline script exits early when the CPU is already in the requested
+    state, otherwise writes the value and verifies it by reading it back;
+    the step is tried up to 3 times.
+    """
+    self._ever_ran_adb = True
+    verb = 'Enabling' if value else 'Disabling'
+    step_title = '%s CPU %d' % (verb, cpu)
+    attempts = 3
+    self.m.run.with_retry(self.m.python.inline, step_title, attempts,
+        program="""
+import os
+import subprocess
+import sys
+import time
+ADB = sys.argv[1]
+cpu = int(sys.argv[2])
+value = int(sys.argv[3])
+
+log = subprocess.check_output([ADB, 'root'])
+# check for message like 'adbd cannot run as root in production builds'
+print log
+if 'cannot' in log:
+  raise Exception('adb root failed')
+
+# If we try to echo 1 to an already online cpu, adb returns exit code 1.
+# So, check the value before trying to write it.
+prior_status = subprocess.check_output([ADB, 'shell', 'cat '
+    '/sys/devices/system/cpu/cpu%d/online' % cpu]).strip()
+if prior_status == str(value):
+  print 'CPU %d online already %d' % (cpu, value)
+  sys.exit()
+
+subprocess.check_output([ADB, 'shell', 'echo %s > '
+    '/sys/devices/system/cpu/cpu%d/online' % (value, cpu)])
+actual_status = subprocess.check_output([ADB, 'shell', 'cat '
+    '/sys/devices/system/cpu/cpu%d/online' % cpu]).strip()
+if actual_status != str(value):
+  raise Exception('(actual, expected) (%s, %d)'
+                  % (actual_status, value))
+""",
+        args=[self.ADB_BINARY, cpu, value],
+        infra_step=True,
+        timeout=30)
+
+
+  def _scale_cpu(self, cpu, target_percent):
+    """Lock one CPU to a fraction of its maximum frequency.
+
+    Args:
+      cpu: integer CPU id (N in /sys/devices/system/cpu/cpuN).
+      target_percent: multiplier applied to the highest available
+          frequency (a fraction, e.g. 0.6, despite the name).
+
+    The inline script picks the highest entry of
+    scaling_available_frequencies that does not exceed the target (falling
+    back to the maximum when none qualifies), writes it to
+    scaling_max_freq and scaling_setspeed, and raises when
+    scaling_cur_freq does not match afterwards.  The step is tried up to 3
+    times.
+    """
+    self._ever_ran_adb = True
+    step_title = 'Scale CPU %d to %f' % (cpu, target_percent)
+    attempts = 3
+    self.m.run.with_retry(self.m.python.inline, step_title, attempts,
+        program="""
+import os
+import subprocess
+import sys
+import time
+ADB = sys.argv[1]
+target_percent = float(sys.argv[2])
+cpu = int(sys.argv[3])
+log = subprocess.check_output([ADB, 'root'])
+# check for message like 'adbd cannot run as root in production builds'
+print log
+if 'cannot' in log:
+  raise Exception('adb root failed')
+
+root = '/sys/devices/system/cpu/cpu%d/cpufreq' %cpu
+
+# All devices we test on give a list of their available frequencies.
+available_freqs = subprocess.check_output([ADB, 'shell',
+    'cat %s/scaling_available_frequencies' % root])
+
+# Check for message like '/system/bin/sh: file not found'
+if available_freqs and '/system/bin/sh' not in available_freqs:
+  available_freqs = sorted(
+      int(i) for i in available_freqs.strip().split())
+else:
+  raise Exception('Could not get list of available frequencies: %s' %
+                  available_freqs)
+
+maxfreq = available_freqs[-1]
+target = int(round(maxfreq * target_percent))
+freq = maxfreq
+for f in reversed(available_freqs):
+  if f <= target:
+    freq = f
+    break
+
+print 'Setting frequency to %d' % freq
+
+# If scaling_max_freq is lower than our attempted setting, it won't take.
+# We must set min first, because if we try to set max to be less than min
+# (which sometimes happens after certain devices reboot) it returns a
+# perplexing permissions error.
+subprocess.check_output([ADB, 'shell', 'echo 0 > '
+    '%s/scaling_min_freq' % root])
+subprocess.check_output([ADB, 'shell', 'echo %d > '
+    '%s/scaling_max_freq' % (freq, root)])
+subprocess.check_output([ADB, 'shell', 'echo %d > '
+    '%s/scaling_setspeed' % (freq, root)])
+time.sleep(5)
+actual_freq = subprocess.check_output([ADB, 'shell', 'cat '
+    '%s/scaling_cur_freq' % root]).strip()
+if actual_freq != str(freq):
+  raise Exception('(actual, expected) (%s, %d)'
+                  % (actual_freq, freq))
+""",
+        args=[self.ADB_BINARY, str(target_percent), cpu],
+        infra_step=True,
+        timeout=30)
+
+  def install(self):
+    """Create the device resource directory and, for ASAN builds, set up ASAN.
+
+    When 'ASAN' is among self.m.vars.extra_tokens, an inline script runs
+    `adb root` and `adb disable-verity` (rebooting unless verity was
+    already disabled), runs the NDK's asan_device_setup script -- retrying
+    once after a --revert if it fails -- and then waits for the device to
+    finish booting.
+    """
+    resource_dir = self.device_dirs.resource_dir
+    self._adb('mkdir ' + resource_dir, 'shell', 'mkdir', '-p', resource_dir)
+    if 'ASAN' not in self.m.vars.extra_tokens:
+      return
+
+    asan_setup = self.m.vars.slave_dir.join(
+        'android_ndk_linux', 'toolchains', 'llvm', 'prebuilt',
+        'linux-x86_64', 'lib64', 'clang', '6.0.2', 'bin',
+        'asan_device_setup')
+    self.m.run(self.m.python.inline, 'Setting up device to run ASAN',
+        program="""
+import os
+import subprocess
+import sys
+import time
+ADB = sys.argv[1]
+ASAN_SETUP = sys.argv[2]
+
+def wait_for_device():
+  while True:
+    time.sleep(5)
+    print 'Waiting for device'
+    subprocess.check_output([ADB, 'wait-for-device'])
+    bit1 = subprocess.check_output([ADB, 'shell', 'getprop',
+                                   'dev.bootcomplete'])
+    bit2 = subprocess.check_output([ADB, 'shell', 'getprop',
+                                   'sys.boot_completed'])
+    if '1' in bit1 and '1' in bit2:
+      print 'Device detected'
+      break
+
+log = subprocess.check_output([ADB, 'root'])
+# check for message like 'adbd cannot run as root in production builds'
+print log
+if 'cannot' in log:
+  raise Exception('adb root failed')
+
+output = subprocess.check_output([ADB, 'disable-verity'])
+print output
+
+if 'already disabled' not in output:
+  print 'Rebooting device'
+  subprocess.check_output([ADB, 'reboot'])
+  wait_for_device()
+
+def installASAN(revert=False):
+  # ASAN setup script is idempotent, either it installs it or
+  # says it's installed.  Returns True on success, false otherwise.
+  out = subprocess.check_output([ADB, 'wait-for-device'])
+  print out
+  cmd = [ASAN_SETUP]
+  if revert:
+    cmd = [ASAN_SETUP, '--revert']
+  process = subprocess.Popen(cmd, env={'ADB': ADB},
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+  # this also blocks until command finishes
+  (stdout, stderr) = process.communicate()
+  print stdout
+  print 'Stderr: %s' % stderr
+  return process.returncode == 0
+
+if not installASAN():
+  print 'Trying to revert the ASAN install and then re-install'
+  # ASAN script sometimes has issues if it was interrupted or partially applied
+  # Try reverting it, then re-enabling it
+  if not installASAN(revert=True):
+    raise Exception('reverting ASAN install failed')
+
+  # Sleep because device does not reboot instantly
+  time.sleep(10)
+
+  if not installASAN():
+    raise Exception('Tried twice to setup ASAN and failed.')
+
+# Sleep because device does not reboot instantly
+time.sleep(10)
+wait_for_device()
+""",
+        args=[self.ADB_BINARY, asan_setup],
+        infra_step=True,
+        timeout=300,
+        abort_on_failure=True)
+
+  def cleanup_steps(self):
+    """Dump the device log, quarantine the bot if warranted, and stop adb.
+
+    The log dump and the final `adb kill-server` only happen when adb was
+    used during this run (self._ever_ran_adb).
+    """
+    if self._ever_ran_adb:
+      # The inline script prints `adb logcat -d`; for 11-token lines marked
+      # 'F' that carry a 'pc' address it appends addr2line output when the
+      # named binary exists under skia_out.
+      dump_log = """
+          import os
+          import subprocess
+          import sys
+          out = sys.argv[1]
+          log = subprocess.check_output(['%s', 'logcat', '-d'])
+          for line in log.split('\\n'):
+            tokens = line.split()
+            if len(tokens) == 11 and tokens[-7] == 'F' and tokens[-3] == 'pc':
+              addr, path = tokens[-2:]
+              local = os.path.join(out, os.path.basename(path))
+              if os.path.exists(local):
+                sym = subprocess.check_output(['addr2line', '-Cfpe', local, addr])
+                line = line.replace(addr, addr + ' ' + sym.strip())
+            print line
+          """ % self.ADB_BINARY
+      self.m.run(self.m.python.inline, 'dump log', program=dump_log,
+                 args=[self.m.vars.skia_out],
+                 infra_step=True,
+                 timeout=300,
+                 abort_on_failure=False)
+
+    # Only quarantine the bot if the first failed step
+    # is an infra step. If, instead, we did this for any infra failures, we
+    # would do this too much. For example, if a Nexus 10 died during dm
+    # and the following pull step would also fail "device not found" - causing
+    # us to run the shutdown command when the device was probably not in a
+    # broken state; it was just rebooting.
+    failures = self.m.run.failed_steps
+    if failures and isinstance(failures[0], recipe_api.InfraFailure):
+      quarantine_file = ('/home/chrome-bot/%s.force_quarantine' %
+                         self.m.vars.swarming_bot_id)
+      self.m.file.write_text('Quarantining Bot', quarantine_file, ' ')
+
+    if self._ever_ran_adb:
+      self._adb('kill adb server', 'kill-server')
+
+  def step(self, name, cmd, **kwargs):
+    """Push a binary to the device, run it there, and exit with its status.
+
+    Args:
+      name: not used by this implementation.
+      cmd: command list; cmd[0] names the binary under skia_out, the whole
+          list becomes the on-device command line.
+      **kwargs: not used by this implementation.
+
+    The command is wrapped in a small shell script that records the exit
+    status in <bin_dir>rc; an inline script runs it through `adb shell` and
+    exits with that status (or 1 when it cannot be parsed).
+    """
+    binary = cmd[0]
+    if binary == 'nanobench':
+      self._scale_for_nanobench()
+    else:
+      self._scale_for_dm()
+
+    bin_dir = self.device_dirs.bin_dir
+    self._adb('push %s' % binary,
+              'push', self.m.vars.skia_out.join(binary), bin_dir)
+
+    sh = '%s.sh' % binary
+    local_sh = self.m.vars.tmp_dir.join(sh)
+    cmdline = subprocess.list2cmdline(map(str, cmd))
+    self.m.run.writefile(
+        local_sh,
+        'set -x; %s%s; echo $? >%src' % (bin_dir, cmdline, bin_dir))
+    self._adb('push %s' % sh, 'push', local_sh, bin_dir)
+
+    self._adb('clear log', 'logcat', '-c')
+    run_on_device = """
+    import subprocess
+    import sys
+    bin_dir = sys.argv[1]
+    sh      = sys.argv[2]
+    subprocess.check_call(['%s', 'shell', 'sh', bin_dir + sh])
+    try:
+      sys.exit(int(subprocess.check_output(['%s', 'shell', 'cat',
+                                            bin_dir + 'rc'])))
+    except ValueError:
+      print "Couldn't read the return code.  Probably killed for OOM."
+      sys.exit(1)
+    """ % (self.ADB_BINARY, self.ADB_BINARY)
+    self.m.python.inline('%s' % binary, run_on_device,
+                         args=[bin_dir, sh])
+
+  def copy_file_to_device(self, host, device):
+    """Push the host file at `host` to the path `device` with `adb push`."""
+    step_title = 'push %s %s' % (host, device)
+    self._adb(step_title, 'push', host, device)
+
+  def copy_directory_contents_to_device(self, host, device):
+    """Push every file under the host directory to the device directory.
+
+    Files are pushed one at a time by an inline script that walks `host`,
+    skips directories whose path relative to `host` starts with '.', and
+    resolves symlinks (os.path.realpath) before pushing.
+    """
+    # Copy the tree, avoiding hidden directories and resolving symlinks.
+    push_tree = """
+    import os
+    import subprocess
+    import sys
+    host   = sys.argv[1]
+    device = sys.argv[2]
+    for d, _, fs in os.walk(host):
+      p = os.path.relpath(d, host)
+      if p != '.' and p.startswith('.'):
+        continue
+      for f in fs:
+        print os.path.join(p,f)
+        subprocess.check_call(['%s', 'push',
+                               os.path.realpath(os.path.join(host, p, f)),
+                               os.path.join(device, p, f)])
+    """ % self.ADB_BINARY
+    step_title = 'push %s/* %s' % (host, device)
+    self.m.run(self.m.python.inline, step_title,
+               program=push_tree, args=[host, device], infra_step=True)
+
+  def copy_directory_contents_to_host(self, device, host):
+    """Pull the device path `device` to the host path `host` with `adb pull`."""
+    step_title = 'pull %s %s' % (device, host)
+    self._adb(step_title, 'pull', device, host)
+
+  def read_file_on_device(self, path, **kwargs):
+    """Return the contents of `path` on the device, or None.
+
+    The file is read with `adb shell cat`; trailing whitespace is stripped.
+    None is returned when the step yields no result or empty stdout.
+    Extra keyword arguments are forwarded to _adb().
+    """
+    result = self._adb('read %s' % path,
+                       'shell', 'cat', path,
+                       stdout=self.m.raw_io.output(),
+                       **kwargs)
+    if result and result.stdout:
+      return result.stdout.rstrip()
+    return None
+
+  def remove_file_on_device(self, path):
+    """Delete `path` on the device with `rm -f`."""
+    step_title = 'rm %s' % path
+    self._adb(step_title, 'shell', 'rm', '-f', path)
+
+  def create_clean_device_dir(self, path):
+    """Remove `path` on the device recursively, then recreate it empty."""
+    shell_cmds = (
+      ('rm', '-rf'),
+      ('mkdir', '-p'),
+    )
+    for tool, flag in shell_cmds:
+      self._adb('%s %s' % (tool, path), 'shell', tool, flag, path)