#!/usr/bin/env python3
# Copyright 2019 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Use git bisect to find the Clang/LLVM commit causing a regression."""

import logging
import os
import re
import shutil
import subprocess
import sys


def execute(command, *args, expect_zero=True, **kwargs):
  """Execute |command| and return the returncode, stdout and stderr.

  |command|, |args| and |kwargs| are forwarded to subprocess.Popen, except that
  stdout and stderr are always captured through pipes (any values passed for
  them in |kwargs| are overridden). The captured output is decoded as UTF-8;
  bytes that are not valid UTF-8 are replaced with U+FFFD rather than raising,
  since the commands run here may print arbitrary bytes.

  Returns a (returncode, stdout, stderr) tuple where stdout and stderr are
  strings.

  Raises subprocess.CalledProcessError (carrying the decoded output) if
  |expect_zero| is true and the command exits with a nonzero status."""
  kwargs['stdout'] = subprocess.PIPE
  kwargs['stderr'] = subprocess.PIPE
  logging.debug('Running command: "%s"', str(command))
  process = subprocess.Popen(command, *args, **kwargs)
  stdout, stderr = process.communicate()
  # Be lenient when decoding: a strict decode would abort the whole bisection
  # with UnicodeDecodeError on the first non-UTF-8 byte.
  stdout = stdout.decode('utf-8', errors='replace')
  stderr = stderr.decode('utf-8', errors='replace')
  retcode = process.returncode
  logging.info('Command: "%s" returned: %d.\nStdout: %s.\nStderr: %s',
               str(command), retcode, stdout, stderr)
  if expect_zero and retcode != 0:
    raise subprocess.CalledProcessError(retcode,
                                        command,
                                        output=stdout,
                                        stderr=stderr)
  return retcode, stdout, stderr


def search_bisect_output(output):
  """Search |output| for a message indicating the culprit commit has been
  found. Return the 40 character commit hash from that message, or None if
  |output| contains no such message."""
  # TODO(metzman): Is it necessary to look for "good"?
  culprit_regex = re.compile(r'([a-z0-9]{40}) is the first (good|bad) commit')
  # Use search rather than match: match only succeeds when the message is at
  # the very start of |output|, so any preceding text would hide the culprit.
  match = culprit_regex.search(output)
  return match.group(1) if match is not None else None


class GitRepo:
  """Runs git commands, and drives git bisect, inside a single repo."""

  def __init__(self, repo_dir):
    # Directory handed to "git -C" for every command.
    self.repo_dir = repo_dir

  def do_command(self, git_subcommand):
    """Run git with the arguments in |git_subcommand| (a list of strings)
    inside the repo and return the result of execute."""
    git_prefix = ['git', '-C', self.repo_dir]
    return execute(git_prefix + git_subcommand)

  def test_commit(self, test_command):
    """Build LLVM at the currently checked out commit and run |test_command|
    in a shell. Mark the commit with 'git bisect good' when the command exits
    with 0 and with 'git bisect bad' otherwise. Return the culprit commit if
    the bisect output reports one, else None."""
    build_clang(self.repo_dir)
    test_status = execute(test_command, shell=True, expect_zero=False)[0]
    verdict = 'good' if test_status == 0 else 'bad'
    bisect_stdout = self.do_bisect_command(verdict)[1]
    return search_bisect_output(bisect_stdout)

  def bisect(self, good_commit, bad_commit, test_command):
    """Bisect between |good_commit| (assumed good) and |bad_commit| (assumed
    bad) using |test_command| as the oracle. Return the culprit commit."""
    self.bisect_start(good_commit, bad_commit, test_command)
    # Keep testing whichever commit git bisect checks out until it names the
    # culprit.
    while True:
      culprit = self.test_commit(test_command)
      if culprit is not None:
        return culprit

  def bisect_start(self, good_commit, bad_commit, test_command):
    """Begin a git bisect session and verify both endpoint commits."""
    self.do_bisect_command('start')
    # Do bad commit first since it is more likely to be recent.
    for commit, label in ((bad_commit, 'bad'), (good_commit, 'good')):
      self.test_start_commit(commit, label, test_command)

  def do_bisect_command(self, subcommand):
    """Run 'git bisect |subcommand|' (|subcommand| is a string) and return the
    result."""
    bisect_args = ['bisect', subcommand]
    return self.do_command(bisect_args)

  def test_start_commit(self, commit, label, test_command):
    """Check out and build |commit|, then confirm that |test_command| behaves
    as |label| ('good' or 'bad') claims before telling git bisect about it.
    Raise BisectError if the result contradicts |label|."""
    assert label in ('good', 'bad'), label
    self.do_command(['checkout', commit])
    build_clang(self.repo_dir)
    retcode = execute(test_command, shell=True, expect_zero=False)[0]
    test_passed = retcode == 0
    if label == 'good' and not test_passed:
      raise BisectError('Test command "%s" returns %d on first good commit %s' %
                        (test_command, retcode, commit))
    if label == 'bad' and test_passed:
      raise BisectError('Test command "%s" returns %d on first bad commit %s' %
                        (test_command, retcode, commit))

    self.do_bisect_command(label)


class BisectError(Exception):
  """Error that was encountered during bisection.

  Raised by GitRepo.test_start_commit when the test command's result on the
  initial good or bad commit contradicts that commit's label."""


def get_clang_build_env():
  """Return a copy of the current environment, minus CXXFLAGS and CFLAGS, for
  use when building Clang. os.environ itself is left untouched."""
  dropped_variables = ('CXXFLAGS', 'CFLAGS')
  return {
      name: value
      for name, value in os.environ.items()
      if name not in dropped_variables
  }


def install_clang_build_deps():
  """Install the packages needed to build clang using apt-get."""
  packages = [
      'build-essential', 'make', 'cmake', 'ninja-build', 'git', 'subversion',
      'g++-multilib'
  ]
  execute(['apt-get', 'install', '-y'] + packages)


def clone_with_retries(repo, local_path, num_retries=10):
  """Clone |repo| to |local_path| unless |local_path| is already a directory.
  Make up to |num_retries| attempts. Return None once the checkout exists and
  raise an Exception if every attempt fails."""
  if os.path.isdir(local_path):
    return
  clone_command = ['git', 'clone', repo, local_path]
  for _ in range(num_retries):
    # Remove whatever a previous failed attempt left behind before retrying.
    if os.path.isdir(local_path):
      shutil.rmtree(local_path)
    clone_status = execute(clone_command, expect_zero=False)[0]
    if clone_status == 0:
      return
  raise Exception('Could not checkout %s.' % repo)


def get_clang_target_arch():
  """Return the LLVM target name matching the machine type reported by
  'uname -m'. Raise an Exception for machine types other than x86_64 and
  aarch64."""
  machine = execute(['uname', '-m'])[1]
  # Checked in order: (substring of the uname output, LLVM target name).
  known_targets = (('x86_64', 'X86'), ('aarch64', 'AArch64'))
  for marker, llvm_target in known_targets:
    if marker in machine:
      return llvm_target
  raise Exception('Unsupported target: %s.' % machine)


def prepare_build(llvm_project_path):
  """Create the 'llvm-build' directory under $WORK if it does not exist and
  run cmake in it to configure a Ninja build of the LLVM sources found in
  |llvm_project_path|. Return the build directory."""
  build_dir = os.path.join(os.getenv('WORK'), 'llvm-build')
  if not os.path.exists(build_dir):
    os.mkdir(build_dir)

  cmake_defines = [
      '-DLIBCXX_ENABLE_SHARED=OFF',
      '-DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON',
      '-DLIBCXXABI_ENABLE_SHARED=OFF',
      '-DCMAKE_BUILD_TYPE=Release',
      '-DLLVM_ENABLE_PROJECTS=libcxx;libcxxabi;compiler-rt;clang',
      '-DLLVM_TARGETS_TO_BUILD=' + get_clang_target_arch(),
  ]
  llvm_source_dir = os.path.join(llvm_project_path, 'llvm')
  cmake_command = ['cmake', '-G', 'Ninja'] + cmake_defines + [llvm_source_dir]
  execute(cmake_command, env=get_clang_build_env(), cwd=build_dir)
  return build_dir


def build_clang(llvm_project_path):
  """Configure, build and install Clang from the sources in
  |llvm_project_path|."""
  # TODO(metzman): Merge Python checkout and build code with
  # checkout_build_install_llvm.sh.
  # TODO(metzman): Look into speeding this process using ccache.
  # TODO(metzman): Make this program capable of handling MSAN and i386 Clang
  # regressions.
  build_dir = prepare_build(llvm_project_path)
  ninja_command = ['ninja', '-C', build_dir, 'install']
  execute(ninja_command, env=get_clang_build_env())


def find_culprit_commit(test_command, good_commit, bad_commit):
  """Return the LLVM commit that introduced the bug revealed by running
  |test_command|. Clones llvm-project into $SRC if needed, then bisects with
  |good_commit| as the known good commit and |bad_commit| as the known bad
  commit. The culprit is also printed."""
  src_root = os.getenv('SRC')
  checkout_dir = os.path.join(src_root, 'llvm-project')
  clone_with_retries('https://github.com/llvm/llvm-project.git', checkout_dir)
  culprit = GitRepo(checkout_dir).bisect(good_commit, bad_commit, test_command)
  print('Culprit commit', culprit)
  return culprit


def main():
  # pylint: disable=line-too-long
  """Finds the culprit LLVM commit that introduced a clang regression.
  Expects three command line arguments: the test command, the known good
  commit and the known bad commit. Returns 0 after a completed bisection and 1
  (after printing a usage message to stderr) if arguments are missing.
  Can be tested using this command in a libsodium shell:
  python3 bisect_clang.py "cd /src/libsodium; make clean; cd -; compile && /out/secret_key_auth_fuzzer -runs=100" \
                          f7e52fbdb5a7af8ea0808e98458b497125a5eca1 \
                          8288453f6aac05080b751b680455349e09d49825
  """
  # pylint: enable=line-too-long
  # Fail with a readable message instead of an IndexError when arguments are
  # missing. Extra arguments are ignored.
  if len(sys.argv) < 4:
    print('Usage: bisect_clang.py <test_command> <good_commit> <bad_commit>',
          file=sys.stderr)
    return 1
  # TODO(metzman): Check CFLAGS for things like -fsanitize=fuzzer-no-link.
  # TODO(metzman): Allow test_command to be optional and for just build.sh to be
  # used instead.
  # TODO(metzman): Add in more automation so that the script can automatically
  # determine the commits used in last Clang roll.
  test_command, good_commit, bad_commit = sys.argv[1:4]
  # TODO(metzman): Make verbosity configurable.
  logging.getLogger().setLevel(logging.DEBUG)
  install_clang_build_deps()
  find_culprit_commit(test_command, good_commit, bad_commit)
  return 0


# Run the bisection only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
  sys.exit(main())