# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library for getting system information during TensorFlow tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import re
import shlex
import subprocess
import tempfile
import time
from tensorflow.core.util import test_log_pb2
from tensorflow.python.platform import gfile
from tensorflow.tools.test import gpu_info_lib
from tensorflow.tools.test import system_info_lib
class MissingLogsError(Exception):
pass


def get_git_commit_sha():
  """Get the git commit SHA for this build.

  Attempts to read the SHA from the GIT_COMMIT environment variable, which
  should be available on Jenkins build agents.

  Returns:
    SHA hash of the git commit used for the build, if available.
  """
  return os.getenv("GIT_COMMIT")


def process_test_logs(name, test_name, test_args, benchmark_type,
                      start_time, run_time, log_files):
  """Gather test information and put it in a TestResults proto.

  Args:
    name: Benchmark target identifier.
    test_name: A unique bazel target, e.g. "//path/to:test".
    test_args: A list of strings, the arguments the target was run with.
    benchmark_type: A string matching a BenchmarkType enum name; the
      benchmark type for this target.
    start_time: Test starting time, in epoch seconds.
    run_time: Wall time the test ran for, in seconds.
    log_files: Paths to the log files.

  Returns:
    A TestResults proto.
  """
  results = test_log_pb2.TestResults()
  results.name = name
  results.target = test_name
  results.start_time = start_time
  results.run_time = run_time
  results.benchmark_type = test_log_pb2.TestResults.BenchmarkType.Value(
      benchmark_type.upper())

  # Gather source code information.
  git_sha = get_git_commit_sha()
  if git_sha:
    results.commit_id.hash = git_sha

  results.entries.CopyFrom(process_benchmarks(log_files))
  results.run_configuration.argument.extend(test_args)
  results.machine_configuration.CopyFrom(
      system_info_lib.gather_machine_configuration())
  return results
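

# A minimal usage sketch for process_test_logs; the target, arguments, and
# log path below are hypothetical:
#
#   results = process_test_logs(
#       name="//path/to:my_benchmark",
#       test_name="//path/to:my_benchmark",
#       test_args=["--benchmarks=all"],
#       benchmark_type="cpp_microbenchmark",
#       start_time=int(time.time()),
#       run_time=12.3,
#       log_files=["/tmp/my_benchmark.log"])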


def process_benchmarks(log_files):
  """Merge serialized BenchmarkEntries from log_files into a single proto."""
  benchmarks = test_log_pb2.BenchmarkEntries()
  for f in log_files:
    content = gfile.GFile(f, "rb").read()
    # MergeFromString returns the number of bytes parsed; a short count means
    # the serialized proto was truncated or corrupt.
    if benchmarks.MergeFromString(content) != len(content):
      raise IOError("Failed parsing benchmark entry from %s" % f)
  return benchmarks
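

# Sketch: merging two hypothetical reporter logs and reading the entries.
#
#   entries = process_benchmarks(["/tmp/bench.log.foo", "/tmp/bench.log.bar"])
#   for entry in entries.entry:
#     print(entry.name, entry.wall_time)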


def run_and_gather_logs(name, test_name, test_args, benchmark_type):
  """Run the bazel test given by test_name; gather and return the logs.

  Args:
    name: Benchmark target identifier.
    test_name: A unique bazel target, e.g. "//path/to:test".
    test_args: A string containing all arguments to run the target with;
      it is split with shlex before the target is invoked.
    benchmark_type: A string matching a BenchmarkType enum name; the
      benchmark type for this target.

  Returns:
    A tuple (test_results, test_adjusted_name), where
    test_results: A test_log_pb2.TestResults proto.
    test_adjusted_name: Unique benchmark name that consists of the
      benchmark name optionally followed by the GPU type.

  Raises:
    ValueError: If the test_name is not a valid target or the test
      executable does not exist.
    subprocess.CalledProcessError: If the target itself fails.
    IOError: If there are problems gathering test log output from the test.
    MissingLogsError: If we couldn't find benchmark logs.
  """
if not (test_name and test_name.startswith("//") and ".." not in test_name and
not test_name.endswith(":") and not test_name.endswith(":all") and
not test_name.endswith("...") and len(test_name.split(":")) == 2):
raise ValueError("Expected test_name parameter with a unique test, e.g.: "
"--test_name=//path/to:test")
test_executable = test_name.rstrip().strip("/").replace(":", "/")
  if gfile.Exists(os.path.join("bazel-bin", test_executable)):
    # Running in standalone mode from the root of the repository.
    test_executable = os.path.join("bazel-bin", test_executable)
  else:
    # Hopefully running in sandboxed mode.
    test_executable = os.path.join(".", test_executable)
test_adjusted_name = name
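  # If a known GPU is attached, suffix the benchmark name with the GPU model
  # so that results from different hardware are reported separately.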
gpu_config = gpu_info_lib.gather_gpu_devices()
if gpu_config:
gpu_name = gpu_config[0].model
gpu_short_name_match = re.search(r"Tesla (K40|K80|P100|V100)", gpu_name)
if gpu_short_name_match:
gpu_short_name = gpu_short_name_match.group(0)
test_adjusted_name = name + "|" + gpu_short_name.replace(" ", "_")
temp_directory = tempfile.mkdtemp(prefix="run_and_gather_logs")
mangled_test_name = (test_adjusted_name.strip("/")
.replace("|", "_").replace("/", "_").replace(":", "_"))
test_file_prefix = os.path.join(temp_directory, mangled_test_name)
test_file_prefix = "%s." % test_file_prefix
try:
if not gfile.Exists(test_executable):
raise ValueError("Executable does not exist: %s" % test_executable)
test_args = shlex.split(test_args)
# This key is defined in tf/core/util/reporter.h as
# TestReporter::kTestReporterEnv.
os.environ["TEST_REPORT_FILE_PREFIX"] = test_file_prefix
start_time = time.time()
subprocess.check_call([test_executable] + test_args)
run_time = time.time() - start_time
log_files = gfile.Glob("{}*".format(test_file_prefix))
if not log_files:
raise MissingLogsError("No log files found at %s." % test_file_prefix)
return (process_test_logs(
test_adjusted_name,
test_name=test_name,
test_args=test_args,
benchmark_type=benchmark_type,
start_time=int(start_time),
run_time=run_time,
log_files=log_files), test_adjusted_name)
finally:
try:
gfile.DeleteRecursively(temp_directory)
except OSError:
pass
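

# A minimal end-to-end sketch; the target and arguments are hypothetical:
#
#   results, adjusted_name = run_and_gather_logs(
#       name="//path/to:my_benchmark",
#       test_name="//path/to:my_benchmark",
#       test_args="--benchmarks=all",
#       benchmark_type="cpp_microbenchmark")
#   print(adjusted_name)
#   print(results)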