Diffstat (limited to 'tensorflow/tools/test/gpu_info_lib.py')
-rw-r--r--  tensorflow/tools/test/gpu_info_lib.py  184
1 file changed, 184 insertions(+), 0 deletions(-)
diff --git a/tensorflow/tools/test/gpu_info_lib.py b/tensorflow/tools/test/gpu_info_lib.py
new file mode 100644
index 0000000000..cfb7d89920
--- /dev/null
+++ b/tensorflow/tools/test/gpu_info_lib.py
@@ -0,0 +1,184 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Library for getting system information during TensorFlow tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import ctypes as ct
+import platform
+
+import tensorflow as tf
+
+from tensorflow.core.util import test_log_pb2
+
+
+def _gather_gpu_devices_proc():
+ """Try to gather NVidia GPU device information via /proc/driver."""
+  dev_info = []
+  for f in tf.gfile.Glob("/proc/driver/nvidia/gpus/*/information"):
+    bus_id = f.split("/")[5]
+    key_values = dict(
+        line.rstrip().replace("\t", "").split(":", 1)
+        for line in tf.gfile.GFile(f, "r"))
+    key_values = dict(
+        (k.lower(), v.strip(" "))
+        for (k, v) in key_values.items())
+    info = test_log_pb2.GPUInfo()
+    info.model = key_values.get("model", "Unknown")
+    info.uuid = key_values.get("gpu uuid", "Unknown")
+    info.bus_id = bus_id
+    dev_info.append(info)
+  return dev_info
+
+
+class CUDADeviceProperties(ct.Structure):
+  # See $CUDA_HOME/include/cuda_runtime_api.h for the definition of
+  # the cudaDeviceProp struct.
+  _fields_ = [
+      ("name", ct.c_char * 256),
+      ("totalGlobalMem", ct.c_size_t),
+      ("sharedMemPerBlock", ct.c_size_t),
+      ("regsPerBlock", ct.c_int),
+      ("warpSize", ct.c_int),
+      ("memPitch", ct.c_size_t),
+      ("maxThreadsPerBlock", ct.c_int),
+      ("maxThreadsDim", ct.c_int * 3),
+      ("maxGridSize", ct.c_int * 3),
+      ("clockRate", ct.c_int),
+      ("totalConstMem", ct.c_size_t),
+      ("major", ct.c_int),
+      ("minor", ct.c_int),
+      ("textureAlignment", ct.c_size_t),
+      ("texturePitchAlignment", ct.c_size_t),
+      ("deviceOverlap", ct.c_int),
+      ("multiProcessorCount", ct.c_int),
+      ("kernelExecTimeoutEnabled", ct.c_int),
+      ("integrated", ct.c_int),
+      ("canMapHostMemory", ct.c_int),
+      ("computeMode", ct.c_int),
+      ("maxTexture1D", ct.c_int),
+      ("maxTexture1DMipmap", ct.c_int),
+      ("maxTexture1DLinear", ct.c_int),
+      ("maxTexture2D", ct.c_int * 2),
+      ("maxTexture2DMipmap", ct.c_int * 2),
+      ("maxTexture2DLinear", ct.c_int * 3),
+      ("maxTexture2DGather", ct.c_int * 2),
+      ("maxTexture3D", ct.c_int * 3),
+      ("maxTexture3DAlt", ct.c_int * 3),
+      ("maxTextureCubemap", ct.c_int),
+      ("maxTexture1DLayered", ct.c_int * 2),
+      ("maxTexture2DLayered", ct.c_int * 3),
+      ("maxTextureCubemapLayered", ct.c_int * 2),
+      ("maxSurface1D", ct.c_int),
+      ("maxSurface2D", ct.c_int * 2),
+      ("maxSurface3D", ct.c_int * 3),
+      ("maxSurface1DLayered", ct.c_int * 2),
+      ("maxSurface2DLayered", ct.c_int * 3),
+      ("maxSurfaceCubemap", ct.c_int),
+      ("maxSurfaceCubemapLayered", ct.c_int * 2),
+      ("surfaceAlignment", ct.c_size_t),
+      ("concurrentKernels", ct.c_int),
+      ("ECCEnabled", ct.c_int),
+      ("pciBusID", ct.c_int),
+      ("pciDeviceID", ct.c_int),
+      ("pciDomainID", ct.c_int),
+      ("tccDriver", ct.c_int),
+      ("asyncEngineCount", ct.c_int),
+      ("unifiedAddressing", ct.c_int),
+      ("memoryClockRate", ct.c_int),
+      ("memoryBusWidth", ct.c_int),
+      ("l2CacheSize", ct.c_int),
+      ("maxThreadsPerMultiProcessor", ct.c_int),
+      ("streamPrioritiesSupported", ct.c_int),
+      ("globalL1CacheSupported", ct.c_int),
+      ("localL1CacheSupported", ct.c_int),
+      ("sharedMemPerMultiprocessor", ct.c_size_t),
+      ("regsPerMultiprocessor", ct.c_int),
+      ("managedMemSupported", ct.c_int),
+      ("isMultiGpuBoard", ct.c_int),
+      ("multiGpuBoardGroupID", ct.c_int),
+      # Pad with extra space to avoid dereference crashes if future
+      # versions of CUDA extend the size of this struct.
+      ("__future_buffer", ct.c_char * 4096)]
+
+
+def _gather_gpu_devices_cudart():
+ """Try to gather NVidia GPU device information via libcudart."""
+ dev_info = []
+
+ system = platform.system()
+ if system == "Linux":
+ libcudart = ct.cdll.LoadLibrary("libcudart.so")
+ elif system == "Darwin":
+ libcudart = ct.cdll.LoadLibrary("libcudart.dylib")
+ elif system == "Windows":
+ libcudart = ct.windll.LoadLibrary("libcudart.dll")
+ else:
+ raise NotImplementedError("Cannot identify system.")
+
+ version = ct.c_int()
+ rc = libcudart.cudaRuntimeGetVersion(ct.byref(version))
+ if rc != 0:
+ raise ValueError("Could not get version")
+ if version.value < 6050:
+ raise NotImplementedError("CUDA version must be between >= 6.5")
+
+ device_count = ct.c_int()
+ libcudart.cudaGetDeviceCount(ct.byref(device_count))
+
+ for i in range(device_count.value):
+ properties = CUDADeviceProperties()
+ rc = libcudart.cudaGetDeviceProperties(ct.byref(properties), i)
+ if rc != 0:
+ raise ValueError("Could not get device properties")
+ pci_bus_id = " " * 13
+ rc = libcudart.cudaDeviceGetPCIBusId(ct.c_char_p(pci_bus_id), 13, i)
+ if rc != 0:
+ raise ValueError("Could not get device PCI bus id")
+
+ info = test_log_pb2.GPUInfo() # No UUID available
+ info.model = properties.name
+ info.bus_id = pci_bus_id
+ dev_info.append(info)
+
+ del properties
+
+ return dev_info
+
+
+def gather_gpu_devices():
+ """Gather gpu device info.
+
+ Returns:
+ A list of test_log_pb2.GPUInfo messages.
+ """
+ try:
+ # Prefer using /proc if possible, it provides the UUID.
+ dev_info = _gather_gpu_devices_proc()
+ if not dev_info:
+ raise ValueError("No devices found")
+ return dev_info
+ except (IOError, ValueError):
+ pass
+
+ try:
+ # Fall back on using libcudart
+ return _gather_gpu_devices_cudart()
+ except (OSError, ValueError, NotImplementedError):
+ return []
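A minimal usage sketch, assuming this module is importable as tensorflow.tools.test.gpu_info_lib from a TensorFlow checkout (as the path above suggests): a test harness simply iterates over the returned messages.

from tensorflow.tools.test import gpu_info_lib

for gpu in gpu_info_lib.gather_gpu_devices():
  # Each entry is a test_log_pb2.GPUInfo message; uuid is only populated
  # on the /proc code path.
  print("model=%s uuid=%s bus_id=%s" % (gpu.model, gpu.uuid, gpu.bus_id))

Because gather_gpu_devices() returns an empty list when neither /proc nor libcudart is usable, the loop degrades gracefully on CPU-only machines.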