diff options
Diffstat (limited to 'tools/pyutils/gs_utils.py')
-rwxr-xr-x | tools/pyutils/gs_utils.py | 82 |
1 files changed, 82 insertions, 0 deletions
#!/usr/bin/python
# From diff: tools/pyutils/gs_utils.py (new file, mode 100755, blob 745276e693)

"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Utilities for accessing Google Cloud Storage.

TODO(epoger): move this into tools/utils for broader use?
"""

# System-level imports
import os
import posixpath
import sys

# Imports from third-party code
# The API client is not assumed to be installed; its checkout directory under
# third_party/externals is appended to sys.path before importing from it.
TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
APICLIENT_DIRECTORY = os.path.join(
    TRUNK_DIRECTORY, 'third_party', 'externals', 'google-api-python-client')
if APICLIENT_DIRECTORY not in sys.path:
    sys.path.append(APICLIENT_DIRECTORY)
from googleapiclient.discovery import build as build_service

# Local imports
import url_utils


def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
    """Downloads a single file from Google Cloud Storage to local disk.

    Args:
        source_bucket: GCS bucket to download the file from
        source_path: full path (Posix-style) within that bucket; leading
            slashes, if any, are ignored
        dest_path: full path (local-OS-style) on local disk to copy the file
            to
        create_subdirs_if_needed: boolean; whether to create subdirectories
            as needed to create dest_path
    """
    # posixpath.join() throws away every component that precedes one
    # starting with '/', so an "absolute" source_path would otherwise drop
    # the host and bucket from the URL.
    relative_path = source_path.lstrip('/')
    source_http_url = posixpath.join(
        'http://storage.googleapis.com', source_bucket, relative_path)
    url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
                            create_subdirs_if_needed=create_subdirs_if_needed)


def list_bucket_contents(bucket, subdir=None):
    """Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.

    Uses the API documented at
    https://developers.google.com/storage/docs/json_api/v1/objects/list

    All result pages are fetched, so the listing is complete even when the
    service splits it across several responses.

    Args:
        bucket: name of the Google Storage bucket
        subdir: directory within the bucket to list, or None for root
            directory

    Returns:
        (dirs, files) tuple of lists. Each entry is a name relative to
        subdir; directory names have their trailing '/' removed.
    """
    # The GCS command relies on the subdir name (if any) ending with a slash.
    if subdir and not subdir.endswith('/'):
        subdir += '/'
    subdir_length = len(subdir) if subdir else 0

    storage = build_service('storage', 'v1')
    objects = storage.objects()
    # nextPageToken must be part of the fields mask; without it list_next()
    # cannot tell that more pages exist and the listing would be truncated.
    request = objects.list(
        bucket=bucket, delimiter='/',
        fields='items(name),prefixes,nextPageToken', prefix=subdir)

    dirs = []
    files = []
    while request is not None:
        results = request.execute()

        # Each page of the GCS response holds two subdicts:
        # prefixes: the full path of every directory within subdir, with
        #           trailing '/'
        # items: property dict for each file object within subdir
        #        (including 'name', which is full path of the object)
        for dir_fullpath in results.get('prefixes', []):
            # Drop the subdir prefix and the trailing slash.
            dirs.append(dir_fullpath[subdir_length:-1])
        for file_properties in results.get('items', []):
            files.append(file_properties['name'][subdir_length:])

        # list_next() returns None once the last page has been consumed.
        request = objects.list_next(request, results)
    return (dirs, files)