#!/usr/bin/python

"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Utilities for accessing Google Cloud Storage.

TODO(epoger): move this into tools/utils for broader use?
"""

# System-level imports
import os
import posixpath
import sys
try:
    from apiclient.discovery import build as build_service
except ImportError:
    print ('Missing google-api-python-client. Please install it; directions '
           'can be found at https://developers.google.com/api-client-library/'
           'python/start/installation')
    raise

# Local imports
import url_utils


def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
    """ Downloads a single file from Google Cloud Storage to local disk.

    The file is fetched over plain HTTP from storage.googleapis.com; the
    actual transfer is delegated to url_utils.copy_contents().

    Args:
      source_bucket: GCS bucket to download the file from
      source_path: full path (Posix-style) within that bucket; a leading '/'
          is tolerated and ignored
      dest_path: full path (local-OS-style) on local disk to copy the file to
      create_subdirs_if_needed: boolean; whether to create subdirectories as
          needed to create dest_path
    """
    # posixpath.join() throws away every earlier component when a later one
    # is absolute, so a source_path like '/dir/file' would otherwise drop the
    # host and bucket from the URL entirely.
    relative_path = source_path.lstrip('/')
    source_http_url = posixpath.join(
        'http://storage.googleapis.com', source_bucket, relative_path)
    url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
                            create_subdirs_if_needed=create_subdirs_if_needed)


def list_bucket_contents(bucket, subdir=None):
    """ Returns files in the Google Cloud Storage bucket as a (dirs, files)
    tuple.

    Only the immediate children of subdir are returned (the listing is not
    recursive), and every returned name is relative to subdir. All result
    pages are fetched, so large directories are listed completely.

    Uses the API documented at
    https://developers.google.com/storage/docs/json_api/v1/objects/list

    Args:
      bucket: name of the Google Storage bucket
      subdir: directory within the bucket to list, or None for root directory

    Returns:
      (dirs, files) tuple: dirs is a list of subdirectory names (without
      trailing slash), files is a list of file names.
    """
    # The GCS command relies on the subdir name (if any) ending with a slash.
    if subdir and not subdir.endswith('/'):
        subdir += '/'
    subdir_length = len(subdir) if subdir else 0

    storage = build_service('storage', 'v1')
    # nextPageToken must be part of the fields mask, otherwise list_next()
    # cannot tell that more pages exist and the listing is silently truncated
    # to the first page.
    request = storage.objects().list(
        bucket=bucket, delimiter='/',
        fields='items(name),prefixes,nextPageToken', prefix=subdir)

    dirs = []
    files = []
    while request is not None:
        results = request.execute()

        # Each page of results holds two subdicts:
        # prefixes: the full path of every directory within subdir, with
        #           trailing '/'
        # items: property dict for each file object within subdir
        #        (including 'name', which is full path of the object)
        for dir_fullpath in results.get('prefixes', []):
            dir_basename = dir_fullpath[subdir_length:]
            dirs.append(dir_basename[:-1])  # strip trailing slash

        for file_properties in results.get('items', []):
            file_basename = file_properties['name'][subdir_length:]
            # An object named exactly like the prefix (a "folder placeholder")
            # has an empty basename; it is the directory itself, not a file.
            if file_basename:
                files.append(file_basename)

        # list_next() returns None once there are no more pages.
        request = storage.objects().list_next(request, results)

    return (dirs, files)