aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools/pyutils/gs_utils.py
blob: 2659c03e63198996d22a295fa008053173b59042 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/python

"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Utilities for accessing Google Cloud Storage.

TODO(epoger): move this into tools/utils for broader use?
"""

# System-level imports
import os
import posixpath
import sys
try:
  from apiclient.discovery import build as build_service
except ImportError:
  print ('Missing google-api-python-client.  Please install it; directions '
         'can be found at https://developers.google.com/api-client-library/'
         'python/start/installation')
  raise

# Local imports
import url_utils


def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
  """ Downloads a single file from Google Cloud Storage to local disk.

  The file is fetched over HTTP from storage.googleapis.com and written via
  url_utils.copy_contents().

  Args:
    source_bucket: GCS bucket to download the file from
    source_path: full path (Posix-style) within that bucket; any leading
        slashes are ignored
    dest_path: full path (local-OS-style) on local disk to copy the file to
    create_subdirs_if_needed: boolean; whether to create subdirectories as
        needed to create dest_path
  """
  # posixpath.join() throws away every earlier component as soon as a later
  # one is absolute, so a leading '/' on the bucket or the path would silently
  # drop the host (and bucket) from the URL.  Strip leading slashes first.
  source_http_url = posixpath.join(
      'http://storage.googleapis.com',
      source_bucket.lstrip('/'),
      source_path.lstrip('/'))
  url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
                          create_subdirs_if_needed=create_subdirs_if_needed)


def list_bucket_contents(bucket, subdir=None):
  """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.

  Uses the API documented at
  https://developers.google.com/storage/docs/json_api/v1/objects/list

  The listing is paginated by the service; every page is fetched so that the
  result is complete even for directories with many entries.

  Args:
    bucket: name of the Google Storage bucket
    subdir: directory within the bucket to list, or None for root directory

  Returns:
    (dirs, files) tuple, where dirs is a list of the immediate subdirectory
    names (no trailing slash) and files is a list of the file names, both
    relative to subdir.
  """
  # The GCS command relies on the subdir name (if any) ending with a slash.
  if subdir and not subdir.endswith('/'):
    subdir += '/'
  subdir_length = len(subdir) if subdir else 0

  storage = build_service('storage', 'v1')
  objects = storage.objects()
  # 'fields' restricts what the response contains, so nextPageToken must be
  # requested explicitly; list_next() needs it to build the follow-up request.
  command = objects.list(
      bucket=bucket, delimiter='/',
      fields='items(name),prefixes,nextPageToken', prefix=subdir)

  dirs = []
  files = []
  while command is not None:
    results = command.execute()

    # Each page of results holds two subdicts (either may be absent):
    # prefixes: the full path of every directory within subdir, with
    #           trailing '/'
    # items: property dict for each file object within subdir
    #        (including 'name', which is full path of the object)
    for dir_fullpath in results.get('prefixes', []):
      dir_basename = dir_fullpath[subdir_length:]
      dirs.append(dir_basename[:-1])  # strip trailing slash
    for file_properties in results.get('items', []):
      file_fullpath = file_properties['name']
      files.append(file_fullpath[subdir_length:])

    # list_next() returns None once there are no further pages.
    command = objects.list_next(command, results)
  return (dirs, files)