#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A library to assist automatically downloading files.

This library is used by scripts that download tarballs, zipfiles, etc. as part
of the build process.
"""

import hashlib
import http_download
import os.path
import re
import shutil
import sys
import time
import urllib2

SOURCE_STAMP = 'SOURCE_URL'
HASH_STAMP = 'SOURCE_SHA1'


# Designed to handle more general inputs than sys.platform because the platform
# name may come from the command line.
PLATFORM_COLLAPSE = {
    'windows': 'windows',
    'win32': 'windows',
    'cygwin': 'windows',
    'linux': 'linux',
    'linux2': 'linux',
    'linux3': 'linux',
    'darwin': 'mac',
    'mac': 'mac',
}

ARCH_COLLAPSE = {
    'i386'  : 'x86',
    'i686'  : 'x86',
    'x86_64': 'x86',
    'armv7l': 'arm',
}


class HashError(Exception):
  def __init__(self, download_url, expected_hash, actual_hash):
    self.download_url = download_url
    self.expected_hash = expected_hash
    self.actual_hash = actual_hash

  def __str__(self):
    return 'Got hash "%s" but expected hash "%s" for "%s"' % (
        self.actual_hash, self.expected_hash, self.download_url)


def PlatformName(name=None):
  if name is None:
    name = sys.platform
  return PLATFORM_COLLAPSE[name]


def ArchName(name=None):
  if name is None:
    if PlatformName() == 'windows':
      # TODO(pdox): Figure out how to auto-detect 32-bit vs 64-bit Windows.
      name = 'i386'
    else:
      import platform
      name = platform.machine()
  return ARCH_COLLAPSE[name]
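
# For example, per the collapse tables above (unrecognized names raise
# KeyError, since these lookups are unguarded):
#
#   PlatformName('linux2')  -> 'linux'
#   ArchName('x86_64')      -> 'x86'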

def EnsureFileCanBeWritten(filename):
  directory = os.path.dirname(filename)
  # A bare filename has an empty dirname; there is nothing to create then,
  # and os.makedirs('') would raise OSError.
  if directory and not os.path.exists(directory):
    os.makedirs(directory)


def WriteData(filename, data):
  EnsureFileCanBeWritten(filename)
  f = open(filename, 'wb')
  f.write(data)
  f.close()


def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
  EnsureFileCanBeWritten(filename)
  dst = open(filename, 'wb')
  try:
    while True:
      data = stream.read(chunk_size)
      if len(data) == 0:
        break
      dst.write(data)
      if verbose:
        # Indicate that we're still writing.
        sys.stdout.write('.')
        sys.stdout.flush()
  finally:
    if verbose:
      sys.stdout.write('\n')
    dst.close()


def DoesStampMatch(stampfile, expected, index):
  """Check a stamp file against an expected value.

  Returns a truthy description if line `index` of the stamp matches
  `expected`, or if the stamp starts with 'manual'; returns False otherwise,
  including when the stamp file is missing or has too few lines.
  """
  try:
    f = open(stampfile, 'r')
    stamp = f.read()
    f.close()
    if stamp.split('\n')[index] == expected:
      return "already up-to-date."
    elif stamp.startswith('manual'):
      return "manual override."
    return False
  except (IOError, IndexError):
    return False
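
# Illustrative sketch (hypothetical path and URL): WriteStamp (defined just
# below) records a value, and DoesStampMatch then reports a match on the
# chosen line:
#
#   WriteStamp('/tmp/out/SOURCE_URL', 'http://example.com/foo.tgz')
#   DoesStampMatch('/tmp/out/SOURCE_URL', 'http://example.com/foo.tgz', 0)
#     -> "already up-to-date."
#
# A stamp file whose contents begin with 'manual' (typically edited by hand)
# short-circuits the comparison instead.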


def WriteStamp(stampfile, data):
  EnsureFileCanBeWritten(stampfile)
  f = open(stampfile, 'w')
  f.write(data)
  f.close()


def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
  stampfile = os.path.join(path, stamp_name)

  # Check if the stampfile is older than the minimum last mod time
  if min_time:
    try:
      stamp_time = os.stat(stampfile).st_mtime
      if stamp_time <= min_time:
        return False
    except OSError:
      return False

  return DoesStampMatch(stampfile, stamp_contents, index)
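
# Illustrative sketch (hypothetical names): require the stamp both to match
# its recorded contents and to be newer than some reference file's mtime:
#
#   ref_time = os.stat('tools/extractor').st_mtime  # hypothetical path
#   StampIsCurrent('dl', SOURCE_STAMP, url, min_time=ref_time)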


def WriteSourceStamp(path, url):
  stampfile = os.path.join(path, SOURCE_STAMP)
  WriteStamp(stampfile, url)


def WriteHashStamp(path, hash_val):
  hash_stampfile = os.path.join(path, HASH_STAMP)
  WriteStamp(hash_stampfile, hash_val)


def Retry(op, *args):
  # Windows seems to be prone to having commands that delete files or
  # directories fail.  We currently do not have a complete understanding of
  # why, and as a workaround we simply retry the command a few times.
  # It appears that file locks are hanging around longer than they should,
  # possibly a secondary effect of processes hanging around longer than they
  # should (e.g. sel_ldr does not exit immediately when we kill a browser).
  # Virus checkers can also accidentally prevent files from being deleted,
  # but that shouldn't be a problem on the bots.
  if sys.platform in ('win32', 'cygwin'):
    count = 0
    while True:
      try:
        op(*args)
        break
      except Exception:
        sys.stdout.write("FAILED: %s %s\n" % (op.__name__, repr(args)))
        count += 1
        if count < 5:
          sys.stdout.write("RETRY: %s %s\n" % (op.__name__, repr(args)))
          time.sleep(pow(2, count))
        else:
          # Don't mask the exception.
          raise
  else:
    op(*args)
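
# Illustrative use (hypothetical path): route deletions through Retry so a
# transient Windows file lock does not fail the build outright:
#
#   Retry(os.unlink, 'out/stale_download.tgz')
#
# RemoveFile and RemoveDir below do exactly this, with os.unlink and
# shutil.rmtree respectively.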


def MoveDirCleanly(src, dst):
  RemoveDir(dst)
  MoveDir(src, dst)


def MoveDir(src, dst):
  Retry(shutil.move, src, dst)


def RemoveDir(path):
  if os.path.exists(path):
    Retry(shutil.rmtree, path)


def RemoveFile(path):
  if os.path.exists(path):
    Retry(os.unlink, path)


def _HashFileHandle(fh):
  """sha1 of a file like object.

  Arguments:
    fh: file handle like object to hash.
  Returns:
    sha1 as a string.
  """
  hasher = hashlib.sha1()
  try:
    while True:
      data = fh.read(4096)
      if not data:
        break
      hasher.update(data)
  finally:
    fh.close()
  return hasher.hexdigest()


def HashFile(filename):
  """sha1 a file on disk.

  Arguments:
    filename: filename to hash.
  Returns:
    sha1 as a string.
  """
  fh = open(filename, 'rb')
  return _HashFileHandle(fh)


def HashUrlByDownloading(url):
  """sha1 the data at an url.

  Arguments:
    url: url to download from.
  Returns:
    sha1 of the data at the url.
  """
  try:
    fh = urllib2.urlopen(url)
  except Exception:
    sys.stderr.write("Failed fetching URL: %s\n" % url)
    raise
  return _HashFileHandle(fh)


# Attempts to get the SHA1 hash of a file given a URL by looking for
# an adjacent file with a ".sha1hash" suffix.  This saves having to
# download a large tarball just to get its hash.  Otherwise, we fall
# back to downloading the main file.
def HashUrl(url):
  hash_url = '%s.sha1hash' % url
  try:
    fh = urllib2.urlopen(hash_url)
    data = fh.read(100)
    fh.close()
  except urllib2.HTTPError, exn:
    if exn.code == 404:
      return HashUrlByDownloading(url)
    raise
  else:
    if not re.match('[0-9a-f]{40}\n?$', data):
      raise AssertionError('Bad SHA1 hash file: %r' % data)
    return data.strip()
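
# Illustrative sketch: for a tarball at http://example.com/foo.tgz (a
# hypothetical URL), HashUrl first tries http://example.com/foo.tgz.sha1hash,
# whose entire payload must be 40 lowercase hex digits plus an optional
# trailing newline, e.g.:
#
#   2fd4e1c67a2d28fced849ee1bb76e7391b93eb12
#
# Only a 404 on that sidecar triggers the fallback of hashing the full
# download.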


def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
            hash_val=None, keep=False, verbose=False, stamp_index=0):
  """Synchronize a destination file with a URL

  if the URL does not match the URL stamp, then we must re-download it.

  Arugments:
    url: the url which will to compare against and download
    filename: the file to create on download
    path: the download path
    stamp_dir: the filename containing the URL stamp to check against
    hash_val: if set, the expected hash which must be matched
    verbose: prints out status as it runs
    stamp_index: index within the stamp file to check.
  Returns:
    True if the file is replaced
    False if the file is not replaced
  Exception:
    HashError: if the hash does not match
  """

  assert url and filename

  # If we are not keeping the tarball, its presence on disk does not matter
  # and the stamps alone decide.  If we are keeping it, it must actually
  # exist before the download can be skipped.
  if keep:
    tarball_ok = os.path.isfile(filename)
  else:
    tarball_ok = True

  # If we don't need the tarball and the stamp file matches the url, then
  # we must be up to date.  If the URL differs but the recorded hash matches
  # the one we'll insist the tarball has, then that's good enough too.
  # TODO(mcgrathr): Download the .sha1hash file first to compare with
  # the cached hash, in case --file-hash options weren't used.
  if tarball_ok and stamp_dir is not None:
    if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
      if verbose:
        print '%s is already up to date.' % filename
      return False
    if (hash_val is not None and
        StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)):
      if verbose:
        print '%s is identical to the up to date file.' % filename
      return False

  if verbose:
    print 'Updating %s\n\tfrom %s.' % (filename, url)
  EnsureFileCanBeWritten(filename)
  http_download.HttpDownload(url, filename)

  if hash_val:
    tar_hash = HashFile(filename)
    if hash_val != tar_hash:
      raise HashError(actual_hash=tar_hash, expected_hash=hash_val,
                      download_url=url)

  return True
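
# Illustrative end-to-end sketch (all values hypothetical). SyncURL does not
# write stamps itself, so a caller records them after a successful download
# to make the next run a no-op:
#
#   url = 'http://example.com/toolchain.tgz'
#   if SyncURL(url, filename='dl/toolchain.tgz', stamp_dir='dl',
#              verbose=True):
#     WriteSourceStamp('dl', url)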