aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools
diff options
context:
space:
mode:
authorGravatar Hadrien Chauvin <hadrienchauvin@gmail.com>2018-02-05 06:05:06 -0800
committerGravatar Copybara-Service <copybara-piper@google.com>2018-02-05 06:06:59 -0800
commit02873d85f529104bb8243a1569018c1053e48419 (patch)
treec5bf363f17220ff1b99c5b1fff4c79d830494c07 /tools
parenta610a2b77893ed9edd3038cffe803bce68f83a80 (diff)
sha256 python tool: stream instead of reading whole file beforehand
This is a rather small change to a Python tool used to produce a SHA256 hash. Currently, the whole file is loaded into memory before the hash is computed, which causes problems when large files are processed. For instance, github.com/bazelbuild/rules_docker uses it to compute the hash of Docker images, which can be multiple GB in size. This PR prevents the tool from causing issues in limited-memory environments by reading the input in fixed-size chunks and updating the hash incrementally. Closes #4243. PiperOrigin-RevId: 184518900
Diffstat (limited to 'tools')
-rw-r--r--tools/build_defs/hash/BUILD10
-rwxr-xr-x[-rw-r--r--]tools/build_defs/hash/sha256.py8
-rwxr-xr-xtools/build_defs/hash/sha256_test.sh37
3 files changed, 54 insertions, 1 deletions
diff --git a/tools/build_defs/hash/BUILD b/tools/build_defs/hash/BUILD
index f06f5deffe..fc966173b6 100644
--- a/tools/build_defs/hash/BUILD
+++ b/tools/build_defs/hash/BUILD
@@ -21,3 +21,13 @@ exports_files(
["hash.bzl"],
visibility = ["//visibility:public"],
)
+
+sh_test(
+ name = "sha256_test",
+ size = "small",
+ srcs = ["sha256_test.sh"],
+ data = ["sha256"],
+ # TODO(laszlocsomor): use the runfiles library after
+ # https://github.com/bazelbuild/bazel/issues/4460 is fixed for sh_*
+ tags = ["-no_windows"],
+)
diff --git a/tools/build_defs/hash/sha256.py b/tools/build_defs/hash/sha256.py
index 12c39dfdc2..ec43ae9d3e 100644..100755
--- a/tools/build_defs/hash/sha256.py
+++ b/tools/build_defs/hash/sha256.py
@@ -25,4 +25,10 @@ if __name__ == "__main__":
sys.exit(-1)
with open(sys.argv[2], "w") as outputfile:
with open(sys.argv[1], "rb") as inputfile:
- outputfile.write(hashlib.sha256(inputfile.read()).hexdigest())
+ sha256 = hashlib.sha256()
+ while True:
+ data = inputfile.read(65536)
+ if not data:
+ break
+ sha256.update(data)
+ outputfile.write(sha256.hexdigest())
diff --git a/tools/build_defs/hash/sha256_test.sh b/tools/build_defs/hash/sha256_test.sh
new file mode 100755
index 0000000000..efb1c3a471
--- /dev/null
+++ b/tools/build_defs/hash/sha256_test.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+# Copyright 2017 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+# The following code produces a 120MB file (30*2^22 bytes)
+cat > input.txt <<EOF
+01234567890123456789012345678
+EOF
+
+cp input.txt tmp.txt
+for i in {1..22}; do
+ cat tmp.txt >> input.txt
+ cp input.txt tmp.txt
+done
+
+tools/build_defs/hash/sha256 input.txt output.txt
+
+expected=b89e2ebd615b1d32be9cec7bf687f3a00476835fe2ea8fb560394d79f420390c
+if [ "$(cat output.txt)" != "$expected" ]; then
+ echo "Wrong hash $(cat output.txt); expected $expected"
+ exit 1
+fi
+