aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org>, 2016-08-18 21:40:25 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org>, 2016-08-18 22:47:37 -0700
commit: 37000ef3b5a63a8cf9b6e8fd3dd8059aba0e6ddc (patch)
tree: b486cdc8cf3c6b84b1adc8484b5856b543af45ef
parent: c8ac01b55d0656f79f98fabd2e53a8e7d5f08639 (diff)
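Some OSes return filenames as Unicode strings, others as UTF-8-encoded byte strings.
Calling encode('utf-8') on a non-Unicode (byte) string can raise an exception. This CL changes the behavior to encode to UTF-8 if and only if the filename is a Unicode string, by using compat.as_bytes() instead of calling encode('utf-8') directly. Change: 130720340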
-rw-r--r-- tensorflow/examples/image_retraining/retrain.py | 3
1 file changed, 2 insertions, 1 deletion
diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py
index b009262d6e..056664778d 100644
--- a/tensorflow/examples/image_retraining/retrain.py
+++ b/tensorflow/examples/image_retraining/retrain.py
@@ -80,6 +80,7 @@ import tensorflow as tf
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import tensor_shape
from tensorflow.python.platform import gfile
+from tensorflow.python.util import compat
import struct
@@ -226,7 +227,7 @@ def create_image_lists(image_dir, testing_percentage, validation_percentage):
# To do that, we need a stable way of deciding based on just the file name
# itself, so we do a hash of that and then use that to generate a
# probability value that we use to assign it.
- hash_name_hashed = hashlib.sha1(hash_name.encode('utf-8')).hexdigest()
+ hash_name_hashed = hashlib.sha1(compat.as_bytes(hash_name)).hexdigest()
percentage_hash = (int(hash_name_hashed, 16) % (65536)) * (100 / 65535.0)
if percentage_hash < validation_percentage:
validation_images.append(base_name)