Diffstat (limited to 'tensorflow/examples')
88 files changed, 1192 insertions, 2194 deletions
diff --git a/tensorflow/examples/adding_an_op/BUILD b/tensorflow/examples/adding_an_op/BUILD index b3ed6589ed..cf8054be6a 100644 --- a/tensorflow/examples/adding_an_op/BUILD +++ b/tensorflow/examples/adding_an_op/BUILD @@ -139,15 +139,3 @@ tf_cc_binary( "//tensorflow/core:framework", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index bb75431a1f..5c47ce6b67 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -40,6 +40,7 @@ <intent-filter> <action android:name="android.intent.action.MAIN" /> <category android:name="android.intent.category.LAUNCHER" /> + <category android:name="android.intent.category.LEANBACK_LAUNCHER" /> </intent-filter> </activity> @@ -49,6 +50,7 @@ <intent-filter> <action android:name="android.intent.action.MAIN" /> <category android:name="android.intent.category.LAUNCHER" /> + <category android:name="android.intent.category.LEANBACK_LAUNCHER" /> </intent-filter> </activity> @@ -58,6 +60,7 @@ <intent-filter> <action android:name="android.intent.action.MAIN" /> <category android:name="android.intent.category.LAUNCHER" /> + <category android:name="android.intent.category.LEANBACK_LAUNCHER" /> </intent-filter> </activity> @@ -67,6 +70,7 @@ <intent-filter> <action android:name="android.intent.action.MAIN" /> <category android:name="android.intent.category.LAUNCHER" /> + <category android:name="android.intent.category.LEANBACK_LAUNCHER" /> </intent-filter> </activity> </application> diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index 46df5973e8..f327b645f5 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -1,6 +1,8 @@ # Description: # TensorFlow camera demo app for Android. +load("@build_bazel_rules_android//android:rules.bzl", "android_binary") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 @@ -36,7 +38,7 @@ cc_binary( "-z defs", "-s", "-Wl,--version-script", # This line must be directly followed by LINKER_SCRIPT. 
- LINKER_SCRIPT, + "$(location {})".format(LINKER_SCRIPT), ], linkshared = 1, linkstatic = 1, @@ -76,7 +78,6 @@ android_binary( custom_package = "org.tensorflow.demo", inline_constants = 1, manifest = "AndroidManifest.xml", - manifest_merger = "legacy", resource_files = glob(["res/**"]), tags = [ "manual", @@ -92,7 +93,7 @@ android_binary( filegroup( name = "external_assets", srcs = [ - "@inception5h//:model_files", + "@inception_v1//:model_files", "@mobile_ssd//:model_files", "@speech_commands//:model_files", "@stylize//:model_files", @@ -101,22 +102,6 @@ filegroup( # LINT.ThenChange(//tensorflow/examples/android/download-models.gradle) filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - "gradleBuild/**", - "libs/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - -filegroup( name = "java_files", srcs = glob(["src/**/*.java"]), ) diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle index f7bdf8b816..0767726aa9 100644 --- a/tensorflow/examples/android/build.gradle +++ b/tensorflow/examples/android/build.gradle @@ -56,10 +56,12 @@ def nativeOutDir = 'libs/' + cpuType def nativeBuildRule = 'buildNativeBazel' def demoLibPath = '../../../bazel-bin/tensorflow/examples/android/libtensorflow_demo.so' def inferenceLibPath = '../../../bazel-bin/tensorflow/contrib/android/libtensorflow_inference.so' + +// Override for Makefile builds. if (nativeBuildSystem == 'makefile') { nativeBuildRule = 'buildNativeMake' - demoLibPath = '../../../tensorflow/contrib/makefile/gen/lib/libtensorflow_demo.so' - inferenceLibPath = '../../../tensorflow/contrib/makefile/gen/lib/libtensorflow_inference.so' + demoLibPath = '../../../tensorflow/contrib/makefile/gen/lib/android_' + cpuType + '/libtensorflow_demo.so' + inferenceLibPath = '../../../tensorflow/contrib/makefile/gen/lib/android_' + cpuType + '/libtensorflow_inference.so' } // If building with Bazel, this is the location of the bazel binary. @@ -154,7 +156,8 @@ task buildNativeMake(type: Exec) { '-s', \ 'tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in', \ '-t', \ - 'libtensorflow_inference.so libtensorflow_demo.so' \ + 'libtensorflow_inference.so libtensorflow_demo.so all' \ + , '-a', cpuType \ //, '-T' // Uncomment to skip protobuf and speed up subsequent builds. } diff --git a/tensorflow/examples/android/download-models.gradle b/tensorflow/examples/android/download-models.gradle index 0e2cf65f53..d3b67eab52 100644 --- a/tensorflow/examples/android/download-models.gradle +++ b/tensorflow/examples/android/download-models.gradle @@ -9,7 +9,7 @@ */ // hard coded model files // LINT.IfChange -def models = ['inception5h.zip', +def models = ['inception_v1.zip', 'object_detection/ssd_mobilenet_v1_android_export.zip', 'stylize_v1.zip', 'speech_commands_conv_actions.zip'] diff --git a/tensorflow/examples/android/jni/object_tracking/config.h b/tensorflow/examples/android/jni/object_tracking/config.h index 86e9fc71b6..47de2d2c15 100644 --- a/tensorflow/examples/android/jni/object_tracking/config.h +++ b/tensorflow/examples/android/jni/object_tracking/config.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ #include <math.h> @@ -297,4 +297,4 @@ struct TrackerConfig { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/flow_cache.h b/tensorflow/examples/android/jni/object_tracking/flow_cache.h index 8813ab6d71..b62e334ecd 100644 --- a/tensorflow/examples/android/jni/object_tracking/flow_cache.h +++ b/tensorflow/examples/android/jni/object_tracking/flow_cache.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FLOW_CACHE_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FLOW_CACHE_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FLOW_CACHE_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FLOW_CACHE_H_ #include "tensorflow/examples/android/jni/object_tracking/geom.h" #include "tensorflow/examples/android/jni/object_tracking/utils.h" @@ -303,4 +303,4 @@ class FlowCache { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FLOW_CACHE_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FLOW_CACHE_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/frame_pair.h b/tensorflow/examples/android/jni/object_tracking/frame_pair.h index 8f409fe806..6c8ac9be98 100644 --- a/tensorflow/examples/android/jni/object_tracking/frame_pair.h +++ b/tensorflow/examples/android/jni/object_tracking/frame_pair.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FRAME_PAIR_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FRAME_PAIR_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FRAME_PAIR_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FRAME_PAIR_H_ #include "tensorflow/examples/android/jni/object_tracking/keypoint.h" @@ -100,4 +100,4 @@ class FramePair { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FRAME_PAIR_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_FRAME_PAIR_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/geom.h b/tensorflow/examples/android/jni/object_tracking/geom.h index 2819063616..c975e40144 100644 --- a/tensorflow/examples/android/jni/object_tracking/geom.h +++ b/tensorflow/examples/android/jni/object_tracking/geom.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GEOM_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GEOM_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GEOM_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GEOM_H_ #include "tensorflow/examples/android/jni/object_tracking/logging.h" #include "tensorflow/examples/android/jni/object_tracking/utils.h" @@ -316,4 +316,4 @@ inline BoundingSquare GetCenteredSquare(const BoundingBox& original_box) { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GEOM_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GEOM_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/gl_utils.h b/tensorflow/examples/android/jni/object_tracking/gl_utils.h index bd5c233f4f..a29e677d3c 100755 --- a/tensorflow/examples/android/jni/object_tracking/gl_utils.h +++ b/tensorflow/examples/android/jni/object_tracking/gl_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GL_UTILS_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GL_UTILS_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GL_UTILS_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GL_UTILS_H_ #include <GLES/gl.h> #include <GLES/glext.h> @@ -52,4 +52,4 @@ inline static void MapWorldSquareToUnitSquare(const BoundingSquare& square) { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GL_UTILS_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_GL_UTILS_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/image-inl.h b/tensorflow/examples/android/jni/object_tracking/image-inl.h index 9c4c389aa7..61d69908b5 100644 --- a/tensorflow/examples/android/jni/object_tracking/image-inl.h +++ b/tensorflow/examples/android/jni/object_tracking/image-inl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ #include <stdint.h> @@ -641,4 +641,4 @@ inline void Image<T>::FromArray(const T* const pixels, const int stride, } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/image.h b/tensorflow/examples/android/jni/object_tracking/image.h index b7a2301f5e..a436f0e0a1 100644 --- a/tensorflow/examples/android/jni/object_tracking/image.h +++ b/tensorflow/examples/android/jni/object_tracking/image.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ #include <stdint.h> @@ -338,4 +338,4 @@ inline std::ostream& operator<<(std::ostream& stream, const Image<t>& image) { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/image_data.h b/tensorflow/examples/android/jni/object_tracking/image_data.h index 445cdb57a3..c4f91d8cbd 100644 --- a/tensorflow/examples/android/jni/object_tracking/image_data.h +++ b/tensorflow/examples/android/jni/object_tracking/image_data.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_DATA_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_DATA_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_DATA_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_DATA_H_ #include <stdint.h> #include <memory> @@ -261,4 +261,4 @@ class ImageData { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_DATA_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_DATA_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/image_utils.h b/tensorflow/examples/android/jni/object_tracking/image_utils.h index ac9ffd90f8..b4ad7000b3 100644 --- a/tensorflow/examples/android/jni/object_tracking/image_utils.h +++ b/tensorflow/examples/android/jni/object_tracking/image_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_ #include <stdint.h> @@ -295,4 +295,4 @@ inline void NormalizeImage(Image<float>* const image) { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/integral_image.h b/tensorflow/examples/android/jni/object_tracking/integral_image.h index 8e82334abf..caf9b7d2ab 100755 --- a/tensorflow/examples/android/jni/object_tracking/integral_image.h +++ b/tensorflow/examples/android/jni/object_tracking/integral_image.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_INTEGRAL_IMAGE_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_INTEGRAL_IMAGE_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_INTEGRAL_IMAGE_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_INTEGRAL_IMAGE_H_ #include "tensorflow/examples/android/jni/object_tracking/geom.h" #include "tensorflow/examples/android/jni/object_tracking/image-inl.h" @@ -184,4 +184,4 @@ class IntegralImage : public Image<uint32_t> { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_INTEGRAL_IMAGE_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_INTEGRAL_IMAGE_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/jni_utils.h b/tensorflow/examples/android/jni/object_tracking/jni_utils.h index 21fbabb521..b81d9e0c12 100644 --- a/tensorflow/examples/android/jni/object_tracking/jni_utils.h +++ b/tensorflow/examples/android/jni/object_tracking/jni_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_JNI_UTILS_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_JNI_UTILS_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_JNI_UTILS_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_JNI_UTILS_H_ #include <stdint.h> diff --git a/tensorflow/examples/android/jni/object_tracking/keypoint.h b/tensorflow/examples/android/jni/object_tracking/keypoint.h index 719f9aff3f..93405a5b2a 100644 --- a/tensorflow/examples/android/jni/object_tracking/keypoint.h +++ b/tensorflow/examples/android/jni/object_tracking/keypoint.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_H_ #include "tensorflow/examples/android/jni/object_tracking/geom.h" #include "tensorflow/examples/android/jni/object_tracking/image-inl.h" @@ -45,4 +45,4 @@ inline std::ostream& operator<<(std::ostream& stream, const Keypoint keypoint) { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/keypoint_detector.h b/tensorflow/examples/android/jni/object_tracking/keypoint_detector.h index 33d228128d..2e85b835a7 100644 --- a/tensorflow/examples/android/jni/object_tracking/keypoint_detector.h +++ b/tensorflow/examples/android/jni/object_tracking/keypoint_detector.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_DETECTOR_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_DETECTOR_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_DETECTOR_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_DETECTOR_H_ #include <stdint.h> #include <vector> @@ -125,4 +125,4 @@ class KeypointDetector { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_DETECTOR_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_KEYPOINT_DETECTOR_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/logging.h b/tensorflow/examples/android/jni/object_tracking/logging.h index dbc89af2f7..852a749399 100644 --- a/tensorflow/examples/android/jni/object_tracking/logging.h +++ b/tensorflow/examples/android/jni/object_tracking/logging.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_LOG_STREAMING_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_LOG_STREAMING_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_LOG_STREAMING_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_LOG_STREAMING_H_ #include <android/log.h> #include <string.h> @@ -118,4 +118,4 @@ void LogPrintF(const int severity, const char* format, ...); #endif -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_LOG_STREAMING_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_LOG_STREAMING_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/object_detector.h b/tensorflow/examples/android/jni/object_tracking/object_detector.h index 2525567678..a65c7b0db7 100644 --- a/tensorflow/examples/android/jni/object_tracking/object_detector.h +++ b/tensorflow/examples/android/jni/object_tracking/object_detector.h @@ -20,8 +20,8 @@ limitations under the License. // Defines the ObjectDetector class that is the main interface for detecting // ObjectModelBases in frames. -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_DETECTOR_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_DETECTOR_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_DETECTOR_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_DETECTOR_H_ #include <float.h> #include <map> @@ -227,4 +227,4 @@ class ObjectDetector : public ObjectDetectorBase { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_DETECTOR_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_DETECTOR_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/object_model.h b/tensorflow/examples/android/jni/object_tracking/object_model.h index be33aea638..5e81c49080 100644 --- a/tensorflow/examples/android/jni/object_tracking/object_model.h +++ b/tensorflow/examples/android/jni/object_tracking/object_model.h @@ -19,8 +19,8 @@ limitations under the License. // Contains ObjectModelBase declaration. 
-#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_DETECTION_OBJECT_MODEL_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_DETECTION_OBJECT_MODEL_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_DETECTION_OBJECT_MODEL_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_DETECTION_OBJECT_MODEL_H_ #ifdef __RENDER_OPENGL__ #include <GLES/gl.h> @@ -99,4 +99,4 @@ class ObjectModel : public ObjectModelBase { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_DETECTION_OBJECT_MODEL_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_DETECTION_OBJECT_MODEL_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/object_tracker.h b/tensorflow/examples/android/jni/object_tracking/object_tracker.h index eb281fad37..20c7627fc5 100644 --- a/tensorflow/examples/android/jni/object_tracking/object_tracker.h +++ b/tensorflow/examples/android/jni/object_tracking/object_tracker.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_TRACKER_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_TRACKER_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_TRACKER_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_TRACKER_H_ #include <map> #include <string> @@ -267,4 +267,4 @@ inline std::ostream& operator<<(std::ostream& stream, } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_TRACKER_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OBJECT_TRACKER_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/optical_flow.h b/tensorflow/examples/android/jni/object_tracking/optical_flow.h index 2206375beb..f98ae22bd6 100644 --- a/tensorflow/examples/android/jni/object_tracking/optical_flow.h +++ b/tensorflow/examples/android/jni/object_tracking/optical_flow.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OPTICAL_FLOW_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OPTICAL_FLOW_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OPTICAL_FLOW_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OPTICAL_FLOW_H_ #include "tensorflow/examples/android/jni/object_tracking/geom.h" #include "tensorflow/examples/android/jni/object_tracking/image-inl.h" @@ -97,4 +97,4 @@ class OpticalFlow { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OPTICAL_FLOW_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_OPTICAL_FLOW_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/sprite.h b/tensorflow/examples/android/jni/object_tracking/sprite.h index 05a13fea11..b54a68458f 100755 --- a/tensorflow/examples/android/jni/object_tracking/sprite.h +++ b/tensorflow/examples/android/jni/object_tracking/sprite.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_SPRITE_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_SPRITE_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_SPRITE_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_SPRITE_H_ #include <GLES/gl.h> #include <GLES/glext.h> @@ -199,4 +199,4 @@ class Sprite { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_SPRITE_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_SPRITE_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/time_log.h b/tensorflow/examples/android/jni/object_tracking/time_log.h index 60911da396..0073e11596 100644 --- a/tensorflow/examples/android/jni/object_tracking/time_log.h +++ b/tensorflow/examples/android/jni/object_tracking/time_log.h @@ -15,8 +15,8 @@ limitations under the License. // Utility functions for performance profiling. -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TIME_LOG_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TIME_LOG_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TIME_LOG_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TIME_LOG_H_ #include <stdint.h> @@ -134,4 +134,4 @@ inline static void TimeLog(const char* const str) { inline static void PrintTimeLog() {} #endif -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TIME_LOG_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TIME_LOG_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/tracked_object.h b/tensorflow/examples/android/jni/object_tracking/tracked_object.h index cda14e19d2..d7f1a7019b 100644 --- a/tensorflow/examples/android/jni/object_tracking/tracked_object.h +++ b/tensorflow/examples/android/jni/object_tracking/tracked_object.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TRACKED_OBJECT_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TRACKED_OBJECT_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TRACKED_OBJECT_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TRACKED_OBJECT_H_ #ifdef __RENDER_OPENGL__ #include "tensorflow/examples/android/jni/object_tracking/gl_utils.h" @@ -183,4 +183,4 @@ inline std::ostream& operator<<(std::ostream& stream, } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TRACKED_OBJECT_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_TRACKED_OBJECT_H_ diff --git a/tensorflow/examples/android/jni/object_tracking/utils.h b/tensorflow/examples/android/jni/object_tracking/utils.h index 51cdfcdcfb..2e98734ec4 100644 --- a/tensorflow/examples/android/jni/object_tracking/utils.h +++ b/tensorflow/examples/android/jni/object_tracking/utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_UTILS_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_UTILS_H_ +#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_UTILS_H_ +#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_UTILS_H_ #include <math.h> #include <stdint.h> @@ -378,4 +378,4 @@ inline bool Invert2x2(const T* const a, float* const a_inv) { } // namespace tf_tracking -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_UTILS_H_ +#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_UTILS_H_ diff --git a/tensorflow/examples/android/res/animator/color_animation.xml b/tensorflow/examples/android/res/animator/color_animation.xml new file mode 100644 index 0000000000..891d8cc1d4 --- /dev/null +++ b/tensorflow/examples/android/res/animator/color_animation.xml @@ -0,0 +1,30 @@ +<?xml version="1.0" encoding="utf-8"?><!-- + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<set xmlns:android="http://schemas.android.com/apk/res/android" + android:ordering="sequentially"> + <objectAnimator + android:propertyName="backgroundColor" + android:duration="375" + android:valueFrom="0x00b3ccff" + android:valueTo="0xffb3ccff" + android:valueType="colorType"/> + <objectAnimator + android:propertyName="backgroundColor" + android:duration="375" + android:valueFrom="0xffb3ccff" + android:valueTo="0x00b3ccff" + android:valueType="colorType"/> +</set> diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java index 8bd4abb154..429138abe5 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java @@ -351,6 +351,10 @@ public abstract class CameraActivity extends Activity protected void setFragment() { String cameraId = chooseCamera(); + if (cameraId == null) { + Toast.makeText(this, "No Camera Detected", Toast.LENGTH_SHORT).show(); + finish(); + } Fragment fragment; if (useCamera2API) { @@ -416,7 +420,8 @@ public abstract class CameraActivity extends Activity @Override public boolean onKeyDown(final int keyCode, final KeyEvent event) { - if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP) { + if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP + || keyCode == KeyEvent.KEYCODE_BUTTON_L1 || keyCode == KeyEvent.KEYCODE_DPAD_CENTER) { debug = !debug; requestRender(); onSetDebug(debug); diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java b/tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java index a317273acd..068c7b0d94 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java +++ 
b/tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java @@ -81,8 +81,11 @@ public class LegacyCameraConnectionFragment extends Fragment { try { Camera.Parameters parameters = camera.getParameters(); - parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE); - + List<String> focusModes = parameters.getSupportedFocusModes(); + if (focusModes != null + && focusModes.contains(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE)) { + parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE); + } List<Camera.Size> cameraSizes = parameters.getSupportedPreviewSizes(); Size[] sizes = new Size[cameraSizes.size()]; int i = 0; diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java index 184df1bdb4..1cddf3dc55 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java @@ -31,7 +31,8 @@ the RecognizeCommands helper class. package org.tensorflow.demo; -import android.animation.ValueAnimator; +import android.animation.AnimatorInflater; +import android.animation.AnimatorSet; import android.app.Activity; import android.content.pm.PackageManager; import android.media.AudioFormat; @@ -329,17 +330,13 @@ public class SpeechActivity extends Activity { labelIndex = i; } } - final View labelView = (View) labelsListView.getChildAt(labelIndex - 2); - ValueAnimator colorAnimation = - ValueAnimator.ofArgb(0x00b3ccff, 0xffb3ccff, 0x00b3ccff); - colorAnimation.setDuration(750); - colorAnimation.addUpdateListener( - new ValueAnimator.AnimatorUpdateListener() { - @Override - public void onAnimationUpdate(ValueAnimator animator) { - labelView.setBackgroundColor((int) animator.getAnimatedValue()); - } - }); + final View labelView = labelsListView.getChildAt(labelIndex - 2); + + AnimatorSet colorAnimation = + (AnimatorSet) + AnimatorInflater.loadAnimator( + SpeechActivity.this, R.animator.color_animation); + colorAnimation.setTarget(labelView); colorAnimation.start(); } } diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java index 6a66ec3927..33ec65e9f7 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java @@ -16,8 +16,10 @@ package org.tensorflow.demo; +import android.app.UiModeManager; import android.content.Context; import android.content.res.AssetManager; +import android.content.res.Configuration; import android.graphics.Bitmap; import android.graphics.Bitmap.Config; import android.graphics.BitmapFactory; @@ -31,9 +33,11 @@ import android.graphics.Typeface; import android.media.ImageReader.OnImageAvailableListener; import android.os.Bundle; import android.os.SystemClock; +import android.util.DisplayMetrics; import android.util.Size; import android.util.TypedValue; import android.view.Display; +import android.view.KeyEvent; import android.view.MotionEvent; import android.view.View; import android.view.View.OnClickListener; @@ -43,6 +47,7 @@ import android.widget.BaseAdapter; import android.widget.Button; import android.widget.GridView; import android.widget.ImageView; +import android.widget.RelativeLayout; import android.widget.Toast; import java.io.IOException; import java.io.InputStream; @@ -381,6 +386,27 @@ public class StylizeActivity 
extends CameraActivity implements OnImageAvailableL grid = (GridView) findViewById(R.id.grid_layout); grid.setAdapter(adapter); grid.setOnTouchListener(gridTouchAdapter); + + // Change UI on Android TV + UiModeManager uiModeManager = (UiModeManager) getSystemService(UI_MODE_SERVICE); + if (uiModeManager.getCurrentModeType() == Configuration.UI_MODE_TYPE_TELEVISION) { + DisplayMetrics displayMetrics = new DisplayMetrics(); + getWindowManager().getDefaultDisplay().getMetrics(displayMetrics); + int styleSelectorHeight = displayMetrics.heightPixels; + int styleSelectorWidth = displayMetrics.widthPixels - styleSelectorHeight; + RelativeLayout.LayoutParams layoutParams = new RelativeLayout.LayoutParams(styleSelectorWidth, ViewGroup.LayoutParams.MATCH_PARENT); + + // Calculate number of style in a row, so all the style can show up without scrolling + int numOfStylePerRow = 3; + while (styleSelectorWidth / numOfStylePerRow * Math.ceil((float) (adapter.getCount() - 2) / numOfStylePerRow) > styleSelectorHeight) { + numOfStylePerRow++; + } + grid.setNumColumns(numOfStylePerRow); + layoutParams.addRule(RelativeLayout.ALIGN_PARENT_RIGHT); + grid.setLayoutParams(layoutParams); + adapter.buttons.clear(); + } + setStyle(adapter.items[0], 1.0f); } @@ -602,4 +628,38 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL borderedText.drawLines(canvas, 10, canvas.getHeight() - 10, lines); } + + @Override + public boolean onKeyDown(int keyCode, KeyEvent event) { + int moveOffset = 0; + switch (keyCode) { + case KeyEvent.KEYCODE_DPAD_LEFT: + moveOffset = -1; + break; + case KeyEvent.KEYCODE_DPAD_RIGHT: + moveOffset = 1; + break; + case KeyEvent.KEYCODE_DPAD_UP: + moveOffset = -1 * grid.getNumColumns(); + break; + case KeyEvent.KEYCODE_DPAD_DOWN: + moveOffset = grid.getNumColumns(); + break; + default: + return super.onKeyDown(keyCode, event); + } + + // get the highest selected style + int currentSelect = 0; + float highestValue = 0; + for (int i = 0; i < adapter.getCount(); i++) { + if (adapter.items[i].value > highestValue) { + currentSelect = i; + highestValue = adapter.items[i].value; + } + } + setStyle(adapter.items[(currentSelect + moveOffset + adapter.getCount()) % adapter.getCount()], 1); + + return true; + } } diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowObjectDetectionAPIModel.java b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowObjectDetectionAPIModel.java index 614d3c7dd7..9739e58018 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowObjectDetectionAPIModel.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowObjectDetectionAPIModel.java @@ -137,7 +137,7 @@ public class TensorFlowObjectDetectionAPIModel implements Classifier { Trace.beginSection("recognizeImage"); Trace.beginSection("preprocessBitmap"); - // Preprocess the image data from 0-255 int to normalized float based + // Preprocess the image data to extract R, G and B bytes from int of form 0x00RRGGBB // on the provided parameters. 
bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight()); diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java b/tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java index 2fe2ba539e..af6af2bc8f 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java @@ -199,7 +199,7 @@ public class MultiBoxTracker { final int w, final int h, final int rowStride, - final int sensorOrienation, + final int sensorOrientation, final byte[] frame, final long timestamp) { if (objectTracker == null && !initialized) { @@ -209,7 +209,7 @@ public class MultiBoxTracker { objectTracker = ObjectTracker.getInstance(w, h, rowStride, true); frameWidth = w; frameHeight = h; - this.sensorOrientation = sensorOrienation; + this.sensorOrientation = sensorOrientation; initialized = true; if (objectTracker == null) { diff --git a/tensorflow/examples/benchmark/BUILD b/tensorflow/examples/benchmark/BUILD index c4bb0a5bd9..98611a9aad 100644 --- a/tensorflow/examples/benchmark/BUILD +++ b/tensorflow/examples/benchmark/BUILD @@ -23,9 +23,3 @@ tf_py_logged_benchmark( name = "sample_logged_benchmark", target = "//tensorflow/examples/benchmark:sample_benchmark", ) - -filegroup( - name = "all_files", - srcs = glob(["**/*"]), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/get_started/regression/BUILD b/tensorflow/examples/get_started/regression/BUILD index 577b970c90..bee94d7d90 100644 --- a/tensorflow/examples/get_started/regression/BUILD +++ b/tensorflow/examples/get_started/regression/BUILD @@ -2,18 +2,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_test( name = "test", size = "medium", diff --git a/tensorflow/examples/get_started/regression/imports85.py b/tensorflow/examples/get_started/regression/imports85.py index 6bee556eb8..4fdaceea9a 100644 --- a/tensorflow/examples/get_started/regression/imports85.py +++ b/tensorflow/examples/get_started/regression/imports85.py @@ -131,11 +131,12 @@ def dataset(y_name="price", train_fraction=0.7): # booleans but we are dealing with symbolic tensors. return ~in_training_set(line) - base_dataset = (tf.contrib.data - # Get the lines from the file. - .TextLineDataset(path) - # drop lines with question marks. - .filter(has_no_question_marks)) + base_dataset = ( + tf.data + # Get the lines from the file. + .TextLineDataset(path) + # drop lines with question marks. + .filter(has_no_question_marks)) train = (base_dataset # Take only the training-set lines. 
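The imports85.py hunk above swaps the deprecated `tf.contrib.data` namespace for the core `tf.data` API, which is otherwise a drop-in replacement at this call site. Here is a minimal standalone sketch of the same `TextLineDataset`-plus-`filter` pattern — written against the current TF 2.x API for brevity, with a placeholder file path and a simplified filter predicate rather than the example's real ones:

```python
import tensorflow as tf

path = "imports85.data"  # Placeholder; the real example downloads this file.

def has_no_question_marks(line):
    # Drop CSV rows containing "?", the dataset's marker for missing values.
    return tf.logical_not(tf.strings.regex_full_match(line, r".*\?.*"))

base_dataset = (
    tf.data.TextLineDataset(path)     # One string element per line of the file.
    .filter(has_no_question_marks))   # Keep only complete rows.

for line in base_dataset.take(3):     # Eager iteration, TF 2.x style.
    print(line.numpy())
```

The surrounding context lines show what happens next in the script: `base_dataset` is filtered again with the `in_training_set` predicate (and its negation) to derive the training and test splits.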
diff --git a/tensorflow/examples/how_tos/reading_data/BUILD b/tensorflow/examples/how_tos/reading_data/BUILD index 4a43585d53..64a054d371 100644 --- a/tensorflow/examples/how_tos/reading_data/BUILD +++ b/tensorflow/examples/how_tos/reading_data/BUILD @@ -54,15 +54,3 @@ py_binary( "//tensorflow/examples/tutorials/mnist:input_data", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py index fa4c1c0da5..7402247448 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """Train and Eval the MNIST network. This version is like fully_connected_feed.py but uses data converted to a TFRecords file containing tf.train.Example protocol buffers. See: -https://www.tensorflow.org/programmers_guide/reading_data#reading_from_files +https://www.tensorflow.org/guide/reading_data#reading_from_files for context. YOU MUST run convert_to_records before running this (but you only need to @@ -46,6 +45,7 @@ VALIDATION_FILE = 'validation.tfrecords' def decode(serialized_example): + """Parses an image and label from the given `serialized_example`.""" features = tf.parse_single_example( serialized_example, # Defaults are not specified since both keys are required. @@ -65,19 +65,22 @@ def decode(serialized_example): return image, label + def augment(image, label): + """Placeholder for data augmentation.""" # OPTIONAL: Could reshape into a 28x28 image and apply distortions # here. Since we are not applying any distortions in this # example, and the next step expects the image to be flattened # into a vector, we don't bother. return image, label + def normalize(image, label): - # Convert from [0, 255] -> [-0.5, 0.5] floats. + """Convert `image` from [0, 255] -> [-0.5, 0.5] floats.""" image = tf.cast(image, tf.float32) * (1. / 255) - 0.5 - return image, label + def inputs(train, batch_size, num_epochs): """Reads input data num_epochs times. @@ -98,23 +101,29 @@ def inputs(train, batch_size, num_epochs): over the dataset once. On the other hand there is no special initialization required. """ - if not num_epochs: num_epochs = None - filename = os.path.join(FLAGS.train_dir, - TRAIN_FILE if train else VALIDATION_FILE) + if not num_epochs: + num_epochs = None + filename = os.path.join(FLAGS.train_dir, TRAIN_FILE + if train else VALIDATION_FILE) with tf.name_scope('input'): - # TFRecordDataset opens a protobuf and reads entries line by line - # could also be [list, of, filenames] + # TFRecordDataset opens a binary file and reads one record at a time. + # `filename` could also be a list of filenames, which will be read in order. 
dataset = tf.data.TFRecordDataset(filename) - dataset = dataset.repeat(num_epochs) - # map takes a python function and applies it to every sample + # The map transformation takes a function and applies it to every element + # of the dataset. dataset = dataset.map(decode) dataset = dataset.map(augment) dataset = dataset.map(normalize) - #the parameter is the queue size + # The shuffle transformation uses a finite-sized buffer to shuffle elements + # in memory. The parameter is the number of elements in the buffer. For + # completely uniform shuffling, set the parameter to be the same as the + # number of elements in the dataset. dataset = dataset.shuffle(1000 + 3 * batch_size) + + dataset = dataset.repeat(num_epochs) dataset = dataset.batch(batch_size) iterator = dataset.make_one_shot_iterator() @@ -127,13 +136,11 @@ def run_training(): # Tell TensorFlow that the model will be built into the default Graph. with tf.Graph().as_default(): # Input images and labels. - image_batch, label_batch = inputs(train=True, batch_size=FLAGS.batch_size, - num_epochs=FLAGS.num_epochs) + image_batch, label_batch = inputs( + train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs) # Build a Graph that computes predictions from the inference model. - logits = mnist.inference(image_batch, - FLAGS.hidden1, - FLAGS.hidden2) + logits = mnist.inference(image_batch, FLAGS.hidden1, FLAGS.hidden2) # Add to the Graph the loss calculation. loss = mnist.loss(logits, label_batch) @@ -152,7 +159,7 @@ def run_training(): sess.run(init_op) try: step = 0 - while True: #train until OutOfRangeError + while True: # Train until OutOfRangeError start_time = time.time() # Run one step of the model. The return values are @@ -168,10 +175,12 @@ def run_training(): # Print an overview fairly often. if step % 100 == 0: print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, - duration)) + duration)) step += 1 except tf.errors.OutOfRangeError: - print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step)) + print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, + step)) + def main(_): run_training() @@ -183,37 +192,27 @@ if __name__ == '__main__': '--learning_rate', type=float, default=0.01, - help='Initial learning rate.' - ) + help='Initial learning rate.') parser.add_argument( '--num_epochs', type=int, default=2, - help='Number of epochs to run trainer.' - ) + help='Number of epochs to run trainer.') parser.add_argument( '--hidden1', type=int, default=128, - help='Number of units in hidden layer 1.' - ) + help='Number of units in hidden layer 1.') parser.add_argument( '--hidden2', type=int, default=32, - help='Number of units in hidden layer 2.' - ) - parser.add_argument( - '--batch_size', - type=int, - default=100, - help='Batch size.' - ) + help='Number of units in hidden layer 2.') + parser.add_argument('--batch_size', type=int, default=100, help='Batch size.') parser.add_argument( '--train_dir', type=str, default='/tmp/data', - help='Directory with the training data.' - ) + help='Directory with the training data.') FLAGS, unparsed = parser.parse_known_args() tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/image_retraining/BUILD b/tensorflow/examples/image_retraining/BUILD deleted file mode 100644 index 9f9244a74c..0000000000 --- a/tensorflow/examples/image_retraining/BUILD +++ /dev/null @@ -1,63 +0,0 @@ -# Description: -# Transfer learning example for TensorFlow. 
- -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -load("//tensorflow:tensorflow.bzl", "py_test") - -py_binary( - name = "retrain", - srcs = [ - "retrain.py", - ], - srcs_version = "PY2AND3", - visibility = ["//tensorflow:__subpackages__"], - deps = [ - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:graph_util", - "//tensorflow/python:platform", - "//tensorflow/python:util", - "//third_party/py/numpy", - ], -) - -py_test( - name = "retrain_test", - size = "small", - srcs = [ - "retrain.py", - "retrain_test.py", - ], - data = [ - ":data/labels.txt", - "//tensorflow/examples/label_image:data/grace_hopper.jpg", - ], - srcs_version = "PY2AND3", - deps = [ - ":retrain", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:graph_util", - "//tensorflow/python:platform", - "//tensorflow/python:platform_test", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:util", - "//third_party/py/numpy", - ], -) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/image_retraining/README.md b/tensorflow/examples/image_retraining/README.md index 8a49525c6e..3f0b3d1268 100644 --- a/tensorflow/examples/image_retraining/README.md +++ b/tensorflow/examples/image_retraining/README.md @@ -1,12 +1,15 @@ -retrain.py is an example script that shows how one can adapt a pretrained -network for other classification problems. A detailed overview of this script -can be found at: -https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0 +**NOTE: This code has moved to** +https://github.com/tensorflow/hub/tree/master/examples/image_retraining -The script also shows how one can train layers -with quantized weights and activations instead of taking a pre-trained floating -point model and then quantizing weights and activations. -The output graphdef produced by this script is compatible with the TensorFlow -Lite Optimizing Converter and can be converted to TFLite format. +retrain.py is an example script that shows how one can adapt a pretrained +network for other classification problems (including use with TFLite and +quantization). +As of TensorFlow 1.7, it is recommended to use a pretrained network from +TensorFlow Hub, using the new version of this example found in the location +above, as explained in TensorFlow's revised [image retraining +tutorial](https://www.tensorflow.org/tutorials/image_retraining). +Older versions of this example (using frozen GraphDefs instead of +TensorFlow Hub modules) are available in the release branches of +TensorFlow versions up to and including 1.7. diff --git a/tensorflow/examples/image_retraining/data/labels.txt b/tensorflow/examples/image_retraining/data/labels.txt deleted file mode 100644 index bc1131ac45..0000000000 --- a/tensorflow/examples/image_retraining/data/labels.txt +++ /dev/null @@ -1,3 +0,0 @@ -Runner-up -Winner -Loser diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py deleted file mode 100644 index ec22684eaf..0000000000 --- a/tensorflow/examples/image_retraining/retrain.py +++ /dev/null @@ -1,1366 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Simple transfer learning with Inception v3 or Mobilenet models. - -With support for TensorBoard. - -This example shows how to take a Inception v3 or Mobilenet model trained on -ImageNet images, and train a new top layer that can recognize other classes of -images. - -The top layer receives as input a 2048-dimensional vector (1001-dimensional for -Mobilenet) for each image. We train a softmax layer on top of this -representation. Assuming the softmax layer contains N labels, this corresponds -to learning N + 2048*N (or 1001*N) model parameters corresponding to the -learned biases and weights. - -Here's an example, which assumes you have a folder containing class-named -subfolders, each full of images for each label. The example folder flower_photos -should have a structure like this: - -~/flower_photos/daisy/photo1.jpg -~/flower_photos/daisy/photo2.jpg -... -~/flower_photos/rose/anotherphoto77.jpg -... -~/flower_photos/sunflower/somepicture.jpg - -The subfolder names are important, since they define what label is applied to -each image, but the filenames themselves don't matter. Once your images are -prepared, you can run the training with a command like this: - - -```bash -bazel build tensorflow/examples/image_retraining:retrain && \ -bazel-bin/tensorflow/examples/image_retraining/retrain \ - --image_dir ~/flower_photos -``` - -Or, if you have a pip installation of tensorflow, `retrain.py` can be run -without bazel: - -```bash -python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos -``` - -You can replace the image_dir argument with any folder containing subfolders of -images. The label for each image is taken from the name of the subfolder it's -in. - -This produces a new model file that can be loaded and run by any TensorFlow -program, for example the label_image sample code. - -By default this script will use the high accuracy, but comparatively large and -slow Inception v3 model architecture. It's recommended that you start with this -to validate that you have gathered good training data, but if you want to deploy -on resource-limited platforms, you can try the `--architecture` flag with a -Mobilenet model. For example: - -Run floating-point version of mobilenet: -```bash -python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos --architecture mobilenet_1.0_224 -``` - -Run quantized version of mobilenet: -```bash -python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized -``` - -There are 32 different Mobilenet models to choose from, with a variety of file -size and latency options. The first number can be '1.0', '0.75', '0.50', or -'0.25' to control the size, and the second controls the input image size, either -'224', '192', '160', or '128', with smaller sizes running faster. 
See -https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html -for more information on Mobilenet. - -To use with TensorBoard: - -By default, this script will log summaries to /tmp/retrain_logs directory - -Visualize the summaries with this command: - -tensorboard --logdir /tmp/retrain_logs - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -from datetime import datetime -import hashlib -import os.path -import random -import re -import sys -import tarfile - -import numpy as np -from six.moves import urllib -import tensorflow as tf - -from tensorflow.contrib.quantize.python import quant_ops -from tensorflow.python.framework import graph_util -from tensorflow.python.framework import tensor_shape -from tensorflow.python.platform import gfile -from tensorflow.python.util import compat - -FLAGS = None - -# These are all parameters that are tied to the particular model architecture -# we're using for Inception v3. These include things like tensor names and their -# sizes. If you want to adapt this script to work with another model, you will -# need to update these to reflect the values in the network you're using. -MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1 # ~134M - - -def create_image_lists(image_dir, testing_percentage, validation_percentage): - """Builds a list of training images from the file system. - - Analyzes the sub folders in the image directory, splits them into stable - training, testing, and validation sets, and returns a data structure - describing the lists of images for each label and their paths. - - Args: - image_dir: String path to a folder containing subfolders of images. - testing_percentage: Integer percentage of the images to reserve for tests. - validation_percentage: Integer percentage of images reserved for validation. - - Returns: - A dictionary containing an entry for each label subfolder, with images split - into training, testing, and validation sets within each label. - """ - if not gfile.Exists(image_dir): - tf.logging.error("Image directory '" + image_dir + "' not found.") - return None - result = {} - sub_dirs = [x[0] for x in gfile.Walk(image_dir)] - # The root directory comes first, so skip it. - is_root_dir = True - for sub_dir in sub_dirs: - if is_root_dir: - is_root_dir = False - continue - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - file_list = [] - dir_name = os.path.basename(sub_dir) - if dir_name == image_dir: - continue - tf.logging.info("Looking for images in '" + dir_name + "'") - for extension in extensions: - file_glob = os.path.join(image_dir, dir_name, '*.' + extension) - file_list.extend(gfile.Glob(file_glob)) - if not file_list: - tf.logging.warning('No files found') - continue - if len(file_list) < 20: - tf.logging.warning( - 'WARNING: Folder has less than 20 images, which may cause issues.') - elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS: - tf.logging.warning( - 'WARNING: Folder {} has more than {} images. Some images will ' - 'never be selected.'.format(dir_name, MAX_NUM_IMAGES_PER_CLASS)) - label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower()) - training_images = [] - testing_images = [] - validation_images = [] - for file_name in file_list: - base_name = os.path.basename(file_name) - # We want to ignore anything after '_nohash_' in the file name when - # deciding which set to put an image in, the data set creator has a way of - # grouping photos that are close variations of each other. 
-      # this is used in the plant disease data set to group multiple
-      # pictures of the same leaf.
-      hash_name = re.sub(r'_nohash_.*$', '', file_name)
-      # This looks a bit magical, but we need to decide whether this file
-      # should go into the training, testing, or validation sets, and we want
-      # to keep existing files in the same set even if more files are
-      # subsequently added.
-      # To do that, we need a stable way of deciding based on just the file
-      # name itself, so we do a hash of that and then use that to generate a
-      # probability value that we use to assign it.
-      hash_name_hashed = hashlib.sha1(compat.as_bytes(hash_name)).hexdigest()
-      percentage_hash = ((int(hash_name_hashed, 16) %
-                          (MAX_NUM_IMAGES_PER_CLASS + 1)) *
-                         (100.0 / MAX_NUM_IMAGES_PER_CLASS))
-      if percentage_hash < validation_percentage:
-        validation_images.append(base_name)
-      elif percentage_hash < (testing_percentage + validation_percentage):
-        testing_images.append(base_name)
-      else:
-        training_images.append(base_name)
-    result[label_name] = {
-        'dir': dir_name,
-        'training': training_images,
-        'testing': testing_images,
-        'validation': validation_images,
-    }
-  return result
-
-
-def get_image_path(image_lists, label_name, index, image_dir, category):
-  """Returns a path to an image for a label at the given index.
-
-  Args:
-    image_lists: Dictionary of training images for each label.
-    label_name: Label string we want to get an image for.
-    index: Int offset of the image we want. This will be moduloed by the
-      available number of images for the label, so it can be arbitrarily
-      large.
-    image_dir: Root folder string of the subfolders containing the training
-      images.
-    category: Name string of set to pull images from - training, testing, or
-      validation.
-
-  Returns:
-    File system path string to an image that meets the requested parameters.
-
-  """
-  if label_name not in image_lists:
-    tf.logging.fatal('Label does not exist %s.', label_name)
-  label_lists = image_lists[label_name]
-  if category not in label_lists:
-    tf.logging.fatal('Category does not exist %s.', category)
-  category_list = label_lists[category]
-  if not category_list:
-    tf.logging.fatal('Label %s has no images in the category %s.',
-                     label_name, category)
-  mod_index = index % len(category_list)
-  base_name = category_list[mod_index]
-  sub_dir = label_lists['dir']
-  full_path = os.path.join(image_dir, sub_dir, base_name)
-  return full_path
-
-
-def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir,
-                        category, architecture):
-  """Returns a path to a bottleneck file for a label at the given index.
-
-  Args:
-    image_lists: Dictionary of training images for each label.
-    label_name: Label string we want to get an image for.
-    index: Integer offset of the image we want. This will be moduloed by the
-      available number of images for the label, so it can be arbitrarily
-      large.
-    bottleneck_dir: Folder string holding cached files of bottleneck values.
-    category: Name string of set to pull images from - training, testing, or
-      validation.
-    architecture: The name of the model architecture.
-
-  Returns:
-    File system path string to a bottleneck file that meets the requested
-    parameters.
-  """
-  return get_image_path(image_lists, label_name, index, bottleneck_dir,
-                        category) + '_' + architecture + '.txt'
-
-
-def create_model_graph(model_info):
-  """Creates a graph from a saved GraphDef file and returns a Graph object.
-
-  Args:
-    model_info: Dictionary containing information about the model architecture.
-
-  Returns:
-    Graph holding the trained Inception network, and various tensors we'll be
-    manipulating.
-  """
-  with tf.Graph().as_default() as graph:
-    model_path = os.path.join(FLAGS.model_dir, model_info['model_file_name'])
-    print('Model path: ', model_path)
-    with gfile.FastGFile(model_path, 'rb') as f:
-      graph_def = tf.GraphDef()
-      graph_def.ParseFromString(f.read())
-      bottleneck_tensor, resized_input_tensor = (tf.import_graph_def(
-          graph_def,
-          name='',
-          return_elements=[
-              model_info['bottleneck_tensor_name'],
-              model_info['resized_input_tensor_name'],
-          ]))
-  return graph, bottleneck_tensor, resized_input_tensor
-
-
-def run_bottleneck_on_image(sess, image_data, image_data_tensor,
-                            decoded_image_tensor, resized_input_tensor,
-                            bottleneck_tensor):
-  """Runs inference on an image to extract the 'bottleneck' summary layer.
-
-  Args:
-    sess: Current active TensorFlow Session.
-    image_data: String of raw JPEG data.
-    image_data_tensor: Input data layer in the graph.
-    decoded_image_tensor: Output of initial image resizing and preprocessing.
-    resized_input_tensor: The input node of the recognition graph.
-    bottleneck_tensor: Layer before the final softmax.
-
-  Returns:
-    Numpy array of bottleneck values.
-  """
-  # First decode the JPEG image, resize it, and rescale the pixel values.
-  resized_input_values = sess.run(decoded_image_tensor,
-                                  {image_data_tensor: image_data})
-  # Then run it through the recognition network.
-  bottleneck_values = sess.run(bottleneck_tensor,
-                               {resized_input_tensor: resized_input_values})
-  bottleneck_values = np.squeeze(bottleneck_values)
-  return bottleneck_values
-
-
-def maybe_download_and_extract(data_url):
-  """Download and extract model tar file.
-
-  If the pretrained model we're using doesn't already exist, this function
-  downloads it from the TensorFlow.org website and unpacks it into a
-  directory.
-
-  Args:
-    data_url: Web location of the tar file containing the pretrained model.
-  """
-  dest_directory = FLAGS.model_dir
-  if not os.path.exists(dest_directory):
-    os.makedirs(dest_directory)
-  filename = data_url.split('/')[-1]
-  filepath = os.path.join(dest_directory, filename)
-  if not os.path.exists(filepath):
-
-    def _progress(count, block_size, total_size):
-      sys.stdout.write('\r>> Downloading %s %.1f%%' %
-                       (filename,
-                        float(count * block_size) / float(total_size) * 100.0))
-      sys.stdout.flush()
-
-    filepath, _ = urllib.request.urlretrieve(data_url, filepath, _progress)
-    print()
-    statinfo = os.stat(filepath)
-    tf.logging.info('Successfully downloaded %s %d bytes.', filename,
-                    statinfo.st_size)
-    print('Extracting file from ', filepath)
-    tarfile.open(filepath, 'r:gz').extractall(dest_directory)
-  else:
-    print('Not extracting or downloading files, model already present on disk')
-
-
-def ensure_dir_exists(dir_name):
-  """Makes sure the folder exists on disk.
-
-  Args:
-    dir_name: Path string to the folder we want to create.
- """ - if not os.path.exists(dir_name): - os.makedirs(dir_name) - - -bottleneck_path_2_bottleneck_values = {} - - -def create_bottleneck_file(bottleneck_path, image_lists, label_name, index, - image_dir, category, sess, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor): - """Create a single bottleneck file.""" - tf.logging.info('Creating bottleneck at ' + bottleneck_path) - image_path = get_image_path(image_lists, label_name, index, - image_dir, category) - if not gfile.Exists(image_path): - tf.logging.fatal('File does not exist %s', image_path) - image_data = gfile.FastGFile(image_path, 'rb').read() - try: - bottleneck_values = run_bottleneck_on_image( - sess, image_data, jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor) - except Exception as e: - raise RuntimeError('Error during processing file %s (%s)' % (image_path, - str(e))) - bottleneck_string = ','.join(str(x) for x in bottleneck_values) - with open(bottleneck_path, 'w') as bottleneck_file: - bottleneck_file.write(bottleneck_string) - - -def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir, - category, bottleneck_dir, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor, architecture): - """Retrieves or calculates bottleneck values for an image. - - If a cached version of the bottleneck data exists on-disk, return that, - otherwise calculate the data and save it to disk for future use. - - Args: - sess: The current active TensorFlow Session. - image_lists: Dictionary of training images for each label. - label_name: Label string we want to get an image for. - index: Integer offset of the image we want. This will be modulo-ed by the - available number of images for the label, so it can be arbitrarily large. - image_dir: Root folder string of the subfolders containing the training - images. - category: Name string of which set to pull images from - training, testing, - or validation. - bottleneck_dir: Folder string holding cached files of bottleneck values. - jpeg_data_tensor: The tensor to feed loaded jpeg data into. - decoded_image_tensor: The output of decoding and resizing the image. - resized_input_tensor: The input node of the recognition graph. - bottleneck_tensor: The output tensor for the bottleneck values. - architecture: The name of the model architecture. - - Returns: - Numpy array of values produced by the bottleneck layer for the image. 
- """ - label_lists = image_lists[label_name] - sub_dir = label_lists['dir'] - sub_dir_path = os.path.join(bottleneck_dir, sub_dir) - ensure_dir_exists(sub_dir_path) - bottleneck_path = get_bottleneck_path(image_lists, label_name, index, - bottleneck_dir, category, architecture) - if not os.path.exists(bottleneck_path): - create_bottleneck_file(bottleneck_path, image_lists, label_name, index, - image_dir, category, sess, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor) - with open(bottleneck_path, 'r') as bottleneck_file: - bottleneck_string = bottleneck_file.read() - did_hit_error = False - try: - bottleneck_values = [float(x) for x in bottleneck_string.split(',')] - except ValueError: - tf.logging.warning('Invalid float found, recreating bottleneck') - did_hit_error = True - if did_hit_error: - create_bottleneck_file(bottleneck_path, image_lists, label_name, index, - image_dir, category, sess, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor) - with open(bottleneck_path, 'r') as bottleneck_file: - bottleneck_string = bottleneck_file.read() - # Allow exceptions to propagate here, since they shouldn't happen after a - # fresh creation - bottleneck_values = [float(x) for x in bottleneck_string.split(',')] - return bottleneck_values - - -def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir, - jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor, architecture): - """Ensures all the training, testing, and validation bottlenecks are cached. - - Because we're likely to read the same image multiple times (if there are no - distortions applied during training) it can speed things up a lot if we - calculate the bottleneck layer values once for each image during - preprocessing, and then just read those cached values repeatedly during - training. Here we go through all the images we've found, calculate those - values, and save them off. - - Args: - sess: The current active TensorFlow Session. - image_lists: Dictionary of training images for each label. - image_dir: Root folder string of the subfolders containing the training - images. - bottleneck_dir: Folder string holding cached files of bottleneck values. - jpeg_data_tensor: Input tensor for jpeg data from file. - decoded_image_tensor: The output of decoding and resizing the image. - resized_input_tensor: The input node of the recognition graph. - bottleneck_tensor: The penultimate output layer of the graph. - architecture: The name of the model architecture. - - Returns: - Nothing. - """ - how_many_bottlenecks = 0 - ensure_dir_exists(bottleneck_dir) - for label_name, label_lists in image_lists.items(): - for category in ['training', 'testing', 'validation']: - category_list = label_lists[category] - for index, unused_base_name in enumerate(category_list): - get_or_create_bottleneck( - sess, image_lists, label_name, index, image_dir, category, - bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor, architecture) - - how_many_bottlenecks += 1 - if how_many_bottlenecks % 100 == 0: - tf.logging.info( - str(how_many_bottlenecks) + ' bottleneck files created.') - - -def get_random_cached_bottlenecks(sess, image_lists, how_many, category, - bottleneck_dir, image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor, architecture): - """Retrieves bottleneck values for cached images. 
- - If no distortions are being applied, this function can retrieve the cached - bottleneck values directly from disk for images. It picks a random set of - images from the specified category. - - Args: - sess: Current TensorFlow Session. - image_lists: Dictionary of training images for each label. - how_many: If positive, a random sample of this size will be chosen. - If negative, all bottlenecks will be retrieved. - category: Name string of which set to pull from - training, testing, or - validation. - bottleneck_dir: Folder string holding cached files of bottleneck values. - image_dir: Root folder string of the subfolders containing the training - images. - jpeg_data_tensor: The layer to feed jpeg image data into. - decoded_image_tensor: The output of decoding and resizing the image. - resized_input_tensor: The input node of the recognition graph. - bottleneck_tensor: The bottleneck output layer of the CNN graph. - architecture: The name of the model architecture. - - Returns: - List of bottleneck arrays, their corresponding ground truths, and the - relevant filenames. - """ - class_count = len(image_lists.keys()) - bottlenecks = [] - ground_truths = [] - filenames = [] - if how_many >= 0: - # Retrieve a random sample of bottlenecks. - for unused_i in range(how_many): - label_index = random.randrange(class_count) - label_name = list(image_lists.keys())[label_index] - image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1) - image_name = get_image_path(image_lists, label_name, image_index, - image_dir, category) - bottleneck = get_or_create_bottleneck( - sess, image_lists, label_name, image_index, image_dir, category, - bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor, architecture) - bottlenecks.append(bottleneck) - ground_truths.append(label_index) - filenames.append(image_name) - else: - # Retrieve all bottlenecks. - for label_index, label_name in enumerate(image_lists.keys()): - for image_index, image_name in enumerate( - image_lists[label_name][category]): - image_name = get_image_path(image_lists, label_name, image_index, - image_dir, category) - bottleneck = get_or_create_bottleneck( - sess, image_lists, label_name, image_index, image_dir, category, - bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor, architecture) - bottlenecks.append(bottleneck) - ground_truths.append(label_index) - filenames.append(image_name) - return bottlenecks, ground_truths, filenames - - -def get_random_distorted_bottlenecks( - sess, image_lists, how_many, category, image_dir, input_jpeg_tensor, - distorted_image, resized_input_tensor, bottleneck_tensor): - """Retrieves bottleneck values for training images, after distortions. - - If we're training with distortions like crops, scales, or flips, we have to - recalculate the full model for every image, and so we can't use cached - bottleneck values. Instead we find random images for the requested category, - run them through the distortion graph, and then the full graph to get the - bottleneck results for each. - - Args: - sess: Current TensorFlow Session. - image_lists: Dictionary of training images for each label. - how_many: The integer number of bottleneck values to return. - category: Name string of which set of images to fetch - training, testing, - or validation. - image_dir: Root folder string of the subfolders containing the training - images. - input_jpeg_tensor: The input layer we feed the image data to. 
-    distorted_image: The output node of the distortion graph.
-    resized_input_tensor: The input node of the recognition graph.
-    bottleneck_tensor: The bottleneck output layer of the CNN graph.
-
-  Returns:
-    List of bottleneck arrays and their corresponding ground truths.
-  """
-  class_count = len(image_lists.keys())
-  bottlenecks = []
-  ground_truths = []
-  for unused_i in range(how_many):
-    label_index = random.randrange(class_count)
-    label_name = list(image_lists.keys())[label_index]
-    image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
-    image_path = get_image_path(image_lists, label_name, image_index,
-                                image_dir, category)
-    if not gfile.Exists(image_path):
-      tf.logging.fatal('File does not exist %s', image_path)
-    jpeg_data = gfile.FastGFile(image_path, 'rb').read()
-    # Note that we materialize the distorted_image_data as a numpy array
-    # before running inference on the image. This involves 2 memory copies
-    # and might be optimized in other implementations.
-    distorted_image_data = sess.run(distorted_image,
-                                    {input_jpeg_tensor: jpeg_data})
-    bottleneck_values = sess.run(bottleneck_tensor,
-                                 {resized_input_tensor: distorted_image_data})
-    bottleneck_values = np.squeeze(bottleneck_values)
-    bottlenecks.append(bottleneck_values)
-    ground_truths.append(label_index)
-  return bottlenecks, ground_truths
-
-
-def should_distort_images(flip_left_right, random_crop, random_scale,
-                          random_brightness):
-  """Whether any distortions are enabled, from the input flags.
-
-  Args:
-    flip_left_right: Boolean whether to randomly mirror images horizontally.
-    random_crop: Integer percentage setting the total margin used around the
-      crop box.
-    random_scale: Integer percentage of how much to vary the scale by.
-    random_brightness: Integer range to randomly multiply the pixel values by.
-
-  Returns:
-    Boolean value indicating whether any distortions should be applied.
-  """
-  return (flip_left_right or (random_crop != 0) or (random_scale != 0) or
-          (random_brightness != 0))
-
-
-def add_input_distortions(flip_left_right, random_crop, random_scale,
-                          random_brightness, input_width, input_height,
-                          input_depth, input_mean, input_std):
-  """Creates the operations to apply the specified distortions.
-
-  During training it can help to improve the results if we run the images
-  through simple distortions like crops, scales, and flips. These reflect the
-  kind of variations we expect in the real world, and so can help train the
-  model to cope with natural data more effectively. Here we take the supplied
-  parameters and construct a network of operations to apply them to an image.
-
-  Cropping
-  ~~~~~~~~
-
-  Cropping is done by placing a bounding box at a random position in the
-  full image. The cropping parameter controls the size of that box relative
-  to the input image. If it's zero, then the box is the same size as the
-  input and no cropping is performed. If the value is 50%, then the crop box
-  will be half the width and height of the input. In a diagram it looks like
-  this:
-
-  <       width         >
-  +---------------------+
-  |                     |
-  |   width - crop%     |
-  |    <      >         |
-  |    +------+         |
-  |    |      |         |
-  |    |      |         |
-  |    |      |         |
-  |    +------+         |
-  |                     |
-  |                     |
-  +---------------------+
-
-  Scaling
-  ~~~~~~~
-
-  Scaling is a lot like cropping, except that the bounding box is always
-  centered and its size varies randomly within the given range. For example,
-  if the scale percentage is zero, then the bounding box is the same size as
-  the input and no scaling is applied. If it's 50%, then the bounding box
-  will vary randomly between half the input's width and height and full
-  size.
-
-  Args:
-    flip_left_right: Boolean whether to randomly mirror images horizontally.
-    random_crop: Integer percentage setting the total margin used around the
-      crop box.
-    random_scale: Integer percentage of how much to vary the scale by.
-    random_brightness: Integer range to randomly multiply the pixel values by.
-    input_width: Horizontal size of expected input image to model.
-    input_height: Vertical size of expected input image to model.
-    input_depth: How many channels the expected input image should have.
-    input_mean: Pixel value that should be zero in the image for the graph.
-    input_std: How much to divide the pixel values by before recognition.
-
-  Returns:
-    The jpeg input layer and the distorted result tensor.
-  """
-
-  jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
-  decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
-  decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
-  decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
-  margin_scale = 1.0 + (random_crop / 100.0)
-  resize_scale = 1.0 + (random_scale / 100.0)
-  margin_scale_value = tf.constant(margin_scale)
-  resize_scale_value = tf.random_uniform(tensor_shape.scalar(),
-                                         minval=1.0,
-                                         maxval=resize_scale)
-  scale_value = tf.multiply(margin_scale_value, resize_scale_value)
-  precrop_width = tf.multiply(scale_value, input_width)
-  precrop_height = tf.multiply(scale_value, input_height)
-  precrop_shape = tf.stack([precrop_height, precrop_width])
-  precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
-  precropped_image = tf.image.resize_bilinear(decoded_image_4d,
-                                              precrop_shape_as_int)
-  precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0])
-  cropped_image = tf.random_crop(precropped_image_3d,
-                                 [input_height, input_width, input_depth])
-  if flip_left_right:
-    flipped_image = tf.image.random_flip_left_right(cropped_image)
-  else:
-    flipped_image = cropped_image
-  brightness_min = 1.0 - (random_brightness / 100.0)
-  brightness_max = 1.0 + (random_brightness / 100.0)
-  brightness_value = tf.random_uniform(tensor_shape.scalar(),
-                                       minval=brightness_min,
-                                       maxval=brightness_max)
-  brightened_image = tf.multiply(flipped_image, brightness_value)
-  offset_image = tf.subtract(brightened_image, input_mean)
-  mul_image = tf.multiply(offset_image, 1.0 / input_std)
-  distort_result = tf.expand_dims(mul_image, 0, name='DistortResult')
-  return jpeg_data, distort_result
-
-
-def variable_summaries(var):
-  """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
-  with tf.name_scope('summaries'):
-    mean = tf.reduce_mean(var)
-    tf.summary.scalar('mean', mean)
-    with tf.name_scope('stddev'):
-      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
-    tf.summary.scalar('stddev', stddev)
-    tf.summary.scalar('max', tf.reduce_max(var))
-    tf.summary.scalar('min', tf.reduce_min(var))
-    tf.summary.histogram('histogram', var)
-
-
-def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor,
-                           bottleneck_tensor_size, quantize_layer):
-  """Adds a new softmax and fully-connected layer for training.
-
-  We need to retrain the top layer to identify our new classes, so this
-  function adds the right operations to the graph, along with some variables
-  to hold the weights, and then sets up all the gradients for the backward
-  pass.
- - The set up for the softmax and fully-connected layers is based on: - https://www.tensorflow.org/versions/master/tutorials/mnist/beginners/index.html - - Args: - class_count: Integer of how many categories of things we're trying to - recognize. - final_tensor_name: Name string for the new final node that produces results. - bottleneck_tensor: The output of the main CNN graph. - bottleneck_tensor_size: How many entries in the bottleneck vector. - quantize_layer: Boolean, specifying whether the newly added layer should be - quantized. - - Returns: - The tensors for the training and cross entropy results, and tensors for the - bottleneck input and ground truth input. - """ - with tf.name_scope('input'): - bottleneck_input = tf.placeholder_with_default( - bottleneck_tensor, - shape=[None, bottleneck_tensor_size], - name='BottleneckInputPlaceholder') - - ground_truth_input = tf.placeholder( - tf.int64, [None], name='GroundTruthInput') - - # Organizing the following ops as `final_training_ops` so they're easier - # to see in TensorBoard - layer_name = 'final_training_ops' - with tf.name_scope(layer_name): - with tf.name_scope('weights'): - initial_value = tf.truncated_normal( - [bottleneck_tensor_size, class_count], stddev=0.001) - layer_weights = tf.Variable(initial_value, name='final_weights') - if quantize_layer: - quantized_layer_weights = quant_ops.MovingAvgQuantize( - layer_weights, is_training=True) - variable_summaries(quantized_layer_weights) - - variable_summaries(layer_weights) - with tf.name_scope('biases'): - layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') - if quantize_layer: - quantized_layer_biases = quant_ops.MovingAvgQuantize( - layer_biases, is_training=True) - variable_summaries(quantized_layer_biases) - - variable_summaries(layer_biases) - - with tf.name_scope('Wx_plus_b'): - if quantize_layer: - logits = tf.matmul(bottleneck_input, - quantized_layer_weights) + quantized_layer_biases - logits = quant_ops.MovingAvgQuantize( - logits, - init_min=-32.0, - init_max=32.0, - is_training=True, - num_bits=8, - narrow_range=False, - ema_decay=0.5) - tf.summary.histogram('pre_activations', logits) - else: - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) - - final_tensor = tf.nn.softmax(logits, name=final_tensor_name) - - tf.summary.histogram('activations', final_tensor) - - with tf.name_scope('cross_entropy'): - cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( - labels=ground_truth_input, logits=logits) - - tf.summary.scalar('cross_entropy', cross_entropy_mean) - - with tf.name_scope('train'): - optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate) - train_step = optimizer.minimize(cross_entropy_mean) - - return (train_step, cross_entropy_mean, bottleneck_input, ground_truth_input, - final_tensor) - - -def add_evaluation_step(result_tensor, ground_truth_tensor): - """Inserts the operations we need to evaluate the accuracy of our results. - - Args: - result_tensor: The new final node that produces results. - ground_truth_tensor: The node we feed ground truth data - into. - - Returns: - Tuple of (evaluation step, prediction). 
-  """
-  with tf.name_scope('accuracy'):
-    with tf.name_scope('correct_prediction'):
-      prediction = tf.argmax(result_tensor, 1)
-      correct_prediction = tf.equal(prediction, ground_truth_tensor)
-    with tf.name_scope('accuracy'):
-      evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
-  tf.summary.scalar('accuracy', evaluation_step)
-  return evaluation_step, prediction
-
-
-def save_graph_to_file(sess, graph, graph_file_name):
-  output_graph_def = graph_util.convert_variables_to_constants(
-      sess, graph.as_graph_def(), [FLAGS.final_tensor_name])
-
-  with gfile.FastGFile(graph_file_name, 'wb') as f:
-    f.write(output_graph_def.SerializeToString())
-  return
-
-
-def prepare_file_system():
-  # Set up the directory we'll write summaries to for TensorBoard.
-  if tf.gfile.Exists(FLAGS.summaries_dir):
-    tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
-  tf.gfile.MakeDirs(FLAGS.summaries_dir)
-  if FLAGS.intermediate_store_frequency > 0:
-    ensure_dir_exists(FLAGS.intermediate_output_graphs_dir)
-  return
-
-
-def create_model_info(architecture):
-  """Given the name of a model architecture, returns information about it.
-
-  There are different base image recognition pretrained models that can be
-  retrained using transfer learning, and this function translates from the
-  name of a model to the attributes that are needed to download and train
-  with it.
-
-  Args:
-    architecture: Name of a model architecture.
-
-  Returns:
-    Dictionary of information about the model, or None if the name isn't
-    recognized.
-
-  Raises:
-    ValueError: If architecture name is unknown.
-  """
-  architecture = architecture.lower()
-  is_quantized = False
-  if architecture == 'inception_v3':
-    # pylint: disable=line-too-long
-    data_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
-    # pylint: enable=line-too-long
-    bottleneck_tensor_name = 'pool_3/_reshape:0'
-    bottleneck_tensor_size = 2048
-    input_width = 299
-    input_height = 299
-    input_depth = 3
-    resized_input_tensor_name = 'Mul:0'
-    model_file_name = 'classify_image_graph_def.pb'
-    input_mean = 128
-    input_std = 128
-  elif architecture.startswith('mobilenet_'):
-    parts = architecture.split('_')
-    if len(parts) != 3 and len(parts) != 4:
-      tf.logging.error("Couldn't understand architecture name '%s'",
-                       architecture)
-      return None
-    version_string = parts[1]
-    if (version_string != '1.0' and version_string != '0.75' and
-        version_string != '0.50' and version_string != '0.25'):
-      tf.logging.error(
-          """The Mobilenet version should be '1.0', '0.75', '0.50', or '0.25',
-          but found '%s' for architecture '%s'""",
-          version_string, architecture)
-      return None
-    size_string = parts[2]
-    if (size_string != '224' and size_string != '192' and
-        size_string != '160' and size_string != '128'):
-      tf.logging.error(
-          """The Mobilenet input size should be '224', '192', '160', or '128',
-          but found '%s' for architecture '%s'""",
-          size_string, architecture)
-      return None
-    if len(parts) == 3:
-      is_quantized = False
-    else:
-      if parts[3] != 'quantized':
-        tf.logging.error(
-            "Couldn't understand architecture suffix '%s' for '%s'", parts[3],
-            architecture)
-        return None
-      is_quantized = True
-
-    if is_quantized:
-      data_url = 'http://download.tensorflow.org/models/mobilenet_v1_'
-      data_url += version_string + '_' + size_string + '_quantized_frozen.tgz'
-      bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0'
-      resized_input_tensor_name = 'Placeholder:0'
-      model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string +
-                        '_quantized_frozen')
-      model_base_name = 'quantized_frozen_graph.pb'
-
-    else:
-      data_url = 'http://download.tensorflow.org/models/mobilenet_v1_'
-      data_url += version_string + '_' + size_string + '_frozen.tgz'
-      bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0'
-      resized_input_tensor_name = 'input:0'
-      model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string
-      model_base_name = 'frozen_graph.pb'
-
-    bottleneck_tensor_size = 1001
-    input_width = int(size_string)
-    input_height = int(size_string)
-    input_depth = 3
-    model_file_name = os.path.join(model_dir_name, model_base_name)
-    input_mean = 127.5
-    input_std = 127.5
-  else:
-    tf.logging.error("Couldn't understand architecture name '%s'",
-                     architecture)
-    raise ValueError('Unknown architecture', architecture)
-
-  return {
-      'data_url': data_url,
-      'bottleneck_tensor_name': bottleneck_tensor_name,
-      'bottleneck_tensor_size': bottleneck_tensor_size,
-      'input_width': input_width,
-      'input_height': input_height,
-      'input_depth': input_depth,
-      'resized_input_tensor_name': resized_input_tensor_name,
-      'model_file_name': model_file_name,
-      'input_mean': input_mean,
-      'input_std': input_std,
-      'quantize_layer': is_quantized,
-  }
-
-
-def add_jpeg_decoding(input_width, input_height, input_depth, input_mean,
-                      input_std):
-  """Adds operations that perform JPEG decoding and resizing to the graph.
-
-  Args:
-    input_width: Desired width of the image fed into the recognizer graph.
-    input_height: Desired height of the image fed into the recognizer graph.
-    input_depth: Desired channels of the image fed into the recognizer graph.
-    input_mean: Pixel value that should be zero in the image for the graph.
-    input_std: How much to divide the pixel values by before recognition.
-
-  Returns:
-    Tensors for the node to feed JPEG data into, and the output of the
-    preprocessing steps.
-  """
-  jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
-  decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
-  decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
-  decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
-  resize_shape = tf.stack([input_height, input_width])
-  resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32)
-  resized_image = tf.image.resize_bilinear(decoded_image_4d,
-                                           resize_shape_as_int)
-  offset_image = tf.subtract(resized_image, input_mean)
-  mul_image = tf.multiply(offset_image, 1.0 / input_std)
-  return jpeg_data, mul_image
-
-
-def main(_):
-  # Needed to make sure the logging output is visible.
-  # See https://github.com/tensorflow/tensorflow/issues/3047
-  tf.logging.set_verbosity(tf.logging.INFO)
-
-  # Prepare necessary directories that can be used during training.
-  prepare_file_system()
-
-  # Gather information about the model architecture we'll be using.
-  model_info = create_model_info(FLAGS.architecture)
-  if not model_info:
-    tf.logging.error('Did not recognize architecture flag')
-    return -1
-
-  # Set up the pre-trained graph.
-  maybe_download_and_extract(model_info['data_url'])
-  graph, bottleneck_tensor, resized_image_tensor = (
-      create_model_graph(model_info))
-
-  # Look at the folder structure, and create lists of all the images.
- image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage, - FLAGS.validation_percentage) - class_count = len(image_lists.keys()) - if class_count == 0: - tf.logging.error('No valid folders of images found at ' + FLAGS.image_dir) - return -1 - if class_count == 1: - tf.logging.error('Only one valid folder of images found at ' + - FLAGS.image_dir + - ' - multiple classes are needed for classification.') - return -1 - - # See if the command-line flags mean we're applying any distortions. - do_distort_images = should_distort_images( - FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, - FLAGS.random_brightness) - - with tf.Session(graph=graph) as sess: - # Set up the image decoding sub-graph. - jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding( - model_info['input_width'], model_info['input_height'], - model_info['input_depth'], model_info['input_mean'], - model_info['input_std']) - - if do_distort_images: - # We will be applying distortions, so setup the operations we'll need. - (distorted_jpeg_data_tensor, - distorted_image_tensor) = add_input_distortions( - FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, - FLAGS.random_brightness, model_info['input_width'], - model_info['input_height'], model_info['input_depth'], - model_info['input_mean'], model_info['input_std']) - else: - # We'll make sure we've calculated the 'bottleneck' image summaries and - # cached them on disk. - cache_bottlenecks(sess, image_lists, FLAGS.image_dir, - FLAGS.bottleneck_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, - bottleneck_tensor, FLAGS.architecture) - - # Add the new layer that we'll be training. - (train_step, cross_entropy, bottleneck_input, ground_truth_input, - final_tensor) = add_final_training_ops( - len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size'], model_info['quantize_layer']) - - # Create the operations we need to evaluate the accuracy of our new layer. - evaluation_step, prediction = add_evaluation_step( - final_tensor, ground_truth_input) - - # Merge all the summaries and write them out to the summaries_dir - merged = tf.summary.merge_all() - train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', - sess.graph) - - validation_writer = tf.summary.FileWriter( - FLAGS.summaries_dir + '/validation') - - # Set up all our weights to their initial default values. - init = tf.global_variables_initializer() - sess.run(init) - - # Run the training for as many cycles as requested on the command line. - for i in range(FLAGS.how_many_training_steps): - # Get a batch of input bottleneck values, either calculated fresh every - # time with distortions applied, or from the cache stored on disk. - if do_distort_images: - (train_bottlenecks, - train_ground_truth) = get_random_distorted_bottlenecks( - sess, image_lists, FLAGS.train_batch_size, 'training', - FLAGS.image_dir, distorted_jpeg_data_tensor, - distorted_image_tensor, resized_image_tensor, bottleneck_tensor) - else: - (train_bottlenecks, - train_ground_truth, _) = get_random_cached_bottlenecks( - sess, image_lists, FLAGS.train_batch_size, 'training', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture) - # Feed the bottlenecks and ground truth into the graph, and run a training - # step. Capture training summaries for TensorBoard with the `merged` op. 
- train_summary, _ = sess.run( - [merged, train_step], - feed_dict={bottleneck_input: train_bottlenecks, - ground_truth_input: train_ground_truth}) - train_writer.add_summary(train_summary, i) - - # Every so often, print out how well the graph is training. - is_last_step = (i + 1 == FLAGS.how_many_training_steps) - if (i % FLAGS.eval_step_interval) == 0 or is_last_step: - train_accuracy, cross_entropy_value = sess.run( - [evaluation_step, cross_entropy], - feed_dict={bottleneck_input: train_bottlenecks, - ground_truth_input: train_ground_truth}) - tf.logging.info('%s: Step %d: Train accuracy = %.1f%%' % - (datetime.now(), i, train_accuracy * 100)) - tf.logging.info('%s: Step %d: Cross entropy = %f' % - (datetime.now(), i, cross_entropy_value)) - validation_bottlenecks, validation_ground_truth, _ = ( - get_random_cached_bottlenecks( - sess, image_lists, FLAGS.validation_batch_size, 'validation', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture)) - # Run a validation step and capture training summaries for TensorBoard - # with the `merged` op. - validation_summary, validation_accuracy = sess.run( - [merged, evaluation_step], - feed_dict={bottleneck_input: validation_bottlenecks, - ground_truth_input: validation_ground_truth}) - validation_writer.add_summary(validation_summary, i) - tf.logging.info('%s: Step %d: Validation accuracy = %.1f%% (N=%d)' % - (datetime.now(), i, validation_accuracy * 100, - len(validation_bottlenecks))) - - # Store intermediate results - intermediate_frequency = FLAGS.intermediate_store_frequency - - if (intermediate_frequency > 0 and (i % intermediate_frequency == 0) - and i > 0): - intermediate_file_name = (FLAGS.intermediate_output_graphs_dir + - 'intermediate_' + str(i) + '.pb') - tf.logging.info('Save intermediate result to : ' + - intermediate_file_name) - save_graph_to_file(sess, graph, intermediate_file_name) - - # We've completed all our training, so run a final test evaluation on - # some new images we haven't used before. - test_bottlenecks, test_ground_truth, test_filenames = ( - get_random_cached_bottlenecks( - sess, image_lists, FLAGS.test_batch_size, 'testing', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture)) - test_accuracy, predictions = sess.run( - [evaluation_step, prediction], - feed_dict={bottleneck_input: test_bottlenecks, - ground_truth_input: test_ground_truth}) - tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % - (test_accuracy * 100, len(test_bottlenecks))) - - if FLAGS.print_misclassified_test_images: - tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') - for i, test_filename in enumerate(test_filenames): - if predictions[i] != test_ground_truth[i]: - tf.logging.info('%70s %s' % - (test_filename, - list(image_lists.keys())[predictions[i]])) - - # Write out the trained graph and labels with the weights stored as - # constants. - save_graph_to_file(sess, graph, FLAGS.output_graph) - with gfile.FastGFile(FLAGS.output_labels, 'w') as f: - f.write('\n'.join(image_lists.keys()) + '\n') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument( - '--image_dir', - type=str, - default='', - help='Path to folders of labeled images.' - ) - parser.add_argument( - '--output_graph', - type=str, - default='/tmp/output_graph.pb', - help='Where to save the trained graph.' 
- ) - parser.add_argument( - '--intermediate_output_graphs_dir', - type=str, - default='/tmp/intermediate_graph/', - help='Where to save the intermediate graphs.' - ) - parser.add_argument( - '--intermediate_store_frequency', - type=int, - default=0, - help="""\ - How many steps to store intermediate graph. If "0" then will not - store.\ - """ - ) - parser.add_argument( - '--output_labels', - type=str, - default='/tmp/output_labels.txt', - help='Where to save the trained graph\'s labels.' - ) - parser.add_argument( - '--summaries_dir', - type=str, - default='/tmp/retrain_logs', - help='Where to save summary logs for TensorBoard.' - ) - parser.add_argument( - '--how_many_training_steps', - type=int, - default=4000, - help='How many training steps to run before ending.' - ) - parser.add_argument( - '--learning_rate', - type=float, - default=0.01, - help='How large a learning rate to use when training.' - ) - parser.add_argument( - '--testing_percentage', - type=int, - default=10, - help='What percentage of images to use as a test set.' - ) - parser.add_argument( - '--validation_percentage', - type=int, - default=10, - help='What percentage of images to use as a validation set.' - ) - parser.add_argument( - '--eval_step_interval', - type=int, - default=10, - help='How often to evaluate the training results.' - ) - parser.add_argument( - '--train_batch_size', - type=int, - default=100, - help='How many images to train on at a time.' - ) - parser.add_argument( - '--test_batch_size', - type=int, - default=-1, - help="""\ - How many images to test on. This test set is only used once, to evaluate - the final accuracy of the model after training completes. - A value of -1 causes the entire test set to be used, which leads to more - stable results across runs.\ - """ - ) - parser.add_argument( - '--validation_batch_size', - type=int, - default=100, - help="""\ - How many images to use in an evaluation batch. This validation set is - used much more often than the test set, and is an early indicator of how - accurate the model is during training. - A value of -1 causes the entire validation set to be used, which leads to - more stable results across training iterations, but may be slower on large - training sets.\ - """ - ) - parser.add_argument( - '--print_misclassified_test_images', - default=False, - help="""\ - Whether to print out a list of all misclassified test images.\ - """, - action='store_true' - ) - parser.add_argument( - '--model_dir', - type=str, - default='/tmp/imagenet', - help="""\ - Path to classify_image_graph_def.pb, - imagenet_synset_to_human_label_map.txt, and - imagenet_2012_challenge_label_map_proto.pbtxt.\ - """ - ) - parser.add_argument( - '--bottleneck_dir', - type=str, - default='/tmp/bottleneck', - help='Path to cache bottleneck layer values as files.' 
-  )
-  parser.add_argument(
-      '--final_tensor_name',
-      type=str,
-      default='final_result',
-      help="""\
-      The name of the output classification layer in the retrained graph.\
-      """
-  )
-  parser.add_argument(
-      '--flip_left_right',
-      default=False,
-      help="""\
-      Whether to randomly flip half of the training images horizontally.\
-      """,
-      action='store_true'
-  )
-  parser.add_argument(
-      '--random_crop',
-      type=int,
-      default=0,
-      help="""\
-      A percentage determining how much of a margin to randomly crop off the
-      training images.\
-      """
-  )
-  parser.add_argument(
-      '--random_scale',
-      type=int,
-      default=0,
-      help="""\
-      A percentage determining how much to randomly scale up the size of the
-      training images by.\
-      """
-  )
-  parser.add_argument(
-      '--random_brightness',
-      type=int,
-      default=0,
-      help="""\
-      A percentage determining how much to randomly multiply the training
-      image input pixels up or down by.\
-      """
-  )
-  parser.add_argument(
-      '--architecture',
-      type=str,
-      default='inception_v3',
-      help="""\
-      Which model architecture to use. 'inception_v3' is the most accurate,
-      but also the slowest. For faster or smaller models, choose a MobileNet
-      with the form 'mobilenet_<parameter size>_<input_size>[_quantized]'.
-      For example, 'mobilenet_1.0_224' will pick a model that is 17 MB in
-      size and takes 224 pixel input images, while
-      'mobilenet_0.25_128_quantized' will choose a much less accurate, but
-      smaller and faster network that's 920 KB on disk and takes 128x128
-      images. See
-      https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html
-      for more information on Mobilenet.\
-      """)
-  FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py
deleted file mode 100644
index 8b8dd45fd7..0000000000
--- a/tensorflow/examples/image_retraining/retrain_test.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================== -# pylint: disable=g-bad-import-order,unused-import -"""Tests the graph freezing tool.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -import os - -from tensorflow.examples.image_retraining import retrain -from tensorflow.python.framework import test_util - - -class ImageRetrainingTest(test_util.TensorFlowTestCase): - - def dummyImageLists(self): - return {'label_one': {'dir': 'somedir', 'training': ['image_one.jpg', - 'image_two.jpg'], - 'testing': ['image_three.jpg', 'image_four.jpg'], - 'validation': ['image_five.jpg', 'image_six.jpg']}, - 'label_two': {'dir': 'otherdir', 'training': ['image_one.jpg', - 'image_two.jpg'], - 'testing': ['image_three.jpg', 'image_four.jpg'], - 'validation': ['image_five.jpg', 'image_six.jpg']}} - - def testGetImagePath(self): - image_lists = self.dummyImageLists() - self.assertEqual('image_dir/somedir/image_one.jpg', retrain.get_image_path( - image_lists, 'label_one', 0, 'image_dir', 'training')) - self.assertEqual('image_dir/otherdir/image_four.jpg', - retrain.get_image_path(image_lists, 'label_two', 1, - 'image_dir', 'testing')) - - def testGetBottleneckPath(self): - image_lists = self.dummyImageLists() - self.assertEqual('bottleneck_dir/somedir/image_five.jpg_imagenet_v3.txt', - retrain.get_bottleneck_path( - image_lists, 'label_one', 0, 'bottleneck_dir', - 'validation', 'imagenet_v3')) - - def testShouldDistortImage(self): - self.assertEqual(False, retrain.should_distort_images(False, 0, 0, 0)) - self.assertEqual(True, retrain.should_distort_images(True, 0, 0, 0)) - self.assertEqual(True, retrain.should_distort_images(False, 10, 0, 0)) - self.assertEqual(True, retrain.should_distort_images(False, 0, 1, 0)) - self.assertEqual(True, retrain.should_distort_images(False, 0, 0, 50)) - - def testAddInputDistortions(self): - with tf.Graph().as_default(): - with tf.Session() as sess: - retrain.add_input_distortions(True, 10, 10, 10, 299, 299, 3, 128, 128) - self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortJPGInput:0')) - self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortResult:0')) - - @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOps(self, flags_mock): - with tf.Graph().as_default(): - with tf.Session() as sess: - bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, False) - self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) - - @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOpsQuantized(self, flags_mock): - with tf.Graph().as_default(): - with tf.Session() as sess: - bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, True) - self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) - - def testAddEvaluationStep(self): - with tf.Graph().as_default(): - final = tf.placeholder(tf.float32, [1], name='final') - gt = tf.placeholder(tf.int64, [1], name='gt') - self.assertIsNotNone(retrain.add_evaluation_step(final, gt)) - - def testAddJpegDecoding(self): - with tf.Graph().as_default(): - jpeg_data, mul_image = retrain.add_jpeg_decoding(10, 10, 3, 0, 255) - self.assertIsNotNone(jpeg_data) - 
self.assertIsNotNone(mul_image) - - def testCreateModelInfo(self): - did_raise_value_error = False - try: - retrain.create_model_info('no_such_model_name') - except ValueError: - did_raise_value_error = True - self.assertTrue(did_raise_value_error) - model_info = retrain.create_model_info('inception_v3') - self.assertIsNotNone(model_info) - self.assertEqual(299, model_info['input_width']) - - def testCreateModelInfoQuantized(self): - # Test for mobilenet_quantized - model_info = retrain.create_model_info('mobilenet_1.0_224') - self.assertIsNotNone(model_info) - self.assertEqual(224, model_info['input_width']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/examples/ios/.gitignore b/tensorflow/examples/ios/.gitignore index e572b3012c..dbabfb33bf 100644 --- a/tensorflow/examples/ios/.gitignore +++ b/tensorflow/examples/ios/.gitignore @@ -2,3 +2,6 @@ project.xcworkspace xcuserdata imagenet_comp_graph_label_strings.txt tensorflow_inception_graph.pb +simple/data/LICENSE +camera/data/LICENSE +benchmark/data/LICENSE diff --git a/tensorflow/examples/ios/README.md b/tensorflow/examples/ios/README.md index 5bdaeb43ce..5d7bd36837 100644 --- a/tensorflow/examples/ios/README.md +++ b/tensorflow/examples/ios/README.md @@ -119,11 +119,13 @@ rundown: `tensorflow/contrib/makefile/gen/lib` to the Library Search Paths setting. - You'll also need to add `libprotobuf.a` and `libprotobuf-lite.a` from - `tensorflow/contrib/makefile/gen/protobuf_ios/lib` to your _Build Stages_ and - _Library Search Paths_. + `tensorflow/contrib/makefile/gen/protobuf_ios/lib` + and `nsync.a` from `tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11` + to your _Build Stages_ and _Library Search Paths_. - The _Header Search_ paths needs to contain: - the root folder of tensorflow, + - `tensorflow/contrib/makefile/downloads/nsync/public` - `tensorflow/contrib/makefile/downloads/protobuf/src` - `tensorflow/contrib/makefile/downloads`, - `tensorflow/contrib/makefile/downloads/eigen`, and diff --git a/tensorflow/examples/label_image/BUILD b/tensorflow/examples/label_image/BUILD index 9207fc6332..c50fd93d03 100644 --- a/tensorflow/examples/label_image/BUILD +++ b/tensorflow/examples/label_image/BUILD @@ -9,6 +9,8 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +exports_files(["data/grace_hopper.jpg"]) + load("//tensorflow:tensorflow.bzl", "tf_cc_binary") tf_cc_binary( @@ -51,16 +53,12 @@ tf_cc_binary( }), ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], +py_binary( + name = "label_image_py", + srcs = ["label_image.py"], + main = "label_image.py", + srcs_version = "PY2AND3", + deps = [ + "//tensorflow:tensorflow_py", + ], ) diff --git a/tensorflow/examples/label_image/README.md b/tensorflow/examples/label_image/README.md index a9e44745e5..cfd0132a7a 100644 --- a/tensorflow/examples/label_image/README.md +++ b/tensorflow/examples/label_image/README.md @@ -73,10 +73,23 @@ Python than the Python code mentioned in the [Inception tutorial](https://www.tensorflow.org/tutorials/image_recognition/). and could be easier to add visualization or debug code. -With tensorflow python package installed, you can run it like: + +`bazel-bin/tensorflow/examples/label_image/label_image_py` should be there after +```bash +$ bazel build tensorflow/examples/label_image/... 
+``` + +Run + +```bash +$ bazel-bin/tensorflow/examples/label_image/label_image_py +``` + +Or, with tensorflow python package installed, you can run it like: ```bash $ python3 tensorflow/examples/label_image/label_image.py ``` + And get result similar to this: ``` military uniform 0.834305 diff --git a/tensorflow/examples/label_image/label_image.py b/tensorflow/examples/label_image/label_image.py index 39d0981337..fe5e0fc684 100644 --- a/tensorflow/examples/label_image/label_image.py +++ b/tensorflow/examples/label_image/label_image.py @@ -18,11 +18,11 @@ from __future__ import division from __future__ import print_function import argparse -import sys import numpy as np import tensorflow as tf + def load_graph(model_file): graph = tf.Graph() graph_def = tf.GraphDef() @@ -34,24 +34,28 @@ def load_graph(model_file): return graph -def read_tensor_from_image_file(file_name, input_height=299, input_width=299, - input_mean=0, input_std=255): + +def read_tensor_from_image_file(file_name, + input_height=299, + input_width=299, + input_mean=0, + input_std=255): input_name = "file_reader" output_name = "normalized" file_reader = tf.read_file(file_name, input_name) if file_name.endswith(".png"): - image_reader = tf.image.decode_png(file_reader, channels = 3, - name='png_reader') + image_reader = tf.image.decode_png( + file_reader, channels=3, name="png_reader") elif file_name.endswith(".gif"): - image_reader = tf.squeeze(tf.image.decode_gif(file_reader, - name='gif_reader')) + image_reader = tf.squeeze( + tf.image.decode_gif(file_reader, name="gif_reader")) elif file_name.endswith(".bmp"): - image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader') + image_reader = tf.image.decode_bmp(file_reader, name="bmp_reader") else: - image_reader = tf.image.decode_jpeg(file_reader, channels = 3, - name='jpeg_reader') + image_reader = tf.image.decode_jpeg( + file_reader, channels=3, name="jpeg_reader") float_caster = tf.cast(image_reader, tf.float32) - dims_expander = tf.expand_dims(float_caster, 0); + dims_expander = tf.expand_dims(float_caster, 0) resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width]) normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std]) sess = tf.Session() @@ -59,6 +63,7 @@ def read_tensor_from_image_file(file_name, input_height=299, input_width=299, return result + def load_labels(label_file): label = [] proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines() @@ -66,6 +71,7 @@ def load_labels(label_file): label.append(l.rstrip()) return label + if __name__ == "__main__": file_name = "tensorflow/examples/label_image/data/grace_hopper.jpg" model_file = \ @@ -110,20 +116,22 @@ if __name__ == "__main__": output_layer = args.output_layer graph = load_graph(model_file) - t = read_tensor_from_image_file(file_name, - input_height=input_height, - input_width=input_width, - input_mean=input_mean, - input_std=input_std) + t = read_tensor_from_image_file( + file_name, + input_height=input_height, + input_width=input_width, + input_mean=input_mean, + input_std=input_std) input_name = "import/" + input_layer output_name = "import/" + output_layer - input_operation = graph.get_operation_by_name(input_name); - output_operation = graph.get_operation_by_name(output_name); + input_operation = graph.get_operation_by_name(input_name) + output_operation = graph.get_operation_by_name(output_name) with tf.Session(graph=graph) as sess: - results = sess.run(output_operation.outputs[0], - {input_operation.outputs[0]: t}) + results = 
sess.run(output_operation.outputs[0], { + input_operation.outputs[0]: t + }) results = np.squeeze(results) top_k = results.argsort()[-5:][::-1] diff --git a/tensorflow/examples/label_image/main.cc b/tensorflow/examples/label_image/main.cc index 63bc39de6c..baa65d3243 100644 --- a/tensorflow/examples/label_image/main.cc +++ b/tensorflow/examples/label_image/main.cc @@ -49,6 +49,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" @@ -137,15 +138,15 @@ Status ReadTensorFromImageFile(const string& file_name, const int input_height, // Now try to figure out what kind of file it is and decode it. const int wanted_channels = 3; tensorflow::Output image_reader; - if (tensorflow::StringPiece(file_name).ends_with(".png")) { + if (tensorflow::str_util::EndsWith(file_name, ".png")) { image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, DecodePng::Channels(wanted_channels)); - } else if (tensorflow::StringPiece(file_name).ends_with(".gif")) { + } else if (tensorflow::str_util::EndsWith(file_name, ".gif")) { // gif decoder returns 4-D tensor, remove the first dim image_reader = Squeeze(root.WithOpName("squeeze_first_dim"), DecodeGif(root.WithOpName("gif_reader"), file_reader)); - } else if (tensorflow::StringPiece(file_name).ends_with(".bmp")) { + } else if (tensorflow::str_util::EndsWith(file_name, ".bmp")) { image_reader = DecodeBmp(root.WithOpName("bmp_reader"), file_reader); } else { // Assume if it's neither a PNG nor a GIF then it must be a JPEG. diff --git a/tensorflow/examples/learn/BUILD b/tensorflow/examples/learn/BUILD index aba7f600b5..bdbcb0b163 100644 --- a/tensorflow/examples/learn/BUILD +++ b/tensorflow/examples/learn/BUILD @@ -152,15 +152,3 @@ sh_test( "notap", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 03e60972aa..86f5204ec3 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -21,7 +21,8 @@ from __future__ import division from __future__ import print_function import os -import urllib + +from six.moves.urllib.request import urlretrieve import tensorflow as tf @@ -38,9 +39,7 @@ FEATURE_KEYS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'] def maybe_download_iris_data(file_name, download_url): """Downloads the file and returns the number of data.""" if not os.path.exists(file_name): - raw = urllib.urlopen(download_url).read() - with open(file_name, 'w') as f: - f.write(raw) + urlretrieve(download_url, file_name) # The first line is a comma-separated string. The first one is the number of # total data in the file. diff --git a/tensorflow/examples/learn/mnist.py b/tensorflow/examples/learn/mnist.py index 98819b20bf..3ead8614b6 100644 --- a/tensorflow/examples/learn/mnist.py +++ b/tensorflow/examples/learn/mnist.py @@ -61,8 +61,10 @@ def conv_model(features, labels, mode): # Densely connected layer with 1024 neurons. 
h_fc1 = tf.layers.dense(h_pool2_flat, 1024, activation=tf.nn.relu) - if mode == tf.estimator.ModeKeys.TRAIN: - h_fc1 = tf.layers.dropout(h_fc1, rate=0.5) + h_fc1 = tf.layers.dropout( + h_fc1, + rate=0.5, + training=(mode == tf.estimator.ModeKeys.TRAIN)) # Compute logits (1 per class) and compute loss. logits = tf.layers.dense(h_fc1, N_DIGITS, activation=None) diff --git a/tensorflow/examples/learn/resnet.py b/tensorflow/examples/learn/resnet.py index 9542e55250..c00de932a8 100755 --- a/tensorflow/examples/learn/resnet.py +++ b/tensorflow/examples/learn/resnet.py @@ -53,6 +53,8 @@ def res_net_model(features, labels, mode): ndim = int(sqrt(input_shape[1])) x = tf.reshape(x, [-1, ndim, ndim, 1]) + training = (mode == tf.estimator.ModeKeys.TRAIN) + # First convolution expands to 64 channels with tf.variable_scope('conv_layer1'): net = tf.layers.conv2d( @@ -60,7 +62,7 @@ def res_net_model(features, labels, mode): filters=64, kernel_size=7, activation=tf.nn.relu) - net = tf.layers.batch_normalization(net) + net = tf.layers.batch_normalization(net, training=training) # Max pool net = tf.layers.max_pooling2d( @@ -88,7 +90,7 @@ def res_net_model(features, labels, mode): kernel_size=1, padding='valid', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) with tf.variable_scope(name + '/conv_bottleneck'): conv = tf.layers.conv2d( @@ -97,7 +99,7 @@ def res_net_model(features, labels, mode): kernel_size=3, padding='same', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) # 1x1 convolution responsible for restoring dimension with tf.variable_scope(name + '/conv_out'): @@ -108,7 +110,7 @@ def res_net_model(features, labels, mode): kernel_size=1, padding='valid', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) # shortcut connections that turn the network into its counterpart # residual function (identity shortcut) @@ -154,7 +156,7 @@ def res_net_model(features, labels, mode): loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) # Create training op. - if mode == tf.estimator.ModeKeys.TRAIN: + if training: optimizer = tf.train.AdagradOptimizer(learning_rate=0.01) train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) diff --git a/tensorflow/examples/learn/text_classification.py b/tensorflow/examples/learn/text_classification.py index eb117c39a1..e4e61862b0 100644 --- a/tensorflow/examples/learn/text_classification.py +++ b/tensorflow/examples/learn/text_classification.py @@ -34,8 +34,7 @@ MAX_LABEL = 15 WORDS_FEATURE = 'words' # Name of the input words feature. 
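The mnist.py hunk continuing here and the resnet.py hunks below make the same correction: `tf.layers.dropout` and `tf.layers.batch_normalization` only switch to inference behavior when given a `training` flag, so the mode check is computed once and threaded through every call instead of wrapping layers in `if` blocks. A hedged sketch of the pattern on a toy model (not code from this diff); note that batch normalization additionally needs its `UPDATE_OPS` run with the train op, shown here for completeness:

```python
import tensorflow as tf


def model_fn(features, labels, mode):
  training = (mode == tf.estimator.ModeKeys.TRAIN)

  net = tf.layers.dense(features['x'], 128, activation=tf.nn.relu)
  # Both layers take the same boolean: dropout becomes a no-op and batch
  # norm uses its moving averages whenever training=False.
  net = tf.layers.batch_normalization(net, training=training)
  net = tf.layers.dropout(net, rate=0.5, training=training)
  logits = tf.layers.dense(net, 10)

  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
  if training:
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    # Batch norm's moving-average updates live in UPDATE_OPS and must run
    # alongside the training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(
          loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
  return tf.estimator.EstimatorSpec(mode, loss=loss)
```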
-def estimator_spec_for_softmax_classification( - logits, labels, mode): +def estimator_spec_for_softmax_classification(logits, labels, mode): """Returns EstimatorSpec instance for softmax classification.""" predicted_classes = tf.argmax(logits, 1) if mode == tf.estimator.ModeKeys.PREDICT: @@ -53,8 +52,8 @@ def estimator_spec_for_softmax_classification( return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) eval_metric_ops = { - 'accuracy': tf.metrics.accuracy( - labels=labels, predictions=predicted_classes) + 'accuracy': + tf.metrics.accuracy(labels=labels, predictions=predicted_classes) } return tf.estimator.EstimatorSpec( mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) @@ -67,8 +66,7 @@ def bag_of_words_model(features, labels, mode): bow_embedding_column = tf.feature_column.embedding_column( bow_column, dimension=EMBEDDING_SIZE) bow = tf.feature_column.input_layer( - features, - feature_columns=[bow_embedding_column]) + features, feature_columns=[bow_embedding_column]) logits = tf.layers.dense(bow, MAX_LABEL, activation=None) return estimator_spec_for_softmax_classification( @@ -110,9 +108,9 @@ def main(unused_argv): # Prepare training and testing data dbpedia = tf.contrib.learn.datasets.load_dataset( 'dbpedia', test_with_fake_data=FLAGS.test_with_fake_data) - x_train = pandas.Series(dbpedia.train.data[:,1]) + x_train = pandas.Series(dbpedia.train.data[:, 1]) y_train = pandas.Series(dbpedia.train.target) - x_test = pandas.Series(dbpedia.test.data[:,1]) + x_test = pandas.Series(dbpedia.test.data[:, 1]) y_test = pandas.Series(dbpedia.test.target) # Process vocabulary @@ -152,10 +150,7 @@ def main(unused_argv): # Predict. test_input_fn = tf.estimator.inputs.numpy_input_fn( - x={WORDS_FEATURE: x_test}, - y=y_test, - num_epochs=1, - shuffle=False) + x={WORDS_FEATURE: x_test}, y=y_test, num_epochs=1, shuffle=False) predictions = classifier.predict(input_fn=test_input_fn) y_predicted = np.array(list(p['class'] for p in predictions)) y_predicted = y_predicted.reshape(np.array(y_test).shape) diff --git a/tensorflow/examples/learn/text_classification_cnn.py b/tensorflow/examples/learn/text_classification_cnn.py index 9e21aee87f..a40a9eaecb 100644 --- a/tensorflow/examples/learn/text_classification_cnn.py +++ b/tensorflow/examples/learn/text_classification_cnn.py @@ -73,7 +73,7 @@ def cnn_model(features, labels, mode): kernel_size=FILTER_SHAPE2, padding='VALID') # Max across each filter to get useful features for classification. - pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1]) + pool2 = tf.squeeze(tf.reduce_max(conv2, 1), axis=[1]) # Apply regular WX + B and classification. logits = tf.layers.dense(pool2, MAX_LABEL, activation=None) diff --git a/tensorflow/examples/multibox_detector/BUILD b/tensorflow/examples/multibox_detector/BUILD index 91a5bfa51c..4f9908cd52 100644 --- a/tensorflow/examples/multibox_detector/BUILD +++ b/tensorflow/examples/multibox_detector/BUILD @@ -27,17 +27,3 @@ tf_cc_binary( "//tensorflow/core:tensorflow", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/multibox_detector/main.cc b/tensorflow/examples/multibox_detector/main.cc index e38704fd98..96ea525a4e 100644 --- a/tensorflow/examples/multibox_detector/main.cc +++ b/tensorflow/examples/multibox_detector/main.cc @@ -30,6 +30,7 @@ limitations under the License. 
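For context, the helper reformatted above handles all three estimator modes in order: PREDICT returns early with predictions only, TRAIN attaches a train op, and everything else falls through to EVAL metrics. A condensed, self-contained version consistent with the hunks above (the Adam optimizer and 0.01 learning rate follow the surrounding file):

```python
import tensorflow as tf


def estimator_spec_for_softmax_classification(logits, labels, mode):
  """Returns an EstimatorSpec covering all three estimator modes."""
  predicted_classes = tf.argmax(logits, 1)
  # PREDICT needs only the predictions; no labels are available.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={
            'class': predicted_classes,
            'prob': tf.nn.softmax(logits)
        })

  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
  # TRAIN additionally needs a train_op.
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(
        loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

  # EVAL reports the loss plus any metrics.
  eval_metric_ops = {
      'accuracy':
          tf.metrics.accuracy(labels=labels, predictions=predicted_classes)
  }
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
```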
#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/logging.h" @@ -84,10 +85,10 @@ Status ReadTensorFromImageFile(const string& file_name, const int input_height, // Now try to figure out what kind of file it is and decode it. const int wanted_channels = 3; tensorflow::Output image_reader; - if (tensorflow::StringPiece(file_name).ends_with(".png")) { + if (tensorflow::str_util::EndsWith(file_name, ".png")) { image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, DecodePng::Channels(wanted_channels)); - } else if (tensorflow::StringPiece(file_name).ends_with(".gif")) { + } else if (tensorflow::str_util::EndsWith(file_name, ".gif")) { image_reader = DecodeGif(root.WithOpName("gif_reader"), file_reader); } else { // Assume if it's neither a PNG nor a GIF then it must be a JPEG. @@ -131,7 +132,7 @@ Status ReadTensorFromImageFile(const string& file_name, const int input_height, Status SaveImage(const Tensor& tensor, const string& file_path) { LOG(INFO) << "Saving image to " << file_path; - CHECK(tensorflow::StringPiece(file_path).ends_with(".png")) + CHECK(tensorflow::str_util::EndsWith(file_path, ".png")) << "Only saving of png files is supported."; auto root = tensorflow::Scope::NewRootScope(); diff --git a/tensorflow/examples/saved_model/BUILD b/tensorflow/examples/saved_model/BUILD index 1cdf5ec6e1..ebefc6576d 100644 --- a/tensorflow/examples/saved_model/BUILD +++ b/tensorflow/examples/saved_model/BUILD @@ -8,19 +8,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "g3doc/sitemap.md", - ], - ), - visibility = ["//visibility:public"], -) - py_binary( name = "saved_model_half_plus_two", srcs = [ diff --git a/tensorflow/examples/speech_commands/BUILD b/tensorflow/examples/speech_commands/BUILD index 12479211c3..7a44e2ee4f 100644 --- a/tensorflow/examples/speech_commands/BUILD +++ b/tensorflow/examples/speech_commands/BUILD @@ -56,6 +56,7 @@ tf_py_test( srcs = ["input_data_test.py"], additional_deps = [ ":input_data", + ":models", "//tensorflow/python:client_testlib", ], ) @@ -245,15 +246,3 @@ tf_cc_binary( "//tensorflow/core:protos_all_cc", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/speech_commands/accuracy_utils.h b/tensorflow/examples/speech_commands/accuracy_utils.h index 8d918cb64b..eea048365b 100644 --- a/tensorflow/examples/speech_commands/accuracy_utils.h +++ b/tensorflow/examples/speech_commands/accuracy_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_ACCURACY_UTILS_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_ACCURACY_UTILS_H_ +#ifndef TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_ACCURACY_UTILS_H_ +#define TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_ACCURACY_UTILS_H_ #include <vector> @@ -57,4 +57,4 @@ void PrintAccuracyStats(const StreamingAccuracyStats& stats); } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_ACCURACY_UTILS_H_ +#endif // TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_ACCURACY_UTILS_H_ diff --git a/tensorflow/examples/speech_commands/freeze.py b/tensorflow/examples/speech_commands/freeze.py index c8671d9c41..89e790d4e4 100644 --- a/tensorflow/examples/speech_commands/freeze.py +++ b/tensorflow/examples/speech_commands/freeze.py @@ -54,7 +54,7 @@ FLAGS = None def create_inference_graph(wanted_words, sample_rate, clip_duration_ms, clip_stride_ms, window_size_ms, window_stride_ms, - dct_coefficient_count, model_architecture): + feature_bin_count, model_architecture, preprocess): """Creates an audio model with the nodes needed for inference. Uses the supplied arguments to create a model, and inserts the input and @@ -67,14 +67,19 @@ def create_inference_graph(wanted_words, sample_rate, clip_duration_ms, clip_stride_ms: How often to run recognition. Useful for models with cache. window_size_ms: Time slice duration to estimate frequencies from. window_stride_ms: How far apart time slices should be. - dct_coefficient_count: Number of frequency bands to analyze. + feature_bin_count: Number of frequency bands to analyze. model_architecture: Name of the kind of model to generate. + preprocess: How the spectrogram is processed to produce features, for + example 'mfcc' or 'average'. + + Raises: + Exception: If the preprocessing mode isn't recognized. 
""" words_list = input_data.prepare_words_list(wanted_words.split(',')) model_settings = models.prepare_model_settings( len(words_list), sample_rate, clip_duration_ms, window_size_ms, - window_stride_ms, dct_coefficient_count) + window_stride_ms, feature_bin_count, preprocess) runtime_settings = {'clip_stride_ms': clip_stride_ms} wav_data_placeholder = tf.placeholder(tf.string, [], name='wav_data') @@ -88,15 +93,25 @@ def create_inference_graph(wanted_words, sample_rate, clip_duration_ms, window_size=model_settings['window_size_samples'], stride=model_settings['window_stride_samples'], magnitude_squared=True) - fingerprint_input = contrib_audio.mfcc( - spectrogram, - decoded_sample_data.sample_rate, - dct_coefficient_count=dct_coefficient_count) - fingerprint_frequency_size = model_settings['dct_coefficient_count'] - fingerprint_time_size = model_settings['spectrogram_length'] - reshaped_input = tf.reshape(fingerprint_input, [ - -1, fingerprint_time_size * fingerprint_frequency_size - ]) + + if preprocess == 'average': + fingerprint_input = tf.nn.pool( + tf.expand_dims(spectrogram, -1), + window_shape=[1, model_settings['average_window_width']], + strides=[1, model_settings['average_window_width']], + pooling_type='AVG', + padding='SAME') + elif preprocess == 'mfcc': + fingerprint_input = contrib_audio.mfcc( + spectrogram, + sample_rate, + dct_coefficient_count=model_settings['fingerprint_width']) + else: + raise Exception('Unknown preprocess mode "%s" (should be "mfcc" or' + ' "average")' % (preprocess)) + + fingerprint_size = model_settings['fingerprint_size'] + reshaped_input = tf.reshape(fingerprint_input, [-1, fingerprint_size]) logits = models.create_model( reshaped_input, model_settings, model_architecture, is_training=False, @@ -110,10 +125,12 @@ def main(_): # Create the model and load its weights. sess = tf.InteractiveSession() - create_inference_graph(FLAGS.wanted_words, FLAGS.sample_rate, - FLAGS.clip_duration_ms, FLAGS.clip_stride_ms, - FLAGS.window_size_ms, FLAGS.window_stride_ms, - FLAGS.dct_coefficient_count, FLAGS.model_architecture) + create_inference_graph( + FLAGS.wanted_words, FLAGS.sample_rate, FLAGS.clip_duration_ms, + FLAGS.clip_stride_ms, FLAGS.window_size_ms, FLAGS.window_stride_ms, + FLAGS.feature_bin_count, FLAGS.model_architecture, FLAGS.preprocess) + if FLAGS.quantize: + tf.contrib.quantize.create_eval_graph() models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint) # Turn all the variables into inline constants inside the graph and save it. @@ -155,10 +172,11 @@ if __name__ == '__main__': default=10.0, help='How long the stride is between spectrogram timeslices',) parser.add_argument( - '--dct_coefficient_count', + '--feature_bin_count', type=int, default=40, - help='How many bins to use for the MFCC fingerprint',) + help='How many bins to use for the MFCC fingerprint', + ) parser.add_argument( '--start_checkpoint', type=str, @@ -176,5 +194,15 @@ if __name__ == '__main__': help='Words to use (others will be added to an unknown label)',) parser.add_argument( '--output_file', type=str, help='Where to save the frozen graph.') + parser.add_argument( + '--quantize', + type=bool, + default=False, + help='Whether to train the model for eight-bit deployment') + parser.add_argument( + '--preprocess', + type=str, + default='mfcc', + help='Spectrogram processing mode. 
Can be "mfcc" or "average"') FLAGS, unparsed = parser.parse_known_args() tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/speech_commands/freeze_test.py b/tensorflow/examples/speech_commands/freeze_test.py index 97c6eac675..c8de6c2152 100644 --- a/tensorflow/examples/speech_commands/freeze_test.py +++ b/tensorflow/examples/speech_commands/freeze_test.py @@ -24,14 +24,62 @@ from tensorflow.python.platform import test class FreezeTest(test.TestCase): - def testCreateInferenceGraph(self): + def testCreateInferenceGraphWithMfcc(self): with self.test_session() as sess: - freeze.create_inference_graph('a,b,c,d', 16000, 1000.0, 30.0, 30.0, 10.0, - 40, 'conv') + freeze.create_inference_graph( + wanted_words='a,b,c,d', + sample_rate=16000, + clip_duration_ms=1000.0, + clip_stride_ms=30.0, + window_size_ms=30.0, + window_stride_ms=10.0, + feature_bin_count=40, + model_architecture='conv', + preprocess='mfcc') self.assertIsNotNone(sess.graph.get_tensor_by_name('wav_data:0')) self.assertIsNotNone( sess.graph.get_tensor_by_name('decoded_sample_data:0')) self.assertIsNotNone(sess.graph.get_tensor_by_name('labels_softmax:0')) + ops = [node.op for node in sess.graph_def.node] + self.assertEqual(1, ops.count('Mfcc')) + + def testCreateInferenceGraphWithoutMfcc(self): + with self.test_session() as sess: + freeze.create_inference_graph( + wanted_words='a,b,c,d', + sample_rate=16000, + clip_duration_ms=1000.0, + clip_stride_ms=30.0, + window_size_ms=30.0, + window_stride_ms=10.0, + feature_bin_count=40, + model_architecture='conv', + preprocess='average') + self.assertIsNotNone(sess.graph.get_tensor_by_name('wav_data:0')) + self.assertIsNotNone( + sess.graph.get_tensor_by_name('decoded_sample_data:0')) + self.assertIsNotNone(sess.graph.get_tensor_by_name('labels_softmax:0')) + ops = [node.op for node in sess.graph_def.node] + self.assertEqual(0, ops.count('Mfcc')) + + def testFeatureBinCount(self): + with self.test_session() as sess: + freeze.create_inference_graph( + wanted_words='a,b,c,d', + sample_rate=16000, + clip_duration_ms=1000.0, + clip_stride_ms=30.0, + window_size_ms=30.0, + window_stride_ms=10.0, + feature_bin_count=80, + model_architecture='conv', + preprocess='average') + self.assertIsNotNone(sess.graph.get_tensor_by_name('wav_data:0')) + self.assertIsNotNone( + sess.graph.get_tensor_by_name('decoded_sample_data:0')) + self.assertIsNotNone(sess.graph.get_tensor_by_name('labels_softmax:0')) + ops = [node.op for node in sess.graph_def.node] + self.assertEqual(0, ops.count('Mfcc')) if __name__ == '__main__': diff --git a/tensorflow/examples/speech_commands/generate_streaming_test_wav.py b/tensorflow/examples/speech_commands/generate_streaming_test_wav.py index 053206ae2f..9858906927 100644 --- a/tensorflow/examples/speech_commands/generate_streaming_test_wav.py +++ b/tensorflow/examples/speech_commands/generate_streaming_test_wav.py @@ -87,11 +87,12 @@ def main(_): words_list = input_data.prepare_words_list(FLAGS.wanted_words.split(',')) model_settings = models.prepare_model_settings( len(words_list), FLAGS.sample_rate, FLAGS.clip_duration_ms, - FLAGS.window_size_ms, FLAGS.window_stride_ms, FLAGS.dct_coefficient_count) + FLAGS.window_size_ms, FLAGS.window_stride_ms, FLAGS.feature_bin_count, + 'mfcc') audio_processor = input_data.AudioProcessor( '', FLAGS.data_dir, FLAGS.silence_percentage, 10, FLAGS.wanted_words.split(','), FLAGS.validation_percentage, - FLAGS.testing_percentage, model_settings) + FLAGS.testing_percentage, model_settings, FLAGS.data_dir) 
output_audio_sample_count = FLAGS.sample_rate * FLAGS.test_duration_seconds output_audio = np.zeros((output_audio_sample_count,), dtype=np.float32) @@ -242,10 +243,11 @@ if __name__ == '__main__': default=10.0, help='How long the stride is between spectrogram timeslices',) parser.add_argument( - '--dct_coefficient_count', + '--feature_bin_count', type=int, default=40, - help='How many bins to use for the MFCC fingerprint',) + help='How many bins to use for the MFCC fingerprint', + ) parser.add_argument( '--wanted_words', type=str, diff --git a/tensorflow/examples/speech_commands/input_data.py b/tensorflow/examples/speech_commands/input_data.py index e7db9cddf0..30f2cfa9fe 100644 --- a/tensorflow/examples/speech_commands/input_data.py +++ b/tensorflow/examples/speech_commands/input_data.py @@ -153,14 +153,14 @@ class AudioProcessor(object): def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage, wanted_words, validation_percentage, testing_percentage, - model_settings): + model_settings, summaries_dir): self.data_dir = data_dir self.maybe_download_and_extract_dataset(data_url, data_dir) self.prepare_data_index(silence_percentage, unknown_percentage, wanted_words, validation_percentage, testing_percentage) self.prepare_background_data() - self.prepare_processing_graph(model_settings) + self.prepare_processing_graph(model_settings, summaries_dir) def maybe_download_and_extract_dataset(self, data_url, dest_directory): """Download and extract data set tar file. @@ -325,7 +325,7 @@ class AudioProcessor(object): if not self.background_data: raise Exception('No background wav files were found in ' + search_path) - def prepare_processing_graph(self, model_settings): + def prepare_processing_graph(self, model_settings, summaries_dir): """Builds a TensorFlow graph to apply the input distortions. Creates a graph that loads a WAVE file, decodes it, scales the volume, @@ -341,48 +341,88 @@ class AudioProcessor(object): - time_shift_offset_placeholder_: How much to move the clip in time. - background_data_placeholder_: PCM sample data for background noise. - background_volume_placeholder_: Loudness of mixed-in background. - - mfcc_: Output 2D fingerprint of processed audio. + - output_: Output 2D fingerprint of processed audio. Args: model_settings: Information about the current model being trained. + summaries_dir: Path to save training summary information to. + + Raises: + ValueError: If the preprocessing mode isn't recognized. """ - desired_samples = model_settings['desired_samples'] - self.wav_filename_placeholder_ = tf.placeholder(tf.string, []) - wav_loader = io_ops.read_file(self.wav_filename_placeholder_) - wav_decoder = contrib_audio.decode_wav( - wav_loader, desired_channels=1, desired_samples=desired_samples) - # Allow the audio sample's volume to be adjusted. - self.foreground_volume_placeholder_ = tf.placeholder(tf.float32, []) - scaled_foreground = tf.multiply(wav_decoder.audio, - self.foreground_volume_placeholder_) - # Shift the sample's start position, and pad any gaps with zeros. - self.time_shift_padding_placeholder_ = tf.placeholder(tf.int32, [2, 2]) - self.time_shift_offset_placeholder_ = tf.placeholder(tf.int32, [2]) - padded_foreground = tf.pad( - scaled_foreground, - self.time_shift_padding_placeholder_, - mode='CONSTANT') - sliced_foreground = tf.slice(padded_foreground, - self.time_shift_offset_placeholder_, - [desired_samples, -1]) - # Mix in background noise. 
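`AudioProcessor` now takes a trailing `summaries_dir` argument so its preprocessing graph (rebuilt below) can emit image summaries, and every caller in this diff gains it. A hedged construction sketch showing the two updated signatures together; the paths and percentages are placeholders, and `label_count=4` reflects the two wanted words plus the silence and unknown classes:

```python
from tensorflow.examples.speech_commands import input_data
from tensorflow.examples.speech_commands import models

model_settings = models.prepare_model_settings(
    label_count=4,
    sample_rate=16000,
    clip_duration_ms=1000,
    window_size_ms=30,
    window_stride_ms=10,
    feature_bin_count=40,
    preprocess='mfcc')

audio_processor = input_data.AudioProcessor(
    data_url='',                       # empty string: skip downloading
    data_dir='/tmp/speech_dataset',
    silence_percentage=10,
    unknown_percentage=10,
    wanted_words=['yes', 'no'],
    validation_percentage=10,
    testing_percentage=10,
    model_settings=model_settings,
    summaries_dir='/tmp/retrain_logs')  # new trailing argument
```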
- self.background_data_placeholder_ = tf.placeholder(tf.float32, - [desired_samples, 1]) - self.background_volume_placeholder_ = tf.placeholder(tf.float32, []) - background_mul = tf.multiply(self.background_data_placeholder_, - self.background_volume_placeholder_) - background_add = tf.add(background_mul, sliced_foreground) - background_clamp = tf.clip_by_value(background_add, -1.0, 1.0) - # Run the spectrogram and MFCC ops to get a 2D 'fingerprint' of the audio. - spectrogram = contrib_audio.audio_spectrogram( - background_clamp, - window_size=model_settings['window_size_samples'], - stride=model_settings['window_stride_samples'], - magnitude_squared=True) - self.mfcc_ = contrib_audio.mfcc( - spectrogram, - wav_decoder.sample_rate, - dct_coefficient_count=model_settings['dct_coefficient_count']) + with tf.get_default_graph().name_scope('data'): + desired_samples = model_settings['desired_samples'] + self.wav_filename_placeholder_ = tf.placeholder( + tf.string, [], name='wav_filename') + wav_loader = io_ops.read_file(self.wav_filename_placeholder_) + wav_decoder = contrib_audio.decode_wav( + wav_loader, desired_channels=1, desired_samples=desired_samples) + # Allow the audio sample's volume to be adjusted. + self.foreground_volume_placeholder_ = tf.placeholder( + tf.float32, [], name='foreground_volume') + scaled_foreground = tf.multiply(wav_decoder.audio, + self.foreground_volume_placeholder_) + # Shift the sample's start position, and pad any gaps with zeros. + self.time_shift_padding_placeholder_ = tf.placeholder( + tf.int32, [2, 2], name='time_shift_padding') + self.time_shift_offset_placeholder_ = tf.placeholder( + tf.int32, [2], name='time_shift_offset') + padded_foreground = tf.pad( + scaled_foreground, + self.time_shift_padding_placeholder_, + mode='CONSTANT') + sliced_foreground = tf.slice(padded_foreground, + self.time_shift_offset_placeholder_, + [desired_samples, -1]) + # Mix in background noise. + self.background_data_placeholder_ = tf.placeholder( + tf.float32, [desired_samples, 1], name='background_data') + self.background_volume_placeholder_ = tf.placeholder( + tf.float32, [], name='background_volume') + background_mul = tf.multiply(self.background_data_placeholder_, + self.background_volume_placeholder_) + background_add = tf.add(background_mul, sliced_foreground) + background_clamp = tf.clip_by_value(background_add, -1.0, 1.0) + # Run the spectrogram and MFCC ops to get a 2D 'fingerprint' of the audio. + spectrogram = contrib_audio.audio_spectrogram( + background_clamp, + window_size=model_settings['window_size_samples'], + stride=model_settings['window_stride_samples'], + magnitude_squared=True) + tf.summary.image( + 'spectrogram', tf.expand_dims(spectrogram, -1), max_outputs=1) + # The number of buckets in each FFT row in the spectrogram will depend on + # how many input samples there are in each window. This can be quite + # large, with a 160 sample window producing 127 buckets for example. We + # don't need this level of detail for classification, so we often want to + # shrink them down to produce a smaller result. That's what this section + # implements. One method is to use average pooling to merge adjacent + # buckets, but a more sophisticated approach is to apply the MFCC + # algorithm to shrink the representation. 
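The comment ending above contrasts two ways of shrinking the spectrogram's frequency axis, and the 'average' branch that follows implements the first with `tf.nn.pool`. A toy run showing how that pooling collapses FFT buckets (the shapes are arbitrary, chosen to match the 257-bucket figure suggested by the comment):

```python
import tensorflow as tf

# A fake spectrogram: 1 clip, 98 time slices, 257 FFT buckets.
spectrogram = tf.random_uniform([1, 98, 257])
average_window_width = 6

shrunk = tf.nn.pool(
    tf.expand_dims(spectrogram, -1),   # pooling expects a channels axis
    window_shape=[1, average_window_width],
    strides=[1, average_window_width],
    pooling_type='AVG',
    padding='SAME')

with tf.Session() as sess:
  # 257 buckets at stride 6 -> ceil(257 / 6) = 43 averaged features.
  print(sess.run(shrunk).shape)  # (1, 98, 43, 1)
```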
+ if model_settings['preprocess'] == 'average': + self.output_ = tf.nn.pool( + tf.expand_dims(spectrogram, -1), + window_shape=[1, model_settings['average_window_width']], + strides=[1, model_settings['average_window_width']], + pooling_type='AVG', + padding='SAME') + tf.summary.image('shrunk_spectrogram', self.output_, max_outputs=1) + elif model_settings['preprocess'] == 'mfcc': + self.output_ = contrib_audio.mfcc( + spectrogram, + wav_decoder.sample_rate, + dct_coefficient_count=model_settings['fingerprint_width']) + tf.summary.image( + 'mfcc', tf.expand_dims(self.output_, -1), max_outputs=1) + else: + raise ValueError('Unknown preprocess mode "%s" (should be "mfcc" or' + ' "average")' % (model_settings['preprocess'])) + + # Merge all the summaries and write them out to /tmp/retrain_logs (by + # default) + self.merged_summaries_ = tf.summary.merge_all(scope='data') + self.summary_writer_ = tf.summary.FileWriter(summaries_dir + '/data', + tf.get_default_graph()) def set_size(self, mode): """Calculates the number of samples in the dataset partition. @@ -418,6 +458,9 @@ class AudioProcessor(object): Returns: List of sample data for the transformed samples, and list of label indexes + + Raises: + ValueError: If background samples are too short. """ # Pick one of the partitions to choose samples from. candidates = self.data_index[mode] @@ -457,15 +500,22 @@ class AudioProcessor(object): self.time_shift_offset_placeholder_: time_shift_offset, } # Choose a section of background noise to mix in. - if use_background: + if use_background or sample['label'] == SILENCE_LABEL: background_index = np.random.randint(len(self.background_data)) background_samples = self.background_data[background_index] + if len(background_samples) <= model_settings['desired_samples']: + raise ValueError( + 'Background sample is too short! Need more than %d' + ' samples but only %d were found' % + (model_settings['desired_samples'], len(background_samples))) background_offset = np.random.randint( 0, len(background_samples) - model_settings['desired_samples']) background_clipped = background_samples[background_offset:( background_offset + desired_samples)] background_reshaped = background_clipped.reshape([desired_samples, 1]) - if np.random.uniform(0, 1) < background_frequency: + if sample['label'] == SILENCE_LABEL: + background_volume = np.random.uniform(0, 1) + elif np.random.uniform(0, 1) < background_frequency: background_volume = np.random.uniform(0, background_volume_range) else: background_volume = 0 @@ -480,7 +530,10 @@ class AudioProcessor(object): else: input_dict[self.foreground_volume_placeholder_] = 1 # Run the graph to produce the output audio. 
- data[i - offset, :] = sess.run(self.mfcc_, feed_dict=input_dict).flatten() + summary, data_tensor = sess.run( + [self.merged_summaries_, self.output_], feed_dict=input_dict) + self.summary_writer_.add_summary(summary) + data[i - offset, :] = data_tensor.flatten() label_index = self.word_to_index[sample['label']] labels[i - offset] = label_index return data, labels diff --git a/tensorflow/examples/speech_commands/input_data_test.py b/tensorflow/examples/speech_commands/input_data_test.py index 13f294d39d..2e551be9a2 100644 --- a/tensorflow/examples/speech_commands/input_data_test.py +++ b/tensorflow/examples/speech_commands/input_data_test.py @@ -25,6 +25,7 @@ import tensorflow as tf from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio from tensorflow.examples.speech_commands import input_data +from tensorflow.examples.speech_commands import models from tensorflow.python.platform import test @@ -32,7 +33,7 @@ class InputDataTest(test.TestCase): def _getWavData(self): with self.test_session() as sess: - sample_data = tf.zeros([1000, 2]) + sample_data = tf.zeros([32000, 2]) wav_encoder = contrib_audio.encode_wav(sample_data, 16000) wav_data = sess.run(wav_encoder) return wav_data @@ -57,9 +58,31 @@ class InputDataTest(test.TestCase): "label_count": 4, "window_size_samples": 100, "window_stride_samples": 100, - "dct_coefficient_count": 40, + "fingerprint_width": 40, + "preprocess": "mfcc", } + def _runGetDataTest(self, preprocess, window_length_ms): + tmp_dir = self.get_temp_dir() + wav_dir = os.path.join(tmp_dir, "wavs") + os.mkdir(wav_dir) + self._saveWavFolders(wav_dir, ["a", "b", "c"], 100) + background_dir = os.path.join(wav_dir, "_background_noise_") + os.mkdir(background_dir) + wav_data = self._getWavData() + for i in range(10): + file_path = os.path.join(background_dir, "background_audio_%d.wav" % i) + self._saveTestWavFile(file_path, wav_data) + model_settings = models.prepare_model_settings( + 4, 16000, 1000, window_length_ms, 20, 40, preprocess) + with self.test_session() as sess: + audio_processor = input_data.AudioProcessor( + "", wav_dir, 10, 10, ["a", "b"], 10, 10, model_settings, tmp_dir) + result_data, result_labels = audio_processor.get_data( + 10, 0, model_settings, 0.3, 0.1, 100, "training", sess) + self.assertEqual(10, len(result_data)) + self.assertEqual(10, len(result_labels)) + def testPrepareWordsList(self): words_list = ["a", "b"] self.assertGreater( @@ -76,8 +99,9 @@ class InputDataTest(test.TestCase): def testPrepareDataIndex(self): tmp_dir = self.get_temp_dir() self._saveWavFolders(tmp_dir, ["a", "b", "c"], 100) - audio_processor = input_data.AudioProcessor("", tmp_dir, 10, 10, ["a", "b"], - 10, 10, self._model_settings()) + audio_processor = input_data.AudioProcessor("", tmp_dir, 10, 10, + ["a", "b"], 10, 10, + self._model_settings(), tmp_dir) self.assertLess(0, audio_processor.set_size("training")) self.assertTrue("training" in audio_processor.data_index) self.assertTrue("validation" in audio_processor.data_index) @@ -90,7 +114,7 @@ class InputDataTest(test.TestCase): self._saveWavFolders(tmp_dir, ["a", "b", "c"], 0) with self.assertRaises(Exception) as e: _ = input_data.AudioProcessor("", tmp_dir, 10, 10, ["a", "b"], 10, 10, - self._model_settings()) + self._model_settings(), tmp_dir) self.assertTrue("No .wavs found" in str(e.exception)) def testPrepareDataIndexMissing(self): @@ -98,7 +122,7 @@ class InputDataTest(test.TestCase): self._saveWavFolders(tmp_dir, ["a", "b", "c"], 100) with self.assertRaises(Exception) as e: _ = 
input_data.AudioProcessor("", tmp_dir, 10, 10, ["a", "b", "d"], 10, - 10, self._model_settings()) + 10, self._model_settings(), tmp_dir) self.assertTrue("Expected to find" in str(e.exception)) def testPrepareBackgroundData(self): @@ -110,8 +134,9 @@ class InputDataTest(test.TestCase): file_path = os.path.join(background_dir, "background_audio_%d.wav" % i) self._saveTestWavFile(file_path, wav_data) self._saveWavFolders(tmp_dir, ["a", "b", "c"], 100) - audio_processor = input_data.AudioProcessor("", tmp_dir, 10, 10, ["a", "b"], - 10, 10, self._model_settings()) + audio_processor = input_data.AudioProcessor("", tmp_dir, 10, 10, + ["a", "b"], 10, 10, + self._model_settings(), tmp_dir) self.assertEqual(10, len(audio_processor.background_data)) def testLoadWavFile(self): @@ -148,44 +173,27 @@ class InputDataTest(test.TestCase): "label_count": 4, "window_size_samples": 100, "window_stride_samples": 100, - "dct_coefficient_count": 40, + "fingerprint_width": 40, + "preprocess": "mfcc", } audio_processor = input_data.AudioProcessor("", wav_dir, 10, 10, ["a", "b"], - 10, 10, model_settings) + 10, 10, model_settings, tmp_dir) self.assertIsNotNone(audio_processor.wav_filename_placeholder_) self.assertIsNotNone(audio_processor.foreground_volume_placeholder_) self.assertIsNotNone(audio_processor.time_shift_padding_placeholder_) self.assertIsNotNone(audio_processor.time_shift_offset_placeholder_) self.assertIsNotNone(audio_processor.background_data_placeholder_) self.assertIsNotNone(audio_processor.background_volume_placeholder_) - self.assertIsNotNone(audio_processor.mfcc_) + self.assertIsNotNone(audio_processor.output_) - def testGetData(self): - tmp_dir = self.get_temp_dir() - wav_dir = os.path.join(tmp_dir, "wavs") - os.mkdir(wav_dir) - self._saveWavFolders(wav_dir, ["a", "b", "c"], 100) - background_dir = os.path.join(wav_dir, "_background_noise_") - os.mkdir(background_dir) - wav_data = self._getWavData() - for i in range(10): - file_path = os.path.join(background_dir, "background_audio_%d.wav" % i) - self._saveTestWavFile(file_path, wav_data) - model_settings = { - "desired_samples": 160, - "fingerprint_size": 40, - "label_count": 4, - "window_size_samples": 100, - "window_stride_samples": 100, - "dct_coefficient_count": 40, - } - audio_processor = input_data.AudioProcessor("", wav_dir, 10, 10, ["a", "b"], - 10, 10, model_settings) - with self.test_session() as sess: - result_data, result_labels = audio_processor.get_data( - 10, 0, model_settings, 0.3, 0.1, 100, "training", sess) - self.assertEqual(10, len(result_data)) - self.assertEqual(10, len(result_labels)) + def testGetDataAverage(self): + self._runGetDataTest("average", 10) + + def testGetDataAverageLongWindow(self): + self._runGetDataTest("average", 30) + + def testGetDataMfcc(self): + self._runGetDataTest("mfcc", 30) def testGetUnprocessedData(self): tmp_dir = self.get_temp_dir() @@ -198,10 +206,11 @@ class InputDataTest(test.TestCase): "label_count": 4, "window_size_samples": 100, "window_stride_samples": 100, - "dct_coefficient_count": 40, + "fingerprint_width": 40, + "preprocess": "mfcc", } audio_processor = input_data.AudioProcessor("", wav_dir, 10, 10, ["a", "b"], - 10, 10, model_settings) + 10, 10, model_settings, tmp_dir) result_data, result_labels = audio_processor.get_unprocessed_data( 10, model_settings, "training") self.assertEqual(10, len(result_data)) diff --git a/tensorflow/examples/speech_commands/label_wav_dir.py b/tensorflow/examples/speech_commands/label_wav_dir.py new file mode 100644 index 0000000000..a34db512dd --- 
/dev/null +++ b/tensorflow/examples/speech_commands/label_wav_dir.py @@ -0,0 +1,136 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Runs a trained audio graph against WAVE files and reports the results. + +The model, labels and .wav files specified in the arguments will be loaded, and +then the predictions from running the model against the audio data will be +printed to the console. This is a useful script for sanity checking trained +models, and as an example of how to use an audio model from Python. + +Here's an example of running it: + +python tensorflow/examples/speech_commands/label_wav_dir.py \ +--graph=/tmp/my_frozen_graph.pb \ +--labels=/tmp/speech_commands_train/conv_labels.txt \ +--wav_dir=/tmp/speech_dataset/left + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import glob +import sys + +import tensorflow as tf + +# pylint: disable=unused-import +from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio +# pylint: enable=unused-import + +FLAGS = None + + +def load_graph(filename): + """Unpersists graph from file as default graph.""" + with tf.gfile.FastGFile(filename, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + +def load_labels(filename): + """Read in labels, one label per line.""" + return [line.rstrip() for line in tf.gfile.GFile(filename)] + + +def run_graph(wav_dir, labels, input_layer_name, output_layer_name, + num_top_predictions): + """Runs the audio data through the graph and prints predictions.""" + with tf.Session() as sess: + # Feed the audio data as input to the graph. 
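The `run_graph` body continuing below feeds raw WAV bytes into the frozen graph's `wav_data:0` input, reads back `labels_softmax:0` scores, and takes the top k with an argsort. Compressed into one loop, assuming `load_graph`/`load_labels` above have already populated the default graph (the labels list and directory here are illustrative):

```python
import glob

import tensorflow as tf

labels = ['_silence_', '_unknown_', 'left', 'right']  # illustrative only
with tf.Session() as sess:
  softmax_tensor = sess.graph.get_tensor_by_name('labels_softmax:0')
  for wav_path in glob.glob('/tmp/speech_dataset/left/*.wav'):
    with open(wav_path, 'rb') as wav_file:
      wav_data = wav_file.read()
    predictions, = sess.run(softmax_tensor, {'wav_data:0': wav_data})
    # argsort is ascending: take the last k entries, reversed, for
    # best-first order.
    top_k = predictions.argsort()[-3:][::-1]
    print(wav_path, [(labels[i], predictions[i]) for i in top_k])
```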
+ # predictions will contain a two-dimensional array, where one + # dimension represents the input image count, and the other has + # predictions per class + for wav_path in glob.glob(wav_dir + '/*.wav'): + if not wav_path or not tf.gfile.Exists(wav_path): + tf.logging.fatal('Audio file does not exist %s', wav_path) + + with open(wav_path, 'rb') as wav_file: + wav_data = wav_file.read() + + softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name) + predictions, = sess.run(softmax_tensor, {input_layer_name: wav_data}) + + # Sort to show labels in order of confidence + print('\n%s' % (wav_path.split('/')[-1])) + top_k = predictions.argsort()[-num_top_predictions:][::-1] + for node_id in top_k: + human_string = labels[node_id] + score = predictions[node_id] + print('%s (score = %.5f)' % (human_string, score)) + + return 0 + + +def label_wav(wav_dir, labels, graph, input_name, output_name, how_many_labels): + """Loads the model and labels, and runs the inference to print predictions.""" + if not labels or not tf.gfile.Exists(labels): + tf.logging.fatal('Labels file does not exist %s', labels) + + if not graph or not tf.gfile.Exists(graph): + tf.logging.fatal('Graph file does not exist %s', graph) + + labels_list = load_labels(labels) + + # load graph, which is stored in the default session + load_graph(graph) + + run_graph(wav_dir, labels_list, input_name, output_name, how_many_labels) + + +def main(_): + """Entry point for script, converts flags to arguments.""" + label_wav(FLAGS.wav_dir, FLAGS.labels, FLAGS.graph, FLAGS.input_name, + FLAGS.output_name, FLAGS.how_many_labels) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--wav_dir', type=str, default='', help='Audio file to be identified.') + parser.add_argument( + '--graph', type=str, default='', help='Model to use for identification.') + parser.add_argument( + '--labels', type=str, default='', help='Path to file containing labels.') + parser.add_argument( + '--input_name', + type=str, + default='wav_data:0', + help='Name of WAVE data input node in model.') + parser.add_argument( + '--output_name', + type=str, + default='labels_softmax:0', + help='Name of node outputting a prediction in the model.') + parser.add_argument( + '--how_many_labels', + type=int, + default=3, + help='Number of results to show.') + + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py index ab611f414a..4d1454be0d 100644 --- a/tensorflow/examples/speech_commands/models.py +++ b/tensorflow/examples/speech_commands/models.py @@ -24,9 +24,21 @@ import math import tensorflow as tf +def _next_power_of_two(x): + """Calculates the smallest enclosing power of two for an input. + + Args: + x: Positive float or integer number. + + Returns: + Next largest power of two integer. + """ + return 1 if x == 0 else 2**(int(x) - 1).bit_length() + + def prepare_model_settings(label_count, sample_rate, clip_duration_ms, - window_size_ms, window_stride_ms, - dct_coefficient_count): + window_size_ms, window_stride_ms, feature_bin_count, + preprocess): """Calculates common settings needed for all models. Args: @@ -35,10 +47,14 @@ def prepare_model_settings(label_count, sample_rate, clip_duration_ms, clip_duration_ms: Length of each audio clip to be analyzed. window_size_ms: Duration of frequency analysis window. window_stride_ms: How far to move in time between frequency windows. 
- dct_coefficient_count: Number of frequency bins to use for analysis. + feature_bin_count: Number of frequency bins to use for analysis. + preprocess: How the spectrogram is processed to produce features. Returns: Dictionary containing common settings. + + Raises: + ValueError: If the preprocessing mode isn't recognized. """ desired_samples = int(sample_rate * clip_duration_ms / 1000) window_size_samples = int(sample_rate * window_size_ms / 1000) @@ -48,16 +64,28 @@ def prepare_model_settings(label_count, sample_rate, clip_duration_ms, spectrogram_length = 0 else: spectrogram_length = 1 + int(length_minus_window / window_stride_samples) - fingerprint_size = dct_coefficient_count * spectrogram_length + if preprocess == 'average': + fft_bin_count = 1 + (_next_power_of_two(window_size_samples) / 2) + average_window_width = int(math.floor(fft_bin_count / feature_bin_count)) + fingerprint_width = int(math.ceil(fft_bin_count / average_window_width)) + elif preprocess == 'mfcc': + average_window_width = -1 + fingerprint_width = feature_bin_count + else: + raise ValueError('Unknown preprocess mode "%s" (should be "mfcc" or' + ' "average")' % (preprocess)) + fingerprint_size = fingerprint_width * spectrogram_length return { 'desired_samples': desired_samples, 'window_size_samples': window_size_samples, 'window_stride_samples': window_stride_samples, 'spectrogram_length': spectrogram_length, - 'dct_coefficient_count': dct_coefficient_count, + 'fingerprint_width': fingerprint_width, 'fingerprint_size': fingerprint_size, 'label_count': label_count, 'sample_rate': sample_rate, + 'preprocess': preprocess, + 'average_window_width': average_window_width, } @@ -106,10 +134,14 @@ def create_model(fingerprint_input, model_settings, model_architecture, elif model_architecture == 'low_latency_svdf': return create_low_latency_svdf_model(fingerprint_input, model_settings, is_training, runtime_settings) + elif model_architecture == 'tiny_conv': + return create_tiny_conv_model(fingerprint_input, model_settings, + is_training) else: raise Exception('model_architecture argument "' + model_architecture + '" not recognized, should be one of "single_fc", "conv",' + - ' "low_latency_conv, or "low_latency_svdf"') + ' "low_latency_conv, "low_latency_svdf",' + + ' or "tiny_conv"') def load_variables_from_checkpoint(sess, start_checkpoint): @@ -152,9 +184,12 @@ def create_single_fc_model(fingerprint_input, model_settings, is_training): dropout_prob = tf.placeholder(tf.float32, name='dropout_prob') fingerprint_size = model_settings['fingerprint_size'] label_count = model_settings['label_count'] - weights = tf.Variable( - tf.truncated_normal([fingerprint_size, label_count], stddev=0.001)) - bias = tf.Variable(tf.zeros([label_count])) + weights = tf.get_variable( + name='weights', + initializer=tf.truncated_normal_initializer(stddev=0.001), + shape=[fingerprint_size, label_count]) + bias = tf.get_variable( + name='bias', initializer=tf.zeros_initializer, shape=[label_count]) logits = tf.matmul(fingerprint_input, weights) + bias if is_training: return logits, dropout_prob @@ -212,18 +247,21 @@ def create_conv_model(fingerprint_input, model_settings, is_training): """ if is_training: dropout_prob = tf.placeholder(tf.float32, name='dropout_prob') - input_frequency_size = model_settings['dct_coefficient_count'] + input_frequency_size = model_settings['fingerprint_width'] input_time_size = model_settings['spectrogram_length'] fingerprint_4d = tf.reshape(fingerprint_input, [-1, input_time_size, input_frequency_size, 1]) 
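Note that in 'average' mode, `prepare_model_settings` above derives the feature geometry from the FFT size rather than honoring `feature_bin_count` exactly. Worked through for typical values (16 kHz audio, 30 ms analysis windows):

```python
import math


def _next_power_of_two(x):
  return 1 if x == 0 else 2**(int(x) - 1).bit_length()

# 30 ms windows at 16 kHz -> 480 samples per analysis window.
window_size_samples = int(16000 * 30 / 1000)           # 480
fft_length = _next_power_of_two(window_size_samples)   # 512
fft_bin_count = 1 + (fft_length / 2)                   # 257 buckets per slice

feature_bin_count = 40
average_window_width = int(math.floor(fft_bin_count / feature_bin_count))  # 6
fingerprint_width = int(math.ceil(fft_bin_count / average_window_width))   # 43

# So asking for 40 'average' bins actually yields 43 pooled features,
# while 'mfcc' mode returns exactly feature_bin_count coefficients.
print(average_window_width, fingerprint_width)
```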
first_filter_width = 8 first_filter_height = 20 first_filter_count = 64 - first_weights = tf.Variable( - tf.truncated_normal( - [first_filter_height, first_filter_width, 1, first_filter_count], - stddev=0.01)) - first_bias = tf.Variable(tf.zeros([first_filter_count])) + first_weights = tf.get_variable( + name='first_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[first_filter_height, first_filter_width, 1, first_filter_count]) + first_bias = tf.get_variable( + name='first_bias', + initializer=tf.zeros_initializer, + shape=[first_filter_count]) first_conv = tf.nn.conv2d(fingerprint_4d, first_weights, [1, 1, 1, 1], 'SAME') + first_bias first_relu = tf.nn.relu(first_conv) @@ -235,14 +273,17 @@ def create_conv_model(fingerprint_input, model_settings, is_training): second_filter_width = 4 second_filter_height = 10 second_filter_count = 64 - second_weights = tf.Variable( - tf.truncated_normal( - [ - second_filter_height, second_filter_width, first_filter_count, - second_filter_count - ], - stddev=0.01)) - second_bias = tf.Variable(tf.zeros([second_filter_count])) + second_weights = tf.get_variable( + name='second_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[ + second_filter_height, second_filter_width, first_filter_count, + second_filter_count + ]) + second_bias = tf.get_variable( + name='second_bias', + initializer=tf.zeros_initializer, + shape=[second_filter_count]) second_conv = tf.nn.conv2d(max_pool, second_weights, [1, 1, 1, 1], 'SAME') + second_bias second_relu = tf.nn.relu(second_conv) @@ -259,10 +300,14 @@ def create_conv_model(fingerprint_input, model_settings, is_training): flattened_second_conv = tf.reshape(second_dropout, [-1, second_conv_element_count]) label_count = model_settings['label_count'] - final_fc_weights = tf.Variable( - tf.truncated_normal( - [second_conv_element_count, label_count], stddev=0.01)) - final_fc_bias = tf.Variable(tf.zeros([label_count])) + final_fc_weights = tf.get_variable( + name='final_fc_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[second_conv_element_count, label_count]) + final_fc_bias = tf.get_variable( + name='final_fc_bias', + initializer=tf.zeros_initializer, + shape=[label_count]) final_fc = tf.matmul(flattened_second_conv, final_fc_weights) + final_fc_bias if is_training: return final_fc, dropout_prob @@ -318,7 +363,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings, """ if is_training: dropout_prob = tf.placeholder(tf.float32, name='dropout_prob') - input_frequency_size = model_settings['dct_coefficient_count'] + input_frequency_size = model_settings['fingerprint_width'] input_time_size = model_settings['spectrogram_length'] fingerprint_4d = tf.reshape(fingerprint_input, [-1, input_time_size, input_frequency_size, 1]) @@ -327,11 +372,14 @@ def create_low_latency_conv_model(fingerprint_input, model_settings, first_filter_count = 186 first_filter_stride_x = 1 first_filter_stride_y = 1 - first_weights = tf.Variable( - tf.truncated_normal( - [first_filter_height, first_filter_width, 1, first_filter_count], - stddev=0.01)) - first_bias = tf.Variable(tf.zeros([first_filter_count])) + first_weights = tf.get_variable( + name='first_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[first_filter_height, first_filter_width, 1, first_filter_count]) + first_bias = tf.get_variable( + name='first_bias', + initializer=tf.zeros_initializer, + shape=[first_filter_count]) first_conv = tf.nn.conv2d(fingerprint_4d, 
first_weights, [ 1, first_filter_stride_y, first_filter_stride_x, 1 ], 'VALID') + first_bias @@ -351,30 +399,42 @@ def create_low_latency_conv_model(fingerprint_input, model_settings, flattened_first_conv = tf.reshape(first_dropout, [-1, first_conv_element_count]) first_fc_output_channels = 128 - first_fc_weights = tf.Variable( - tf.truncated_normal( - [first_conv_element_count, first_fc_output_channels], stddev=0.01)) - first_fc_bias = tf.Variable(tf.zeros([first_fc_output_channels])) + first_fc_weights = tf.get_variable( + name='first_fc_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[first_conv_element_count, first_fc_output_channels]) + first_fc_bias = tf.get_variable( + name='first_fc_bias', + initializer=tf.zeros_initializer, + shape=[first_fc_output_channels]) first_fc = tf.matmul(flattened_first_conv, first_fc_weights) + first_fc_bias if is_training: second_fc_input = tf.nn.dropout(first_fc, dropout_prob) else: second_fc_input = first_fc second_fc_output_channels = 128 - second_fc_weights = tf.Variable( - tf.truncated_normal( - [first_fc_output_channels, second_fc_output_channels], stddev=0.01)) - second_fc_bias = tf.Variable(tf.zeros([second_fc_output_channels])) + second_fc_weights = tf.get_variable( + name='second_fc_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[first_fc_output_channels, second_fc_output_channels]) + second_fc_bias = tf.get_variable( + name='second_fc_bias', + initializer=tf.zeros_initializer, + shape=[second_fc_output_channels]) second_fc = tf.matmul(second_fc_input, second_fc_weights) + second_fc_bias if is_training: final_fc_input = tf.nn.dropout(second_fc, dropout_prob) else: final_fc_input = second_fc label_count = model_settings['label_count'] - final_fc_weights = tf.Variable( - tf.truncated_normal( - [second_fc_output_channels, label_count], stddev=0.01)) - final_fc_bias = tf.Variable(tf.zeros([label_count])) + final_fc_weights = tf.get_variable( + name='final_fc_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[second_fc_output_channels, label_count]) + final_fc_bias = tf.get_variable( + name='final_fc_bias', + initializer=tf.zeros_initializer, + shape=[label_count]) final_fc = tf.matmul(final_fc_input, final_fc_weights) + final_fc_bias if is_training: return final_fc, dropout_prob @@ -422,7 +482,7 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings, Args: fingerprint_input: TensorFlow node that will output audio feature vectors. The node is expected to produce a 2D Tensor of shape: - [batch, model_settings['dct_coefficient_count'] * + [batch, model_settings['fingerprint_width'] * model_settings['spectrogram_length']] with the features corresponding to the same time slot arranged contiguously, and the oldest slot at index [:, 0], and newest at [:, -1]. @@ -440,7 +500,7 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings, if is_training: dropout_prob = tf.placeholder(tf.float32, name='dropout_prob') - input_frequency_size = model_settings['dct_coefficient_count'] + input_frequency_size = model_settings['fingerprint_width'] input_time_size = model_settings['spectrogram_length'] # Validation. 
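The change repeated throughout these model builders is `tf.Variable` becoming `tf.get_variable`: named variables get stable checkpoint keys and can be looked up or shared by scope, at the cost of supplying an initializer factory plus an explicit shape. The two equivalent spellings side by side (shape values illustrative):

```python
import tensorflow as tf

# Old style: the initial-value tensor fixes the shape, the name is implicit.
weights_old = tf.Variable(
    tf.truncated_normal([250, 12], stddev=0.01))

# New style: explicit name and shape, initializer supplied as a factory.
weights_new = tf.get_variable(
    name='final_fc_weights',
    initializer=tf.truncated_normal_initializer(stddev=0.01),
    shape=[250, 12])
```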
@@ -462,8 +522,11 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings, num_filters = rank * num_units # Create the runtime memory: [num_filters, batch, input_time_size] batch = 1 - memory = tf.Variable(tf.zeros([num_filters, batch, input_time_size]), - trainable=False, name='runtime-memory') + memory = tf.get_variable( + initializer=tf.zeros_initializer, + shape=[num_filters, batch, input_time_size], + trainable=False, + name='runtime-memory') # Determine the number of new frames in the input, such that we only operate # on those. For training we do not use the memory, and thus use all frames # provided in the input. @@ -483,8 +546,10 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings, new_fingerprint_input = tf.expand_dims(new_fingerprint_input, 2) # Create the frequency filters. - weights_frequency = tf.Variable( - tf.truncated_normal([input_frequency_size, num_filters], stddev=0.01)) + weights_frequency = tf.get_variable( + name='weights_frequency', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[input_frequency_size, num_filters]) # Expand to add input channels dimensions. # weights_frequency: [input_frequency_size, 1, num_filters] weights_frequency = tf.expand_dims(weights_frequency, 1) @@ -506,8 +571,10 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings, activations_time = new_memory # Create the time filters. - weights_time = tf.Variable( - tf.truncated_normal([num_filters, input_time_size], stddev=0.01)) + weights_time = tf.get_variable( + name='weights_time', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[num_filters, input_time_size]) # Apply the time filter on the outputs of the feature filters. # weights_time: [num_filters, input_time_size, 1] # outputs: [num_filters, batch, 1] @@ -524,7 +591,8 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings, units_output = tf.transpose(units_output) # Appy bias. - bias = tf.Variable(tf.zeros([num_units])) + bias = tf.get_variable( + name='bias', initializer=tf.zeros_initializer, shape=[num_units]) first_bias = tf.nn.bias_add(units_output, bias) # Relu. 
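The SVDF model above keeps its streaming state in the non-trainable 'runtime-memory' variable, sliding old activations out and appending new ones between inference calls. A stripped-down sketch of that state pattern, with illustrative shapes rather than the model's own:

```python
import tensorflow as tf

num_filters, batch, time = 4, 1, 8
new_frames = 2

# Non-trainable state that survives across session.run() calls.
memory = tf.get_variable(
    name='runtime_memory',
    initializer=tf.zeros_initializer,
    shape=[num_filters, batch, time],
    trainable=False)

new_activations = tf.placeholder(tf.float32, [num_filters, batch, new_frames])
# Slide the window: drop the oldest frames, append the newest ones.
new_memory = tf.concat([memory[:, :, new_frames:], new_activations], axis=2)
update = tf.assign(memory, new_memory)
# Running `update` once per incoming audio chunk advances the state.
```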
@@ -536,31 +604,135 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings, first_dropout = first_relu first_fc_output_channels = 256 - first_fc_weights = tf.Variable( - tf.truncated_normal([num_units, first_fc_output_channels], stddev=0.01)) - first_fc_bias = tf.Variable(tf.zeros([first_fc_output_channels])) + first_fc_weights = tf.get_variable( + name='first_fc_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[num_units, first_fc_output_channels]) + first_fc_bias = tf.get_variable( + name='first_fc_bias', + initializer=tf.zeros_initializer, + shape=[first_fc_output_channels]) first_fc = tf.matmul(first_dropout, first_fc_weights) + first_fc_bias if is_training: second_fc_input = tf.nn.dropout(first_fc, dropout_prob) else: second_fc_input = first_fc second_fc_output_channels = 256 - second_fc_weights = tf.Variable( - tf.truncated_normal( - [first_fc_output_channels, second_fc_output_channels], stddev=0.01)) - second_fc_bias = tf.Variable(tf.zeros([second_fc_output_channels])) + second_fc_weights = tf.get_variable( + name='second_fc_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[first_fc_output_channels, second_fc_output_channels]) + second_fc_bias = tf.get_variable( + name='second_fc_bias', + initializer=tf.zeros_initializer, + shape=[second_fc_output_channels]) second_fc = tf.matmul(second_fc_input, second_fc_weights) + second_fc_bias if is_training: final_fc_input = tf.nn.dropout(second_fc, dropout_prob) else: final_fc_input = second_fc label_count = model_settings['label_count'] - final_fc_weights = tf.Variable( - tf.truncated_normal( - [second_fc_output_channels, label_count], stddev=0.01)) - final_fc_bias = tf.Variable(tf.zeros([label_count])) + final_fc_weights = tf.get_variable( + name='final_fc_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[second_fc_output_channels, label_count]) + final_fc_bias = tf.get_variable( + name='final_fc_bias', + initializer=tf.zeros_initializer, + shape=[label_count]) final_fc = tf.matmul(final_fc_input, final_fc_weights) + final_fc_bias if is_training: return final_fc, dropout_prob else: return final_fc + + +def create_tiny_conv_model(fingerprint_input, model_settings, is_training): + """Builds a convolutional model aimed at microcontrollers. + + Devices like DSPs and microcontrollers can have very small amounts of + memory and limited processing power. This model is designed to use less + than 20KB of working RAM, and fit within 32KB of read-only (flash) memory. + + Here's the layout of the graph: + + (fingerprint_input) + v + [Conv2D]<-(weights) + v + [BiasAdd]<-(bias) + v + [Relu] + v + [MatMul]<-(weights) + v + [BiasAdd]<-(bias) + v + + This doesn't produce particularly accurate results, but it's designed to be + used as the first stage of a pipeline, running on a low-energy piece of + hardware that can always be on, and then wake higher-power chips when a + possible utterance has been found, so that more accurate analysis can be done. + + During training, a dropout node is introduced after the relu, controlled by a + placeholder. + + Args: + fingerprint_input: TensorFlow node that will output audio feature vectors. + model_settings: Dictionary of information about the model. + is_training: Whether the model is going to be used for training. + + Returns: + TensorFlow node outputting logits results, and optionally a dropout + placeholder.
+ """ + if is_training: + dropout_prob = tf.placeholder(tf.float32, name='dropout_prob') + input_frequency_size = model_settings['fingerprint_width'] + input_time_size = model_settings['spectrogram_length'] + fingerprint_4d = tf.reshape(fingerprint_input, + [-1, input_time_size, input_frequency_size, 1]) + first_filter_width = 8 + first_filter_height = 10 + first_filter_count = 8 + first_weights = tf.get_variable( + name='first_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[first_filter_height, first_filter_width, 1, first_filter_count]) + first_bias = tf.get_variable( + name='first_bias', + initializer=tf.zeros_initializer, + shape=[first_filter_count]) + first_conv_stride_x = 2 + first_conv_stride_y = 2 + first_conv = tf.nn.conv2d(fingerprint_4d, first_weights, + [1, first_conv_stride_y, first_conv_stride_x, 1], + 'SAME') + first_bias + first_relu = tf.nn.relu(first_conv) + if is_training: + first_dropout = tf.nn.dropout(first_relu, dropout_prob) + else: + first_dropout = first_relu + first_dropout_shape = first_dropout.get_shape() + first_dropout_output_width = first_dropout_shape[2] + first_dropout_output_height = first_dropout_shape[1] + first_dropout_element_count = int( + first_dropout_output_width * first_dropout_output_height * + first_filter_count) + flattened_first_dropout = tf.reshape(first_dropout, + [-1, first_dropout_element_count]) + label_count = model_settings['label_count'] + final_fc_weights = tf.get_variable( + name='final_fc_weights', + initializer=tf.truncated_normal_initializer(stddev=0.01), + shape=[first_dropout_element_count, label_count]) + final_fc_bias = tf.get_variable( + name='final_fc_bias', + initializer=tf.zeros_initializer, + shape=[label_count]) + final_fc = ( + tf.matmul(flattened_first_dropout, final_fc_weights) + final_fc_bias) + if is_training: + return final_fc, dropout_prob + else: + return final_fc diff --git a/tensorflow/examples/speech_commands/models_test.py b/tensorflow/examples/speech_commands/models_test.py index 80c795367f..0c373967ed 100644 --- a/tensorflow/examples/speech_commands/models_test.py +++ b/tensorflow/examples/speech_commands/models_test.py @@ -26,12 +26,29 @@ from tensorflow.python.platform import test class ModelsTest(test.TestCase): + def _modelSettings(self): + return models.prepare_model_settings( + label_count=10, + sample_rate=16000, + clip_duration_ms=1000, + window_size_ms=20, + window_stride_ms=10, + feature_bin_count=40, + preprocess="mfcc") + def testPrepareModelSettings(self): self.assertIsNotNone( - models.prepare_model_settings(10, 16000, 1000, 20, 10, 40)) + models.prepare_model_settings( + label_count=10, + sample_rate=16000, + clip_duration_ms=1000, + window_size_ms=20, + window_stride_ms=10, + feature_bin_count=40, + preprocess="mfcc")) def testCreateModelConvTraining(self): - model_settings = models.prepare_model_settings(10, 16000, 1000, 20, 10, 40) + model_settings = self._modelSettings() with self.test_session() as sess: fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]]) logits, dropout_prob = models.create_model(fingerprint_input, @@ -42,7 +59,7 @@ class ModelsTest(test.TestCase): self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_prob.name)) def testCreateModelConvInference(self): - model_settings = models.prepare_model_settings(10, 16000, 1000, 20, 10, 40) + model_settings = self._modelSettings() with self.test_session() as sess: fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]]) logits = 
models.create_model(fingerprint_input, model_settings, "conv", @@ -51,7 +68,7 @@ class ModelsTest(test.TestCase): self.assertIsNotNone(sess.graph.get_tensor_by_name(logits.name)) def testCreateModelLowLatencyConvTraining(self): - model_settings = models.prepare_model_settings(10, 16000, 1000, 20, 10, 40) + model_settings = self._modelSettings() with self.test_session() as sess: fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]]) logits, dropout_prob = models.create_model( @@ -62,7 +79,7 @@ class ModelsTest(test.TestCase): self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_prob.name)) def testCreateModelFullyConnectedTraining(self): - model_settings = models.prepare_model_settings(10, 16000, 1000, 20, 10, 40) + model_settings = self._modelSettings() with self.test_session() as sess: fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]]) logits, dropout_prob = models.create_model( @@ -73,7 +90,7 @@ class ModelsTest(test.TestCase): self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_prob.name)) def testCreateModelBadArchitecture(self): - model_settings = models.prepare_model_settings(10, 16000, 1000, 20, 10, 40) + model_settings = self._modelSettings() with self.test_session(): fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]]) with self.assertRaises(Exception) as e: @@ -81,6 +98,17 @@ class ModelsTest(test.TestCase): "bad_architecture", True) self.assertTrue("not recognized" in str(e.exception)) + def testCreateModelTinyConvTraining(self): + model_settings = self._modelSettings() + with self.test_session() as sess: + fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]]) + logits, dropout_prob = models.create_model( + fingerprint_input, model_settings, "tiny_conv", True) + self.assertIsNotNone(logits) + self.assertIsNotNone(dropout_prob) + self.assertIsNotNone(sess.graph.get_tensor_by_name(logits.name)) + self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_prob.name)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/examples/speech_commands/recognize_commands.h b/tensorflow/examples/speech_commands/recognize_commands.h index 7f8041f9ed..a7cd194bec 100644 --- a/tensorflow/examples/speech_commands/recognize_commands.h +++ b/tensorflow/examples/speech_commands/recognize_commands.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ +#ifndef TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ +#define TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ #include <deque> #include <unordered_set> @@ -76,4 +76,4 @@ class RecognizeCommands { } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ +#endif // TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py index a4e80041f8..eca34f8812 100644 --- a/tensorflow/examples/speech_commands/train.py +++ b/tensorflow/examples/speech_commands/train.py @@ -98,12 +98,12 @@ def main(_): model_settings = models.prepare_model_settings( len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))), FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms, - FLAGS.window_stride_ms, FLAGS.dct_coefficient_count) + FLAGS.window_stride_ms, FLAGS.feature_bin_count, FLAGS.preprocess) audio_processor = input_data.AudioProcessor( - FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage, - FLAGS.unknown_percentage, + FLAGS.data_url, FLAGS.data_dir, + FLAGS.silence_percentage, FLAGS.unknown_percentage, FLAGS.wanted_words.split(','), FLAGS.validation_percentage, - FLAGS.testing_percentage, model_settings) + FLAGS.testing_percentage, model_settings, FLAGS.summaries_dir) fingerprint_size = model_settings['fingerprint_size'] label_count = model_settings['label_count'] time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000) @@ -122,8 +122,25 @@ def main(_): 'lists, but are %d and %d long instead' % (len(training_steps_list), len(learning_rates_list))) - fingerprint_input = tf.placeholder( + input_placeholder = tf.placeholder( tf.float32, [None, fingerprint_size], name='fingerprint_input') + if FLAGS.quantize: + # TODO(petewarden): These values have been derived from the observed ranges + # of spectrogram and MFCC inputs. If the preprocessing pipeline changes, + # they may need to be updated. 
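+    # For intuition (illustrative arithmetic added for clarity, not part of
+    # the original change): an eight-bit fake-quant node divides its range
+    # into 255 intervals (256 levels), so 'mfcc' quantizes in increments of
+    # (30.0 - -247.0) / 255 ~= 1.09 and 'average' in increments of
+    # 2048.0 / 255 ~= 8.03.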
+ if FLAGS.preprocess == 'average': + fingerprint_min = 0.0 + fingerprint_max = 2048.0 + elif FLAGS.preprocess == 'mfcc': + fingerprint_min = -247.0 + fingerprint_max = 30.0 + else: + raise Exception('Unknown preprocess mode "%s" (should be "mfcc" or' + ' "average")' % (FLAGS.preprocess)) + fingerprint_input = tf.fake_quant_with_min_max_args( + input_placeholder, fingerprint_min, fingerprint_max) + else: + fingerprint_input = input_placeholder logits, dropout_prob = models.create_model( fingerprint_input, @@ -146,7 +163,8 @@ def main(_): with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( labels=ground_truth_input, logits=logits) - tf.summary.scalar('cross_entropy', cross_entropy_mean) + if FLAGS.quantize: + tf.contrib.quantize.create_training_graph(quant_delay=0) with tf.name_scope('train'), tf.control_dependencies(control_dependencies): learning_rate_input = tf.placeholder( tf.float32, [], name='learning_rate_input') @@ -157,7 +175,9 @@ def main(_): confusion_matrix = tf.confusion_matrix( ground_truth_input, predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) - tf.summary.scalar('accuracy', evaluation_step) + with tf.get_default_graph().name_scope('eval'): + tf.summary.scalar('cross_entropy', cross_entropy_mean) + tf.summary.scalar('accuracy', evaluation_step) global_step = tf.train.get_or_create_global_step() increment_global_step = tf.assign(global_step, global_step + 1) @@ -165,7 +185,7 @@ def main(_): saver = tf.train.Saver(tf.global_variables()) # Merge all the summaries and write them out to /tmp/retrain_logs (by default) - merged_summaries = tf.summary.merge_all() + merged_summaries = tf.summary.merge_all(scope='eval') train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph) validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation') @@ -207,8 +227,11 @@ def main(_): # Run the graph with this batch of training data. 
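+        # Note (added for clarity): merged_summaries was built with
+        # tf.summary.merge_all(scope='eval'), so only the 'eval'-scoped
+        # accuracy and cross-entropy scalars are fetched and written here.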
train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run( [ - merged_summaries, evaluation_step, cross_entropy_mean, train_step, - increment_global_step + merged_summaries, + evaluation_step, + cross_entropy_mean, + train_step, + increment_global_step, ], feed_dict={ fingerprint_input: train_fingerprints, @@ -288,7 +311,7 @@ if __name__ == '__main__': '--data_url', type=str, # pylint: disable=line-too-long - default='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz', + default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz', # pylint: enable=line-too-long help='Location of speech training data archive on the web.') parser.add_argument( @@ -357,17 +380,18 @@ if __name__ == '__main__': '--window_size_ms', type=float, default=30.0, - help='How long each spectrogram timeslice is',) + help='How long each spectrogram timeslice is.',) parser.add_argument( '--window_stride_ms', type=float, default=10.0, - help='How long each spectrogram timeslice is',) + help='How far to move in time between spectrogram timeslices.',) parser.add_argument( - '--dct_coefficient_count', + '--feature_bin_count', type=int, default=40, - help='How many bins to use for the MFCC fingerprint',) + help='How many bins to use for the MFCC fingerprint.', + ) parser.add_argument( '--how_many_training_steps', type=str, @@ -423,6 +447,16 @@ if __name__ == '__main__': type=bool, default=False, help='Whether to check for invalid numbers during processing') + parser.add_argument( + '--quantize', + type=bool, + default=False, + help='Whether to train the model for eight-bit deployment') + parser.add_argument( + '--preprocess', + type=str, + default='mfcc', + help='Spectrogram processing mode. Can be "mfcc" or "average"') FLAGS, unparsed = parser.parse_known_args() tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/tutorials/estimators/BUILD b/tensorflow/examples/tutorials/estimators/BUILD index ecbc1a431d..bab609f208 100644 --- a/tensorflow/examples/tutorials/estimators/BUILD +++ b/tensorflow/examples/tutorials/estimators/BUILD @@ -20,15 +20,3 @@ py_binary( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/image_retraining/__init__.py b/tensorflow/examples/tutorials/estimators/__init__.py index e69de29bb2..e69de29bb2 100644 --- a/tensorflow/examples/image_retraining/__init__.py +++ b/tensorflow/examples/tutorials/estimators/__init__.py diff --git a/tensorflow/examples/tutorials/input_fn/__init__.py b/tensorflow/examples/tutorials/input_fn/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tensorflow/examples/tutorials/input_fn/__init__.py diff --git a/tensorflow/examples/tutorials/layers/BUILD b/tensorflow/examples/tutorials/layers/BUILD index f8a29c79c6..aad78b1840 100644 --- a/tensorflow/examples/tutorials/layers/BUILD +++ b/tensorflow/examples/tutorials/layers/BUILD @@ -19,15 +19,3 @@ py_binary( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/tutorials/layers/__init__.py b/tensorflow/examples/tutorials/layers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tensorflow/examples/tutorials/layers/__init__.py diff --git
a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD index 6d4e67063d..d4070fdd1e 100644 --- a/tensorflow/examples/tutorials/mnist/BUILD +++ b/tensorflow/examples/tutorials/mnist/BUILD @@ -51,6 +51,7 @@ py_binary( "fully_connected_feed.py", ], srcs_version = "PY2AND3", + tags = ["optonly"], deps = [ ":input_data", ":mnist", @@ -96,7 +97,7 @@ py_binary( py_test( name = "fully_connected_feed_test", - size = "small", + size = "medium", srcs = [ "fully_connected_feed.py", ], @@ -132,15 +133,3 @@ py_test( "//tensorflow:tensorflow_py", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/tutorials/mnist/input_data.py b/tensorflow/examples/tutorials/mnist/input_data.py index f1a7e1c4af..fa148ae3e6 100644 --- a/tensorflow/examples/tutorials/mnist/input_data.py +++ b/tensorflow/examples/tutorials/mnist/input_data.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +# pylint: disable=unused-import import gzip import os import tempfile @@ -27,3 +28,4 @@ from six.moves import urllib from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets +# pylint: enable=unused-import diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py index fb3ac94203..47dd6a1947 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py +++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """A very simple MNIST classifier. 
See extensive documentation at @@ -67,12 +66,19 @@ def main(_): # Test trained model correct_prediction = tf.equal(tf.argmax(y, 1), y_) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) - print(sess.run(accuracy, feed_dict={x: mnist.test.images, - y_: mnist.test.labels})) + print(sess.run( + accuracy, feed_dict={ + x: mnist.test.images, + y_: mnist.test.labels + })) + if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data', - help='Directory for storing input data') + parser.add_argument( + '--data_dir', + type=str, + default='/tmp/tensorflow/mnist/input_data', + help='Directory for storing input data') FLAGS, unparsed = parser.parse_known_args() tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/tutorials/monitors/BUILD b/tensorflow/examples/tutorials/monitors/BUILD index 4220e8144d..1c49e3fe53 100644 --- a/tensorflow/examples/tutorials/monitors/BUILD +++ b/tensorflow/examples/tutorials/monitors/BUILD @@ -23,15 +23,3 @@ py_binary( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/tutorials/monitors/__init__.py b/tensorflow/examples/tutorials/monitors/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tensorflow/examples/tutorials/monitors/__init__.py diff --git a/tensorflow/examples/tutorials/monitors/iris_monitors.py b/tensorflow/examples/tutorials/monitors/iris_monitors.py index 850d105f7b..a2b7fe6023 100644 --- a/tensorflow/examples/tutorials/monitors/iris_monitors.py +++ b/tensorflow/examples/tutorials/monitors/iris_monitors.py @@ -32,9 +32,9 @@ IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv") def main(unused_argv): # Load datasets. training_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float) + filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32) test_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float) + filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32) validation_metrics = { "accuracy": @@ -83,7 +83,7 @@ def main(unused_argv): # Classify two new flower samples. 
new_samples = np.array( - [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) + [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32) y = list(classifier.predict(new_samples)) print("Predictions: {}".format(str(y))) diff --git a/tensorflow/examples/tutorials/word2vec/BUILD b/tensorflow/examples/tutorials/word2vec/BUILD index 42d6355b4f..2e19c038bd 100644 --- a/tensorflow/examples/tutorials/word2vec/BUILD +++ b/tensorflow/examples/tutorials/word2vec/BUILD @@ -13,19 +13,11 @@ py_binary( "word2vec_basic.py", ], srcs_version = "PY2AND3", + tags = [ + "no-internal-py3", + ], deps = [ "//tensorflow:tensorflow_py", "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index 142e45a2e8..b09ee99768 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -21,6 +21,8 @@ from __future__ import print_function import collections import math import os +import sys +import argparse import random from tempfile import gettempdir import zipfile @@ -30,6 +32,24 @@ from six.moves import urllib from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf +from tensorflow.contrib.tensorboard.plugins import projector + +# Pass a folder path via the '--log_dir' argument to choose where TensorBoard +# summaries are saved. The default is a 'log' folder in the current directory. +current_path = os.path.dirname(os.path.realpath(sys.argv[0])) + +parser = argparse.ArgumentParser() +parser.add_argument( + '--log_dir', + type=str, + default=os.path.join(current_path, 'log'), + help='The log directory for TensorBoard summaries.') +FLAGS, unparsed = parser.parse_known_args() + +# Create the directory for TensorBoard summaries if it does not already exist. +if not os.path.exists(FLAGS.log_dir): + os.makedirs(FLAGS.log_dir) + # Step 1: Download the data. url = 'http://mattmahoney.net/dc/' @@ -61,6 +81,7 @@ def read_data(filename): data = tf.compat.as_str(f.read(f.namelist()[0])).split() return data + vocabulary = read_data(filename) print('Data size', len(vocabulary)) @@ -86,20 +107,22 @@ def build_dataset(words, n_words): reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys())) return data, count, dictionary, reversed_dictionary + # Filling 4 global variables: # data - list of codes (integers from 0 to vocabulary_size-1). # This is the original text but words are replaced by their codes # count - map of words(strings) to count of occurrences # dictionary - map of words(strings) to their codes(integers) # reverse_dictionary - maps codes(integers) to words(strings) -data, count, dictionary, reverse_dictionary = build_dataset(vocabulary, - vocabulary_size) +data, count, dictionary, reverse_dictionary = build_dataset( + vocabulary, vocabulary_size) del vocabulary # Hint to reduce memory. print('Most common words (+UNK)', count[:5]) print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) data_index = 0 + # Step 3: Function to generate a training batch for the skip-gram model.
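+# Illustrative example (added for clarity, not in the original file): with
+# skip_window=1 and num_skips=2, the text 'the quick brown fox' produces
+# (input -> label) pairs such as quick -> the, quick -> brown,
+# brown -> quick, and brown -> fox.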
def generate_batch(batch_size, num_skips, skip_window): global data_index @@ -108,7 +131,7 @@ def generate_batch(batch_size, num_skips, skip_window): batch = np.ndarray(shape=(batch_size), dtype=np.int32) labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) span = 2 * skip_window + 1 # [ skip_window target skip_window ] - buffer = collections.deque(maxlen=span) + buffer = collections.deque(maxlen=span) # pylint: disable=redefined-builtin if data_index + span > len(data): data_index = 0 buffer.extend(data[data_index:data_index + span]) @@ -120,7 +143,7 @@ def generate_batch(batch_size, num_skips, skip_window): batch[i * num_skips + j] = buffer[skip_window] labels[i * num_skips + j, 0] = buffer[context_word] if data_index == len(data): - buffer[:] = data[:span] + buffer.extend(data[0:span]) data_index = span else: buffer.append(data[data_index]) @@ -129,96 +152,130 @@ data_index = (data_index + len(data) - span) % len(data) return batch, labels + batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) for i in range(8): - print(batch[i], reverse_dictionary[batch[i]], - '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) + print(batch[i], reverse_dictionary[batch[i]], '->', labels[i, 0], + reverse_dictionary[labels[i, 0]]) # Step 4: Build and train a skip-gram model. batch_size = 128 embedding_size = 128 # Dimension of the embedding vector. -skip_window = 1 # How many words to consider left and right. -num_skips = 2 # How many times to reuse an input to generate a label. -num_sampled = 64 # Number of negative examples to sample. +skip_window = 1 # How many words to consider left and right. +num_skips = 2 # How many times to reuse an input to generate a label. +num_sampled = 64 # Number of negative examples to sample. # We pick a random validation set to sample nearest neighbors. Here we limit the # validation samples to the words that have a low numeric ID, which by # construction are also the most frequent. These 3 variables are used only for # displaying model accuracy; they don't affect the calculation. -valid_size = 16 # Random set of words to evaluate similarity on. +valid_size = 16 # Random set of words to evaluate similarity on. valid_window = 100 # Only pick dev samples in the head of the distribution. valid_examples = np.random.choice(valid_window, valid_size, replace=False) - graph = tf.Graph() with graph.as_default(): # Input data. - train_inputs = tf.placeholder(tf.int32, shape=[batch_size]) - train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) - valid_dataset = tf.constant(valid_examples, dtype=tf.int32) + with tf.name_scope('inputs'): + train_inputs = tf.placeholder(tf.int32, shape=[batch_size]) + train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) + valid_dataset = tf.constant(valid_examples, dtype=tf.int32) # Ops and variables pinned to the CPU because of missing GPU implementation with tf.device('/cpu:0'): # Look up embeddings for inputs.
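+    # Note (added for clarity): embedding_lookup is a row gather, so
+    # embed[i, :] equals embeddings[train_inputs[i], :].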
- embeddings = tf.Variable( - tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) - embed = tf.nn.embedding_lookup(embeddings, train_inputs) + with tf.name_scope('embeddings'): + embeddings = tf.Variable( + tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) + embed = tf.nn.embedding_lookup(embeddings, train_inputs) # Construct the variables for the NCE loss - nce_weights = tf.Variable( - tf.truncated_normal([vocabulary_size, embedding_size], - stddev=1.0 / math.sqrt(embedding_size))) - nce_biases = tf.Variable(tf.zeros([vocabulary_size])) + with tf.name_scope('weights'): + nce_weights = tf.Variable( + tf.truncated_normal( + [vocabulary_size, embedding_size], + stddev=1.0 / math.sqrt(embedding_size))) + with tf.name_scope('biases'): + nce_biases = tf.Variable(tf.zeros([vocabulary_size])) # Compute the average NCE loss for the batch. # tf.nn.nce_loss automatically draws a new sample of the negative labels each # time we evaluate the loss. # Explanation of the meaning of NCE loss: # http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/ - loss = tf.reduce_mean( - tf.nn.nce_loss(weights=nce_weights, - biases=nce_biases, - labels=train_labels, - inputs=embed, - num_sampled=num_sampled, - num_classes=vocabulary_size)) + with tf.name_scope('loss'): + loss = tf.reduce_mean( + tf.nn.nce_loss( + weights=nce_weights, + biases=nce_biases, + labels=train_labels, + inputs=embed, + num_sampled=num_sampled, + num_classes=vocabulary_size)) + + # Add the loss value as a scalar to summary. + tf.summary.scalar('loss', loss) # Construct the SGD optimizer using a learning rate of 1.0. - optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) + with tf.name_scope('optimizer'): + optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) # Compute the cosine similarity between minibatch examples and all embeddings. - norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) + norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True)) normalized_embeddings = embeddings / norm - valid_embeddings = tf.nn.embedding_lookup( - normalized_embeddings, valid_dataset) + valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, + valid_dataset) similarity = tf.matmul( valid_embeddings, normalized_embeddings, transpose_b=True) + # Merge all summaries. + merged = tf.summary.merge_all() + # Add variable initializer. init = tf.global_variables_initializer() + # Create a saver. + saver = tf.train.Saver() + # Step 5: Begin training. num_steps = 100001 with tf.Session(graph=graph) as session: + # Open a writer to write summaries. + writer = tf.summary.FileWriter(FLAGS.log_dir, session.graph) + # We must initialize all variables before we use them. init.run() print('Initialized') average_loss = 0 for step in xrange(num_steps): - batch_inputs, batch_labels = generate_batch( - batch_size, num_skips, skip_window) + batch_inputs, batch_labels = generate_batch(batch_size, num_skips, + skip_window) feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels} + # Define metadata variable. + run_metadata = tf.RunMetadata() + # We perform one update step by evaluating the optimizer op (including it # in the list of returned values for session.run()). - _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict) + # Also evaluate the merged summary op, so each step's summaries come back + # in the returned 'summary' value. + # Pass the run metadata to session.run() so the graph can later be + # visualized in TensorBoard.
+ _, summary, loss_val = session.run( + [optimizer, merged, loss], + feed_dict=feed_dict, + run_metadata=run_metadata) average_loss += loss_val + # Add returned summaries to writer in each step. + writer.add_summary(summary, step) + # Add metadata to visualize the graph for the last run. + if step == (num_steps - 1): + writer.add_run_metadata(run_metadata, 'step%d' % step) + if step % 2000 == 0: if step > 0: average_loss /= 2000 @@ -240,6 +297,23 @@ with tf.Session(graph=graph) as session: print(log_str) final_embeddings = normalized_embeddings.eval() + # Write corresponding labels for the embeddings. + with open(FLAGS.log_dir + '/metadata.tsv', 'w') as f: + for i in xrange(vocabulary_size): + f.write(reverse_dictionary[i] + '\n') + + # Save the model for checkpoints. + saver.save(session, os.path.join(FLAGS.log_dir, 'model.ckpt')) + + # Create a configuration for visualizing embeddings with the labels in TensorBoard. + config = projector.ProjectorConfig() + embedding_conf = config.embeddings.add() + embedding_conf.tensor_name = embeddings.name + embedding_conf.metadata_path = os.path.join(FLAGS.log_dir, 'metadata.tsv') + projector.visualize_embeddings(writer, config) + +writer.close() + # Step 6: Visualize the embeddings. @@ -251,21 +325,24 @@ def plot_with_labels(low_dim_embs, labels, filename): for i, label in enumerate(labels): x, y = low_dim_embs[i, :] plt.scatter(x, y) - plt.annotate(label, - xy=(x, y), - xytext=(5, 2), - textcoords='offset points', - ha='right', - va='bottom') + plt.annotate( + label, + xy=(x, y), + xytext=(5, 2), + textcoords='offset points', + ha='right', + va='bottom') plt.savefig(filename) + try: # pylint: disable=g-import-not-at-top from sklearn.manifold import TSNE import matplotlib.pyplot as plt - tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000, method='exact') + tsne = TSNE( + perplexity=30, n_components=2, init='pca', n_iter=5000, method='exact') plot_only = 500 low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :]) labels = [reverse_dictionary[i] for i in xrange(plot_only)] diff --git a/tensorflow/examples/udacity/5_word2vec.ipynb b/tensorflow/examples/udacity/5_word2vec.ipynb index 18c456cad7..3b43d1fb55 100644 --- a/tensorflow/examples/udacity/5_word2vec.ipynb +++ b/tensorflow/examples/udacity/5_word2vec.ipynb @@ -455,7 +455,7 @@ " \n", " # Compute the similarity between minibatch examples and all embeddings.\n", " # We use the cosine distance:\n", - " norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))\n", + " norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True))\n", " normalized_embeddings = embeddings / norm\n", " valid_embeddings = tf.nn.embedding_lookup(\n", " normalized_embeddings, valid_dataset)\n", diff --git a/tensorflow/examples/udacity/Dockerfile b/tensorflow/examples/udacity/Dockerfile index 3ca58566c1..00eb853e52 100644 --- a/tensorflow/examples/udacity/Dockerfile +++ b/tensorflow/examples/udacity/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN pip install scikit-learn pyreadline Pillow +RUN pip install scikit-learn pyreadline Pillow imageio RUN rm -rf /notebooks/* ADD *.ipynb /notebooks/ WORKDIR /notebooks diff --git a/tensorflow/examples/wav_to_spectrogram/BUILD b/tensorflow/examples/wav_to_spectrogram/BUILD index c99870c686..cc8835728d 100644 --- a/tensorflow/examples/wav_to_spectrogram/BUILD +++ b/tensorflow/examples/wav_to_spectrogram/BUILD @@ -49,17 +49,3 @@ tf_cc_test( 
"//tensorflow/core:test_main", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h index fa8cb0abe9..eada07e06f 100644 --- a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h +++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ -#define THIRD_PARTY_TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ +#ifndef TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ +#define TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" @@ -28,4 +28,4 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav, tensorflow::int32 stride, float brightness, const tensorflow::string& output_image); -#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ +#endif // TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ |