author    Pete Warden <petewarden@google.com>  2017-08-18 09:32:30 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-08-18 09:36:47 -0700
commit    7d01f89cc3a05fbd4d79dd5713b9856a8e2764e1 (patch)
tree      c7db59fdd9d1d47fea78411afbb81eb36dcb5563
parent    a6729325a3534ef4aeb2065be82bb2963b9b03de (diff)
Android demo app for speech recognition
PiperOrigin-RevId: 165714459
-rw-r--r--  WORKSPACE                                                                   10
-rw-r--r--  tensorflow/contrib/makefile/Makefile                                        19
-rwxr-xr-x  tensorflow/contrib/makefile/download_dependencies.sh                         2
-rw-r--r--  tensorflow/contrib/makefile/tf_op_files.txt                                  8
-rw-r--r--  tensorflow/core/BUILD                                                        2
-rw-r--r--  tensorflow/core/kernels/BUILD                                               14
-rw-r--r--  tensorflow/docs_src/tutorials/audio_recognition.md                          47
-rw-r--r--  tensorflow/examples/android/AndroidManifest.xml                             10
-rw-r--r--  tensorflow/examples/android/BUILD                                            1
-rw-r--r--  tensorflow/examples/android/README.md                                      118
-rw-r--r--  tensorflow/examples/android/download-models.gradle                           3
-rw-r--r--  tensorflow/examples/android/res/drawable/border.xml                         19
-rw-r--r--  tensorflow/examples/android/res/layout/activity_speech.xml                  55
-rw-r--r--  tensorflow/examples/android/res/layout/list_text_item.xml                   25
-rw-r--r--  tensorflow/examples/android/res/values/base-strings.xml                      1
-rw-r--r--  tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java  186
-rw-r--r--  tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java     353
17 files changed, 808 insertions, 65 deletions
diff --git a/WORKSPACE b/WORKSPACE
index 959587387e..5e9b991fcc 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -80,3 +80,13 @@ new_http_archive(
"http://download.tensorflow.org/models/stylize_v1.zip",
],
)
+
+new_http_archive(
+ name = "speech_commands",
+ build_file = "models.BUILD",
+ sha256 = "c3ec4fea3158eb111f1d932336351edfe8bd515bb6e87aad4f25dbad0a600d0c",
+ urls = [
+ "http://storage.googleapis.com/download.tensorflow.org/models/speech_commands_v0.01.zip",
+ "http://download.tensorflow.org/models/speech_commands_v0.01.zip",
+ ],
+)
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index a4f7453ed5..f8837e3f58 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -73,8 +73,9 @@ HOST_INCLUDES := \
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
- -I$(MAKEFILE_DIR)/downloads/gemmlowp \
+-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
+-I$(MAKEFILE_DIR)/downloads/fft2d \
-I$(HOST_GENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -156,6 +157,7 @@ INCLUDES := \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
+-I$(MAKEFILE_DIR)/downloads/fft2d \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -237,6 +239,7 @@ ifeq ($(TARGET),ANDROID)
$(error "NDK_ROOT is not defined.")
endif
CXX := $(CC_PREFIX) $(NDK_ROOT)/toolchains/arm-linux-androideabi-4.9/prebuilt/$(OS_PATH)-x86_64/bin/arm-linux-androideabi-g++
+ CC := $(CC_PREFIX) $(NDK_ROOT)/toolchains/arm-linux-androideabi-4.9/prebuilt/$(OS_PATH)-x86_64/bin/arm-linux-androideabi-gcc
CXXFLAGS +=\
--sysroot $(NDK_ROOT)/platforms/android-21/arch-arm \
-Wno-narrowing \
@@ -244,7 +247,6 @@ ifeq ($(TARGET),ANDROID)
-mfloat-abi=softfp \
-mfpu=neon \
-fPIE
-
INCLUDES = \
-I$(NDK_ROOT)/sources/android/support/include \
-I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \
@@ -254,6 +256,7 @@ ifeq ($(TARGET),ANDROID)
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
+-I$(MAKEFILE_DIR)/downloads/fft2d \
-I$(MAKEFILE_DIR)/gen/protobuf/include \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
@@ -507,6 +510,7 @@ $(wildcard tensorflow/core/grappler/clusters/single_machine.*)
TF_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS))
# Add in any extra files that don't fit the patterns easily
TF_CC_SRCS += tensorflow/core/platform/default/gpu_tracer.cc
+TF_CC_SRCS += tensorflow/contrib/makefile/downloads/fft2d/fftsg.c
# Also include the op and kernel definitions.
TF_CC_SRCS += $(shell cat $(MAKEFILE_DIR)/tf_op_files.txt)
PBT_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_pb_text_files.txt)
@@ -529,7 +533,8 @@ tensorflow/core/kernels/hexagon/hexagon_remote_fused_graph_executor_build.cc
endif
# File names of the intermediate files target compilation generates.
-TF_CC_OBJS := $(addprefix $(OBJDIR), $(TF_CC_SRCS:.cc=.o))
+TF_CC_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(TF_CC_SRCS))))
PBT_GEN_FILES := $(addprefix $(PBTGENDIR), $(PBT_CC_SRCS))
PBT_OBJS := $(addprefix $(OBJDIR), $(PBT_CC_SRCS:.cc=.o))
PROTO_CC_SRCS := $(addprefix $(PROTOGENDIR), $(PROTO_SRCS:.proto=.pb.cc))
@@ -567,6 +572,14 @@ $(OBJDIR)%.o: %.cc | $(PBT_GEN_FILES)
$(CXX) $(CXXFLAGS) $(DEPFLAGS) $(INCLUDES) -c $< -o $@
@mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d
+# Matches on plain C files.
+$(OBJDIR)%.o: %.c
+ @mkdir -p $(dir $@)
+ @mkdir -p $(dir $(DEPDIR)$*)
+ $(CXX) $(patsubst --std=c++11,--std=c99, $(CXXFLAGS)) -x c $(DEPFLAGS) \
+$(INCLUDES) -c $< -o $@
+ @mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d
+
# Compiles C++ source files that have been generated by protoc.
$(OBJDIR)%.pb.o: $(PROTOGENDIR)%.pb.cc
@mkdir -p $(dir $@)
diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index bb30a3b5a7..1e9958584c 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -25,6 +25,7 @@ GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.g
NSYNC_URL="$(grep -o 'http.*github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
PROTOBUF_URL="$(grep -o 'http.*github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
RE2_URL="$(grep -o 'http.*github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
+FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
# TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
# so work around it by patching the source.
@@ -60,6 +61,7 @@ download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest"
download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync"
download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf"
download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2"
+download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d"
replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
"${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 9132a4344b..a7f2be9790 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -38,6 +38,8 @@ tensorflow/core/kernels/stack_ops.cc
tensorflow/core/kernels/split_op.cc
tensorflow/core/kernels/split_v_op.cc
tensorflow/core/kernels/split_lib_cpu.cc
+tensorflow/core/kernels/spectrogram_op.cc
+tensorflow/core/kernels/spectrogram.cc
tensorflow/core/kernels/sparse_to_dense_op.cc
tensorflow/core/kernels/sparse_matmul_op.cc
tensorflow/core/kernels/softsign_op.cc
@@ -100,6 +102,10 @@ tensorflow/core/kernels/mirror_pad_op_cpu_impl_2.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_3.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_4.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_5.cc
+tensorflow/core/kernels/mfcc_op.cc
+tensorflow/core/kernels/mfcc_mel_filterbank.cc
+tensorflow/core/kernels/mfcc_dct.cc
+tensorflow/core/kernels/mfcc.cc
tensorflow/core/kernels/maxpooling_op.cc
tensorflow/core/kernels/matmul_op.cc
tensorflow/core/kernels/lrn_op.cc
@@ -117,6 +123,7 @@ tensorflow/core/kernels/fill_functor.cc
tensorflow/core/kernels/fifo_queue.cc
tensorflow/core/kernels/fake_quant_ops.cc
tensorflow/core/kernels/example_parsing_ops.cc
+tensorflow/core/kernels/encode_wav_op.cc
tensorflow/core/kernels/dynamic_stitch_op.cc
tensorflow/core/kernels/dynamic_partition_op.cc
tensorflow/core/kernels/decode_bmp_op.cc
@@ -124,6 +131,7 @@ tensorflow/core/kernels/depthtospace_op.cc
tensorflow/core/kernels/spacetodepth_op.cc
tensorflow/core/kernels/dense_update_ops.cc
tensorflow/core/kernels/deep_conv2d.cc
+tensorflow/core/kernels/decode_wav_op.cc
tensorflow/core/kernels/xsmm_conv2d.cc
tensorflow/core/kernels/cwise_ops_common.cc
tensorflow/core/kernels/cwise_op_tanh.cc
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index f7b79e82e1..54f2ff7e13 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -981,6 +981,8 @@ cc_library(
deps = [
":protos_cc",
"//third_party/eigen3",
+ "//third_party/fft2d:fft2d_headers",
+ "@fft2d//:fft2d",
"@gemmlowp//:gemmlowp",
"@nsync//:nsync_cpp",
],
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index d833ed9e38..9f638eebee 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4322,6 +4322,9 @@ filegroup(
"gemm_functors.h",
"image_resizer_state.h",
"maxpooling_op.h",
+ "mfcc.h",
+ "mfcc_dct.h",
+ "mfcc_mel_filterbank.h",
"mirror_pad_op.h",
"mirror_pad_op_cpu_impl.h",
"pad_op.h",
@@ -4338,6 +4341,7 @@ filegroup(
"softsign_op.h",
"spacetobatch_functor.h",
"spacetodepth_op.h",
+ "spectrogram.h",
"tensor_array.h",
"tile_functor.h",
"tile_ops_cpu_impl.h",
@@ -4411,10 +4415,12 @@ filegroup(
"cwise_op_squared_difference.cc",
"cwise_op_sub.cc",
"cwise_op_tanh.cc",
+ "decode_wav_op.cc",
"deep_conv2d.cc",
"deep_conv2d.h",
"depthwise_conv_op.cc",
"dynamic_partition_op.cc",
+ "encode_wav_op.cc",
"fake_quant_ops.cc",
"fifo_queue.cc",
"fused_batch_norm_op.cc",
@@ -4443,6 +4449,10 @@ filegroup(
"logging_ops.cc",
"lrn_op.cc",
"maxpooling_op.cc",
+ "mfcc.cc",
+ "mfcc_dct.cc",
+ "mfcc_mel_filterbank.cc",
+ "mfcc_op.cc",
"mirror_pad_op.cc",
"mirror_pad_op_cpu_impl_1.cc",
"mirror_pad_op_cpu_impl_2.cc",
@@ -4478,6 +4488,8 @@ filegroup(
"spacetobatch_op.cc",
"spacetodepth_op.cc",
"sparse_to_dense_op.cc",
+ "spectrogram.cc",
+ "spectrogram_op.cc",
"stack_ops.cc",
"string_join_op.cc",
"summary_op.cc",
@@ -4614,6 +4626,8 @@ cc_library(
"//tensorflow/core:android_tensorflow_lib_lite",
"//tensorflow/core:protos_cc",
"//third_party/eigen3",
+ "//third_party/fft2d:fft2d_headers",
+ "@fft2d//:fft2d",
"@gemmlowp//:gemmlowp",
],
alwayslink = 1,
diff --git a/tensorflow/docs_src/tutorials/audio_recognition.md b/tensorflow/docs_src/tutorials/audio_recognition.md
index 57d3ebb996..2caa3ec0d2 100644
--- a/tensorflow/docs_src/tutorials/audio_recognition.md
+++ b/tensorflow/docs_src/tutorials/audio_recognition.md
@@ -214,6 +214,41 @@ of the other .wav files in that same folder to see how well it does.
The scores are between zero and one, and higher values mean the model is more
confident in its prediction.
+## Running the Model in an Android App
+
+The easiest way to see how this model works in a real application is to download
+[the prebuilt Android demo
+applications](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#prebuilt-components)
+and install them on your phone. You'll see 'TF Speech' appear in your app list,
+and opening it will show you the same list of action words we've just trained
+our model on, starting with "Yes" and "No". Once you've given the app permission
+to use the microphone, you should be able to try saying those words and see them
+highlighted in the UI when the model recognizes one of them.
+
+You can also build this application yourself, since it's open source and
+[available as part of the TensorFlow repository on
+github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#building-in-android-studio-using-the-tensorflow-aar-from-jcenter).
+By default it downloads [a pretrained model from
+tensorflow.org](http://download.tensorflow.org/models/speech_commands_v0.01.zip),
+but you can easily [replace it with a model you've trained
+yourself](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install-model-files-optional).
+If you do this, you'll need to make sure that the constants in [the main
+SpeechActivity Java source
+file](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java)
+like `SAMPLE_RATE` and `SAMPLE_DURATION_MS` match any changes you've made to the
+defaults while training. You'll also see that there's a [Java version of the
+RecognizeCommands
+module](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java)
+that's very similar to the C++ version in this tutorial. If you've tweaked
+parameters for that, you can also update them in SpeechActivity to get the same
+results as in your server testing.
+
+The demo app updates its UI list of results automatically based on the labels
+text file you copy into assets alongside your frozen graph, which means you can
+easily try out different models without needing to make any code changes. You
+will need to update `LABEL_FILENAME` and `MODEL_FILENAME` to point to the files
+you've added if you change the paths, though.
+
## How does this Model Work?
The architecture used in this tutorial is based on some described in the paper
@@ -341,13 +376,14 @@ aren't detected (high precision). The numbers from the tool give you an idea of
how your model will perform in an application, and you can try tweaking the
signal averaging parameters to tune it to give the kind of performance you want.
To understand what the right parameters are for your application, you can look
-at generating an [ROC curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic)
-to help you understand the tradeoffs.
+at generating an [ROC
+curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) to help
+you understand the tradeoffs.
## RecognizeCommands
-The streaming accuracy tool uses a simple decoder contained in a small
-C++ class called
+The streaming accuracy tool uses a simple decoder contained in a small C++ class
+called
[RecognizeCommands](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/recognize_commands.h).
This class is fed the output of running the TensorFlow model over time, it
averages the signals, and returns information about a label when it has enough
@@ -480,7 +516,8 @@ variations in starting time in the training data, and is controlled with the
`--time_shift_ms` flag, which defaults to 100ms. Increasing this value will
provide more variation, but at the risk of cutting off important parts of the
audio. A related way of augmenting the data with realistic distortions is by
-using [time stretching and pitch scaling](https://en.wikipedia.org/wiki/Audio_time_stretching_and_pitch_scaling),
+using [time stretching and pitch
+scaling](https://en.wikipedia.org/wiki/Audio_time_stretching_and_pitch_scaling),
but that's outside the scope of this tutorial.
## Customizing the Model
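To make the constant-matching step described in the tutorial text above concrete, here is a minimal sketch of the values that have to stay in sync with training. The class name `SpeechModelConfig` is hypothetical; in the demo these constants live directly in `SpeechActivity.java`, and the values shown are this commit's defaults, so you would only change them if you trained with a different sample rate, clip duration, or file names.

```java
// Hypothetical container for the SpeechActivity constants that must mirror
// training settings; values below are the defaults shipped with this commit.
public class SpeechModelConfig {
  public static final int SAMPLE_RATE = 16000;        // Hz; match the rate used in training
  public static final int SAMPLE_DURATION_MS = 1000;  // match the training clip duration
  public static final String LABEL_FILENAME =
      "file:///android_asset/conv_actions_labels.txt"; // labels file copied into assets/
  public static final String MODEL_FILENAME =
      "file:///android_asset/conv_actions_frozen.pb";  // frozen graph copied into assets/
}
```

`SpeechActivity` derives its recording buffer length as `SAMPLE_RATE * SAMPLE_DURATION_MS / 1000`, so keeping these two values in sync with training sizes the buffer to exactly one clip of audio.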
diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml
index 9f229d8b9d..bb75431a1f 100644
--- a/tensorflow/examples/android/AndroidManifest.xml
+++ b/tensorflow/examples/android/AndroidManifest.xml
@@ -22,6 +22,7 @@
<uses-feature android:name="android.hardware.camera" />
<uses-feature android:name="android.hardware.camera.autofocus" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
+ <uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-sdk
android:minSdkVersion="21"
@@ -59,6 +60,15 @@
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
+
+ <activity android:name="org.tensorflow.demo.SpeechActivity"
+ android:screenOrientation="portrait"
+ android:label="@string/activity_name_speech">
+ <intent-filter>
+ <action android:name="android.intent.action.MAIN" />
+ <category android:name="android.intent.category.LAUNCHER" />
+ </intent-filter>
+ </activity>
</application>
</manifest>
diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD
index 2d3b0911fc..2347e6b023 100644
--- a/tensorflow/examples/android/BUILD
+++ b/tensorflow/examples/android/BUILD
@@ -93,6 +93,7 @@ filegroup(
srcs = [
"@inception5h//:model_files",
"@mobile_ssd//:model_files",
+ "@speech_commands//:model_files",
"@stylize//:model_files",
],
)
diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md
index f9881287cd..883f8e664f 100644
--- a/tensorflow/examples/android/README.md
+++ b/tensorflow/examples/android/README.md
@@ -8,10 +8,11 @@ devices.
The demos in this folder are designed to give straightforward samples of using
TensorFlow in mobile applications.
-Inference is done using the [TensorFlow Android Inference Interface](../../../tensorflow/contrib/android),
-which may be built separately if you want a standalone library to drop into your
-existing application. Object tracking and efficient YUV -> RGB conversion are
-handled by `libtensorflow_demo.so`.
+Inference is done using the [TensorFlow Android Inference
+Interface](../../../tensorflow/contrib/android), which may be built separately
+if you want a standalone library to drop into your existing application. Object
+tracking and efficient YUV -> RGB conversion are handled by
+`libtensorflow_demo.so`.
A device running Android 5.0 (API 21) or higher is required to run the demo due
to the use of the camera2 API, although the native libraries themselves can run
@@ -33,6 +34,12 @@ on API >= 14 devices.
Uses a model based on [A Learned Representation For Artistic
Style](https://arxiv.org/abs/1610.07629) to restyle the camera preview
image to that of a number of different artists.
+4. [TF
+ Speech](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java):
+ Runs a simple speech recognition model built by the [audio training
+   tutorial](https://www.tensorflow.org/tutorials/audio_recognition). Listens
+ for a small set of words, and highlights them in the UI when they are
+ recognized.
<img src="sample_images/classify1.jpg" width="30%"><img src="sample_images/stylize1.jpg" width="30%"><img src="sample_images/detect1.jpg" width="30%">
@@ -51,20 +58,22 @@ for more details.
## Running the Demo
-Once the app is installed it can be started via the "TF Classify", "TF Detect"
-and "TF Stylize" icons, which have the orange TensorFlow logo as their icon.
+Once the app is installed it can be started via the "TF Classify", "TF Detect",
+"TF Stylize", and "TF Speech" icons, which have the orange TensorFlow logo as
+their icon.
While running the activities, pressing the volume keys on your device will
-toggle debug visualizations on/off, rendering additional info to the screen
-that may be useful for development purposes.
+toggle debug visualizations on/off, rendering additional info to the screen that
+may be useful for development purposes.
## Building in Android Studio using the TensorFlow AAR from JCenter
The simplest way to compile the demo app yourself, and try out changes to the
-project code is to use AndroidStudio. Simply set this `android` directory as the project root.
+project code is to use AndroidStudio. Simply set this `android` directory as the
+project root.
-Then edit the `build.gradle` file and change the value of `nativeBuildSystem`
-to `'none'` so that the project is built in the simplest way possible:
+Then edit the `build.gradle` file and change the value of `nativeBuildSystem` to
+`'none'` so that the project is built in the simplest way possible:
```None
def nativeBuildSystem = 'none'
@@ -77,8 +86,8 @@ Note: Currently, in this build mode, YUV -> RGB is done using a less efficient
Java implementation, and object tracking is not available in the "TF Detect"
activity. Setting the build system to `'cmake'` currently only builds
`libtensorflow_demo.so`, which provides fast YUV -> RGB conversion and object
-tracking, while still acquiring TensorFlow support via the downloaded AAR, so
-it may be a lightweight way to enable these features.
+tracking, while still acquiring TensorFlow support via the downloaded AAR, so it
+may be a lightweight way to enable these features.
For any project that does not include custom low level TensorFlow code, this is
likely sufficient.
@@ -104,50 +113,51 @@ protobuf compilation.
NOTE: Bazel does not currently support building for Android on Windows. Full
support for gradle/cmake builds is coming soon, but in the meantime we suggest
-that Windows users download the
-[prebuilt binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/)
-instead.
+that Windows users download the [prebuilt
+binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) instead.
##### Install Bazel and Android Prerequisites
-Bazel is the primary build system for TensorFlow. To build with Bazel,
-it and the Android NDK and SDK must be installed on your system.
-
-1. Install the latest version of Bazel as per the instructions [on the Bazel website](https://bazel.build/versions/master/docs/install.html).
-2. The Android NDK is required to build the native (C/C++) TensorFlow code.
- The current recommended version is 12b, which may be found
- [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads).
-3. The Android SDK and build tools may be obtained
- [here](https://developer.android.com/tools/revisions/build-tools.html),
- or alternatively as part of
- [Android Studio](https://developer.android.com/studio/index.html). Build
- tools API >= 23 is required to build the TF Android demo (though it will
- run on API >= 21 devices).
+Bazel is the primary build system for TensorFlow. To build with Bazel, it and
+the Android NDK and SDK must be installed on your system.
+
+1. Install the latest version of Bazel as per the instructions [on the Bazel
+ website](https://bazel.build/versions/master/docs/install.html).
+2. The Android NDK is required to build the native (C/C++) TensorFlow code. The
+ current recommended version is 12b, which may be found
+ [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads).
+3. The Android SDK and build tools may be obtained
+ [here](https://developer.android.com/tools/revisions/build-tools.html), or
+ alternatively as part of [Android
+ Studio](https://developer.android.com/studio/index.html). Build tools API >=
+ 23 is required to build the TF Android demo (though it will run on API >= 21
+ devices).
##### Edit WORKSPACE
-The Android entries in [`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36)
-must be uncommented with the paths filled in appropriately depending on where
-you installed the NDK and SDK. Otherwise an error such as:
-"The external label '//external:android/sdk' is not bound to anything" will
-be reported.
+The Android entries in
+[`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) must be uncommented
+with the paths filled in appropriately depending on where you installed the NDK
+and SDK. Otherwise an error such as: "The external label
+'//external:android/sdk' is not bound to anything" will be reported.
-Also edit the API levels for the SDK in WORKSPACE to the highest level you
-have installed in your SDK. This must be >= 23 (this is completely independent
-of the API level of the demo, which is defined in AndroidManifest.xml).
-The NDK API level may remain at 14.
+Also edit the API levels for the SDK in WORKSPACE to the highest level you have
+installed in your SDK. This must be >= 23 (this is completely independent of the
+API level of the demo, which is defined in AndroidManifest.xml). The NDK API
+level may remain at 14.
##### Install Model Files (optional)
-The TensorFlow `GraphDef`s that contain the model definitions and weights
-are not packaged in the repo because of their size. They are downloaded
+The TensorFlow `GraphDef`s that contain the model definitions and weights are
+not packaged in the repo because of their size. They are downloaded
automatically and packaged with the APK by Bazel via a new_http_archive defined
-in `WORKSPACE` during the build process, and by Gradle via download-models.gradle.
+in `WORKSPACE` during the build process, and by Gradle via
+download-models.gradle.
-**Optional**: If you wish to place the models in your assets manually,
-remove all of the `model_files` entries from the `assets`
-list in `tensorflow_demo` found in the `[BUILD](BUILD)` file. Then download
-and extract the archives yourself to the `assets` directory in the source tree:
+**Optional**: If you wish to place the models in your assets manually, remove
+all of the `model_files` entries from the `assets` list in `tensorflow_demo`
+found in the `[BUILD](BUILD)` file. Then download and extract the archives
+yourself to the `assets` directory in the source tree:
```bash
BASE_URL=https://storage.googleapis.com/download.tensorflow.org/models
@@ -162,27 +172,23 @@ This will extract the models and their associated metadata files to the local
assets/ directory.
If you are using Gradle, make sure to remove download-models.gradle reference
-from build.gradle after your manually download models; otherwise gradle
-might download models again and overwrite your models.
+from build.gradle after you manually download models; otherwise gradle might
+download models again and overwrite your models.
##### Build
-After editing your WORKSPACE file to update the SDK/NDK configuration,
-you may build the APK. Run this from your workspace root:
+After editing your WORKSPACE file to update the SDK/NDK configuration, you may
+build the APK. Run this from your workspace root:
```bash
bazel build -c opt //tensorflow/examples/android:tensorflow_demo
```
-If you get build errors about protocol buffers, run
-`git submodule update --init` and make sure that you've modified your WORKSPACE
-file as instructed, then try building again.
-
##### Install
-Make sure that adb debugging is enabled on your Android 5.0 (API 21) or
-later device, then after building use the following command from your workspace
-root to install the APK:
+Make sure that adb debugging is enabled on your Android 5.0 (API 21) or later
+device, then after building use the following command from your workspace root
+to install the APK:
```bash
adb install -r bazel-bin/tensorflow/examples/android/tensorflow_demo.apk
diff --git a/tensorflow/examples/android/download-models.gradle b/tensorflow/examples/android/download-models.gradle
index a19ca36d7f..0e2cf65f53 100644
--- a/tensorflow/examples/android/download-models.gradle
+++ b/tensorflow/examples/android/download-models.gradle
@@ -11,7 +11,8 @@
// LINT.IfChange
def models = ['inception5h.zip',
'object_detection/ssd_mobilenet_v1_android_export.zip',
- 'stylize_v1.zip']
+ 'stylize_v1.zip',
+ 'speech_commands_conv_actions.zip']
// LINT.ThenChange(//tensorflow/examples/android/BUILD)
// Root URL for model archives
diff --git a/tensorflow/examples/android/res/drawable/border.xml b/tensorflow/examples/android/res/drawable/border.xml
new file mode 100644
index 0000000000..dd1d64d1d6
--- /dev/null
+++ b/tensorflow/examples/android/res/drawable/border.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="utf-8"?><!--
+ Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<shape xmlns:android="http://schemas.android.com/apk/res/android" android:shape="rectangle" >
+ <solid android:color="#00000000" />
+ <stroke android:width="1dip" android:color="#cccccc" />
+</shape>
diff --git a/tensorflow/examples/android/res/layout/activity_speech.xml b/tensorflow/examples/android/res/layout/activity_speech.xml
new file mode 100644
index 0000000000..2fe1338da5
--- /dev/null
+++ b/tensorflow/examples/android/res/layout/activity_speech.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="utf-8"?><!--
+ Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<FrameLayout
+ xmlns:android="http://schemas.android.com/apk/res/android"
+ xmlns:app="http://schemas.android.com/apk/res-auto"
+ xmlns:tools="http://schemas.android.com/tools"
+ android:layout_width="match_parent"
+ android:layout_height="match_parent"
+ tools:context="org.tensorflow.demo.SpeechActivity">
+
+ <TextView
+ android:layout_width="wrap_content"
+ android:layout_height="wrap_content"
+ android:text="Say one of the words below!"
+ android:id="@+id/textView"
+ android:textAlignment="center"
+ android:layout_gravity="top"
+ android:textSize="24dp"
+ android:layout_marginTop="10dp"
+ android:layout_marginLeft="10dp"
+ />
+
+ <ListView
+ android:id="@+id/list_view"
+ android:layout_width="240dp"
+ android:layout_height="wrap_content"
+ android:background="@drawable/border"
+ android:layout_gravity="top|center_horizontal"
+ android:textAlignment="center"
+ android:layout_marginTop="100dp"
+ />
+
+ <Button
+ android:id="@+id/quit"
+ android:layout_width="wrap_content"
+ android:layout_height="wrap_content"
+ android:text="Quit"
+ android:layout_gravity="bottom|center_horizontal"
+ android:layout_marginBottom="10dp"
+ />
+
+</FrameLayout>
diff --git a/tensorflow/examples/android/res/layout/list_text_item.xml b/tensorflow/examples/android/res/layout/list_text_item.xml
new file mode 100644
index 0000000000..526017fbb2
--- /dev/null
+++ b/tensorflow/examples/android/res/layout/list_text_item.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="utf-8"?><!--
+ Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<TextView
+ xmlns:android="http://schemas.android.com/apk/res/android"
+ android:id="@+id/list_text_item"
+ android:layout_width="match_parent"
+ android:layout_height="wrap_content"
+ android:text="TextView"
+ android:textSize="24dp"
+ android:textAlignment="center"
+ android:gravity="center_horizontal"
+ />
diff --git a/tensorflow/examples/android/res/values/base-strings.xml b/tensorflow/examples/android/res/values/base-strings.xml
index 56edb55def..81d144a135 100644
--- a/tensorflow/examples/android/res/values/base-strings.xml
+++ b/tensorflow/examples/android/res/values/base-strings.xml
@@ -20,4 +20,5 @@
<string name="activity_name_classification">TF Classify</string>
<string name="activity_name_detection">TF Detect</string>
<string name="activity_name_stylize">TF Stylize</string>
+ <string name="activity_name_speech">TF Speech</string>
</resources>
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java b/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java
new file mode 100644
index 0000000000..9e91aea7ef
--- /dev/null
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.tensorflow.demo;
+
+import android.util.Log;
+import android.util.Pair;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.List;
+
+/** Reads in results from an instantaneous audio recognition model and smooths them over time. */
+public class RecognizeCommands {
+ // Configuration settings.
+ private List<String> labels = new ArrayList<String>();
+ private long averageWindowDurationMs;
+ private float detectionThreshold;
+ private int suppressionMs;
+ private int minimumCount;
+ private long minimumTimeBetweenSamplesMs;
+
+ // Working variables.
+ private Deque<Pair<Long, float[]>> previousResults = new ArrayDeque<Pair<Long, float[]>>();
+ private String previousTopLabel;
+ private int labelsCount;
+ private long previousTopLabelTime;
+ private float previousTopLabelScore;
+
+ private static final String SILENCE_LABEL = "_silence_";
+ private static final long MINIMUM_TIME_FRACTION = 4;
+
+ public RecognizeCommands(
+ List<String> inLabels,
+ long inAverageWindowDurationMs,
+ float inDetectionThreshold,
+ int inSuppressionMS,
+ int inMinimumCount,
+ long inMinimumTimeBetweenSamplesMS) {
+ labels = inLabels;
+ averageWindowDurationMs = inAverageWindowDurationMs;
+ detectionThreshold = inDetectionThreshold;
+ suppressionMs = inSuppressionMS;
+ minimumCount = inMinimumCount;
+ labelsCount = inLabels.size();
+ previousTopLabel = SILENCE_LABEL;
+ previousTopLabelTime = Long.MIN_VALUE;
+ previousTopLabelScore = 0.0f;
+ minimumTimeBetweenSamplesMs = inMinimumTimeBetweenSamplesMS;
+ }
+
+ /** Holds information about what's been recognized. */
+ public static class RecognitionResult {
+ public final String foundCommand;
+ public final float score;
+ public final boolean isNewCommand;
+
+ public RecognitionResult(String inFoundCommand, float inScore, boolean inIsNewCommand) {
+ foundCommand = inFoundCommand;
+ score = inScore;
+ isNewCommand = inIsNewCommand;
+ }
+ }
+
+ private static class ScoreForSorting implements Comparable<ScoreForSorting> {
+ public final float score;
+ public final int index;
+
+ public ScoreForSorting(float inScore, int inIndex) {
+ score = inScore;
+ index = inIndex;
+ }
+
+ @Override
+ public int compareTo(ScoreForSorting other) {
+ if (this.score > other.score) {
+ return -1;
+ } else if (this.score < other.score) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ public RecognitionResult processLatestResults(float[] currentResults, long currentTimeMS) {
+ if (currentResults.length != labelsCount) {
+ throw new RuntimeException(
+ "The results for recognition should contain "
+ + labelsCount
+ + " elements, but there are "
+ + currentResults.length);
+ }
+
+ if ((!previousResults.isEmpty()) && (currentTimeMS < previousResults.getFirst().first)) {
+ throw new RuntimeException(
+ "You must feed results in increasing time order, but received a timestamp of "
+ + currentTimeMS
+ + " that was earlier than the previous one of "
+ + previousResults.getFirst().first);
+ }
+
+ final int howManyResults = previousResults.size();
+ // Ignore any results that are coming in too frequently.
+ if (howManyResults > 1) {
+ final long timeSinceMostRecent = currentTimeMS - previousResults.getLast().first;
+ if (timeSinceMostRecent < minimumTimeBetweenSamplesMs) {
+ return new RecognitionResult(previousTopLabel, previousTopLabelScore, false);
+ }
+ }
+
+    // Add the latest results to the end of the queue.
+ previousResults.addLast(new Pair<Long, float[]>(currentTimeMS, currentResults));
+
+ // Prune any earlier results that are too old for the averaging window.
+ final long timeLimit = currentTimeMS - averageWindowDurationMs;
+ while (previousResults.getFirst().first < timeLimit) {
+ previousResults.removeFirst();
+ }
+
+ // If there are too few results, assume the result will be unreliable and
+ // bail.
+ final long earliestTime = previousResults.getFirst().first;
+ final long samplesDuration = currentTimeMS - earliestTime;
+ if ((howManyResults < minimumCount)
+ || (samplesDuration < (averageWindowDurationMs / MINIMUM_TIME_FRACTION))) {
+ Log.v("RecognizeResult", "Too few results");
+ return new RecognitionResult(previousTopLabel, 0.0f, false);
+ }
+
+ // Calculate the average score across all the results in the window.
+ float[] averageScores = new float[labelsCount];
+ for (Pair<Long, float[]> previousResult : previousResults) {
+ final float[] scoresTensor = previousResult.second;
+ int i = 0;
+ while (i < scoresTensor.length) {
+ averageScores[i] += scoresTensor[i] / howManyResults;
+ ++i;
+ }
+ }
+
+ // Sort the averaged results in descending score order.
+ ScoreForSorting[] sortedAverageScores = new ScoreForSorting[labelsCount];
+ for (int i = 0; i < labelsCount; ++i) {
+ sortedAverageScores[i] = new ScoreForSorting(averageScores[i], i);
+ }
+ Arrays.sort(sortedAverageScores);
+
+ // See if the latest top score is enough to trigger a detection.
+ final int currentTopIndex = sortedAverageScores[0].index;
+ final String currentTopLabel = labels.get(currentTopIndex);
+ final float currentTopScore = sortedAverageScores[0].score;
+ // If we've recently had another label trigger, assume one that occurs too
+ // soon afterwards is a bad result.
+ long timeSinceLastTop;
+ if (previousTopLabel.equals(SILENCE_LABEL) || (previousTopLabelTime == Long.MIN_VALUE)) {
+ timeSinceLastTop = Long.MAX_VALUE;
+ } else {
+ timeSinceLastTop = currentTimeMS - previousTopLabelTime;
+ }
+ boolean isNewCommand;
+ if ((currentTopScore > detectionThreshold) && (timeSinceLastTop > suppressionMs)) {
+ previousTopLabel = currentTopLabel;
+ previousTopLabelTime = currentTimeMS;
+ previousTopLabelScore = currentTopScore;
+ isNewCommand = true;
+ } else {
+ isNewCommand = false;
+ }
+ return new RecognitionResult(currentTopLabel, currentTopScore, isNewCommand);
+ }
+}
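For a sense of how this smoothing class is driven, here is a hedged, standalone sketch: each inference pass produces one score vector, which is fed in with a monotonically increasing timestamp. The `RecognizeCommandsDemo` driver and its four-entry label list are hypothetical (the real caller is `SpeechActivity`, and the real labels come from the model's labels file); note too that `RecognizeCommands` uses `android.util.Log` and `android.util.Pair`, so this runs in an Android environment rather than a plain JVM. The constructor arguments mirror `SpeechActivity`'s defaults.

```java
import java.util.Arrays;
import java.util.List;

public class RecognizeCommandsDemo {
  public static void main(String[] args) {
    List<String> labels = Arrays.asList("_silence_", "_unknown_", "yes", "no");
    RecognizeCommands recognizer =
        new RecognizeCommands(
            labels,
            500,    // averaging window, ms (AVERAGE_WINDOW_DURATION_MS)
            0.70f,  // detection threshold (DETECTION_THRESHOLD)
            1500,   // suppression period, ms (SUPPRESSION_MS)
            3,      // minimum results in the window (MINIMUM_COUNT)
            30);    // minimum gap between samples, ms

    // Feed ten fake results where "yes" dominates, 100ms apart. The first few
    // calls return isNewCommand == false ("too few results"); once the window
    // holds enough samples, "yes" triggers exactly once, and the suppression
    // period keeps the following calls from re-triggering.
    long timeMs = 0;
    for (int i = 0; i < 10; ++i) {
      float[] scores = {0.05f, 0.05f, 0.85f, 0.05f};
      RecognizeCommands.RecognitionResult result =
          recognizer.processLatestResults(scores, timeMs);
      System.out.println(
          timeMs + "ms: " + result.foundCommand
              + " score=" + result.score + " new=" + result.isNewCommand);
      timeMs += 100;
    }
  }
}
```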
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java
new file mode 100644
index 0000000000..eb4dc69d63
--- /dev/null
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Demonstrates how to run an audio recognition model in Android.
+
+This example loads a simple speech recognition model trained by the tutorial at
+https://www.tensorflow.org/tutorials/audio_recognition
+
+The model files should be downloaded automatically from the TensorFlow website,
+but if you have a custom model you can update the LABEL_FILENAME and
+MODEL_FILENAME constants to point to your own files.
+
+The example application displays a list view with all of the known audio labels,
+and highlights each one when it thinks it has detected one through the
+microphone. The averaging of results to give a more reliable signal happens in
+the RecognizeCommands helper class.
+*/
+
+package org.tensorflow.demo;
+
+import android.animation.ValueAnimator;
+import android.app.Activity;
+import android.content.pm.PackageManager;
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder;
+import android.os.Bundle;
+import android.util.Log;
+import android.view.View;
+import android.widget.ArrayAdapter;
+import android.widget.Button;
+import android.widget.ListView;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.locks.ReentrantLock;
+import org.tensorflow.contrib.android.TensorFlowInferenceInterface;
+import org.tensorflow.demo.R;
+
+/**
+ * An activity that listens for audio and then uses a TensorFlow model to detect particular classes,
+ * by default a small set of action words.
+ */
+public class SpeechActivity extends Activity {
+
+ // Constants that control the behavior of the recognition code and model
+ // settings. See the audio recognition tutorial for a detailed explanation of
+ // all these, but you should customize them to match your training settings if
+ // you are running your own model.
+ private static final int SAMPLE_RATE = 16000;
+ private static final int SAMPLE_DURATION_MS = 1000;
+ private static final int RECORDING_LENGTH = (int) (SAMPLE_RATE * SAMPLE_DURATION_MS / 1000);
+ private static final long AVERAGE_WINDOW_DURATION_MS = 500;
+ private static final float DETECTION_THRESHOLD = 0.70f;
+ private static final int SUPPRESSION_MS = 1500;
+ private static final int MINIMUM_COUNT = 3;
+ private static final long MINIMUM_TIME_BETWEEN_SAMPLES_MS = 30;
+ private static final String LABEL_FILENAME = "file:///android_asset/conv_actions_labels.txt";
+ private static final String MODEL_FILENAME = "file:///android_asset/conv_actions_frozen.pb";
+ private static final String INPUT_DATA_NAME = "decoded_sample_data:0";
+ private static final String SAMPLE_RATE_NAME = "decoded_sample_data:1";
+ private static final String OUTPUT_SCORES_NAME = "labels_softmax";
+
+ // UI elements.
+ private static final int REQUEST_RECORD_AUDIO = 13;
+ private Button quitButton;
+ private ListView labelsListView;
+ private static final String LOG_TAG = SpeechActivity.class.getSimpleName();
+
+ // Working variables.
+ short[] recordingBuffer = new short[RECORDING_LENGTH];
+ int recordingOffset = 0;
+ boolean shouldContinue = true;
+ private Thread recordingThread;
+ boolean shouldContinueRecognition = true;
+ private Thread recognitionThread;
+ private final ReentrantLock recordingBufferLock = new ReentrantLock();
+ private TensorFlowInferenceInterface inferenceInterface;
+ private List<String> labels = new ArrayList<String>();
+ private List<String> displayedLabels = new ArrayList<>();
+ private RecognizeCommands recognizeCommands = null;
+
+ @Override
+ protected void onCreate(Bundle savedInstanceState) {
+ // Set up the UI.
+ super.onCreate(savedInstanceState);
+ setContentView(R.layout.activity_speech);
+ quitButton = (Button) findViewById(R.id.quit);
+ quitButton.setOnClickListener(
+ new View.OnClickListener() {
+ @Override
+ public void onClick(View view) {
+ moveTaskToBack(true);
+ android.os.Process.killProcess(android.os.Process.myPid());
+ System.exit(1);
+ }
+ });
+ labelsListView = (ListView) findViewById(R.id.list_view);
+
+ // Load the labels for the model, but only display those that don't start
+ // with an underscore.
+ String actualFilename = LABEL_FILENAME.split("file:///android_asset/")[1];
+ Log.i(LOG_TAG, "Reading labels from: " + actualFilename);
+ BufferedReader br = null;
+ try {
+ br = new BufferedReader(new InputStreamReader(getAssets().open(actualFilename)));
+ String line;
+ while ((line = br.readLine()) != null) {
+ labels.add(line);
+ if (line.charAt(0) != '_') {
+ displayedLabels.add(line.substring(0, 1).toUpperCase() + line.substring(1));
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ throw new RuntimeException("Problem reading label file!", e);
+ }
+
+ // Build a list view based on these labels.
+ ArrayAdapter<String> arrayAdapter =
+ new ArrayAdapter<String>(this, R.layout.list_text_item, displayedLabels);
+ labelsListView.setAdapter(arrayAdapter);
+
+ // Set up an object to smooth recognition results to increase accuracy.
+ recognizeCommands =
+ new RecognizeCommands(
+ labels,
+ AVERAGE_WINDOW_DURATION_MS,
+ DETECTION_THRESHOLD,
+ SUPPRESSION_MS,
+ MINIMUM_COUNT,
+ MINIMUM_TIME_BETWEEN_SAMPLES_MS);
+
+ // Load the TensorFlow model.
+ inferenceInterface = new TensorFlowInferenceInterface(getAssets(), MODEL_FILENAME);
+
+ // Start the recording and recognition threads.
+ requestMicrophonePermission();
+ startRecognition();
+ }
+
+ private void requestMicrophonePermission() {
+ requestPermissions(
+ new String[] {android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO);
+ }
+
+ @Override
+ public void onRequestPermissionsResult(
+ int requestCode, String[] permissions, int[] grantResults) {
+ if (requestCode == REQUEST_RECORD_AUDIO
+ && grantResults.length > 0
+ && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
+ startRecording();
+ startRecognition();
+ }
+ }
+
+ public synchronized void startRecording() {
+ if (recordingThread != null) {
+ return;
+ }
+ shouldContinue = true;
+ recordingThread =
+ new Thread(
+ new Runnable() {
+ @Override
+ public void run() {
+ record();
+ }
+ });
+ recordingThread.start();
+ }
+
+ public synchronized void stopRecording() {
+ if (recordingThread == null) {
+ return;
+ }
+ shouldContinue = false;
+ recordingThread = null;
+ }
+
+ private void record() {
+ android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO);
+
+ // Estimate the buffer size we'll need for this device.
+ int bufferSize =
+ AudioRecord.getMinBufferSize(
+ SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT);
+ if (bufferSize == AudioRecord.ERROR || bufferSize == AudioRecord.ERROR_BAD_VALUE) {
+ bufferSize = SAMPLE_RATE * 2;
+ }
+ short[] audioBuffer = new short[bufferSize / 2];
+
+ AudioRecord record =
+ new AudioRecord(
+ MediaRecorder.AudioSource.DEFAULT,
+ SAMPLE_RATE,
+ AudioFormat.CHANNEL_IN_MONO,
+ AudioFormat.ENCODING_PCM_16BIT,
+ bufferSize);
+
+ if (record.getState() != AudioRecord.STATE_INITIALIZED) {
+ Log.e(LOG_TAG, "Audio Record can't initialize!");
+ return;
+ }
+
+ record.startRecording();
+
+ Log.v(LOG_TAG, "Start recording");
+
+ // Loop, gathering audio data and copying it to a round-robin buffer.
+ while (shouldContinue) {
+ int numberRead = record.read(audioBuffer, 0, audioBuffer.length);
+ int maxLength = recordingBuffer.length;
+ int newRecordingOffset = recordingOffset + numberRead;
+ int secondCopyLength = Math.max(0, newRecordingOffset - maxLength);
+ int firstCopyLength = numberRead - secondCopyLength;
+ // We store off all the data for the recognition thread to access. The ML
+ // thread will copy out of this buffer into its own, while holding the
+ // lock, so this should be thread safe.
+ recordingBufferLock.lock();
+ try {
+ System.arraycopy(audioBuffer, 0, recordingBuffer, recordingOffset, firstCopyLength);
+ System.arraycopy(audioBuffer, firstCopyLength, recordingBuffer, 0, secondCopyLength);
+ recordingOffset = newRecordingOffset % maxLength;
+ } finally {
+ recordingBufferLock.unlock();
+ }
+ }
+
+ record.stop();
+ record.release();
+ }
+
+ public synchronized void startRecognition() {
+ if (recognitionThread != null) {
+ return;
+ }
+ shouldContinueRecognition = true;
+ recognitionThread =
+ new Thread(
+ new Runnable() {
+ @Override
+ public void run() {
+ recognize();
+ }
+ });
+ recognitionThread.start();
+ }
+
+ public synchronized void stopRecognition() {
+ if (recognitionThread == null) {
+ return;
+ }
+ shouldContinueRecognition = false;
+ recognitionThread = null;
+ }
+
+ private void recognize() {
+ Log.v(LOG_TAG, "Start recognition");
+
+ short[] inputBuffer = new short[RECORDING_LENGTH];
+ float[] floatInputBuffer = new float[RECORDING_LENGTH];
+ float[] outputScores = new float[labels.size()];
+ String[] outputScoresNames = new String[] {OUTPUT_SCORES_NAME};
+ int[] sampleRateList = new int[] {SAMPLE_RATE};
+
+ // Loop, grabbing recorded data and running the recognition model on it.
+ while (shouldContinueRecognition) {
+ // The recording thread places data in this round-robin buffer, so lock to
+ // make sure there's no writing happening and then copy it to our own
+ // local version.
+ recordingBufferLock.lock();
+ try {
+ int maxLength = recordingBuffer.length;
+ int firstCopyLength = maxLength - recordingOffset;
+ int secondCopyLength = recordingOffset;
+ System.arraycopy(recordingBuffer, recordingOffset, inputBuffer, 0, firstCopyLength);
+ System.arraycopy(recordingBuffer, 0, inputBuffer, firstCopyLength, secondCopyLength);
+ } finally {
+ recordingBufferLock.unlock();
+ }
+
+ // We need to feed in float values between -1.0f and 1.0f, so divide the
+ // signed 16-bit inputs.
+ for (int i = 0; i < RECORDING_LENGTH; ++i) {
+ floatInputBuffer[i] = inputBuffer[i] / 32767.0f;
+ }
+
+ // Run the model.
+ inferenceInterface.feed(SAMPLE_RATE_NAME, sampleRateList);
+ inferenceInterface.feed(INPUT_DATA_NAME, floatInputBuffer, RECORDING_LENGTH, 1);
+ inferenceInterface.run(outputScoresNames);
+ inferenceInterface.fetch(OUTPUT_SCORES_NAME, outputScores);
+
+ // Use the smoother to figure out if we've had a real recognition event.
+ long currentTime = System.currentTimeMillis();
+ final RecognizeCommands.RecognitionResult result =
+ recognizeCommands.processLatestResults(outputScores, currentTime);
+
+ runOnUiThread(
+ new Runnable() {
+ @Override
+ public void run() {
+ // If we do have a new command, highlight the right list entry.
+ if (!result.foundCommand.startsWith("_") && result.isNewCommand) {
+ int labelIndex = -1;
+ for (int i = 0; i < labels.size(); ++i) {
+ if (labels.get(i).equals(result.foundCommand)) {
+ labelIndex = i;
+ }
+ }
+ final View labelView = (View) labelsListView.getChildAt(labelIndex - 2);
+ ValueAnimator colorAnimation =
+ ValueAnimator.ofArgb(0x00b3ccff, 0xffb3ccff, 0x00b3ccff);
+ colorAnimation.setDuration(750);
+ colorAnimation.addUpdateListener(
+ new ValueAnimator.AnimatorUpdateListener() {
+ @Override
+ public void onAnimationUpdate(ValueAnimator animator) {
+ labelView.setBackgroundColor((int) animator.getAnimatedValue());
+ }
+ });
+ colorAnimation.start();
+ }
+ }
+ });
+ try {
+ // We don't need to run too frequently, so snooze for a bit.
+ Thread.sleep(MINIMUM_TIME_BETWEEN_SAMPLES_MS);
+ } catch (InterruptedException e) {
+ // Ignore
+ }
+ }
+
+ Log.v(LOG_TAG, "End recognition");
+ }
+}
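The comments in `record()` and `recognize()` above describe a hand-rolled circular buffer: the recording thread writes round-robin under a lock, and the recognition thread unwraps the buffer into chronological order, then rescales the signed 16-bit samples into the [-1.0f, 1.0f] range the model expects. Below is a hedged, standalone sketch of those two steps; `AudioBufferUtil` is a hypothetical helper (in the demo the equivalent logic is inlined in `recognize()`).

```java
import java.util.Arrays;

public class AudioBufferUtil {
  // recordingBuffer is written round-robin; recordingOffset is where the next
  // write would land, i.e. the oldest sample. Returns samples ordered
  // oldest-to-newest, using the same two arraycopy calls as recognize().
  public static short[] unwrap(short[] recordingBuffer, int recordingOffset) {
    int maxLength = recordingBuffer.length;
    int firstCopyLength = maxLength - recordingOffset;
    short[] inputBuffer = new short[maxLength];
    System.arraycopy(recordingBuffer, recordingOffset, inputBuffer, 0, firstCopyLength);
    System.arraycopy(recordingBuffer, 0, inputBuffer, firstCopyLength, recordingOffset);
    return inputBuffer;
  }

  // Signed 16-bit PCM -> floats in roughly [-1.0, 1.0], matching the division
  // by 32767.0f the recognition loop performs before feeding the model.
  public static float[] toFloat(short[] pcm) {
    float[] out = new float[pcm.length];
    for (int i = 0; i < pcm.length; ++i) {
      out[i] = pcm[i] / 32767.0f;
    }
    return out;
  }

  public static void main(String[] args) {
    short[] ring = {4, 5, 1, 2, 3};               // offset 2: oldest sample is 1
    short[] ordered = unwrap(ring, 2);
    System.out.println(Arrays.toString(ordered));  // [1, 2, 3, 4, 5]
    System.out.println(Arrays.toString(toFloat(ordered)));
  }
}
```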