author    Pete Warden <petewarden@google.com>              2017-08-16 15:16:54 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-08-16 15:23:16 -0700
commit    bf2365e7cbcb54bc06baaae5c6d3fef7236626c4 (patch)
tree      16e2301cc7e6e1a747b2371bc226c49f22d60c75 /tensorflow/examples/android
parent    89617e72e7cb103dfefc6a627fc78d0314c5eb9f (diff)
Android demo app for speech recognition
PiperOrigin-RevId: 165504820
Diffstat (limited to 'tensorflow/examples/android')
-rw-r--r--  tensorflow/examples/android/AndroidManifest.xml                            |  10
-rw-r--r--  tensorflow/examples/android/BUILD                                          |   1
-rw-r--r--  tensorflow/examples/android/README.md                                      | 118
-rw-r--r--  tensorflow/examples/android/download-models.gradle                         |   3
-rw-r--r--  tensorflow/examples/android/res/drawable/border.xml                        |  19
-rw-r--r--  tensorflow/examples/android/res/layout/activity_speech.xml                 |  55
-rw-r--r--  tensorflow/examples/android/res/layout/list_text_item.xml                  |  25
-rw-r--r--  tensorflow/examples/android/res/values/base-strings.xml                    |   1
-rw-r--r--  tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java | 186
-rw-r--r--  tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java    | 353
10 files changed, 714 insertions, 57 deletions
diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml
index 9f229d8b9d..bb75431a1f 100644
--- a/tensorflow/examples/android/AndroidManifest.xml
+++ b/tensorflow/examples/android/AndroidManifest.xml
@@ -22,6 +22,7 @@
<uses-feature android:name="android.hardware.camera" />
<uses-feature android:name="android.hardware.camera.autofocus" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
+ <uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-sdk
android:minSdkVersion="21"
@@ -59,6 +60,15 @@
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
+
+ <activity android:name="org.tensorflow.demo.SpeechActivity"
+ android:screenOrientation="portrait"
+ android:label="@string/activity_name_speech">
+ <intent-filter>
+ <action android:name="android.intent.action.MAIN" />
+ <category android:name="android.intent.category.LAUNCHER" />
+ </intent-filter>
+ </activity>
</application>
</manifest>
diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD
index 2d3b0911fc..2347e6b023 100644
--- a/tensorflow/examples/android/BUILD
+++ b/tensorflow/examples/android/BUILD
@@ -93,6 +93,7 @@ filegroup(
srcs = [
"@inception5h//:model_files",
"@mobile_ssd//:model_files",
+ "@speech_commands//:model_files",
"@stylize//:model_files",
],
)
diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md
index f9881287cd..883f8e664f 100644
--- a/tensorflow/examples/android/README.md
+++ b/tensorflow/examples/android/README.md
@@ -8,10 +8,11 @@ devices.
The demos in this folder are designed to give straightforward samples of using
TensorFlow in mobile applications.
-Inference is done using the [TensorFlow Android Inference Interface](../../../tensorflow/contrib/android),
-which may be built separately if you want a standalone library to drop into your
-existing application. Object tracking and efficient YUV -> RGB conversion are
-handled by `libtensorflow_demo.so`.
+Inference is done using the [TensorFlow Android Inference
+Interface](../../../tensorflow/contrib/android), which may be built separately
+if you want a standalone library to drop into your existing application. Object
+tracking and efficient YUV -> RGB conversion are handled by
+`libtensorflow_demo.so`.
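
As a quick orientation before the diff hunks below, the inference interface boils down to loading a frozen `GraphDef` from the APK assets and then running a feed/run/fetch cycle per inference. A minimal sketch, assuming a hypothetical single-input classifier (the model path and tensor names here are placeholders, not files shipped with this demo):

```java
import android.content.res.AssetManager;
import org.tensorflow.contrib.android.TensorFlowInferenceInterface;

/** Hypothetical wrapper illustrating the feed/run/fetch cycle; not part of this patch. */
public class TinyClassifier {
  // Placeholder asset and tensor names; substitute your own model's values.
  private static final String MODEL_FILE = "file:///android_asset/my_model.pb";
  private static final String INPUT_NAME = "input:0";
  private static final String OUTPUT_NAME = "output";

  private final TensorFlowInferenceInterface inference;

  public TinyClassifier(AssetManager assets) {
    // Loads the frozen GraphDef packaged in the APK's assets.
    inference = new TensorFlowInferenceInterface(assets, MODEL_FILE);
  }

  public float[] classify(float[] input, int numClasses) {
    float[] scores = new float[numClasses];
    inference.feed(INPUT_NAME, input, input.length, 1); // copy the input tensor in
    inference.run(new String[] {OUTPUT_NAME});          // run the graph to the output
    inference.fetch(OUTPUT_NAME, scores);               // copy the scores back out
    return scores;
  }
}
```

The `SpeechActivity` added by this change uses exactly these four calls with its own tensor names.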
A device running Android 5.0 (API 21) or higher is required to run the demo due
to the use of the camera2 API, although the native libraries themselves can run
@@ -33,6 +34,12 @@ on API >= 14 devices.
Uses a model based on [A Learned Representation For Artistic
Style](https://arxiv.org/abs/1610.07629) to restyle the camera preview
image to that of a number of different artists.
+4. [TF
+ Speech](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java):
+ Runs a simple speech recognition model built by the [audio training
+   tutorial](https://www.tensorflow.org/tutorials/audio_training). Listens
+ for a small set of words, and highlights them in the UI when they are
+ recognized.
<img src="sample_images/classify1.jpg" width="30%"><img src="sample_images/stylize1.jpg" width="30%"><img src="sample_images/detect1.jpg" width="30%">
@@ -51,20 +58,22 @@ for more details.
## Running the Demo
-Once the app is installed it can be started via the "TF Classify", "TF Detect"
-and "TF Stylize" icons, which have the orange TensorFlow logo as their icon.
+Once the app is installed it can be started via the "TF Classify", "TF Detect",
+"TF Stylize", and "TF Speech" icons, which have the orange TensorFlow logo as
+their icon.
While running the activities, pressing the volume keys on your device will
-toggle debug visualizations on/off, rendering additional info to the screen
-that may be useful for development purposes.
+toggle debug visualizations on/off, rendering additional info to the screen that
+may be useful for development purposes.
## Building in Android Studio using the TensorFlow AAR from JCenter
The simplest way to compile the demo app yourself, and try out changes to the
-project code is to use AndroidStudio. Simply set this `android` directory as the project root.
+project code is to use Android Studio. Simply set this `android` directory as the
+project root.
-Then edit the `build.gradle` file and change the value of `nativeBuildSystem`
-to `'none'` so that the project is built in the simplest way possible:
+Then edit the `build.gradle` file and change the value of `nativeBuildSystem` to
+`'none'` so that the project is built in the simplest way possible:
```None
def nativeBuildSystem = 'none'
@@ -77,8 +86,8 @@ Note: Currently, in this build mode, YUV -> RGB is done using a less efficient
Java implementation, and object tracking is not available in the "TF Detect"
activity. Setting the build system to `'cmake'` currently only builds
`libtensorflow_demo.so`, which provides fast YUV -> RGB conversion and object
-tracking, while still acquiring TensorFlow support via the downloaded AAR, so
-it may be a lightweight way to enable these features.
+tracking, while still acquiring TensorFlow support via the downloaded AAR, so it
+may be a lightweight way to enable these features.
For any project that does not include custom low level TensorFlow code, this is
likely sufficient.
@@ -104,50 +113,51 @@ protobuf compilation.
NOTE: Bazel does not currently support building for Android on Windows. Full
support for gradle/cmake builds is coming soon, but in the meantime we suggest
-that Windows users download the
-[prebuilt binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/)
-instead.
+that Windows users download the [prebuilt
+binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) instead.
##### Install Bazel and Android Prerequisites
-Bazel is the primary build system for TensorFlow. To build with Bazel,
-it and the Android NDK and SDK must be installed on your system.
-
-1. Install the latest version of Bazel as per the instructions [on the Bazel website](https://bazel.build/versions/master/docs/install.html).
-2. The Android NDK is required to build the native (C/C++) TensorFlow code.
- The current recommended version is 12b, which may be found
- [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads).
-3. The Android SDK and build tools may be obtained
- [here](https://developer.android.com/tools/revisions/build-tools.html),
- or alternatively as part of
- [Android Studio](https://developer.android.com/studio/index.html). Build
- tools API >= 23 is required to build the TF Android demo (though it will
- run on API >= 21 devices).
+Bazel is the primary build system for TensorFlow. To build with Bazel, it and
+the Android NDK and SDK must be installed on your system.
+
+1. Install the latest version of Bazel as per the instructions [on the Bazel
+ website](https://bazel.build/versions/master/docs/install.html).
+2. The Android NDK is required to build the native (C/C++) TensorFlow code. The
+ current recommended version is 12b, which may be found
+ [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads).
+3. The Android SDK and build tools may be obtained
+ [here](https://developer.android.com/tools/revisions/build-tools.html), or
+ alternatively as part of [Android
+ Studio](https://developer.android.com/studio/index.html). Build tools API >=
+ 23 is required to build the TF Android demo (though it will run on API >= 21
+ devices).
##### Edit WORKSPACE
-The Android entries in [`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36)
-must be uncommented with the paths filled in appropriately depending on where
-you installed the NDK and SDK. Otherwise an error such as:
-"The external label '//external:android/sdk' is not bound to anything" will
-be reported.
+The Android entries in
+[`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) must be uncommented
+with the paths filled in appropriately depending on where you installed the NDK
+and SDK. Otherwise an error such as: "The external label
+'//external:android/sdk' is not bound to anything" will be reported.
-Also edit the API levels for the SDK in WORKSPACE to the highest level you
-have installed in your SDK. This must be >= 23 (this is completely independent
-of the API level of the demo, which is defined in AndroidManifest.xml).
-The NDK API level may remain at 14.
+Also edit the API levels for the SDK in WORKSPACE to the highest level you have
+installed in your SDK. This must be >= 23 (this is completely independent of the
+API level of the demo, which is defined in AndroidManifest.xml). The NDK API
+level may remain at 14.
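
For reference, once the entries are uncommented and filled in they look roughly like the sketch below (the paths and version numbers are examples for one particular machine, not required values):

```None
android_sdk_repository(
    name = "androidsdk",
    api_level = 23,
    build_tools_version = "25.0.2",
    path = "/home/me/Android/Sdk",
)

android_ndk_repository(
    name = "androidndk",
    path = "/home/me/android-ndk-r12b",
    api_level = 14,
)
```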
##### Install Model Files (optional)
-The TensorFlow `GraphDef`s that contain the model definitions and weights
-are not packaged in the repo because of their size. They are downloaded
+The TensorFlow `GraphDef`s that contain the model definitions and weights are
+not packaged in the repo because of their size. They are downloaded
automatically and packaged with the APK by Bazel via a new_http_archive defined
-in `WORKSPACE` during the build process, and by Gradle via download-models.gradle.
+in `WORKSPACE` during the build process, and by Gradle via
+download-models.gradle.
-**Optional**: If you wish to place the models in your assets manually,
-remove all of the `model_files` entries from the `assets`
-list in `tensorflow_demo` found in the `[BUILD](BUILD)` file. Then download
-and extract the archives yourself to the `assets` directory in the source tree:
+**Optional**: If you wish to place the models in your assets manually, remove
+all of the `model_files` entries from the `assets` list in `tensorflow_demo`
+found in the [BUILD](BUILD) file. Then download and extract the archives
+yourself to the `assets` directory in the source tree:
```bash
BASE_URL=https://storage.googleapis.com/download.tensorflow.org/models
@@ -162,27 +172,23 @@ This will extract the models and their associated metadata files to the local
assets/ directory.
If you are using Gradle, make sure to remove download-models.gradle reference
-from build.gradle after your manually download models; otherwise gradle
-might download models again and overwrite your models.
+from build.gradle after you manually download models; otherwise Gradle might
+download models again and overwrite your models.
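
Concretely, the reference is the `apply from` hook in `build.gradle`; the line to delete looks like the following sketch (assuming the stock layout of this project):

```None
apply from: "download-models.gradle"
```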
##### Build
-After editing your WORKSPACE file to update the SDK/NDK configuration,
-you may build the APK. Run this from your workspace root:
+After editing your WORKSPACE file to update the SDK/NDK configuration, you may
+build the APK. Run this from your workspace root:
```bash
bazel build -c opt //tensorflow/examples/android:tensorflow_demo
```
-If you get build errors about protocol buffers, run
-`git submodule update --init` and make sure that you've modified your WORKSPACE
-file as instructed, then try building again.
-
##### Install
-Make sure that adb debugging is enabled on your Android 5.0 (API 21) or
-later device, then after building use the following command from your workspace
-root to install the APK:
+Make sure that adb debugging is enabled on your Android 5.0 (API 21) or later
+device, then after building use the following command from your workspace root
+to install the APK:
```bash
adb install -r bazel-bin/tensorflow/examples/android/tensorflow_demo.apk
diff --git a/tensorflow/examples/android/download-models.gradle b/tensorflow/examples/android/download-models.gradle
index a19ca36d7f..0e2cf65f53 100644
--- a/tensorflow/examples/android/download-models.gradle
+++ b/tensorflow/examples/android/download-models.gradle
@@ -11,7 +11,8 @@
// LINT.IfChange
def models = ['inception5h.zip',
'object_detection/ssd_mobilenet_v1_android_export.zip',
- 'stylize_v1.zip']
+ 'stylize_v1.zip',
+ 'speech_commands_conv_actions.zip']
// LINT.ThenChange(//tensorflow/examples/android/BUILD)
// Root URL for model archives
diff --git a/tensorflow/examples/android/res/drawable/border.xml b/tensorflow/examples/android/res/drawable/border.xml
new file mode 100644
index 0000000000..dd1d64d1d6
--- /dev/null
+++ b/tensorflow/examples/android/res/drawable/border.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="utf-8"?><!--
+ Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<shape xmlns:android="http://schemas.android.com/apk/res/android" android:shape="rectangle" >
+ <solid android:color="#00000000" />
+ <stroke android:width="1dip" android:color="#cccccc" />
+</shape>
diff --git a/tensorflow/examples/android/res/layout/activity_speech.xml b/tensorflow/examples/android/res/layout/activity_speech.xml
new file mode 100644
index 0000000000..2fe1338da5
--- /dev/null
+++ b/tensorflow/examples/android/res/layout/activity_speech.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="utf-8"?><!--
+ Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<FrameLayout
+ xmlns:android="http://schemas.android.com/apk/res/android"
+ xmlns:app="http://schemas.android.com/apk/res-auto"
+ xmlns:tools="http://schemas.android.com/tools"
+ android:layout_width="match_parent"
+ android:layout_height="match_parent"
+ tools:context="org.tensorflow.demo.SpeechActivity">
+
+ <TextView
+ android:layout_width="wrap_content"
+ android:layout_height="wrap_content"
+ android:text="Say one of the words below!"
+ android:id="@+id/textView"
+ android:textAlignment="center"
+ android:layout_gravity="top"
+      android:textSize="24sp"
+ android:layout_marginTop="10dp"
+ android:layout_marginLeft="10dp"
+ />
+
+ <ListView
+ android:id="@+id/list_view"
+ android:layout_width="240dp"
+ android:layout_height="wrap_content"
+ android:background="@drawable/border"
+ android:layout_gravity="top|center_horizontal"
+ android:textAlignment="center"
+ android:layout_marginTop="100dp"
+ />
+
+ <Button
+ android:id="@+id/quit"
+ android:layout_width="wrap_content"
+ android:layout_height="wrap_content"
+ android:text="Quit"
+ android:layout_gravity="bottom|center_horizontal"
+ android:layout_marginBottom="10dp"
+ />
+
+</FrameLayout>
diff --git a/tensorflow/examples/android/res/layout/list_text_item.xml b/tensorflow/examples/android/res/layout/list_text_item.xml
new file mode 100644
index 0000000000..526017fbb2
--- /dev/null
+++ b/tensorflow/examples/android/res/layout/list_text_item.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="utf-8"?><!--
+ Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<TextView
+ xmlns:android="http://schemas.android.com/apk/res/android"
+ android:id="@+id/list_text_item"
+ android:layout_width="match_parent"
+ android:layout_height="wrap_content"
+ android:text="TextView"
+    android:textSize="24sp"
+ android:textAlignment="center"
+ android:gravity="center_horizontal"
+ />
diff --git a/tensorflow/examples/android/res/values/base-strings.xml b/tensorflow/examples/android/res/values/base-strings.xml
index 56edb55def..81d144a135 100644
--- a/tensorflow/examples/android/res/values/base-strings.xml
+++ b/tensorflow/examples/android/res/values/base-strings.xml
@@ -20,4 +20,5 @@
<string name="activity_name_classification">TF Classify</string>
<string name="activity_name_detection">TF Detect</string>
<string name="activity_name_stylize">TF Stylize</string>
+ <string name="activity_name_speech">TF Speech</string>
</resources>
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java b/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java
new file mode 100644
index 0000000000..9e91aea7ef
--- /dev/null
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.tensorflow.demo;
+
+import android.util.Log;
+import android.util.Pair;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.List;
+
+/** Reads in results from an instantaneous audio recognition model and smoothes them over time. */
+public class RecognizeCommands {
+ // Configuration settings.
+ private List<String> labels = new ArrayList<String>();
+ private long averageWindowDurationMs;
+ private float detectionThreshold;
+ private int suppressionMs;
+ private int minimumCount;
+ private long minimumTimeBetweenSamplesMs;
+
+ // Working variables.
+ private Deque<Pair<Long, float[]>> previousResults = new ArrayDeque<Pair<Long, float[]>>();
+ private String previousTopLabel;
+ private int labelsCount;
+ private long previousTopLabelTime;
+ private float previousTopLabelScore;
+
+ private static final String SILENCE_LABEL = "_silence_";
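+  // Results must span at least 1/MINIMUM_TIME_FRACTION of the averaging
+  // window before they are considered reliable (see processLatestResults).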
+ private static final long MINIMUM_TIME_FRACTION = 4;
+
+ public RecognizeCommands(
+ List<String> inLabels,
+ long inAverageWindowDurationMs,
+ float inDetectionThreshold,
+ int inSuppressionMS,
+ int inMinimumCount,
+ long inMinimumTimeBetweenSamplesMS) {
+ labels = inLabels;
+ averageWindowDurationMs = inAverageWindowDurationMs;
+ detectionThreshold = inDetectionThreshold;
+ suppressionMs = inSuppressionMS;
+ minimumCount = inMinimumCount;
+ labelsCount = inLabels.size();
+ previousTopLabel = SILENCE_LABEL;
+ previousTopLabelTime = Long.MIN_VALUE;
+ previousTopLabelScore = 0.0f;
+ minimumTimeBetweenSamplesMs = inMinimumTimeBetweenSamplesMS;
+ }
+
+ /** Holds information about what's been recognized. */
+ public static class RecognitionResult {
+ public final String foundCommand;
+ public final float score;
+ public final boolean isNewCommand;
+
+ public RecognitionResult(String inFoundCommand, float inScore, boolean inIsNewCommand) {
+ foundCommand = inFoundCommand;
+ score = inScore;
+ isNewCommand = inIsNewCommand;
+ }
+ }
+
+ private static class ScoreForSorting implements Comparable<ScoreForSorting> {
+ public final float score;
+ public final int index;
+
+ public ScoreForSorting(float inScore, int inIndex) {
+ score = inScore;
+ index = inIndex;
+ }
+
+ @Override
+ public int compareTo(ScoreForSorting other) {
+ if (this.score > other.score) {
+ return -1;
+ } else if (this.score < other.score) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ public RecognitionResult processLatestResults(float[] currentResults, long currentTimeMS) {
+ if (currentResults.length != labelsCount) {
+ throw new RuntimeException(
+ "The results for recognition should contain "
+ + labelsCount
+ + " elements, but there are "
+ + currentResults.length);
+ }
+
+ if ((!previousResults.isEmpty()) && (currentTimeMS < previousResults.getFirst().first)) {
+ throw new RuntimeException(
+ "You must feed results in increasing time order, but received a timestamp of "
+ + currentTimeMS
+ + " that was earlier than the previous one of "
+ + previousResults.getFirst().first);
+ }
+
+ final int howManyResults = previousResults.size();
+ // Ignore any results that are coming in too frequently.
+ if (howManyResults > 1) {
+ final long timeSinceMostRecent = currentTimeMS - previousResults.getLast().first;
+ if (timeSinceMostRecent < minimumTimeBetweenSamplesMs) {
+ return new RecognitionResult(previousTopLabel, previousTopLabelScore, false);
+ }
+ }
+
+    // Add the latest result to the end of the queue.
+ previousResults.addLast(new Pair<Long, float[]>(currentTimeMS, currentResults));
+
+ // Prune any earlier results that are too old for the averaging window.
+ final long timeLimit = currentTimeMS - averageWindowDurationMs;
+ while (previousResults.getFirst().first < timeLimit) {
+ previousResults.removeFirst();
+ }
+
+ // If there are too few results, assume the result will be unreliable and
+ // bail.
+ final long earliestTime = previousResults.getFirst().first;
+ final long samplesDuration = currentTimeMS - earliestTime;
+ if ((howManyResults < minimumCount)
+ || (samplesDuration < (averageWindowDurationMs / MINIMUM_TIME_FRACTION))) {
+ Log.v("RecognizeResult", "Too few results");
+ return new RecognitionResult(previousTopLabel, 0.0f, false);
+ }
+
+ // Calculate the average score across all the results in the window.
+ float[] averageScores = new float[labelsCount];
+ for (Pair<Long, float[]> previousResult : previousResults) {
+ final float[] scoresTensor = previousResult.second;
+      for (int i = 0; i < scoresTensor.length; ++i) {
+        averageScores[i] += scoresTensor[i] / howManyResults;
+      }
+ }
+
+ // Sort the averaged results in descending score order.
+ ScoreForSorting[] sortedAverageScores = new ScoreForSorting[labelsCount];
+ for (int i = 0; i < labelsCount; ++i) {
+ sortedAverageScores[i] = new ScoreForSorting(averageScores[i], i);
+ }
+ Arrays.sort(sortedAverageScores);
+
+ // See if the latest top score is enough to trigger a detection.
+ final int currentTopIndex = sortedAverageScores[0].index;
+ final String currentTopLabel = labels.get(currentTopIndex);
+ final float currentTopScore = sortedAverageScores[0].score;
+ // If we've recently had another label trigger, assume one that occurs too
+ // soon afterwards is a bad result.
+ long timeSinceLastTop;
+ if (previousTopLabel.equals(SILENCE_LABEL) || (previousTopLabelTime == Long.MIN_VALUE)) {
+ timeSinceLastTop = Long.MAX_VALUE;
+ } else {
+ timeSinceLastTop = currentTimeMS - previousTopLabelTime;
+ }
+ boolean isNewCommand;
+ if ((currentTopScore > detectionThreshold) && (timeSinceLastTop > suppressionMs)) {
+ previousTopLabel = currentTopLabel;
+ previousTopLabelTime = currentTimeMS;
+ previousTopLabelScore = currentTopScore;
+ isNewCommand = true;
+ } else {
+ isNewCommand = false;
+ }
+ return new RecognitionResult(currentTopLabel, currentTopScore, isNewCommand);
+ }
+}
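
To make the smoothing behavior concrete, here is a hypothetical smoke test that drives `RecognizeCommands` directly with fabricated scores. None of it is part of the patch, and it has to run in an Android process because the class logs through `android.util.Log`:

```java
import android.util.Log;
import java.util.Arrays;
import java.util.List;

/** Hypothetical smoke test for RecognizeCommands; not part of this patch. */
public class RecognizeCommandsSmokeTest {
  private static final String TAG = "RecognizeCommandsSmokeTest";

  public static void run() {
    List<String> labels = Arrays.asList("_silence_", "_unknown_", "yes", "no");
    RecognizeCommands recognizer =
        new RecognizeCommands(
            labels,
            /* inAverageWindowDurationMs= */ 500,
            /* inDetectionThreshold= */ 0.7f,
            /* inSuppressionMS= */ 1500,
            /* inMinimumCount= */ 3,
            /* inMinimumTimeBetweenSamplesMS= */ 30);

    // Feed frames of fake scores 30 ms apart that strongly vote for "yes".
    // The first few frames are rejected as unreliable (too few results, and
    // less than a quarter of the 500 ms window covered); once enough history
    // accumulates, "yes" triggers once and is then suppressed for 1500 ms.
    for (long now = 0; now < 300; now += 30) {
      float[] scores = {0.05f, 0.05f, 0.85f, 0.05f};
      RecognizeCommands.RecognitionResult result =
          recognizer.processLatestResults(scores, now);
      if (result.isNewCommand) {
        Log.i(TAG, "Heard '" + result.foundCommand + "' at " + now + " ms");
      }
    }
  }
}
```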
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java
new file mode 100644
index 0000000000..eb4dc69d63
--- /dev/null
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Demonstrates how to run an audio recognition model in Android.
+
+This example loads a simple speech recognition model trained by the tutorial at
+https://www.tensorflow.org/tutorials/audio_training
+
+The model files should be downloaded automatically from the TensorFlow website,
+but if you have a custom model you can update the LABEL_FILENAME and
+MODEL_FILENAME constants to point to your own files.
+
+The example application displays a list view with all of the known audio labels,
+and highlights each one when it thinks it has detected one through the
+microphone. The averaging of results to give a more reliable signal happens in
+the RecognizeCommands helper class.
+*/
+
+package org.tensorflow.demo;
+
+import android.animation.ValueAnimator;
+import android.app.Activity;
+import android.content.pm.PackageManager;
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder;
+import android.os.Build;
+import android.os.Bundle;
+import android.util.Log;
+import android.view.View;
+import android.widget.ArrayAdapter;
+import android.widget.Button;
+import android.widget.ListView;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.locks.ReentrantLock;
+import org.tensorflow.contrib.android.TensorFlowInferenceInterface;
+import org.tensorflow.demo.R;
+
+/**
+ * An activity that listens for audio and then uses a TensorFlow model to detect particular classes,
+ * by default a small set of action words.
+ */
+public class SpeechActivity extends Activity {
+
+ // Constants that control the behavior of the recognition code and model
+ // settings. See the audio recognition tutorial for a detailed explanation of
+ // all these, but you should customize them to match your training settings if
+ // you are running your own model.
+ private static final int SAMPLE_RATE = 16000;
+ private static final int SAMPLE_DURATION_MS = 1000;
+ private static final int RECORDING_LENGTH = (int) (SAMPLE_RATE * SAMPLE_DURATION_MS / 1000);
+ private static final long AVERAGE_WINDOW_DURATION_MS = 500;
+ private static final float DETECTION_THRESHOLD = 0.70f;
+ private static final int SUPPRESSION_MS = 1500;
+ private static final int MINIMUM_COUNT = 3;
+ private static final long MINIMUM_TIME_BETWEEN_SAMPLES_MS = 30;
+ private static final String LABEL_FILENAME = "file:///android_asset/conv_actions_labels.txt";
+ private static final String MODEL_FILENAME = "file:///android_asset/conv_actions_frozen.pb";
+ private static final String INPUT_DATA_NAME = "decoded_sample_data:0";
+ private static final String SAMPLE_RATE_NAME = "decoded_sample_data:1";
+ private static final String OUTPUT_SCORES_NAME = "labels_softmax";
+
+ // UI elements.
+ private static final int REQUEST_RECORD_AUDIO = 13;
+ private Button quitButton;
+ private ListView labelsListView;
+ private static final String LOG_TAG = SpeechActivity.class.getSimpleName();
+
+ // Working variables.
+ short[] recordingBuffer = new short[RECORDING_LENGTH];
+ int recordingOffset = 0;
+ boolean shouldContinue = true;
+ private Thread recordingThread;
+ boolean shouldContinueRecognition = true;
+ private Thread recognitionThread;
+ private final ReentrantLock recordingBufferLock = new ReentrantLock();
+ private TensorFlowInferenceInterface inferenceInterface;
+ private List<String> labels = new ArrayList<String>();
+ private List<String> displayedLabels = new ArrayList<>();
+ private RecognizeCommands recognizeCommands = null;
+
+ @Override
+ protected void onCreate(Bundle savedInstanceState) {
+ // Set up the UI.
+ super.onCreate(savedInstanceState);
+ setContentView(R.layout.activity_speech);
+ quitButton = (Button) findViewById(R.id.quit);
+ quitButton.setOnClickListener(
+ new View.OnClickListener() {
+ @Override
+ public void onClick(View view) {
+ moveTaskToBack(true);
+ android.os.Process.killProcess(android.os.Process.myPid());
+ System.exit(1);
+ }
+ });
+ labelsListView = (ListView) findViewById(R.id.list_view);
+
+ // Load the labels for the model, but only display those that don't start
+ // with an underscore.
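+    // LABEL_FILENAME carries the file:///android_asset/ prefix expected by the
+    // inference interface, but AssetManager.open() needs the bare file name.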
+ String actualFilename = LABEL_FILENAME.split("file:///android_asset/")[1];
+ Log.i(LOG_TAG, "Reading labels from: " + actualFilename);
+ BufferedReader br = null;
+ try {
+ br = new BufferedReader(new InputStreamReader(getAssets().open(actualFilename)));
+ String line;
+ while ((line = br.readLine()) != null) {
+ labels.add(line);
+ if (line.charAt(0) != '_') {
+ displayedLabels.add(line.substring(0, 1).toUpperCase() + line.substring(1));
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ throw new RuntimeException("Problem reading label file!", e);
+ }
+
+ // Build a list view based on these labels.
+ ArrayAdapter<String> arrayAdapter =
+ new ArrayAdapter<String>(this, R.layout.list_text_item, displayedLabels);
+ labelsListView.setAdapter(arrayAdapter);
+
+ // Set up an object to smooth recognition results to increase accuracy.
+ recognizeCommands =
+ new RecognizeCommands(
+ labels,
+ AVERAGE_WINDOW_DURATION_MS,
+ DETECTION_THRESHOLD,
+ SUPPRESSION_MS,
+ MINIMUM_COUNT,
+ MINIMUM_TIME_BETWEEN_SAMPLES_MS);
+
+ // Load the TensorFlow model.
+ inferenceInterface = new TensorFlowInferenceInterface(getAssets(), MODEL_FILENAME);
+
+ // Start the recording and recognition threads.
+ requestMicrophonePermission();
+ startRecognition();
+ }
+
+  private void requestMicrophonePermission() {
+    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
+      requestPermissions(
+          new String[] {android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO);
+    } else {
+      startRecording(); // Pre-M: the permission is granted at install time.
+    }
+  }
+
+ @Override
+ public void onRequestPermissionsResult(
+ int requestCode, String[] permissions, int[] grantResults) {
+ if (requestCode == REQUEST_RECORD_AUDIO
+ && grantResults.length > 0
+ && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
+ startRecording();
+ startRecognition();
+ }
+ }
+
+ public synchronized void startRecording() {
+ if (recordingThread != null) {
+ return;
+ }
+ shouldContinue = true;
+ recordingThread =
+ new Thread(
+ new Runnable() {
+ @Override
+ public void run() {
+ record();
+ }
+ });
+ recordingThread.start();
+ }
+
+ public synchronized void stopRecording() {
+ if (recordingThread == null) {
+ return;
+ }
+ shouldContinue = false;
+ recordingThread = null;
+ }
+
+ private void record() {
+ android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO);
+
+ // Estimate the buffer size we'll need for this device.
+ int bufferSize =
+ AudioRecord.getMinBufferSize(
+ SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT);
+ if (bufferSize == AudioRecord.ERROR || bufferSize == AudioRecord.ERROR_BAD_VALUE) {
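+      // getMinBufferSize failed, so fall back to one second of 16-bit samples
+      // (bufferSize is measured in bytes, two bytes per sample).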
+ bufferSize = SAMPLE_RATE * 2;
+ }
+ short[] audioBuffer = new short[bufferSize / 2];
+
+ AudioRecord record =
+ new AudioRecord(
+ MediaRecorder.AudioSource.DEFAULT,
+ SAMPLE_RATE,
+ AudioFormat.CHANNEL_IN_MONO,
+ AudioFormat.ENCODING_PCM_16BIT,
+ bufferSize);
+
+ if (record.getState() != AudioRecord.STATE_INITIALIZED) {
+ Log.e(LOG_TAG, "Audio Record can't initialize!");
+ return;
+ }
+
+ record.startRecording();
+
+ Log.v(LOG_TAG, "Start recording");
+
+ // Loop, gathering audio data and copying it to a round-robin buffer.
+ while (shouldContinue) {
+ int numberRead = record.read(audioBuffer, 0, audioBuffer.length);
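+      // recordingBuffer is circular: firstCopyLength fills from recordingOffset
+      // up to the end of the array, and secondCopyLength wraps any remainder
+      // back around to the start.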
+ int maxLength = recordingBuffer.length;
+ int newRecordingOffset = recordingOffset + numberRead;
+ int secondCopyLength = Math.max(0, newRecordingOffset - maxLength);
+ int firstCopyLength = numberRead - secondCopyLength;
+ // We store off all the data for the recognition thread to access. The ML
+ // thread will copy out of this buffer into its own, while holding the
+ // lock, so this should be thread safe.
+ recordingBufferLock.lock();
+ try {
+ System.arraycopy(audioBuffer, 0, recordingBuffer, recordingOffset, firstCopyLength);
+ System.arraycopy(audioBuffer, firstCopyLength, recordingBuffer, 0, secondCopyLength);
+ recordingOffset = newRecordingOffset % maxLength;
+ } finally {
+ recordingBufferLock.unlock();
+ }
+ }
+
+ record.stop();
+ record.release();
+ }
+
+ public synchronized void startRecognition() {
+ if (recognitionThread != null) {
+ return;
+ }
+ shouldContinueRecognition = true;
+ recognitionThread =
+ new Thread(
+ new Runnable() {
+ @Override
+ public void run() {
+ recognize();
+ }
+ });
+ recognitionThread.start();
+ }
+
+ public synchronized void stopRecognition() {
+ if (recognitionThread == null) {
+ return;
+ }
+ shouldContinueRecognition = false;
+ recognitionThread = null;
+ }
+
+ private void recognize() {
+ Log.v(LOG_TAG, "Start recognition");
+
+ short[] inputBuffer = new short[RECORDING_LENGTH];
+ float[] floatInputBuffer = new float[RECORDING_LENGTH];
+ float[] outputScores = new float[labels.size()];
+ String[] outputScoresNames = new String[] {OUTPUT_SCORES_NAME};
+ int[] sampleRateList = new int[] {SAMPLE_RATE};
+
+ // Loop, grabbing recorded data and running the recognition model on it.
+ while (shouldContinueRecognition) {
+ // The recording thread places data in this round-robin buffer, so lock to
+ // make sure there's no writing happening and then copy it to our own
+ // local version.
+ recordingBufferLock.lock();
+ try {
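+        // Reassemble the circular buffer in chronological order: the oldest
+        // samples run from recordingOffset to the end of the array, followed
+        // by the newer samples from the start of the array.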
+ int maxLength = recordingBuffer.length;
+ int firstCopyLength = maxLength - recordingOffset;
+ int secondCopyLength = recordingOffset;
+ System.arraycopy(recordingBuffer, recordingOffset, inputBuffer, 0, firstCopyLength);
+ System.arraycopy(recordingBuffer, 0, inputBuffer, firstCopyLength, secondCopyLength);
+ } finally {
+ recordingBufferLock.unlock();
+ }
+
+ // We need to feed in float values between -1.0f and 1.0f, so divide the
+ // signed 16-bit inputs.
+ for (int i = 0; i < RECORDING_LENGTH; ++i) {
+ floatInputBuffer[i] = inputBuffer[i] / 32767.0f;
+ }
+
+ // Run the model.
+ inferenceInterface.feed(SAMPLE_RATE_NAME, sampleRateList);
+ inferenceInterface.feed(INPUT_DATA_NAME, floatInputBuffer, RECORDING_LENGTH, 1);
+ inferenceInterface.run(outputScoresNames);
+ inferenceInterface.fetch(OUTPUT_SCORES_NAME, outputScores);
+
+ // Use the smoother to figure out if we've had a real recognition event.
+ long currentTime = System.currentTimeMillis();
+ final RecognizeCommands.RecognitionResult result =
+ recognizeCommands.processLatestResults(outputScores, currentTime);
+
+ runOnUiThread(
+ new Runnable() {
+ @Override
+ public void run() {
+ // If we do have a new command, highlight the right list entry.
+ if (!result.foundCommand.startsWith("_") && result.isNewCommand) {
+ int labelIndex = -1;
+ for (int i = 0; i < labels.size(); ++i) {
+ if (labels.get(i).equals(result.foundCommand)) {
+ labelIndex = i;
+ }
+ }
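+                  // The list view only shows labels that don't begin with an
+                  // underscore, so skip past the two underscore entries (e.g.
+                  // _silence_ and _unknown_) at the head of the label file.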
+ final View labelView = (View) labelsListView.getChildAt(labelIndex - 2);
+ ValueAnimator colorAnimation =
+ ValueAnimator.ofArgb(0x00b3ccff, 0xffb3ccff, 0x00b3ccff);
+ colorAnimation.setDuration(750);
+ colorAnimation.addUpdateListener(
+ new ValueAnimator.AnimatorUpdateListener() {
+ @Override
+ public void onAnimationUpdate(ValueAnimator animator) {
+ labelView.setBackgroundColor((int) animator.getAnimatedValue());
+ }
+ });
+ colorAnimation.start();
+ }
+ }
+ });
+ try {
+ // We don't need to run too frequently, so snooze for a bit.
+ Thread.sleep(MINIMUM_TIME_BETWEEN_SAMPLES_MS);
+ } catch (InterruptedException e) {
+ // Ignore
+ }
+ }
+
+ Log.v(LOG_TAG, "End recognition");
+ }
+}