| author | Pete Warden <petewarden@google.com> | 2017-08-16 15:16:54 -0700 |
|---|---|---|
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-08-16 15:23:16 -0700 |
| commit | bf2365e7cbcb54bc06baaae5c6d3fef7236626c4 (patch) | |
| tree | 16e2301cc7e6e1a747b2371bc226c49f22d60c75 /tensorflow/examples/android | |
| parent | 89617e72e7cb103dfefc6a627fc78d0314c5eb9f (diff) | |
Android demo app for speech recognition
PiperOrigin-RevId: 165504820
Diffstat (limited to 'tensorflow/examples/android')
10 files changed, 714 insertions, 57 deletions
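The heart of the new `SpeechActivity` is a feed/run/fetch cycle against the frozen speech-commands graph. As a reading aid for the diff that follows, here is a minimal sketch of that cycle; the tensor names, the `TensorFlowInferenceInterface` calls, and the 32767.0f scaling of signed 16-bit PCM samples are all taken from the new `SpeechActivity.java` below, while the wrapper class and method are invented purely for illustration.

```java
// Sketch only: the class and method names are invented; the tensor names and
// TensorFlowInferenceInterface calls mirror the ones used in SpeechActivity.java below.
import org.tensorflow.contrib.android.TensorFlowInferenceInterface;

public final class SpeechInferenceSketch {
  private static final int SAMPLE_RATE = 16000;
  private static final int RECORDING_LENGTH = SAMPLE_RATE; // One second of audio.

  /** Runs one pass of the speech-commands model over a second of 16-bit PCM samples. */
  public static float[] runOnce(
      TensorFlowInferenceInterface inferenceInterface, short[] inputBuffer, int numLabels) {
    // The graph expects floats in [-1.0, 1.0], so scale the signed 16-bit samples.
    float[] floatInputBuffer = new float[RECORDING_LENGTH];
    for (int i = 0; i < RECORDING_LENGTH; ++i) {
      floatInputBuffer[i] = inputBuffer[i] / 32767.0f;
    }
    // Feed the sample rate and the audio, run the graph, and fetch the softmax scores.
    inferenceInterface.feed("decoded_sample_data:1", new int[] {SAMPLE_RATE});
    inferenceInterface.feed("decoded_sample_data:0", floatInputBuffer, RECORDING_LENGTH, 1);
    inferenceInterface.run(new String[] {"labels_softmax"});
    float[] outputScores = new float[numLabels];
    inferenceInterface.fetch("labels_softmax", outputScores);
    return outputScores;
  }
}
```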
diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index 9f229d8b9d..bb75431a1f 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -22,6 +22,7 @@ <uses-feature android:name="android.hardware.camera" /> <uses-feature android:name="android.hardware.camera.autofocus" /> <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/> + <uses-permission android:name="android.permission.RECORD_AUDIO" /> <uses-sdk android:minSdkVersion="21" @@ -59,6 +60,15 @@ <category android:name="android.intent.category.LAUNCHER" /> </intent-filter> </activity> + + <activity android:name="org.tensorflow.demo.SpeechActivity" + android:screenOrientation="portrait" + android:label="@string/activity_name_speech"> + <intent-filter> + <action android:name="android.intent.action.MAIN" /> + <category android:name="android.intent.category.LAUNCHER" /> + </intent-filter> + </activity> </application> </manifest> diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index 2d3b0911fc..2347e6b023 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -93,6 +93,7 @@ filegroup( srcs = [ "@inception5h//:model_files", "@mobile_ssd//:model_files", + "@speech_commands//:model_files", "@stylize//:model_files", ], ) diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md index f9881287cd..883f8e664f 100644 --- a/tensorflow/examples/android/README.md +++ b/tensorflow/examples/android/README.md @@ -8,10 +8,11 @@ devices. The demos in this folder are designed to give straightforward samples of using TensorFlow in mobile applications. -Inference is done using the [TensorFlow Android Inference Interface](../../../tensorflow/contrib/android), -which may be built separately if you want a standalone library to drop into your -existing application. Object tracking and efficient YUV -> RGB conversion are -handled by `libtensorflow_demo.so`. +Inference is done using the [TensorFlow Android Inference +Interface](../../../tensorflow/contrib/android), which may be built separately +if you want a standalone library to drop into your existing application. Object +tracking and efficient YUV -> RGB conversion are handled by +`libtensorflow_demo.so`. A device running Android 5.0 (API 21) or higher is required to run the demo due to the use of the camera2 API, although the native libraries themselves can run @@ -33,6 +34,12 @@ on API >= 14 devices. Uses a model based on [A Learned Representation For Artistic Style](https://arxiv.org/abs/1610.07629) to restyle the camera preview image to that of a number of different artists. +4. [TF + Speech](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java): + Runs a simple speech recognition model built by the [audio training + tutorial](https://www.tensorflow.org/tutorials/image_retraining). Listens + for a small set of words, and highlights them in the UI when they are + recognized. <img src="sample_images/classify1.jpg" width="30%"><img src="sample_images/stylize1.jpg" width="30%"><img src="sample_images/detect1.jpg" width="30%"> @@ -51,20 +58,22 @@ for more details. ## Running the Demo -Once the app is installed it can be started via the "TF Classify", "TF Detect" -and "TF Stylize" icons, which have the orange TensorFlow logo as their icon. 
+Once the app is installed it can be started via the "TF Classify", "TF Detect", +"TF Stylize", and "TF Speech" icons, which have the orange TensorFlow logo as +their icon. While running the activities, pressing the volume keys on your device will -toggle debug visualizations on/off, rendering additional info to the screen -that may be useful for development purposes. +toggle debug visualizations on/off, rendering additional info to the screen that +may be useful for development purposes. ## Building in Android Studio using the TensorFlow AAR from JCenter The simplest way to compile the demo app yourself, and try out changes to the -project code is to use AndroidStudio. Simply set this `android` directory as the project root. +project code is to use AndroidStudio. Simply set this `android` directory as the +project root. -Then edit the `build.gradle` file and change the value of `nativeBuildSystem` -to `'none'` so that the project is built in the simplest way possible: +Then edit the `build.gradle` file and change the value of `nativeBuildSystem` to +`'none'` so that the project is built in the simplest way possible: ```None def nativeBuildSystem = 'none' @@ -77,8 +86,8 @@ Note: Currently, in this build mode, YUV -> RGB is done using a less efficient Java implementation, and object tracking is not available in the "TF Detect" activity. Setting the build system to `'cmake'` currently only builds `libtensorflow_demo.so`, which provides fast YUV -> RGB conversion and object -tracking, while still acquiring TensorFlow support via the downloaded AAR, so -it may be a lightweight way to enable these features. +tracking, while still acquiring TensorFlow support via the downloaded AAR, so it +may be a lightweight way to enable these features. For any project that does not include custom low level TensorFlow code, this is likely sufficient. @@ -104,50 +113,51 @@ protobuf compilation. NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime we suggest -that Windows users download the -[prebuilt binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) -instead. +that Windows users download the [prebuilt +binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) instead. ##### Install Bazel and Android Prerequisites -Bazel is the primary build system for TensorFlow. To build with Bazel, -it and the Android NDK and SDK must be installed on your system. - -1. Install the latest version of Bazel as per the instructions [on the Bazel website](https://bazel.build/versions/master/docs/install.html). -2. The Android NDK is required to build the native (C/C++) TensorFlow code. - The current recommended version is 12b, which may be found - [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads). -3. The Android SDK and build tools may be obtained - [here](https://developer.android.com/tools/revisions/build-tools.html), - or alternatively as part of - [Android Studio](https://developer.android.com/studio/index.html). Build - tools API >= 23 is required to build the TF Android demo (though it will - run on API >= 21 devices). +Bazel is the primary build system for TensorFlow. To build with Bazel, it and +the Android NDK and SDK must be installed on your system. + +1. Install the latest version of Bazel as per the instructions [on the Bazel + website](https://bazel.build/versions/master/docs/install.html). +2. 
The Android NDK is required to build the native (C/C++) TensorFlow code. The + current recommended version is 12b, which may be found + [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads). +3. The Android SDK and build tools may be obtained + [here](https://developer.android.com/tools/revisions/build-tools.html), or + alternatively as part of [Android + Studio](https://developer.android.com/studio/index.html). Build tools API >= + 23 is required to build the TF Android demo (though it will run on API >= 21 + devices). ##### Edit WORKSPACE -The Android entries in [`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) -must be uncommented with the paths filled in appropriately depending on where -you installed the NDK and SDK. Otherwise an error such as: -"The external label '//external:android/sdk' is not bound to anything" will -be reported. +The Android entries in +[`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) must be uncommented +with the paths filled in appropriately depending on where you installed the NDK +and SDK. Otherwise an error such as: "The external label +'//external:android/sdk' is not bound to anything" will be reported. -Also edit the API levels for the SDK in WORKSPACE to the highest level you -have installed in your SDK. This must be >= 23 (this is completely independent -of the API level of the demo, which is defined in AndroidManifest.xml). -The NDK API level may remain at 14. +Also edit the API levels for the SDK in WORKSPACE to the highest level you have +installed in your SDK. This must be >= 23 (this is completely independent of the +API level of the demo, which is defined in AndroidManifest.xml). The NDK API +level may remain at 14. ##### Install Model Files (optional) -The TensorFlow `GraphDef`s that contain the model definitions and weights -are not packaged in the repo because of their size. They are downloaded +The TensorFlow `GraphDef`s that contain the model definitions and weights are +not packaged in the repo because of their size. They are downloaded automatically and packaged with the APK by Bazel via a new_http_archive defined -in `WORKSPACE` during the build process, and by Gradle via download-models.gradle. +in `WORKSPACE` during the build process, and by Gradle via +download-models.gradle. -**Optional**: If you wish to place the models in your assets manually, -remove all of the `model_files` entries from the `assets` -list in `tensorflow_demo` found in the `[BUILD](BUILD)` file. Then download -and extract the archives yourself to the `assets` directory in the source tree: +**Optional**: If you wish to place the models in your assets manually, remove +all of the `model_files` entries from the `assets` list in `tensorflow_demo` +found in the `[BUILD](BUILD)` file. Then download and extract the archives +yourself to the `assets` directory in the source tree: ```bash BASE_URL=https://storage.googleapis.com/download.tensorflow.org/models @@ -162,27 +172,23 @@ This will extract the models and their associated metadata files to the local assets/ directory. If you are using Gradle, make sure to remove download-models.gradle reference -from build.gradle after your manually download models; otherwise gradle -might download models again and overwrite your models. +from build.gradle after your manually download models; otherwise gradle might +download models again and overwrite your models. ##### Build -After editing your WORKSPACE file to update the SDK/NDK configuration, -you may build the APK. 
Run this from your workspace root: +After editing your WORKSPACE file to update the SDK/NDK configuration, you may +build the APK. Run this from your workspace root: ```bash bazel build -c opt //tensorflow/examples/android:tensorflow_demo ``` -If you get build errors about protocol buffers, run -`git submodule update --init` and make sure that you've modified your WORKSPACE -file as instructed, then try building again. - ##### Install -Make sure that adb debugging is enabled on your Android 5.0 (API 21) or -later device, then after building use the following command from your workspace -root to install the APK: +Make sure that adb debugging is enabled on your Android 5.0 (API 21) or later +device, then after building use the following command from your workspace root +to install the APK: ```bash adb install -r bazel-bin/tensorflow/examples/android/tensorflow_demo.apk diff --git a/tensorflow/examples/android/download-models.gradle b/tensorflow/examples/android/download-models.gradle index a19ca36d7f..0e2cf65f53 100644 --- a/tensorflow/examples/android/download-models.gradle +++ b/tensorflow/examples/android/download-models.gradle @@ -11,7 +11,8 @@ // LINT.IfChange def models = ['inception5h.zip', 'object_detection/ssd_mobilenet_v1_android_export.zip', - 'stylize_v1.zip'] + 'stylize_v1.zip', + 'speech_commands_conv_actions.zip'] // LINT.ThenChange(//tensorflow/examples/android/BUILD) // Root URL for model archives diff --git a/tensorflow/examples/android/res/drawable/border.xml b/tensorflow/examples/android/res/drawable/border.xml new file mode 100644 index 0000000000..dd1d64d1d6 --- /dev/null +++ b/tensorflow/examples/android/res/drawable/border.xml @@ -0,0 +1,19 @@ +<?xml version="1.0" encoding="utf-8"?><!-- + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<shape xmlns:android="http://schemas.android.com/apk/res/android" android:shape="rectangle" > + <solid android:color="#00000000" /> + <stroke android:width="1dip" android:color="#cccccc" /> +</shape> diff --git a/tensorflow/examples/android/res/layout/activity_speech.xml b/tensorflow/examples/android/res/layout/activity_speech.xml new file mode 100644 index 0000000000..2fe1338da5 --- /dev/null +++ b/tensorflow/examples/android/res/layout/activity_speech.xml @@ -0,0 +1,55 @@ +<?xml version="1.0" encoding="utf-8"?><!-- + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<FrameLayout + xmlns:android="http://schemas.android.com/apk/res/android" + xmlns:app="http://schemas.android.com/apk/res-auto" + xmlns:tools="http://schemas.android.com/tools" + android:layout_width="match_parent" + android:layout_height="match_parent" + tools:context="org.tensorflow.demo.SpeechActivity"> + + <TextView + android:layout_width="wrap_content" + android:layout_height="wrap_content" + android:text="Say one of the words below!" + android:id="@+id/textView" + android:textAlignment="center" + android:layout_gravity="top" + android:textSize="24dp" + android:layout_marginTop="10dp" + android:layout_marginLeft="10dp" + /> + + <ListView + android:id="@+id/list_view" + android:layout_width="240dp" + android:layout_height="wrap_content" + android:background="@drawable/border" + android:layout_gravity="top|center_horizontal" + android:textAlignment="center" + android:layout_marginTop="100dp" + /> + + <Button + android:id="@+id/quit" + android:layout_width="wrap_content" + android:layout_height="wrap_content" + android:text="Quit" + android:layout_gravity="bottom|center_horizontal" + android:layout_marginBottom="10dp" + /> + +</FrameLayout> diff --git a/tensorflow/examples/android/res/layout/list_text_item.xml b/tensorflow/examples/android/res/layout/list_text_item.xml new file mode 100644 index 0000000000..526017fbb2 --- /dev/null +++ b/tensorflow/examples/android/res/layout/list_text_item.xml @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="utf-8"?><!-- + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<TextView + xmlns:android="http://schemas.android.com/apk/res/android" + android:id="@+id/list_text_item" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:text="TextView" + android:textSize="24dp" + android:textAlignment="center" + android:gravity="center_horizontal" + /> diff --git a/tensorflow/examples/android/res/values/base-strings.xml b/tensorflow/examples/android/res/values/base-strings.xml index 56edb55def..81d144a135 100644 --- a/tensorflow/examples/android/res/values/base-strings.xml +++ b/tensorflow/examples/android/res/values/base-strings.xml @@ -20,4 +20,5 @@ <string name="activity_name_classification">TF Classify</string> <string name="activity_name_detection">TF Detect</string> <string name="activity_name_stylize">TF Stylize</string> + <string name="activity_name_speech">TF Speech</string> </resources> diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java b/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java new file mode 100644 index 0000000000..9e91aea7ef --- /dev/null +++ b/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java @@ -0,0 +1,186 @@ +/* + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.tensorflow.demo; + +import android.util.Log; +import android.util.Pair; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Deque; +import java.util.List; + +/** Reads in results from an instantaneous audio recognition model and smoothes them over time. */ +public class RecognizeCommands { + // Configuration settings. + private List<String> labels = new ArrayList<String>(); + private long averageWindowDurationMs; + private float detectionThreshold; + private int suppressionMs; + private int minimumCount; + private long minimumTimeBetweenSamplesMs; + + // Working variables. + private Deque<Pair<Long, float[]>> previousResults = new ArrayDeque<Pair<Long, float[]>>(); + private String previousTopLabel; + private int labelsCount; + private long previousTopLabelTime; + private float previousTopLabelScore; + + private static final String SILENCE_LABEL = "_silence_"; + private static final long MINIMUM_TIME_FRACTION = 4; + + public RecognizeCommands( + List<String> inLabels, + long inAverageWindowDurationMs, + float inDetectionThreshold, + int inSuppressionMS, + int inMinimumCount, + long inMinimumTimeBetweenSamplesMS) { + labels = inLabels; + averageWindowDurationMs = inAverageWindowDurationMs; + detectionThreshold = inDetectionThreshold; + suppressionMs = inSuppressionMS; + minimumCount = inMinimumCount; + labelsCount = inLabels.size(); + previousTopLabel = SILENCE_LABEL; + previousTopLabelTime = Long.MIN_VALUE; + previousTopLabelScore = 0.0f; + minimumTimeBetweenSamplesMs = inMinimumTimeBetweenSamplesMS; + } + + /** Holds information about what's been recognized. 
*/ + public static class RecognitionResult { + public final String foundCommand; + public final float score; + public final boolean isNewCommand; + + public RecognitionResult(String inFoundCommand, float inScore, boolean inIsNewCommand) { + foundCommand = inFoundCommand; + score = inScore; + isNewCommand = inIsNewCommand; + } + } + + private static class ScoreForSorting implements Comparable<ScoreForSorting> { + public final float score; + public final int index; + + public ScoreForSorting(float inScore, int inIndex) { + score = inScore; + index = inIndex; + } + + @Override + public int compareTo(ScoreForSorting other) { + if (this.score > other.score) { + return -1; + } else if (this.score < other.score) { + return 1; + } else { + return 0; + } + } + } + + public RecognitionResult processLatestResults(float[] currentResults, long currentTimeMS) { + if (currentResults.length != labelsCount) { + throw new RuntimeException( + "The results for recognition should contain " + + labelsCount + + " elements, but there are " + + currentResults.length); + } + + if ((!previousResults.isEmpty()) && (currentTimeMS < previousResults.getFirst().first)) { + throw new RuntimeException( + "You must feed results in increasing time order, but received a timestamp of " + + currentTimeMS + + " that was earlier than the previous one of " + + previousResults.getFirst().first); + } + + final int howManyResults = previousResults.size(); + // Ignore any results that are coming in too frequently. + if (howManyResults > 1) { + final long timeSinceMostRecent = currentTimeMS - previousResults.getLast().first; + if (timeSinceMostRecent < minimumTimeBetweenSamplesMs) { + return new RecognitionResult(previousTopLabel, previousTopLabelScore, false); + } + } + + // Add the latest results to the head of the queue. + previousResults.addLast(new Pair<Long, float[]>(currentTimeMS, currentResults)); + + // Prune any earlier results that are too old for the averaging window. + final long timeLimit = currentTimeMS - averageWindowDurationMs; + while (previousResults.getFirst().first < timeLimit) { + previousResults.removeFirst(); + } + + // If there are too few results, assume the result will be unreliable and + // bail. + final long earliestTime = previousResults.getFirst().first; + final long samplesDuration = currentTimeMS - earliestTime; + if ((howManyResults < minimumCount) + || (samplesDuration < (averageWindowDurationMs / MINIMUM_TIME_FRACTION))) { + Log.v("RecognizeResult", "Too few results"); + return new RecognitionResult(previousTopLabel, 0.0f, false); + } + + // Calculate the average score across all the results in the window. + float[] averageScores = new float[labelsCount]; + for (Pair<Long, float[]> previousResult : previousResults) { + final float[] scoresTensor = previousResult.second; + int i = 0; + while (i < scoresTensor.length) { + averageScores[i] += scoresTensor[i] / howManyResults; + ++i; + } + } + + // Sort the averaged results in descending score order. + ScoreForSorting[] sortedAverageScores = new ScoreForSorting[labelsCount]; + for (int i = 0; i < labelsCount; ++i) { + sortedAverageScores[i] = new ScoreForSorting(averageScores[i], i); + } + Arrays.sort(sortedAverageScores); + + // See if the latest top score is enough to trigger a detection. 
+ final int currentTopIndex = sortedAverageScores[0].index; + final String currentTopLabel = labels.get(currentTopIndex); + final float currentTopScore = sortedAverageScores[0].score; + // If we've recently had another label trigger, assume one that occurs too + // soon afterwards is a bad result. + long timeSinceLastTop; + if (previousTopLabel.equals(SILENCE_LABEL) || (previousTopLabelTime == Long.MIN_VALUE)) { + timeSinceLastTop = Long.MAX_VALUE; + } else { + timeSinceLastTop = currentTimeMS - previousTopLabelTime; + } + boolean isNewCommand; + if ((currentTopScore > detectionThreshold) && (timeSinceLastTop > suppressionMs)) { + previousTopLabel = currentTopLabel; + previousTopLabelTime = currentTimeMS; + previousTopLabelScore = currentTopScore; + isNewCommand = true; + } else { + isNewCommand = false; + } + return new RecognitionResult(currentTopLabel, currentTopScore, isNewCommand); + } +} diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java new file mode 100644 index 0000000000..eb4dc69d63 --- /dev/null +++ b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java @@ -0,0 +1,353 @@ +/* + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Demonstrates how to run an audio recognition model in Android. + +This example loads a simple speech recognition model trained by the tutorial at +https://www.tensorflow.org/tutorials/audio_training + +The model files should be downloaded automatically from the TensorFlow website, +but if you have a custom model you can update the LABEL_FILENAME and +MODEL_FILENAME constants to point to your own files. + +The example application displays a list view with all of the known audio labels, +and highlights each one when it thinks it has detected one through the +microphone. The averaging of results to give a more reliable signal happens in +the RecognizeCommands helper class. +*/ + +package org.tensorflow.demo; + +import android.animation.ValueAnimator; +import android.app.Activity; +import android.content.pm.PackageManager; +import android.media.AudioFormat; +import android.media.AudioRecord; +import android.media.MediaRecorder; +import android.os.Bundle; +import android.util.Log; +import android.view.View; +import android.widget.ArrayAdapter; +import android.widget.Button; +import android.widget.ListView; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.locks.ReentrantLock; +import org.tensorflow.contrib.android.TensorFlowInferenceInterface; +import org.tensorflow.demo.R; + +/** + * An activity that listens for audio and then uses a TensorFlow model to detect particular classes, + * by default a small set of action words. 
+ */ +public class SpeechActivity extends Activity { + + // Constants that control the behavior of the recognition code and model + // settings. See the audio recognition tutorial for a detailed explanation of + // all these, but you should customize them to match your training settings if + // you are running your own model. + private static final int SAMPLE_RATE = 16000; + private static final int SAMPLE_DURATION_MS = 1000; + private static final int RECORDING_LENGTH = (int) (SAMPLE_RATE * SAMPLE_DURATION_MS / 1000); + private static final long AVERAGE_WINDOW_DURATION_MS = 500; + private static final float DETECTION_THRESHOLD = 0.70f; + private static final int SUPPRESSION_MS = 1500; + private static final int MINIMUM_COUNT = 3; + private static final long MINIMUM_TIME_BETWEEN_SAMPLES_MS = 30; + private static final String LABEL_FILENAME = "file:///android_asset/conv_actions_labels.txt"; + private static final String MODEL_FILENAME = "file:///android_asset/conv_actions_frozen.pb"; + private static final String INPUT_DATA_NAME = "decoded_sample_data:0"; + private static final String SAMPLE_RATE_NAME = "decoded_sample_data:1"; + private static final String OUTPUT_SCORES_NAME = "labels_softmax"; + + // UI elements. + private static final int REQUEST_RECORD_AUDIO = 13; + private Button quitButton; + private ListView labelsListView; + private static final String LOG_TAG = SpeechActivity.class.getSimpleName(); + + // Working variables. + short[] recordingBuffer = new short[RECORDING_LENGTH]; + int recordingOffset = 0; + boolean shouldContinue = true; + private Thread recordingThread; + boolean shouldContinueRecognition = true; + private Thread recognitionThread; + private final ReentrantLock recordingBufferLock = new ReentrantLock(); + private TensorFlowInferenceInterface inferenceInterface; + private List<String> labels = new ArrayList<String>(); + private List<String> displayedLabels = new ArrayList<>(); + private RecognizeCommands recognizeCommands = null; + + @Override + protected void onCreate(Bundle savedInstanceState) { + // Set up the UI. + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_speech); + quitButton = (Button) findViewById(R.id.quit); + quitButton.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View view) { + moveTaskToBack(true); + android.os.Process.killProcess(android.os.Process.myPid()); + System.exit(1); + } + }); + labelsListView = (ListView) findViewById(R.id.list_view); + + // Load the labels for the model, but only display those that don't start + // with an underscore. + String actualFilename = LABEL_FILENAME.split("file:///android_asset/")[1]; + Log.i(LOG_TAG, "Reading labels from: " + actualFilename); + BufferedReader br = null; + try { + br = new BufferedReader(new InputStreamReader(getAssets().open(actualFilename))); + String line; + while ((line = br.readLine()) != null) { + labels.add(line); + if (line.charAt(0) != '_') { + displayedLabels.add(line.substring(0, 1).toUpperCase() + line.substring(1)); + } + } + br.close(); + } catch (IOException e) { + throw new RuntimeException("Problem reading label file!", e); + } + + // Build a list view based on these labels. + ArrayAdapter<String> arrayAdapter = + new ArrayAdapter<String>(this, R.layout.list_text_item, displayedLabels); + labelsListView.setAdapter(arrayAdapter); + + // Set up an object to smooth recognition results to increase accuracy. 
+ recognizeCommands = + new RecognizeCommands( + labels, + AVERAGE_WINDOW_DURATION_MS, + DETECTION_THRESHOLD, + SUPPRESSION_MS, + MINIMUM_COUNT, + MINIMUM_TIME_BETWEEN_SAMPLES_MS); + + // Load the TensorFlow model. + inferenceInterface = new TensorFlowInferenceInterface(getAssets(), MODEL_FILENAME); + + // Start the recording and recognition threads. + requestMicrophonePermission(); + startRecognition(); + } + + private void requestMicrophonePermission() { + requestPermissions( + new String[] {android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO); + } + + @Override + public void onRequestPermissionsResult( + int requestCode, String[] permissions, int[] grantResults) { + if (requestCode == REQUEST_RECORD_AUDIO + && grantResults.length > 0 + && grantResults[0] == PackageManager.PERMISSION_GRANTED) { + startRecording(); + startRecognition(); + } + } + + public synchronized void startRecording() { + if (recordingThread != null) { + return; + } + shouldContinue = true; + recordingThread = + new Thread( + new Runnable() { + @Override + public void run() { + record(); + } + }); + recordingThread.start(); + } + + public synchronized void stopRecording() { + if (recordingThread == null) { + return; + } + shouldContinue = false; + recordingThread = null; + } + + private void record() { + android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO); + + // Estimate the buffer size we'll need for this device. + int bufferSize = + AudioRecord.getMinBufferSize( + SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT); + if (bufferSize == AudioRecord.ERROR || bufferSize == AudioRecord.ERROR_BAD_VALUE) { + bufferSize = SAMPLE_RATE * 2; + } + short[] audioBuffer = new short[bufferSize / 2]; + + AudioRecord record = + new AudioRecord( + MediaRecorder.AudioSource.DEFAULT, + SAMPLE_RATE, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT, + bufferSize); + + if (record.getState() != AudioRecord.STATE_INITIALIZED) { + Log.e(LOG_TAG, "Audio Record can't initialize!"); + return; + } + + record.startRecording(); + + Log.v(LOG_TAG, "Start recording"); + + // Loop, gathering audio data and copying it to a round-robin buffer. + while (shouldContinue) { + int numberRead = record.read(audioBuffer, 0, audioBuffer.length); + int maxLength = recordingBuffer.length; + int newRecordingOffset = recordingOffset + numberRead; + int secondCopyLength = Math.max(0, newRecordingOffset - maxLength); + int firstCopyLength = numberRead - secondCopyLength; + // We store off all the data for the recognition thread to access. The ML + // thread will copy out of this buffer into its own, while holding the + // lock, so this should be thread safe. 
+ recordingBufferLock.lock(); + try { + System.arraycopy(audioBuffer, 0, recordingBuffer, recordingOffset, firstCopyLength); + System.arraycopy(audioBuffer, firstCopyLength, recordingBuffer, 0, secondCopyLength); + recordingOffset = newRecordingOffset % maxLength; + } finally { + recordingBufferLock.unlock(); + } + } + + record.stop(); + record.release(); + } + + public synchronized void startRecognition() { + if (recognitionThread != null) { + return; + } + shouldContinueRecognition = true; + recognitionThread = + new Thread( + new Runnable() { + @Override + public void run() { + recognize(); + } + }); + recognitionThread.start(); + } + + public synchronized void stopRecognition() { + if (recognitionThread == null) { + return; + } + shouldContinueRecognition = false; + recognitionThread = null; + } + + private void recognize() { + Log.v(LOG_TAG, "Start recognition"); + + short[] inputBuffer = new short[RECORDING_LENGTH]; + float[] floatInputBuffer = new float[RECORDING_LENGTH]; + float[] outputScores = new float[labels.size()]; + String[] outputScoresNames = new String[] {OUTPUT_SCORES_NAME}; + int[] sampleRateList = new int[] {SAMPLE_RATE}; + + // Loop, grabbing recorded data and running the recognition model on it. + while (shouldContinueRecognition) { + // The recording thread places data in this round-robin buffer, so lock to + // make sure there's no writing happening and then copy it to our own + // local version. + recordingBufferLock.lock(); + try { + int maxLength = recordingBuffer.length; + int firstCopyLength = maxLength - recordingOffset; + int secondCopyLength = recordingOffset; + System.arraycopy(recordingBuffer, recordingOffset, inputBuffer, 0, firstCopyLength); + System.arraycopy(recordingBuffer, 0, inputBuffer, firstCopyLength, secondCopyLength); + } finally { + recordingBufferLock.unlock(); + } + + // We need to feed in float values between -1.0f and 1.0f, so divide the + // signed 16-bit inputs. + for (int i = 0; i < RECORDING_LENGTH; ++i) { + floatInputBuffer[i] = inputBuffer[i] / 32767.0f; + } + + // Run the model. + inferenceInterface.feed(SAMPLE_RATE_NAME, sampleRateList); + inferenceInterface.feed(INPUT_DATA_NAME, floatInputBuffer, RECORDING_LENGTH, 1); + inferenceInterface.run(outputScoresNames); + inferenceInterface.fetch(OUTPUT_SCORES_NAME, outputScores); + + // Use the smoother to figure out if we've had a real recognition event. + long currentTime = System.currentTimeMillis(); + final RecognizeCommands.RecognitionResult result = + recognizeCommands.processLatestResults(outputScores, currentTime); + + runOnUiThread( + new Runnable() { + @Override + public void run() { + // If we do have a new command, highlight the right list entry. + if (!result.foundCommand.startsWith("_") && result.isNewCommand) { + int labelIndex = -1; + for (int i = 0; i < labels.size(); ++i) { + if (labels.get(i).equals(result.foundCommand)) { + labelIndex = i; + } + } + final View labelView = (View) labelsListView.getChildAt(labelIndex - 2); + ValueAnimator colorAnimation = + ValueAnimator.ofArgb(0x00b3ccff, 0xffb3ccff, 0x00b3ccff); + colorAnimation.setDuration(750); + colorAnimation.addUpdateListener( + new ValueAnimator.AnimatorUpdateListener() { + @Override + public void onAnimationUpdate(ValueAnimator animator) { + labelView.setBackgroundColor((int) animator.getAnimatedValue()); + } + }); + colorAnimation.start(); + } + } + }); + try { + // We don't need to run too frequently, so snooze for a bit. 
+ Thread.sleep(MINIMUM_TIME_BETWEEN_SAMPLES_MS); + } catch (InterruptedException e) { + // Ignore + } + } + + Log.v(LOG_TAG, "End recognition"); + } +} |
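Finally, for anyone reusing the `RecognizeCommands` smoother outside the demo, here is a minimal, hypothetical driver. The constructor arguments mirror the constants in `SpeechActivity.java`; the class name, label list, and timing loop are invented for illustration and would be replaced by the real labels file and inference loop.

```java
// Hypothetical driver: class name, label list, and timing loop are invented for
// illustration; constructor arguments mirror the constants in SpeechActivity.java.
package org.tensorflow.demo;

import java.util.Arrays;
import java.util.List;

public final class RecognizeCommandsSketch {
  public static void main(String[] args) {
    // Example labels only; the real list is read from conv_actions_labels.txt, and
    // underscore-prefixed entries such as "_silence_" are hidden from the UI.
    List<String> labels = Arrays.asList("_silence_", "_unknown_", "yes", "no");
    RecognizeCommands recognizeCommands =
        new RecognizeCommands(
            labels,
            500,    // averaging window in ms (AVERAGE_WINDOW_DURATION_MS)
            0.70f,  // detection threshold (DETECTION_THRESHOLD)
            1500,   // suppression period in ms (SUPPRESSION_MS)
            3,      // minimum result count (MINIMUM_COUNT)
            30);    // minimum ms between samples (MINIMUM_TIME_BETWEEN_SAMPLES_MS)

    // Feed one softmax result per inference pass, in increasing time order.
    long startMs = System.currentTimeMillis();
    for (int i = 0; i < 10; ++i) {
      float[] scores = new float[] {0.05f, 0.05f, 0.85f, 0.05f}; // Fake "yes" scores.
      RecognizeCommands.RecognitionResult result =
          recognizeCommands.processLatestResults(scores, startMs + i * 50);
      if (result.isNewCommand) {
        System.out.println("Heard \"" + result.foundCommand + "\" (score " + result.score + ")");
      }
    }
  }
}
```

Because the smoother bails out until it has at least `MINIMUM_COUNT` prior results spanning a quarter of the averaging window, a single call never reports a new command; results have to be fed continuously, as the recognition thread in the activity does.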