| author | Pete Warden <petewarden@google.com> | 2017-08-16 15:16:54 -0700 |
|---|---|---|
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-08-16 15:23:16 -0700 |
| commit | bf2365e7cbcb54bc06baaae5c6d3fef7236626c4 (patch) | |
| tree | 16e2301cc7e6e1a747b2371bc226c49f22d60c75 /tensorflow/examples/android | |
| parent | 89617e72e7cb103dfefc6a627fc78d0314c5eb9f (diff) | |
Android demo app for speech recognition
PiperOrigin-RevId: 165504820
Diffstat (limited to 'tensorflow/examples/android')
10 files changed, 714 insertions, 57 deletions
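The heart of the new `SpeechActivity` is a feed/run/fetch cycle against the frozen speech-commands graph. As a reading aid for the diff that follows, here is a minimal sketch of that cycle; the tensor names, the `TensorFlowInferenceInterface` calls, and the 32767.0f scaling of signed 16-bit PCM samples are all taken from the new `SpeechActivity.java` below, while the wrapper class and method are invented purely for illustration.

```java
// Sketch only: the class and method names are invented; the tensor names and
// TensorFlowInferenceInterface calls mirror the ones used in SpeechActivity.java below.
import org.tensorflow.contrib.android.TensorFlowInferenceInterface;

public final class SpeechInferenceSketch {
  private static final int SAMPLE_RATE = 16000;
  private static final int RECORDING_LENGTH = SAMPLE_RATE; // One second of audio.

  /** Runs one pass of the speech-commands model over a second of 16-bit PCM samples. */
  public static float[] runOnce(
      TensorFlowInferenceInterface inferenceInterface, short[] inputBuffer, int numLabels) {
    // The graph expects floats in [-1.0, 1.0], so scale the signed 16-bit samples.
    float[] floatInputBuffer = new float[RECORDING_LENGTH];
    for (int i = 0; i < RECORDING_LENGTH; ++i) {
      floatInputBuffer[i] = inputBuffer[i] / 32767.0f;
    }
    // Feed the sample rate and the audio, run the graph, and fetch the softmax scores.
    inferenceInterface.feed("decoded_sample_data:1", new int[] {SAMPLE_RATE});
    inferenceInterface.feed("decoded_sample_data:0", floatInputBuffer, RECORDING_LENGTH, 1);
    inferenceInterface.run(new String[] {"labels_softmax"});
    float[] outputScores = new float[numLabels];
    inferenceInterface.fetch("labels_softmax", outputScores);
    return outputScores;
  }
}
```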
diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index 9f229d8b9d..bb75431a1f 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -22,6 +22,7 @@ <uses-feature android:name="android.hardware.camera" /> <uses-feature android:name="android.hardware.camera.autofocus" /> <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/> + <uses-permission android:name="android.permission.RECORD_AUDIO" /> <uses-sdk android:minSdkVersion="21" @@ -59,6 +60,15 @@ <category android:name="android.intent.category.LAUNCHER" /> </intent-filter> </activity> + + <activity android:name="org.tensorflow.demo.SpeechActivity" + android:screenOrientation="portrait" + android:label="@string/activity_name_speech"> + <intent-filter> + <action android:name="android.intent.action.MAIN" /> + <category android:name="android.intent.category.LAUNCHER" /> + </intent-filter> + </activity> </application> </manifest> diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index 2d3b0911fc..2347e6b023 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -93,6 +93,7 @@ filegroup( srcs = [ "@inception5h//:model_files", "@mobile_ssd//:model_files", + "@speech_commands//:model_files", "@stylize//:model_files", ], ) diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md index f9881287cd..883f8e664f 100644 --- a/tensorflow/examples/android/README.md +++ b/tensorflow/examples/android/README.md @@ -8,10 +8,11 @@ devices. The demos in this folder are designed to give straightforward samples of using TensorFlow in mobile applications. -Inference is done using the [TensorFlow Android Inference Interface](../../../tensorflow/contrib/android), -which may be built separately if you want a standalone library to drop into your -existing application. Object tracking and efficient YUV -> RGB conversion are -handled by `libtensorflow_demo.so`. +Inference is done using the [TensorFlow Android Inference +Interface](../../../tensorflow/contrib/android), which may be built separately +if you want a standalone library to drop into your existing application. Object +tracking and efficient YUV -> RGB conversion are handled by +`libtensorflow_demo.so`. A device running Android 5.0 (API 21) or higher is required to run the demo due to the use of the camera2 API, although the native libraries themselves can run @@ -33,6 +34,12 @@ on API >= 14 devices. Uses a model based on [A Learned Representation For Artistic Style](https://arxiv.org/abs/1610.07629) to restyle the camera preview image to that of a number of different artists. +4. [TF + Speech](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java): + Runs a simple speech recognition model built by the [audio training + tutorial](https://www.tensorflow.org/tutorials/image_retraining). Listens + for a small set of words, and highlights them in the UI when they are + recognized. <img src="sample_images/classify1.jpg" width="30%"><img src="sample_images/stylize1.jpg" width="30%"><img src="sample_images/detect1.jpg" width="30%"> @@ -51,20 +58,22 @@ for more details. ## Running the Demo -Once the app is installed it can be started via the "TF Classify", "TF Detect" -and "TF Stylize" icons, which have the orange TensorFlow logo as their icon. 
+Once the app is installed it can be started via the "TF Classify", "TF Detect", +"TF Stylize", and "TF Speech" icons, which have the orange TensorFlow logo as +their icon. While running the activities, pressing the volume keys on your device will -toggle debug visualizations on/off, rendering additional info to the screen -that may be useful for development purposes. +toggle debug visualizations on/off, rendering additional info to the screen that +may be useful for development purposes. ## Building in Android Studio using the TensorFlow AAR from JCenter The simplest way to compile the demo app yourself, and try out changes to the -project code is to use AndroidStudio. Simply set this `android` directory as the project root. +project code is to use AndroidStudio. Simply set this `android` directory as the +project root. -Then edit the `build.gradle` file and change the value of `nativeBuildSystem` -to `'none'` so that the project is built in the simplest way possible: +Then edit the `build.gradle` file and change the value of `nativeBuildSystem` to +`'none'` so that the project is built in the simplest way possible: ```None def nativeBuildSystem = 'none' @@ -77,8 +86,8 @@ Note: Currently, in this build mode, YUV -> RGB is done using a less efficient Java implementation, and object tracking is not available in the "TF Detect" activity. Setting the build system to `'cmake'` currently only builds `libtensorflow_demo.so`, which provides fast YUV -> RGB conversion and object -tracking, while still acquiring TensorFlow support via the downloaded AAR, so -it may be a lightweight way to enable these features. +tracking, while still acquiring TensorFlow support via the downloaded AAR, so it +may be a lightweight way to enable these features. For any project that does not include custom low level TensorFlow code, this is likely sufficient. @@ -104,50 +113,51 @@ protobuf compilation. NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime we suggest -that Windows users download the -[prebuilt binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) -instead. +that Windows users download the [prebuilt +binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) instead. ##### Install Bazel and Android Prerequisites -Bazel is the primary build system for TensorFlow. To build with Bazel, -it and the Android NDK and SDK must be installed on your system. - -1. Install the latest version of Bazel as per the instructions [on the Bazel website](https://bazel.build/versions/master/docs/install.html). -2. The Android NDK is required to build the native (C/C++) TensorFlow code. - The current recommended version is 12b, which may be found - [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads). -3. The Android SDK and build tools may be obtained - [here](https://developer.android.com/tools/revisions/build-tools.html), - or alternatively as part of - [Android Studio](https://developer.android.com/studio/index.html). Build - tools API >= 23 is required to build the TF Android demo (though it will - run on API >= 21 devices). +Bazel is the primary build system for TensorFlow. To build with Bazel, it and +the Android NDK and SDK must be installed on your system. + +1. Install the latest version of Bazel as per the instructions [on the Bazel + website](https://bazel.build/versions/master/docs/install.html). +2. 
The Android NDK is required to build the native (C/C++) TensorFlow code. The + current recommended version is 12b, which may be found + [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads). +3. The Android SDK and build tools may be obtained + [here](https://developer.android.com/tools/revisions/build-tools.html), or + alternatively as part of [Android + Studio](https://developer.android.com/studio/index.html). Build tools API >= + 23 is required to build the TF Android demo (though it will run on API >= 21 + devices). ##### Edit WORKSPACE -The Android entries in [`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) -must be uncommented with the paths filled in appropriately depending on where -you installed the NDK and SDK. Otherwise an error such as: -"The external label '//external:android/sdk' is not bound to anything" will -be reported. +The Android entries in +[`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) must be uncommented +with the paths filled in appropriately depending on where you installed the NDK +and SDK. Otherwise an error such as: "The external label +'//external:android/sdk' is not bound to anything" will be reported. -Also edit the API levels for the SDK in WORKSPACE to the highest level you -have installed in your SDK. This must be >= 23 (this is completely independent -of the API level of the demo, which is defined in AndroidManifest.xml). -The NDK API level may remain at 14. +Also edit the API levels for the SDK in WORKSPACE to the highest level you have +installed in your SDK. This must be >= 23 (this is completely independent of the +API level of the demo, which is defined in AndroidManifest.xml). The NDK API +level may remain at 14. ##### Install Model Files (optional) -The TensorFlow `GraphDef`s that contain the model definitions and weights -are not packaged in the repo because of their size. They are downloaded +The TensorFlow `GraphDef`s that contain the model definitions and weights are +not packaged in the repo because of their size. They are downloaded automatically and packaged with the APK by Bazel via a new_http_archive defined -in `WORKSPACE` during the build process, and by Gradle via download-models.gradle. +in `WORKSPACE` during the build process, and by Gradle via +download-models.gradle. -**Optional**: If you wish to place the models in your assets manually, -remove all of the `model_files` entries from the `assets` -list in `tensorflow_demo` found in the `[BUILD](BUILD)` file. Then download -and extract the archives yourself to the `assets` directory in the source tree: +**Optional**: If you wish to place the models in your assets manually, remove +all of the `model_files` entries from the `assets` list in `tensorflow_demo` +found in the `[BUILD](BUILD)` file. Then download and extract the archives +yourself to the `assets` directory in the source tree: ```bash BASE_URL=https://storage.googleapis.com/download.tensorflow.org/models @@ -162,27 +172,23 @@ This will extract the models and their associated metadata files to the local assets/ directory. If you are using Gradle, make sure to remove download-models.gradle reference -from build.gradle after your manually download models; otherwise gradle -might download models again and overwrite your models. +from build.gradle after your manually download models; otherwise gradle might +download models again and overwrite your models. ##### Build -After editing your WORKSPACE file to update the SDK/NDK configuration, -you may build the APK. 
Run this from your workspace root: +After editing your WORKSPACE file to update the SDK/NDK configuration, you may +build the APK. Run this from your workspace root: ```bash bazel build -c opt //tensorflow/examples/android:tensorflow_demo ``` -If you get build errors about protocol buffers, run -`git submodule update --init` and make sure that you've modified your WORKSPACE -file as instructed, then try building again. - ##### Install -Make sure that adb debugging is enabled on your Android 5.0 (API 21) or -later device, then after building use the following command from your workspace -root to install the APK: +Make sure that adb debugging is enabled on your Android 5.0 (API 21) or later +device, then after building use the following command from your workspace root +to install the APK: ```bash adb install -r bazel-bin/tensorflow/examples/android/tensorflow_demo.apk diff --git a/tensorflow/examples/android/download-models.gradle b/tensorflow/examples/android/download-models.gradle index a19ca36d7f..0e2cf65f53 100644 --- a/tensorflow/examples/android/download-models.gradle +++ b/tensorflow/examples/android/download-models.gradle @@ -11,7 +11,8 @@ // LINT.IfChange def models = ['inception5h.zip', 'object_detection/ssd_mobilenet_v1_android_export.zip', - 'stylize_v1.zip'] + 'stylize_v1.zip', + 'speech_commands_conv_actions.zip'] // LINT.ThenChange(//tensorflow/examples/android/BUILD) // Root URL for model archives diff --git a/tensorflow/examples/android/res/drawable/border.xml b/tensorflow/examples/android/res/drawable/border.xml new file mode 100644 index 0000000000..dd1d64d1d6 --- /dev/null +++ b/tensorflow/examples/android/res/drawable/border.xml @@ -0,0 +1,19 @@ +<?xml version="1.0" encoding="utf-8"?><!-- + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<shape xmlns:android="http://schemas.android.com/apk/res/android" android:shape="rectangle" > + <solid android:color="#00000000" /> + <stroke android:width="1dip" android:color="#cccccc" /> +</shape> diff --git a/tensorflow/examples/android/res/layout/activity_speech.xml b/tensorflow/examples/android/res/layout/activity_speech.xml new file mode 100644 index 0000000000..2fe1338da5 --- /dev/null +++ b/tensorflow/examples/android/res/layout/activity_speech.xml @@ -0,0 +1,55 @@ +<?xml version="1.0" encoding="utf-8"?><!-- + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<FrameLayout + xmlns:android="http://schemas.android.com/apk/res/android" + xmlns:app="http://schemas.android.com/apk/res-auto" + xmlns:tools="http://schemas.android.com/tools" + android:layout_width="match_parent" + android:layout_height="match_parent" + tools:context="org.tensorflow.demo.SpeechActivity"> + + <TextView + android:layout_width="wrap_content" + android:layout_height="wrap_content" + android:text="Say one of the words below!" + android:id="@+id/textView" + android:textAlignment="center" + android:layout_gravity="top" + android:textSize="24dp" + android:layout_marginTop="10dp" + android:layout_marginLeft="10dp" + /> + + <ListView + android:id="@+id/list_view" + android:layout_width="240dp" + android:layout_height="wrap_content" + android:background="@drawable/border" + android:layout_gravity="top|center_horizontal" + android:textAlignment="center" + android:layout_marginTop="100dp" + /> + + <Button + android:id="@+id/quit" + android:layout_width="wrap_content" + android:layout_height="wrap_content" + android:text="Quit" + android:layout_gravity="bottom|center_horizontal" + android:layout_marginBottom="10dp" + /> + +</FrameLayout> diff --git a/tensorflow/examples/android/res/layout/list_text_item.xml b/tensorflow/examples/android/res/layout/list_text_item.xml new file mode 100644 index 0000000000..526017fbb2 --- /dev/null +++ b/tensorflow/examples/android/res/layout/list_text_item.xml @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="utf-8"?><!-- + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<TextView + xmlns:android="http://schemas.android.com/apk/res/android" + android:id="@+id/list_text_item" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:text="TextView" + android:textSize="24dp" + android:textAlignment="center" + android:gravity="center_horizontal" + /> diff --git a/tensorflow/examples/android/res/values/base-strings.xml b/tensorflow/examples/android/res/values/base-strings.xml index 56edb55def..81d144a135 100644 --- a/tensorflow/examples/android/res/values/base-strings.xml +++ b/tensorflow/examples/android/res/values/base-strings.xml @@ -20,4 +20,5 @@ <string name="activity_name_classification">TF Classify</string> <string name="activity_name_detection">TF Detect</string> <string name="activity_name_stylize">TF Stylize</string> + <string name="activity_name_speech">TF Speech</string> </resources> diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java b/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java new file mode 100644 index 0000000000..9e91aea7ef --- /dev/null +++ b/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java @@ -0,0 +1,186 @@ +/* + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.tensorflow.demo; + +import android.util.Log; +import android.util.Pair; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Deque; +import java.util.List; + +/** Reads in results from an instantaneous audio recognition model and smoothes them over time. */ +public class RecognizeCommands { + // Configuration settings. + private List<String> labels = new ArrayList<String>(); + private long averageWindowDurationMs; + private float detectionThreshold; + private int suppressionMs; + private int minimumCount; + private long minimumTimeBetweenSamplesMs; + + // Working variables. + private Deque<Pair<Long, float[]>> previousResults = new ArrayDeque<Pair<Long, float[]>>(); + private String previousTopLabel; + private int labelsCount; + private long previousTopLabelTime; + private float previousTopLabelScore; + + private static final String SILENCE_LABEL = "_silence_"; + private static final long MINIMUM_TIME_FRACTION = 4; + + public RecognizeCommands( + List<String> inLabels, + long inAverageWindowDurationMs, + float inDetectionThreshold, + int inSuppressionMS, + int inMinimumCount, + long inMinimumTimeBetweenSamplesMS) { + labels = inLabels; + averageWindowDurationMs = inAverageWindowDurationMs; + detectionThreshold = inDetectionThreshold; + suppressionMs = inSuppressionMS; + minimumCount = inMinimumCount; + labelsCount = inLabels.size(); + previousTopLabel = SILENCE_LABEL; + previousTopLabelTime = Long.MIN_VALUE; + previousTopLabelScore = 0.0f; + minimumTimeBetweenSamplesMs = inMinimumTimeBetweenSamplesMS; + } + + /** Holds information about what's been recognized. 
*/ + public static class RecognitionResult { + public final String foundCommand; + public final float score; + public final boolean isNewCommand; + + public RecognitionResult(String inFoundCommand, float inScore, boolean inIsNewCommand) { + foundCommand = inFoundCommand; + score = inScore; + isNewCommand = inIsNewCommand; + } + } + + private static class ScoreForSorting implements Comparable<ScoreForSorting> { + public final float score; + public final int index; + + public ScoreForSorting(float inScore, int inIndex) { + score = inScore; + index = inIndex; + } + + @Override + public int compareTo(ScoreForSorting other) { + if (this.score > other.score) { + return -1; + } else if (this.score < other.score) { + return 1; + } else { + return 0; + } + } + } + + public RecognitionResult processLatestResults(float[] currentResults, long currentTimeMS) { + if (currentResults.length != labelsCount) { + throw new RuntimeException( + "The results for recognition should contain " + + labelsCount + + " elements, but there are " + + currentResults.length); + } + + if ((!previousResults.isEmpty()) && (currentTimeMS < previousResults.getFirst().first)) { + throw new RuntimeException( + "You must feed results in increasing time order, but received a timestamp of " + + currentTimeMS + + " that was earlier than the previous one of " + + previousResults.getFirst().first); + } + + final int howManyResults = previousResults.size(); + // Ignore any results that are coming in too frequently. + if (howManyResults > 1) { + final long timeSinceMostRecent = currentTimeMS - previousResults.getLast().first; + if (timeSinceMostRecent < minimumTimeBetweenSamplesMs) { + return new RecognitionResult(previousTopLabel, previousTopLabelScore, false); + } + } + + // Add the latest results to the head of the queue. + previousResults.addLast(new Pair<Long, float[]>(currentTimeMS, currentResults)); + + // Prune any earlier results that are too old for the averaging window. + final long timeLimit = currentTimeMS - averageWindowDurationMs; + while (previousResults.getFirst().first < timeLimit) { + previousResults.removeFirst(); + } + + // If there are too few results, assume the result will be unreliable and + // bail. + final long earliestTime = previousResults.getFirst().first; + final long samplesDuration = currentTimeMS - earliestTime; + if ((howManyResults < minimumCount) + || (samplesDuration < (averageWindowDurationMs / MINIMUM_TIME_FRACTION))) { + Log.v("RecognizeResult", "Too few results"); + return new RecognitionResult(previousTopLabel, 0.0f, false); + } + + // Calculate the average score across all the results in the window. + float[] averageScores = new float[labelsCount]; + for (Pair<Long, float[]> previousResult : previousResults) { + final float[] scoresTensor = previousResult.second; + int i = 0; + while (i < scoresTensor.length) { + averageScores[i] += scoresTensor[i] / howManyResults; + ++i; + } + } + + // Sort the averaged results in descending score order. + ScoreForSorting[] sortedAverageScores = new ScoreForSorting[labelsCount]; + for (int i = 0; i < labelsCount; ++i) { + sortedAverageScores[i] = new ScoreForSorting(averageScores[i], i); + } + Arrays.sort(sortedAverageScores); + + // See if the latest top score is enough to trigger a detection. 
+ final int currentTopIndex = sortedAverageScores[0].index; + final String currentTopLabel = labels.get(currentTopIndex); + final float currentTopScore = sortedAverageScores[0].score; + // If we've recently had another label trigger, assume one that occurs too + // soon afterwards is a bad result. + long timeSinceLastTop; + if (previousTopLabel.equals(SILENCE_LABEL) || (previousTopLabelTime == Long.MIN_VALUE)) { + timeSinceLastTop = Long.MAX_VALUE; + } else { + timeSinceLastTop = currentTimeMS - previousTopLabelTime; + } + boolean isNewCommand; + if ((currentTopScore > detectionThreshold) && (timeSinceLastTop > suppressionMs)) { + previousTopLabel = currentTopLabel; + previousTopLabelTime = currentTimeMS; + previousTopLabelScore = currentTopScore; + isNewCommand = true; + } else { + isNewCommand = false; + } + return new RecognitionResult(currentTopLabel, currentTopScore, isNewCommand); + } +} diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java new file mode 100644 index 0000000000..eb4dc69d63 --- /dev/null +++ b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java @@ -0,0 +1,353 @@ +/* + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Demonstrates how to run an audio recognition model in Android. + +This example loads a simple speech recognition model trained by the tutorial at +https://www.tensorflow.org/tutorials/audio_training + +The model files should be downloaded automatically from the TensorFlow website, +but if you have a custom model you can update the LABEL_FILENAME and +MODEL_FILENAME constants to point to your own files. + +The example application displays a list view with all of the known audio labels, +and highlights each one when it thinks it has detected one through the +microphone. The averaging of results to give a more reliable signal happens in +the RecognizeCommands helper class. +*/ + +package org.tensorflow.demo; + +import android.animation.ValueAnimator; +import android.app.Activity; +import android.content.pm.PackageManager; +import android.media.AudioFormat; +import android.media.AudioRecord; +import android.media.MediaRecorder; +import android.os.Bundle; +import android.util.Log; +import android.view.View; +import android.widget.ArrayAdapter; +import android.widget.Button; +import android.widget.ListView; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.locks.ReentrantLock; +import org.tensorflow.contrib.android.TensorFlowInferenceInterface; +import org.tensorflow.demo.R; + +/** + * An activity that listens for audio and then uses a TensorFlow model to detect particular classes, + * by default a small set of action words. 
+ */ +public class SpeechActivity extends Activity { + + // Constants that control the behavior of the recognition code and model + // settings. See the audio recognition tutorial for a detailed explanation of + // all these, but you should customize them to match your training settings if + // you are running your own model. + private static final int SAMPLE_RATE = 16000; + private static final int SAMPLE_DURATION_MS = 1000; + private static final int RECORDING_LENGTH = (int) (SAMPLE_RATE * SAMPLE_DURATION_MS / 1000); + private static final long AVERAGE_WINDOW_DURATION_MS = 500; + private static final float DETECTION_THRESHOLD = 0.70f; + private static final int SUPPRESSION_MS = 1500; + private static final int MINIMUM_COUNT = 3; + private static final long MINIMUM_TIME_BETWEEN_SAMPLES_MS = 30; + private static final String LABEL_FILENAME = "file:///android_asset/conv_actions_labels.txt"; + private static final String MODEL_FILENAME = "file:///android_asset/conv_actions_frozen.pb"; + private static final String INPUT_DATA_NAME = "decoded_sample_data:0"; + private static final String SAMPLE_RATE_NAME = "decoded_sample_data:1"; + private static final String OUTPUT_SCORES_NAME = "labels_softmax"; + + // UI elements. + private static final int REQUEST_RECORD_AUDIO = 13; + private Button quitButton; + private ListView labelsListView; + private static final String LOG_TAG = SpeechActivity.class.getSimpleName(); + + // Working variables. + short[] recordingBuffer = new short[RECORDING_LENGTH]; + int recordingOffset = 0; + boolean shouldContinue = true; + private Thread recordingThread; + boolean shouldContinueRecognition = true; + private Thread recognitionThread; + private final ReentrantLock recordingBufferLock = new ReentrantLock(); + private TensorFlowInferenceInterface inferenceInterface; + private List<String> labels = new ArrayList<String>(); + private List<String> displayedLabels = new ArrayList<>(); + private RecognizeCommands recognizeCommands = null; + + @Override + protected void onCreate(Bundle savedInstanceState) { + // Set up the UI. + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_speech); + quitButton = (Button) findViewById(R.id.quit); + quitButton.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View view) { + moveTaskToBack(true); + android.os.Process.killProcess(android.os.Process.myPid()); + System.exit(1); + } + }); + labelsListView = (ListView) findViewById(R.id.list_view); + + // Load the labels for the model, but only display those that don't start + // with an underscore. + String actualFilename = LABEL_FILENAME.split("file:///android_asset/")[1]; + Log.i(LOG_TAG, "Reading labels from: " + actualFilename); + BufferedReader br = null; + try { + br = new BufferedReader(new InputStreamReader(getAssets().open(actualFilename))); + String line; + while ((line = br.readLine()) != null) { + labels.add(line); + if (line.charAt(0) != '_') { + displayedLabels.add(line.substring(0, 1).toUpperCase() + line.substring(1)); + } + } + br.close(); + } catch (IOException e) { + throw new RuntimeException("Problem reading label file!", e); + } + + // Build a list view based on these labels. + ArrayAdapter<String> arrayAdapter = + new ArrayAdapter<String>(this, R.layout.list_text_item, displayedLabels); + labelsListView.setAdapter(arrayAdapter); + + // Set up an object to smooth recognition results to increase accuracy. 
+ recognizeCommands = + new RecognizeCommands( + labels, + AVERAGE_WINDOW_DURATION_MS, + DETECTION_THRESHOLD, + SUPPRESSION_MS, + MINIMUM_COUNT, + MINIMUM_TIME_BETWEEN_SAMPLES_MS); + + // Load the TensorFlow model. + inferenceInterface = new TensorFlowInferenceInterface(getAssets(), MODEL_FILENAME); + + // Start the recording and recognition threads. + requestMicrophonePermission(); + startRecognition(); + } + + private void requestMicrophonePermission() { + requestPermissions( + new String[] {android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO); + } + + @Override + public void onRequestPermissionsResult( + int requestCode, String[] permissions, int[] grantResults) { + if (requestCode == REQUEST_RECORD_AUDIO + && grantResults.length > 0 + && grantResults[0] == PackageManager.PERMISSION_GRANTED) { + startRecording(); + startRecognition(); + } + } + + public synchronized void startRecording() { + if (recordingThread != null) { + return; + } + shouldContinue = true; + recordingThread = + new Thread( + new Runnable() { + @Override + public void run() { + record(); + } + }); + recordingThread.start(); + } + + public synchronized void stopRecording() { + if (recordingThread == null) { + return; + } + shouldContinue = false; + recordingThread = null; + } + + private void record() { + android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO); + + // Estimate the buffer size we'll need for this device. + int bufferSize = + AudioRecord.getMinBufferSize( + SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT); + if (bufferSize == AudioRecord.ERROR || bufferSize == AudioRecord.ERROR_BAD_VALUE) { + bufferSize = SAMPLE_RATE * 2; + } + short[] audioBuffer = new short[bufferSize / 2]; + + AudioRecord record = + new AudioRecord( + MediaRecorder.AudioSource.DEFAULT, + SAMPLE_RATE, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT, + bufferSize); + + if (record.getState() != AudioRecord.STATE_INITIALIZED) { + Log.e(LOG_TAG, "Audio Record can't initialize!"); + return; + } + + record.startRecording(); + + Log.v(LOG_TAG, "Start recording"); + + // Loop, gathering audio data and copying it to a round-robin buffer. + while (shouldContinue) { + int numberRead = record.read(audioBuffer, 0, audioBuffer.length); + int maxLength = recordingBuffer.length; + int newRecordingOffset = recordingOffset + numberRead; + int secondCopyLength = Math.max(0, newRecordingOffset - maxLength); + int firstCopyLength = numberRead - secondCopyLength; + // We store off all the data for the recognition thread to access. The ML + // thread will copy out of this buffer into its own, while holding the + // lock, so this should be thread safe. 
+ recordingBufferLock.lock(); + try { + System.arraycopy(audioBuffer, 0, recordingBuffer, recordingOffset, firstCopyLength); + System.arraycopy(audioBuffer, firstCopyLength, recordingBuffer, 0, secondCopyLength); + recordingOffset = newRecordingOffset % maxLength; + } finally { + recordingBufferLock.unlock(); + } + } + + record.stop(); + record.release(); + } + + public synchronized void startRecognition() { + if (recognitionThread != null) { + return; + } + shouldContinueRecognition = true; + recognitionThread = + new Thread( + new Runnable() { + @Override + public void run() { + recognize(); + } + }); + recognitionThread.start(); + } + + public synchronized void stopRecognition() { + if (recognitionThread == null) { + return; + } + shouldContinueRecognition = false; + recognitionThread = null; + } + + private void recognize() { + Log.v(LOG_TAG, "Start recognition"); + + short[] inputBuffer = new short[RECORDING_LENGTH]; + float[] floatInputBuffer = new float[RECORDING_LENGTH]; + float[] outputScores = new float[labels.size()]; + String[] outputScoresNames = new String[] {OUTPUT_SCORES_NAME}; + int[] sampleRateList = new int[] {SAMPLE_RATE}; + + // Loop, grabbing recorded data and running the recognition model on it. + while (shouldContinueRecognition) { + // The recording thread places data in this round-robin buffer, so lock to + // make sure there's no writing happening and then copy it to our own + // local version. + recordingBufferLock.lock(); + try { + int maxLength = recordingBuffer.length; + int firstCopyLength = maxLength - recordingOffset; + int secondCopyLength = recordingOffset; + System.arraycopy(recordingBuffer, recordingOffset, inputBuffer, 0, firstCopyLength); + System.arraycopy(recordingBuffer, 0, inputBuffer, firstCopyLength, secondCopyLength); + } finally { + recordingBufferLock.unlock(); + } + + // We need to feed in float values between -1.0f and 1.0f, so divide the + // signed 16-bit inputs. + for (int i = 0; i < RECORDING_LENGTH; ++i) { + floatInputBuffer[i] = inputBuffer[i] / 32767.0f; + } + + // Run the model. + inferenceInterface.feed(SAMPLE_RATE_NAME, sampleRateList); + inferenceInterface.feed(INPUT_DATA_NAME, floatInputBuffer, RECORDING_LENGTH, 1); + inferenceInterface.run(outputScoresNames); + inferenceInterface.fetch(OUTPUT_SCORES_NAME, outputScores); + + // Use the smoother to figure out if we've had a real recognition event. + long currentTime = System.currentTimeMillis(); + final RecognizeCommands.RecognitionResult result = + recognizeCommands.processLatestResults(outputScores, currentTime); + + runOnUiThread( + new Runnable() { + @Override + public void run() { + // If we do have a new command, highlight the right list entry. + if (!result.foundCommand.startsWith("_") && result.isNewCommand) { + int labelIndex = -1; + for (int i = 0; i < labels.size(); ++i) { + if (labels.get(i).equals(result.foundCommand)) { + labelIndex = i; + } + } + final View labelView = (View) labelsListView.getChildAt(labelIndex - 2); + ValueAnimator colorAnimation = + ValueAnimator.ofArgb(0x00b3ccff, 0xffb3ccff, 0x00b3ccff); + colorAnimation.setDuration(750); + colorAnimation.addUpdateListener( + new ValueAnimator.AnimatorUpdateListener() { + @Override + public void onAnimationUpdate(ValueAnimator animator) { + labelView.setBackgroundColor((int) animator.getAnimatedValue()); + } + }); + colorAnimation.start(); + } + } + }); + try { + // We don't need to run too frequently, so snooze for a bit. 
+ Thread.sleep(MINIMUM_TIME_BETWEEN_SAMPLES_MS); + } catch (InterruptedException e) { + // Ignore + } + } + + Log.v(LOG_TAG, "End recognition"); + } +} |
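Finally, for anyone reusing the `RecognizeCommands` smoother outside the demo, here is a minimal, hypothetical driver. The constructor arguments mirror the constants in `SpeechActivity.java`; the class name, label list, and timing loop are invented for illustration and would be replaced by the real labels file and inference loop.

```java
// Hypothetical driver: class name, label list, and timing loop are invented for
// illustration; constructor arguments mirror the constants in SpeechActivity.java.
package org.tensorflow.demo;

import java.util.Arrays;
import java.util.List;

public final class RecognizeCommandsSketch {
  public static void main(String[] args) {
    // Example labels only; the real list is read from conv_actions_labels.txt, and
    // underscore-prefixed entries such as "_silence_" are hidden from the UI.
    List<String> labels = Arrays.asList("_silence_", "_unknown_", "yes", "no");
    RecognizeCommands recognizeCommands =
        new RecognizeCommands(
            labels,
            500,    // averaging window in ms (AVERAGE_WINDOW_DURATION_MS)
            0.70f,  // detection threshold (DETECTION_THRESHOLD)
            1500,   // suppression period in ms (SUPPRESSION_MS)
            3,      // minimum result count (MINIMUM_COUNT)
            30);    // minimum ms between samples (MINIMUM_TIME_BETWEEN_SAMPLES_MS)

    // Feed one softmax result per inference pass, in increasing time order.
    long startMs = System.currentTimeMillis();
    for (int i = 0; i < 10; ++i) {
      float[] scores = new float[] {0.05f, 0.05f, 0.85f, 0.05f}; // Fake "yes" scores.
      RecognizeCommands.RecognitionResult result =
          recognizeCommands.processLatestResults(scores, startMs + i * 50);
      if (result.isNewCommand) {
        System.out.println("Heard \"" + result.foundCommand + "\" (score " + result.score + ")");
      }
    }
  }
}
```

Because the smoother bails out until it has at least `MINIMUM_COUNT` prior results spanning a quarter of the averaging window, a single call never reports a new command; results have to be fed continuously, as the recognition thread in the activity does.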