diff options
author | 2017-04-05 17:10:48 -0800 | |
---|---|---|
committer | 2017-04-05 18:24:52 -0700 | |
commit | a24c6b842d982de8a38ae5058ace91cb47ee3cef (patch) | |
tree | 6e7a909a5e8ccde22caa2d7b5f4a1a84dda3cdd2 /tensorflow/examples/wav_to_spectrogram | |
parent | 9d57702513001bfded19e72c76a986cce56d5f00 (diff) |
Add AudioSpectrogram op to TensorFlow for audio feature generation
Change: 152332221
Diffstat (limited to 'tensorflow/examples/wav_to_spectrogram')
6 files changed, 348 insertions, 0 deletions
diff --git a/tensorflow/examples/wav_to_spectrogram/BUILD b/tensorflow/examples/wav_to_spectrogram/BUILD new file mode 100644 index 0000000000..1e72324fb0 --- /dev/null +++ b/tensorflow/examples/wav_to_spectrogram/BUILD @@ -0,0 +1,68 @@ +# Description: +# TensorFlow C++ example for converting a .wav audio file into a spectrogram image. + +package( + default_visibility = ["//tensorflow:internal"], + features = [ + "-layering_check", + "-parse_headers", + ], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "wav_to_spectrogram_lib", + srcs = [ + "wav_to_spectrogram.cc", + ], + hdrs = [ + "wav_to_spectrogram.h", + ], + deps = [ + "//tensorflow/cc:cc_ops", + "//tensorflow/core:framework_internal", + "//tensorflow/core:tensorflow", + ], +) + +cc_binary( + name = "wav_to_spectrogram", + srcs = [ + "main.cc", + ], + deps = [ + ":wav_to_spectrogram_lib", + "//tensorflow/core:framework_internal", + "//tensorflow/core:tensorflow", + ], +) + +cc_test( + name = "wav_to_spectrogram_test", + size = "medium", + srcs = ["wav_to_spectrogram_test.cc"], + deps = [ + ":wav_to_spectrogram_lib", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + "bin/**", + "gen/**", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/examples/wav_to_spectrogram/README.md b/tensorflow/examples/wav_to_spectrogram/README.md new file mode 100644 index 0000000000..7f7eb43700 --- /dev/null +++ b/tensorflow/examples/wav_to_spectrogram/README.md @@ -0,0 +1,49 @@ +# TensorFlow Spectrogram Example + +This example shows how you can load audio from a .wav file, convert it to a +spectrogram, and then save it out as a PNG image. 
A spectrogram is a +visualization of the frequencies in sound over time, and can be useful as a +feature for neural network recognition on noise or speech. + +## Building + +To build it, run this command: + +```bash +bazel build tensorflow/examples/wav_to_spectrogram/... +``` + +That should build a binary executable that you can then run like this: + +```bash +bazel-bin/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram +``` + +This uses a default test audio file that's part of the TensorFlow source code, +and writes out the image to the current directory as spectrogram.png. + +## Options + +To load your own audio, you need to supply a .wav file in LIN16 format, and use +the `--input_wav` flag to pass in the path. + +To control how the spectrogram is created, you can specify the `--window_size` +and `--stride` arguments, which control how wide the window used to estimate +frequencies is, and how widely adjacent windows are spaced. + +The `--output_image` flag sets the path to save the image file to. This is +always written out in PNG format, even if you specify a different file +extension. + +If your result seems too dark, try using the `--brightness` flag to make the +output image easier to see. + +Here's an example of how to use all of them together: + +```bash +bazel-bin/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram \ +--input_wav=/tmp/my_audio.wav \ +--window_size=1024 \ +--stride=512 \ +--output_image=/tmp/my_spectrogram.png +``` diff --git a/tensorflow/examples/wav_to_spectrogram/main.cc b/tensorflow/examples/wav_to_spectrogram/main.cc new file mode 100644 index 0000000000..539e6c4fe4 --- /dev/null +++ b/tensorflow/examples/wav_to_spectrogram/main.cc @@ -0,0 +1,66 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h" + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/util/command_line_flags.h" + +int main(int argc, char* argv[]) { + // These are the command-line flags the program can understand. + // They define which audio file to load, how wide the spectrogram windows are + // and how far apart they are placed, how bright the output image is, and + // where the resulting image is saved. 
+ tensorflow::string input_wav = + "tensorflow/core/kernels/spectrogram_test_data/short_test_segment.wav"; + tensorflow::int32 window_size = 256; + tensorflow::int32 stride = 128; + float brightness = 64.0f; + tensorflow::string output_image = "spectrogram.png"; + std::vector<tensorflow::Flag> flag_list = { + tensorflow::Flag("input_wav", &input_wav, "audio file to load"), + tensorflow::Flag("window_size", &window_size, + "frequency sample window width"), + tensorflow::Flag("stride", &stride, + "how far apart to place frequency windows"), + tensorflow::Flag("brightness", &brightness, + "controls how bright the output image is"), + tensorflow::Flag("output_image", &output_image, + "where to save the spectrogram image to"), + }; + tensorflow::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << usage; + return -1; + } + + // We need to call this to set up global state for TensorFlow. + tensorflow::port::InitMain(argv[0], &argc, &argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return -1; + } + + tensorflow::Status wav_status = WavToSpectrogram( + input_wav, window_size, stride, brightness, output_image); + if (!wav_status.ok()) { + LOG(ERROR) << "WavToSpectrogram failed with " << wav_status; + return -1; + } + + return 0; +} diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc new file mode 100644 index 0000000000..c69a359637 --- /dev/null +++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc @@ -0,0 +1,97 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h" + +#include <vector> + +#include "tensorflow/cc/ops/audio_ops.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/image_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/graph/default_device.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/util/command_line_flags.h" + +using tensorflow::DT_FLOAT; +using tensorflow::DT_UINT8; +using tensorflow::Output; +using tensorflow::TensorShape; + +// Runs a TensorFlow graph to convert an audio file into a visualization. 
+tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav, + tensorflow::int32 window_size, + tensorflow::int32 stride, float brightness, + const tensorflow::string& output_image) { + auto root = tensorflow::Scope::NewRootScope(); + using namespace tensorflow::ops; // NOLINT(build/namespaces) + // The following block creates a TensorFlow graph that: + // - Reads and decodes the audio file into a tensor of float samples. + // - Creates a float spectrogram from those samples. + // - Scales, clamps, and converts that spectrogram to 0 to 255 uint8's. + // - Reshapes the tensor so that it's [height, width, 1] for imaging. + // - Encodes it as a PNG stream and saves it out to a file. + Output file_reader = ReadFile(root.WithOpName("input_wav"), input_wav); + DecodeWav wav_decoder = + DecodeWav(root.WithOpName("wav_decoder"), file_reader); + Output spectrogram = AudioSpectrogram(root.WithOpName("spectrogram"), + wav_decoder.audio, window_size, stride); + Output brightness_placeholder = + Placeholder(root.WithOpName("brightness_placeholder"), DT_FLOAT, + Placeholder::Attrs().Shape(TensorShape({}))); + Output mul = Mul(root.WithOpName("mul"), spectrogram, brightness_placeholder); + Output min_const = Const(root.WithOpName("min_const"), 255.0f); + Output min = Minimum(root.WithOpName("min"), mul, min_const); + Output cast = Cast(root.WithOpName("cast"), min, DT_UINT8); + Output expand_dims_const = Const(root.WithOpName("expand_dims_const"), -1); + Output expand_dims = + ExpandDims(root.WithOpName("expand_dims"), cast, expand_dims_const); + Output squeeze = Squeeze(root.WithOpName("squeeze"), expand_dims, + Squeeze::Attrs().SqueezeDims({0})); + Output png_encoder = EncodePng(root.WithOpName("png_encoder"), squeeze); + WriteFile file_writer = + WriteFile(root.WithOpName("output_image"), output_image, png_encoder); + tensorflow::GraphDef graph; + TF_RETURN_IF_ERROR(root.ToGraphDef(&graph)); + + // Build a session object from this graph definition. 
The power of TensorFlow + // is that you can reuse complex computations like this, so usually we'd run a + // lot of different inputs through it. In this example, we're just doing a + // one-off run, so we'll create it and then use it immediately. The + // Status-returning NewSession() overload is used so that a failure to create + // the session is returned to the caller rather than producing a null pointer. + tensorflow::Session* raw_session = nullptr; + TF_RETURN_IF_ERROR( + tensorflow::NewSession(tensorflow::SessionOptions(), &raw_session)); + std::unique_ptr<tensorflow::Session> session(raw_session); + TF_RETURN_IF_ERROR(session->Create(graph)); + + // We're passing in the brightness as an input, so create a tensor to hold the + // value. + tensorflow::Tensor brightness_tensor(DT_FLOAT, TensorShape({})); + brightness_tensor.scalar<float>()() = brightness; + + // Run the session to analyze the audio and write out the file. + TF_RETURN_IF_ERROR( + session->Run({{"brightness_placeholder", brightness_tensor}}, {}, + {"output_image"}, nullptr)); + return tensorflow::Status::OK(); +} diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h new file mode 100644 index 0000000000..fa8cb0abe9 --- /dev/null +++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h @@ -0,0 +1,31 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ +#define THIRD_PARTY_TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ + +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/types.h" + +// Runs a TensorFlow graph to convert an audio file into a visualization. Takes +// in the path to the audio file, the window size and stride parameters +// controlling the spectrogram creation, the brightness scaling to use, and a +// path to save the output PNG file to. +tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav, + tensorflow::int32 window_size, + tensorflow::int32 stride, float brightness, + const tensorflow::string& output_image); + +#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_WAV_TO_SPECTROGRAM_WAV_TO_SPECTROGRAM_H_ diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram_test.cc b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram_test.cc new file mode 100644 index 0000000000..e599711445 --- /dev/null +++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram_test.cc @@ -0,0 +1,37 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/wav/wav_io.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/test.h" 
+// Encodes an 8-sample float buffer as a WAV string, writes it to a file in the test temp directory, runs WavToSpectrogram() on that file, and expects the output image file to exist afterwards.
+TEST(WavToSpectrogramTest, WavToSpectrogramTest) { + const tensorflow::string input_wav = + tensorflow::io::JoinPath(tensorflow::testing::TmpDir(), "input_wav.wav"); + const tensorflow::string output_image = tensorflow::io::JoinPath( + tensorflow::testing::TmpDir(), "output_image.png"); + float audio[8] = {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f}; + tensorflow::string wav_string; + TF_ASSERT_OK( + tensorflow::wav::EncodeAudioAsS16LEWav(audio, 44100, 1, 8, &wav_string)); + TF_ASSERT_OK(tensorflow::WriteStringToFile(tensorflow::Env::Default(), + input_wav, wav_string)); + TF_ASSERT_OK(WavToSpectrogram(input_wav, 4, 4, 64.0f, output_image)); + TF_EXPECT_OK(tensorflow::Env::Default()->FileExists(output_image)); +} |