diff options
author | Pete Warden <petewarden@google.com> | 2017-04-05 17:10:48 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-04-05 18:24:52 -0700 |
commit | a24c6b842d982de8a38ae5058ace91cb47ee3cef (patch) | |
tree | 6e7a909a5e8ccde22caa2d7b5f4a1a84dda3cdd2 /tensorflow/core/kernels/spectrogram.cc | |
parent | 9d57702513001bfded19e72c76a986cce56d5f00 (diff) |
Add AudioSpectrogram op to TensorFlow for audio feature generation
Change: 152332221
Diffstat (limited to 'tensorflow/core/kernels/spectrogram.cc')
-rw-r--r-- | tensorflow/core/kernels/spectrogram.cc | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/spectrogram.cc b/tensorflow/core/kernels/spectrogram.cc new file mode 100644 index 0000000000..7531d5d64a --- /dev/null +++ b/tensorflow/core/kernels/spectrogram.cc @@ -0,0 +1,212 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/spectrogram.h" + +#include <math.h> + +#include "third_party/fft2d/fft.h" +#include "tensorflow/core/lib/core/bits.h" + +namespace tensorflow { + +using std::complex; + +namespace { +// Returns the default Hann window function for the spectrogram. +void GetPeriodicHann(int window_length, std::vector<double>* window) { + // Some platforms don't have M_PI, so define a local constant here. + const double pi = std::atan(1) * 4; + window->resize(window_length); + for (int i = 0; i < window_length; ++i) { + (*window)[i] = 0.5 - 0.5 * cos((2 * pi * i) / window_length); + } +} +} // namespace + +bool Spectrogram::Initialize(int window_length, int step_length) { + std::vector<double> window; + GetPeriodicHann(window_length, &window); + return Initialize(window, step_length); +} + +bool Spectrogram::Initialize(const std::vector<double>& window, + int step_length) { + window_length_ = window.size(); + window_ = window; // Copy window. + if (window_length_ < 2) { + LOG(ERROR) << "Window length too short."; + initialized_ = false; + return false; + } + + step_length_ = step_length; + if (step_length_ < 1) { + LOG(ERROR) << "Step length must be positive."; + initialized_ = false; + return false; + } + + fft_length_ = NextPowerOfTwo(window_length_); + CHECK(fft_length_ >= window_length_); + output_frequency_channels_ = 1 + fft_length_ / 2; + + // Allocate 2 more than what rdft needs, so we can rationalize the layout. + fft_input_output_.assign(fft_length_ + 2, 0.0); + + int half_fft_length = fft_length_ / 2; + fft_double_working_area_.assign(half_fft_length, 0.0); + fft_integer_working_area_.assign(2 + static_cast<int>(sqrt(half_fft_length)), + 0); + // Set flag element to ensure that the working areas are initialized + // on the first call to cdft. It's redundant given the assign above, + // but keep it as a reminder. + fft_integer_working_area_[0] = 0; + input_queue_.clear(); + samples_to_next_step_ = window_length_; + initialized_ = true; + return true; +} + +template <class InputSample, class OutputSample> +bool Spectrogram::ComputeComplexSpectrogram( + const std::vector<InputSample>& input, + std::vector<std::vector<complex<OutputSample>>>* output) { + if (!initialized_) { + LOG(ERROR) << "ComputeComplexSpectrogram() called before successful call " + << "to Initialize()."; + return false; + } + CHECK(output); + output->clear(); + int input_start = 0; + while (GetNextWindowOfSamples(input, &input_start)) { + DCHECK_EQ(input_queue_.size(), window_length_); + ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_. + // Add a new slice vector onto the output, to save new result to. + output->resize(output->size() + 1); + // Get a reference to the newly added slice to fill in. + auto& spectrogram_slice = output->back(); + spectrogram_slice.resize(output_frequency_channels_); + for (int i = 0; i < output_frequency_channels_; ++i) { + // This will convert double to float if it needs to. + spectrogram_slice[i] = complex<OutputSample>( + fft_input_output_[2 * i], fft_input_output_[2 * i + 1]); + } + } + return true; +} +// Instantiate it four ways: +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector<float>& input, std::vector<std::vector<complex<float>>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector<double>& input, + std::vector<std::vector<complex<float>>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector<float>& input, + std::vector<std::vector<complex<double>>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector<double>& input, + std::vector<std::vector<complex<double>>>*); + +template <class InputSample, class OutputSample> +bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector<InputSample>& input, + std::vector<std::vector<OutputSample>>* output) { + if (!initialized_) { + LOG(ERROR) << "ComputeSquaredMagnitudeSpectrogram() called before " + << "successful call to Initialize()."; + return false; + } + CHECK(output); + output->clear(); + int input_start = 0; + while (GetNextWindowOfSamples(input, &input_start)) { + DCHECK_EQ(input_queue_.size(), window_length_); + ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_. + // Add a new slice vector onto the output, to save new result to. + output->resize(output->size() + 1); + // Get a reference to the newly added slice to fill in. + auto& spectrogram_slice = output->back(); + spectrogram_slice.resize(output_frequency_channels_); + for (int i = 0; i < output_frequency_channels_; ++i) { + // Similar to the Complex case, except storing the norm. + // But the norm function is known to be a performance killer, + // so do it this way with explicit real and imagninary temps. + const double re = fft_input_output_[2 * i]; + const double im = fft_input_output_[2 * i + 1]; + // Which finally converts double to float if it needs to. + spectrogram_slice[i] = re * re + im * im; + } + } + return true; +} +// Instantiate it four ways: +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector<float>& input, std::vector<std::vector<float>>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector<double>& input, std::vector<std::vector<float>>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector<float>& input, std::vector<std::vector<double>>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector<double>& input, std::vector<std::vector<double>>*); + +// Return true if a full window of samples is prepared; manage the queue. +template <class InputSample> +bool Spectrogram::GetNextWindowOfSamples(const std::vector<InputSample>& input, + int* input_start) { + auto input_it = input.begin() + *input_start; + int input_remaining = input.end() - input_it; + if (samples_to_next_step_ > input_remaining) { + // Copy in as many samples are left and return false, no full window. + input_queue_.insert(input_queue_.end(), input_it, input.end()); + *input_start += input_remaining; // Increases it to input.size(). + samples_to_next_step_ -= input_remaining; + return false; // Not enough for a full window. + } else { + // Copy just enough into queue to make a new window, then trim the + // front off the queue to make it window-sized. + input_queue_.insert(input_queue_.end(), input_it, + input_it + samples_to_next_step_); + *input_start += samples_to_next_step_; + input_queue_.erase( + input_queue_.begin(), + input_queue_.begin() + input_queue_.size() - window_length_); + DCHECK_EQ(window_length_, input_queue_.size()); + samples_to_next_step_ = step_length_; // Be ready for next time. + return true; // Yes, input_queue_ now contains exactly a window-full. + } +} + +void Spectrogram::ProcessCoreFFT() { + for (int j = 0; j < window_length_; ++j) { + fft_input_output_[j] = input_queue_[j] * window_[j]; + } + // Zero-pad the rest of the input buffer. + for (int j = window_length_; j < fft_length_; ++j) { + fft_input_output_[j] = 0.0; + } + const int kForwardFFT = 1; // 1 means forward; -1 reverse. + // This real FFT is a fair amount faster than using cdft here. + rdft(fft_length_, kForwardFFT, &fft_input_output_[0], + &fft_integer_working_area_[0], &fft_double_working_area_[0]); + // Make rdft result look like cdft result; + // unpack the last real value from the first position's imag slot. + fft_input_output_[fft_length_] = fft_input_output_[1]; + fft_input_output_[fft_length_ + 1] = 0; + fft_input_output_[1] = 0; +} + +} // namespace tensorflow |