aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/spectrogram.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/spectrogram.cc')
-rw-r--r--tensorflow/core/kernels/spectrogram.cc212
1 files changed, 212 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/spectrogram.cc b/tensorflow/core/kernels/spectrogram.cc
new file mode 100644
index 0000000000..7531d5d64a
--- /dev/null
+++ b/tensorflow/core/kernels/spectrogram.cc
@@ -0,0 +1,212 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/spectrogram.h"
+
+#include <math.h>
+
+#include "third_party/fft2d/fft.h"
+#include "tensorflow/core/lib/core/bits.h"
+
+namespace tensorflow {
+
+using std::complex;
+
+namespace {
+// Returns the default Hann window function for the spectrogram.
+void GetPeriodicHann(int window_length, std::vector<double>* window) {
+ // Some platforms don't have M_PI, so define a local constant here.
+ const double pi = std::atan(1) * 4;
+ window->resize(window_length);
+ for (int i = 0; i < window_length; ++i) {
+ (*window)[i] = 0.5 - 0.5 * cos((2 * pi * i) / window_length);
+ }
+}
+} // namespace
+
+bool Spectrogram::Initialize(int window_length, int step_length) {
+ std::vector<double> window;
+ GetPeriodicHann(window_length, &window);
+ return Initialize(window, step_length);
+}
+
+bool Spectrogram::Initialize(const std::vector<double>& window,
+ int step_length) {
+ window_length_ = window.size();
+ window_ = window; // Copy window.
+ if (window_length_ < 2) {
+ LOG(ERROR) << "Window length too short.";
+ initialized_ = false;
+ return false;
+ }
+
+ step_length_ = step_length;
+ if (step_length_ < 1) {
+ LOG(ERROR) << "Step length must be positive.";
+ initialized_ = false;
+ return false;
+ }
+
+ fft_length_ = NextPowerOfTwo(window_length_);
+ CHECK(fft_length_ >= window_length_);
+ output_frequency_channels_ = 1 + fft_length_ / 2;
+
+ // Allocate 2 more than what rdft needs, so we can rationalize the layout.
+ fft_input_output_.assign(fft_length_ + 2, 0.0);
+
+ int half_fft_length = fft_length_ / 2;
+ fft_double_working_area_.assign(half_fft_length, 0.0);
+ fft_integer_working_area_.assign(2 + static_cast<int>(sqrt(half_fft_length)),
+ 0);
+ // Set flag element to ensure that the working areas are initialized
+ // on the first call to cdft. It's redundant given the assign above,
+ // but keep it as a reminder.
+ fft_integer_working_area_[0] = 0;
+ input_queue_.clear();
+ samples_to_next_step_ = window_length_;
+ initialized_ = true;
+ return true;
+}
+
+template <class InputSample, class OutputSample>
+bool Spectrogram::ComputeComplexSpectrogram(
+ const std::vector<InputSample>& input,
+ std::vector<std::vector<complex<OutputSample>>>* output) {
+ if (!initialized_) {
+ LOG(ERROR) << "ComputeComplexSpectrogram() called before successful call "
+ << "to Initialize().";
+ return false;
+ }
+ CHECK(output);
+ output->clear();
+ int input_start = 0;
+ while (GetNextWindowOfSamples(input, &input_start)) {
+ DCHECK_EQ(input_queue_.size(), window_length_);
+ ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_.
+ // Add a new slice vector onto the output, to save new result to.
+ output->resize(output->size() + 1);
+ // Get a reference to the newly added slice to fill in.
+ auto& spectrogram_slice = output->back();
+ spectrogram_slice.resize(output_frequency_channels_);
+ for (int i = 0; i < output_frequency_channels_; ++i) {
+ // This will convert double to float if it needs to.
+ spectrogram_slice[i] = complex<OutputSample>(
+ fft_input_output_[2 * i], fft_input_output_[2 * i + 1]);
+ }
+ }
+ return true;
+}
+// Instantiate it four ways:
+template bool Spectrogram::ComputeComplexSpectrogram(
+ const std::vector<float>& input, std::vector<std::vector<complex<float>>>*);
+template bool Spectrogram::ComputeComplexSpectrogram(
+ const std::vector<double>& input,
+ std::vector<std::vector<complex<float>>>*);
+template bool Spectrogram::ComputeComplexSpectrogram(
+ const std::vector<float>& input,
+ std::vector<std::vector<complex<double>>>*);
+template bool Spectrogram::ComputeComplexSpectrogram(
+ const std::vector<double>& input,
+ std::vector<std::vector<complex<double>>>*);
+
+template <class InputSample, class OutputSample>
+bool Spectrogram::ComputeSquaredMagnitudeSpectrogram(
+ const std::vector<InputSample>& input,
+ std::vector<std::vector<OutputSample>>* output) {
+ if (!initialized_) {
+ LOG(ERROR) << "ComputeSquaredMagnitudeSpectrogram() called before "
+ << "successful call to Initialize().";
+ return false;
+ }
+ CHECK(output);
+ output->clear();
+ int input_start = 0;
+ while (GetNextWindowOfSamples(input, &input_start)) {
+ DCHECK_EQ(input_queue_.size(), window_length_);
+ ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_.
+ // Add a new slice vector onto the output, to save new result to.
+ output->resize(output->size() + 1);
+ // Get a reference to the newly added slice to fill in.
+ auto& spectrogram_slice = output->back();
+ spectrogram_slice.resize(output_frequency_channels_);
+ for (int i = 0; i < output_frequency_channels_; ++i) {
+ // Similar to the Complex case, except storing the norm.
+ // But the norm function is known to be a performance killer,
+ // so do it this way with explicit real and imagninary temps.
+ const double re = fft_input_output_[2 * i];
+ const double im = fft_input_output_[2 * i + 1];
+ // Which finally converts double to float if it needs to.
+ spectrogram_slice[i] = re * re + im * im;
+ }
+ }
+ return true;
+}
+// Instantiate it four ways:
+template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram(
+ const std::vector<float>& input, std::vector<std::vector<float>>*);
+template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram(
+ const std::vector<double>& input, std::vector<std::vector<float>>*);
+template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram(
+ const std::vector<float>& input, std::vector<std::vector<double>>*);
+template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram(
+ const std::vector<double>& input, std::vector<std::vector<double>>*);
+
+// Return true if a full window of samples is prepared; manage the queue.
+template <class InputSample>
+bool Spectrogram::GetNextWindowOfSamples(const std::vector<InputSample>& input,
+ int* input_start) {
+ auto input_it = input.begin() + *input_start;
+ int input_remaining = input.end() - input_it;
+ if (samples_to_next_step_ > input_remaining) {
+ // Copy in as many samples are left and return false, no full window.
+ input_queue_.insert(input_queue_.end(), input_it, input.end());
+ *input_start += input_remaining; // Increases it to input.size().
+ samples_to_next_step_ -= input_remaining;
+ return false; // Not enough for a full window.
+ } else {
+ // Copy just enough into queue to make a new window, then trim the
+ // front off the queue to make it window-sized.
+ input_queue_.insert(input_queue_.end(), input_it,
+ input_it + samples_to_next_step_);
+ *input_start += samples_to_next_step_;
+ input_queue_.erase(
+ input_queue_.begin(),
+ input_queue_.begin() + input_queue_.size() - window_length_);
+ DCHECK_EQ(window_length_, input_queue_.size());
+ samples_to_next_step_ = step_length_; // Be ready for next time.
+ return true; // Yes, input_queue_ now contains exactly a window-full.
+ }
+}
+
+void Spectrogram::ProcessCoreFFT() {
+ for (int j = 0; j < window_length_; ++j) {
+ fft_input_output_[j] = input_queue_[j] * window_[j];
+ }
+ // Zero-pad the rest of the input buffer.
+ for (int j = window_length_; j < fft_length_; ++j) {
+ fft_input_output_[j] = 0.0;
+ }
+ const int kForwardFFT = 1; // 1 means forward; -1 reverse.
+ // This real FFT is a fair amount faster than using cdft here.
+ rdft(fft_length_, kForwardFFT, &fft_input_output_[0],
+ &fft_integer_working_area_[0], &fft_double_working_area_[0]);
+ // Make rdft result look like cdft result;
+ // unpack the last real value from the first position's imag slot.
+ fft_input_output_[fft_length_] = fft_input_output_[1];
+ fft_input_output_[fft_length_ + 1] = 0;
+ fft_input_output_[1] = 0;
+}
+
+} // namespace tensorflow