-rw-r--r-- | tensorflow/core/kernels/sdca_ops.cc      | 82 |
-rw-r--r-- | tensorflow/core/kernels/sdca_ops_test.cc | 13 |
2 files changed, 66 insertions, 29 deletions
diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc
index 63e705df43..d30e7486f5 100644
--- a/tensorflow/core/kernels/sdca_ops.cc
+++ b/tensorflow/core/kernels/sdca_ops.cc
@@ -167,7 +167,7 @@ class Example {
   // A dense vector which is a row-slice of the underlying matrix.
   struct DenseVector {
     // Returns a row slice from the matrix.
-    Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>> row()
+    Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>> Row()
         const {
       return Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>>(
           data_matrix.data() + row_index * data_matrix.dimension(1),
@@ -176,7 +176,7 @@ class Example {
 
     // Returns a row slice as a 1 * F matrix, where F is the number of features.
     Eigen::TensorMap<Eigen::Tensor<const float, 2, Eigen::RowMajor>>
-    row_as_matrix() const {
+    RowAsMatrix() const {
       return Eigen::TensorMap<Eigen::Tensor<const float, 2, Eigen::RowMajor>>(
           data_matrix.data() + row_index * data_matrix.dimension(1), 1,
           data_matrix.dimension(1));
@@ -228,18 +228,26 @@ class FeatureWeightsDenseStorage {
       const Eigen::ThreadPoolDevice& device,
       const Example::DenseVector& dense_vector,
       const std::vector<double>& normalized_bounded_dual_delta) {
-    // Transform the dual vector into a column matrix.
-    const Eigen::TensorMap<Eigen::Tensor<const double, 2, Eigen::RowMajor>>
-        dual_matrix(normalized_bounded_dual_delta.data(),
-                    normalized_bounded_dual_delta.size(), 1);
-    const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
-        Eigen::IndexPair<int>(1, 0)};
-    // This essentially computes delta_w += delta_vector / \lamdba * N.
-    deltas_.device(device) =
-        (deltas_.cast<double>() +
-         dual_matrix.contract(dense_vector.row_as_matrix().cast<double>(),
-                              product_dims))
-            .cast<float>();
+    const size_t num_weight_vectors = normalized_bounded_dual_delta.size();
+    if (num_weight_vectors == 1) {
+      deltas_.device(device) =
+          deltas_ +
+          dense_vector.RowAsMatrix() *
+              deltas_.constant(normalized_bounded_dual_delta[0]);
+    } else {
+      // Transform the dual vector into a column matrix.
+      const Eigen::TensorMap<Eigen::Tensor<const double, 2, Eigen::RowMajor>>
+          dual_matrix(normalized_bounded_dual_delta.data(), num_weight_vectors,
+                      1);
+      const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
+          Eigen::IndexPair<int>(1, 0)};
+      // This essentially computes delta_w += delta_vector / \lambda * N.
+      deltas_.device(device) =
+          (deltas_.cast<double>() +
+           dual_matrix.contract(dense_vector.RowAsMatrix().cast<double>(),
+                                product_dims))
+              .cast<float>();
+    }
   }
 
  private:
@@ -456,19 +464,37 @@ const ExampleStatistics Example::ComputeWxAndWeightedExampleNorm(
         dense_weights.nominals() +
         dense_weights.deltas() *
             dense_weights.deltas().constant(num_loss_partitions);
-    const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
-        Eigen::IndexPair<int>(1, 1)};
-    const Eigen::Tensor<float, 2, Eigen::RowMajor> prev_prediction =
-        regularization.EigenShrinkMatrix(dense_weights.nominals())
-            .contract(dense_vector.row_as_matrix(), product_dims);
-    const Eigen::Tensor<float, 2, Eigen::RowMajor> prediction =
-        regularization.EigenShrinkMatrix(feature_weights)
-            .contract(dense_vector.row_as_matrix(), product_dims);
-    // The result of "tensor contraction" (multiplication) in the code
-    // above is of dimension num_weight_vectors * 1.
-    for (int l = 0; l < num_weight_vectors; ++l) {
-      result.prev_wx[l] += prev_prediction(l, 0);
-      result.wx[l] += prediction(l, 0);
+    if (num_weight_vectors == 1) {
+      const Eigen::Tensor<float, 0, Eigen::RowMajor> prev_prediction =
+          (dense_vector.Row() *
+           regularization.EigenShrinkVector(
+               Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>>(
+                   dense_weights.nominals().data(),
+                   dense_weights.nominals().dimension(1))))
+              .sum();
+      const Eigen::Tensor<float, 0, Eigen::RowMajor> prediction =
+          (dense_vector.Row() *
+           regularization.EigenShrinkVector(
+               Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>>(
+                   feature_weights.data(), feature_weights.dimension(1))))
+              .sum();
+      result.prev_wx[0] += prev_prediction();
+      result.wx[0] += prediction();
+    } else {
+      const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
+          Eigen::IndexPair<int>(1, 1)};
+      const Eigen::Tensor<float, 2, Eigen::RowMajor> prev_prediction =
+          regularization.EigenShrinkMatrix(dense_weights.nominals())
+              .contract(dense_vector.RowAsMatrix(), product_dims);
+      const Eigen::Tensor<float, 2, Eigen::RowMajor> prediction =
+          regularization.EigenShrinkMatrix(feature_weights)
+              .contract(dense_vector.RowAsMatrix(), product_dims);
+      // The result of "tensor contraction" (multiplication) in the code
+      // above is of dimension num_weight_vectors * 1.
+      for (int l = 0; l < num_weight_vectors; ++l) {
+        result.prev_wx[l] += prev_prediction(l, 0);
+        result.wx[l] += prediction(l, 0);
+      }
     }
   }
 
@@ -824,7 +850,7 @@ void Examples::ComputeSquaredNormPerExample(
     }
     for (int j = 0; j < num_dense_features; ++j) {
      const Eigen::Tensor<float, 0, Eigen::RowMajor> sn =
-          example->dense_vectors_[j]->row().square().sum();
+          example->dense_vectors_[j]->Row().square().sum();
       squared_norm += sn();
     }
     example->squared_norm_ = squared_norm;
diff --git a/tensorflow/core/kernels/sdca_ops_test.cc b/tensorflow/core/kernels/sdca_ops_test.cc
index 9ddbd817e1..400f330ce7 100644
--- a/tensorflow/core/kernels/sdca_ops_test.cc
+++ b/tensorflow/core/kernels/sdca_ops_test.cc
@@ -232,6 +232,17 @@ void BM_SDCA(const int iters, const int num_examples) {
   test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters);
 }
 
+void BM_SDCA_LARGE_DENSE(const int iters, const int num_examples) {
+  testing::StopTiming();
+  Graph* init = nullptr;
+  Graph* train = nullptr;
+  GetGraphs(num_examples, 0 /* sparse feature groups */,
+            0 /* sparse features per group */, 5 /* dense feature groups */,
+            200000 /* dense features per group */, &init, &train);
+  testing::StartTiming();
+  test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters);
+}
+
 void BM_SDCA_LARGE_SPARSE(const int iters, const int num_examples) {
   testing::StopTiming();
   Graph* init = nullptr;
@@ -242,10 +253,10 @@ void BM_SDCA_LARGE_SPARSE(const int iters, const int num_examples) {
   testing::StartTiming();
   test::Benchmark("cpu", train, GetMultiThreadedOptions(), init).Run(iters);
 }
-
 }  // namespace
 
 BENCHMARK(BM_SDCA)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
+BENCHMARK(BM_SDCA_LARGE_DENSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
 BENCHMARK(BM_SDCA_LARGE_SPARSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
 
 }  // namespace tensorflow
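Why the new num_weight_vectors == 1 branches are faster yet equivalent: for a single weight vector, the (num_weight_vectors x F) by (1 x F) tensor contraction on the general path reduces to a plain dot product sum_i w_i * x_i, so an elementwise product followed by a full reduction computes the same value without dispatching Eigen's contraction (matrix-product) machinery. The standalone sketch below is not part of the commit; it only assumes Eigen's unsupported Tensor module and an arbitrary feature count, and checks the equivalence numerically.

#include <unsupported/Eigen/CXX11/Tensor>

#include <cassert>
#include <cmath>
#include <iostream>

int main() {
  constexpr int kNumFeatures = 8;  // F; arbitrary size for the demo.
  Eigen::Tensor<float, 2, Eigen::RowMajor> weights(1, kNumFeatures);  // 1 x F
  Eigen::Tensor<float, 2, Eigen::RowMajor> example(1, kNumFeatures);  // 1 x F
  weights.setRandom();
  example.setRandom();

  // General path: contract the feature dimension of both operands, as in
  // ComputeWxAndWeightedExampleNorm; the result is a 1 x 1 tensor.
  const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
      Eigen::IndexPair<int>(1, 1)};
  const Eigen::Tensor<float, 2, Eigen::RowMajor> wx_general =
      weights.contract(example, product_dims);

  // Fast path: elementwise product plus a full reduction, as in the
  // num_weight_vectors == 1 branch; the result is a rank-0 tensor.
  const Eigen::Tensor<float, 0, Eigen::RowMajor> wx_fast =
      (weights * example).sum();

  std::cout << wx_general(0, 0) << " == " << wx_fast() << std::endl;
  assert(std::fabs(wx_general(0, 0) - wx_fast()) < 1e-5f);
  return 0;
}

The same reduction justifies the single-vector branch in UpdateDenseDeltaWeights: contracting a 1 x 1 dual "matrix" against a 1 x F row collapses to scaling the row by a scalar, which is what deltas_.constant(normalized_bounded_dual_delta[0]) expresses, and it also sidesteps the float-to-double round trip of the general path.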