16 files changed, 924 insertions, 84 deletions
diff --git a/src/core/ext/filters/load_reporting/registered_opencensus_objects.h b/src/core/ext/filters/load_reporting/registered_opencensus_objects.h
index b62863dc2c..4eacda7c02 100644
--- a/src/core/ext/filters/load_reporting/registered_opencensus_objects.h
+++ b/src/core/ext/filters/load_reporting/registered_opencensus_objects.h
@@ -28,75 +28,84 @@
 namespace grpc {
 namespace load_reporter {
 
+// Note that the functions here are specified as inline to share the static
+// objects across all the translation units including this header. See more
+// details on https://en.cppreference.com/w/cpp/language/inline.
+
 // Measures.
 
-::opencensus::stats::MeasureInt64 MeasureStartCount() {
-  static const ::opencensus::stats::MeasureInt64 start_count =
+inline ::opencensus::stats::MeasureInt64 MeasureStartCount() {
+  static const ::opencensus::stats::MeasureInt64 measure =
       ::opencensus::stats::MeasureInt64::Register(
           kMeasureStartCount, kMeasureStartCount, kMeasureStartCount);
-  return start_count;
+  return measure;
 }
 
-::opencensus::stats::MeasureInt64 MeasureEndCount() {
-  static const ::opencensus::stats::MeasureInt64 end_count =
+inline ::opencensus::stats::MeasureInt64 MeasureEndCount() {
+  static const ::opencensus::stats::MeasureInt64 measure =
       ::opencensus::stats::MeasureInt64::Register(
           kMeasureEndCount, kMeasureEndCount, kMeasureEndCount);
-  return end_count;
+  return measure;
 }
 
-::opencensus::stats::MeasureInt64 MeasureEndBytesSent() {
-  static const ::opencensus::stats::MeasureInt64 end_bytes_sent =
+inline ::opencensus::stats::MeasureInt64 MeasureEndBytesSent() {
+  static const ::opencensus::stats::MeasureInt64 measure =
       ::opencensus::stats::MeasureInt64::Register(
           kMeasureEndBytesSent, kMeasureEndBytesSent, kMeasureEndBytesSent);
-  return end_bytes_sent;
+  return measure;
 }
 
-::opencensus::stats::MeasureInt64 MeasureEndBytesReceived() {
-  static const ::opencensus::stats::MeasureInt64 end_bytes_received =
+inline ::opencensus::stats::MeasureInt64 MeasureEndBytesReceived() {
+  static const ::opencensus::stats::MeasureInt64 measure =
       ::opencensus::stats::MeasureInt64::Register(kMeasureEndBytesReceived,
                                                   kMeasureEndBytesReceived,
                                                   kMeasureEndBytesReceived);
-  return end_bytes_received;
+  return measure;
 }
 
-::opencensus::stats::MeasureInt64 MeasureEndLatencyMs() {
-  static const ::opencensus::stats::MeasureInt64 end_latency_ms =
+inline ::opencensus::stats::MeasureInt64 MeasureEndLatencyMs() {
+  static const ::opencensus::stats::MeasureInt64 measure =
       ::opencensus::stats::MeasureInt64::Register(
           kMeasureEndLatencyMs, kMeasureEndLatencyMs, kMeasureEndLatencyMs);
-  return end_latency_ms;
+  return measure;
 }
 
-::opencensus::stats::MeasureDouble MeasureOtherCallMetric() {
-  static const ::opencensus::stats::MeasureDouble other_call_metric =
+inline ::opencensus::stats::MeasureDouble MeasureOtherCallMetric() {
+  static const ::opencensus::stats::MeasureDouble measure =
       ::opencensus::stats::MeasureDouble::Register(kMeasureOtherCallMetric,
                                                    kMeasureOtherCallMetric,
                                                    kMeasureOtherCallMetric);
-  return other_call_metric;
+  return measure;
 }
 
 // Tags.
 
-opencensus::stats::TagKey TagKeyToken() {
-  static const auto token = opencensus::stats::TagKey::Register(kTagKeyToken);
+inline ::opencensus::stats::TagKey TagKeyToken() {
+  static const ::opencensus::stats::TagKey token =
+      opencensus::stats::TagKey::Register(kTagKeyToken);
   return token;
 }
 
-opencensus::stats::TagKey TagKeyHost() {
-  static const auto token = opencensus::stats::TagKey::Register(kTagKeyHost);
+inline ::opencensus::stats::TagKey TagKeyHost() {
+  static const ::opencensus::stats::TagKey token =
+      opencensus::stats::TagKey::Register(kTagKeyHost);
   return token;
 }
-opencensus::stats::TagKey TagKeyUserId() {
-  static const auto token = opencensus::stats::TagKey::Register(kTagKeyUserId);
+
+inline ::opencensus::stats::TagKey TagKeyUserId() {
+  static const ::opencensus::stats::TagKey token =
+      opencensus::stats::TagKey::Register(kTagKeyUserId);
   return token;
 }
 
-opencensus::stats::TagKey TagKeyStatus() {
-  static const auto token = opencensus::stats::TagKey::Register(kTagKeyStatus);
+inline ::opencensus::stats::TagKey TagKeyStatus() {
+  static const ::opencensus::stats::TagKey token =
+      opencensus::stats::TagKey::Register(kTagKeyStatus);
   return token;
 }
 
-opencensus::stats::TagKey TagKeyMetricName() {
-  static const auto token =
+inline ::opencensus::stats::TagKey TagKeyMetricName() {
+  static const ::opencensus::stats::TagKey token =
       opencensus::stats::TagKey::Register(kTagKeyMetricName);
   return token;
 }
diff --git a/src/core/ext/filters/load_reporting/server_load_reporting_filter.cc b/src/core/ext/filters/load_reporting/server_load_reporting_filter.cc
index 51e4d795d7..6529046a5e 100644
--- a/src/core/ext/filters/load_reporting/server_load_reporting_filter.cc
+++ b/src/core/ext/filters/load_reporting/server_load_reporting_filter.cc
@@ -19,7 +19,6 @@
 #include <grpc/support/port_platform.h>
 
 #include <grpc/grpc_security.h>
-#include <grpc/load_reporting.h>
 #include <grpc/slice.h>
 #include <grpc/support/alloc.h>
 #include <grpc/support/log.h>
@@ -31,18 +30,20 @@
 #include "src/core/ext/filters/load_reporting/server_load_reporting_filter.h"
 #include "src/core/lib/channel/channel_args.h"
 #include "src/core/lib/channel/context.h"
-#include "src/core/lib/gpr/string.h"
 #include "src/core/lib/iomgr/resolve_address.h"
 #include "src/core/lib/iomgr/sockaddr_posix.h"
 #include "src/core/lib/iomgr/socket_utils.h"
-#include "src/core/lib/profiling/timers.h"
 #include "src/core/lib/security/context/security_context.h"
 #include "src/core/lib/slice/slice_internal.h"
 #include "src/core/lib/surface/call.h"
-#include "src/core/lib/transport/static_metadata.h"
 
 namespace grpc {
 
+constexpr char kEncodedIpv4AddressLengthString[] = "08";
+constexpr char kEncodedIpv6AddressLengthString[] = "32";
+constexpr char kEmptyAddressLengthString[] = "00";
+constexpr size_t kLengthPrefixSize = 2;
+
 grpc_error* ServerLoadReportingChannelData::Init(
     grpc_channel_element* /* elem */, grpc_channel_element_args* args) {
   GPR_ASSERT(!args->is_last);
@@ -90,7 +91,9 @@ void ServerLoadReportingCallData::Destroy(
           {chand->peer_identity(), chand->peer_identity_len()}},
          {::grpc::load_reporter::TagKeyStatus(),
           GetStatusTagForStatus(final_info->final_status)}});
+    gpr_free(client_ip_and_lr_token_);
   }
+  gpr_free(target_host_);
   grpc_slice_unref_internal(service_method_);
 }
 
@@ -100,7 +103,8 @@ void ServerLoadReportingCallData::StartTransportStreamOpBatch(
   if (op->recv_initial_metadata() != nullptr) {
     // Save some fields to use when initial metadata is ready.
     peer_string_ = op->get_peer_string();
-    recv_initial_metadata_ = op->recv_initial_metadata();
+    recv_initial_metadata_ =
+        op->op()->payload->recv_initial_metadata.recv_initial_metadata;
     original_recv_initial_metadata_ready_ = op->recv_initial_metadata_ready();
     // Substitute the original closure for the wrapper closure.
     op->set_recv_initial_metadata_ready(&recv_initial_metadata_ready_);
@@ -157,10 +161,10 @@ void ServerLoadReportingCallData::GetCensusSafeClientIpString(
     *size = 8;
   } else if (addr->sa_family == GRPC_AF_INET6) {
     grpc_sockaddr_in6* addr6 = reinterpret_cast<grpc_sockaddr_in6*>(addr);
-    *client_ip_string = static_cast<char*>(gpr_malloc(32));
+    *client_ip_string = static_cast<char*>(gpr_malloc(32 + 1));
     for (size_t i = 0; i < 16; ++i) {
-      sprintf(*client_ip_string + i, "%02x",
-              addr6->sin6_addr.__in6_u.__u6_addr8[i]);
+      snprintf(*client_ip_string + i * 2, 2 + 1, "%02x",
+               addr6->sin6_addr.__in6_u.__u6_addr8[i]);
     }
     *size = 32;
   } else {
@@ -241,7 +245,7 @@ void ServerLoadReportingCallData::RecvInitialMetadataReady(void* arg,
   if (err == GRPC_ERROR_NONE) {
     GRPC_LOG_IF_ERROR(
         "server_load_reporting_filter",
-        grpc_metadata_batch_filter(calld->recv_initial_metadata_->batch(),
+        grpc_metadata_batch_filter(calld->recv_initial_metadata_,
                                    RecvInitialMetadataFilter, elem,
                                    "recv_initial_metadata filtering error"));
     // If the LB token was not found in the recv_initial_metadata, only the
@@ -277,7 +281,6 @@ grpc_filtered_mdelem ServerLoadReportingCallData::SendTrailingMetadataFilter(
       reinterpret_cast<ServerLoadReportingCallData*>(elem->call_data);
   ServerLoadReportingChannelData* chand =
       reinterpret_cast<ServerLoadReportingChannelData*>(elem->channel_data);
-  // TODO(juanlishen): GRPC_MDSTR_LB_COST_BIN meaning?
   if (grpc_slice_eq(GRPC_MDKEY(md), GRPC_MDSTR_LB_COST_BIN)) {
     const grpc_slice value = GRPC_MDVALUE(md);
     const size_t cost_entry_size = GRPC_SLICE_LENGTH(value);
@@ -325,4 +328,35 @@ const char* ServerLoadReportingCallData::GetStatusTagForStatus(
   }
 }
 
+namespace {
+bool MaybeAddServerLoadReportingFilter(const grpc_channel_args& args) {
+  return grpc_channel_arg_get_bool(
+      grpc_channel_args_find(&args, GRPC_ARG_ENABLE_LOAD_REPORTING), false);
+}
+}  // namespace
+
+// TODO(juanlishen): We should register the filter during grpc initialization
+// time once OpenCensus is compatible with our build system. For now, we force
+// registration of the server load reporting filter at static initialization
+// time if we build with the filter target.
+struct ServerLoadReportingFilterStaticRegistrar {
+  ServerLoadReportingFilterStaticRegistrar() {
+    static std::atomic_bool registered{false};
+    if (registered) return;
+    RegisterChannelFilter<ServerLoadReportingChannelData,
+                          ServerLoadReportingCallData>(
+        "server_load_reporting", GRPC_SERVER_CHANNEL, INT_MAX,
+        MaybeAddServerLoadReportingFilter);
+    // Access measures to ensure they are initialized. Otherwise, we can't
+    // create any valid view before the first RPC.
+    ::grpc::load_reporter::MeasureStartCount();
+    ::grpc::load_reporter::MeasureEndCount();
+    ::grpc::load_reporter::MeasureEndBytesSent();
+    ::grpc::load_reporter::MeasureEndBytesReceived();
+    ::grpc::load_reporter::MeasureEndLatencyMs();
+    ::grpc::load_reporter::MeasureOtherCallMetric();
+    registered = true;
+  }
+} server_load_reporting_filter_static_registrar;
+
 }  // namespace grpc
diff --git a/src/core/ext/filters/load_reporting/server_load_reporting_filter.h b/src/core/ext/filters/load_reporting/server_load_reporting_filter.h
index 029b19ac89..10baf1f833 100644
--- a/src/core/ext/filters/load_reporting/server_load_reporting_filter.h
+++ b/src/core/ext/filters/load_reporting/server_load_reporting_filter.h
@@ -86,8 +86,8 @@ class ServerLoadReportingCallData : public CallData {
   // The received initial metadata (a member of the recv_initial_metadata op).
   // When it is ready, we will extract some data from it via
   // recv_initial_metadata_ready_ closure, before the original
-  // recv_initial_metadata_ready closure,
-  MetadataBatch* recv_initial_metadata_;
+  // recv_initial_metadata_ready closure.
+  grpc_metadata_batch* recv_initial_metadata_;
 
   // The original recv_initial_metadata closure, which is wrapped by our own
   // closure (recv_initial_metadata_ready_) to capture the incoming initial
@@ -112,11 +112,6 @@ class ServerLoadReportingCallData : public CallData {
   // token.
   char* client_ip_and_lr_token_;
   size_t client_ip_and_lr_token_len_;
-
-  static constexpr char kEncodedIpv4AddressLengthString[] = "08";
-  static constexpr char kEncodedIpv6AddressLengthString[] = "32";
-  static constexpr char kEmptyAddressLengthString[] = "00";
-  static constexpr size_t kLengthPrefixSize = 2;
 };
 
 }  // namespace grpc
diff --git a/src/core/ext/transport/chttp2/transport/hpack_parser.cc b/src/core/ext/transport/chttp2/transport/hpack_parser.cc
index 907ba71178..ccf2256974 100644
--- a/src/core/ext/transport/chttp2/transport/hpack_parser.cc
+++ b/src/core/ext/transport/chttp2/transport/hpack_parser.cc
@@ -1622,7 +1622,7 @@ grpc_error* grpc_chttp2_header_parser_parse(void* hpack_parser,
                                             grpc_chttp2_transport* t,
                                             grpc_chttp2_stream* s,
                                             grpc_slice slice, int is_last) {
-  GPR_TIMER_SCOPE("grpc_chttp2_hpack_parser_parse", 0);
+  GPR_TIMER_SCOPE("grpc_chttp2_header_parser_parse", 0);
   grpc_chttp2_hpack_parser* parser =
       static_cast<grpc_chttp2_hpack_parser*>(hpack_parser);
   if (s != nullptr) {
diff --git a/src/core/lib/iomgr/ev_epollex_linux.cc b/src/core/lib/iomgr/ev_epollex_linux.cc
index 5ffabdc665..7b368410cf 100644
--- a/src/core/lib/iomgr/ev_epollex_linux.cc
+++ b/src/core/lib/iomgr/ev_epollex_linux.cc
@@ -494,8 +494,9 @@ static void fd_orphan(grpc_fd* fd, grpc_closure* on_done, int* release_fd,
     is_fd_closed = true;
   }
 
+  // TODO(sreek): handle fd removal (where is_fd_closed=false)
   if (!is_fd_closed) {
-    gpr_log(GPR_DEBUG, "TODO: handle fd removal?");
+    GRPC_FD_TRACE("epoll_fd %p (%d) was orphaned but not closed.", fd, fd->fd);
   }
 
   /* Remove the active status but keep referenced. We want this grpc_fd struct
@@ -1564,7 +1565,7 @@ static void pollset_set_add_pollset_set(grpc_pollset_set* a,
     gpr_mu_unlock(b_mu);
   }
   // try to do the least copying possible
-  // TODO(ctiller): there's probably a better heuristic here
+  // TODO(sreek): there's probably a better heuristic here
   const size_t a_size = a->fd_count + a->pollset_count;
   const size_t b_size = b->fd_count + b->pollset_count;
   if (b_size > a_size) {
diff --git a/src/cpp/server/load_reporter/constants.h b/src/cpp/server/load_reporter/constants.h
index 07c5965fff..00ad794a04 100644
--- a/src/cpp/server/load_reporter/constants.h
+++ b/src/cpp/server/load_reporter/constants.h
@@ -24,6 +24,16 @@
 namespace grpc {
 namespace load_reporter {
 
+// TODO(juanlishen): Update the version number with the PR number every time
+// there is any change to the server load reporter.
+constexpr uint32_t kVersion = 15853;
+
+// TODO(juanlishen): This window size is from the internal spec for the load
+// reporter. Need to ask the gRPC LB team whether we should make this and the
+// fetching interval configurable.
+constexpr uint32_t kFeedbackSampleWindowSeconds = 10;
+constexpr uint32_t kFetchAndSampleIntervalSeconds = 1;
+
 constexpr size_t kLbIdLength = 8;
 constexpr size_t kIpv4AddressLength = 8;
 constexpr size_t kIpv6AddressLength = 32;
diff --git a/src/cpp/server/load_reporter/load_data_store.h b/src/cpp/server/load_reporter/load_data_store.h
index 2da78ea064..dc08ecf479 100644
--- a/src/cpp/server/load_reporter/load_data_store.h
+++ b/src/cpp/server/load_reporter/load_data_store.h
@@ -160,7 +160,8 @@ class LoadRecordValue {
            ", error_count_=" + grpc::to_string(error_count_) +
            ", bytes_sent_=" + grpc::to_string(bytes_sent_) +
            ", bytes_recv_=" + grpc::to_string(bytes_recv_) +
-           ", latency_ms_=" + grpc::to_string(latency_ms_) + "]";
+           ", latency_ms_=" + grpc::to_string(latency_ms_) + ", " +
+           grpc::to_string(call_metrics_.size()) + " other call metric(s)]";
   }
 
   bool InsertCallMetric(const grpc::string& metric_name,
diff --git a/src/cpp/server/load_reporter/load_reporter.cc b/src/cpp/server/load_reporter/load_reporter.cc
index 3f0063d883..464063a13f 100644
--- a/src/cpp/server/load_reporter/load_reporter.cc
+++ b/src/cpp/server/load_reporter/load_reporter.cc
@@ -22,6 +22,7 @@
 #include <stdio.h>
 #include <chrono>
 #include <ctime>
+#include <iterator>
 
 #include "src/cpp/server/load_reporter/constants.h"
 #include "src/cpp/server/load_reporter/get_cpu_stats.h"
@@ -65,8 +66,8 @@ CensusViewProvider::CensusViewProvider()
   // measurements instead of setting the data values directly.
   auto vd_end_count =
       ::opencensus::stats::ViewDescriptor()
-          .set_name((kViewEndCount))
-          .set_measure((kMeasureEndCount))
+          .set_name(kViewEndCount)
+          .set_measure(kMeasureEndCount)
           .set_aggregation(::opencensus::stats::Aggregation::Sum())
           .add_column(tag_key_token_)
           .add_column(tag_key_host_)
@@ -80,8 +81,8 @@ CensusViewProvider::CensusViewProvider()
   view_descriptor_map_.emplace(kViewEndCount, vd_end_count);
   auto vd_end_bytes_sent =
       ::opencensus::stats::ViewDescriptor()
-          .set_name((kViewEndBytesSent))
-          .set_measure((kMeasureEndBytesSent))
+          .set_name(kViewEndBytesSent)
+          .set_measure(kMeasureEndBytesSent)
           .set_aggregation(::opencensus::stats::Aggregation::Sum())
           .add_column(tag_key_token_)
           .add_column(tag_key_host_)
@@ -95,8 +96,8 @@ CensusViewProvider::CensusViewProvider()
   view_descriptor_map_.emplace(kViewEndBytesSent, vd_end_bytes_sent);
   auto vd_end_bytes_received =
       ::opencensus::stats::ViewDescriptor()
-          .set_name((kViewEndBytesReceived))
-          .set_measure((kMeasureEndBytesReceived))
+          .set_name(kViewEndBytesReceived)
+          .set_measure(kMeasureEndBytesReceived)
           .set_aggregation(::opencensus::stats::Aggregation::Sum())
           .add_column(tag_key_token_)
           .add_column(tag_key_host_)
@@ -110,8 +111,8 @@ CensusViewProvider::CensusViewProvider()
   view_descriptor_map_.emplace(kViewEndBytesReceived, vd_end_bytes_received);
   auto vd_end_latency_ms =
       ::opencensus::stats::ViewDescriptor()
-          .set_name((kViewEndLatencyMs))
-          .set_measure((kMeasureEndLatencyMs))
+          .set_name(kViewEndLatencyMs)
+          .set_measure(kMeasureEndLatencyMs)
           .set_aggregation(::opencensus::stats::Aggregation::Sum())
           .add_column(tag_key_token_)
           .add_column(tag_key_host_)
@@ -126,8 +127,8 @@ CensusViewProvider::CensusViewProvider()
   // Two views related to other call metrics.
   auto vd_metric_call_count =
       ::opencensus::stats::ViewDescriptor()
-          .set_name((kViewOtherCallMetricCount))
-          .set_measure((kMeasureOtherCallMetric))
+          .set_name(kViewOtherCallMetricCount)
+          .set_measure(kMeasureOtherCallMetric)
           .set_aggregation(::opencensus::stats::Aggregation::Count())
           .add_column(tag_key_token_)
           .add_column(tag_key_host_)
@@ -141,8 +142,8 @@ CensusViewProvider::CensusViewProvider()
   view_descriptor_map_.emplace(kViewOtherCallMetricCount, vd_metric_call_count);
   auto vd_metric_value =
       ::opencensus::stats::ViewDescriptor()
-          .set_name((kViewOtherCallMetricValue))
-          .set_measure((kMeasureOtherCallMetric))
+          .set_name(kViewOtherCallMetricValue)
+          .set_measure(kMeasureOtherCallMetric)
           .set_aggregation(::opencensus::stats::Aggregation::Sum())
           .add_column(tag_key_token_)
           .add_column(tag_key_host_)
@@ -161,11 +162,26 @@ double CensusViewProvider::GetRelatedViewDataRowDouble(
     size_t view_name_len, const std::vector<grpc::string>& tag_values) {
   auto it_vd = view_data_map.find(grpc::string(view_name, view_name_len));
   GPR_ASSERT(it_vd != view_data_map.end());
+  GPR_ASSERT(it_vd->second.type() ==
+             ::opencensus::stats::ViewData::Type::kDouble);
   auto it_row = it_vd->second.double_data().find(tag_values);
   GPR_ASSERT(it_row != it_vd->second.double_data().end());
   return it_row->second;
 }
 
+uint64_t CensusViewProvider::GetRelatedViewDataRowInt(
+    const ViewDataMap& view_data_map, const char* view_name,
+    size_t view_name_len, const std::vector<grpc::string>& tag_values) {
+  auto it_vd = view_data_map.find(grpc::string(view_name, view_name_len));
+  GPR_ASSERT(it_vd != view_data_map.end());
+  GPR_ASSERT(it_vd->second.type() ==
+             ::opencensus::stats::ViewData::Type::kInt64);
+  auto it_row = it_vd->second.int_data().find(tag_values);
+  GPR_ASSERT(it_row != it_vd->second.int_data().end());
+  GPR_ASSERT(it_row->second >= 0);
+  return it_row->second;
+}
+
 CensusViewProviderDefaultImpl::CensusViewProviderDefaultImpl() {
   for (const auto& p : view_descriptor_map()) {
     const grpc::string& view_name = p.first;
@@ -235,23 +251,23 @@ LoadReporter::GenerateLoadBalancingFeedback() {
     return ::grpc::lb::v1::LoadBalancingFeedback::default_instance();
   }
   // Find the longest range with valid ends.
-  LoadBalancingFeedbackRecord* oldest = &feedback_records_[0];
-  LoadBalancingFeedbackRecord* newest =
-      &feedback_records_[feedback_records_.size() - 1];
-  while (newest > oldest &&
+  auto oldest = feedback_records_.begin();
+  auto newest = feedback_records_.end() - 1;
+  while (std::distance(oldest, newest) > 0 &&
          (newest->cpu_limit == 0 || oldest->cpu_limit == 0)) {
     // A zero limit means that the system info reading was failed, so these
     // records can't be used to calculate CPU utilization.
     if (newest->cpu_limit == 0) --newest;
     if (oldest->cpu_limit == 0) ++oldest;
   }
-  if (newest - oldest < 1 || oldest->end_time == newest->end_time ||
+  if (std::distance(oldest, newest) < 1 ||
+      oldest->end_time == newest->end_time ||
       newest->cpu_limit == oldest->cpu_limit) {
     return ::grpc::lb::v1::LoadBalancingFeedback::default_instance();
   }
   uint64_t rpcs = 0;
   uint64_t errors = 0;
-  for (LoadBalancingFeedbackRecord* p = newest; p != oldest; --p) {
+  for (auto p = newest; p != oldest; --p) {
     // Because these two numbers are counters, the oldest record shouldn't be
     // included.
     rpcs += p->rpcs;
@@ -338,7 +354,8 @@ void LoadReporter::AttachOrphanLoadId(
   if (per_balancer_store.lb_id() == kInvalidLbId) {
     load->set_load_key_unknown(true);
   } else {
-    load->set_load_key_unknown(false);
+    // We shouldn't set load_key_unknown to any value in this case because
+    // load_key_unknown and orphaned_load_identifier are under an oneof struct.
     load->mutable_orphaned_load_identifier()->set_load_key(
         per_balancer_store.load_key());
     load->mutable_orphaned_load_identifier()->set_load_balancer_id(
@@ -381,9 +398,7 @@ void LoadReporter::ProcessViewDataCallStart(
     const CensusViewProvider::ViewDataMap& view_data_map) {
   auto it = view_data_map.find(kViewStartCount);
   if (it != view_data_map.end()) {
-    // Note that the data type for any Sum view is double, whatever the data
-    // type of the original measure.
-    for (const auto& p : it->second.double_data()) {
+    for (const auto& p : it->second.int_data()) {
       const std::vector<grpc::string>& tag_values = p.first;
       const uint64_t start_count = static_cast<uint64_t>(p.second);
       const grpc::string& client_ip_and_token = tag_values[0];
@@ -405,9 +420,7 @@ void LoadReporter::ProcessViewDataCallEnd(
   uint64_t total_error_count = 0;
   auto it = view_data_map.find(kViewEndCount);
   if (it != view_data_map.end()) {
-    // Note that the data type for any Sum view is double, whatever the data
-    // type of the original measure.
-    for (const auto& p : it->second.double_data()) {
+    for (const auto& p : it->second.int_data()) {
       const std::vector<grpc::string>& tag_values = p.first;
       const uint64_t end_count = static_cast<uint64_t>(p.second);
       const grpc::string& client_ip_and_token = tag_values[0];
@@ -424,18 +437,16 @@ void LoadReporter::ProcessViewDataCallEnd(
         continue;
       }
       LoadRecordKey key(client_ip_and_token, user_id);
-      const uint64_t bytes_sent =
-          CensusViewProvider::GetRelatedViewDataRowDouble(
-              view_data_map, kViewEndBytesSent, sizeof(kViewEndBytesSent) - 1,
-              tag_values);
+      const uint64_t bytes_sent = CensusViewProvider::GetRelatedViewDataRowInt(
+          view_data_map, kViewEndBytesSent, sizeof(kViewEndBytesSent) - 1,
+          tag_values);
       const uint64_t bytes_received =
-          CensusViewProvider::GetRelatedViewDataRowDouble(
+          CensusViewProvider::GetRelatedViewDataRowInt(
               view_data_map, kViewEndBytesReceived,
               sizeof(kViewEndBytesReceived) - 1, tag_values);
-      const uint64_t latency_ms =
-          CensusViewProvider::GetRelatedViewDataRowDouble(
-              view_data_map, kViewEndLatencyMs, sizeof(kViewEndLatencyMs) - 1,
-              tag_values);
+      const uint64_t latency_ms = CensusViewProvider::GetRelatedViewDataRowInt(
+          view_data_map, kViewEndLatencyMs, sizeof(kViewEndLatencyMs) - 1,
+          tag_values);
       uint64_t ok_count = 0;
       uint64_t error_count = 0;
       total_end_count += end_count;
diff --git a/src/cpp/server/load_reporter/load_reporter.h b/src/cpp/server/load_reporter/load_reporter.h
index 49a2e4b53c..b2254d5601 100644
--- a/src/cpp/server/load_reporter/load_reporter.h
+++ b/src/cpp/server/load_reporter/load_reporter.h
@@ -59,7 +59,10 @@ class CensusViewProvider {
   // with the same tag values in a related view data. Several ViewData's are
   // considered related if their views are based on the measures that are always
   // recorded at the same time.
-  double static GetRelatedViewDataRowDouble(
+  static double GetRelatedViewDataRowDouble(
+      const ViewDataMap& view_data_map, const char* view_name,
+      size_t view_name_len, const std::vector<grpc::string>& tag_values);
+  static uint64_t GetRelatedViewDataRowInt(
       const ViewDataMap& view_data_map, const char* view_name,
       size_t view_name_len, const std::vector<grpc::string>& tag_values);
 
diff --git a/src/cpp/server/load_reporter/load_reporter_async_service_impl.cc b/src/cpp/server/load_reporter/load_reporter_async_service_impl.cc
new file mode 100644
index 0000000000..d001199b8c
--- /dev/null
+++ b/src/cpp/server/load_reporter/load_reporter_async_service_impl.cc
@@ -0,0 +1,370 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/cpp/server/load_reporter/load_reporter_async_service_impl.h"
+
+namespace grpc {
+namespace load_reporter {
+
+void LoadReporterAsyncServiceImpl::CallableTag::Run(bool ok) {
+  GPR_ASSERT(handler_function_ != nullptr);
+  GPR_ASSERT(handler_ != nullptr);
+  handler_function_(std::move(handler_), ok);
+}
+
+LoadReporterAsyncServiceImpl::LoadReporterAsyncServiceImpl(
+    std::unique_ptr<ServerCompletionQueue> cq)
+    : cq_(std::move(cq)) {
+  thread_ = std::unique_ptr<::grpc_core::Thread>(
+      new ::grpc_core::Thread("server_load_reporting", Work, this));
+  std::unique_ptr<CpuStatsProvider> cpu_stats_provider = nullptr;
+#if defined(GPR_LINUX) || defined(GPR_WINDOWS) || defined(GPR_APPLE)
+  cpu_stats_provider.reset(new CpuStatsProviderDefaultImpl());
+#endif
+  load_reporter_ = std::unique_ptr<LoadReporter>(new LoadReporter(
+      kFeedbackSampleWindowSeconds,
+      std::unique_ptr<CensusViewProvider>(new CensusViewProviderDefaultImpl()),
+      std::move(cpu_stats_provider)));
+}
+
+LoadReporterAsyncServiceImpl::~LoadReporterAsyncServiceImpl() {
+  // We will reach here after the server starts shutting down.
+  shutdown_ = true;
+  {
+    std::unique_lock<std::mutex> lock(cq_shutdown_mu_);
+    cq_->Shutdown();
+  }
+  if (next_fetch_and_sample_alarm_ != nullptr)
+    next_fetch_and_sample_alarm_->Cancel();
+  thread_->Join();
+}
+
+void LoadReporterAsyncServiceImpl::ScheduleNextFetchAndSample() {
+  auto next_fetch_and_sample_time =
+      gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
+                   gpr_time_from_millis(kFetchAndSampleIntervalSeconds * 1000,
+                                        GPR_TIMESPAN));
+  {
+    std::unique_lock<std::mutex> lock(cq_shutdown_mu_);
+    if (shutdown_) return;
+    // TODO(juanlishen): Improve the Alarm implementation to reuse a single
+    // instance for multiple events.
+    next_fetch_and_sample_alarm_.reset(new Alarm);
+    next_fetch_and_sample_alarm_->Set(cq_.get(), next_fetch_and_sample_time,
+                                      this);
+  }
+  gpr_log(GPR_DEBUG, "[LRS %p] Next fetch-and-sample scheduled.", this);
+}
+
+void LoadReporterAsyncServiceImpl::FetchAndSample(bool ok) {
+  if (!ok) {
+    gpr_log(GPR_INFO, "[LRS %p] Fetch-and-sample is stopped.", this);
+    return;
+  }
+  gpr_log(GPR_DEBUG, "[LRS %p] Starting a fetch-and-sample...", this);
+  load_reporter_->FetchAndSample();
+  ScheduleNextFetchAndSample();
+}
+
+void LoadReporterAsyncServiceImpl::Work(void* arg) {
+  LoadReporterAsyncServiceImpl* service =
+      reinterpret_cast<LoadReporterAsyncServiceImpl*>(arg);
+  service->FetchAndSample(true /* ok */);
+  // TODO(juanlishen): This is a workaround to wait for the cq to be ready. Need
+  // to figure out why cq is not ready after service starts.
+  gpr_sleep_until(gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
+                               gpr_time_from_seconds(1, GPR_TIMESPAN)));
+  ReportLoadHandler::CreateAndStart(service->cq_.get(), service,
+                                    service->load_reporter_.get());
+  void* tag;
+  bool ok;
+  while (true) {
+    if (!service->cq_->Next(&tag, &ok)) {
+      // The completion queue is shutting down.
+      GPR_ASSERT(service->shutdown_);
+      break;
+    }
+    if (tag == service) {
+      service->FetchAndSample(ok);
+    } else {
+      auto* next_step = static_cast<CallableTag*>(tag);
+      next_step->Run(ok);
+    }
+  }
+}
+
+void LoadReporterAsyncServiceImpl::StartThread() { thread_->Start(); }
+
+void LoadReporterAsyncServiceImpl::ReportLoadHandler::CreateAndStart(
+    ServerCompletionQueue* cq, LoadReporterAsyncServiceImpl* service,
+    LoadReporter* load_reporter) {
+  std::shared_ptr<ReportLoadHandler> handler =
+      std::make_shared<ReportLoadHandler>(cq, service, load_reporter);
+  ReportLoadHandler* p = handler.get();
+  {
+    std::unique_lock<std::mutex> lock(service->cq_shutdown_mu_);
+    if (service->shutdown_) return;
+    p->on_done_notified_ =
+        CallableTag(std::bind(&ReportLoadHandler::OnDoneNotified, p,
+                              std::placeholders::_1, std::placeholders::_2),
+                    handler);
+    p->next_inbound_ =
+        CallableTag(std::bind(&ReportLoadHandler::OnRequestDelivered, p,
+                              std::placeholders::_1, std::placeholders::_2),
+                    std::move(handler));
+    p->ctx_.AsyncNotifyWhenDone(&p->on_done_notified_);
+    service->RequestReportLoad(&p->ctx_, &p->stream_, cq, cq,
+                               &p->next_inbound_);
+  }
+}
+
+LoadReporterAsyncServiceImpl::ReportLoadHandler::ReportLoadHandler(
+    ServerCompletionQueue* cq, LoadReporterAsyncServiceImpl* service,
+    LoadReporter* load_reporter)
+    : cq_(cq),
+      service_(service),
+      load_reporter_(load_reporter),
+      stream_(&ctx_),
+      call_status_(WAITING_FOR_DELIVERY) {}
+
+void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnRequestDelivered(
+    std::shared_ptr<ReportLoadHandler> self, bool ok) {
+  if (ok) {
+    call_status_ = DELIVERED;
+  } else {
+    // AsyncNotifyWhenDone() needs to be called before the call starts, but the
+    // tag will not pop out if the call never starts (
+    // https://github.com/grpc/grpc/issues/10136). So we need to manually
+    // release the ownership of the handler in this case.
+    GPR_ASSERT(on_done_notified_.ReleaseHandler() != nullptr);
+  }
+  if (!ok || shutdown_) {
+    // The value of ok being false means that the server is shutting down.
+    Shutdown(std::move(self), "OnRequestDelivered");
+    return;
+  }
+  // Spawn a new handler instance to serve the next new client. Every handler
+  // instance will deallocate itself when it's done.
+  CreateAndStart(cq_, service_, load_reporter_);
+  {
+    std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
+    if (service_->shutdown_) {
+      lock.release()->unlock();
+      Shutdown(std::move(self), "OnRequestDelivered");
+      return;
+    }
+    next_inbound_ =
+        CallableTag(std::bind(&ReportLoadHandler::OnReadDone, this,
+                              std::placeholders::_1, std::placeholders::_2),
+                    std::move(self));
+    stream_.Read(&request_, &next_inbound_);
+  }
+  // LB ID is unique for each load reporting stream.
+  lb_id_ = load_reporter_->GenerateLbId();
+  gpr_log(GPR_INFO,
+          "[LRS %p] Call request delivered (lb_id_: %s, handler: %p). "
+          "Start reading the initial request...",
+          service_, lb_id_.c_str(), this);
+}
+
+void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnReadDone(
+    std::shared_ptr<ReportLoadHandler> self, bool ok) {
+  if (!ok || shutdown_) {
+    if (!ok && call_status_ < INITIAL_REQUEST_RECEIVED) {
+      // The client may have half-closed the stream or the stream is broken.
+      gpr_log(GPR_INFO,
+              "[LRS %p] Failed reading the initial request from the stream "
+              "(lb_id_: %s, handler: %p, done_notified: %d, is_cancelled: %d).",
+              service_, lb_id_.c_str(), this, static_cast<int>(done_notified_),
+              static_cast<int>(is_cancelled_));
+    }
+    Shutdown(std::move(self), "OnReadDone");
+    return;
+  }
+  // We only receive one request, which is the initial request.
+  if (call_status_ < INITIAL_REQUEST_RECEIVED) {
+    if (!request_.has_initial_request()) {
+      Shutdown(std::move(self), "OnReadDone+initial_request_not_found");
+    } else {
+      call_status_ = INITIAL_REQUEST_RECEIVED;
+      const auto& initial_request = request_.initial_request();
+      load_balanced_hostname_ = initial_request.load_balanced_hostname();
+      load_key_ = initial_request.load_key();
+      load_reporter_->ReportStreamCreated(load_balanced_hostname_, lb_id_,
+                                          load_key_);
+      const auto& load_report_interval = initial_request.load_report_interval();
+      load_report_interval_ms_ =
+          static_cast<uint64_t>(load_report_interval.seconds() * 1000 +
+                                load_report_interval.nanos() / 1000);
+      gpr_log(
+          GPR_INFO,
+          "[LRS %p] Initial request received. Start load reporting (load "
+          "balanced host: %s, interval: %lu ms, lb_id_: %s, handler: %p)...",
+          service_, load_balanced_hostname_.c_str(), load_report_interval_ms_,
+          lb_id_.c_str(), this);
+      SendReport(self, true /* ok */);
+      // Expect this read to fail.
+      {
+        std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
+        if (service_->shutdown_) {
+          lock.release()->unlock();
+          Shutdown(std::move(self), "OnReadDone");
+          return;
+        }
+        next_inbound_ =
+            CallableTag(std::bind(&ReportLoadHandler::OnReadDone, this,
+                                  std::placeholders::_1, std::placeholders::_2),
+                        std::move(self));
+        stream_.Read(&request_, &next_inbound_);
+      }
+    }
+  } else {
+    // Another request received! This violates the spec.
+    gpr_log(GPR_ERROR,
+            "[LRS %p] Another request received (lb_id_: %s, handler: %p).",
+            service_, lb_id_.c_str(), this);
+    Shutdown(std::move(self), "OnReadDone+second_request");
+  }
+}
+
+void LoadReporterAsyncServiceImpl::ReportLoadHandler::ScheduleNextReport(
+    std::shared_ptr<ReportLoadHandler> self, bool ok) {
+  if (!ok || shutdown_) {
+    Shutdown(std::move(self), "ScheduleNextReport");
+    return;
+  }
+  auto next_report_time = gpr_time_add(
+      gpr_now(GPR_CLOCK_MONOTONIC),
+      gpr_time_from_millis(load_report_interval_ms_, GPR_TIMESPAN));
+  {
+    std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
+    if (service_->shutdown_) {
+      lock.release()->unlock();
+      Shutdown(std::move(self), "ScheduleNextReport");
+      return;
+    }
+    next_outbound_ =
+        CallableTag(std::bind(&ReportLoadHandler::SendReport, this,
+                              std::placeholders::_1, std::placeholders::_2),
+                    std::move(self));
+    // TODO(juanlishen): Improve the Alarm implementation to reuse a single
+    // instance for multiple events.
+    next_report_alarm_.reset(new Alarm);
+    next_report_alarm_->Set(cq_, next_report_time, &next_outbound_);
+  }
+  gpr_log(GPR_DEBUG,
+          "[LRS %p] Next load report scheduled (lb_id_: %s, handler: %p).",
+          service_, lb_id_.c_str(), this);
+}
+
+void LoadReporterAsyncServiceImpl::ReportLoadHandler::SendReport(
+    std::shared_ptr<ReportLoadHandler> self, bool ok) {
+  if (!ok || shutdown_) {
+    Shutdown(std::move(self), "SendReport");
+    return;
+  }
+  ::grpc::lb::v1::LoadReportResponse response;
+  auto loads = load_reporter_->GenerateLoads(load_balanced_hostname_, lb_id_);
+  response.mutable_load()->Swap(&loads);
+  auto feedback = load_reporter_->GenerateLoadBalancingFeedback();
+  response.mutable_load_balancing_feedback()->Swap(&feedback);
+  if (call_status_ < INITIAL_RESPONSE_SENT) {
+    auto initial_response = response.mutable_initial_response();
+    initial_response->set_load_balancer_id(lb_id_);
+    initial_response->set_implementation_id(
+        ::grpc::lb::v1::InitialLoadReportResponse::CPP);
+    initial_response->set_server_version(kVersion);
+    call_status_ = INITIAL_RESPONSE_SENT;
+  }
+  {
+    std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
+    if (service_->shutdown_) {
+      lock.release()->unlock();
+      Shutdown(std::move(self), "SendReport");
+      return;
+    }
+    next_outbound_ =
+        CallableTag(std::bind(&ReportLoadHandler::ScheduleNextReport, this,
+                              std::placeholders::_1, std::placeholders::_2),
+                    std::move(self));
+    stream_.Write(response, &next_outbound_);
+    gpr_log(GPR_INFO,
+            "[LRS %p] Sending load report (lb_id_: %s, handler: %p, loads "
+            "count: %d)...",
+            service_, lb_id_.c_str(), this, response.load().size());
+  }
+}
+
+void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnDoneNotified(
+    std::shared_ptr<ReportLoadHandler> self, bool ok) {
+  GPR_ASSERT(ok);
+  done_notified_ = true;
+  if (ctx_.IsCancelled()) {
+    is_cancelled_ = true;
+  }
+  gpr_log(GPR_INFO,
+          "[LRS %p] Load reporting call is notified done (handler: %p, "
+          "is_cancelled: %d).",
+          service_, this, static_cast<int>(is_cancelled_));
+  Shutdown(std::move(self), "OnDoneNotified");
+}
+
+void LoadReporterAsyncServiceImpl::ReportLoadHandler::Shutdown(
+    std::shared_ptr<ReportLoadHandler> self, const char* reason) {
+  if (!shutdown_) {
+    gpr_log(GPR_INFO,
+            "[LRS %p] Shutting down the handler (lb_id_: %s, handler: %p, "
+            "reason: %s).",
+            service_, lb_id_.c_str(), this, reason);
+    shutdown_ = true;
+    if (call_status_ >= INITIAL_REQUEST_RECEIVED) {
+      load_reporter_->ReportStreamClosed(load_balanced_hostname_, lb_id_);
+      next_report_alarm_->Cancel();
+    }
+  }
+  // OnRequestDelivered() may be called after OnDoneNotified(), so we need to
+  // try to Finish() every time we are in Shutdown().
+  if (call_status_ >= DELIVERED && call_status_ < FINISH_CALLED) {
+    std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
+    if (!service_->shutdown_) {
+      on_finish_done_ =
+          CallableTag(std::bind(&ReportLoadHandler::OnFinishDone, this,
+                                std::placeholders::_1, std::placeholders::_2),
+                      std::move(self));
+      // TODO(juanlishen): Maybe add a message proto for the client to
+      // explicitly cancel the stream so that we can return OK status in such
+      // cases.
+      stream_.Finish(Status::CANCELLED, &on_finish_done_);
+      call_status_ = FINISH_CALLED;
+    }
+  }
+}
+
+void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnFinishDone(
+    std::shared_ptr<ReportLoadHandler> self, bool ok) {
+  if (ok) {
+    gpr_log(GPR_INFO,
+            "[LRS %p] Load reporting finished (lb_id_: %s, handler: %p).",
+            service_, lb_id_.c_str(), this);
+  }
+}
+
+}  // namespace load_reporter
+}  // namespace grpc
diff --git a/src/cpp/server/load_reporter/load_reporter_async_service_impl.h b/src/cpp/server/load_reporter/load_reporter_async_service_impl.h
new file mode 100644
index 0000000000..6fc577ff49
--- /dev/null
+++ b/src/cpp/server/load_reporter/load_reporter_async_service_impl.h
@@ -0,0 +1,194 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_SRC_CPP_SERVER_LOAD_REPORTER_ASYNC_SERVICE_IMPL_H
+#define GRPC_SRC_CPP_SERVER_LOAD_REPORTER_ASYNC_SERVICE_IMPL_H
+
+#include <grpc/support/port_platform.h>
+
+#include <grpc/support/log.h>
+#include <grpcpp/alarm.h>
+#include <grpcpp/grpcpp.h>
+
+#include "src/core/lib/gprpp/thd.h"
+#include "src/cpp/server/load_reporter/load_reporter.h"
+
+namespace grpc {
+namespace load_reporter {
+
+// Async load reporting service. It's mainly responsible for controlling the
+// procedure of incoming requests. The real business logic is handed off to the
+// LoadReporter. There should be at most one instance of this service on a
+// server to avoid spreading the load data into multiple places.
+class LoadReporterAsyncServiceImpl
+    : public grpc::lb::v1::LoadReporter::AsyncService {
+ public:
+  explicit LoadReporterAsyncServiceImpl(
+      std::unique_ptr<ServerCompletionQueue> cq);
+  ~LoadReporterAsyncServiceImpl();
+
+  // Starts the working thread.
+  void StartThread();
+
+  // Not copyable nor movable.
+  LoadReporterAsyncServiceImpl(const LoadReporterAsyncServiceImpl&) = delete;
+  LoadReporterAsyncServiceImpl& operator=(const LoadReporterAsyncServiceImpl&) =
+      delete;
+
+ private:
+  class ReportLoadHandler;
+
+  // A tag that can be called with a bool argument. It's tailored for
+  // ReportLoadHandler's use. Before being used, it should be constructed with a
+  // method of ReportLoadHandler and a shared pointer to the handler. The
+  // shared pointer will be moved to the invoked function and the function can
+  // only be invoked once. That makes ref counting of the handler easier,
+  // because the shared pointer is not bound to the function and can be gone
+  // once the invoked function returns (if not used any more).
+  class CallableTag {
+   public:
+    using HandlerFunction =
+        std::function<void(std::shared_ptr<ReportLoadHandler>, bool)>;
+
+    CallableTag() {}
+
+    CallableTag(HandlerFunction func,
+                std::shared_ptr<ReportLoadHandler> handler)
+        : handler_function_(std::move(func)), handler_(std::move(handler)) {
+      GPR_ASSERT(handler_function_ != nullptr);
+      GPR_ASSERT(handler_ != nullptr);
+    }
+
+    // Runs the tag. This should be called only once. The handler is no longer
+    // owned by this tag after this method is invoked.
+    void Run(bool ok);
+
+    // Releases and returns the shared pointer to the handler.
+    std::shared_ptr<ReportLoadHandler> ReleaseHandler() {
+      return std::move(handler_);
+    }
+
+   private:
+    HandlerFunction handler_function_ = nullptr;
+    std::shared_ptr<ReportLoadHandler> handler_;
+  };
+
+  // Each handler takes care of one load reporting stream. It contains
+  // per-stream data and it will access the members of the parent class (i.e.,
+  // LoadReporterAsyncServiceImpl) for service-wide data (e.g., the load data).
+  class ReportLoadHandler {
+   public:
+    // Instantiates a ReportLoadHandler and requests the next load reporting
+    // call. The handler object will manage its own lifetime, so no action is
+    // needed from the caller any more regarding that object.
+    static void CreateAndStart(ServerCompletionQueue* cq,
+                               LoadReporterAsyncServiceImpl* service,
+                               LoadReporter* load_reporter);
+
+    // This ctor is public because we want to use std::make_shared<> in
+    // CreateAndStart(). This ctor shouldn't be used elsewhere.
+    ReportLoadHandler(ServerCompletionQueue* cq,
+                      LoadReporterAsyncServiceImpl* service,
+                      LoadReporter* load_reporter);
+
+   private:
+    // After the handler has a call request delivered, it starts reading the
+    // initial request. Also, a new handler is spawned so that we can keep
+    // servicing future calls.
+    void OnRequestDelivered(std::shared_ptr<ReportLoadHandler> self, bool ok);
+
+    // The first Read() is expected to succeed, after which the handler starts
+    // sending load reports back to the balancer. The second Read() is
+    // expected to fail, which happens when the balancer half-closes the
+    // stream to signal that it's no longer interested in the load reports. For
+    // the latter case, the handler will then close the stream.
+    void OnReadDone(std::shared_ptr<ReportLoadHandler> self, bool ok);
+
+    // The report sending operations are sequential as: send report -> send
+    // done, schedule the next send -> waiting for the alarm to fire -> alarm
+    // fires, send report -> ...
+    void SendReport(std::shared_ptr<ReportLoadHandler> self, bool ok);
+    void ScheduleNextReport(std::shared_ptr<ReportLoadHandler> self, bool ok);
+
+    // Called when Finish() is done.
+    void OnFinishDone(std::shared_ptr<ReportLoadHandler> self, bool ok);
+
+    // Called when AsyncNotifyWhenDone() notifies us.
+    void OnDoneNotified(std::shared_ptr<ReportLoadHandler> self, bool ok);
+
+    void Shutdown(std::shared_ptr<ReportLoadHandler> self, const char* reason);
+
+    // The key fields of the stream.
+    grpc::string lb_id_;
+    grpc::string load_balanced_hostname_;
+    grpc::string load_key_;
+    uint64_t load_report_interval_ms_;
+
+    // The data for RPC communication with the load reportee.
+    ServerContext ctx_;
+    ::grpc::lb::v1::LoadReportRequest request_;
+
+    // The members passed down from LoadReporterAsyncServiceImpl.
+    ServerCompletionQueue* cq_;
+    LoadReporterAsyncServiceImpl* service_;
+    LoadReporter* load_reporter_;
+    ServerAsyncReaderWriter<::grpc::lb::v1::LoadReportResponse,
+                            ::grpc::lb::v1::LoadReportRequest>
+        stream_;
+
+    // The status of the RPC progress.
+    enum CallStatus {
+      WAITING_FOR_DELIVERY,
+      DELIVERED,
+      INITIAL_REQUEST_RECEIVED,
+      INITIAL_RESPONSE_SENT,
+      FINISH_CALLED
+    } call_status_;
+    bool shutdown_{false};
+    bool done_notified_{false};
+    bool is_cancelled_{false};
+    CallableTag on_done_notified_;
+    CallableTag on_finish_done_;
+    CallableTag next_inbound_;
+    CallableTag next_outbound_;
+    std::unique_ptr<Alarm> next_report_alarm_;
+  };
+
+  // Handles the incoming requests and drives the completion queue in a loop.
+  static void Work(void* arg);
+
+  // Schedules the next data fetching from Census and LB feedback sampling.
+  void ScheduleNextFetchAndSample();
+
+  // Fetches data from Census and samples LB feedback.
+  void FetchAndSample(bool ok);
+
+  std::unique_ptr<ServerCompletionQueue> cq_;
+  // To synchronize the operations related to shutdown state of cq_, so that we
+  // don't enqueue new tags into cq_ after it is already shut down.
+  std::mutex cq_shutdown_mu_;
+  std::atomic_bool shutdown_{false};
+  std::unique_ptr<::grpc_core::Thread> thread_;
+  std::unique_ptr<LoadReporter> load_reporter_;
+  std::unique_ptr<Alarm> next_fetch_and_sample_alarm_;
+};
+
+}  // namespace load_reporter
+}  // namespace grpc
+
+#endif  // GRPC_SRC_CPP_SERVER_LOAD_REPORTER_ASYNC_SERVICE_IMPL_H
diff --git a/src/cpp/server/load_reporter/load_reporting_service_server_builder_option.cc b/src/cpp/server/load_reporter/load_reporting_service_server_builder_option.cc
new file mode 100644
index 0000000000..81cf6ac562
--- /dev/null
+++ b/src/cpp/server/load_reporter/load_reporting_service_server_builder_option.cc
@@ -0,0 +1,41 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include <grpcpp/ext/server_load_reporting.h>
+
+#include "src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h"
+
+namespace grpc {
+namespace load_reporter {
+namespace experimental {
+
+void LoadReportingServiceServerBuilderOption::UpdateArguments(
+    ::grpc::ChannelArguments* args) {
+  args->SetInt(GRPC_ARG_ENABLE_LOAD_REPORTING, true);
+}
+
+void LoadReportingServiceServerBuilderOption::UpdatePlugins(
+    std::vector<std::unique_ptr<::grpc::ServerBuilderPlugin>>* plugins) {
+  plugins->emplace_back(new LoadReportingServiceServerBuilderPlugin());
+}
+
+}  // namespace experimental
+}  // namespace load_reporter
+}  // namespace grpc
diff --git a/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.cc b/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.cc
new file mode 100644
index 0000000000..c2c5dd5ed5
--- /dev/null
+++ b/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.cc
@@ -0,0 +1,60 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h"
+
+#include <grpcpp/impl/server_initializer.h>
+
+namespace grpc {
+namespace load_reporter {
+
+bool LoadReportingServiceServerBuilderPlugin::has_sync_methods() const {
+  if (service_ != nullptr) {
+    return service_->has_synchronous_methods();
+  }
+  return false;
+}
+
+bool LoadReportingServiceServerBuilderPlugin::has_async_methods() const {
+  if (service_ != nullptr) {
+    return service_->has_async_methods();
+  }
+  return false;
+}
+
+void LoadReportingServiceServerBuilderPlugin::UpdateServerBuilder(
+    grpc::ServerBuilder* builder) {
+  auto cq = builder->AddCompletionQueue();
+  service_ = std::make_shared<LoadReporterAsyncServiceImpl>(std::move(cq));
+}
+
+void LoadReportingServiceServerBuilderPlugin::InitServer(
+    grpc::ServerInitializer* si) {
+  si->RegisterService(service_);
+}
+
+void LoadReportingServiceServerBuilderPlugin::Finish(
+    grpc::ServerInitializer* si) {
+  service_->StartThread();
+  service_.reset();
+}
+
+}  // namespace load_reporter
+}  // namespace grpc
diff --git a/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h b/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h
new file mode 100644
index 0000000000..1f098591d4
--- /dev/null
+++ b/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_SRC_CPP_LOAD_REPORTING_SERVICE_SERVER_BUILDER_PLUGIN_H
+#define GRPC_SRC_CPP_LOAD_REPORTING_SERVICE_SERVER_BUILDER_PLUGIN_H
+
+#include <grpc/support/port_platform.h>
+
+#include <grpcpp/impl/server_builder_plugin.h>
+
+#include "src/cpp/server/load_reporter/load_reporter_async_service_impl.h"
+
+namespace grpc {
+namespace load_reporter {
+
+// The plugin that registers and starts load reporting service when starting a
+// server.
+class LoadReportingServiceServerBuilderPlugin : public ServerBuilderPlugin {
+ public:
+  ~LoadReportingServiceServerBuilderPlugin() override = default;
+  grpc::string name() override { return "load_reporting_service"; }
+
+  // Creates a load reporting service.
+  void UpdateServerBuilder(grpc::ServerBuilder* builder) override;
+
+  // Registers the load reporter service.
+  void InitServer(grpc::ServerInitializer* si) override;
+
+  // Starts the load reporter service.
+  void Finish(grpc::ServerInitializer* si) override;
+
+  void ChangeArguments(const grpc::string& name, void* value) override {}
+  void UpdateChannelArguments(grpc::ChannelArguments* args) override {}
+  bool has_sync_methods() const override;
+  bool has_async_methods() const override;
+
+ private:
+  std::shared_ptr<LoadReporterAsyncServiceImpl> service_;
+};
+
+std::unique_ptr<grpc::ServerBuilderPlugin>
+CreateLoadReportingServiceServerBuilderPlugin();
+
+}  // namespace load_reporter
+}  // namespace grpc
+
+#endif  // GRPC_SRC_CPP_LOAD_REPORTING_SERVICE_SERVER_BUILDER_PLUGIN_H
diff --git a/src/cpp/server/load_reporter/util.cc b/src/cpp/server/load_reporter/util.cc
new file mode 100644
index 0000000000..a2f2f11e70
--- /dev/null
+++ b/src/cpp/server/load_reporter/util.cc
@@ -0,0 +1,45 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/impl/codegen/port_platform.h>
+
+#include <grpcpp/ext/server_load_reporting.h>
+
+#include <grpc/support/log.h>
+
+namespace grpc {
+namespace load_reporter {
+namespace experimental {
+
+void AddLoadReportingCost(grpc::ServerContext* ctx,
+                          const grpc::string& cost_name, double cost_value) {
+  if (std::isnormal(cost_value)) {
+    grpc::string buf;
+    buf.resize(sizeof(cost_value) + cost_name.size());
+    memcpy(&(*buf.begin()), &cost_value, sizeof(cost_value));
+    memcpy(&(*buf.begin()) + sizeof(cost_value), cost_name.data(),
+           cost_name.size());
+    ctx->AddTrailingMetadata(GRPC_LB_COST_MD_KEY, buf);
+  } else {
+    gpr_log(GPR_ERROR, "Call metric value is not normal.");
+  }
+}
+
+}  // namespace experimental
+}  // namespace load_reporter
+}  // namespace grpc
diff --git a/src/proto/grpc/lb/v1/load_reporter.proto b/src/proto/grpc/lb/v1/load_reporter.proto
index c2e4f23f6e..d57a37fed7 100644
--- a/src/proto/grpc/lb/v1/load_reporter.proto
+++ b/src/proto/grpc/lb/v1/load_reporter.proto
@@ -114,6 +114,10 @@ message Load {
   // num_calls_in_progress is the only valid entry. If in_progress_report is not
   // set, num_calls_in_progress will be ignored. If in_progress_report is set,
   // fields other than num_calls_in_progress and orphaned_load will be ignored.
+  // TODO(juanlishen): A Load is either an in_progress_report or not. We should
+  // make this explicit in hierarchy. From the log, I see in_progress_report_
+  // has a random num_calls_in_progress_ when not set, which might lead to bug
+  // when the balancer process the load report.
   oneof in_progress_report {
     // The number of calls in progress (instantaneously) per load balancer id.
     int64 num_calls_in_progress = 5;