diff options
author | 2017-11-22 00:33:29 -0800 | |
---|---|---|
committer | 2017-11-22 00:36:36 -0800 | |
commit | ad7eeec1cc06d7fdba6ee404f03a35fab9cd3e6a (patch) | |
tree | 455989721eda10ba8e4313a48ad0e44aa1f124db /tensorflow/contrib/verbs | |
parent | 0c98a7ecf88da45469cf00edc5cc4c0c82c7d49f (diff) |
Automated g4 rollback of changelist 176615737
PiperOrigin-RevId: 176621645
Diffstat (limited to 'tensorflow/contrib/verbs')
-rw-r--r-- | tensorflow/contrib/verbs/rdma.cc | 61 |
1 files changed, 31 insertions, 30 deletions
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index ac8d994502..331943a3ef 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -16,8 +16,8 @@ limitations under the License. #ifdef TENSORFLOW_USE_VERBS #include "tensorflow/contrib/verbs/rdma.h" -#include <fcntl.h> #include <cstdlib> +#include <fcntl.h> #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" @@ -137,7 +137,7 @@ ibv_device* set_device() { if (!env_p_rdma_device.empty()) { for (device_index = 0; device_index < dev_num; device_index++) { if (!env_p_rdma_device.compare( - ibv_get_device_name(dev_list[device_index]))) { + ibv_get_device_name(dev_list[device_index]))) { CHECK(get_dev_active_port_count(dev_list[device_index]) != 0) << "Device " << ibv_get_device_name(dev_list[device_index]) << " has no active ports"; @@ -147,7 +147,7 @@ ibv_device* set_device() { // check validity of input device CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; } else { - // set default device + // set default device str_port_num = get_env_var("RDMA_DEVICE_PORT"); CHECK(str_port_num.empty()) << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; @@ -177,7 +177,7 @@ ibv_device* set_device() { // Returns: // port to use uint8_t set_port(ibv_context* context) { - uint8_t port_num = 0; // 0 is illegal port number + uint8_t port_num = 0; //0 is illegal port number string str_port_num; ibv_device_attr device_att; ibv_port_attr port_attr; @@ -199,7 +199,9 @@ uint8_t set_port(ibv_context* context) { // check if port id active CHECK(port_attr.state == IBV_PORT_ACTIVE) << "Selected RDMA_DEVICE_PORT is not active"; - } else { // set default port + } + // set default port + else { for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { rc = ibv_query_port(context, port_index, &port_attr); CHECK(!rc) << "Failed to query the port" << port_index; @@ -267,7 +269,7 @@ bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num, // Function to set GID index. // If the port link is IB, no GID index should be selected. // If Ethernet but RDMA_GID_INDEX not set gid index that supports -// RoCE V2 will be chosen(fails if more than one IP is configured) +// RoCE V2 will be chosen(fails if more then one IP is configured) // Args: // context - device context // port_num - port number @@ -300,7 +302,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) { } } switch (port_attr.link_layer) { - case (IBV_LINK_LAYER_ETHERNET): + case(IBV_LINK_LAYER_ETHERNET) : gid_str = get_env_var("RDMA_GID_INDEX"); if (!gid_str.empty()) { gid_index = stoi(gid_str); @@ -311,7 +313,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) { << "More than one IP is available, please specify GID_INDEX"; } break; - case (IBV_LINK_LAYER_INFINIBAND): // no need in GID index + case(IBV_LINK_LAYER_INFINIBAND) : // no need in GID index break; default: LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and " @@ -372,8 +374,7 @@ enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) { break; default: CHECK(0) << "Error: MTU input value must be one of the following: 256, " - "512, 1024, 2048, 4096. MTU " - << mtu << " is invalid\n"; + "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n"; break; } CHECK(mtu < port_attr.active_mtu) @@ -452,9 +453,9 @@ void RdmaAdapter::Process_CQ() { CHECK_GE(ne, 0); for (int i = 0; i < ne; ++i) { CHECK(wc_[i].status == IBV_WC_SUCCESS) - << "Failed status \n" - << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " " - << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err; + << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " " + << wc_[i].status << " " << static_cast<int>(wc_[i].wr_id) << " " + << wc_[i].vendor_err; if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) { RdmaChannel* rc = reinterpret_cast<RdmaChannel*>(wc_[i].wr_id); // put back a recv wr. @@ -610,7 +611,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // create message and ack buffers, then initialize the tables. { const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer", - "tx_ack_buffer", "rx_ack_buffer"}; + "tx_ack_buffer", "rx_ack_buffer"}; tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]); rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]); tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]); @@ -671,7 +672,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) { void RdmaChannel::Recv() { struct ibv_recv_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; struct ibv_recv_wr* bad_wr; CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv"; } @@ -825,11 +826,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class; int r; - CHECK(!(r = ibv_modify_qp(qp_, &attr, - IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | - IBV_QP_MIN_RNR_TIMER))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER))) << "QP to Ready to Receive " << r; memset(&attr, 0, sizeof(ibv_qp_attr)); @@ -840,10 +841,10 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; - CHECK(!(r = ibv_modify_qp(qp_, &attr, - IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | - IBV_QP_MAX_QP_RD_ATOMIC))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC))) << "QP to Ready to Send " << r; connected_ = true; @@ -930,7 +931,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) { struct ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; @@ -1025,9 +1026,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { - CHECK(send_args.device_context) - << "send dev name: " << src_dev->name() - << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); + CHECK(send_args.device_context) << "send dev name: " << src_dev->name() + << " gpu_info: " + << src_dev->tensorflow_gpu_device_info(); if (can_memcpy) { AllocatorAttributes host_alloc_attrs; @@ -1053,8 +1054,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( // aync instead GPUUtil::SetProtoFromGPU( in, src_dev, send_args.device_context, &proto, is_dead, - [this, proto, buffer_size, key, in, step_id, key_with_step_id, - is_dead, send_args, recv_args](const Status& s) mutable { + [this, proto, buffer_size, key, in, step_id, key_with_step_id, + is_dead, send_args, recv_args](const Status& s) mutable { CHECK(s.ok()) << "copy proto from gpu sync"; auto tensor_bytes = proto.ByteSize(); buffer_size += tensor_bytes; |