1 file changed, 19 insertions, 11 deletions
diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc
index 1a6f355c77..a5ac0e1a8d 100644
--- a/tensorflow/core/framework/bfloat16.cc
+++ b/tensorflow/core/framework/bfloat16.cc
@@ -18,24 +18,32 @@ limitations under the License.
 namespace tensorflow {
 
 void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) {
-  for (int64 i = 0; i < size; ++i) {
-    dst[i] = bfloat16(src[i]);
-  }
+  const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
+  uint16_t* q = reinterpret_cast<uint16_t*>(dst);  // dst as raw 16-bit words
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    for (; size > 0; p += 2, q++, size--) {  // no-op for size <= 0
+      *q = p[0];  // big-endian: keep the first half-word, drop the second
+    }
+#else
+    for (; size > 0; p += 2, q++, size--) {  // no-op for size <= 0
+      *q = p[1];  // otherwise: keep the second half-word, drop the first
+    }
+#endif
 }
 
 void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) {
   const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
   uint16_t* q = reinterpret_cast<uint16_t*>(dst);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-  for (; size != 0; p++, q += 2, size--) {
-    q[0] = *p;
-    q[1] = 0;
+    for (; size > 0; p++, q += 2, size--) {  // no-op for size <= 0
+      q[0] = *p;  // big-endian: input word becomes the first half-word
+      q[1] = 0;   // second half-word is zero-filled
     }
-#else
-  for (; size != 0; p++, q += 2, size--) {
-    q[0] = 0;
-    q[1] = *p;
-  }
+#else
+    for (; size > 0; p++, q += 2, size--) {  // no-op for size <= 0
+      q[0] = 0;   // first half-word is zero-filled
+      q[1] = *p;  // otherwise: input word becomes the second half-word
+    }
 #endif
 }