diff options
Diffstat (limited to 'tensorflow/core/framework/bfloat16.cc')
-rw-r--r-- | tensorflow/core/framework/bfloat16.cc | 30 |
1 files changed, 19 insertions, 11 deletions
diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc
index 1a6f355c77..a5ac0e1a8d 100644
--- a/tensorflow/core/framework/bfloat16.cc
+++ b/tensorflow/core/framework/bfloat16.cc
@@ -18,24 +18,32 @@ limitations under the License.
 namespace tensorflow {
 
 void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) {
-  for (int64 i = 0; i < size; ++i) {
-    dst[i] = bfloat16(src[i]);
-  }
+  const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
+  uint16_t* q = reinterpret_cast<uint16_t*>(dst);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  for (; size != 0; p += 2, q++, size--) {
+    *q = p[0];
+  }
+#else
+  for (; size != 0; p += 2, q++, size--) {
+    *q = p[1];
+  }
+#endif
 }
 
 void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) {
   const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
   uint16_t* q = reinterpret_cast<uint16_t*>(dst);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-  for (; size != 0; p++, q += 2, size--) {
-    q[0] = *p;
-    q[1] = 0;
+  for (; size != 0; p++, q += 2, size--) {
+    q[0] = *p;
+    q[1] = 0;
   }
-#else
-  for (; size != 0; p++, q += 2, size--) {
-    q[0] = 0;
-    q[1] = *p;
-  }
+#else
+  for (; size != 0; p++, q += 2, size--) {
+    q[0] = 0;
+    q[1] = *p;
+  }
 #endif
 }
 