about summary refs log tree commit diff homepage
path: root/tensorflow/core/framework/bfloat16.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/framework/bfloat16.cc')
-rw-r--r-- tensorflow/core/framework/bfloat16.cc 30
1 files changed, 19 insertions, 11 deletions
diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc
index 1a6f355c77..a5ac0e1a8d 100644
--- a/tensorflow/core/framework/bfloat16.cc
+++ b/tensorflow/core/framework/bfloat16.cc
@@ -18,24 +18,32 @@ limitations under the License.
namespace tensorflow {
void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) {
- for (int64 i = 0; i < size; ++i) {
- dst[i] = bfloat16(src[i]);
- }
+ const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
+ uint16_t* q = reinterpret_cast<uint16_t*>(dst);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (; size != 0; p += 2, q++, size--) {
+ *q = p[0];
+ }
+#else
+ for (; size != 0; p += 2, q++, size--) {
+ *q = p[1];
+ }
+#endif
}
void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) {
const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
uint16_t* q = reinterpret_cast<uint16_t*>(dst);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- for (; size != 0; p++, q += 2, size--) {
- q[0] = *p;
- q[1] = 0;
+ for (; size != 0; p++, q += 2, size--) {
+ q[0] = *p;
+ q[1] = 0;
}
-#else
- for (; size != 0; p++, q += 2, size--) {
- q[0] = 0;
- q[1] = *p;
- }
+#else
+ for (; size != 0; p++, q += 2, size--) {
+ q[0] = 0;
+ q[1] = *p;
+ }
#endif
}