Fused multiply-add for IA32.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2481 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
author: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-05-05 11:39:57 +0000
committer: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-05-05 11:39:57 +0000
commit: af2b5e2efd6bf7d682e74a3fde5d54e960fa34af (patch)
tree: 50a50efc714ac699660edf905b28f89d70f80e4e
parent: f126a1c0f2bc6434b6478c863ad910bf996ffbe1 (diff)
4 files changed, 45 insertions, 4 deletions
diff --git a/Changelog b/Changelog
index edf327e..f6329ae 100644
--- a/Changelog
+++ b/Changelog
@@ -1,9 +1,5 @@
 Language features:
 - Support for C99 designated initializers. (ISO C99 section 6.7.8.)
-- Traditional, pre-Standard function definitions are no longer supported, e.g.
-      int f(i) int i; { return i + 1; }  // no longer supported
-  Use Standard form instead:
-      int f(int i) { return i + 1; }
 
 Improvements in confidence:
 - The parser is now formally verified against the ISO C99 grammar plus
@@ -24,6 +20,8 @@ Optimizations:
 Usability:
 - Option "-timings" to print compilation times for various passes.
 - Various tweaks in IRC graph coloring to reduce compilation time.
+- IA32: add built-in functions for fused multiply-add
+  (these require a recent processor with FMA3 extensions).
 
 Improvements in ABI conformance:
 - New target platform: ARM with EABI "hard float" calling conventions
diff --git a/ia32/CBuiltins.ml b/ia32/CBuiltins.ml
index 6dbabf1..6046126 100644
--- a/ia32/CBuiltins.ml
+++ b/ia32/CBuiltins.ml
@@ -36,6 +36,22 @@ let builtins = {
       (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
     "__builtin_fmin",
       (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+    "__builtin_fmadd",
+      (TFloat(FDouble, []), 
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fmsub",
+      (TFloat(FDouble, []), 
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fnmadd",
+      (TFloat(FDouble, []), 
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fnmsub",
+      (TFloat(FDouble, []), 
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
     (* Memory accesses *)
     "__builtin_read16_reversed",
       (TInt(IUShort, []), [TPtr(TInt(IUShort, [AConst]), [])], false);
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 9a2648a..33e19f7 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -475,6 +475,25 @@ let print_builtin_inline oc name args res =
         fprintf oc "	movapd	%a, %a\n" freg a1 freg res;
         fprintf oc "	minsd	%a, %a\n" freg a2 freg res
       end
+  | ("__builtin_fmadd"|"__builtin_fmsub"|"__builtin_fnmadd"|"__builtin_fnmsub"),
+    [FR a1; FR a2; FR a3], [FR res] ->
+      let opcode =
+        match name with
+        | "__builtin_fmadd" -> "vfmadd"
+        | "__builtin_fmsub" -> "vfmsub"
+        | "__builtin_fnmadd" -> "vfnmadd"
+        | "__builtin_fnmsub" -> "vfnmsub"
+        | _ -> assert false in
+      if res = a1 then
+        fprintf oc "	%s132sd	%a, %a, %a\n" opcode freg a2 freg a3 freg res
+      else if res = a2 then
+        fprintf oc "	%s213sd	%a, %a, %a\n" opcode freg a3 freg a1 freg res
+      else if res = a3 then
+        fprintf oc "	%s231sd	%a, %a, %a\n" opcode freg a1 freg a2 freg res
+      else begin
+        fprintf oc "	movapd	%a, %a\n" freg a3 freg res;
+        fprintf oc "	%s231sd	%a, %a, %a\n" opcode freg a1 freg a2 freg res
+      end
   (* 64-bit integer arithmetic *)
   | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
       assert (ah = EDX && al = EAX && rh = EDX && rl = EAX);
diff --git a/test/regression/builtins-ia32.c b/test/regression/builtins-ia32.c
index 4ce5488..9145518 100644
--- a/test/regression/builtins-ia32.c
+++ b/test/regression/builtins-ia32.c
@@ -8,6 +8,7 @@ int main(int argc, char ** argv)
   unsigned int y = 0xDEADBEEF;
   double a = 3.14159;
   double b = 2.718;
+  double c = 1.414;
   unsigned short s = 0x1234;
 
   printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
@@ -17,6 +18,13 @@ int main(int argc, char ** argv)
   printf("fmin(%f, %f) = %f\n", a, b, __builtin_fmin(a, b));
   printf("fmax(%f, %f) = %f\n", a, b, __builtin_fmax(a, b));
 
+#ifdef FMA3
+  printf("fmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fmadd(a, b, c));
+  printf("fmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fmsub(a, b, c));
+  printf("fnmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmadd(a, b, c));
+  printf("fnmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmsub(a, b, c));
+#endif
+
   printf ("read_16_rev = %x\n", __builtin_read16_reversed(&s));
   printf ("read_32_rev = %x\n", __builtin_read32_reversed(&y));
   __builtin_write16_reversed(&s, 0x789A);
author	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2014-05-05 11:39:57 +0000
committer	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2014-05-05 11:39:57 +0000
commit	af2b5e2efd6bf7d682e74a3fde5d54e960fa34af (patch)
tree	50a50efc714ac699660edf905b28f89d70f80e4e
parent	f126a1c0f2bc6434b6478c863ad910bf996ffbe1 (diff)