summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-05-05 11:39:57 +0000
committer xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-05-05 11:39:57 +0000
commit af2b5e2efd6bf7d682e74a3fde5d54e960fa34af (patch)
tree 50a50efc714ac699660edf905b28f89d70f80e4e
parent f126a1c0f2bc6434b6478c863ad910bf996ffbe1 (diff)
Fused multiply-add for IA32.
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2481 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
-rw-r--r-- Changelog 6
-rw-r--r-- ia32/CBuiltins.ml 16
-rw-r--r-- ia32/PrintAsm.ml 19
-rw-r--r-- test/regression/builtins-ia32.c 8
4 files changed, 45 insertions, 4 deletions
diff --git a/Changelog b/Changelog
index edf327e..f6329ae 100644
--- a/Changelog
+++ b/Changelog
@@ -1,9 +1,5 @@
Language features:
- Support for C99 designated initializers. (ISO C99 section 6.7.8.)
-- Traditional, pre-Standard function definitions are no longer supported, e.g.
- int f(i) int i; { return i + 1; } // no longer supported
- Use Standard form instead:
- int f(int i) { return i + 1; }
Improvements in confidence:
- The parser is now formally verified against the ISO C99 grammar plus
@@ -24,6 +20,8 @@ Optimizations:
Usability:
- Option "-timings" to print compilation times for various passes.
- Various tweaks in IRC graph coloring to reduce compilation time.
+- IA32: add built-in functions for fused multiply-add
+ (require a recent processor with FMA3 extensions).
Improvements in ABI conformance:
- New target platform: ARM with EABI "hard float" calling conventions
diff --git a/ia32/CBuiltins.ml b/ia32/CBuiltins.ml
index 6dbabf1..6046126 100644
--- a/ia32/CBuiltins.ml
+++ b/ia32/CBuiltins.ml
@@ -36,6 +36,22 @@ let builtins = {
(TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
"__builtin_fmin",
(TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+ "__builtin_fmadd",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+ false);
+ "__builtin_fmsub",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+ false);
+ "__builtin_fnmadd",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+ false);
+ "__builtin_fnmsub",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+ false);
(* Memory accesses *)
"__builtin_read16_reversed",
(TInt(IUShort, []), [TPtr(TInt(IUShort, [AConst]), [])], false);
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 9a2648a..33e19f7 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -475,6 +475,25 @@ let print_builtin_inline oc name args res =
fprintf oc " movapd %a, %a\n" freg a1 freg res;
fprintf oc " minsd %a, %a\n" freg a2 freg res
end
+ | ("__builtin_fmadd"|"__builtin_fmsub"|"__builtin_fnmadd"|"__builtin_fnmsub"),
+ [FR a1; FR a2; FR a3], [FR res] ->
+ let opcode =
+ match name with
+ | "__builtin_fmadd" -> "vfmadd"
+ | "__builtin_fmsub" -> "vfmsub"
+ | "__builtin_fnmadd" -> "vfnmadd"
+ | "__builtin_fnmsub" -> "vfnmsub"
+ | _ -> assert false in
+ if res = a1 then
+ fprintf oc " %s132sd %a, %a, %a\n" opcode freg a2 freg a3 freg res
+ else if res = a2 then
+ fprintf oc " %s213sd %a, %a, %a\n" opcode freg a3 freg a1 freg res
+ else if res = a3 then
+ fprintf oc " %s231sd %a, %a, %a\n" opcode freg a1 freg a2 freg res
+ else begin
+ fprintf oc " movapd %a, %a\n" freg a3 freg res;
+ fprintf oc " %s231sd %a, %a, %a\n" opcode freg a1 freg a2 freg res
+ end
(* 64-bit integer arithmetic *)
| "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
assert (ah = EDX && al = EAX && rh = EDX && rl = EAX);
diff --git a/test/regression/builtins-ia32.c b/test/regression/builtins-ia32.c
index 4ce5488..9145518 100644
--- a/test/regression/builtins-ia32.c
+++ b/test/regression/builtins-ia32.c
@@ -8,6 +8,7 @@ int main(int argc, char ** argv)
unsigned int y = 0xDEADBEEF;
double a = 3.14159;
double b = 2.718;
+ double c = 1.414;
unsigned short s = 0x1234;
printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
@@ -17,6 +18,13 @@ int main(int argc, char ** argv)
printf("fmin(%f, %f) = %f\n", a, b, __builtin_fmin(a, b));
printf("fmax(%f, %f) = %f\n", a, b, __builtin_fmax(a, b));
+#ifdef FMA3
+ printf("fmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fmadd(a, b, c));
+ printf("fmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fmsub(a, b, c));
+ printf("fnmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmadd(a, b, c));
+ printf("fnmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmsub(a, b, c));
+#endif
+
printf ("read_16_rev = %x\n", __builtin_read16_reversed(&s));
printf ("read_32_rev = %x\n", __builtin_read32_reversed(&y));
__builtin_write16_reversed(&s, 0x789A);