summaryrefslogtreecommitdiff
path: root/cil/ocamlutil/perfcount.c.in
diff options
context:
space:
mode:
Diffstat (limited to 'cil/ocamlutil/perfcount.c.in')
-rwxr-xr-xcil/ocamlutil/perfcount.c.in184
1 files changed, 184 insertions, 0 deletions
diff --git a/cil/ocamlutil/perfcount.c.in b/cil/ocamlutil/perfcount.c.in
new file mode 100755
index 0000000..ae532f6
--- /dev/null
+++ b/cil/ocamlutil/perfcount.c.in
@@ -0,0 +1,184 @@
+// -*- Mode: c -*-
+//
+/*
+ * A module that allows the reading of performance counters on Pentium.
+ *
+ * This file contains both code that uses the performance counters to
+ * compute the number of cycles per second (to be used during ./configure)
+ * and also code to read the performance counters from Ocaml.
+ *
+ * Author: George Necula (necula@cs.berkeley.edu)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__GNUC__)
+ #define longlong long long
+ // RDTSC puts the result in EAX and EDX. We tell gcc to use those registers
+ // for "low" and "high"
+ #if defined(__i386__)
+ #define GETCOUNTER(low,high) \
+ __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
+ #else
+ #define GETCOUNTER(low,high) \
+ printf ("Reading of performance counters is supported only on Intel x86\n"); \
+ exit(1);
+ #endif
+#else
+ // Microsoft Visual Studio
+ #define longlong __int64
+ #define inline __inline
+ #define GETCOUNTER(low,high) __asm { \
+ __asm rdtsc \
+ __asm mov low, eax \
+ __asm mov high, edx };
+#endif
+
+/* Store here the first value read from the performance counter */
+unsigned static longlong first_value;
+
+
+/* This is the function that actually reads the performance counter. */
+inline unsigned longlong read_ppc(void) {
+ unsigned long pclow, pchigh;
+ unsigned longlong lowhigh;
+
+ GETCOUNTER(pclow, pchigh);
+
+ // printf ("Read low=0x%08lx high=0x%08lx\n", low, high);
+
+ // Put the 64-bit value together
+ lowhigh = ((unsigned longlong)pclow) | ((unsigned longlong)pchigh << 32);
+
+ if(first_value == 0) {
+ first_value = lowhigh;
+ }
+ return lowhigh - first_value;
+}
+
+
+/* sm: I want a version that is as fast as possible, dropping
+ * bits that aren't very important to achieve it. *
+ *
+ * This version drops the low 20 bits and the high 14 bits so the
+ * result is 30 bits (always a positive Ocaml int); this yields
+ * megacycles, which for GHz machines will be something like
+ * milliseconds. */
+static unsigned long sample_ppc_20(void)
+{
+ unsigned long pclow, pchigh;
+
+ GETCOUNTER(pclow, pchigh);
+
+ return ((pclow >> 20) | (pchigh << 12)) & 0x3FFFFFFF;
+}
+
+/* This version drops the low 10 bits, yielding something like
+ * microseconds. */
+inline static unsigned long sample_ppc_10()
+{
+ unsigned long pclow, pchigh;
+
+ GETCOUNTER(pclow,pchigh);
+
+ return ((pclow >> 10) | (pchigh << 22)) & 0x3FFFFFFF;
+}
+
+
+
+#ifndef CONFIGURATION_ONLY
+/*** This is the OCAML stub for the read_ppc ***/
+#include <caml/mlvalues.h>
+#include <caml/alloc.h>
+#include <caml/memory.h>
+
+#define CYCLES_PER_USEC @CYCLES_PER_USEC@
+value read_pentium_perfcount()
+{
+ double counter = (double)read_ppc() / (1000000.0 * CYCLES_PER_USEC);
+ return copy_double(counter);
+}
+
+/* The Ocaml system can use this function to figure out if there are
+ * performance counters available */
+value has_performance_counters() {
+ // HAS_PERFCOUNT is set by the configuration code at the end of
+ // this file, during ./configure
+#if @HAS_PERFCOUNT@ != 0
+ return Val_true;
+#else
+ return Val_false;
+#endif
+}
+
+/* sm: interface to above from Ocaml */
+value sample_pentium_perfcount_20()
+{
+ return Val_long(sample_ppc_20());
+}
+
+value sample_pentium_perfcount_10()
+{
+ return Val_long(sample_ppc_10());
+}
+
+#endif
+
+
+/* Now we have a function that tries to compute the number of cycles per
+ * second (to be used during ./configure) */
+#ifdef CONFIGURATION_ONLY
+#include <sys/times.h>
+#include <unistd.h>
+#include <math.h>
+
+int main() {
+ struct tms t;
+ clock_t start, finish, diff;
+ unsigned longlong start_pc, finish_pc, diff_pc;
+ long clk_per_sec = sysconf(_SC_CLK_TCK);
+ double cycles_per_usec;
+
+ if(clk_per_sec <= 0) {
+ printf("Cannot find clk_per_sec (got %ld)\n", clk_per_sec);
+ exit(1);
+ }
+
+ times(&t); start = t.tms_utime;
+ start_pc = read_ppc();
+ // Do something for a while
+ {
+ int i;
+ double a = 5.678;
+ for(i=0;i<10000000;i++) {
+ a = (i & 1) ? (a * a) : (sqrt(a));
+ }
+ }
+ times(&t); finish = t.tms_utime;
+ finish_pc = read_ppc();
+ diff = finish - start;
+ diff_pc = finish_pc - start_pc;
+ if(diff == 0) {
+ printf("Cannot use Unix.times\n");
+ exit(1);
+ }
+ if(diff_pc == 0) {
+ printf("Invalid result from the peformance counters\n");
+ exit(1);
+ }
+ diff_pc /= 1000000; // We care about cycles per microsecond
+// printf("diff = %ld, diff_pc = %ld, clk = %ld\n",
+// (long)diff,
+// (long)diff_pc, (long)clk_per_sec);
+
+ cycles_per_usec = (((double)diff_pc / (double)diff)
+ * (double)clk_per_sec);
+
+ /* Whatever value we print here will be used as the CYCLES_PER_USEC
+ * below */
+ printf("%.3lf\n", cycles_per_usec);
+ exit(0);
+}
+#endif //defined CONFIGURATION_ONLY
+