diff options
Diffstat (limited to 'Docs/POSER Profiler.rtf')
-rw-r--r-- | Docs/POSER Profiler.rtf | 522 |
1 files changed, 522 insertions, 0 deletions
diff --git a/Docs/POSER Profiler.rtf b/Docs/POSER Profiler.rtf new file mode 100644 index 0000000..e1de7df --- /dev/null +++ b/Docs/POSER Profiler.rtf @@ -0,0 +1,522 @@ +{\rtf1\mac \deff8\deflang1033{\fonttbl{\f1\fnil\fcharset2\fprq2 Symbol;}{\f3\fnil\fcharset77\fprq2 Courier;}{\f8\fnil\fcharset77\fprq2 Times;}{\f9\fnil\fcharset77\fprq2 Helvetica;}{\f48\fnil\fcharset77\fprq2 Monaco;}} +{\colortbl;\red0\green0\blue0;\red0\green0\blue212;\red2\green171\blue234;\red31\green183\blue20;\red242\green8\blue132;\red221\green8\blue6;\red252\green243\blue5;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green100\blue17; +\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\widctlpar \f8 \snext0 Normal;}{\s1\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl1\pndec\pnprev1\pnstart1\pnsp144 {\pntxta .}} +\b\f8\fs36 \sbasedon0\snext0 heading 1;}{\s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 \sbasedon0\snext0 heading 2;}{\s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 +\sbasedon0\snext0 heading 3;}{\s4\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl4\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\i\f8 \sbasedon0\snext0 heading 4;}{\s5\sb240\sa60\widctlpar{\*\pn \pnlvl5\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\f9\fs22 +\sbasedon0\snext0 heading 5;}{\s6\sb240\sa60\widctlpar{\*\pn \pnlvl6\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\i\f9\fs22 \sbasedon0\snext0 heading 6;}{\s7\sb240\sa60\widctlpar{\*\pn \pnlvl7\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\f9\fs20 +\sbasedon0\snext0 heading 7;}{\s8\sb240\sa60\widctlpar{\*\pn \pnlvl8\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\i\f9\fs20 \sbasedon0\snext0 heading 8;}{\s9\sb240\sa60\widctlpar{\*\pn \pnlvl9\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\i\f9\fs18 +\sbasedon0\snext0 heading 9;}{\*\cs10 \additive Default Paragraph 
Font;}{\s15\li1200\widctlpar\tqr\tldot\tx11880 \f8\fs18 \sbasedon0\snext0 toc 8;}{\s16\li1000\widctlpar\tqr\tldot\tx11880 \f8\fs18 \sbasedon0\snext0 toc 7;}{ +\s17\li800\widctlpar\tqr\tldot\tx11880 \f8\fs18 \sbasedon0\snext0 toc 6;}{\s18\li600\widctlpar\tqr\tldot\tx11880 \f8\fs18 \sbasedon0\snext0 toc 5;}{\s19\li400\widctlpar\tqr\tldot\tx11880 \f8\fs18 \sbasedon0\snext0 toc 4;}{ +\s20\li200\widctlpar\tqr\tldot\tx11880 \i\f8 \sbasedon0\snext0 toc 3;}{\s21\widctlpar\tqr\tldot\tx11880 \scaps\f8 \sbasedon0\snext0 toc 2;}{\s22\sb120\sa120\widctlpar\tqr\tldot\tx11880 \b\caps\f8 \sbasedon0\snext0 toc 1;}{ +\s23\fi-240\li1680\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 7;}{\s24\fi-240\li1440\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 6;}{\s25\fi-240\li1200\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 5;}{ +\s26\fi-240\li960\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 4;}{\s27\fi-240\li720\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 3;}{\s28\fi-240\li480\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 2;}{ +\s29\fi-240\li240\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 1;}{\s30\widctlpar\tqc\tx4320\tqr\tx8640 \f8 \sbasedon0\snext30 footer;}{\s31\widctlpar\tqc\tx4320\tqr\tx8640 \f8 \sbasedon0\snext31 header;}{\s32\li1400\widctlpar\tqr\tldot\tx11880 +\f8\fs18 \sbasedon0\snext0 toc 9;}{\s33\sb120\sa120\widctlpar \b\i\f8 \sbasedon0\snext29 index heading;}{\s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 +\f3\fs20 \sbasedon0\snext34 Code;}{\s35\fi-1710\li1710\widctlpar\box\brdrs\brdrw15\brsp20 \tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx1980\tx2160\tx2340\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f48\fs18 \sbasedon34\snext35 
+PsuedoCode;}{\s36\fi-1710\li1710\ri-3680\widctlpar\tx540\tx720\tx980\tx1080\tx1440\tx1800\tx1980\tx2160\tx2420\tx2520\tx2880\tx3240\tx3320\tx3600\tx3780\tx3960\tx4220\tx4680\tx5040\tx5400\tx5760 \f48\fs18 \sbasedon34\snext36 API Summary;}{ +\s37\fi-200\li1600\widctlpar\tqr\tldot\tx8640 \f8 \sbasedon0\snext0 index 8;}{\s38\fi-200\li1800\widctlpar\tqr\tldot\tx8640 \f8 \sbasedon0\snext0 index 9;}{\s39\fi-2700\li3060\widctlpar\tx1440\tx3060 \f8 \sbasedon0\snext39 Prototype;}{ +\s40\fi-240\li1920\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 8;}{\s41\fi-240\li2160\widctlpar\tqr\tx5580 \f8\fs18 \sbasedon0\snext0 index 9;}{\s42\qc\sb240\sa120\widctlpar \b\f8\fs26 \sbasedon0\snext29 index heading;}{\s43\widctlpar \f8 +\sbasedon0\snext43 ading 2;}}{\info{\title Touchdown System Software Spec}{\subject Touchdown System Software}{\author Bob Ebert}{\keywords Touchdown System Software API}{\operator Bob Ebert}{\creatim\yr1998\mo6\dy10\hr15\min43} +{\revtim\yr1998\mo6\dy10\hr19\min4}{\version128}{\edmins143}{\nofpages0}{\nofwords0}{\nofchars0}{\vern49235}}\paperw14400\paperh18620\margl1440\margr1080 \widowctrl\ftnbj\aenddoc\hyphhotz0\sprstsp\otblrul\brkfrm\sprstsm\truncex\nolead\msmcap\hyphcaps0 +\fet0\sectd \sbknone\linex0\endnhere {\header \pard\plain \s31\nowidctlpar\tqc\tx4320\tqr\tx8640 \f8 {\i\ul Palm Computing Confidential\tab \tab For Internal Use Only +\par }}{\footer \pard\plain \s30\nowidctlpar\tqc\tx4320\tqr\tx8640 \f8 POSER Profiler\tab Page {\chpgn }\tab Last Revised: 6/10/98 +\par }{\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang{\pntxta )}}{\*\pnseclvl5 +\pndec\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta 
)}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang +{\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\pntext\pard\plain\b\fs36\cchs0 1.\tab}\pard\plain \s1\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl1\pndec\pnprev1\pnstart1\pnsp144 {\pntxta .}}\b\f8\fs36 Overview + +\par \pard\plain \widctlpar \f8 POSER\rquote s emulator can be used to profile the code that it\rquote +s executing. It does this by counting cycles while it simulates the opcodes, and watching for JSR/RTS and interrupt/RTE events. It dumps the resulting data to a file that is compatible with the Metrowerks Profiler application for Macintosh. +\par \pard \widctlpar +\par {\pntext\pard\plain\b\fs36\cchs0 2.\tab}\pard\plain \s1\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl1\pndec\pnprev1\pnstart1\pnsp144 {\pntxta .}}\b\f8\fs36 Profiler Implementation +\par {\pntext\pard\plain\b\i\fs28\cchs0 2.1\tab}\pard\plain \s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 UAE Changes +\par \pard\plain \widctlpar \f8 This section describes the profiling changes to the UAE sources. +\par +\par {\pntext\pard\plain\b\cchs0 2.1.1\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 Build68K +\par \pard\plain \widctlpar \f8 UAE is built into POSER in three stages. In the first stage, Build68K takes a text file called \ldblquote table68K\rdblquote containing information about the 68K opcodes and produces \ldblquote cpudefs.c\rdblquote +, a C source file with a compressed version of UAE specific information for each opcode. There are no profiling changes to this stage. In the third stage, the POSER sour +ces are combined with the sources produced by GenCPU to build the final executable. The profiling changes for POSER are described in the next section. 
+\par \pard \widctlpar +\par {\pntext\pard\plain\b\cchs0 2.1.2\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 GenCPU +\par \pard\plain \widctlpar \f8 +In the second stage, GenCPU is built from sources and the cpudefs.c file from stage one. When run, GenCPU can produce functions to emulate each 68K Opcode. GenCPU has been modified to produce a table of read cycle, write cycle, and extra CPU cycles for +each opcode. In addition, some of the opcode functions (e.g. Bcc) include specific code to tell the profiler to increment the clock. +\par \pard \widctlpar +\par \pard \widctlpar +As GenCPU is running, it adds comments to each opcode function to indicate what the profiling numbers are for that opcode. (gen_opcode) The numbers are specific to the MC68000 processor, to match the MC68EC000 used in the Palm devices. The source for t +he data is the \ldblquote Motorola MC68000 8-/16-/32-Bit Microprocessors User\rquote s Manual\rdblquote section 8. Read and write cycles are tracked separately, as well as additional CPU-only cycles for each opcode. +\par \pard \widctlpar +\par \pard \widctlpar gen_opcode calls genaread and genastore for each opcode to read in arguments and write out results. These functions have been modified to accumulate read, write and extra CPU cycles for each operation. +\par \pard \widctlpar +\par \pard \widctlpar After calling gen_opcode, generate_func takes the accumulated read, write, and extra cycle counts for the opcode and produces data for a table that\rquote +s used by POSER. It also writes a comment after each code block with the expected (per the Motorola manual) and actual (per accumulated numbers) cycle counts for the function. This is hand +y because the resulting .c file with the opcode functions can be viewed to check the accuracy of the cycle counting. 
+\par \pard \widctlpar +\par {\pntext\pard\plain\b\cchs0 2.1.3\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 Inaccuracies +\par \pard\plain \widctlpar \f8 Here are some known inaccuracies with the current cycle counts. +\par +\par \pard \widctlpar JMP, JSR, RTS, RTE, and Bcc fall short by one read each, for 6 or 8 cycles each depending on ROM or RAM. I believe this is because the CPU does instruction prefetch, and so the times in Motorola\rquote +s tables actually include the time needed to fetch the next instruction, not the current instruction. PC-changing instr +uctions need to throw away the prefetched instruction and read in the real next instruction, so this takes an extra read cycle. POSER does not prefetch, so there\rquote +s no good way to account for this. (Since the next instruction may be in ROM or RAM, the actual CPU cycles taken may differ.) +\par \pard \widctlpar +\par \pard \widctlpar ANDSR, ORSR, MV2SR and other "SR" instructions are 2 reads short, for 12 or 16 cycles. I\rquote m not sure why this is; perhaps some implementation detail with how the CPU references the status register? +\par \pard \widctlpar +\par \pard \widctlpar MUL and DIV instructions are c +ounted as worst case. Both have unusual execution times which depend on the values being multiplied or divided. The DIV instructions supposedly vary by less than 10% from best to worst case, for a range of 144-158 cycles for signed and 126-140 c +ycles for unsigned values. These are counted as 158 and 140 cycles (plus wait states and effective address calculation times.) The MUL operations take 38+2n cycles, where n is the number of 1\rquote +s in the memory value for MULU, and, for MULS, the number of 10 or 01 patterns in the memory value concatenated with 0 (as the LSB). 70 is indicated as the max value, and that is what is used. 
+\par {\pntext\pard\plain\b\i\fs28\cchs0 2.2\tab}\pard\plain \s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 POSER changes +\par \pard\plain \widctlpar \f8 Switches.h has a HAS_PROFILING switch, which must be non-zero to include profiling in an Emulator build. +\par +\par {\pntext\pard\plain\b\cchs0 2.2.1\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 CEmulatorApp.cpp +\par \pard\plain \widctlpar \f8 +CEmulatorApp.cpp contains a little bit of code that hooks the user interface up with the functions in Profiling.cpp. Changes are made in CEmulatorApp::ObeyCommand to respond to menu and key events generated by the user, and in CEmulatorApp::FindCommand +Status to make sure the menu state is correct. The profiling menu is defined in Palm OS Emulator.ppob. +\par \pard \widctlpar +\par {\pntext\pard\plain\b\cchs0 2.2.2\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 CPU_REG.cpp +\par \pard\plain \widctlpar \f8 CPU_REG.cpp defines and loads the table of read, write, and extra cycles for each opcode. +\par +\par \pard \widctlpar +Emulator::Execute has been modified to count cycles as the opcode executes. Most of the cycle counting is done by the functions that read and write memory, but the extra CPU-only cycles are counted in Emulator::Execute. Emulator::Execute also sets and cl +ears a flag to tell the profiler to pay attention to cycles. This is necessary so that the emulator\rquote s internal use of the memory read/write functions (to draw the screen, call ROM traps, etc.) is not charged against the running application. +\par \pard \widctlpar +\par \pard \widctlpar +CPU_REG.cpp also has some code controlled by the HAS_PROFILING_DEBUG flag to help debug the profiler. This code compares the actual cycles counted as the opcodes are executed to the ones specified as expected in the table. 
That is also done in Emulator: +:Execute. +\par \pard \widctlpar +\par Hardware::Initialize loads the table of cycle counts. +\par +\par \pard \widctlpar +Software::ProcessException is hooked in to the emulator opcodes to special case exceptions. It allows POSER to patch out the exception handlers and do its own thing. The profiler counts time spent in exceptions separately from time spent in regular func +tions, so Software::ProcessException calls ProfileInterruptEnter to do this. If the emulator patched out the exception, then ProfileInterruptExit is also called here. If not, then the interrupt is not finished until the RTE opcode is executed, and Softwar +e::ProcessRTE handles this. +\par \pard \widctlpar +\par \pard \widctlpar +Software::ProcessJSR is called by the CPU emulator function for JSR opcodes. It calls ProfileFnEnter to start profiling for the new function. Similarly, Software::ProcessRTS calls ProfileFnExit to stop profiling for the function and return to the enclos +ing function. +\par \pard \widctlpar +\par \pard \widctlpar Software::ProcessTrap14 has been taken over to allow executing code to control aspects of the emulator. Controlling the profiler is part of this, and there are \ldblquote E-traps\rdblquote to init, start, stop, dump, and cleanup pro +filing. +\par \pard \widctlpar +\par \pard \widctlpar Software::ProcessTrap15 creates a \ldblquote fake\rdblquote function call by calling ProfileFnEnter and ProfileFnExit. By using the trap number for the address, this allows the profiler to count \ldblquote F-traps\rdblquote + by trap number, which is handy for finding out which system traps ar +e used most often. Software::ProcessTrap15 also normally skips the emulated trap dispatcher, preferring instead to get the trap address and do the jump itself. This greatly improves performance when emulating normal code, since about 10% of the ti +me is spent in the trap dispatcher for common operations. 
When profiling, native dispatch is skipped, because we actually want to know what overhead the trap dispatcher is requiring. +\par \pard \widctlpar +\par {\pntext\pard\plain\b\cchs0 2.2.3\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 RAM_ROM.cpp +\par \pard\plain \widctlpar \f8 RAM_ROM.cpp implements reading and writing to memory for various memory banks. Because the Palm devices require wait states when accessing memory, it\rquote +s not enough to simply count reads and writes as 4 cycles each (which is the CPU overhead for them.) Palm devices use different wait states for different memory, a +nd quite possibly devices will have different wait state characteristics in the future. RAM_ROM.cpp counts this. +\par \pard \widctlpar +\par \pard \widctlpar +Each of the GetLong, GetWord, GetByte, SetLong, SetWord, and SetByte functions for each memory type calls either ProfileIncrementRead or ProfileIncrementWrite, passing in the number of read or write cycles and the number of wait states for that memory typ +e. (The wait states are #defined in Profiling.h) ProfileIncrementRead and ProfileIncrementWrite are inlined to improve performance in the emulator. +\par \pard \widctlpar +\par {\pntext\pard\plain\b\cchs0 2.2.4\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 TrapPatches.cpp +\par \pard\plain \widctlpar \f8 +TrapPatches.cpp turns off a couple of trap patches if the profiler is built into the emulator. Normally, these are patched out to improve performance by avoiding emulating the ROM routines. Some of the others should probably be turned off with the HAS_P +ROFILING switch as well, but they are required by the emulator itself. 
+\par \pard \widctlpar +\par {\pntext\pard\plain\b\i\fs28\cchs0 2.3\tab}\pard\plain \s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 Profiling Code +\par \pard\plain \widctlpar \f8 Most of the profiling code is in Profiling.h and Profiling.cpp. This is the chunk that gathers and dumps the profiling data. +\par +\par {\pntext\pard\plain\b\cchs0 2.3.1\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 Accumulators and Flags +\par \pard\plain \widctlpar \f8 +gReadCycles, gWriteCycles, and gClockCycles contain the total number of read, write, and clocks that have passed since profiling was started. These globals are accessed from other modules through the inline functions in Profiling.h. +\par \pard \widctlpar +\par \pard \widctlpar +gProfilingEnabled, gProfilingOn, and gProfilingCounted are flags that indicate the current state of profiling affairs. Enabled indicates profiling buffers have been allocated and profiling is happening or could start or stop at any time. On indicates pr +ofiling data is currently being collected. Counted indicates that the current instructions as well as memory reads and writes should be added to the global read/write/cycle counts. +\par \pard \widctlpar +\par {\pntext\pard\plain\b\cchs0 2.3.2\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 Data Structures +\par \pard\plain \widctlpar \f8 The profiler uses two main data structures to accumulate data: a tree of function call records and a stack of currently in progress functions or interrupts. +\par \pard \widctlpar +\par \pard \widctlpar The tree is stored in memory using the same format as the Metrowerks profiler file, so dumping the output file is a simple matter of writing a memory block to disk. See A +ppendix B for the record format. 
Global variables contain the index of: the root of the tree, the record used for overflow tracking, the record used as the root for interrupt tracking, and the index of the next available record. +\par \pard \widctlpar +\par The stack is an array of the following records. Each element tracks a function or interrupt that is currently executing. +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 struct FnStackRecord \{ +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \tab int call;\tab \tab \tab \tab \tab \tab \tab \tab // fn data block for fn being called +\par \tab uaecptr returnAddress;\tab \tab \tab // return address aka (SP) to calling fn +\par \tab UInt64 cyclesAtEntry;\tab \tab \tab // cycle count when fn was called +\par \tab UInt64 cyclesAtInterrupt;\tab \tab // cycle count when fn was interrupted +\par \tab UInt64 cyclesInKids;\tab \tab \tab \tab // number of cycles spent in subroutines +\par \tab UInt64 cyclesInInterrupts;\tab \tab // number of cycles spent in interrupts +\par \}; +\par \pard\plain \widctlpar \f8 +\par A global variable holds the current top of the stack. +\par +\par \pard \widctlpar To handle interrupts, a separate stack is used that keeps track of the call stack record that matches the interrupt. A stack is necessary for this because interrupts can be interrupted. We allow up to 8 of these, but I\rquote +ve never seen more than 2 used. +\par \pard \widctlpar +\par {\pntext\pard\plain\b\cchs0 2.3.3\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 GetRoutineName +\par \pard\plain \widctlpar \f8 +GetRoutineName turns an address (or interrupt number or trap number) into a string representation for that function. 
It attempts to use the ROM.map data to find the function name, and if no match is found it steps through the instructions found at the ad +dress indicated until it finds the end of the function, then looks for the debug symbol. If the address was to a function in a code resource and the resource was moved, then the name will be i +ncorrect. GetRoutineName is completely responsible for turning an address into a name, and it will always return some string, even if passed an invalid address. +\par {\pntext\pard\plain\b\cchs0 2.3.4\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 Generating Function Names +\par \pard\plain \widctlpar \f8 +The functions InitStringTable, CleanupStringTable, and FindOrAddString do the work of creating the string table (as described in Appendix B) and then adding strings to it. RecursiveAddressToStrings traverses the tree of functions and changes the address +stored with each function record into an offset into the string table, which it is generating. That\rquote s a one-time operation; if you tried to do it twice you\rquote d get garbage. +\par {\pntext\pard\plain\b\cchs0 2.3.5\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 PopCallStackFn +\par \pard\plain \widctlpar \f8 +PopCallStackFn is used when a function or interrupt is being exited from. It updates the nodes in the tree to properly track call counts and time spent in interrupts and in children. It is called both during normal function or interrupt exit and by clea +nup code that\rquote s trying to keep the profiler\rquote s call stack in sync with the CPU\rquote s call stack. +\par {\pntext\pard\plain\b\cchs0 2.3.6\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 FindOrAddCall +\par \pard\plain \widctlpar \f8 FindOrAddCall is used when a f +unction or interrupt is being entered. 
It looks to see if the function has previously been called from the current function or interrupt, and if so returns the existing record. If not, a new record is allocated, initialized, and plugged into the tree. + +\par {\pntext\pard\plain\b\cchs0 2.3.7\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileInit +\par \pard\plain \widctlpar \f8 ProfileInit allocates the stack and tree with the passed sizes, and initializes a bunch of other data structures. +\par {\pntext\pard\plain\b\cchs0 2.3.8\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileCleanup +\par \pard\plain \widctlpar \f8 ProfileCleanup frees the data structures allocated in ProfileInit. +\par {\pntext\pard\plain\b\cchs0 2.3.9\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileStart +\par \pard\plain \widctlpar \f8 ProfileStart turns on the profiling flags. Currently it assumes that it is not called from within an interrupt, which often is not the case. When invoked from the POSER UI, the CPU is often currently processing an interrupt. +\par {\pntext\pard\plain\b\cchs0 2.3.10\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileStop +\par \pard\plain \widctlpar \f8 ProfileStop turns off collection of profiling data. If there are functions currently on the profiling stack, they are popped. +\par {\pntext\pard\plain\b\cchs0 2.3.11\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileDump +\par \pard\plain \widctlpar \f8 ProfileDump reads in the ROM.map file, turns all the addresses in the call tree into real function names, generating the string table in the process, then dumps out the heade +r, tree, and string table to disk. 
This operation is destructive to the function tree, and cannot be done more than once. (It should probably call ProfileCleanup to make sure this doesn\rquote t happen.) +\par {\pntext\pard\plain\b\cchs0 2.3.12\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileFnEnter +\par \pard\plain \widctlpar \f8 ProfileFnEnter is called by the emulator when a JSR is executed, and at a couple of other places. It creates a record for the function (or reuses an existing record), and updates the profiler\rquote s call stack. +\par \pard \widctlpar +\par \pard \widctlpar ProfileFnEnter also supports a special profiling mode. If the profiler is built with PROFILE_ONE_FN on, then it\rquote +s assumed that whenever profiling has been initialized, you always want to start profiling when a specified function is entered, and stop profiling when that function is exited. That way you get profiling information for a particul +ar function no matter who it\rquote s called from. In this case, ProfileFnEnter may be called before ProfileStart has been called, and if it detects that the target function has been entered it will start profiling at that point. +\par {\pntext\pard\plain\b\cchs0 2.3.13\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileFnExit +\par \pard\plain \widctlpar \f8 ProfileFnExit is called by the emulator when an RTS is executed. The Palm OS does some funny things such that there is not always an RTS for every JSR. To handle this, ProfileFnExit has to be smart +, so it keeps the return address when a function is called and compares it to the destination address for the RTS. 
Most of the time they match, and everything is fine, but when they don\rquote t, one of two things may be happening: either a function +is being called not by using JSR, but rather by pushing the callee\rquote s address on the stack then executing an RTS, or an RTS has been skipped and the stack has been cleaned up by the executing code (the kernel does this.) +\par \pard \widctlpar +\par \pard \widctlpar +If the return address is not what we expected based on the profiling stack, then the first thing we do is walk up the stack looking to see if the address matches some higher function. If it does, the stack is popped up to the function we expect, and the +intervening functions are flagged as improperly exited. If no match was found, we assume it\rquote s a long jump disguised as an RTS, and call ProfileFnEnter instead. +\par \pard \widctlpar +\par \pard \widctlpar The profiler\rquote s call stack can (commonly) be popped to empty. This happens when the function that was executing when ProfileStart was called exits. If profiling is started from the PO +SER UI, this happens very often, so we do the best we can: create a new fake \ldblquote root\rdblquote for the call tree, representing the function we\rquote re return +ing to, and continue on. If profiling was started by the PROFILE_ONE_FN trick, then a pop to empty is considered exiting the target function, and profiling is turned off. +\par {\pntext\pard\plain\b\cchs0 2.3.14\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileInterruptEnter +\par \pard\plain \widctlpar \f8 ProfileInterruptEnter does almost the same thing as ProfileFnEnter. The difference is that the current call is added to the \ldblquote interrupt\rdblquote root rather than the currently executing function, and the profiler +\rquote s call stack pointer is pushed onto the profiler\rquote s interrupt stack. 
+\par {\pntext\pard\plain\b\cchs0 2.3.15\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileInterruptExit +\par \pard\plain \widctlpar \f8 ProfileInterruptExit works much like ProfileFnExit: the call stack\rquote s record for the interrupt \ldblquote function\rdblquote is popped. Unlike with RTS, RTE is reliably matched with an interrupt exit, so it\rquote +s not necessary to do the funny call stack searching. However, when the RTE executes it\rquote +s possible that there may still be some unmatched calls (JSRs) that were executed as part of processing the interrupt. (The kernel does this regularly.) So we make sure to pop all the functions off the profiler\rquote +s call stack until we get to the one indicated by the profiler\rquote s interrupt stack. +\par \pard \widctlpar +\par The Palm OS trap handler performs trap calls by pushing stuff on the CPU stack so that the RTE will jump to the real function (and then later be able to do an RTS from that fn to get back to the code that executed the TRAP interrupt.) + To handle this, the RTE destination is compared to the PC that was saved when the interrupt happened, and if the addresses don\rquote t match then ProfileFnEnter is called to track the \ldblquote fake\rdblquote function entry. +\par +\par {\pntext\pard\plain\b\fs36\cchs0 3.\tab}\pard\plain \s1\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl1\pndec\pnprev1\pnstart1\pnsp144 {\pntxta .}}\b\f8\fs36 Using the Profiler +\par {\pntext\pard\plain\b\i\fs28\cchs0 3.1\tab}\pard\plain \s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 POSER UI +\par \pard\plain \widctlpar \f8 You actually don't have to do anything at all to set up your app for profiling. 
That's the beauty of having the emulator handle profiling--all the work can happen "between cycles." It\rquote +s a good idea to set the compiler switch so that the debug symbols get embedded in your code, otherwise you won't be able to identify your functions in the output. For the +ROM, debug symbols are not necessary if a ROM.map file is available. The ROM.map file will be used first, and if the function is not present then the debug symbols will be used. +\par \pard \widctlpar +\par To do a profiling run: +\par {\pntext\pard\plain\f1\cchs0 \'b7\tab}\pard \fi-360\li360\widctlpar{\*\pn \pnlvlblt\pnf1\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}Launch the Mac version of the emulator. +\par {\pntext\pard\plain\f1\cchs0 \'b7\tab}Load the app or otherwise set up the situation to be profiled +\par {\pntext\pard\plain\f1\cchs0 \'b7\tab}Select "Start" from the profiling menu, or press cmd-[ +\par {\pntext\pard\plain\f1\cchs0 \'b7\tab}Do the profiled thing +\par {\pntext\pard\plain\f1\cchs0 \'b7\tab}Select "Stop" from the profiling menu, or press cmd-] +\par {\pntext\pard\plain\f1\cchs0 \'b7\tab}Select "Dump" from the profiling menu, or press cmd-\\ +\par \pard \widctlpar +\par \pard \widctlpar You can start and stop more than once before dumping the results. The profile data gathered in each start/stop pair will be added into that gathered in previous pairs. +\par \pard \widctlpar +\par \pard \widctlpar When you dump the result, you'll find a file in the emulator directory called "POSER Profile Results". At the moment, you have to change the type and creator to 'proF' so the Metrowerks profiler will open it. Once that\rquote +s done, just double-click it, or otherwise convince the "MW Profiler" app to open it. See Reading the Results below. +\par \pard \widctlpar +\par \pard \widctlpar When you profile this way, the profiler buffers are predefined in Profiling.h. Currently they\rquote +re set to 20000 functions with a max stack of 200 functions. 
That's enough to profile booting a ROM, and plenty for repetitive operations, but it quickly gets used up if you do gremlins. + (A few hundred gremlins will generate 20000 unique function calls.) +\par \pard \widctlpar +\par {\pntext\pard\plain\b\i\fs28\cchs0 3.2\tab}\pard\plain \s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 From Your App +\par \pard\plain \widctlpar \f8 You can Init, Start, Stop, Dump, and Cleanup the POSER profiler from application code, much like you can for the Metrowerks profiler for Mac. To do this, #include \ldblquote EmuTraps.h\rdblquote and #include \ldblquote +EmuProfiling.h\rdblquote in the module that will control the profiler. (You might want to put EmuProfilingNOPs.c in the project for simulator builds only, so that the profiling functions will be stubbed out there.) The APIs are described below. +\par \pard \widctlpar +\par {\pntext\pard\plain\b\cchs0 3.2.1\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileInit +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 void ProfileInit(int maxCalls, int maxDepth) +\par \pard\plain \widctlpar \f8 \tab maxCalls - the number of function call buffers to allocate +\par \tab maxDepth - the number of stack frames to allocate +\par \pard \widctlpar This allocates the profile buffers within the POSER memory partition. It must be done prior to calling any other profiling function. 
+\par {\pntext\pard\plain\b\cchs0 3.2.2\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileStart +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 void ProfileStart() +\par \pard\plain \widctlpar \f8 Begin profiling at that point in the code. The instructions executed within the function that calls ProfileStart will appear in the result in a function body under the label \ldblquote partial\rdblquote +, as will any code executed by calling functions if ProfileStop is not called when the function that called ProfileStart exits. +\par {\pntext\pard\plain\b\cchs0 3.2.3\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileStop +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 void ProfileStop() +\par \pard\plain \widctlpar \f8 Stop profiling at that point in the code. In general, it\rquote +s a good idea to start and stop profiling at the same level, but that is not required. If you stop while nested deeper than where you started, all the functions still on the call stack will be pop +ped as if they had exited at the point ProfileStop was called. +\par {\pntext\pard\plain\b\cchs0 3.2.4\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileDump +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 void ProfileDump() +\par \pard\plain \widctlpar \f8 Dumps the accumulated results to a file called \ldblquote POSER Profile Results\rdblquote . 
(Future versions should allow you to pass the file name as an argument to ProfileDump.) +\par {\pntext\pard\plain\b\cchs0 3.2.5\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfilePrint +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 void ProfilePrint() +\par \pard\plain \widctlpar \f8 A debugging function. Prints the profile results as text to POSER\rquote s Log file. Can be very slow. +\par {\pntext\pard\plain\b\cchs0 3.2.6\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 ProfileCleanup +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 void ProfileCleanup() +\par \pard\plain \widctlpar \f8 Free up the data structures allocated by ProfileInit. +\par +\par {\pntext\pard\plain\b\fs36\cchs0 4.\tab}\pard\plain \s1\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl1\pndec\pnprev1\pnstart1\pnsp144 {\pntxta .}}\b\f8\fs36 Reading the Results +\par \pard\plain \widctlpar \f8 The profiler dumps the results in a format compatible with the Metrowerks profiler. This format was reverse engineered, and is incompletely described in Appendix B. +\par \pard \widctlpar +\par \pard \widctlpar +Times are all theoretically in milliseconds for a 16.580608 MHz clock. The times are written to the output file as 64-bit integers of clocks, and a multiplier is applied to scale the result to milliseconds based on the clock speed. 5661.518 is a bit over + 5 1/2 seconds, and 1.255 is a bit over 1 1/4 milliseconds. (In theory at least; I'm still validating the data. If you see anything that strikes you as inaccurate, please tell me about it!) +\par \pard \widctlpar +\par \pard \widctlpar The header in the output file can be ignored. 
The Overhead field actually contains the number of cycles spent in interrupts, which should be identical to the +Children column for the interrupts block. +\par \pard \widctlpar +\par The first column contains the function name, with these special cases: +\par \pard \fi-1440\li2160\widctlpar functions\tab a top-level cover that includes all regular function calls +\par interrupts\tab a top-level node that includes all interrupts (except the ones POSER has patched out) +\par \pard \fi-1440\li2160\widctlpar partial\tab means profiling was started in the middle of the function, so we don't have the address of the fn and consequently don't have a name. +\par overflow\tab a lump of all functions called when we're out of space to track more unique calls, where unique means called from the same path to the "root" of the call tree. +\par \pard \fi-1440\li2160\widctlpar unknown\tab a function for which no name could be found. Many functions in .prc files show up as unknown. +\par \pard \widctlpar +\par The rest of the names all take the form "Name $address.x" where: +\par Name- is the name of the function or trap. +\par address-For regular functions, the 4 byte address. +\par \tab -For traps, the 2 byte trap number. +\par \tab -For interrupts, the 1 byte interrupt number. +\par x- debugging info, indicating where the name information came from: +\par \tab 't' = trap names table built in POSER, +\par \tab 'm'= the ROM.Map file found in the same directory as POSER +\par \tab 'd'=the debug symbol appended to the function\rquote s instructions by the compiler +\par \tab 'i'=invalid address flag (usually due to POSER\rquote s implementation internals) +\par +\par The other columns are defined as follows. Note MINIMUM and STACK SPACE are NOT what you expect: +\par +\par {\b count} - the number of times the function was called. +\par +\par {\b only} - time spent in the function, not counting child fns or interrupts. 
+\par +\par {\b %} (by only) - percentage of total profiling time spent in this fn/call. +\par +\par {\b +Children} - time spent in the function including child fns, but not including interrupts +\par +\par {\b %} (by +Children) - percentage of total profiling time spent in the fn and its kids +\par +\par {\b Average} - "Only" divided by "count" +\par +\par {\b Maximum} - the maximum time (in msec) spent in any 1 call to the fn. +\par +\par \pard \widctlpar {\b Minimum} - {\i NOT WHAT YOU EXPECT}. This is actually the time spent handling interrupts for calls to that particular instance of that fn. Due to the way the "Summary" is calculated, this number won't be correct in summary views. + +\par \pard \widctlpar +\par \pard \widctlpar {\b Stack space} - {\i NOT WHAT YOU EXPECT} +. More of a trap/interrupt counter plus some debug info. The number in that field for a particular fn entry is incremented by 1 every time the fn is interrupted, by 10000 if the fn call is made by a faked up RTS instead of a JSR, and by 1000 if the functio +n was executing when an RTS occurred that didn't return to its caller but instead returned to some fn farther up the call chain. Again, this will only be useful in the detail view, since the summary does some computation on it. 
+\par {\pntext\pard\plain\b\fs36\cchs0 5.\tab}\pard\plain \s1\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl1\pndec\pnprev1\pnstart1\pnsp144 {\pntxta .}}\b\f8\fs36 Appendix A - Profiling Includes +\par {\pntext\pard\plain\b\i\fs28\cchs0 5.1\tab}\pard\plain \s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 EmuTraps.h +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 /********************************************************************* +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 * +\par * Copyright (c) 1998 +\par * 3Com/Palm Computing Division. All rights reserved. +\par * +\par * Redistribution and use in source and binary forms, with or without +\par * modification, are permitted provided that the following conditions +\par * are met: +\par * +\par * 1. Redistributions of source code must retain the above copyright +\par * notice, this list of conditions and the following disclaimer. +\par * +\par * 2. Redistributions in binary form must reproduce the above +\par * copyright notice, this list of conditions and the following +\par * disclaimer in the documentation and/or other materials provided +\par * with the distribution. +\par * +\par * 3. All advertising materials mentioning features or use of this +\par * software must display the following acknowledgement: +\par * +\par *\tab \tab This product includes software developed by 3Com and its +\par *\tab \tab contributors. +\par * +\par * 4. Neither 3Com nor the names of its contributors may be used to +\par * endorse or promote products derived from this software +\par * without specific prior written permission. 
+\par * +\par * THIS SOFTWARE IS PROVIDED BY THE 3COM AND CONTRIBUTORS ``AS IS'' +\par * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +\par * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +\par * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 3COM OR +\par * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +\par * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +\par * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +\par * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +\par * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +\par * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\par * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +\par * OF SUCH DAMAGE. +\par * +\par *******************************************************************/ +\par #pragma once +\par +\par #define\tab emuTrapBase\tab \tab \tab 0x0000 +\par +\par typedef enum \{ +\par \tab emuTrapProfileInit = emuTrapBase, +\par \tab emuTrapProfileStart, +\par \tab emuTrapProfileStop, +\par \tab emuTrapProfileDump, +\par \tab emuTrapProfileCleanup, +\par +\par \tab // WARNING!! LEAVE THIS ONE AT THE END AND ALWAYS ADD NEW TRAPS TO +\par \tab // THE END OF THE TRAP TABLE BUT RIGHT BEFORE THIS TRAP!!!!!!!!! 
+\par \tab emuTrapEmuReserved1, +\par \tab emuTrapEmuReserved2, +\par \tab emuTrapEmuReserved3, +\par \tab emuTrapEmuReserved4, +\par \tab +\par \tab emuTrapLastTrapNumber +\par \tab \} EmuTrapNumber; +\par +\par +\par #define emuTrapNum\tab \tab \tab \tab 0x0E\tab \tab // Emulator trap +\par +\par #define EMU_TRAP(trapNum) \\ +\par \tab TWOWORD_INLINE(m68kTrapInstr+emuTrapNum,trapNum) +\par +\par #define ASM_EMU_TRAP(trapNum)\tab \\ +\par \tab \tab \tab DC.W\tab m68kTrapInstr+emuTrapNum; \\ +\par \tab \tab \tab DC.W\tab trapNum +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 +\par {\pntext\pard\plain\b\i\fs28\cchs0 5.2\tab}\pard\plain \s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 EmuProfiling.h +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 /********************************************************************* +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 * +\par * Copyright (c) 1998 +\par * 3Com/Palm Computing Division. All rights reserved. +\par * +\par * Redistribution and use in source and binary forms, with or without +\par * modification, are permitted provided that the following conditions +\par * are met: +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 * +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 * 1. Redistributions of source code must retain the above copyright +\par * notice, this list of conditions and the following disclaimer. +\par * +\par * 2. 
Redistributions in binary form must reproduce the above +\par * copyright notice, this list of conditions and the following +\par * disclaimer in the documentation and/or other materials provided +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 * with the distribution. +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 * +\par * 3. All advertising materials mentioning features or use of this +\par * software must display the following acknowledgement: +\par * +\par *\tab \tab This product includes software developed by 3Com and its +\par *\tab \tab contributors. +\par * +\par * 4. Neither 3Com nor the names of its contributors may be used to +\par * endorse or promote products derived from this software +\par * without specific prior written permission. +\par * +\par * THIS SOFTWARE IS PROVIDED BY THE 3COM AND CONTRIBUTORS ``AS IS'' +\par * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +\par * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +\par * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 3COM OR +\par * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +\par * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +\par * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +\par * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +\par * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +\par * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\par * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +\par * OF SUCH DAMAGE. 
+\par * +\par *******************************************************************/ +\par #pragma once +\par +\par #include "EmuTraps.h" +\par +\par void ProfileInit(int maxCalls, int maxDepth) +\par \tab \tab \tab EMU_TRAP(emuTrapProfileInit); +\par +\par void ProfileStart() +\par \tab \tab \tab EMU_TRAP(emuTrapProfileStart); +\par +\par void ProfileStop() +\par \tab \tab \tab EMU_TRAP(emuTrapProfileStop); +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 void ProfileDump() +\par \tab \tab \tab EMU_TRAP(emuTrapProfileDump); +\par +\par void ProfileCleanup() +\par \tab \tab \tab EMU_TRAP(emuTrapProfileCleanup); +\par +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 +\par {\pntext\pard\plain\b\i\fs28\cchs0 5.3\tab}\pard\plain \s2\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl2\pndec\pnprev1\pnstart1\pnsp144 }\b\i\f8\fs28 EmuProfilingNOPs.c +\par \pard\plain \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 // stub out the functions for simulator builds only +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 +\par void ProfileInit(int maxCalls, int maxDepth) \{return;\} +\par void ProfileStart() \{return;\} +\par void ProfileStop() \{return;\} +\par void ProfileDump() \{return;\} +\par void ProfilePrint() \{return;\} +\par void ProfileCleanup() \{return;\} +\par {\pntext\pard\plain\b\fs36\cchs0 6.\tab}\pard\plain \s1\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl1\pndec\pnprev1\pnstart1\pnsp144 {\pntxta .}}\b\f8\fs36 
Appendix B - Metrowerks Profiler File Format +\par \pard\plain \widctlpar \f8 +The profile output file is composed of three sections. A header section of 0x200 bytes, then a section of FnCallRecord structures containing an array of function data, followed by a string table containing the names of each function. T +he functions are represented in a tree, where each node represents a given function when called from a particular path to the root of the tree. That is, if A calls C and B calls C, then there will be at least 2 records in the tree representing calls to C +, once when called from A and another when called from B. I say at least 2 because A and B themselves may be called from multiple places, and each unique version of A and B will also have a unique child node representing C. +\par {\pntext\pard\plain\b\cchs0 6.1.1\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 Header +\par \pard\plain \widctlpar \f8 The header section contai +ns things like the number of functions in the function data array, and offset to the start of the string table, the size of the string table, and the record number (array index) of the root node in the function tree. 
+\par \pard \widctlpar +\par \pard\plain \s34\fi-1714\li1714\keepn\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 struct ProfFileHeader \{ +\par \pard \s34\fi-1714\li1714\keepn\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \tab UInt32\tab proF;\tab \tab \tab \tab \tab // 'proF' +\par \tab UInt32\tab version;\tab \tab \tab \tab // 0x00040002 +\par \tab UInt32\tab fnCount;\tab \tab \tab \tab // number of unique fns (records) in log +\par \tab UInt32\tab four;\tab \tab \tab \tab \tab // 0x00000004 +\par +\par \tab UInt32\tab zeros1;\tab \tab \tab \tab // 0x00000000 +\par \tab UInt32\tab zeros2;\tab \tab \tab \tab // 0x00000000 +\par \tab UInt32\tab unknown;\tab \tab \tab \tab // 0xB141A3A9\tab - maybe timebase data +\par \tab UInt32\tab recordsSize;\tab \tab // size of header plus size of data (or offset to string table) +\par \tab +\par \tab UInt32\tab stringTableSize;\tab // size of string table in bytes +\par \tab UInt64\tab overhead;\tab \tab \tab // count for overhead +\par \tab Int32\tab rootRec;\tab \tab \tab \tab \tab // record number of root of tree +\par \tab +\par \tab UInt32\tab sixtyfour1;\tab \tab \tab // 0x00000064 +\par \tab UInt32\tab sixtyfour2;\tab \tab \tab // 0x00000064 +\par \tab UInt32\tab countsPerTime; \tab // translation between counts at nodes and integers in column +\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab // 0x00FD0000 = 16.580608 MHz with display in seconds +\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab // 0x000040C4 = 16.580608 MHz with display in milliseconds +\par \pard \s34\fi-1714\li1714\keepn\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \tab UInt32\tab oddstuff0;\tab \tab \tab // seems like it can be 0, set by profiler tool itself + +\par \pard 
\s34\fi-1714\li1714\keepn\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \tab +\par \tab UInt32\tab oddstuff1;\tab \tab \tab // seems like it can be 0, set by profiler tool itself +\par \tab UInt32\tab oddstuff2;\tab \tab \tab // seems like it can be 0, set by profiler tool itself +\par \tab UInt32\tab oddstuff3;\tab \tab \tab // seems like it can be 0, set by profiler tool itself +\par \tab UInt32\tab oddstuff4;\tab \tab \tab // seems like it can be 0, set by profiler tool itself +\par +\par \pard \s34\fi-1714\li1714\keepn\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \tab Byte\tab unused[0x200 - 0x50];\tab // for 0x200 bytes +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \}; +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 +\par {\pntext\pard\plain\b\cchs0 6.1.2\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 Function Array +\par \pard\plain \widctlpar \f8 Following the header (starting + at offset 0x200 in the file) is an array of header.fnCount structs as defined below. Each struct represents a unique node in the tree. Most of these values are straightforward, and appear in the results view representing what you expect. The sib and ki +d records determine the relationship to other functions in the tree. Children of a given node are found by looking at the node\rquote s kid field to find the first child, then looking at the child\rquote +s sib field to find the adjacent nodes which are all considered children of the first kid\rquote s parent. If the sib or kid contain -1 then there are no sibling or child nodes. 
+\par \pard \widctlpar +\par \pard\plain \s34\fi-1714\li1714\keepn\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \f3\fs20 struct FnCallRecord \{ +\par \pard \s34\fi-1714\li1714\keepn\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 \tab UInt32\tab name;\tab \tab \tab \tab +// also offset from start of name table to this fn's name +\par \tab UInt32\tab entries;\tab \tab \tab // times function was called +\par \tab UInt64\tab cyclesSelf;\tab \tab // profiling data for this fn alone +\par \tab UInt64\tab cyclesPlusKids;\tab // profiling data for this fn with kids +\par \tab UInt64\tab cyclesMin;\tab \tab // profiling data for this fn alone, min +\par \tab UInt64\tab cyclesMax;\tab \tab // profiling data for this fn alone, max +\par \tab Int32\tab \tab sib;\tab \tab \tab \tab // record number of sib, -1 for no sibs +\par \tab Int32\tab \tab kid; \tab \tab \tab \tab // record number of kid, -1 for no kids +\par \tab UInt32\tab stackUsed;\tab \tab // bytes of stack used by fn, we use it to count unmatched returns +\par \}; +\par \pard \s34\fi-1710\li1710\widctlpar\tx360\tx720\tx1080\tx1440\tx1710\tx1800\tx2160\tx2520\tx2880\tx3240\tx3600\tx3960\tx4320\tx4680\tx5040\tx5400\tx5760 +\par {\pntext\pard\plain\b\cchs0 6.1.3\tab}\pard\plain \s3\sb240\sa60\keepn\widctlpar{\*\pn \pnlvl3\pndec\pnprev1\pnstart1\pnsp144 {\pntxtb .}}\b\f8 String Table +\par \pard\plain \widctlpar \f8 +After the array of function records is a string table. This table contains the names of all the functions in the profiling output. Each function name appears only once, and all the nodes that represent that function will refer to the same string. Order + of strings within the string table is not important. +\par \pard \widctlpar +\par \pard \widctlpar T +he table is stored as adjacent C strings. 
That is, the first byte of the table is the first byte of the first string, and subsequent bytes contain the rest of the first string until a NULL (0) byte appears. Immediately after the terminator is the first +byte of the next string. +\par }
\ No newline at end of file |