Big merge of the newregalloc-int64 branch. Lots of changes in two directions:

1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
author: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2013-04-20 07:54:52 +0000
committer: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2013-04-20 07:54:52 +0000
commit: 255cee09b71255051c2b40eae0c88bffce1f6f32 (patch)
tree: 7951b1b13e8fd5e525b9223e8be0580e83550f55
parent: 6e5041958df01c56762e90770abd704b95a36e5d (diff)
166 files changed, 20598 insertions, 10626 deletions
diff --git a/.depend b/.depend
index e87fef9..e55e84c 100644
--- a/.depend
+++ b/.depend
@@ -12,6 +12,7 @@ lib/Parmov.vo lib/Parmov.glob lib/Parmov.v.beautified: lib/Parmov.v lib/Axioms.v
 lib/UnionFind.vo lib/UnionFind.glob lib/UnionFind.v.beautified: lib/UnionFind.v lib/Coqlib.vo
 lib/Wfsimpl.vo lib/Wfsimpl.glob lib/Wfsimpl.v.beautified: lib/Wfsimpl.v lib/Axioms.vo
 lib/Postorder.vo lib/Postorder.glob lib/Postorder.v.beautified: lib/Postorder.v lib/Coqlib.vo lib/Maps.vo lib/Iteration.vo
+lib/FSetAVLplus.vo lib/FSetAVLplus.glob lib/FSetAVLplus.v.beautified: lib/FSetAVLplus.v lib/Coqlib.vo
 common/Errors.vo common/Errors.glob common/Errors.v.beautified: common/Errors.v lib/Coqlib.vo
 common/AST.vo common/AST.glob common/AST.v.beautified: common/AST.v lib/Coqlib.vo common/Errors.vo lib/Integers.vo lib/Floats.vo
 common/Events.vo common/Events.glob common/Events.v.beautified: common/Events.v lib/Coqlib.vo lib/Intv.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Errors.vo
@@ -28,9 +29,11 @@ backend/Cminor.vo backend/Cminor.glob backend/Cminor.v.beautified: backend/Cmino
 $(ARCH)/Op.vo $(ARCH)/Op.glob $(ARCH)/Op.v.beautified: $(ARCH)/Op.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo
 backend/CminorSel.vo backend/CminorSel.glob backend/CminorSel.v.beautified: backend/CminorSel.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Events.vo common/Values.vo common/Memory.vo backend/Cminor.vo $(ARCH)/Op.vo common/Globalenvs.vo common/Switch.vo common/Smallstep.vo
 $(ARCH)/SelectOp.vo $(ARCH)/SelectOp.glob $(ARCH)/SelectOp.v.beautified: $(ARCH)/SelectOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/CminorSel.vo
-backend/Selection.vo backend/Selection.glob backend/Selection.v.beautified: backend/Selection.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo
-$(ARCH)/SelectOpproof.vo $(ARCH)/SelectOpproof.glob $(ARCH)/SelectOpproof.v.beautified: $(ARCH)/SelectOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo
-backend/Selectionproof.vo backend/Selectionproof.glob backend/Selectionproof.v.beautified: backend/Selectionproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo backend/Selection.vo $(ARCH)/SelectOpproof.vo
+backend/SelectLong.vo backend/SelectLong.glob backend/SelectLong.v.beautified: backend/SelectLong.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo common/Errors.vo
+backend/Selection.vo backend/Selection.glob backend/Selection.v.beautified: backend/Selection.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo backend/SelectLong.vo
+$(ARCH)/SelectOpproof.vo $(ARCH)/SelectOpproof.glob $(ARCH)/SelectOpproof.v.beautified: $(ARCH)/SelectOpproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo
+backend/SelectLongproof.vo backend/SelectLongproof.glob backend/SelectLongproof.v.beautified: backend/SelectLongproof.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo $(ARCH)/SelectOpproof.vo backend/SelectLong.vo
+backend/Selectionproof.vo backend/Selectionproof.glob backend/Selectionproof.v.beautified: backend/Selectionproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo backend/SelectLong.vo backend/Selection.vo $(ARCH)/SelectOpproof.vo backend/SelectLongproof.vo
 backend/Registers.vo backend/Registers.glob backend/Registers.v.beautified: backend/Registers.v lib/Coqlib.vo common/AST.vo lib/Maps.vo lib/Ordered.vo
 backend/RTL.vo backend/RTL.glob backend/RTL.v.beautified: backend/RTL.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo
 backend/RTLgen.vo backend/RTLgen.glob backend/RTLgen.v.beautified: backend/RTLgen.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Switch.vo $(ARCH)/Op.vo backend/Registers.vo backend/CminorSel.vo backend/RTL.vo
@@ -43,59 +46,42 @@ backend/Inliningspec.vo backend/Inliningspec.glob backend/Inliningspec.v.beautif
 backend/Inliningproof.vo backend/Inliningproof.glob backend/Inliningproof.v.beautified: backend/Inliningproof.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/Inlining.vo backend/Inliningspec.vo backend/RTL.vo
 backend/Renumber.vo backend/Renumber.glob backend/Renumber.v.beautified: backend/Renumber.v lib/Coqlib.vo lib/Maps.vo lib/Postorder.vo backend/RTL.vo
 backend/Renumberproof.vo backend/Renumberproof.glob backend/Renumberproof.v.beautified: backend/Renumberproof.v lib/Coqlib.vo lib/Maps.vo lib/Postorder.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Renumber.vo
-backend/RTLtyping.vo backend/RTLtyping.glob backend/RTLtyping.v.beautified: backend/RTLtyping.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo common/Globalenvs.vo common/Values.vo lib/Integers.vo common/Events.vo backend/RTL.vo backend/Conventions.vo
+backend/RTLtyping.vo backend/RTLtyping.glob backend/RTLtyping.v.beautified: backend/RTLtyping.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo common/Globalenvs.vo common/Values.vo lib/Integers.vo common/Memory.vo common/Events.vo backend/RTL.vo backend/Conventions.vo
 backend/Kildall.vo backend/Kildall.glob backend/Kildall.v.beautified: backend/Kildall.v lib/Coqlib.vo lib/Iteration.vo lib/Maps.vo lib/Lattice.vo lib/Heaps.vo
 backend/Liveness.vo backend/Liveness.glob backend/Liveness.v.beautified: backend/Liveness.v lib/Coqlib.vo lib/Maps.vo lib/Lattice.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo
 $(ARCH)/ConstpropOp.vo $(ARCH)/ConstpropOp.glob $(ARCH)/ConstpropOp.v.beautified: $(ARCH)/ConstpropOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Registers.vo
 backend/Constprop.vo backend/Constprop.glob backend/Constprop.v.beautified: backend/Constprop.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo backend/Liveness.vo $(ARCH)/ConstpropOp.vo
-$(ARCH)/ConstpropOpproof.vo $(ARCH)/ConstpropOpproof.glob $(ARCH)/ConstpropOpproof.v.beautified: $(ARCH)/ConstpropOpproof.v lib/Coqlib.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo
+$(ARCH)/ConstpropOpproof.vo $(ARCH)/ConstpropOpproof.glob $(ARCH)/ConstpropOpproof.v.beautified: $(ARCH)/ConstpropOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo
 backend/Constpropproof.vo backend/Constpropproof.glob backend/Constpropproof.v.beautified: backend/Constpropproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo backend/Liveness.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo $(ARCH)/ConstpropOpproof.vo
 $(ARCH)/CombineOp.vo $(ARCH)/CombineOp.glob $(ARCH)/CombineOp.v.beautified: $(ARCH)/CombineOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo $(ARCH)/SelectOp.vo
 backend/CSE.vo backend/CSE.glob backend/CSE.v.beautified: backend/CSE.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo $(ARCH)/CombineOp.vo
-$(ARCH)/CombineOpproof.vo $(ARCH)/CombineOpproof.glob $(ARCH)/CombineOpproof.v.beautified: $(ARCH)/CombineOpproof.v lib/Coqlib.vo lib/Integers.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/RTL.vo $(ARCH)/CombineOp.vo backend/CSE.vo
+$(ARCH)/CombineOpproof.vo $(ARCH)/CombineOpproof.glob $(ARCH)/CombineOpproof.v.beautified: $(ARCH)/CombineOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo $(ARCH)/CombineOp.vo backend/CSE.vo
 backend/CSEproof.vo backend/CSEproof.glob backend/CSEproof.v.beautified: backend/CSEproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo $(ARCH)/CombineOp.vo $(ARCH)/CombineOpproof.vo backend/CSE.vo
-$(ARCH)/Machregs.vo $(ARCH)/Machregs.glob $(ARCH)/Machregs.v.beautified: $(ARCH)/Machregs.v lib/Coqlib.vo lib/Maps.vo common/AST.vo
-backend/Locations.vo backend/Locations.glob backend/Locations.v.beautified: backend/Locations.v lib/Coqlib.vo common/AST.vo common/Values.vo $(ARCH)/Machregs.vo
+$(ARCH)/Machregs.vo $(ARCH)/Machregs.glob $(ARCH)/Machregs.v.beautified: $(ARCH)/Machregs.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo
+backend/Locations.vo backend/Locations.glob backend/Locations.v.beautified: backend/Locations.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo common/Values.vo $(ARCH)/Machregs.vo
 $(ARCH)/$(VARIANT)/Conventions1.vo $(ARCH)/$(VARIANT)/Conventions1.glob $(ARCH)/$(VARIANT)/Conventions1.v.beautified: $(ARCH)/$(VARIANT)/Conventions1.v lib/Coqlib.vo common/AST.vo backend/Locations.vo
 backend/Conventions.vo backend/Conventions.glob backend/Conventions.v.beautified: backend/Conventions.v lib/Coqlib.vo common/AST.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Conventions1.vo
 backend/LTL.vo backend/LTL.glob backend/LTL.v.beautified: backend/LTL.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo
-backend/LTLtyping.vo backend/LTLtyping.glob backend/LTLtyping.v.beautified: backend/LTLtyping.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Conventions.vo
-backend/InterfGraph.vo backend/InterfGraph.glob backend/InterfGraph.v.beautified: backend/InterfGraph.v lib/Coqlib.vo lib/Ordered.vo backend/Registers.vo backend/Locations.vo
-backend/Coloring.vo backend/Coloring.glob backend/Coloring.v.beautified: backend/Coloring.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Locations.vo backend/Conventions.vo backend/InterfGraph.vo
-backend/Coloringproof.vo backend/Coloringproof.glob backend/Coloringproof.v.beautified: backend/Coloringproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Locations.vo backend/Conventions.vo backend/InterfGraph.vo backend/Coloring.vo
-backend/Allocation.vo backend/Allocation.glob backend/Allocation.v.beautified: backend/Allocation.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/Lattice.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Liveness.vo backend/Locations.vo backend/LTL.vo backend/Coloring.vo
-backend/Allocproof.vo backend/Allocproof.glob backend/Allocproof.v.beautified: backend/Allocproof.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Smallstep.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Liveness.vo backend/Locations.vo backend/LTL.vo backend/Conventions.vo backend/Coloring.vo backend/Coloringproof.vo backend/Allocation.vo
-backend/Alloctyping.vo backend/Alloctyping.glob backend/Alloctyping.v.beautified: backend/Alloctyping.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Locations.vo backend/LTL.vo backend/Coloring.vo backend/Coloringproof.vo backend/Allocation.vo backend/Allocproof.vo backend/RTLtyping.vo backend/LTLtyping.vo backend/Conventions.vo
+backend/Allocation.vo backend/Allocation.glob backend/Allocation.v.beautified: backend/Allocation.v lib/FSetAVLplus.vo lib/Coqlib.vo lib/Ordered.vo common/Errors.vo lib/Maps.vo lib/Lattice.vo common/AST.vo lib/Integers.vo common/Memdata.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo backend/Locations.vo backend/Conventions.vo backend/RTLtyping.vo backend/LTL.vo
+backend/Allocproof.vo backend/Allocproof.glob backend/Allocproof.v.beautified: backend/Allocproof.v lib/Coqlib.vo lib/Ordered.vo common/Errors.vo lib/Maps.vo lib/Lattice.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo backend/Locations.vo backend/Conventions.vo backend/LTL.vo backend/Allocation.vo
 backend/Tunneling.vo backend/Tunneling.glob backend/Tunneling.v.beautified: backend/Tunneling.v lib/Coqlib.vo lib/Maps.vo lib/UnionFind.vo common/AST.vo backend/LTL.vo
 backend/Tunnelingproof.vo backend/Tunnelingproof.glob backend/Tunnelingproof.v.beautified: backend/Tunnelingproof.v lib/Coqlib.vo lib/Maps.vo lib/UnionFind.vo common/AST.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Tunneling.vo
-backend/Tunnelingtyping.vo backend/Tunnelingtyping.glob backend/Tunnelingtyping.v.beautified: backend/Tunnelingtyping.v lib/Coqlib.vo lib/Maps.vo common/AST.vo backend/LTL.vo backend/LTLtyping.vo backend/Tunneling.vo backend/Tunnelingproof.vo
-backend/LTLin.vo backend/LTLin.glob backend/LTLin.v.beautified: backend/LTLin.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo
-backend/LTLintyping.vo backend/LTLintyping.glob backend/LTLintyping.v.beautified: backend/LTLintyping.v lib/Coqlib.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/LTLtyping.vo backend/Conventions.vo
-backend/Linearize.vo backend/Linearize.glob backend/Linearize.v.beautified: backend/Linearize.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo common/Errors.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/LTLin.vo backend/Kildall.vo lib/Lattice.vo
-backend/Linearizeproof.vo backend/Linearizeproof.glob backend/Linearizeproof.v.beautified: backend/Linearizeproof.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Errors.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/LTLtyping.vo backend/LTLin.vo backend/Linearize.vo lib/Lattice.vo
-backend/Linearizetyping.vo backend/Linearizetyping.glob backend/Linearizetyping.v.beautified: backend/Linearizetyping.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/LTLtyping.vo backend/LTLin.vo backend/Linearize.vo backend/LTLintyping.vo backend/Conventions.vo
-backend/CleanupLabels.vo backend/CleanupLabels.glob backend/CleanupLabels.v.beautified: backend/CleanupLabels.v lib/Coqlib.vo lib/Ordered.vo backend/LTLin.vo
-backend/CleanupLabelsproof.vo backend/CleanupLabelsproof.glob backend/CleanupLabelsproof.v.beautified: backend/CleanupLabelsproof.v lib/Coqlib.vo lib/Ordered.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/CleanupLabels.vo
-backend/CleanupLabelstyping.vo backend/CleanupLabelstyping.glob backend/CleanupLabelstyping.v.beautified: backend/CleanupLabelstyping.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/CleanupLabels.vo backend/LTLintyping.vo
 backend/Linear.vo backend/Linear.glob backend/Linear.v.beautified: backend/Linear.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Conventions.vo
-backend/Lineartyping.vo backend/Lineartyping.glob backend/Lineartyping.v.beautified: backend/Lineartyping.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Conventions.vo
-backend/Parallelmove.vo backend/Parallelmove.glob backend/Parallelmove.v.beautified: backend/Parallelmove.v lib/Coqlib.vo lib/Parmov.vo common/Values.vo common/AST.vo backend/Locations.vo backend/Conventions.vo
-backend/Reload.vo backend/Reload.glob backend/Reload.v.beautified: backend/Reload.v lib/Coqlib.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/Conventions.vo backend/Parallelmove.vo backend/Linear.vo
-backend/Reloadproof.vo backend/Reloadproof.glob backend/Reloadproof.v.beautified: backend/Reloadproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/RTLtyping.vo backend/LTLin.vo backend/LTLintyping.vo backend/Linear.vo backend/Parallelmove.vo backend/Reload.vo
-backend/Reloadtyping.vo backend/Reloadtyping.glob backend/Reloadtyping.v.beautified: backend/Reloadtyping.v lib/Coqlib.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/LTLintyping.vo backend/Linear.vo backend/Lineartyping.vo backend/Conventions.vo backend/Parallelmove.vo backend/Reload.vo backend/Reloadproof.vo
-backend/RRE.vo backend/RRE.glob backend/RRE.v.beautified: backend/RRE.v lib/Coqlib.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Linear.vo
-backend/RREproof.vo backend/RREproof.glob backend/RREproof.v.beautified: backend/RREproof.v lib/Axioms.vo lib/Coqlib.vo common/AST.vo common/Values.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Linear.vo backend/RRE.vo
-backend/RREtyping.vo backend/RREtyping.glob backend/RREtyping.v.beautified: backend/RREtyping.v lib/Coqlib.vo common/AST.vo backend/Locations.vo backend/Linear.vo backend/Lineartyping.vo backend/Conventions.vo backend/RRE.vo backend/RREproof.vo
+backend/Lineartyping.vo backend/Lineartyping.glob backend/Lineartyping.v.beautified: backend/Lineartyping.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/LTL.vo backend/Linear.vo
+backend/Linearize.vo backend/Linearize.glob backend/Linearize.v.beautified: backend/Linearize.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo common/Errors.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Kildall.vo lib/Lattice.vo
+backend/Linearizeproof.vo backend/Linearizeproof.glob backend/Linearizeproof.v.beautified: backend/Linearizeproof.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo lib/Lattice.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Errors.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Linearize.vo
+backend/CleanupLabels.vo backend/CleanupLabels.glob backend/CleanupLabels.v.beautified: backend/CleanupLabels.v lib/Coqlib.vo lib/Ordered.vo backend/Linear.vo
+backend/CleanupLabelsproof.vo backend/CleanupLabelsproof.glob backend/CleanupLabelsproof.v.beautified: backend/CleanupLabelsproof.v lib/Coqlib.vo lib/Ordered.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/CleanupLabels.vo
 backend/Mach.vo backend/Mach.glob backend/Mach.v.beautified: backend/Mach.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo
-backend/Bounds.vo backend/Bounds.glob backend/Bounds.v.beautified: backend/Bounds.v lib/Coqlib.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Lineartyping.vo backend/Conventions.vo
+backend/Bounds.vo backend/Bounds.glob backend/Bounds.v.beautified: backend/Bounds.v lib/Coqlib.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Conventions.vo
 $(ARCH)/$(VARIANT)/Stacklayout.vo $(ARCH)/$(VARIANT)/Stacklayout.glob $(ARCH)/$(VARIANT)/Stacklayout.v.beautified: $(ARCH)/$(VARIANT)/Stacklayout.v lib/Coqlib.vo backend/Bounds.vo
-backend/Stacking.vo backend/Stacking.glob backend/Stacking.v.beautified: backend/Stacking.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Bounds.vo backend/Mach.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo
+backend/Stacking.vo backend/Stacking.glob backend/Stacking.v.beautified: backend/Stacking.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Bounds.vo backend/Mach.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Lineartyping.vo
 backend/Stackingproof.vo backend/Stackingproof.glob backend/Stackingproof.v.beautified: backend/Stackingproof.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo common/Values.vo $(ARCH)/Op.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Lineartyping.vo backend/Mach.vo backend/Bounds.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Stacking.vo
 $(ARCH)/Asm.vo $(ARCH)/Asm.glob $(ARCH)/Asm.v.beautified: $(ARCH)/Asm.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Conventions.vo
 $(ARCH)/Asmgen.vo $(ARCH)/Asmgen.glob $(ARCH)/Asmgen.v.beautified: $(ARCH)/Asmgen.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo
 backend/Asmgenproof0.vo backend/Asmgenproof0.glob backend/Asmgenproof0.v.beautified: backend/Asmgenproof0.v lib/Coqlib.vo lib/Intv.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo
-$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob $(ARCH)/Asmgenproof1.v.beautified: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo backend/Conventions.vo
-$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob $(ARCH)/Asmgenproof.v.beautified: $(ARCH)/Asmgenproof.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Conventions.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo $(ARCH)/Asmgenproof1.vo
+$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob $(ARCH)/Asmgenproof1.v.beautified: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo backend/Asmgenproof0.vo
+$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob $(ARCH)/Asmgenproof.v.beautified: $(ARCH)/Asmgenproof.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo $(ARCH)/Asmgenproof1.vo
 cfrontend/Ctypes.vo cfrontend/Ctypes.glob cfrontend/Ctypes.v.beautified: cfrontend/Ctypes.v lib/Coqlib.vo common/AST.vo common/Errors.vo
 cfrontend/Cop.vo cfrontend/Cop.glob cfrontend/Cop.v.beautified: cfrontend/Cop.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo cfrontend/Ctypes.vo
 cfrontend/Csyntax.vo cfrontend/Csyntax.glob cfrontend/Csyntax.v.beautified: cfrontend/Csyntax.v lib/Coqlib.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo cfrontend/Ctypes.vo cfrontend/Cop.vo
@@ -116,7 +102,7 @@ cfrontend/Cshmgenproof.vo cfrontend/Cshmgenproof.glob cfrontend/Cshmgenproof.v.b
 cfrontend/Csharpminor.vo cfrontend/Csharpminor.glob cfrontend/Csharpminor.v.beautified: cfrontend/Csharpminor.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo backend/Cminor.vo common/Smallstep.vo
 cfrontend/Cminorgen.vo cfrontend/Cminorgen.glob cfrontend/Cminorgen.v.beautified: cfrontend/Cminorgen.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/Ordered.vo common/AST.vo lib/Integers.vo lib/Floats.vo cfrontend/Csharpminor.vo backend/Cminor.vo
 cfrontend/Cminorgenproof.vo cfrontend/Cminorgenproof.glob cfrontend/Cminorgenproof.v.beautified: cfrontend/Cminorgenproof.v lib/Coqlib.vo lib/Intv.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo common/Switch.vo cfrontend/Csharpminor.vo backend/Cminor.vo cfrontend/Cminorgen.vo
-driver/Compiler.vo driver/Compiler.glob driver/Compiler.v.beautified: driver/Compiler.v lib/Coqlib.vo common/Errors.vo common/AST.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo cfrontend/Cstrategy.vo cfrontend/Cexec.vo cfrontend/Clight.vo cfrontend/Csharpminor.vo backend/Cminor.vo backend/CminorSel.vo backend/RTL.vo backend/LTL.vo backend/LTLin.vo backend/Linear.vo backend/Mach.vo $(ARCH)/Asm.vo cfrontend/Initializers.vo cfrontend/SimplExpr.vo cfrontend/SimplLocals.vo cfrontend/Cshmgen.vo cfrontend/Cminorgen.vo backend/Selection.vo backend/RTLgen.vo backend/Tailcall.vo backend/Inlining.vo backend/Renumber.vo backend/Constprop.vo backend/CSE.vo backend/Allocation.vo backend/Tunneling.vo backend/Linearize.vo backend/CleanupLabels.vo backend/Reload.vo backend/RRE.vo backend/Stacking.vo $(ARCH)/Asmgen.vo backend/RTLtyping.vo backend/LTLtyping.vo backend/LTLintyping.vo backend/Lineartyping.vo cfrontend/SimplExprproof.vo cfrontend/SimplLocalsproof.vo cfrontend/Cshmgenproof.vo cfrontend/Cminorgenproof.vo backend/Selectionproof.vo backend/RTLgenproof.vo backend/Tailcallproof.vo backend/Inliningproof.vo backend/Renumberproof.vo backend/Constpropproof.vo backend/CSEproof.vo backend/Allocproof.vo backend/Alloctyping.vo backend/Tunnelingproof.vo backend/Tunnelingtyping.vo backend/Linearizeproof.vo backend/Linearizetyping.vo backend/CleanupLabelsproof.vo backend/CleanupLabelstyping.vo backend/Reloadproof.vo backend/Reloadtyping.vo backend/RREproof.vo backend/RREtyping.vo backend/Stackingproof.vo $(ARCH)/Asmgenproof.vo
+driver/Compiler.vo driver/Compiler.glob driver/Compiler.v.beautified: driver/Compiler.v lib/Coqlib.vo common/Errors.vo common/AST.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo cfrontend/Cstrategy.vo cfrontend/Cexec.vo cfrontend/Clight.vo cfrontend/Csharpminor.vo backend/Cminor.vo backend/CminorSel.vo backend/RTL.vo backend/LTL.vo backend/Linear.vo backend/Mach.vo $(ARCH)/Asm.vo cfrontend/Initializers.vo cfrontend/SimplExpr.vo cfrontend/SimplLocals.vo cfrontend/Cshmgen.vo cfrontend/Cminorgen.vo backend/Selection.vo backend/RTLgen.vo backend/Tailcall.vo backend/Inlining.vo backend/Renumber.vo backend/Constprop.vo backend/CSE.vo backend/Allocation.vo backend/Tunneling.vo backend/Linearize.vo backend/CleanupLabels.vo backend/Stacking.vo $(ARCH)/Asmgen.vo cfrontend/SimplExprproof.vo cfrontend/SimplLocalsproof.vo cfrontend/Cshmgenproof.vo cfrontend/Cminorgenproof.vo backend/Selectionproof.vo backend/RTLgenproof.vo backend/Tailcallproof.vo backend/Inliningproof.vo backend/Renumberproof.vo backend/Constpropproof.vo backend/CSEproof.vo backend/Allocproof.vo backend/Tunnelingproof.vo backend/Linearizeproof.vo backend/CleanupLabelsproof.vo backend/Stackingproof.vo $(ARCH)/Asmgenproof.vo
 driver/Complements.vo driver/Complements.glob driver/Complements.v.beautified: driver/Complements.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo common/Behaviors.vo cfrontend/Csyntax.vo cfrontend/Csem.vo cfrontend/Cstrategy.vo cfrontend/Clight.vo backend/Cminor.vo backend/RTL.vo $(ARCH)/Asm.vo driver/Compiler.vo common/Errors.vo
 flocq/Core/Fcore_float_prop.vo flocq/Core/Fcore_float_prop.glob flocq/Core/Fcore_float_prop.v.beautified: flocq/Core/Fcore_float_prop.v flocq/Core/Fcore_Raux.vo flocq/Core/Fcore_defs.vo
 flocq/Core/Fcore_Zaux.vo flocq/Core/Fcore_Zaux.glob flocq/Core/Fcore_Zaux.v.beautified: flocq/Core/Fcore_Zaux.v
diff --git a/Makefile b/Makefile
index fed2660..0db4118 100644
--- a/Makefile
+++ b/Makefile
@@ -55,7 +55,7 @@ FLOCQ=$(FLOCQ_CORE) $(FLOCQ_PROP) $(FLOCQ_CALC) $(FLOCQ_APPLI)
 
 LIB=Axioms.v Coqlib.v Intv.v Maps.v Heaps.v Lattice.v Ordered.v \
   Iteration.v Integers.v Floats.v Parmov.v UnionFind.v Wfsimpl.v \
-  Postorder.v
+  Postorder.v FSetAVLplus.v
 
 # Parts common to the front-ends and the back-end (in common/)
 
@@ -66,7 +66,8 @@ COMMON=Errors.v AST.v Events.v Globalenvs.v Memdata.v Memtype.v Memory.v \
 
 BACKEND=\
   Cminor.v Op.v CminorSel.v \
-  SelectOp.v Selection.v SelectOpproof.v Selectionproof.v \
+  SelectOp.v SelectLong.v Selection.v \
+  SelectOpproof.v SelectLongproof.v Selectionproof.v \
   Registers.v RTL.v \
   RTLgen.v RTLgenspec.v RTLgenproof.v \
   Tailcall.v Tailcallproof.v \
@@ -76,16 +77,12 @@ BACKEND=\
   Kildall.v Liveness.v \
   ConstpropOp.v Constprop.v ConstpropOpproof.v Constpropproof.v \
   CombineOp.v CSE.v CombineOpproof.v CSEproof.v \
-  Machregs.v Locations.v Conventions1.v Conventions.v LTL.v LTLtyping.v \
-  InterfGraph.v Coloring.v Coloringproof.v \
-  Allocation.v Allocproof.v Alloctyping.v \
-  Tunneling.v Tunnelingproof.v Tunnelingtyping.v \
-  LTLin.v LTLintyping.v \
-  Linearize.v Linearizeproof.v Linearizetyping.v \
-  CleanupLabels.v CleanupLabelsproof.v CleanupLabelstyping.v \
+  Machregs.v Locations.v Conventions1.v Conventions.v LTL.v \
+  Allocation.v Allocproof.v \
+  Tunneling.v Tunnelingproof.v \
   Linear.v Lineartyping.v \
-  Parallelmove.v Reload.v Reloadproof.v Reloadtyping.v \
-  RRE.v RREproof.v RREtyping.v \
+  Linearize.v Linearizeproof.v \
+  CleanupLabels.v CleanupLabelsproof.v \
   Mach.v \
   Bounds.v Stacklayout.v Stacking.v Stackingproof.v \
   Asm.v Asmgen.v Asmgenproof0.v Asmgenproof1.v Asmgenproof.v
diff --git a/arm/Asm.v b/arm/Asm.v
index cad7188..60dae47 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -141,6 +141,7 @@ Inductive instruction : Type :=
   | Pldrh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *)
   | Pldrsb: ireg -> ireg -> shift_addr -> instruction (**r signed int8 load *)
   | Pldrsh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *)
+  | Pmla: ireg -> ireg -> ireg -> ireg -> instruction      (**r integer multiply-add *)
   | Pmov: ireg -> shift_op -> instruction          (**r integer move *)
   | Pmovc: crbit -> ireg -> shift_op -> instruction (**r integer conditional move *)
   | Pmul: ireg -> ireg -> ireg -> instruction      (**r integer multiplication *)
@@ -180,7 +181,7 @@ Inductive instruction : Type :=
   | Plabel: label -> instruction                   (**r define a code label *)
   | Ploadsymbol: ireg -> ident -> int -> instruction (**r load the address of a symbol *)
   | Pbtbl: ireg -> list label -> instruction       (**r N-way branch through a jump table *)
-  | Pbuiltin: external_function -> list preg -> preg -> instruction  (**r built-in function *)
+  | Pbuiltin: external_function -> list preg -> list preg -> instruction  (**r built-in function *)
   | Pannot: external_function -> list annot_param -> instruction (**r annotation statement *)
 
 with annot_param : Type :=
@@ -261,6 +262,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset :=
   | r :: l' => undef_regs l' (rs#r <- Vundef)
   end.
 
+(** Assigning multiple registers *)
+
+Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset :=
+  match rl, vl with
+  | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1)
+  | _, _ => rs
+  end.
+
 Section RELSEM.
 
 (** Looking up instructions in a code sequence by position. *)
@@ -461,6 +470,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       exec_load Mint8signed (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pldrsh r1 r2 sa => 
       exec_load Mint16signed (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+  | Pmla r1 r2 r3 r4 =>      
+      Next (nextinstr (rs#r1 <- (Val.add (Val.mul rs#r2 rs#r3) rs#r4))) m
   | Pmov r1 so =>          
       Next (nextinstr (rs#r1 <- (eval_shift_op so rs))) m
   | Pmovc bit r1 so =>          
@@ -522,9 +533,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pfuitod r1 r2 =>
       Next (nextinstr (rs#r1 <- (Val.maketotal (Val.floatofintu rs#r2)))) m
   | Pftosizd r1 r2 =>
-      Next (nextinstr (rs#r1 <- (Val.maketotal (Val.intoffloat rs#r2)))) m
+      Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intoffloat rs#r2)))) m
   | Pftouizd r1 r2 =>
-      Next (nextinstr (rs#r1 <- (Val.maketotal (Val.intuoffloat rs#r2)))) m
+      Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intuoffloat rs#r2)))) m
   | Pfcvtsd r1 r2 =>
       Next (nextinstr (rs#r1 <- (Val.singleoffloat rs#r2))) m
   | Pfldd r1 r2 n =>
@@ -535,7 +546,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       exec_store Mfloat64al32 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pfsts r1 r2 n =>
       match exec_store Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m with
-      | Next rs' m' => Next (rs'#FR7 <- Vundef) m'
+      | Next rs' m' => Next (rs'#FR6 <- Vundef) m'
       | Stuck => Stuck
       end
   (* Pseudo-instructions *)
@@ -544,7 +555,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       let sp := (Vptr stk Int.zero) in
       match Mem.storev Mint32 m1 (Val.add sp (Vint pos)) rs#IR13 with
       | None => Stuck
-      | Some m2 => Next (nextinstr (rs #IR10 <- (rs#IR13) #IR13 <- sp)) m2
+      | Some m2 => Next (nextinstr (rs #IR12 <- (rs#IR13) #IR13 <- sp)) m2
       end
   | Pfreeframe sz pos =>
       match Mem.loadv Mint32 m (Val.add rs#IR13 (Vint pos)) with
@@ -585,37 +596,36 @@ Definition preg_of (r: mreg) : preg :=
   match r with
   | R0 => IR0 | R1 => IR1 | R2 => IR2 | R3 => IR3
   | R4 => IR4 | R5 => IR5 | R6 => IR6 | R7 => IR7
-  | R8 => IR8 | R9 => IR9 | R11 => IR11
-  | IT1 => IR10 | IT2 => IR12
+  | R8 => IR8 | R9 => IR9 | R10 => IR10 | R11 => IR11
+  | R12 => IR12
   | F0 => FR0 | F1 => FR1 | F2 => FR2 | F3 => FR3
-  | F4 => FR4 | F5 => FR5
+  | F4 => FR4 | F5 => FR5 | F6 => FR6 | F7 => FR7
   | F8 => FR8 | F9 => FR9 | F10 => FR10 | F11 => FR11
   | F12 => FR12 | F13 => FR13 | F14 => FR14 | F15 => FR15
-  | FT1 => FR6 | FT2 => FR7
   end.
 
 (** Extract the values of the arguments of an external call.
     We exploit the calling conventions from module [Conventions], except that
     we use ARM registers instead of locations. *)
 
+Definition chunk_of_type (ty: typ) :=
+  match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end.
+
 Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
   | extcall_arg_reg: forall r,
       extcall_arg rs m (R r) (rs (preg_of r))
-  | extcall_arg_int_stack: forall ofs bofs v,
-      bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mint32 m (Val.add (rs (IR IR13)) (Vint (Int.repr bofs))) = Some v ->
-      extcall_arg rs m (S (Outgoing ofs Tint)) v
-  | extcall_arg_float_stack: forall ofs bofs v,
+  | extcall_arg_stack: forall ofs ty bofs v,
       bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mfloat64al32 m (Val.add (rs (IR IR13)) (Vint (Int.repr bofs))) = Some v ->
-      extcall_arg rs m (S (Outgoing ofs Tfloat)) v.
+      Mem.loadv (chunk_of_type ty) m
+                (Val.add (rs (IR IR13)) (Vint (Int.repr bofs))) = Some v ->
+      extcall_arg rs m (S Outgoing ofs ty) v.
 
 Definition extcall_arguments
     (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
   list_forall2 (extcall_arg rs m) (loc_arguments sg) args.
 
-Definition loc_external_result (sg: signature) : preg :=
-  preg_of (loc_result sg).
+Definition loc_external_result (sg: signature) : list preg :=
+  map preg_of (loc_result sg).
 
 (** Extract the values of the arguments of an annotation. *)
 
@@ -645,28 +655,30 @@ Inductive step: state -> trace -> state -> Prop :=
       exec_instr f i rs m = Next rs' m' ->
       step (State rs m) E0 (State rs' m')
   | exec_step_builtin:
-      forall b ofs f ef args res rs m t v m',
+      forall b ofs f ef args res rs m t vl rs' m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal f) ->
       find_instr (Int.unsigned ofs) (fn_code f) = Some (Pbuiltin ef args res) ->
-      external_call ef ge (map rs args) m t v m' ->
-      step (State rs m) t (State (nextinstr(rs # res <- v)) m')
+      external_call' ef ge (map rs args) m t vl m' ->
+      rs' = nextinstr 
+              (set_regs res vl
+                (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+      step (State rs m) t (State rs' m')
   | exec_step_annot:
       forall b ofs f ef args rs m vargs t v m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal f) ->
       find_instr (Int.unsigned ofs) (fn_code f) = Some (Pannot ef args) ->
       annot_arguments rs m args vargs ->
-      external_call ef ge vargs m t v m' ->
+      external_call' ef ge vargs m t v m' ->
       step (State rs m) t (State (nextinstr rs) m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
       rs PC = Vptr b Int.zero ->
       Genv.find_funct_ptr ge b = Some (External ef) ->
-      external_call ef ge args m t res m' ->
+      external_call' ef ge args m t res m' ->
       extcall_arguments rs m (ef_sig ef) args ->
-      rs' = (rs#(loc_external_result (ef_sig ef)) <- res
-               #PC <- (rs IR14)) ->
+      rs' = (set_regs (loc_external_result (ef_sig ef) ) res rs)#PC <- (rs IR14) ->
       step (State rs m) t (State rs' m').
 
 End RELSEM.
@@ -734,21 +746,21 @@ Ltac Equalities :=
   discriminate.
   discriminate.
   inv H11. 
-  exploit external_call_determ. eexact H4. eexact H11. intros [A B].
+  exploit external_call_determ'. eexact H4. eexact H9. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
   inv H12.
   assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0.
-  exploit external_call_determ. eexact H5. eexact H13. intros [A B].
+  exploit external_call_determ'. eexact H5. eexact H13. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
   assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
-  exploit external_call_determ. eexact H3. eexact H8. intros [A B].
+  exploit external_call_determ'. eexact H3. eexact H8. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
 (* trace length *)
   red; intros; inv H; simpl.
   omega.
-  eapply external_call_trace_length; eauto.
-  eapply external_call_trace_length; eauto.
-  eapply external_call_trace_length; eauto.
+  inv H3; eapply external_call_trace_length; eauto.
+  inv H4; eapply external_call_trace_length; eauto.
+  inv H2; eapply external_call_trace_length; eauto.
 (* initial states *)
   inv H; inv H0. f_equal. congruence.
 (* final no step *)
@@ -768,15 +780,3 @@ Definition data_preg (r: preg) : bool :=
   | PC => false
   end.
 
-Definition nontemp_preg (r: preg) : bool :=
-  match r with
-  | IR IR14 => false
-  | IR IR10 => false
-  | IR IR12 => false
-  | IR _ => true
-  | FR FR6 => false
-  | FR FR7 => false
-  | FR _ => true
-  | CR _ => false
-  | PC => false
-  end.
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index d158c77..1ff28d9 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -290,9 +290,15 @@ Definition transl_op
       OK (rsubimm r r1 n k)
   | Omul, a1 :: a2 :: nil =>
       do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (if ireg_eq r r1 || ireg_eq r r2
-          then Pmul IR14 r1 r2 :: Pmov r (SOreg IR14) :: k
-          else Pmul r r1 r2 :: k)
+      OK (if negb (ireg_eq r r1) then Pmul r r1 r2 :: k
+          else if negb (ireg_eq r r2) then Pmul r r2 r1 :: k
+          else Pmul IR14 r1 r2 :: Pmov r (SOreg IR14) :: k)
+  | Omla, a1 :: a2 :: a3 :: nil =>
+      do r <- ireg_of res; do r1 <- ireg_of a1; 
+      do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (if negb (ireg_eq r r1) then Pmla r r1 r2 r3 :: k
+          else if negb (ireg_eq r r2) then Pmla r r2 r1 r3 :: k
+          else Pmla IR14 r1 r2 r3 :: Pmov r (SOreg IR14) :: k)
   | Odiv, a1 :: a2 :: nil =>
       do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
       OK (Psdiv r r1 r2 :: k)
@@ -420,6 +426,7 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
   match ty with
   | Tint   => do r <- ireg_of dst; OK (loadind_int base ofs r k)
   | Tfloat => do r <- freg_of dst; OK (loadind_float base ofs r k)
+  | Tlong => Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind_int (src: ireg) (base: ireg) (ofs: int) (k: code) :=
@@ -432,6 +439,7 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
   match ty with
   | Tint   => do r <- ireg_of src; OK (storeind_int r base ofs k)
   | Tfloat => do r <- freg_of src; OK (storeind_float r base ofs k)
+  | Tlong => Error (msg "Asmgen.storeind")
   end.
 
 (** Translation of memory accesses *)
@@ -512,6 +520,8 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
       transl_memory_access_float Pflds mk_immed_mem_float dst addr args k
   | Mfloat64 | Mfloat64al32 =>
       transl_memory_access_float Pfldd mk_immed_mem_float dst addr args k
+  | Mint64 =>
+      Error (msg "Asmgen.transl_load")
   end.
 
 Definition transl_store (chunk: memory_chunk) (addr: addressing)
@@ -531,6 +541,8 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing)
       transl_memory_access_float Pfsts mk_immed_mem_float src addr args k
   | Mfloat64 | Mfloat64al32 =>
       transl_memory_access_float Pfstd mk_immed_mem_float src addr args k
+  | Mint64 =>
+      Error (msg "Asmgen.transl_store")
   end.
 
 (** Translation of arguments to annotations *)
@@ -544,17 +556,17 @@ Definition transl_annot_param (p: Mach.annot_param) : Asm.annot_param :=
 (** Translation of a Mach instruction. *)
 
 Definition transl_instr (f: Mach.function) (i: Mach.instruction)
-                        (r10_is_parent: bool) (k: code) :=
+                        (r12_is_parent: bool) (k: code) :=
   match i with
   | Mgetstack ofs ty dst =>
       loadind IR13 ofs ty dst k
   | Msetstack src ofs ty =>
       storeind src IR13 ofs ty k
   | Mgetparam ofs ty dst =>
-      do c <- loadind IR10 ofs ty dst k;
-      OK (if r10_is_parent
+      do c <- loadind IR12 ofs ty dst k;
+      OK (if r12_is_parent
           then c
-          else loadind_int IR13 f.(fn_link_ofs) IR10 c)
+          else loadind_int IR13 f.(fn_link_ofs) IR12 c)
   | Mop op args res =>
       transl_op op args res k
   | Mload chunk addr args dst =>
@@ -573,7 +585,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
       OK (loadind_int IR13 f.(fn_retaddr_ofs) IR14
            (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbsymb symb sig :: k))
   | Mbuiltin ef args res =>
-      OK (Pbuiltin ef (map preg_of args) (preg_of res) :: k)
+      OK (Pbuiltin ef (map preg_of args) (map preg_of res) :: k)
   | Mannot ef args =>
       OK (Pannot ef (map transl_annot_param args) :: k)
   | Mlabel lbl =>
@@ -596,8 +608,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
 Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
   match i with
   | Msetstack src ofs ty => before
-  | Mgetparam ofs ty dst => negb (mreg_eq dst IT1)
-  | Mop Omove args res => before && negb (mreg_eq res IT1)
+  | Mgetparam ofs ty dst => negb (mreg_eq dst R12)
+  | Mop Omove args res => before && negb (mreg_eq res R12)
   | _ => false
   end.
 
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index 986d474..aede0da 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -264,7 +264,8 @@ Proof.
 Opaque Int.eq.
   unfold transl_op; intros; destruct op; TailNoLabel.
   destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
-  destruct (ireg_eq x x0 || ireg_eq x x1); TailNoLabel.
+  destruct (negb (ireg_eq x x0)). TailNoLabel. destruct (negb (ireg_eq x x1)); TailNoLabel.
+  destruct (negb (ireg_eq x x0)). TailNoLabel. destruct (negb (ireg_eq x x1)); TailNoLabel.
   eapply tail_nolabel_trans; TailNoLabel.
   eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel.
 Qed.
@@ -420,7 +421,7 @@ Inductive match_states: Mach.state -> Asm.state -> Prop :=
         (MEXT: Mem.extends m m')
         (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
         (AG: agree ms sp rs)
-        (DXP: ep = true -> rs#IR10 = parent_sp s),
+        (DXP: ep = true -> rs#IR12 = parent_sp s),
       match_states (Mach.State s fb sp c ms m)
                    (Asm.State rs m')
   | match_states_call:
@@ -451,7 +452,7 @@ Lemma exec_straight_steps:
    exists rs2,
        exec_straight tge tf c rs1 m1' k rs2 m2'
     /\ agree ms2 sp rs2
-    /\ (it1_is_parent ep i = true -> rs2#IR10 = parent_sp s)) ->
+    /\ (it1_is_parent ep i = true -> rs2#IR12 = parent_sp s)) ->
   exists st',
   plus step tge (State rs1 m1') E0 st' /\
   match_states (Mach.State s fb sp c ms2 m2) st'.
@@ -515,9 +516,9 @@ Definition measure (s: Mach.state) : nat :=
   | Mach.Returnstate _ _ _ => 1%nat
   end.
 
-Remark preg_of_not_R10: forall r, negb (mreg_eq r IT1) = true -> IR IR10 <> preg_of r.
+Remark preg_of_not_R12: forall r, negb (mreg_eq r R12) = true -> IR IR12 <> preg_of r.
 Proof.
-  intros. change (IR IR10) with (preg_of IT1). red; intros. 
+  intros. change (IR IR12) with (preg_of R12). red; intros. 
   exploit preg_of_injective; eauto. intros; subst r. 
   unfold proj_sumbool in H; rewrite dec_eq_true in H; discriminate.
 Qed.
@@ -555,7 +556,7 @@ Proof.
   rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
   exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
   exists rs'; split. eauto.
-  split. change (undef_setstack rs) with rs. apply agree_exten with rs0; auto with asmgen.
+  split. change (Mach.undef_regs (destroyed_by_op Omove) rs) with rs. apply agree_exten with rs0; auto with asmgen.
   simpl; intros. rewrite Q; auto with asmgen.
 
 - (* Mgetparam *)
@@ -569,24 +570,24 @@ Proof.
 Opaque loadind.
   left; eapply exec_straight_steps; eauto; intros.
   destruct ep; monadInv TR.
-(* R10 contains parent *)
+(* R12 contains parent *)
   exploit loadind_correct. eexact EQ.
   instantiate (2 := rs0). rewrite DXP; eauto.
   intros [rs1 [P [Q R]]].
   exists rs1; split. eauto. 
   split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
   simpl; intros. rewrite R; auto with asmgen. 
-  apply preg_of_not_R10; auto.
+  apply preg_of_not_R12; auto.
 (* GPR11 does not contain parent *)
-  exploit loadind_int_correct. eexact A. instantiate (1 := IR10). intros [rs1 [P [Q R]]].
+  exploit loadind_int_correct. eexact A. instantiate (1 := IR12). intros [rs1 [P [Q R]]].
   exploit loadind_correct. eexact EQ. instantiate (2 := rs1). rewrite Q. eauto. intros [rs2 [S [T U]]]. 
   exists rs2; split. eapply exec_straight_trans; eauto.
   split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
-  instantiate (1 := rs1#IR10 <- (rs2#IR10)). intros.
+  instantiate (1 := rs1#IR12 <- (rs2#IR12)). intros.
   rewrite Pregmap.gso; auto with asmgen.
-  congruence. intros. unfold Pregmap.set. destruct (PregEq.eq r' IR10). congruence. auto with asmgen. 
+  congruence. intros. unfold Pregmap.set. destruct (PregEq.eq r' IR12). congruence. auto with asmgen. 
   simpl; intros. rewrite U; auto with asmgen. 
-  apply preg_of_not_R10; auto.
+  apply preg_of_not_R12; auto.
 
 - (* Mop *)
   assert (eval_operation tge sp op rs##args m = Some v). 
@@ -597,12 +598,9 @@ Opaque loadind.
   exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
   assert (S: Val.lessdef v (rs2 (preg_of res))) by (eapply Val.lessdef_trans; eauto).
   exists rs2; split. eauto. split.
-  assert (agree (Regmap.set res v (undef_temps rs)) sp rs2).
-    eapply agree_set_undef_mreg; eauto with asmgen.
-  unfold undef_op; destruct op; auto.
-  change (undef_move rs) with rs. eapply agree_set_mreg; eauto.
+  eapply agree_set_undef_mreg; eauto with asmgen.
   simpl. destruct op; try congruence. destruct ep; simpl; try congruence. intros. 
-  rewrite R; auto. apply preg_of_not_R10; auto.
+  rewrite R; auto. apply preg_of_not_R12; auto. exact I.
 
 - (* Mload *)
   assert (eval_addressing tge sp addr rs##args = Some a). 
@@ -627,7 +625,7 @@ Opaque loadind.
   intros. simpl in TR. 
   exploit transl_store_correct; eauto. intros [rs2 [P Q]].
   exists rs2; split. eauto.
-  split. eapply agree_exten_temps; eauto.
+  split. eapply agree_undef_regs; eauto. 
   simpl; congruence.
 
 - (* Mcall *)
@@ -695,7 +693,7 @@ Opaque loadind.
     exploit loadind_int_correct. eexact C. intros [rs1 [P [Q R]]]. 
     econstructor; split.
     eapply exec_straight_trans. eexact P. apply exec_straight_one. 
-    simpl. rewrite R; auto with asmgen. rewrite A. 
+    simpl. rewrite R; auto with asmgen. unfold Mach.chunk_of_type in A. rewrite A. 
     rewrite <- (sp_val _ _ _ AG). rewrite E. eauto. auto. 
     split. Simpl.
     split. Simpl.
@@ -741,19 +739,21 @@ Opaque loadind.
   inv AT. monadInv H3. 
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H2); intro NOOV.
-  exploit external_call_mem_extends; eauto. eapply preg_vals; eauto.
+  exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto.
   intros [vres' [m2' [A [B [C D]]]]].
   left. econstructor; split. apply plus_one. 
   eapply exec_step_builtin. eauto. eauto.
   eapply find_instr_tail; eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
+  eauto.
   econstructor; eauto.
-  Simpl. rewrite <- H0. simpl. econstructor; eauto.
+  Simpl. rewrite set_pregs_other_2. rewrite undef_regs_other_2. rewrite <- H0. simpl. econstructor; eauto.
   eapply code_tail_next_int; eauto.
-  apply agree_nextinstr. eapply agree_set_undef_mreg; eauto. 
-  rewrite Pregmap.gss. auto. 
-  intros. Simpl. 
+  apply preg_notin_charact. auto with asmgen.
+  apply preg_notin_charact. auto with asmgen.
+  apply agree_nextinstr. eapply agree_set_mregs; auto.
+  eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto. 
   congruence.
 
 - (* Mannot *)
@@ -761,12 +761,12 @@ Opaque loadind.
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H3); intro NOOV.
   exploit annot_arguments_match; eauto. intros [vargs' [P Q]]. 
-  exploit external_call_mem_extends; eauto.
+  exploit external_call_mem_extends'; eauto.
   intros [vres' [m2' [A [B [C D]]]]].
   left. econstructor; split. apply plus_one. 
   eapply exec_step_annot. eauto. eauto.
   eapply find_instr_tail; eauto. eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   eapply match_states_intro with (ep := false); eauto with coqlib.
   unfold nextinstr. rewrite Pregmap.gss. 
@@ -796,7 +796,7 @@ Opaque loadind.
   destruct (transl_cond_correct tge tf cond args _ rs0 m' _ TR) as [rs' [A [B C]]].
   rewrite EC in B.
   econstructor; econstructor; econstructor; split. eexact A. 
-  split. eapply agree_exten_temps; eauto with asmgen.
+  split. eapply agree_undef_regs; eauto with asmgen.
   simpl. rewrite B. reflexivity. 
 
 - (* Mcond false *)
@@ -807,7 +807,7 @@ Opaque loadind.
   econstructor; split.
   eapply exec_straight_trans. eexact A. 
   apply exec_straight_one. simpl. rewrite B. reflexivity. auto.
-  split. eapply agree_exten_temps; eauto with asmgen.
+  split. eapply agree_undef_regs; eauto with asmgen.
   intros; Simpl.
   simpl. congruence.
 
@@ -827,7 +827,7 @@ Opaque loadind.
   eapply find_instr_tail; eauto. 
   simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
   econstructor; eauto. 
-  eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simpl. 
+  eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen. Simpl. 
   congruence.
 
 - (* Mreturn *)
@@ -887,7 +887,7 @@ Opaque loadind.
   exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto. 
   intros [m3' [P Q]].
   (* Execution of function prologue *)
-  set (rs2 := nextinstr (rs0#IR10 <- (parent_sp s) #IR13 <- (Vptr stk Int.zero))).
+  set (rs2 := nextinstr (rs0#IR12 <- (parent_sp s) #IR13 <- (Vptr stk Int.zero))).
   set (rs3 := nextinstr rs2).
   assert (EXEC_PROLOGUE:
             exec_straight tge x
@@ -896,7 +896,7 @@ Opaque loadind.
   rewrite <- H5 at 2; unfold fn_code.
   apply exec_straight_two with rs2 m2'.
   unfold exec_instr. rewrite C. fold sp.
-  rewrite <- (sp_val _ _ _ AG). unfold chunk_of_type in F. rewrite F. auto. 
+  rewrite <- (sp_val _ _ _ AG). unfold Mach.chunk_of_type in F. rewrite F. auto. 
   simpl. auto.
   simpl. unfold exec_store. change (rs2 IR14) with (rs0 IR14). 
   rewrite Int.add_zero_l. simpl. unfold chunk_of_type in P. simpl in P.
@@ -911,7 +911,7 @@ Opaque loadind.
   unfold rs3, rs2.
   apply agree_nextinstr. apply agree_nextinstr.
   eapply agree_change_sp. 
-  apply agree_exten_temps with rs0; eauto.
+  apply agree_undef_regs with rs0; eauto.
   intros. Simpl. congruence.
 
 - (* external function *)
@@ -919,16 +919,15 @@ Opaque loadind.
   intros [tf [A B]]. simpl in B. inv B.
   exploit extcall_arguments_match; eauto. 
   intros [args' [C D]].
-  exploit external_call_mem_extends; eauto.
+  exploit external_call_mem_extends'; eauto.
   intros [res' [m2' [P [Q [R S]]]]].
   left; econstructor; split.
   apply plus_one. eapply exec_step_external; eauto. 
-  eapply external_call_symbols_preserved; eauto. 
+  eapply external_call_symbols_preserved'; eauto. 
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto.
-  eapply agree_set_mreg; eauto. 
-  rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto. 
-  intros; Simpl.
+  apply agree_set_other; auto with asmgen.
+  eapply agree_set_mregs; eauto.
 
 - (* return *)
   inv STACKS. simpl in *.
@@ -962,8 +961,8 @@ Lemma transf_final_states:
 Proof.
   intros. inv H0. inv H. inv STACKS. constructor.
   auto. 
-  compute in H1. 
-  generalize (preg_val _ _ _ R0 AG). rewrite H1. intros LD; inv LD. auto.
+  compute in H1. inv H1. 
+  generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
 Qed.
 
 Theorem transf_program_correct:
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index 06d6d17..e27ee80 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -541,7 +541,8 @@ Ltac ArgsInv :=
   | [ H: Error _ = OK _ |- _ ] => discriminate
   | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
   | [ H: bind _ _ = OK _ |- _ ] => monadInv H
-  | [ H: assertion _ = OK _ |- _ ] => monadInv H
+  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H
+  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H
   end);
   subst;
   repeat (match goal with
@@ -685,7 +686,7 @@ Lemma transl_op_correct_same:
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ rs'#(preg_of res) = v
-  /\ forall r, data_preg r = true -> r <> preg_of res -> rs'#r = rs#r.
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs'#r = rs#r.
 Proof.
   intros until v; intros TR EV NOCMP. 
   unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; inv EV; try (TranslOpSimpl; fail).
@@ -713,11 +714,21 @@ Proof.
   intros [rs' [A [B C]]].
   exists rs'; auto with asmgen.
   (* Omul *)
-  destruct (ireg_eq x x0 || ireg_eq x x1).
+  destruct (negb (ireg_eq x x0)).
+  TranslOpSimpl.
+  destruct (negb (ireg_eq x x1)).
+  rewrite Val.mul_commut. TranslOpSimpl.
   econstructor; split.
   eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
   intuition Simpl.
+  (* Omla *)
+  destruct (negb (ireg_eq x x0)).
   TranslOpSimpl.
+  destruct (negb (ireg_eq x x1)).
+  rewrite Val.mul_commut. TranslOpSimpl.
+  econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
+  intuition Simpl.
   (* divs *)
   econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto. 
   intuition Simpl.
@@ -767,10 +778,11 @@ Proof.
     intros. unfold rs4, rs3; Simpl. destruct islt; Simpl; rewrite OTH2; auto with asmgen. 
   (* intoffloat *)
   econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
-  intuition Simpl.
+Transparent destroyed_by_op. 
+  simpl. intuition Simpl.
   (* intuoffloat *)
   econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
-  intuition Simpl.
+  simpl. intuition Simpl.
   (* floatofint *)
   econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
   intuition Simpl.
@@ -788,7 +800,7 @@ Lemma transl_op_correct:
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ Val.lessdef v rs'#(preg_of res)
-  /\ forall r, data_preg r = true -> r <> preg_of res -> rs'#r = rs#r.
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs'#r = rs#r.
 Proof.
   intros. 
   assert (EITHER: match op with Ocmp _ => False | _ => True end \/ exists cmp, op = Ocmp cmp).
@@ -878,7 +890,7 @@ Lemma transl_load_int_correct:
   exists rs',
       exec_straight ge fn c rs m k rs' m
    /\ rs'#(preg_of dst) = v
-   /\ forall r, nontemp_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
+   /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
   intros. monadInv H. erewrite ireg_of_eq by eauto.  
   eapply transl_memory_access_correct; eauto.
@@ -902,7 +914,7 @@ Lemma transl_load_float_correct:
   exists rs',
       exec_straight ge fn c rs m k rs' m
    /\ rs'#(preg_of dst) = v
-   /\ forall r, nontemp_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
+   /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
   intros. monadInv H. erewrite freg_of_eq by eauto.  
   eapply transl_memory_access_correct; eauto.
@@ -913,7 +925,7 @@ Proof.
 Qed.
 
 Lemma transl_store_int_correct:
-  forall mk_instr is_immed src addr args k c (rs: regset) a chunk m m',
+  forall mr mk_instr is_immed src addr args k c (rs: regset) a chunk m m',
   transl_memory_access_int mk_instr is_immed src addr args k = OK c ->
   eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a ->
   Mem.storev chunk m a rs#(preg_of src) = Some m' ->
@@ -922,7 +934,7 @@ Lemma transl_store_int_correct:
     exec_store chunk (Val.add rs1#r2 (eval_shift_addr sa rs1)) r1 rs1 m) ->
   exists rs',
       exec_straight ge fn c rs m k rs' m'
-   /\ forall r, nontemp_preg r = true -> rs'#r = rs#r.
+   /\ forall r, data_preg r = true -> preg_notin r mr -> rs'#r = rs#r.
 Proof.
   intros. monadInv H. erewrite ireg_of_eq in * by eauto.  
   eapply transl_memory_access_correct; eauto.
@@ -937,7 +949,7 @@ Proof.
 Qed.
 
 Lemma transl_store_float_correct:
-  forall mk_instr is_immed src addr args k c (rs: regset) a chunk m m',
+  forall mr mk_instr is_immed src addr args k c (rs: regset) a chunk m m',
   transl_memory_access_float mk_instr is_immed src addr args k = OK c ->
   eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a ->
   Mem.storev chunk m a rs#(preg_of src) = Some m' ->
@@ -946,7 +958,7 @@ Lemma transl_store_float_correct:
     exec_store chunk (Val.add rs1#r2 (Vint n)) r1 rs1 m) ->
   exists rs',
       exec_straight ge fn c rs m k rs' m'
-   /\ forall r, nontemp_preg r = true -> rs'#r = rs#r.
+   /\ forall r, data_preg r = true -> preg_notin r mr -> rs'#r = rs#r.
 Proof.
   intros. monadInv H. erewrite freg_of_eq in * by eauto.  
   eapply transl_memory_access_correct; eauto.
@@ -964,7 +976,7 @@ Lemma transl_load_correct:
   exists rs',
       exec_straight ge fn c rs m k rs' m
    /\ rs'#(preg_of dst) = v
-   /\ forall r, nontemp_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
+   /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
   intros. destruct chunk; simpl in H.
   eapply transl_load_int_correct; eauto.
@@ -972,6 +984,7 @@ Proof.
   eapply transl_load_int_correct; eauto.
   eapply transl_load_int_correct; eauto.
   eapply transl_load_int_correct; eauto.
+  discriminate.
   eapply transl_load_float_correct; eauto.
   apply Mem.loadv_float64al32 in H1. eapply transl_load_float_correct; eauto.
   eapply transl_load_float_correct; eauto.
@@ -984,7 +997,7 @@ Lemma transl_store_correct:
   Mem.storev chunk m a rs#(preg_of src) = Some m' ->
   exists rs',
       exec_straight ge fn c rs m k rs' m'
-   /\ forall r, nontemp_preg r = true -> rs'#r = rs#r.
+   /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_store chunk addr) -> rs'#r = rs#r.
 Proof.
   intros. destruct chunk; simpl in H.
 - assert (Mem.storev Mint8unsigned m a (rs (preg_of src)) = Some m').
@@ -996,11 +1009,12 @@ Proof.
   clear H1. eapply transl_store_int_correct; eauto.
 - eapply transl_store_int_correct; eauto.
 - eapply transl_store_int_correct; eauto.
+- discriminate.
 - unfold transl_memory_access_float in H. monadInv H. rewrite (freg_of_eq _ _ EQ) in *. 
   eapply transl_memory_access_correct; eauto. 
   intros. econstructor; split. apply exec_straight_one.
   simpl. unfold exec_store. rewrite H. rewrite H2; eauto with asmgen. 
-  rewrite H1. eauto. auto. intros. Simpl. 
+  rewrite H1. eauto. auto. intros. Simpl.
   simpl; auto.
 - apply Mem.storev_float64al32 in H1. eapply transl_store_float_correct; eauto.
 - eapply transl_store_float_correct; eauto.
diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp
index 7e3217e..9bf066b 100644
--- a/arm/ConstpropOp.vp
+++ b/arm/ConstpropOp.vp
@@ -31,6 +31,7 @@ Inductive approx : Type :=
                              no compile-time information is available. *)
   | I: int -> approx     (** A known integer value. *)
   | F: float -> approx   (** A known floating-point value. *)
+  | L: int64 -> approx   (** A know 64-bit integer value. *)
   | G: ident -> int -> approx
                          (** The value is the address of the given global
                              symbol plus the given integer offset. *)
@@ -145,6 +146,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) :=
   | Ointuoffloat, F n1 :: nil => eval_static_intuoffloat n1
   | Ofloatofint, I n1 :: nil => if propagate_float_constants tt then F(Float.floatofint n1) else Unknown
   | Ofloatofintu, I n1 :: nil => if propagate_float_constants tt then F(Float.floatofintu n1) else Unknown
+  | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2)
+  | Olowlong, L n :: nil => I(Int64.loword n)
+  | Ohighlong, L n :: nil => I(Int64.hiword n)
   | Ocmp c, vl => eval_static_condition_val c vl
   | _, _ => Unknown
   end.
diff --git a/arm/ConstpropOpproof.v b/arm/ConstpropOpproof.v
index c7de86d..687e08f 100644
--- a/arm/ConstpropOpproof.v
+++ b/arm/ConstpropOpproof.v
@@ -45,9 +45,10 @@ Definition val_match_approx (a: approx) (v: val) : Prop :=
   | Unknown => True
   | I p => v = Vint p
   | F p => v = Vfloat p
+  | L p => v = Vlong p
   | G symb ofs => v = symbol_address ge symb ofs
   | S ofs => v = Val.add sp (Vint ofs)
-  | _ => False
+  | Novalue => False
   end.
 
 Inductive val_list_match_approx: list approx -> list val -> Prop :=
@@ -65,6 +66,8 @@ Ltac SimplVMA :=
       simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (F _) ?v) |- _ =>
       simpl in H; (try subst v); SimplVMA
+  | H: (val_match_approx (L _) ?v) |- _ =>
+      simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (G _ _) ?v) |- _ =>
       simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (S _) ?v) |- _ =>
diff --git a/arm/Machregs.v b/arm/Machregs.v
index 317515c..4906eb0 100644
--- a/arm/Machregs.v
+++ b/arm/Machregs.v
@@ -13,6 +13,7 @@
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
+Require Import Op.
 
 (** ** Machine registers *)
 
@@ -21,43 +22,36 @@ Require Import AST.
 - Integer registers that can be allocated to RTL pseudo-registers ([Rxx]).
 - Floating-point registers that can be allocated to RTL pseudo-registers 
   ([Fxx]).
-- Two integer registers, not allocatable, reserved as temporaries for
-  spilling and reloading ([ITx]).
-- Two float registers, not allocatable, reserved as temporaries for
-  spilling and reloading ([FTx]).
 
-  The type [mreg] does not include special-purpose machine registers
-  such as the stack pointer and the condition codes. *)
+  The type [mreg] does not include reserved machine registers
+  such as the stack pointer, the link register, and the condition codes. *)
 
 Inductive mreg: Type :=
   (** Allocatable integer regs *)
   | R0: mreg  | R1: mreg  | R2: mreg  | R3: mreg
   | R4: mreg  | R5: mreg  | R6: mreg  | R7: mreg
-  | R8: mreg  | R9: mreg  | R11: mreg
+  | R8: mreg  | R9: mreg  | R10: mreg | R11: mreg
+  | R12: mreg
   (** Allocatable double-precision float regs *)
   | F0: mreg  | F1: mreg  | F2: mreg  | F3: mreg
-  | F4: mreg  | F5: mreg
+  | F4: mreg  | F5: mreg  | F6: mreg  | F7: mreg
   | F8: mreg  | F9: mreg  | F10: mreg | F11: mreg
-  | F12: mreg | F13: mreg | F14: mreg | F15: mreg
-  (** Integer temporaries *)
-  | IT1: mreg (* R10 *) | IT2: mreg (* R12 *)
-  (** Float temporaries *)
-  | FT1: mreg (* F6 *) | FT2: mreg (* F7 *).
+  | F12: mreg | F13: mreg | F14: mreg | F15: mreg.
 
 Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
 Proof. decide equality. Defined.
+Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
   | R0 => Tint  | R1 => Tint  | R2 => Tint  | R3 => Tint
   | R4 => Tint  | R5 => Tint  | R6 => Tint  | R7 => Tint
-  | R8 => Tint  | R9 => Tint  | R11 => Tint
+  | R8 => Tint  | R9 => Tint  | R10 => Tint | R11 => Tint
+  | R12 => Tint
   | F0 => Tfloat | F1 => Tfloat | F2 => Tfloat | F3 => Tfloat
-  | F4 => Tfloat| F5 => Tfloat
+  | F4 => Tfloat| F5 => Tfloat  | F6 => Tfloat | F7 => Tfloat
   | F8 => Tfloat | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat
   | F12 => Tfloat | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat
-  | IT1 => Tint | IT2 => Tint
-  | FT1 => Tfloat | FT2 => Tfloat
   end.
 
 Open Scope positive_scope.
@@ -69,13 +63,12 @@ Module IndexedMreg <: INDEXED_TYPE.
     match r with
     | R0 => 1  | R1 => 2  | R2 => 3  | R3 => 4
     | R4 => 5  | R5 => 6  | R6 => 7  | R7 => 8
-    | R8 => 9  | R9 => 10 | R11 => 11
-    | F0 => 12 | F1 => 13 | F2 => 14  | F3 => 15
-    | F4 => 16 | F5 => 17
-    | F8 => 18 | F9 => 19 | F10 => 20 | F11 => 21
-    | F12 => 22 | F13 => 23 | F14 => 24 | F15 => 25
-    | IT1 => 26 | IT2 => 27
-    | FT1 => 28 | FT2 => 29
+    | R8 => 9  | R9 => 10 | R10 => 11 | R11 => 12
+    | R12 => 13
+    | F0 => 14 | F1 => 15 | F2 => 16  | F3 => 17
+    | F4 => 18 | F5 => 19 | F6 => 20  | F7 => 21
+    | F8 => 22 | F9 => 23 | F10 => 24 | F11 => 25
+    | F12 => 26 | F13 => 27 | F14 => 28 | F15 => 29
     end.
   Lemma index_inj:
     forall r1 r2, index r1 = index r2 -> r1 = r2.
@@ -84,3 +77,67 @@ Module IndexedMreg <: INDEXED_TYPE.
   Qed.
 End IndexedMreg.
 
+(** ** Destroyed registers, preferred registers *)
+
+Definition destroyed_by_op (op: operation): list mreg :=
+  match op with
+  | Odiv | Odivu => R0 :: R1 :: R2 :: R3 :: R12 :: nil
+  | Ointoffloat | Ointuoffloat => F6 :: nil
+  | _ => nil
+  end.
+
+Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
+  nil.
+
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
+  match chunk with
+  | Mfloat32 => F6 :: nil
+  | _ => nil
+  end.
+
+Definition destroyed_by_cond (cond: condition): list mreg :=
+  nil.
+
+Definition destroyed_by_jumptable: list mreg :=
+  nil.
+
+Definition destroyed_by_builtin (ef: external_function): list mreg :=
+  match ef with
+  | EF_memcpy sz al => if zle sz 32 then nil else R2 :: R3 :: R12 :: nil
+  | _ => R12 :: F6 :: nil
+  end.
+
+Definition destroyed_at_function_entry: list mreg :=
+  R12 :: nil.
+
+Definition temp_for_parent_frame: mreg :=
+  R12.
+
+Definition mregs_for_operation (op: operation): list (option mreg) * option mreg :=
+  match op with
+  | Odiv | Odivu => (Some R0 :: Some R1 :: nil, Some R0)
+  | _ => (nil, None)
+  end.
+
+Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(option mreg) :=
+  match ef with
+  | EF_memcpy sz al =>
+      if zle sz 32 then (nil, nil) else (Some R3 :: Some R2 :: nil, nil)
+  | _ => (nil, nil)
+  end.
+
+Global Opaque
+    destroyed_by_op destroyed_by_load destroyed_by_store
+    destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
+    destroyed_at_function_entry temp_for_parent_frame
+    mregs_for_operation mregs_for_builtin.
+
+(** Two-address operations.  Return [true] if the first argument and
+  the result must be in the same location *and* are unconstrained
+  by [mregs_for_operation].  There are none for ARM. *)
+
+Definition two_address_op (op: operation) : bool :=
+  false.
+
+Global Opaque two_address_op.
+
diff --git a/arm/Machregsaux.ml b/arm/Machregsaux.ml
index 642437e..5486c4b 100644
--- a/arm/Machregsaux.ml
+++ b/arm/Machregsaux.ml
@@ -17,13 +17,12 @@ open Machregs
 let register_names = [
   ("R0", R0);  ("R1", R1);  ("R2", R2);  ("R3", R3);
   ("R4", R4);  ("R5", R5);  ("R6", R6);  ("R7", R7);
-  ("R8", R8);  ("R9", R9);  ("R11", R11);
+  ("R8", R8);  ("R9", R9);  ("R10", R10); ("R11", R11);
+  ("R12", R12);
   ("F0", F0);  ("F1", F1);  ("F2", F2); ("F3", F3);
-  ("F4", F4);  ("F5", F5);
+  ("F4", F4);  ("F5", F5);  ("F6", F6); ("F7", F7);
   ("F8", F8);  ("F9", F9);  ("F10", F10); ("F11", F11);
-  ("F12", F12);("F13", F13);("F14", F14); ("F15", F15);
-  ("R10", IT1); ("R12", IT2);
-  ("F6", FT1); ("F7", FT2)
+  ("F12", F12);("F13", F13);("F14", F14); ("F15", F15)
 ]
 
 let name_of_register r =
diff --git a/arm/Op.v b/arm/Op.v
index 06d0705..3dfea77 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -81,6 +81,7 @@ Inductive operation : Type :=
   | Orsubshift: shift -> operation      (**r [rd = shifted r2 - r1] *)
   | Orsubimm: int -> operation          (**r [rd = n - r1] *)
   | Omul: operation                     (**r [rd = r1 * r2] *)
+  | Omla: operation                     (**r [rd = r1 * r2 + r3] *)
   | Odiv: operation                     (**r [rd = r1 / r2] (signed) *)
   | Odivu: operation                    (**r [rd = r1 / r2] (unsigned) *)
   | Oand: operation                     (**r [rd = r1 & r2] *)
@@ -114,6 +115,10 @@ Inductive operation : Type :=
   | Ointuoffloat: operation             (**r [rd = unsigned_int_of_float(r1)] *)
   | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] *)
   | Ofloatofintu: operation             (**r [rd = float_of_unsigned_int(r1)] *)
+(*c Manipulating 64-bit integers: *)
+  | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
+  | Olowlong: operation                 (**r [rd = low-word(r1)] *)
+  | Ohighlong: operation                (**r [rd = high-word(r1)] *)
 (*c Boolean tests: *)
   | Ocmp: condition -> operation.       (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
 
@@ -213,6 +218,7 @@ Definition eval_operation
   | Orsubshift s, v1 :: v2 :: nil => Some (Val.sub (eval_shift s v2) v1)
   | Orsubimm n, v1 :: nil => Some (Val.sub (Vint n) v1)
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
+  | Omla, v1 :: v2 :: v3 :: nil => Some (Val.add (Val.mul v1 v2) v3)
   | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
   | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
@@ -244,6 +250,9 @@ Definition eval_operation
   | Ointuoffloat, v1::nil => Val.intuoffloat v1
   | Ofloatofint, v1::nil => Val.floatofint v1
   | Ofloatofintu, v1::nil => Val.floatofintu v1
+  | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
+  | Olowlong, v1::nil => Some(Val.loword v1)
+  | Ohighlong, v1::nil => Some(Val.hiword v1)
   | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m))
   | _, _ => None
   end.
@@ -302,6 +311,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Orsubshift _ => (Tint :: Tint :: nil, Tint)
   | Orsubimm _ => (Tint :: nil, Tint)
   | Omul => (Tint :: Tint :: nil, Tint)
+  | Omla => (Tint :: Tint :: Tint :: nil, Tint)
   | Odiv => (Tint :: Tint :: nil, Tint)
   | Odivu => (Tint :: Tint :: nil, Tint)
   | Oand => (Tint :: Tint :: nil, Tint)
@@ -333,6 +343,9 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Ointuoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
   | Ofloatofintu => (Tint :: nil, Tfloat)
+  | Omakelong => (Tint :: Tint :: nil, Tlong)
+  | Olowlong => (Tlong :: nil, Tint)
+  | Ohighlong => (Tlong :: nil, Tint)
   | Ocmp c => (type_of_condition c, Tint)
   end.
 
@@ -374,6 +387,7 @@ Proof with (try exact I).
   generalize (S s v1). destruct v0; destruct (eval_shift s v1); simpl; intuition. destruct (zeq b0 b)...
   destruct v0...
   destruct v0; destruct v1...
+  destruct v0... destruct v1... destruct v2...
   destruct v0; destruct v1; simpl in H0; inv H0.
     destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
   destruct v0; destruct v1; simpl in H0; inv H0. destruct (Int.eq i0 Int.zero); inv H2...
@@ -406,6 +420,9 @@ Proof with (try exact I).
   destruct v0; simpl in H0; inv H0. destruct (Float.intuoffloat f); simpl in H2; inv H2...
   destruct v0; simpl in H0; inv H0...
   destruct v0; simpl in H0; inv H0...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
   destruct (eval_condition c vl m)... destruct b...
 Qed.
 
@@ -544,6 +561,29 @@ Proof.
   rewrite Val.add_assoc. simpl. auto.
 Qed.
 
+(** Offset an addressing mode [addr] by a quantity [delta], so that
+  it designates the pointer [delta] bytes past the pointer designated
+  by [addr].  May be undefined, in which case [None] is returned. *)
+
+Definition offset_addressing (addr: addressing) (delta: int) : option addressing :=
+  match addr with
+  | Aindexed n => Some(Aindexed (Int.add n delta))
+  | Aindexed2 => None
+  | Aindexed2shift s => None
+  | Ainstack n => Some(Ainstack (Int.add n delta))
+  end.
+
+Lemma eval_offset_addressing:
+  forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v,
+  offset_addressing addr delta = Some addr' ->
+  eval_addressing ge sp addr args = Some v ->
+  eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)).
+Proof.
+  intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst.
+  rewrite Val.add_assoc; auto.
+  rewrite Val.add_assoc. auto. 
+Qed.
+
 (** Transformation of addressing modes with two operands or more
   into an equivalent arithmetic operation.  This is used in the [Reload]
   pass when a store instruction cannot be reloaded directly because
@@ -787,6 +827,7 @@ Proof.
   apply (@Values.val_sub_inject f (Vint i) (Vint i) v v'); auto.
 
   inv H4; inv H2; simpl; auto.
+  apply Values.val_add_inject; auto. inv H4; inv H2; simpl; auto.
   inv H4; inv H3; simpl in H1; inv H1. simpl. 
     destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. TrivialExists.
   inv H4; inv H3; simpl in H1; inv H1. simpl. 
@@ -828,6 +869,10 @@ Proof.
   inv H4; simpl in *; inv H1. TrivialExists.
   inv H4; simpl in *; inv H1. TrivialExists.
 
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+
   subst v1. destruct (eval_condition c vl1 m1) eqn:?.
   exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
   destruct b; simpl; constructor.
diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml
index 5778286..1bac715 100644
--- a/arm/PrintAsm.ml
+++ b/arm/PrintAsm.ml
@@ -181,47 +181,11 @@ let emit_constants oc =
     symbol_labels;
   reset_constants ()
 
-(* Simulate instructions by calling helper functions *)
-
-let print_list_ireg oc l =
-  match l with
-  | [] -> assert false
-  | r1 :: rs -> ireg oc r1; List.iter (fun r -> fprintf oc ", %a" ireg r) rs
-
-let rec remove l r =
-  match l with
-  | [] -> []
-  | hd :: tl -> if hd = r then remove tl r else hd :: remove tl r
-
-let call_helper oc fn dst arg1 arg2 =
-  (* Preserve caller-save registers r0...r3 except dst *)
-  let tosave = remove [IR0; IR1; IR2; IR3] dst in
-  fprintf oc "	stmfd	sp!, {%a}\n" print_list_ireg tosave;
-  (* Copy arg1 to R0 and arg2 to R1 *)
-  let moves =
-    Parmov.parmove2 (=) (fun _ -> IR14) [arg1; arg2] [IR0; IR1] in
-  List.iter
-    (fun (s, d) ->
-      fprintf oc "	mov	%a, %a\n" ireg d ireg s)
-    moves;
-  (* Call the helper function *)
-  fprintf oc "	bl	%s\n" fn;
-  (* Move result to dst *)
-  begin match dst with
-  | IR0 -> ()
-  | _   -> fprintf oc "	mov	%a, r0\n" ireg dst
-  end;
-  (* Restore the other caller-save registers *)
-  fprintf oc "	ldmfd	sp!, {%a}\n" print_list_ireg tosave;
-  (* ... for a total of at most 7 instructions *)
-  7
-
 (* Built-ins.  They come in two flavors: 
    - annotation statements: take their arguments in registers or stack
      locations; generate no code;
    - inlined by the compiler: take their arguments in arbitrary
-     registers; preserve all registers the temporaries
-     (IR10, IR12, IR14, FP2, FP4)
+     registers; preserve all registers except IR2, IR3, IR12 and FP6.
 *)
 
 (* Handling of annotations *)
@@ -234,9 +198,9 @@ let print_annot_val oc txt args res =
   fprintf oc "%s annotation: " comment;
   PrintAnnot.print_annot_val preg oc txt args;
   match args, res with
-  | IR src :: _, IR dst ->
+  | [IR src], [IR dst] ->
       if dst = src then 0 else (fprintf oc "	mov	%a, %a\n" ireg dst ireg src; 1)
-  | FR src :: _, FR dst ->
+  | [FR src], [FR dst] ->
       if dst = src then 0 else (fprintf oc "	fcpy	%a, %a\n" freg dst freg src; 1)
   | _, _ -> assert false
 
@@ -265,16 +229,12 @@ let print_builtin_memcpy_small oc sz al src dst =
 let print_builtin_memcpy_big oc sz al src dst =
   assert (sz >= al);
   assert (sz mod al = 0);
+  assert (src = IR2);
+  assert (dst = IR3);
   let (load, store, chunksize) =
     if al >= 4 then ("ldr", "str", 4)
     else if al = 2 then ("ldrh", "strh", 2)
     else ("ldrb", "strb", 1) in
-  let tmp =
-    if src <> IR0 && dst <> IR0 then IR0
-    else if src <> IR1 && dst <> IR1 then IR1
-    else IR2 in
-  let tosave = List.sort compare [tmp;src;dst] in
-  fprintf oc "	stmfd	sp!, {%a}\n" print_list_ireg tosave;
   begin match Asmgen.decompose_int
                  (coqint_of_camlint (Int32.of_int (sz / chunksize))) with
   | [] -> assert false
@@ -286,12 +246,11 @@ let print_builtin_memcpy_big oc sz al src dst =
         tl
   end;
   let lbl = new_label() in
-  fprintf oc ".L%d:	%s	%a, [%a], #%d\n" lbl load ireg tmp ireg src chunksize;
+  fprintf oc ".L%d:	%s	%a, [%a], #%d\n" lbl load ireg IR12 ireg src chunksize;
   fprintf oc "	subs	%a, %a, #1\n" ireg IR14 ireg IR14;
-  fprintf oc "	%s	%a, [%a], #%d\n" store ireg tmp ireg dst chunksize;
+  fprintf oc "	%s	%a, [%a], #%d\n" store ireg IR12 ireg dst chunksize;
   fprintf oc "	bne	.L%d\n" lbl;
-  fprintf oc "	ldmfd	sp!, {%a}\n" print_list_ireg tosave;
-  9
+  8
 
 let print_builtin_memcpy oc sz al args =
   let (dst, src) =
@@ -308,20 +267,28 @@ let print_builtin_memcpy oc sz al args =
 
 let print_builtin_vload_common oc chunk args res =
   match chunk, args, res with
-  | Mint8unsigned, [IR addr], IR res ->
+  | Mint8unsigned, [IR addr], [IR res] ->
       fprintf oc "	ldrb	%a, [%a, #0]\n" ireg res ireg addr; 1
-  | Mint8signed, [IR addr], IR res ->
+  | Mint8signed, [IR addr], [IR res] ->
       fprintf oc "	ldrsb	%a, [%a, #0]\n" ireg res ireg addr; 1
-  | Mint16unsigned, [IR addr], IR res ->
+  | Mint16unsigned, [IR addr], [IR res] ->
       fprintf oc "	ldrh	%a, [%a, #0]\n" ireg res ireg addr; 1
-  | Mint16signed, [IR addr], IR res ->
+  | Mint16signed, [IR addr], [IR res] ->
       fprintf oc "	ldrsh	%a, [%a, #0]\n" ireg res ireg addr; 1
-  | Mint32, [IR addr], IR res ->
+  | Mint32, [IR addr], [IR res] ->
       fprintf oc "	ldr	%a, [%a, #0]\n" ireg res ireg addr; 1
-  | Mfloat32, [IR addr], FR res ->
+  | Mint64, [IR addr], [IR res1; IR res2] ->
+      if addr <> res2 then begin
+        fprintf oc "	ldr	%a, [%a, #0]\n" ireg res2 ireg addr;
+        fprintf oc "	ldr	%a, [%a, #4]\n" ireg res1 ireg addr
+      end else begin
+        fprintf oc "	ldr	%a, [%a, #4]\n" ireg res1 ireg addr;
+        fprintf oc "	ldr	%a, [%a, #0]\n" ireg res2 ireg addr
+      end; 2
+  | Mfloat32, [IR addr], [FR res] ->
       fprintf oc "	flds	%a, [%a, #0]\n" freg_single res ireg addr;
       fprintf oc "	fcvtds	%a, %a\n" freg res freg_single res; 2
-  | (Mfloat64 | Mfloat64al32), [IR addr], FR res ->
+  | (Mfloat64 | Mfloat64al32), [IR addr], [FR res] ->
       fprintf oc "	fldd	%a, [%a, #0]\n" freg res ireg addr; 1
   | _ ->
       assert false
@@ -346,6 +313,9 @@ let print_builtin_vstore_common oc chunk args =
       fprintf oc "	strh	%a, [%a, #0]\n" ireg src ireg addr; 1
   | Mint32, [IR  addr; IR src] ->
       fprintf oc "	str	%a, [%a, #0]\n" ireg src ireg addr; 1
+  | Mint64, [IR  addr; IR src1; IR src2] ->
+      fprintf oc "	str	%a, [%a, #0]\n" ireg src2 ireg addr;
+      fprintf oc "	str	%a, [%a, #4]\n" ireg src1 ireg addr; 2
   | Mfloat32, [IR  addr; FR src] ->
       fprintf oc "	fcvtsd	%a, %a\n" freg_single FR6 freg src;
       fprintf oc "	fsts	%a, [%a, #0]\n" freg_single FR6 ireg addr; 2
@@ -386,22 +356,57 @@ let print_builtin_inline oc name args res =
   fprintf oc "%s begin %s\n" comment name;
   let n = match name, args, res with
   (* Integer arithmetic *)
-  | "__builtin_bswap", [IR a1], IR res ->
+  | "__builtin_bswap", [IR a1], [IR res] ->
       print_bswap oc a1 IR14 res; 4
-  | "__builtin_cntlz", [IR a1], IR res ->
+  | "__builtin_cntlz", [IR a1], [IR res] ->
       fprintf oc "	clz	%a, %a\n" ireg res ireg a1; 1
   (* Float arithmetic *)
-  | "__builtin_fabs", [FR a1], FR res ->
+  | "__builtin_fabs", [FR a1], [FR res] ->
       fprintf oc "	fabsd	%a, %a\n" freg res freg a1; 1
-  | "__builtin_fsqrt", [FR a1], FR res ->
+  | "__builtin_fsqrt", [FR a1], [FR res] ->
       fprintf oc "	fsqrtd	%a, %a\n" freg res freg a1; 1
+  (* 64-bit integer arithmetic *)
+  | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
+      if rl = ah then begin
+        fprintf oc "	rsbs	%a, %a, #0\n" ireg IR14 ireg al;
+        fprintf oc "	rsc	%a, %a, #0\n" ireg rh ireg ah;
+        fprintf oc "	mov	%a, %a\n" ireg rl ireg IR14; 3
+      end else begin
+        fprintf oc "	rsbs	%a, %a, #0\n" ireg rl ireg al;
+        fprintf oc "	rsc	%a, %a, #0\n" ireg rh ireg ah; 2
+      end
+  | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      if rl = ah || rl = bh then begin
+        fprintf oc "	adds	%a, %a, %a\n" ireg IR14 ireg al ireg bl;
+        fprintf oc "	adc	%a, %a, %a\n" ireg rh ireg ah ireg bh;
+        fprintf oc "	mov	%a, %a\n" ireg rl ireg IR14; 3
+      end else begin
+        fprintf oc "	adds	%a, %a, %a\n" ireg rl ireg al ireg bl;
+        fprintf oc "	adc	%a, %a, %a\n" ireg rh ireg ah ireg bh; 2
+      end
+  | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      if rl = ah || rl = bh then begin
+        fprintf oc "	subs	%a, %a, %a\n" ireg IR14 ireg al ireg bl;
+        fprintf oc "	sbc	%a, %a, %a\n" ireg rh ireg ah ireg bh;
+        fprintf oc "	mov	%a, %a\n" ireg rl ireg IR14; 3
+      end else begin
+        fprintf oc "	subs	%a, %a, %a\n" ireg rl ireg al ireg bl;
+        fprintf oc "	sbc	%a, %a, %a\n" ireg rh ireg ah ireg bh; 2
+      end
+  | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
+      if rl = a || rh = a then begin
+        fprintf oc "	mov	%a, %a\n" ireg IR14 ireg a;
+        fprintf oc "	umull   %a, %a, %a, %a\n" ireg rl ireg rh ireg IR14 ireg b; 2
+      end else begin
+        fprintf oc "	umull   %a, %a, %a, %a\n" ireg rl ireg rh ireg a ireg b; 1
+      end
   (* Memory accesses *)
-  | "__builtin_read16_reversed", [IR a1], IR res ->
+  | "__builtin_read16_reversed", [IR a1], [IR res] ->
       fprintf oc "	ldrh	%a, [%a, #0]\n" ireg res ireg a1;
       fprintf oc "	mov	%a, %a, lsl #8\n" ireg IR14 ireg res;
       fprintf oc "	and	%a, %a, #0xFF00\n" ireg IR14 ireg IR14;
       fprintf oc "	orr	%a, %a, %a, lsr #8\n" ireg res ireg IR14 ireg res; 4
-  | "__builtin_read32_reversed", [IR a1], IR res ->
+  | "__builtin_read32_reversed", [IR a1], [IR res] ->
       fprintf oc "	ldr	%a, [%a, #0]\n" ireg res ireg a1;
       print_bswap oc res IR14 res; 5
   | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
@@ -410,9 +415,19 @@ let print_builtin_inline oc name args res =
       fprintf oc "	orr	%a, %a, %a, lsl #8\n" ireg IR14 ireg IR14 ireg a2;
       fprintf oc "	strh	%a, [%a, #0]\n" ireg IR14 ireg a1; 4
   | "__builtin_write32_reversed", [IR a1; IR a2], _ ->
-      let tmp = if a1 = IR10 then IR12 else IR10 in
-      print_bswap oc a2 IR14 tmp;
-      fprintf oc "	str	%a, [%a, #0]\n" ireg tmp ireg a1; 5
+      if a1 <> IR12 then begin
+        print_bswap oc a2 IR14 IR12;
+        fprintf oc "	str	%a, [%a, #0]\n" ireg IR12 ireg a1; 5
+      end else begin
+        fprintf oc "	mov	%a, %a, lsr #24\n" ireg IR14 ireg a2;
+        fprintf oc "	str     %a, [%a, #0]\n" ireg IR14 ireg a1;
+        fprintf oc "	mov	%a, %a, lsr #16\n" ireg IR14 ireg a2;
+        fprintf oc "	str     %a, [%a, #1]\n" ireg IR14 ireg a1;
+        fprintf oc "	mov	%a, %a, lsr #8\n" ireg IR14 ireg a2;
+        fprintf oc "	str     %a, [%a, #2]\n" ireg IR14 ireg a1;
+        fprintf oc "	str     %a, [%a, #3]\n" ireg IR14 ireg a1;
+        7
+      end
   (* Catch-all *)
   | _ ->
       invalid_arg ("unrecognized builtin " ^ name)
@@ -516,6 +531,8 @@ let print_instruction oc = function
       fprintf oc "	ldrsb	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
   | Pldrsh(r1, r2, sa) ->
       fprintf oc "	ldrsh	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
+  | Pmla(r1, r2, r3, r4) ->
+      fprintf oc "	mla	%a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1
   | Pmov(r1, so) ->
       fprintf oc "	mov	%a, %a\n" ireg r1 shift_op so; 1
   | Pmovc(bit, r1, so) ->
@@ -535,11 +552,13 @@ let print_instruction oc = function
   | Pstrh(r1, r2, sa) ->
       fprintf oc "	strh	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
   | Psdiv(r1, r2, r3) ->
-      call_helper oc "__aeabi_idiv" r1 r2 r3
+      assert (r1 = IR0 && r2 = IR0 && r3 = IR1);
+      fprintf oc "	bl	__aeabi_idiv\n"; 1
   | Psub(r1, r2, so) ->
       fprintf oc "	sub	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
   | Pudiv(r1, r2, r3) ->
-      call_helper oc "__aeabi_uidiv" r1 r2 r3
+      assert (r1 = IR0 && r2 = IR0 && r3 = IR1);
+      fprintf oc "	bl	__aeabi_uidiv\n"; 1
   (* Floating-point coprocessor instructions *)
   | Pfcpyd(r1, r2) ->
       fprintf oc "	fcpyd	%a, %a\n" freg r1 freg r2; 1
@@ -593,14 +612,14 @@ let print_instruction oc = function
       fprintf oc "	fsts	%a, [%a, #%a]\n" freg_single FR6 ireg r2 coqint n; 2
   (* Pseudo-instructions *)
   | Pallocframe(sz, ofs) ->
-      fprintf oc "	mov	r10, sp\n";
+      fprintf oc "	mov	r12, sp\n";
       let ninstr = ref 0 in
       List.iter
         (fun n ->
            fprintf oc "	sub	sp, sp, #%a\n" coqint n;
            incr ninstr)
         (Asmgen.decompose_int sz);
-      fprintf oc "	str	r10, [sp, #%a]\n" coqint ofs;
+      fprintf oc "	str	r12, [sp, #%a]\n" coqint ofs;
       2 + !ninstr
   | Pfreeframe(sz, ofs) ->
       if Asmgen.is_immed_arith sz
@@ -620,7 +639,7 @@ let print_instruction oc = function
       List.iter
         (fun l -> fprintf oc "	.word	%a\n" print_label l)
         tbl;
-      2 + List.length tbl
+      3 + List.length tbl
   | Pbuiltin(ef, args, res) ->
       begin match ef with
       | EF_builtin(name, sg) ->
@@ -713,6 +732,8 @@ let print_init oc = function
       fprintf oc "	.short	%ld\n" (camlint_of_coqint n)
   | Init_int32 n ->
       fprintf oc "	.word	%ld\n" (camlint_of_coqint n)
+  | Init_int64 n ->
+      fprintf oc "	.quad	%Ld\n" (camlint64_of_coqint n)
   | Init_float32 n ->
       fprintf oc "	.word	0x%lx %s %.15g \n" (camlint_of_coqint (Floats.Float.bits_of_single n))
 	comment (camlfloat_of_coqfloat n)
diff --git a/arm/PrintOp.ml b/arm/PrintOp.ml
index b5a8d75..eff3959 100644
--- a/arm/PrintOp.ml
+++ b/arm/PrintOp.ml
@@ -71,6 +71,7 @@ let print_operation reg pp = function
   | Orsubshift s, [r1;r2] -> fprintf pp "%a %a - %a" reg r2 shift s reg r1
   | Orsubimm n, [r1] -> fprintf pp "%ld - %a" (camlint_of_coqint n) reg r1
   | Omul, [r1;r2] -> fprintf pp "%a * %a" reg r1 reg r2
+  | Omla, [r1;r2;r3] -> fprintf pp "%a * %a + %a" reg r1 reg r2 reg r3
   | Odiv, [r1;r2] -> fprintf pp "%a /s %a" reg r1 reg r2
   | Odivu, [r1;r2] -> fprintf pp "%a /u %a" reg r1 reg r2
   | Oand, [r1;r2] -> fprintf pp "%a & %a" reg r1 reg r2
@@ -100,6 +101,9 @@ let print_operation reg pp = function
   | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
   | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
   | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1
+  | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
+  | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
+  | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
   | Ocmp c, args -> print_condition reg pp (c, args)
   | _ -> fprintf pp "<bad operator>"
 
diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp
index 22ef88d..d81328b 100644
--- a/arm/SelectOp.vp
+++ b/arm/SelectOp.vp
@@ -85,6 +85,8 @@ Nondetfunction add (e1: expr) (e2: expr) :=
   | t1, Eop (Oaddimm n2) (t2:::Enil) => addimm n2 (Eop Oadd (t1:::t2:::Enil))
   | Eop (Oshift s) (t1:::Enil), t2 => Eop (Oaddshift s) (t2:::t1:::Enil)
   | t1, Eop (Oshift s) (t2:::Enil) => Eop (Oaddshift s) (t1:::t2:::Enil)
+  | Eop Omul (t1:::t2:::Enil), t3 => Eop Omla (t1:::t2:::t3:::Enil)
+  | t1, Eop Omul (t2:::t3:::Enil) => Eop Omla (t2:::t3:::t1:::Enil)
   | _, _ => Eop Oadd (e1:::e2:::Enil)
   end.
 
@@ -185,6 +187,7 @@ Nondetfunction mul (e1: expr) (e2: expr) :=
 
 Nondetfunction andimm (n1: int) (e2: expr) := 
   if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else
+  if Int.eq n1 Int.mone then e2 else
   match e2 with
   | Eop (Ointconst n2) Enil =>
       Eop (Ointconst (Int.and n1 n2)) Enil
@@ -215,6 +218,7 @@ Definition same_expr_pure (e1 e2: expr) :=
 
 Nondetfunction orimm (n1: int) (e2: expr) :=
   if Int.eq n1 Int.zero then e2 else
+  if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil else
   match e2 with
   | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil
   | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil)
@@ -407,6 +411,7 @@ Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
   | Mint16signed => true
   | Mint16unsigned => true
   | Mint32 => true
+  | Mint64 => false
   | Mfloat32 => false
   | Mfloat64 => false
   | Mfloat64al32 => false
@@ -419,6 +424,7 @@ Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
   | Mint16signed => false
   | Mint16unsigned => false
   | Mint32 => true
+  | Mint64 => false
   | Mfloat32 => false
   | Mfloat64 => false
   | Mfloat64al32 => false
diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v
index ecc758f..a71ead7 100644
--- a/arm/SelectOpproof.v
+++ b/arm/SelectOpproof.v
@@ -27,13 +27,6 @@ Require Import SelectOp.
 
 Open Local Scope cminorsel_scope.
 
-Section CMCONSTR.
-
-Variable ge: genv.
-Variable sp: val.
-Variable e: env.
-Variable m: mem.
-
 (** * Useful lemmas and tactics *)
 
 (** The following are trivial lemmas and custom tactics that help
@@ -81,6 +74,13 @@ Ltac TrivialExists :=
 
 (** * Correctness of the smart constructors *)
 
+Section CMCONSTR.
+
+Variable ge: genv.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
 (** We now show that the code generated by "smart constructor" functions
   such as [Selection.notint] behaves as expected.  Continuing the
   [notint] example, we show that if the expression [e]
@@ -172,6 +172,8 @@ Proof.
   subst. rewrite <- Val.add_assoc. apply eval_addimm. EvalOp.
   subst. rewrite Val.add_commut. TrivialExists.
   subst. TrivialExists.
+  subst. TrivialExists. 
+  subst. rewrite Val.add_commut. TrivialExists.
   TrivialExists.
 Qed.
 
@@ -325,6 +327,9 @@ Proof.
   predSpec Int.eq Int.eq_spec n Int.zero. 
   intros. exists (Vint Int.zero); split. EvalOp. 
   destruct x; simpl; auto. subst n. rewrite Int.and_zero. auto.
+  predSpec Int.eq Int.eq_spec n Int.mone.
+  intros. exists x; split; auto. 
+  subst. destruct x; simpl; auto. rewrite Int.and_mone; auto.
   case (andimm_match a); intros.
   InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto.
   InvEval. subst. rewrite Val.and_assoc. simpl. rewrite Int.and_commut. TrivialExists. 
@@ -352,6 +357,9 @@ Proof.
   predSpec Int.eq Int.eq_spec n Int.zero.
   intros. subst. exists x; split; auto.
   destruct x; simpl; auto. rewrite Int.or_zero; auto.
+  predSpec Int.eq Int.eq_spec n Int.mone.
+  intros. exists (Vint Int.mone); split. EvalOp. 
+  destruct x; simpl; auto. subst n. rewrite Int.or_mone. auto.
   destruct (orimm_match a); intros; InvEval.
   TrivialExists. simpl. rewrite Int.or_commut; auto.
   subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists. 
diff --git a/arm/linux/Conventions1.v b/arm/linux/Conventions1.v
index f1ddc23..1731eba 100644
--- a/arm/linux/Conventions1.v
+++ b/arm/linux/Conventions1.v
@@ -32,34 +32,19 @@ Require Import Locations.
 *)
 
 Definition int_caller_save_regs :=
-  R0 :: R1 :: R2 :: R3 :: nil.
+  R0 :: R1 :: R2 :: R3 :: R12 :: nil.
 
 Definition float_caller_save_regs :=
-  F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: nil.
+  F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: nil.
 
 Definition int_callee_save_regs :=
-  R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R11 :: nil.
+  R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: nil.
 
 Definition float_callee_save_regs :=
   F8 :: F9 :: F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: nil.
 
-Definition destroyed_at_call_regs :=
-  int_caller_save_regs ++ float_caller_save_regs.
-
 Definition destroyed_at_call :=
-  List.map R destroyed_at_call_regs.
-
-Definition int_temporaries := IT1 :: IT2 :: nil.
-
-Definition float_temporaries := FT1 :: FT2 :: nil.
-  
-Definition temporary_regs := int_temporaries ++ float_temporaries.
-
-Definition temporaries := List.map R temporary_regs.
-
-Definition destroyed_at_move_regs: list mreg := nil.
-
-Definition destroyed_at_move := List.map R destroyed_at_move_regs.
+  int_caller_save_regs ++ float_caller_save_regs.
 
 Definition dummy_int_reg := R0.     (**r Used in [Coloring]. *)
 Definition dummy_float_reg := F0.   (**r Used in [Coloring]. *)
@@ -72,7 +57,7 @@ Definition dummy_float_reg := F0.   (**r Used in [Coloring]. *)
 Definition index_int_callee_save (r: mreg) :=
   match r with
   | R4 => 0  | R5 => 1  | R6 => 2  | R7 => 3
-  | R8 => 4  | R9 => 5  | R11 => 6
+  | R8 => 4  | R9 => 5  | R10 => 6 | R11 => 7
   | _ => -1
   end.
 
@@ -169,34 +154,27 @@ Qed.
 
 Lemma register_classification:
   forall r, 
-  (In (R r) temporaries \/ In (R r) destroyed_at_call) \/
-  (In r int_callee_save_regs \/ In r float_callee_save_regs).
+  In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs.
 Proof.
   destruct r; 
-  try (left; left; simpl; OrEq);
-  try (left; right; simpl; OrEq);
+  try (left; simpl; OrEq);
   try (right; left; simpl; OrEq);
   try (right; right; simpl; OrEq).
 Qed.
 
+
 Lemma int_callee_save_not_destroyed:
   forall r, 
-    In (R r) temporaries \/ In (R r) destroyed_at_call ->
-    ~(In r int_callee_save_regs).
+    In r destroyed_at_call -> In r int_callee_save_regs -> False.
 Proof.
-  intros; red; intros. elim H.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
+  intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
 Qed.
 
 Lemma float_callee_save_not_destroyed:
   forall r, 
-    In (R r) temporaries \/ In (R r) destroyed_at_call ->
-    ~(In r float_callee_save_regs).
+    In r destroyed_at_call -> In r float_callee_save_regs -> False.
 Proof.
-  intros; red; intros. elim H.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
+  intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
 Qed.
 
 Lemma int_callee_save_type:
@@ -245,51 +223,33 @@ Qed.
   Calling conventions are largely arbitrary: they must respect the properties
   proved in this section (such as no overlapping between the locations
   of function arguments), but this leaves much liberty in choosing actual
-  locations.  To ensure binary interoperability of code generated by our
-  compiler with libraries compiled by another ARM EABI compiler, we
-  implement *almost* the standard conventions defined in the ARM EABI application
-  binary interface, with two exceptions:
-- Double-precision arguments and results are passed in VFP double registers
-  instead of pairs of integer registers.
-- Single-precision arguments and results are passed as double-precision floats.
-*)
+  locations.  *)
 
 (** ** Location of function result *)
 
 (** The result value of a function is passed back to the caller in
-  registers [R0] or [F0], depending on the type of the returned value.
-  We treat a function without result as a function with one integer result. *)
+  registers [R0] or [F0] or [R0,R1], depending on the type of the
+  returned value.  We treat a function without result as a function
+  with one integer result. *)
 
-Definition loc_result (s: signature) : mreg :=
+Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
-  | None => R0
-  | Some Tint => R0
-  | Some Tfloat => F0
+  | None => R0 :: nil
+  | Some Tint => R0 :: nil
+  | Some Tfloat => F0 :: nil
+  | Some Tlong => R1 :: R0 :: nil
   end.
 
-(** The result location has the type stated in the signature. *)
-
-Lemma loc_result_type:
-  forall sig,
-  mreg_type (loc_result sig) =
-  match sig.(sig_res) with None => Tint | Some ty => ty end.
-Proof.
-  intros; unfold loc_result.
-  destruct (sig_res sig). 
-  destruct t; reflexivity.
-  reflexivity.
-Qed.
-
 (** The result location is a caller-save register or a temporary *)
 
 Lemma loc_result_caller_save:
-  forall (s: signature), 
-  In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries.
+  forall (s: signature) (r: mreg), 
+  In r (loc_result s) -> In r destroyed_at_call.
 Proof.
-  intros; unfold loc_result. left;
-  destruct (sig_res s). 
-  destruct t; simpl; OrEq.
-  simpl; OrEq.
+  intros.
+  assert (r = R0 \/ r = R1 \/ r = F0).
+    unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition.
+  destruct H0 as [A | [A | A]]; subst r; simpl; OrEq.
 Qed.
 
 (** ** Location of function arguments *)
@@ -299,34 +259,37 @@ Qed.
 - The first 2 float arguments are passed in registers [F0] and [F1].
 - Each float argument passed in a float register ``consumes'' an aligned pair
   of two integer registers.
+- Each long integer argument is passed in an aligned pair of two integer
+  registers.
 - Extra arguments are passed on the stack, in [Outgoing] slots, consecutively
-  assigned (1 word for an integer argument, 2 words for a float),
+  assigned (1 word for an integer argument, 2 words for a float or a long),
   starting at word offset 0.
 *)
 
-Function ireg_param (n: Z) : mreg :=
+Definition ireg_param (n: Z) : mreg :=
   if zeq n (-4) then R0
   else if zeq n (-3) then R1
   else if zeq n (-2) then R2
-  else if zeq n (-1) then R3
-  else R4. (**r should not happen *)
-
-Function freg_param (n: Z) : mreg :=
-  if zeq n (-4) then F0
-  else if zeq n (-3) then F1
-  else if zeq n (-2) then F1
-  else F2.  (**r should not happen *)
+  else R3.
 
+Definition freg_param (n: Z) : mreg :=
+  if zeq n (-4) then F0 else F1.
 
 Fixpoint loc_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
   | Tint :: tys =>
-      (if zle 0 ofs then S (Outgoing ofs Tint) else R (ireg_param ofs))
+      (if zle 0 ofs then S Outgoing ofs Tint else R (ireg_param ofs))
       :: loc_arguments_rec tys (ofs + 1)
   | Tfloat :: tys =>
-      (if zle (-1) ofs then S (Outgoing (align ofs 2) Tfloat) else R (freg_param ofs))
-      :: loc_arguments_rec tys (align ofs 2 + 2)
+      let ofs := align ofs 2 in
+      (if zle 0 ofs then S Outgoing ofs Tfloat else R (freg_param ofs))
+      :: loc_arguments_rec tys (ofs + 2)
+  | Tlong :: tys =>
+      let ofs := align ofs 2 in
+      (if zle 0 ofs then S Outgoing (ofs + 1) Tint else R (ireg_param (ofs + 1)))
+      :: (if zle 0 ofs then S Outgoing ofs Tint else R (ireg_param ofs))
+      :: loc_arguments_rec tys (ofs + 2)
   end.
 
 (** [loc_arguments s] returns the list of locations where to store arguments
@@ -342,7 +305,7 @@ Fixpoint size_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
   | Tint :: tys => size_arguments_rec tys (ofs + 1)
-  | Tfloat :: tys => size_arguments_rec tys (align ofs 2 + 2)
+  | (Tfloat | Tlong) :: tys => size_arguments_rec tys (align ofs 2 + 2)
   end.
 
 Definition size_arguments (s: signature) : Z :=
@@ -353,109 +316,85 @@ Definition size_arguments (s: signature) : Z :=
 
 Definition loc_argument_acceptable (l: loc) : Prop :=
   match l with
-  | R r => ~(In l temporaries)
-  | S (Outgoing ofs ty) => ofs >= 0
+  | R r => In r destroyed_at_call
+  | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong
   | _ => False
   end.
 
+(*
 Lemma align_monotone:
   forall x1 x2 y, y > 0 -> x1 <= x2 -> align x1 y <= align x2 y.
 Proof.
   intros. unfold align. apply Zmult_le_compat_r. apply Z_div_le. 
   omega. omega. omega.
 Qed.
+*)
+
+Remark ireg_param_caller_save:
+  forall n, In (ireg_param n) destroyed_at_call.
+Proof.
+  unfold ireg_param; intros.
+  destruct (zeq n (-4)). simpl; auto. 
+  destruct (zeq n (-3)). simpl; auto. 
+  destruct (zeq n (-2)); simpl; auto.
+Qed.
+
+Remark freg_param_caller_save:
+  forall n, In (freg_param n) destroyed_at_call.
+Proof.
+  unfold freg_param; intros. destruct (zeq n (-4)); simpl; OrEq.
+Qed.
 
 Remark loc_arguments_rec_charact:
   forall tyl ofs l,
   In l (loc_arguments_rec tyl ofs) ->
   match l with
-  | R r => 
-       (exists n, ofs <= n < 0 /\ r = ireg_param n)
-    \/ (exists n, ofs <= n < -1 /\ r = freg_param n)
-  | S (Outgoing ofs' ty) => ofs' >= 0 /\ ofs' >= ofs
-  | S _ => False
+  | R r => In r destroyed_at_call
+  | S Outgoing ofs' ty => ofs' >= 0 /\ ofs <= ofs' /\ ty <> Tlong
+  | S _ _ _ => False
   end.
 Proof.
   induction tyl; simpl loc_arguments_rec; intros.
   elim H.
-  destruct a; elim H; intro.
-  subst l. destruct (zle 0 ofs). omega.
-  left. exists ofs; split; auto; omega.
-  generalize (IHtyl _ _ H0). 
-  destruct l. intros [[n A] | [n A]]; [left|right]; exists n; intuition omega.
-  destruct s; auto; omega.
-  subst l. destruct (zle (-1) ofs).
-  split. apply Zle_ge. change 0 with (align (-1) 2). apply align_monotone; omega.
-  apply Zle_ge. apply align_le. omega.
-  right. exists ofs. intuition.
+  destruct a.
+- (* Tint *)
+  destruct H.
+  subst l. destruct (zle 0 ofs).
+  split. omega. split. omega. congruence.
+  apply ireg_param_caller_save.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+- (* Tfloat *)
   assert (ofs <= align ofs 2) by (apply align_le; omega).
-  generalize (IHtyl _ _ H0). 
-  destruct l. intros [[n A] | [n A]]; [left|right]; exists n; intuition omega.
-  destruct s; auto; omega.
-Qed.
-
-Lemma loc_notin_in_diff:
-  forall l ll,
-  Loc.notin l ll <-> (forall l', In l' ll -> Loc.diff l l').
-Proof.
-  induction ll; simpl; intuition. subst l'. auto. 
-Qed.
-  
-Remark loc_arguments_rec_notin_local:
-  forall tyl ofs ofs0 ty0,
-  Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl ofs).
-Proof.
-  intros. rewrite loc_notin_in_diff. intros. 
-  exploit loc_arguments_rec_charact; eauto. 
-  destruct l'; intros; simpl; auto. destruct s; auto; contradiction.
+  destruct H.
+  subst l. destruct (zle 0 (align ofs 2)).
+  split. omega. split. auto. congruence.
+  apply freg_param_caller_save.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+- (* Tlong *)
+  assert (ofs <= align ofs 2) by (apply align_le; omega).
+  destruct H.
+  subst l. destruct (zle 0 (align ofs 2)).
+  split. omega. split. omega. congruence.
+  apply ireg_param_caller_save.
+  destruct H.
+  subst l. destruct (zle 0 (align ofs 2)).
+  split. omega. split. omega. congruence.
+  apply ireg_param_caller_save.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
   forall (s: signature) (r: loc),
   In r (loc_arguments s) -> loc_argument_acceptable r.
 Proof.
-  unfold loc_arguments; intros.
+  unfold loc_arguments, loc_argument_acceptable; intros.
   generalize (loc_arguments_rec_charact _ _ _ H).
-  destruct r; simpl.
-  intros [[n [A B]] | [n [A B]]]; subst m.
-  functional induction (ireg_param n); intuition congruence.
-  functional induction (freg_param n); intuition congruence.
-  destruct s0; tauto.
+  destruct r; auto.
+  destruct sl; auto.
+  tauto.
 Qed.
 Hint Resolve loc_arguments_acceptable: locs.
 
-(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *)
-
-Lemma loc_arguments_norepet:
-  forall (s: signature), Loc.norepet (loc_arguments s).
-Proof.
-  unfold loc_arguments; intros. 
-  assert (forall tyl ofs, -4 <= ofs -> Loc.norepet (loc_arguments_rec tyl ofs)).
-    induction tyl; simpl; intros.
-    constructor.
-    destruct a; constructor.
-    rewrite loc_notin_in_diff. intros. exploit loc_arguments_rec_charact; eauto. 
-    destruct (zle 0 ofs); destruct l'; simpl; auto.
-    destruct s0; intuition.
-    intros [[n [A B]] | [n [A B]]]; subst m.
-    functional induction (ireg_param ofs); functional induction (ireg_param n); congruence || omegaContradiction.
-    functional induction (ireg_param ofs); functional induction (freg_param n); congruence || omegaContradiction.
-    apply IHtyl. omega.
-    rewrite loc_notin_in_diff. intros. exploit loc_arguments_rec_charact; eauto. 
-    destruct (zle (-1) ofs); destruct l'; simpl; auto.
-    destruct s0; intuition.
-    intros [[n [A B]] | [n [A B]]]; subst m.
-    functional induction (freg_param ofs); functional induction (ireg_param n); congruence || omegaContradiction.
-    functional induction (freg_param ofs); functional induction (freg_param n); try (congruence || omegaContradiction).
-    compute in A. intuition.
-    compute in A. intuition.
-    compute in A. intuition.
-    compute in A. intuition.
-    compute in A. intuition.
-    apply IHtyl. assert (ofs <= align ofs 2) by (apply align_le; omega). omega.
-  apply H. omega.
-Qed.
-
 (** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
 
 Remark size_arguments_rec_above:
@@ -468,6 +407,8 @@ Proof.
   apply Zle_trans with (ofs + 1); auto; omega.
   assert (ofs <= align ofs 2) by (apply align_le; omega).
   apply Zle_trans with (align ofs 2 + 2); auto; omega.
+  assert (ofs <= align ofs 2) by (apply align_le; omega).
+  apply Zle_trans with (align ofs 2 + 2); auto; omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -478,79 +419,35 @@ Qed.
 
 Lemma loc_arguments_bounded:
   forall (s: signature) (ofs: Z) (ty: typ),
-  In (S (Outgoing ofs ty)) (loc_arguments s) ->
+  In (S Outgoing ofs ty) (loc_arguments s) ->
   ofs + typesize ty <= size_arguments s.
 Proof.
   intros.
   assert (forall tyl ofs0,
           0 <= ofs0 ->
           ofs0 <= Zmax 0 (size_arguments_rec tyl ofs0)).
+  {
     intros. generalize (size_arguments_rec_above tyl ofs0). intros.
-    rewrite Zmax_spec. rewrite zlt_false. auto. omega. 
+    rewrite Zmax_spec. rewrite zlt_false. auto. omega.
+  } 
   assert (forall tyl ofs0,
-    In (S (Outgoing ofs ty)) (loc_arguments_rec tyl ofs0) ->
+    In (S Outgoing ofs ty) (loc_arguments_rec tyl ofs0) ->
     ofs + typesize ty <= Zmax 0 (size_arguments_rec tyl ofs0)).
-  induction tyl; simpl; intros.
-  elim H1.
-  destruct a; elim H1; intros.
-  destruct (zle 0 ofs0); inv H2. apply H0. omega. auto.
-  destruct (zle (-1) ofs0); inv H2. apply H0. 
-  assert (align (-1) 2 <= align ofs0 2). apply align_monotone. omega. auto.
-  change (align (-1) 2) with 0 in H2. omega. 
-  auto.
- 
+  {
+    induction tyl; simpl; intros.
+    elim H1.
+    destruct a.
+  - (* Tint *)
+    destruct H1; auto. destruct (zle 0 ofs0); inv H1. apply H0. omega. 
+  - (* Tfloat *)
+    destruct H1; auto. destruct (zle 0 (align ofs0 2)); inv H1. apply H0. omega.
+  - (* Tlong *)
+    destruct H1.
+    destruct (zle 0 (align ofs0 2)); inv H1.
+    eapply Zle_trans. 2: apply H0. simpl typesize; omega. omega. 
+    destruct H1; auto.
+    destruct (zle 0 (align ofs0 2)); inv H1.
+    eapply Zle_trans. 2: apply H0. simpl typesize; omega. omega.
+  }
   unfold size_arguments. apply H1. auto.  
 Qed.
-
-(** Temporary registers do not overlap with argument locations. *)
-
-Lemma loc_arguments_not_temporaries:
-  forall sig, Loc.disjoint (loc_arguments sig) temporaries.
-Proof.
-  intros; red; intros x1 x2 A B.
-  exploit loc_arguments_acceptable; eauto. unfold loc_argument_acceptable. 
-  destruct x1; intros. simpl. destruct x2; auto. intuition congruence.
-  destruct s; try contradiction. revert B. simpl. ElimOrEq; auto.
-Qed.
-Hint Resolve loc_arguments_not_temporaries: locs.
-
-(** Argument registers are caller-save. *)
-
-Lemma arguments_caller_save:
-  forall sig r,
-  In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call.
-Proof.
-  unfold loc_arguments; intros.
-  destruct (loc_arguments_rec_charact _ _ _ H) as [[n [A B]] | [n [A B]]]; subst r.
-  functional induction (ireg_param n); simpl; auto. omegaContradiction.
-  functional induction (freg_param n); simpl; auto 10.
-Qed.
-
-(** Argument locations agree in number with the function signature. *)
-
-Lemma loc_arguments_length:
-  forall sig,
-  List.length (loc_arguments sig) = List.length sig.(sig_args).
-Proof.
-  assert (forall tyl ofs, List.length (loc_arguments_rec tyl ofs) = List.length tyl).
-  induction tyl; simpl; intros.
-  auto.
-  destruct a; simpl; decEq; auto.
-
-  intros. unfold loc_arguments. auto.
-Qed.
-
-(** Argument locations agree in types with the function signature. *)
-
-Lemma loc_arguments_type:
-  forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args).
-Proof.
-  assert (forall tyl ofs, List.map Loc.type (loc_arguments_rec tyl ofs) = tyl).
-  induction tyl; simpl; intros.
-  auto.
-  destruct a; simpl; decEq; auto.
-  destruct (zle 0 ofs). auto. functional induction (ireg_param ofs); auto.
-  destruct (zle (-1) ofs). auto. functional induction (freg_param ofs); auto.
-
-  intros. unfold loc_arguments. apply H. 
-Qed.
diff --git a/arm/linux/Stacklayout.v b/arm/linux/Stacklayout.v
index d84da6b..7694dcf 100644
--- a/arm/linux/Stacklayout.v
+++ b/arm/linux/Stacklayout.v
@@ -18,11 +18,8 @@ Require Import Bounds.
 (** The general shape of activation records is as follows,
   from bottom (lowest offsets) to top:
 - Space for outgoing arguments to function calls.
-- Local stack slots of integer type.
+- Local stack slots.
 - Saved values of integer callee-save registers used by the function.
-- One word of padding, if necessary to align the following data
-  on a 8-byte boundary.
-- Local stack slots of float type.
 - Saved values of float callee-save registers used by the function.
 - Saved return address into caller.
 - Pointer to activation record of the caller.
@@ -38,10 +35,9 @@ Record frame_env : Type := mk_frame_env {
   fe_size: Z;
   fe_ofs_link: Z;
   fe_ofs_retaddr: Z;
-  fe_ofs_int_local: Z;
+  fe_ofs_local: Z;
   fe_ofs_int_callee_save: Z;
   fe_num_int_callee_save: Z;
-  fe_ofs_float_local: Z;
   fe_ofs_float_callee_save: Z;
   fe_num_float_callee_save: Z;
   fe_stack_data: Z
@@ -51,18 +47,17 @@ Record frame_env : Type := mk_frame_env {
   function. *)
 
 Definition make_env (b: bounds) :=
-  let oil := 4 * b.(bound_outgoing) in    (* integer locals *)
-  let oics := oil + 4 * b.(bound_int_local) in (* integer callee-saves *)
+  let ol := align (4 * b.(bound_outgoing)) 8 in    (* locals *)
+  let oics := ol + 4 * b.(bound_local) in (* integer callee-saves *)
   let oendi := oics + 4 * b.(bound_int_callee_save) in
-  let ofl := align oendi 8 in       (* float locals *)
-  let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *)
+  let ofcs := align oendi 8 in       (* float callee-saves *)
   let ora := ofcs + 8 * b.(bound_float_callee_save) in (* retaddr *)
   let olink := ora + 4 in (* back link *)
   let ostkdata := olink + 4 in (* stack data *)
   let sz := align (ostkdata + b.(bound_stack_data)) 8 in
-  mk_frame_env sz olink ora
-                  oil oics b.(bound_int_callee_save)
-                  ofl ofcs b.(bound_float_callee_save)
+  mk_frame_env sz olink ora ol
+                  oics b.(bound_int_callee_save)
+                  ofcs b.(bound_float_callee_save)
                   ostkdata.
 
 (** Separation property *)
@@ -71,26 +66,24 @@ Remark frame_env_separated:
   forall b,
   let fe := make_env b in
   0 <= fe_ofs_arg
-  /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_int_local)
-  /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_int_callee_save)
-  /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local)
-  /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save)
+  /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_local)
+  /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_ofs_int_callee_save)
+  /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save)
   /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_ofs_retaddr)
   /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_ofs_link)
   /\ fe.(fe_ofs_link) + 4 <= fe.(fe_stack_data)
   /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size).
 Proof.
   intros.
-  generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)).
-  generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 8 (refl_equal _)).
+  generalize (align_le (4 * bound_outgoing b) 8 (refl_equal)).
+  generalize (align_le (fe_ofs_int_callee_save fe + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)).
+  generalize (align_le (fe_stack_data fe + b.(bound_stack_data)) 8 (refl_equal)).
   unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
-    fe_ofs_int_local, fe_ofs_int_callee_save,
-    fe_num_int_callee_save,
-    fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+    fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save,
+    fe_ofs_float_callee_save, fe_num_float_callee_save,
     fe_stack_data, fe_ofs_arg.
   intros.
-  generalize (bound_int_local_pos b); intro;
-  generalize (bound_float_local_pos b); intro;
+  generalize (bound_local_pos b); intro;
   generalize (bound_int_callee_save_pos b); intro;
   generalize (bound_float_callee_save_pos b); intro;
   generalize (bound_outgoing_pos b); intro;
@@ -104,9 +97,8 @@ Remark frame_env_aligned:
   forall b,
   let fe := make_env b in
   (4 | fe.(fe_ofs_link))
-  /\ (4 | fe.(fe_ofs_int_local))
+  /\ (8 | fe.(fe_ofs_local))
   /\ (4 | fe.(fe_ofs_int_callee_save))
-  /\ (8 | fe.(fe_ofs_float_local))
   /\ (8 | fe.(fe_ofs_float_callee_save))
   /\ (4 | fe.(fe_ofs_retaddr))
   /\ (8 | fe.(fe_stack_data))
@@ -114,30 +106,27 @@ Remark frame_env_aligned:
 Proof.
   intros.
   unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
-    fe_ofs_int_local, fe_ofs_int_callee_save,
-    fe_num_int_callee_save,
-    fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+    fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save,
+    fe_ofs_float_callee_save, fe_num_float_callee_save,
     fe_stack_data.
   set (x1 := 4 * bound_outgoing b).
   assert (4 | x1). unfold x1; exists (bound_outgoing b); ring.
-  set (x2 := x1 + 4 * bound_int_local b).
-  assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring.
-  set (x3 := x2 + 4 * bound_int_callee_save b).
-  set (x4 := align x3 8).
-  assert (8 | x4). unfold x4. apply align_divides. omega.
-  set (x5 := x4 + 8 * bound_float_local b).
-  assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring.
-  set (x6 := x5 + 8 * bound_float_callee_save b).
-  assert (8 | x6).
-    unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
-  assert (4 | x6).
-    apply Zdivides_trans with 8. exists 2; auto. auto.
+  set (x2 := align x1 8).
+  assert (8 | x2). apply align_divides. omega. 
+  set (x3 := x2 + 4 * bound_local b).
+  assert (4 | x3). apply Zdivide_plus_r. apply Zdivides_trans with 8; auto. exists 2; auto.
+  exists (bound_local b); ring.
+  set (x4 := align (x3 + 4 * bound_int_callee_save b) 8).
+  assert (8 | x4). apply align_divides. omega.
+  set (x5 := x4 + 8 * bound_float_callee_save b).
+  assert (8 | x5). apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
+  assert (4 | x5). apply Zdivides_trans with 8; auto. exists 2; auto.
+  set (x6 := x5 + 4). 
+  assert (4 | x6). apply Zdivide_plus_r; auto. exists 1; auto.
   set (x7 := x6 + 4).
-  assert (4 | x7). unfold x7; apply Zdivide_plus_r; auto. exists 1; auto.
-  set (x8 := x7 + 4).
-  assert (8 | x8). unfold x8, x7. replace (x6 + 4 + 4) with (x6 + 8) by omega.
-    apply Zdivide_plus_r; auto. exists 1; auto.
-  set (x9 := align (x8 + bound_stack_data b) 8).
-  assert (8 | x9). unfold x9; apply align_divides. omega.
+  assert (8 | x7). unfold x7, x6. replace (x5 + 4 + 4) with (x5 + 8) by omega. 
+  apply Zdivide_plus_r; auto. exists 1; auto.
+  set (x8 := align (x7 + bound_stack_data b) 8).
+  assert (8 | x8). apply align_divides. omega.
   tauto.
 Qed.
diff --git a/backend/Allocation.v b/backend/Allocation.v
index caaf09d..8674335 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -10,107 +10,1169 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Register allocation. *)
+(** Register allocation by external oracle and a posteriori validation. *)
 
+Require Import FSets.
+Require FSetAVLplus.
 Require Import Coqlib.
+Require Import Ordered.
 Require Import Errors.
 Require Import Maps.
 Require Import Lattice.
 Require Import AST.
+Require Import Integers.
+Require Import Memdata.
 Require Import Op.
 Require Import Registers.
 Require Import RTL.
-Require Import RTLtyping.
-Require Import Liveness.
+Require Import Kildall.
 Require Import Locations.
+Require Import Conventions.
+Require Import RTLtyping.
 Require Import LTL.
-Require Import Coloring.
-
-(** * Translation from RTL to LTL *)
-
-(** Each [RTL] instruction translates to an [LTL] instruction.
-  The register assignment [assign] returned by register allocation
-  is applied to the arguments and results of the RTL
-  instruction.  Moreover, dead instructions and redundant moves
-  are eliminated (turned into a [Lnop] instruction).
-  Dead instructions are instructions without side-effects ([Iop] and
-  [Iload]) whose result register is dead, i.e. whose result value
-  is never used.  Redundant moves are moves whose source and destination
-  are assigned the same location. *)
-
-Definition is_redundant_move
-    (op: operation) (args: list reg) (res: reg) (assign: reg -> loc) : bool :=
-  match is_move_operation op args with
-  | None => false
-  | Some src => if Loc.eq (assign src) (assign res) then true else false
-  end.
-
-Definition transf_instr
-         (f: RTL.function) (live: PMap.t Regset.t) (assign: reg -> loc)
-         (pc: node) (instr: RTL.instruction) : LTL.instruction :=
-  match instr with
+
+(** The validation algorithm used here is described in
+  "Validating register allocation and spilling", 
+  by Silvain Rideau and Xavier Leroy,
+  in Compiler Construction (CC 2010), LNCS 6011, Springer, 2010. *)
+
+(** * Structural checks *)
+
+(** As a first pass, we check the LTL code returned by the external oracle
+  against the original RTL code for structural conformance.
+  Each RTL instruction was transformed into a LTL basic block whose
+  shape must agree with the RTL instruction.  For example, if the RTL
+  instruction is [Istore(Mint32, addr, args, src, s)], the LTL basic block
+  must be of the following shape:
+- zero, one or several "move" instructions
+- a store instruction [Lstore(Mint32, addr, args', src')]
+- a [Lbranch s] instruction.
+
+  The [block_shape] type below describes all possible cases of structural
+  maching between an RTL instruction and an LTL basic block.
+*)
+
+Definition moves := list (loc * loc)%type.
+
+Inductive block_shape: Type :=
+  | BSnop (mv: moves) (s: node)
+  | BSmove (src: reg) (dst: reg) (mv: moves) (s: node)
+  | BSmakelong (src1 src2: reg) (dst: reg) (mv: moves) (s: node)
+  | BSlowlong (src: reg) (dst: reg) (mv: moves) (s: node)
+  | BShighlong (src: reg) (dst: reg) (mv: moves) (s: node)
+  | BSop (op: operation) (args: list reg) (res: reg)
+         (mv1: moves) (args': list mreg) (res': mreg)
+         (mv2: moves) (s: node)
+  | BSopdead (op: operation) (args: list reg) (res: reg)
+         (mv: moves) (s: node)
+  | BSload (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
+         (mv1: moves) (args': list mreg) (dst': mreg)
+         (mv2: moves) (s: node)
+  | BSloaddead (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
+         (mv: moves) (s: node)
+  | BSload2 (addr1 addr2: addressing) (args: list reg) (dst: reg)
+         (mv1: moves) (args1': list mreg) (dst1': mreg)
+         (mv2: moves) (args2': list mreg) (dst2': mreg)
+         (mv3: moves) (s: node)
+  | BSload2_1 (addr: addressing) (args: list reg) (dst: reg)
+         (mv1: moves) (args': list mreg) (dst': mreg)
+         (mv2: moves) (s: node)
+  | BSload2_2 (addr addr': addressing) (args: list reg) (dst: reg)
+         (mv1: moves) (args': list mreg) (dst': mreg)
+         (mv2: moves) (s: node)
+  | BSstore (chunk: memory_chunk) (addr: addressing) (args: list reg) (src: reg)
+         (mv1: moves) (args': list mreg) (src': mreg)
+         (s: node)
+  | BSstore2 (addr1 addr2: addressing) (args: list reg) (src: reg)
+         (mv1: moves) (args1': list mreg) (src1': mreg)
+         (mv2: moves) (args2': list mreg) (src2': mreg)
+         (s: node)
+  | BScall (sg: signature) (ros: reg + ident) (args: list reg) (res: reg)
+         (mv1: moves) (ros': mreg + ident) (mv2: moves) (s: node)
+  | BStailcall (sg: signature) (ros: reg + ident) (args: list reg)
+         (mv1: moves) (ros': mreg + ident)
+  | BSbuiltin (ef: external_function) (args: list reg) (res: reg)
+         (mv1: moves) (args': list mreg) (res': list mreg)
+         (mv2: moves) (s: node)
+  | BSannot (text: ident) (targs: list annot_arg) (args: list reg) (res: reg)
+         (mv: moves) (args': list loc) (s: node)
+  | BScond (cond: condition) (args: list reg)
+         (mv: moves) (args': list mreg) (s1 s2: node)
+  | BSjumptable (arg: reg)
+         (mv: moves) (arg': mreg) (tbl: list node)
+  | BSreturn (arg: option reg)
+         (mv: moves).
+
+(** Extract the move instructions at the beginning of block [b].
+  Return the list of moves and the suffix of [b] after the moves. *)
+
+Fixpoint extract_moves (accu: moves) (b: bblock) {struct b} : moves * bblock :=
+  match b with
+  | Lgetstack sl ofs ty dst :: b' =>
+      extract_moves ((S sl ofs ty, R dst) :: accu) b'
+  | Lsetstack src sl ofs ty :: b' =>
+      extract_moves ((R src, S sl ofs ty) :: accu) b'
+  | Lop op args res :: b' =>
+      match is_move_operation op args with
+      | Some arg => extract_moves ((R arg, R res) :: accu) b'
+      | None => (List.rev accu, b)
+      end
+  | _ =>
+      (List.rev accu, b)
+  end.
+
+Definition check_succ (s: node) (b: LTL.bblock) : bool :=
+  match b with
+  | Lbranch s' :: _ => peq s s'
+  | _ => false
+  end.
+
+Parameter eq_operation: forall (x y: operation), {x=y} + {x<>y}.
+Parameter eq_addressing: forall (x y: addressing), {x=y} + {x<>y}.
+Parameter eq_opt_addressing: forall (x y: option addressing), {x=y} + {x<>y}.
+Parameter eq_condition: forall (x y: condition), {x=y} + {x<>y}.
+Parameter eq_chunk: forall (x y: memory_chunk), {x=y} + {x<>y}.
+Parameter eq_external_function: forall (x y: external_function), {x=y} + {x<>y}.
+Parameter eq_signature: forall (x y: signature), {x=y} + {x<>y}.
+
+Notation "'do' X <- A ; B" := (match A with Some X => B | None => None end)
+         (at level 200, X ident, A at level 100, B at level 200)
+         : option_monad_scope.
+
+Notation "'assertion' A ; B" := (if A then B else None)
+         (at level 200, A at level 100, B at level 200)
+         : option_monad_scope.
+
+Local Open Scope option_monad_scope.
+
+(** Classify operations into moves, 64-bit integer operations, and other
+  arithmetic/logical operations. *)
+
+Inductive operation_kind: operation -> list reg -> Type :=
+  | operation_Omove: forall arg, operation_kind Omove (arg :: nil)
+  | operation_Omakelong: forall arg1 arg2, operation_kind Omakelong (arg1 :: arg2 :: nil)
+  | operation_Olowlong: forall arg, operation_kind Olowlong (arg :: nil)
+  | operation_Ohighlong: forall arg, operation_kind Ohighlong (arg :: nil)
+  | operation_other: forall op args, operation_kind op args.
+
+Definition classify_operation (op: operation) (args: list reg) : operation_kind op args :=
+  match op, args with
+  | Omove, arg::nil => operation_Omove arg
+  | Omakelong, arg1::arg2::nil => operation_Omakelong arg1 arg2
+  | Olowlong, arg::nil => operation_Olowlong arg
+  | Ohighlong, arg::nil => operation_Ohighlong arg
+  | op, args => operation_other op args
+  end.
+
+(** Check RTL instruction [i] against LTL basic block [b].  
+  On success, return [Some] with a [block_shape] describing the correspondence.
+  On error, return [None]. *)
+
+Definition pair_instr_block
+               (i: RTL.instruction) (b: LTL.bblock) : option block_shape :=
+  match i with
   | Inop s =>
-      Lnop s
+      let (mv, b1) := extract_moves nil b in
+      assertion (check_succ s b1); Some(BSnop mv s)
   | Iop op args res s =>
-      if Regset.mem res live!!pc then
-        if is_redundant_move op args res assign then
-          Lnop s
-        else 
-          Lop op (List.map assign args) (assign res) s
-      else
-        Lnop s
+      match classify_operation op args with
+      | operation_Omove arg =>
+          let (mv, b1) := extract_moves nil b in
+          assertion (check_succ s b1); Some(BSmove arg res mv s)
+      | operation_Omakelong arg1 arg2 =>
+          let (mv, b1) := extract_moves nil b in
+          assertion (check_succ s b1); Some(BSmakelong arg1 arg2 res mv s)
+      | operation_Olowlong arg =>
+          let (mv, b1) := extract_moves nil b in
+          assertion (check_succ s b1); Some(BSlowlong arg res mv s)
+      | operation_Ohighlong arg =>
+          let (mv, b1) := extract_moves nil b in
+          assertion (check_succ s b1); Some(BShighlong arg res mv s)
+      | operation_other _ _ =>
+          let (mv1, b1) := extract_moves nil b in
+          match b1 with
+          | Lop op' args' res' :: b2 =>
+              let (mv2, b3) := extract_moves nil b2 in
+              assertion (eq_operation op op');
+              assertion (check_succ s b3);
+              Some(BSop op args res mv1 args' res' mv2 s)
+          | _ =>
+              assertion (check_succ s b1);
+              Some(BSopdead op args res mv1 s)
+          end
+      end
   | Iload chunk addr args dst s =>
-      if Regset.mem dst live!!pc then
-        Lload chunk addr (List.map assign args) (assign dst) s
-      else
-        Lnop s
+      let (mv1, b1) := extract_moves nil b in
+      match b1 with
+      | Lload chunk' addr' args' dst' :: b2 =>
+          if eq_chunk chunk Mint64 then
+            assertion (eq_chunk chunk' Mint32);
+            let (mv2, b3) := extract_moves nil b2 in
+            match b3 with
+            | Lload chunk'' addr'' args'' dst'' :: b4 =>
+                let (mv3, b5) := extract_moves nil b4 in
+                assertion (eq_chunk chunk'' Mint32);
+                assertion (eq_addressing addr addr');
+                assertion (eq_opt_addressing (offset_addressing addr (Int.repr 4)) (Some addr''));
+                assertion (check_succ s b5);
+                Some(BSload2 addr addr'' args dst mv1 args' dst' mv2 args'' dst'' mv3 s)
+            | _ =>
+                assertion (check_succ s b3);
+                if (eq_addressing addr addr') then
+                  Some(BSload2_1 addr args dst mv1 args' dst' mv2 s)
+                else
+                 (assertion (eq_opt_addressing (offset_addressing addr (Int.repr 4)) (Some addr'));
+                  Some(BSload2_2 addr addr' args dst mv1 args' dst' mv2 s))
+            end
+          else (
+            let (mv2, b3) := extract_moves nil b2 in
+            assertion (eq_chunk chunk chunk');
+            assertion (eq_addressing addr addr');
+            assertion (check_succ s b3);
+            Some(BSload chunk addr args dst mv1 args' dst' mv2 s))
+      | _ =>
+          assertion (check_succ s b1);
+          Some(BSloaddead chunk addr args dst mv1 s)
+      end
   | Istore chunk addr args src s =>
-      Lstore chunk addr (List.map assign args) (assign src) s
-  | Icall sig ros args res s =>
-      Lcall sig (sum_left_map assign ros) (List.map assign args)
-                (assign res) s
-  | Itailcall sig ros args =>
-      Ltailcall sig (sum_left_map assign ros) (List.map assign args)
+      let (mv1, b1) := extract_moves nil b in
+      match b1 with
+      | Lstore chunk' addr' args' src' :: b2 =>
+          if eq_chunk chunk Mint64 then
+            let (mv2, b3) := extract_moves nil b2 in
+            match b3 with
+            | Lstore chunk'' addr'' args'' src'' :: b4 =>
+                assertion (eq_chunk chunk' Mint32);
+                assertion (eq_chunk chunk'' Mint32);
+                assertion (eq_addressing addr addr');
+                assertion (eq_opt_addressing (offset_addressing addr (Int.repr 4)) (Some addr''));
+                assertion (check_succ s b4);
+                Some(BSstore2 addr addr'' args src mv1 args' src' mv2 args'' src'' s)
+            | _ => None
+            end
+          else (
+            assertion (eq_chunk chunk chunk');
+            assertion (eq_addressing addr addr');
+            assertion (check_succ s b2);
+            Some(BSstore chunk addr args src mv1 args' src' s))
+      | _ => None
+      end
+  | Icall sg ros args res s =>
+      let (mv1, b1) := extract_moves nil b in
+      match b1 with
+      | Lcall sg' ros' :: b2 =>
+          let (mv2, b3) := extract_moves nil b2 in
+          assertion (eq_signature sg sg');
+          assertion (check_succ s b3);
+          Some(BScall sg ros args res mv1 ros' mv2 s)
+      | _ => None
+      end
+  | Itailcall sg ros args =>
+      let (mv1, b1) := extract_moves nil b in
+      match b1 with
+      | Ltailcall sg' ros' :: b2 =>
+          assertion (eq_signature sg sg');
+          Some(BStailcall sg ros args mv1 ros')
+      | _ => None
+      end
   | Ibuiltin ef args res s =>
-      Lbuiltin ef (List.map assign args) (assign res) s
-  | Icond cond args ifso ifnot =>
-      Lcond cond (List.map assign args) ifso ifnot
+      let (mv1, b1) := extract_moves nil b in
+      match b1 with
+      | Lbuiltin ef' args' res' :: b2 =>
+          let (mv2, b3) := extract_moves nil b2 in
+          assertion (eq_external_function ef ef');
+          assertion (check_succ s b3);
+          Some(BSbuiltin ef args res mv1 args' res' mv2 s)
+      | Lannot ef' args' :: b2 =>
+          assertion (eq_external_function ef ef');
+          assertion (check_succ s b2);
+          match ef with
+          | EF_annot txt typ => Some(BSannot txt typ args res mv1 args' s)
+          | _ => None
+          end
+      | _ => None
+      end
+  | Icond cond args s1 s2 =>
+      let (mv1, b1) := extract_moves nil b in
+      match b1 with
+      | Lcond cond' args' s1' s2' :: b2 =>
+          assertion (eq_condition cond cond');
+          assertion (peq s1 s1');
+          assertion (peq s2 s2');
+          Some(BScond cond args mv1 args' s1 s2)
+      | _ => None
+      end
   | Ijumptable arg tbl =>
-      Ljumptable (assign arg) tbl
-  | Ireturn optarg =>
-      Lreturn (option_map assign optarg)
+      let (mv1, b1) := extract_moves nil b in
+      match b1 with
+      | Ljumptable arg' tbl' :: b2 =>
+          assertion (list_eq_dec peq tbl tbl');
+          Some(BSjumptable arg mv1 arg' tbl)
+      | _ => None
+      end
+  | Ireturn arg =>
+      let (mv1, b1) := extract_moves nil b in
+      match b1 with
+      | Lreturn :: b2 => Some(BSreturn arg mv1)
+      | _ => None
+      end
+  end.
+
+(** Check all instructions of the RTL function [f1] against the corresponding
+  basic blocks of LTL function [f2].  Return a map from CFG nodes to
+  [block_shape] info. *)
+
+Definition pair_codes (f1: RTL.function) (f2: LTL.function) : PTree.t block_shape :=
+  PTree.combine
+    (fun opti optb => do i <- opti; do b <- optb; pair_instr_block i b)
+    (RTL.fn_code f1) (LTL.fn_code f2).
+
+(** Check the entry point code of the LTL function [f2].  It must be
+  a sequence of moves that branches to the same node as the entry point
+  of RTL function [f1]. *)
+
+Definition pair_entrypoints (f1: RTL.function) (f2: LTL.function) : option moves :=
+  do b <- (LTL.fn_code f2)!(LTL.fn_entrypoint f2);
+  let (mv, b1) := extract_moves nil b in
+  assertion (check_succ (RTL.fn_entrypoint f1) b1);
+  Some mv.
+
+(** * Representing sets of equations between RTL registers and LTL locations. *)
+
+(** The Rideau-Leroy validation algorithm manipulates sets of equations of
+  the form [pseudoreg = location [kind]], meaning:
+- if [kind = Full], the value of [location] in the generated LTL code is
+  the same as (or more defined than) the value of [pseudoreg] in the original
+  RTL code;
+- if [kind = Low], the value of [location] in the generated LTL code is
+  the same as (or more defined than) the low 32 bits of the 64-bit
+  integer value of [pseudoreg] in the original RTL code;
+- if [kind = High], the value of [location] in the generated LTL code is
+  the same as (or more defined than) the high 32 bits of the 64-bit
+  integer value of [pseudoreg] in the original RTL code.
+*)
+
+Inductive equation_kind : Type := Full | Low | High.
+
+Record equation := Eq {
+  ekind: equation_kind;
+  ereg: reg;
+  eloc: loc
+}.
+
+(** We use AVL finite sets to represent sets of equations.  Therefore, we need
+  total orders over equations and their components. *)
+
+Module IndexedEqKind <: INDEXED_TYPE.
+  Definition t := equation_kind.
+  Definition index (x: t) :=
+    match x with Full => 1%positive | Low => 2%positive | High => 3%positive end.
+  Lemma index_inj: forall x y, index x = index y -> x = y.
+  Proof. destruct x; destruct y; simpl; congruence. Qed.
+  Definition eq (x y: t) : {x=y} + {x<>y}.
+  Proof. decide equality. Defined.
+End IndexedEqKind.
+
+Module OrderedEqKind := OrderedIndexed(IndexedEqKind).
+
+(** This is an order over equations that is lexicgraphic on [ereg], then
+  [eloc], then [ekind]. *)
+
+Module OrderedEquation <: OrderedType.
+  Definition t := equation.
+  Definition eq (x y: t) := x = y.
+  Definition lt (x y: t) :=
+    Plt (ereg x) (ereg y) \/ (ereg x = ereg y /\
+    (OrderedLoc.lt (eloc x) (eloc y) \/ (eloc x = eloc y /\
+    OrderedEqKind.lt (ekind x) (ekind y)))).
+  Lemma eq_refl : forall x : t, eq x x.
+  Proof (@refl_equal t). 
+  Lemma eq_sym : forall x y : t, eq x y -> eq y x.
+  Proof (@sym_equal t).
+  Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z.
+  Proof (@trans_equal t).
+  Lemma lt_trans : forall x y z : t, lt x y -> lt y z -> lt x z.
+  Proof.
+    unfold lt; intros.
+    destruct H. 
+    destruct H0. left; eapply Plt_trans; eauto.
+    destruct H0. rewrite <- H0. auto.
+    destruct H. rewrite H. 
+    destruct H0. auto. 
+    destruct H0. right; split; auto.
+    intuition. 
+    left; eapply OrderedLoc.lt_trans; eauto.
+    left; congruence.
+    left; congruence.
+    right; split. congruence. eapply OrderedEqKind.lt_trans; eauto.
+  Qed.
+  Lemma lt_not_eq : forall x y : t, lt x y -> ~ eq x y.
+  Proof.
+    unfold lt, eq; intros; red; intros. subst y. intuition.
+    eelim Plt_strict; eauto.
+    eelim OrderedLoc.lt_not_eq; eauto. red; auto.
+    eelim OrderedEqKind.lt_not_eq; eauto. red; auto.
+  Qed.
+  Definition compare : forall x y : t, Compare lt eq x y.
+  Proof.
+    intros.
+    destruct (OrderedPositive.compare (ereg x) (ereg y)).
+  - apply LT. red; auto.
+  - destruct (OrderedLoc.compare (eloc x) (eloc y)).
+    + apply LT. red; auto. 
+    + destruct (OrderedEqKind.compare (ekind x) (ekind y)).
+      * apply LT. red; auto.
+      * apply EQ. red in e; red in e0; red in e1; red. 
+        destruct x; destruct y; simpl in *; congruence.
+      * apply GT. red; auto.
+   + apply GT. red; auto.
+  - apply GT. red; auto.
+  Defined.
+  Definition eq_dec (x y: t) : {x = y} + {x <> y}.
+  Proof.
+    intros. decide equality. 
+    apply Loc.eq.
+    apply peq.
+    apply IndexedEqKind.eq.
+  Defined.
+End OrderedEquation.
+
+(** This is an alternate order over equations that is lexicgraphic on
+  [eloc], then [ereg], then [ekind]. *)
+
+Module OrderedEquation' <: OrderedType.
+  Definition t := equation.
+  Definition eq (x y: t) := x = y.
+  Definition lt (x y: t) :=
+    OrderedLoc.lt (eloc x) (eloc y) \/ (eloc x = eloc y /\
+    (Plt (ereg x) (ereg y) \/ (ereg x = ereg y /\
+    OrderedEqKind.lt (ekind x) (ekind y)))).
+  Lemma eq_refl : forall x : t, eq x x.
+  Proof (@refl_equal t). 
+  Lemma eq_sym : forall x y : t, eq x y -> eq y x.
+  Proof (@sym_equal t).
+  Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z.
+  Proof (@trans_equal t).
+  Lemma lt_trans : forall x y z : t, lt x y -> lt y z -> lt x z.
+  Proof.
+    unfold lt; intros.
+    destruct H. 
+    destruct H0. left; eapply OrderedLoc.lt_trans; eauto. 
+    destruct H0. rewrite <- H0. auto.
+    destruct H. rewrite H. 
+    destruct H0. auto. 
+    destruct H0. right; split; auto.
+    intuition. 
+    left; eapply Plt_trans; eauto. 
+    left; congruence.
+    left; congruence.
+    right; split. congruence. eapply OrderedEqKind.lt_trans; eauto.
+  Qed.
+  Lemma lt_not_eq : forall x y : t, lt x y -> ~ eq x y.
+  Proof.
+    unfold lt, eq; intros; red; intros. subst y. intuition.
+    eelim OrderedLoc.lt_not_eq; eauto. red; auto.
+    eelim Plt_strict; eauto.
+    eelim OrderedEqKind.lt_not_eq; eauto. red; auto.
+  Qed.
+  Definition compare : forall x y : t, Compare lt eq x y.
+  Proof.
+    intros.
+    destruct (OrderedLoc.compare (eloc x) (eloc y)).
+  - apply LT. red; auto.
+  - destruct (OrderedPositive.compare (ereg x) (ereg y)).
+    + apply LT. red; auto. 
+    + destruct (OrderedEqKind.compare (ekind x) (ekind y)).
+      * apply LT. red; auto.
+      * apply EQ. red in e; red in e0; red in e1; red. 
+        destruct x; destruct y; simpl in *; congruence.
+      * apply GT. red; auto.
+   + apply GT. red; auto.
+  - apply GT. red; auto.
+  Defined.
+  Definition eq_dec: forall (x y: t), {x = y} + {x <> y} := OrderedEquation.eq_dec.
+End OrderedEquation'.
+
+Module EqSet := FSetAVLplus.Make(OrderedEquation).
+Module EqSet2 := FSetAVLplus.Make(OrderedEquation').
+
+(** We use a redundant representation for sets of equations, comprising
+  two AVL finite sets, containing the same elements, but ordered along
+  the two orders defined above.  Playing on properties of lexicographic
+  orders, this redundant representation enables us to quickly find
+  all equations involving a given RTL pseudoregister, or all equations
+  involving a given LTL location or overlapping location. *)
+
+Record eqs := mkeqs {
+  eqs1 :> EqSet.t;
+  eqs2 : EqSet2.t;
+  eqs_same: forall q, EqSet2.In q eqs2 <-> EqSet.In q eqs1
+}.
+
+(** * Operations on sets of equations *)
+
+(** The empty set of equations. *)
+
+Program Definition empty_eqs := mkeqs EqSet.empty EqSet2.empty _.
+Next Obligation.
+  split; intros. eelim EqSet2.empty_1; eauto. eelim EqSet.empty_1; eauto.
+Qed.
+
+(** Adding or removing an equation from a set. *)
+
+Program Definition add_equation (q: equation) (e: eqs) :=
+  mkeqs (EqSet.add q (eqs1 e)) (EqSet2.add q (eqs2 e)) _.
+Next Obligation.
+  split; intros.
+  destruct (OrderedEquation'.eq_dec q q0). 
+  apply EqSet.add_1; auto.
+  apply EqSet.add_2. apply (eqs_same e). apply EqSet2.add_3 with q; auto.
+  destruct (OrderedEquation.eq_dec q q0). 
+  apply EqSet2.add_1; auto.
+  apply EqSet2.add_2. apply (eqs_same e). apply EqSet.add_3 with q; auto.
+Qed.
+
+Program Definition remove_equation (q: equation) (e: eqs) :=
+  mkeqs (EqSet.remove q (eqs1 e)) (EqSet2.remove q (eqs2 e)) _.
+Next Obligation.
+  split; intros.
+  destruct (OrderedEquation'.eq_dec q q0). 
+  eelim EqSet2.remove_1; eauto.
+  apply EqSet.remove_2; auto. apply (eqs_same e). apply EqSet2.remove_3 with q; auto.
+  destruct (OrderedEquation.eq_dec q q0). 
+  eelim EqSet.remove_1; eauto.
+  apply EqSet2.remove_2; auto. apply (eqs_same e). apply EqSet.remove_3 with q; auto.
+Qed.
+
+(** [reg_unconstrained r e] is true if [e] contains no equations involving
+  the RTL pseudoregister [r].  In other words, all equations [r' = l [kind]]
+  in [e] are such that [r' <> r]. *)
+
+Definition select_reg_l (r: reg) (q: equation) := Pos.leb r (ereg q).
+Definition select_reg_h (r: reg) (q: equation) := Pos.leb (ereg q) r.
+
+Definition reg_unconstrained (r: reg) (e: eqs) : bool :=
+  negb (EqSet.mem_between (select_reg_l r) (select_reg_h r) (eqs1 e)).
+
+(** [loc_unconstrained l e] is true if [e] contains no equations involving
+  the LTL location [l] or a location that partially overlaps with [l].
+  In other words, all equations [r = l' [kind]] in [e] are such that
+  [Loc.diff l' l]. *)
+
+Definition select_loc_l (l: loc) :=
+  let lb := OrderedLoc.diff_low_bound l in
+  fun (q: equation) => match OrderedLoc.compare (eloc q) lb with LT _ => false | _ => true end.
+Definition select_loc_h (l: loc) :=
+  let lh := OrderedLoc.diff_high_bound l in
+  fun (q: equation) => match OrderedLoc.compare (eloc q) lh with GT _ => false | _ => true end.
+
+Definition loc_unconstrained (l: loc) (e: eqs) : bool :=
+  negb (EqSet2.mem_between (select_loc_l l) (select_loc_h l) (eqs2 e)).
+
+Definition reg_loc_unconstrained (r: reg) (l: loc) (e: eqs) : bool :=
+  reg_unconstrained r e && loc_unconstrained l e.
+
+(** [subst_reg r1 r2 e] simulates the effect of assigning [r2] to [r1] on [e].
+  All equations of the form [r1 = l [kind]] are replaced by [r2 = l [kind]].
+*)
+
+Definition subst_reg (r1 r2: reg) (e: eqs) : eqs :=
+  EqSet.fold
+    (fun q e => add_equation (Eq (ekind q) r2 (eloc q)) (remove_equation q e))
+    (EqSet.elements_between (select_reg_l r1) (select_reg_h r1) (eqs1 e))
+    e.
+
+(** [subst_reg_kind r1 k1 r2 k2 e] simulates the effect of assigning
+  the [k2] part of [r2] to the [k1] part of [r1] on [e].
+  All equations of the form [r1 = l [k1]] are replaced by [r2 = l [k2]].
+*)
+
+Definition subst_reg_kind (r1: reg) (k1: equation_kind) (r2: reg) (k2: equation_kind) (e: eqs) : eqs :=
+  EqSet.fold
+    (fun q e =>
+      if IndexedEqKind.eq (ekind q) k1
+      then add_equation (Eq k2 r2 (eloc q)) (remove_equation q e)
+      else e)
+    (EqSet.elements_between (select_reg_l r1) (select_reg_h r1) (eqs1 e))
+    e.
+
+(** [subst_loc l1 l2 e] simulates the effect of assigning [l2] to [l1] on [e].
+  All equations of the form [r = l1 [kind]] are replaced by [r = l2 [kind]].
+  Return [None] if [e] contains an equation of the form [r = l] with [l]
+  partially overlapping [l1]. 
+*)
+
+Definition subst_loc (l1 l2: loc) (e: eqs) : option eqs :=
+  EqSet2.fold
+    (fun q opte =>
+      match opte with
+      | None => None
+      | Some e =>
+          if Loc.eq l1 (eloc q) then
+            Some (add_equation (Eq (ekind q) (ereg q) l2) (remove_equation q e))
+          else
+            None
+      end)
+     (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e))
+     (Some e).
+
+(** [add_equations [r1...rN] [m1...mN] e] adds to [e] the [N] equations
+    [ri = R mi [Full]].  Return [None] if the two lists have different lengths.
+*)
+
+Fixpoint add_equations (rl: list reg) (ml: list mreg) (e: eqs) : option eqs :=
+  match rl, ml with
+  | nil, nil => Some e
+  | r1 :: rl, m1 :: ml => add_equations rl ml (add_equation (Eq Full r1 (R m1)) e)
+  | _, _ => None
+  end.
+
+(** [add_equations_args] is similar but additionally handles the splitting
+  of pseudoregisters of type [Tlong] in two locations containing the
+  two 32-bit halves of the 64-bit integer. *)
+
+Function add_equations_args (rl: list reg) (tyl: list typ) (ll: list loc) (e: eqs) : option eqs :=
+  match rl, tyl, ll with
+  | nil, nil, nil => Some e
+  | r1 :: rl, Tlong :: tyl, l1 :: l2 :: ll =>
+      add_equations_args rl tyl ll (add_equation (Eq Low r1 l2) (add_equation (Eq High r1 l1) e))
+  | r1 :: rl, (Tint|Tfloat) :: tyl, l1 :: ll =>
+      add_equations_args rl tyl ll (add_equation (Eq Full r1 l1) e)
+  | _, _, _ => None
+  end.
+
+(** [add_equations_res] is similar but is specialized to the case where
+  there is only one pseudo-register. *)
+
+Function add_equations_res (r: reg) (oty: option typ) (ll: list loc) (e: eqs) : option eqs :=
+  match oty with
+  | Some Tlong =>
+      match ll with
+      | l1 :: l2 :: nil => Some (add_equation (Eq Low r l2) (add_equation (Eq High r l1) e))
+      | _ => None
+      end
+  | _ =>
+      match ll with
+      | l1 :: nil => Some (add_equation (Eq Full r l1) e)
+      | _ => None
+      end
   end.
 
-Definition transf_fun (f: RTL.function) (live: PMap.t Regset.t)
-                      (assign: reg -> loc) : LTL.function :=
-  LTL.mkfunction
-     (RTL.fn_sig f)
-     (List.map assign (RTL.fn_params f))
-     (RTL.fn_stacksize f)
-     (PTree.map (transf_instr f live assign) (RTL.fn_code f))
-     (RTL.fn_entrypoint f).
+(** [remove_equations_res] is similar to [add_equations_res] but removes
+  equations instead of adding them. *)
+
+Function remove_equations_res (r: reg) (oty: option typ) (ll: list loc) (e: eqs) : option eqs :=
+  match oty with
+  | Some Tlong =>
+      match ll with
+      | l1 :: l2 :: nil =>
+          if Loc.diff_dec l2 l1
+          then Some (remove_equation (Eq Low r l2) (remove_equation (Eq High r l1) e))
+          else None
+      | _ => None
+      end
+  | _ =>
+      match ll with
+      | l1 :: nil => Some (remove_equation (Eq Full r l1) e)
+      | _ => None
+      end
+  end.
+
+(** [add_equations_ros] adds an equation, if needed, between an optional
+  pseudoregister and an optional machine register.  It is used for the
+  function argument of the [Icall] and [Itailcall] instructions. *)
+
+Definition add_equation_ros (ros: reg + ident) (ros': mreg + ident) (e: eqs) : option eqs :=
+  match ros, ros' with
+  | inl r, inl mr => Some(add_equation (Eq Full r (R mr)) e)
+  | inr id, inr id' => assertion (ident_eq id id'); Some e
+  | _, _ => None
+  end.
+
+(** [can_undef ml] returns true if all machine registers in [ml] are
+  unconstrained and can harmlessly be undefined. *)
+
+Fixpoint can_undef (ml: list mreg) (e: eqs) : bool :=
+  match ml with
+  | nil => true
+  | m1 :: ml => loc_unconstrained (R m1) e && can_undef ml e
+  end.
+
+Fixpoint can_undef_except (l: loc) (ml: list mreg) (e: eqs) : bool :=
+  match ml with
+  | nil => true
+  | m1 :: ml => 
+      (Loc.eq l (R m1) || loc_unconstrained (R m1) e) && can_undef_except l ml e
+  end.
+
+(** [no_caller_saves e] returns [e] if all caller-save locations are
+  unconstrained in [e].  In other words, [e] contains no equations
+  involving a caller-save register or [Outgoing] stack slot. *)
+
+Definition no_caller_saves (e: eqs) : bool :=
+  EqSet.for_all
+   (fun eq =>
+     match eloc eq with
+       | R r =>
+           zle 0 (index_int_callee_save r) || zle 0 (index_float_callee_save r)
+       | S Outgoing _ _ => false
+       | S _ _ _ => true
+       end)
+    e.
+
+(** [compat_left r l e] returns true if all equations in [e] that involve
+    [r] are of the form [r = l [Full]]. *)
+
+Definition compat_left (r: reg) (l: loc) (e: eqs) : bool :=
+  EqSet.for_all_between
+    (fun q =>
+        match ekind q with
+        | Full => Loc.eq l (eloc q)
+        | _ => false
+        end)
+    (select_reg_l r) (select_reg_h r)
+    (eqs1 e).
+
+(** [compat_left2 r l1 l2 e] returns true if all equations in [e] that involve
+    [r] are of the form [r = l1 [High]] or [r = l2 [Low]]. *)
+
+Definition compat_left2 (r: reg) (l1 l2: loc) (e: eqs) : bool :=
+  EqSet.for_all_between
+    (fun q =>
+        match ekind q with
+        | High => Loc.eq l1 (eloc q)
+        | Low => Loc.eq l2 (eloc q)
+        | _ => false
+        end)
+    (select_reg_l r) (select_reg_h r)
+    (eqs1 e).
+
+(** [ros_compatible_tailcall ros] returns true if [ros] is a function
+  name or a caller-save register.  This is used to check [Itailcall]
+  instructions. *)
+
+Definition ros_compatible_tailcall (ros: mreg + ident) : bool :=
+  match ros with
+  | inl r => In_dec mreg_eq r destroyed_at_call
+  | inr id => true
+  end.
+
+(** * The validator *)
+
+Definition destroyed_by_move (src dst: loc) :=
+  match src with
+  | S sl ofs ty => destroyed_by_getstack sl
+  | _ => destroyed_by_op Omove
+  end.
+
+(** Simulate the effect of a sequence of moves [mv] on a set of
+  equations [e].  The set [e] is the equations that must hold
+  after the sequence of moves.  Return the set of equations that
+  must hold before the sequence of moves.  Return [None] if the
+  set of equations [e] cannot hold after the sequence of moves. *)
+
+Fixpoint track_moves (mv: moves) (e: eqs) : option eqs :=
+  match mv with
+  | nil => Some e
+  | (src, dst) :: mv =>
+      do e1 <- track_moves mv e;
+      do e2 <- subst_loc dst src e1;
+      assertion (can_undef_except dst (destroyed_by_move src dst)) e1;
+      Some e2
+  end.
+
+(** [transfer_use_def args res args' res' undefs e] returns the set
+  of equations that must hold "before" in order for the equations [e]
+  to hold "after" the execution of RTL and LTL code of the following form:
+<<
+                RTL                            LTL
+         use pseudoregs args            use machine registers args'
+         define pseudoreg res           undefine machine registers undef
+                                        define machine register res'
+>>
+  As usual, [None] is returned if the equations [e] cannot hold after
+  this execution.
+*)
+
+Definition transfer_use_def (args: list reg) (res: reg) (args': list mreg) (res': mreg)
+                            (undefs: list mreg) (e: eqs) : option eqs :=
+  let e1 := remove_equation (Eq Full res (R res')) e in
+  assertion (reg_loc_unconstrained res (R res') e1);
+  assertion (can_undef undefs e1);
+  add_equations args args' e1.
+
+Definition kind_first_word := if big_endian then High else Low.
+Definition kind_second_word := if big_endian then Low else High.
+
+(** The core transfer function.  It takes a set [e] of equations that must
+  hold "after" and a block shape [shape] representing a matching pair
+  of an RTL instruction and an LTL basic block.  It returns the set of
+  equations that must hold "before" these instructions, or [None] if
+  impossible. *)
+
+Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option eqs :=
+  match shape with
+  | BSnop mv s =>
+      track_moves mv e
+  | BSmove src dst mv s =>
+      track_moves mv (subst_reg dst src e)
+  | BSmakelong src1 src2 dst mv s =>
+      let e1 := subst_reg_kind dst High src1 Full e in
+      let e2 := subst_reg_kind dst Low src2 Full e1 in
+      assertion (reg_unconstrained dst e2);
+      track_moves mv e2
+  | BSlowlong src dst mv s =>
+      let e1 := subst_reg_kind dst Full src Low e in
+      assertion (reg_unconstrained dst e1);
+      track_moves mv e1
+  | BShighlong src dst mv s =>
+      let e1 := subst_reg_kind dst Full src High e in
+      assertion (reg_unconstrained dst e1);
+      track_moves mv e1
+  | BSop op args res mv1 args' res' mv2 s =>
+      do e1 <- track_moves mv2 e;
+      do e2 <- transfer_use_def args res args' res' (destroyed_by_op op) e1;
+      track_moves mv1 e2
+  | BSopdead op args res mv s =>
+      assertion (reg_unconstrained res e);
+      track_moves mv e
+  | BSload chunk addr args dst mv1 args' dst' mv2 s =>
+      do e1 <- track_moves mv2 e;
+      do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1;
+      track_moves mv1 e2
+  | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s =>
+      do e1 <- track_moves mv3 e;
+      let e2 := remove_equation (Eq kind_second_word dst (R dst2')) e1 in
+      assertion (loc_unconstrained (R dst2') e2);
+      assertion (can_undef (destroyed_by_load Mint32 addr') e2);
+      do e3 <- add_equations args args2' e2;
+      do e4 <- track_moves mv2 e3;
+      let e5 := remove_equation (Eq kind_first_word dst (R dst1')) e4 in
+      assertion (loc_unconstrained (R dst1') e5);
+      assertion (can_undef (destroyed_by_load Mint32 addr) e5);
+      assertion (reg_unconstrained dst e5);
+      do e6 <- add_equations args args1' e5;
+      track_moves mv1 e6
+  | BSload2_1 addr args dst mv1 args' dst' mv2 s =>
+      do e1 <- track_moves mv2 e;
+      let e2 := remove_equation (Eq kind_first_word dst (R dst')) e1 in
+      assertion (reg_loc_unconstrained dst (R dst') e2);
+      assertion (can_undef (destroyed_by_load Mint32 addr) e2);
+      do e3 <- add_equations args args' e2;
+      track_moves mv1 e3
+  | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s =>
+      do e1 <- track_moves mv2 e;
+      let e2 := remove_equation (Eq kind_second_word dst (R dst')) e1 in
+      assertion (reg_loc_unconstrained dst (R dst') e2);
+      assertion (can_undef (destroyed_by_load Mint32 addr') e2);
+      do e3 <- add_equations args args' e2;
+      track_moves mv1 e3
+  | BSloaddead chunk addr args dst mv s =>
+      assertion (reg_unconstrained dst e);
+      track_moves mv e
+  | BSstore chunk addr args src mv args' src' s =>
+      assertion (can_undef (destroyed_by_store chunk addr) e);
+      do e1 <- add_equations (src :: args) (src' :: args') e;
+      track_moves mv e1
+  | BSstore2 addr addr' args src mv1 args1' src1' mv2 args2' src2' s =>
+      assertion (can_undef (destroyed_by_store Mint32 addr') e);
+      do e1 <- add_equations args args2' 
+                  (add_equation (Eq kind_second_word src (R src2')) e);
+      do e2 <- track_moves mv2 e1;
+      assertion (can_undef (destroyed_by_store Mint32 addr) e2);
+      do e3 <- add_equations args args1' 
+                  (add_equation (Eq kind_first_word src (R src1')) e2);
+      track_moves mv1 e3
+  | BScall sg ros args res mv1 ros' mv2 s =>
+      let args' := loc_arguments sg in
+      let res' := map R (loc_result sg) in
+      do e1 <- track_moves mv2 e;
+      do e2 <- remove_equations_res res (sig_res sg) res' e1;
+      assertion (forallb (fun l => reg_loc_unconstrained res l e2) res');
+      assertion (no_caller_saves e2);
+      do e3 <- add_equation_ros ros ros' e2;
+      do e4 <- add_equations_args args (sig_args sg) args' e3;
+      track_moves mv1 e4
+  | BStailcall sg ros args mv1 ros' =>
+      let args' := loc_arguments sg in
+      assertion (tailcall_is_possible sg);
+      assertion (opt_typ_eq sg.(sig_res) f.(RTL.fn_sig).(sig_res));
+      assertion (ros_compatible_tailcall ros');
+      do e1 <- add_equation_ros ros ros' empty_eqs;
+      do e2 <- add_equations_args args (sig_args sg) args' e1;
+      track_moves mv1 e2
+  | BSbuiltin ef args res mv1 args' res' mv2 s =>
+      do e1 <- track_moves mv2 e;
+      let args' := map R args' in
+      let res' := map R res' in
+      do e2 <- remove_equations_res res (sig_res (ef_sig ef)) res' e1;
+      assertion (reg_unconstrained res e2);
+      assertion (forallb (fun l => loc_unconstrained l e2) res');
+      assertion (can_undef (destroyed_by_builtin ef) e2);
+      do e3 <- add_equations_args args (sig_args (ef_sig ef)) args' e2;
+      track_moves mv1 e3
+  | BSannot txt typ args res mv1 args' s =>
+      do e1 <- add_equations_args args (annot_args_typ typ) args' e;
+      track_moves mv1 e1
+  | BScond cond args mv args' s1 s2 =>
+      assertion (can_undef (destroyed_by_cond cond) e);
+      do e1 <- add_equations args args' e;
+      track_moves mv e1
+  | BSjumptable arg mv arg' tbl =>
+      assertion (can_undef destroyed_by_jumptable e);
+      track_moves mv (add_equation (Eq Full arg (R arg')) e)
+  | BSreturn None mv =>
+      track_moves mv empty_eqs
+  | BSreturn (Some arg) mv =>
+      let arg' := map R (loc_result (RTL.fn_sig f)) in
+      do e1 <- add_equations_res arg (sig_res (RTL.fn_sig f)) arg' empty_eqs;
+      track_moves mv e1
+  end.
+
+(** The main transfer function for the dataflow analysis.  Like [transfer_aux],
+  it infers the equations that must hold "before" as a function of the
+  equations that must hold "after".  It also handles error propagation
+  and reporting. *)
+
+Definition transfer (f: RTL.function) (shapes: PTree.t block_shape)
+                    (pc: node) (after: res eqs) : res eqs :=
+  match after with
+  | Error _ => after
+  | OK e =>
+      match shapes!pc with
+      | None => Error(MSG "At PC " :: POS pc :: MSG ": unmatched block" :: nil)
+      | Some shape =>
+          match transfer_aux f shape e with
+          | None => Error(MSG "At PC " :: POS pc :: MSG ": invalid register allocation" :: nil)
+          | Some e' => OK e'
+          end
+      end
+  end.
+
+(** The semilattice for dataflow analysis.  Operates on analysis results
+  of type [res eqs], that is, either a set of equations or an error
+  message.  Errors correspond to [Top].  Sets of equations are ordered
+  by inclusion. *)
+
+Module LEq <: SEMILATTICE.
+
+  Definition t := res eqs.
+
+  Definition eq (x y: t) :=
+    match x, y with
+    | OK a, OK b => EqSet.Equal a b
+    | Error _, Error _ => True
+    | _, _ => False
+    end.
+
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    intros; destruct x; simpl; auto. red; tauto. 
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq; intros; destruct x; destruct y; auto. 
+    red in H; red; intros. rewrite H; tauto.
+  Qed. 
+
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq; intros. destruct x; destruct y; try contradiction; destruct z; auto.
+    red in H; red in H0; red; intros. rewrite H. auto. 
+  Qed.
+
+  Definition beq (x y: t) := 
+    match x, y with
+    | OK a, OK b => EqSet.equal a b
+    | Error _, Error _ => true
+    | _, _ => false
+    end.
+
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq, eq; intros. destruct x; destruct y. 
+    apply EqSet.equal_2. auto.
+    discriminate.
+    discriminate.
+    auto.
+  Qed.
+
+  Definition ge (x y: t) := 
+    match x, y with
+    | OK a, OK b => EqSet.Subset b a
+    | Error _, _ => True
+    | _, Error _ => False
+    end.
+
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge, EqSet.Equal, EqSet.Subset; intros. 
+    destruct x; destruct y; auto. intros; rewrite H; auto.
+  Qed.
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge, EqSet.Subset; intros.
+    destruct x; auto; destruct y; try contradiction.
+    destruct z; eauto. 
+  Qed.
+
+  Definition bot: t := OK empty_eqs.
+ 
+  Lemma ge_bot: forall x, ge x bot.
+  Proof.
+    unfold ge, bot, EqSet.Subset; simpl; intros.
+    destruct x; auto. intros. elim (EqSet.empty_1 H).
+  Qed.
+
+  Program Definition lub (x y: t) : t :=
+    match x, y return _ with
+    | OK a, OK b =>
+        OK (mkeqs (EqSet.union (eqs1 a) (eqs1 b))
+                  (EqSet2.union (eqs2 a) (eqs2 b)) _)
+    | OK _, Error _ => y
+    | Error _, _ => x
+    end.
+  Next Obligation.
+    split; intros. 
+    apply EqSet2.union_1 in H. destruct H; rewrite eqs_same in H. 
+    apply EqSet.union_2; auto. apply EqSet.union_3; auto.
+    apply EqSet.union_1 in H. destruct H; rewrite <- eqs_same in H. 
+    apply EqSet2.union_2; auto. apply EqSet2.union_3; auto.
+  Qed.
+
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold lub, ge, EqSet.Subset; intros. 
+    destruct x; destruct y; auto. 
+    intros; apply EqSet.union_2; auto. 
+  Qed.
+
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold lub, ge, EqSet.Subset; intros. 
+    destruct x; destruct y; auto. 
+    intros; apply EqSet.union_3; auto. 
+  Qed.
+
+End LEq.
+
+(** The backward dataflow solver is an instantiation of Kildall's algorithm. *)
+
+Module DS := Backward_Dataflow_Solver(LEq)(NodeSetBackward).
+
+(** The control-flow graph that the solver operates on is the CFG of
+  block shapes built by the structural check phase.  Here is its notion
+  of successors. *)
+
+Definition successors_block_shape (bsh: block_shape) : list node :=
+  match bsh with
+  | BSnop mv s => s :: nil
+  | BSmove src dst mv s => s :: nil
+  | BSmakelong src1 src2 dst mv s => s :: nil
+  | BSlowlong src dst mv s => s :: nil
+  | BShighlong src dst mv s => s :: nil
+  | BSop op args res mv1 args' res' mv2 s => s :: nil
+  | BSopdead op args res mv s => s :: nil
+  | BSload chunk addr args dst mv1 args' dst' mv2 s => s :: nil
+  | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s => s :: nil
+  | BSload2_1 addr args dst mv1 args' dst' mv2 s => s :: nil
+  | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s => s :: nil
+  | BSloaddead chunk addr args dst mv s => s :: nil
+  | BSstore chunk addr args src mv1 args' src' s => s :: nil
+  | BSstore2 addr addr' args src mv1 args1' src1' mv2 args2' src2' s => s :: nil
+  | BScall sg ros args res mv1 ros' mv2 s => s :: nil
+  | BStailcall sg ros args mv1 ros' => nil
+  | BSbuiltin ef args res mv1 args' res' mv2 s => s :: nil
+  | BSannot txt typ args res mv1 args' s => s :: nil
+  | BScond cond args mv args' s1 s2 => s1 :: s2 :: nil
+  | BSjumptable arg mv arg' tbl => tbl
+  | BSreturn optarg mv => nil
+  end.
+
+Definition analyze (f: RTL.function) (bsh: PTree.t block_shape) :=
+  DS.fixpoint (PTree.map1 successors_block_shape bsh) (transfer f bsh) nil.
+
+(** * Validating and translating functions and programs *)
+
+(** Checking equations at function entry point.  The RTL function receives
+  its arguments in the list [rparams] of pseudoregisters.  The LTL function
+  receives them in the list [lparams] of locations dictated by the
+  calling conventions, with arguments of type [Tlong] being split in
+  two 32-bit halves.  We check that the equations [e] that must hold
+  at the beginning of the functions are compatible with these calling
+  conventions, in the sense that all equations involving a pseudoreg
+  [r] from [rparams] is of the form [r = l [Full]] or [r = l [Low]]
+  or [r = l [High]], where [l] is the corresponding element of [lparams].
+
+  Note that [e] can contain additional equations [r' = l [kind]]
+  involving pseudoregs [r'] not in [rparams]: these equations are
+  automatically satisfied since the initial value of [r'] is [Vundef]. *)
+
+Function compat_entry (rparams: list reg) (tys: list typ) (lparams: list loc) (e: eqs)
+                      {struct rparams} : bool :=
+  match rparams, tys, lparams with
+  | nil, nil, nil => true
+  | r1 :: rl, Tlong :: tyl, l1 :: l2 :: ll =>
+      compat_left2 r1 l1 l2 e && compat_entry rl tyl ll e
+  | r1 :: rl, (Tint|Tfloat) :: tyl, l1 :: ll =>
+      compat_left r1 l1 e && compat_entry rl tyl ll e
+  | _, _, _ => false
+  end.
+
+(** Checking the satisfiability of equations inferred at function entry
+  point.  We also check that the RTL and LTL functions agree in signature
+  and stack size. *)
+
+Definition check_entrypoints_aux (rtl: RTL.function) (ltl: LTL.function) (e1: eqs) : option unit :=
+  do mv <- pair_entrypoints rtl ltl;
+  do e2 <- track_moves mv e1;
+  assertion (compat_entry (RTL.fn_params rtl)
+                          (sig_args (RTL.fn_sig rtl))
+                          (loc_parameters (RTL.fn_sig rtl)) e2);
+  assertion (can_undef destroyed_at_function_entry e2);
+  assertion (zeq (RTL.fn_stacksize rtl) (LTL.fn_stacksize ltl));
+  assertion (eq_signature (RTL.fn_sig rtl) (LTL.fn_sig ltl));
+  Some tt.
+
+Local Close Scope option_monad_scope.
+Local Open Scope error_monad_scope.
+
+Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
+                            (bsh: PTree.t block_shape)
+                            (a: PMap.t LEq.t): res unit :=
+  do e1 <- transfer rtl bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
+  match check_entrypoints_aux rtl ltl e1 with
+  | None => Error (msg "invalid register allocation at entry point")
+  | Some _ => OK tt
+  end.
+
+(** Putting it all together, this is the validation function for
+  a source RTL function and an LTL function generated by the external
+  register allocator. *)
+
+Definition check_function (rtl: RTL.function) (ltl: LTL.function) : res unit :=
+  let bsh := pair_codes rtl ltl in
+  match analyze rtl bsh with
+  | None => Error (msg "allocation analysis diverges")
+  | Some a => check_entrypoints rtl ltl bsh a
+  end.
 
-(** The translation of a function performs liveness analysis,
-  construction and coloring of the inference graph, and per-instruction
-  transformation as described above. *)
+(** [regalloc] is the external register allocator.  It is written in OCaml
+  in file [backend/Regalloc.ml]. *)
 
-Definition live0 (f: RTL.function) (live: PMap.t Regset.t) :=
-  transfer f f.(RTL.fn_entrypoint) live!!(f.(RTL.fn_entrypoint)).
+Parameter regalloc: RTL.function -> res LTL.function.
 
-Open Scope string_scope.
+(** Register allocation followed by validation. *)
 
 Definition transf_function (f: RTL.function) : res LTL.function :=
   match type_function f with
-  | Error msg => Error msg
-  | OK env =>
-    match analyze f with
-    | None => Error (msg "Liveness analysis failure")
-    | Some live =>
-        match regalloc f live (live0 f live) env with
-        | None => Error (msg "Incorrect graph coloring")
-        | Some assign => OK (transf_fun f live assign)
-        end
-    end
+  | Error m => Error m
+  | OK tyenv =>
+      match regalloc f with
+      | Error m => Error m
+      | OK tf => do x <- check_function f tf; OK tf
+      end
   end.
 
 Definition transf_fundef (fd: RTL.fundef) : res LTL.fundef :=
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index 8dfa2d4..76e9744 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -10,403 +10,1335 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Correctness proof for the [Allocation] pass (translation from
+(** Correctness proof for the [Allocation] pass (validated translation from
   RTL to LTL). *)
 
 Require Import FSets.
 Require Import Coqlib.
+Require Import Ordered.
 Require Import Errors.
 Require Import Maps.
+Require Import Lattice.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
 Require Import Memory.
 Require Import Events.
-Require Import Smallstep.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Registers.
 Require Import RTL.
 Require Import RTLtyping.
-Require Import Liveness.
+Require Import Kildall.
 Require Import Locations.
-Require Import LTL.
 Require Import Conventions.
-Require Import Coloring.
-Require Import Coloringproof.
+Require Import LTL.
 Require Import Allocation.
 
-(** * Properties of allocated locations *)
+(** * Soundness of structural checks *)
 
-(** We list here various properties of the locations [alloc r],
-  where [r] is an RTL pseudo-register and [alloc] is the register
-  assignment returned by [regalloc]. *)
+Definition expand_move (sd: loc * loc) : instruction :=
+  match sd with
+  | (R src, R dst) => Lop Omove (src::nil) dst
+  | (S sl ofs ty, R dst) => Lgetstack sl ofs ty dst
+  | (R src, S sl ofs ty) => Lsetstack src sl ofs ty
+  | (S _ _ _, S _ _ _) => Lreturn    (**r should never happen *)
+  end.
 
-Section REGALLOC_PROPERTIES.
+Definition expand_moves (mv: moves) (k: bblock) : bblock :=
+  List.map expand_move mv ++ k.
 
-Variable f: RTL.function.
-Variable env: regenv.
-Variable live: PMap.t Regset.t.
-Variable alloc: reg -> loc.
-Hypothesis ALLOC: regalloc f live (live0 f live) env = Some alloc.
+Definition wf_move (sd: loc * loc) : Prop :=
+  match sd with
+  | (S _ _ _, S _ _ _) => False
+  | _ => True
+  end.
 
-Lemma regalloc_noteq_diff:
-  forall r1 l2,
-  alloc r1 <> l2 -> Loc.diff (alloc r1) l2.
+Definition wf_moves (mv: moves) : Prop :=
+  forall sd, In sd mv -> wf_move sd.
+
+Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Prop :=
+  | ebs_nop: forall mv s k,
+      wf_moves mv ->
+      expand_block_shape (BSnop mv s)
+                         (Inop s)
+                         (expand_moves mv (Lbranch s :: k))
+  | ebs_move: forall src dst mv s k,
+      wf_moves mv ->
+      expand_block_shape (BSmove src dst mv s)
+                         (Iop Omove (src :: nil) dst s)
+                         (expand_moves mv (Lbranch s :: k))
+  | ebs_makelong: forall src1 src2 dst mv s k,
+      wf_moves mv ->
+      expand_block_shape (BSmakelong src1 src2 dst mv s)
+                         (Iop Omakelong (src1 :: src2 :: nil) dst s)
+                         (expand_moves mv (Lbranch s :: k))
+  | ebs_lowlong: forall src dst mv s k,
+      wf_moves mv ->
+      expand_block_shape (BSlowlong src dst mv s)
+                         (Iop Olowlong (src :: nil) dst s)
+                         (expand_moves mv (Lbranch s :: k))
+  | ebs_highlong: forall src dst mv s k,
+      wf_moves mv ->
+      expand_block_shape (BShighlong src dst mv s)
+                         (Iop Ohighlong (src :: nil) dst s)
+                         (expand_moves mv (Lbranch s :: k))
+  | ebs_op: forall op args res mv1 args' res' mv2 s k,
+      wf_moves mv1 -> wf_moves mv2 ->
+      expand_block_shape (BSop op args res mv1 args' res' mv2 s)
+                         (Iop op args res s)
+                         (expand_moves mv1
+                           (Lop op args' res' :: expand_moves mv2 (Lbranch s :: k)))
+  | ebs_op_dead: forall op args res mv s k,
+      wf_moves mv ->
+      expand_block_shape (BSopdead op args res mv s)
+                         (Iop op args res s)
+                         (expand_moves mv (Lbranch s :: k))
+  | ebs_load: forall chunk addr args dst mv1 args' dst' mv2 s k,
+      wf_moves mv1 -> wf_moves mv2 ->
+      expand_block_shape (BSload chunk addr args dst mv1 args' dst' mv2 s)
+                         (Iload chunk addr args dst s)
+                         (expand_moves mv1
+                           (Lload chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k)))
+  | ebs_load2: forall addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s k,
+      wf_moves mv1 -> wf_moves mv2 -> wf_moves mv3 ->
+      offset_addressing addr (Int.repr 4) = Some addr2 ->
+      expand_block_shape (BSload2 addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s)
+                         (Iload Mint64 addr args dst s)
+                         (expand_moves mv1
+                           (Lload Mint32 addr args1' dst1' ::
+                           expand_moves mv2
+                             (Lload Mint32 addr2 args2' dst2' :: 
+                              expand_moves mv3 (Lbranch s :: k))))
+  | ebs_load2_1: forall addr args dst mv1 args' dst' mv2 s k,
+      wf_moves mv1 -> wf_moves mv2 ->
+      expand_block_shape (BSload2_1 addr args dst mv1 args' dst' mv2 s)
+                         (Iload Mint64 addr args dst s)
+                         (expand_moves mv1
+                           (Lload Mint32 addr args' dst' ::
+                            expand_moves mv2 (Lbranch s :: k)))
+  | ebs_load2_2: forall addr addr2 args dst mv1 args' dst' mv2 s k,
+      wf_moves mv1 -> wf_moves mv2 ->
+      offset_addressing addr (Int.repr 4) = Some addr2 ->
+      expand_block_shape (BSload2_2 addr addr2 args dst mv1 args' dst' mv2 s)
+                         (Iload Mint64 addr args dst s)
+                         (expand_moves mv1
+                           (Lload Mint32 addr2 args' dst' ::
+                            expand_moves mv2 (Lbranch s :: k)))
+  | ebs_load_dead: forall chunk addr args dst mv s k,
+      wf_moves mv ->
+      expand_block_shape (BSloaddead chunk addr args dst mv s)
+                         (Iload chunk addr args dst s)
+                         (expand_moves mv (Lbranch s :: k))
+  | ebs_store: forall chunk addr args src mv1 args' src' s k,
+      wf_moves mv1 ->
+      expand_block_shape (BSstore chunk addr args src mv1 args' src' s)
+                         (Istore chunk addr args src s)
+                         (expand_moves mv1
+                           (Lstore chunk addr args' src' :: Lbranch s :: k))
+  | ebs_store2: forall addr addr2 args src mv1 args1' src1' mv2 args2' src2' s k,
+      wf_moves mv1 -> wf_moves mv2 ->
+      offset_addressing addr (Int.repr 4) = Some addr2 ->
+      expand_block_shape (BSstore2 addr addr2 args src mv1 args1' src1' mv2 args2' src2' s)
+                         (Istore Mint64 addr args src s)
+                         (expand_moves mv1
+                           (Lstore Mint32 addr args1' src1' ::
+                            expand_moves mv2
+                             (Lstore Mint32 addr2 args2' src2' ::
+                              Lbranch s :: k)))
+  | ebs_call: forall sg ros args res mv1 ros' mv2 s k,
+      wf_moves mv1 -> wf_moves mv2 ->
+      expand_block_shape (BScall sg ros args res mv1 ros' mv2 s)
+                         (Icall sg ros args res s)
+                         (expand_moves mv1
+                           (Lcall sg ros' :: expand_moves mv2 (Lbranch s :: k)))
+  | ebs_tailcall: forall sg ros args mv ros' k,
+      wf_moves mv ->
+      expand_block_shape (BStailcall sg ros args mv ros')
+                         (Itailcall sg ros args)
+                         (expand_moves mv (Ltailcall sg ros' :: k))
+  | ebs_builtin: forall ef args res mv1 args' res' mv2 s k,
+      wf_moves mv1 -> wf_moves mv2 ->
+      expand_block_shape (BSbuiltin ef args res mv1 args' res' mv2 s)
+                         (Ibuiltin ef args res s)
+                         (expand_moves mv1
+                           (Lbuiltin ef args' res' :: expand_moves mv2 (Lbranch s :: k)))
+  | ebs_annot: forall txt typ args res mv args' s k,
+      wf_moves mv ->
+      expand_block_shape (BSannot txt typ args res mv args' s)
+                         (Ibuiltin (EF_annot txt typ) args res s)
+                         (expand_moves mv
+                           (Lannot (EF_annot txt typ) args' :: Lbranch s :: k))
+  | ebs_cond: forall cond args mv args' s1 s2 k,
+      wf_moves mv ->
+      expand_block_shape (BScond cond args mv args' s1 s2)
+                         (Icond cond args s1 s2)
+                         (expand_moves mv (Lcond cond args' s1 s2 :: k))
+  | ebs_jumptable: forall arg mv arg' tbl k,
+      wf_moves mv ->
+      expand_block_shape (BSjumptable arg mv arg' tbl)
+                         (Ijumptable arg tbl)
+                         (expand_moves mv (Ljumptable arg' tbl :: k))
+  | ebs_return: forall optarg mv k,
+      wf_moves mv ->
+      expand_block_shape (BSreturn optarg mv)
+                         (Ireturn optarg)
+                         (expand_moves mv (Lreturn :: k)).
+
+Ltac MonadInv :=
+  match goal with
+  | [ H: match ?x with Some _ => _ | None => None end = Some _ |- _ ] =>
+        destruct x as [] eqn:? ; [MonadInv|discriminate]
+  | [ H: match ?x with left _ => _ | right _ => None end = Some _ |- _ ] =>
+        destruct x; [MonadInv|discriminate]
+  | [ H: match negb (proj_sumbool ?x) with true => _ | false => None end = Some _ |- _ ] =>
+        destruct x; [discriminate|simpl in H; MonadInv]
+  | [ H: match negb ?x with true => _ | false => None end = Some _ |- _ ] =>
+        destruct x; [discriminate|simpl in H; MonadInv]
+  | [ H: match ?x with true => _ | false => None end = Some _ |- _ ] =>
+        destruct x as [] eqn:? ; [MonadInv|discriminate]
+  | [ H: match ?x with (_, _) => _ end = Some _ |- _ ] =>
+        destruct x as [] eqn:? ; MonadInv
+  | [ H: Some _ = Some _ |- _ ] =>
+        inv H; MonadInv
+  | [ H: None = Some _ |- _ ] =>
+        discriminate
+  | _ =>
+        idtac
+  end.
+
+Lemma extract_moves_sound:
+  forall b mv b',
+  extract_moves nil b = (mv, b') ->
+  wf_moves mv /\ b = expand_moves mv b'.
 Proof.
-  intros. apply loc_acceptable_noteq_diff. 
-  eapply regalloc_acceptable; eauto.
-  auto.
-Qed.   
+  assert (BASE:
+      forall accu b,
+      wf_moves accu ->
+      wf_moves (List.rev accu) /\ expand_moves (List.rev accu) b = expand_moves (List.rev accu) b).
+   intros; split; auto.
+   red; intros. apply H. rewrite <- in_rev in H0; auto.
+
+  assert (IND: forall b accu mv b',
+          extract_moves accu b = (mv, b') ->
+          wf_moves accu ->
+          wf_moves mv /\ expand_moves (List.rev accu) b = expand_moves mv b').
+    induction b; simpl; intros. 
+    inv H. auto.
+    destruct a; try (inv H; apply BASE; auto; fail).
+    destruct (is_move_operation op args) as [arg|] eqn:E.
+    exploit is_move_operation_correct; eauto. intros [A B]; subst.
+    (* reg-reg move *)
+    exploit IHb; eauto.
+      red; intros. destruct H1; auto. subst sd; exact I.
+    intros [P Q].
+    split; auto. rewrite <- Q. simpl. unfold expand_moves. rewrite map_app. 
+    rewrite app_ass. simpl. auto.
+    inv H; apply BASE; auto.
+    (* stack-reg move *)
+    exploit IHb; eauto.
+      red; intros. destruct H1; auto. subst sd; exact I.
+    intros [P Q].
+    split; auto. rewrite <- Q. simpl. unfold expand_moves. rewrite map_app. 
+    rewrite app_ass. simpl. auto.
+    (* reg-stack move *)
+    exploit IHb; eauto.
+      red; intros. destruct H1; auto. subst sd; exact I.
+    intros [P Q].
+    split; auto. rewrite <- Q. simpl. unfold expand_moves. rewrite map_app. 
+    rewrite app_ass. simpl. auto.
+
+  intros. exploit IND; eauto. red; intros. elim H0. 
+Qed.
+
+Lemma check_succ_sound:
+  forall s b, check_succ s b = true -> exists k, b = Lbranch s :: k.
+Proof.
+  intros. destruct b; simpl in H; try discriminate. 
+  destruct i; try discriminate. 
+  destruct (peq s s0); simpl in H; inv H. exists b; auto.
+Qed.
+
+Ltac UseParsingLemmas :=
+  match goal with
+  | [ H: extract_moves nil _ = (_, _) |- _ ] =>
+      destruct (extract_moves_sound _ _ _ H); clear H; subst; UseParsingLemmas
+  | [ H: check_succ _ _ = true |- _ ] =>
+      try discriminate;
+      destruct (check_succ_sound _ _ H); clear H; subst; UseParsingLemmas
+  | _ => idtac
+  end.
 
-Lemma regalloc_notin_notin:
-  forall r ll,
-  ~(In (alloc r) ll) -> Loc.notin (alloc r) ll.
+Lemma pair_instr_block_sound:
+  forall i b bsh,
+  pair_instr_block i b = Some bsh -> expand_block_shape bsh i b.
 Proof.
-  intros. apply loc_acceptable_notin_notin. 
-  eapply regalloc_acceptable; eauto. auto.
+  intros; destruct i; simpl in H; MonadInv; UseParsingLemmas.
+(* nop *)
+  econstructor; eauto.
+(* op *)
+  destruct (classify_operation o l). 
+  (* move *)
+  MonadInv; UseParsingLemmas. econstructor; eauto. 
+  (* makelong *)
+  MonadInv; UseParsingLemmas. econstructor; eauto.
+  (* lowlong *)
+  MonadInv; UseParsingLemmas. econstructor; eauto.
+  (* highlong *)
+  MonadInv; UseParsingLemmas. econstructor; eauto.
+  (* other ops *)
+  MonadInv. destruct b0.
+  MonadInv; UseParsingLemmas.
+  destruct i; MonadInv; UseParsingLemmas. 
+  eapply ebs_op; eauto.
+  inv H0. eapply ebs_op_dead; eauto.
+(* load *)
+  destruct b0.
+  MonadInv; UseParsingLemmas.
+  destruct i; MonadInv; UseParsingLemmas.
+  destruct (eq_chunk m Mint64).
+  MonadInv; UseParsingLemmas. 
+  destruct b; MonadInv; UseParsingLemmas. destruct i; MonadInv; UseParsingLemmas. 
+  eapply ebs_load2; eauto.
+  destruct (eq_addressing a addr).
+  MonadInv. inv H2. eapply ebs_load2_1; eauto.
+  MonadInv. inv H2. eapply ebs_load2_2; eauto.
+  MonadInv; UseParsingLemmas. eapply ebs_load; eauto.
+  inv H. eapply ebs_load_dead; eauto.
+(* store *)
+  destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas.
+  destruct (eq_chunk m Mint64).
+  MonadInv; UseParsingLemmas.
+  destruct b; MonadInv. destruct i; MonadInv; UseParsingLemmas.
+  eapply ebs_store2; eauto.
+  MonadInv; UseParsingLemmas.
+  eapply ebs_store; eauto.
+(* call *)
+  destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto. 
+(* tailcall *)
+  destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto.
+(* builtin *) 
+  destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas.
+  econstructor; eauto.
+  destruct ef; inv H. econstructor; eauto. 
+(* cond *)
+  destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto.
+(* jumptable *)
+  destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto.
+(* return *)
+  destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto.
 Qed.
 
-Lemma regalloc_notin_notin_2:
-  forall l rl,
-  ~(In l (map alloc rl)) -> Loc.notin l (map alloc rl).
+Lemma matching_instr_block:
+  forall f1 f2 pc bsh i,
+  (pair_codes f1 f2)!pc = Some bsh ->
+  (RTL.fn_code f1)!pc = Some i ->
+  exists b, (LTL.fn_code f2)!pc = Some b /\ expand_block_shape bsh i b.
 Proof.
-  induction rl; simpl; intros. auto. 
-  split. apply Loc.diff_sym. apply regalloc_noteq_diff. tauto.
-  apply IHrl. tauto.
+  intros. unfold pair_codes in H. rewrite PTree.gcombine in H; auto. rewrite H0 in H.
+  destruct (LTL.fn_code f2)!pc as [b|]. 
+  exists b; split; auto. apply pair_instr_block_sound; auto. 
+  discriminate.
 Qed.
-  
-Lemma regalloc_norepet_norepet:
-  forall rl,
-  list_norepet (List.map alloc rl) ->
-  Loc.norepet (List.map alloc rl).
+
+(** * Properties of equations *)
+
+Module ESF := FSetFacts.Facts(EqSet).
+Module ESP := FSetProperties.Properties(EqSet).
+Module ESD := FSetDecide.Decide(EqSet).
+
+Definition sel_val (k: equation_kind) (v: val) : val :=
+  match k with
+  | Full => v
+  | Low => Val.loword v
+  | High => Val.hiword v
+  end.
+
+(** A set of equations [e] is satisfied in a RTL pseudoreg state [rs]
+  and an LTL location state [ls] if, for every equation [r = l [k]] in [e],
+  [sel_val k (rs#r)] (the [k] fragment of [r]'s value in the RTL code)
+  is less defined than [ls l] (the value of [l] in the LTL code). *)
+
+Definition satisf (rs: regset) (ls: locset) (e: eqs) : Prop :=
+  forall q, EqSet.In q e -> Val.lessdef (sel_val (ekind q) rs#(ereg q)) (ls (eloc q)).
+
+Lemma empty_eqs_satisf:
+  forall rs ls, satisf rs ls empty_eqs.
+Proof.
+  unfold empty_eqs; intros; red; intros. ESD.fsetdec.
+Qed.
+
+Lemma satisf_incr:
+  forall rs ls (e1 e2: eqs),
+  satisf rs ls e2 -> EqSet.Subset e1 e2 -> satisf rs ls e1.
 Proof.
-  induction rl; simpl; intros.
-  apply Loc.norepet_nil.
-  inversion H.
-  apply Loc.norepet_cons.
-  eapply regalloc_notin_notin; eauto.
+  unfold satisf; intros. apply H. ESD.fsetdec. 
+Qed.
+
+Lemma satisf_undef_reg:
+  forall rs ls e r,
+  satisf rs ls e ->
+  satisf (rs#r <- Vundef) ls e.
+Proof.
+  intros; red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r); auto.
+  destruct (ekind q); simpl; auto.
+Qed.
+
+Lemma add_equation_lessdef:
+  forall rs ls q e,
+  satisf rs ls (add_equation q e) -> Val.lessdef (sel_val (ekind q) rs#(ereg q)) (ls (eloc q)).
+Proof.
+  intros. apply H. unfold add_equation. simpl. apply EqSet.add_1. auto. 
+Qed.
+
+Lemma add_equation_satisf:
+  forall rs ls q e,
+  satisf rs ls (add_equation q e) -> satisf rs ls e.
+Proof.
+  intros. eapply satisf_incr; eauto. unfold add_equation. simpl. ESD.fsetdec. 
+Qed.
+
+Lemma add_equations_satisf:
+  forall rs ls rl ml e e',
+  add_equations rl ml e = Some e' ->
+  satisf rs ls e' -> satisf rs ls e.
+Proof.
+  induction rl; destruct ml; simpl; intros; MonadInv.
   auto.
+  eapply add_equation_satisf; eauto. 
 Qed.
 
-Lemma regalloc_not_temporary:
-  forall (r: reg), 
-  Loc.notin (alloc r) temporaries.
+Lemma add_equations_lessdef:
+  forall rs ls rl ml e e',
+  add_equations rl ml e = Some e' ->
+  satisf rs ls e' ->
+  Val.lessdef_list (rs##rl) (reglist ls ml).
 Proof.
-  intros. apply temporaries_not_acceptable. 
-  eapply regalloc_acceptable; eauto.
+  induction rl; destruct ml; simpl; intros; MonadInv.
+  constructor.
+  constructor; eauto.
+  apply add_equation_lessdef with (e := e) (q := Eq Full a (R m)).
+  eapply add_equations_satisf; eauto.
 Qed.
 
-Lemma regalloc_disj_temporaries:
-  forall (rl: list reg),
-  Loc.disjoint (List.map alloc rl) temporaries.
+Lemma add_equations_args_satisf:
+  forall rs ls rl tyl ll e e',
+  add_equations_args rl tyl ll e = Some e' ->
+  satisf rs ls e' -> satisf rs ls e.
 Proof.
-  intros. 
-  apply Loc.notin_disjoint. intros.
-  generalize (list_in_map_inv _ _ _ H). intros [r [EQ IN]].
-  subst x. apply regalloc_not_temporary; auto.
+  intros until e'. functional induction (add_equations_args rl tyl ll e); intros.
+  inv H; auto. 
+  eapply add_equation_satisf. eapply add_equation_satisf. eauto. 
+  eapply add_equation_satisf. eauto.
+  eapply add_equation_satisf. eauto.
+  congruence.
 Qed.
 
-End REGALLOC_PROPERTIES. 
+Lemma val_longofwords_eq:
+  forall v,
+  Val.has_type v Tlong ->
+  Val.longofwords (Val.hiword v) (Val.loword v) = v.
+Proof.
+  intros. red in H. destruct v; try contradiction.
+  reflexivity.
+  simpl. rewrite Int64.ofwords_recompose. auto.
+Qed.
 
-(** * Semantic agreement between RTL registers and LTL locations *)
+Lemma add_equations_args_lessdef:
+  forall rs ls rl tyl ll e e',
+  add_equations_args rl tyl ll e = Some e' ->
+  satisf rs ls e' ->
+  Val.has_type_list (rs##rl) tyl ->
+  Val.lessdef_list (rs##rl) (decode_longs tyl (map ls ll)).
+Proof.
+  intros until e'. functional induction (add_equations_args rl tyl ll e); simpl; intros.
+- inv H; auto.
+- destruct H1. constructor; auto. 
+  rewrite <- (val_longofwords_eq (rs#r1)); auto. apply Val.longofwords_lessdef. 
+  eapply add_equation_lessdef with (q := Eq High r1 l1). 
+  eapply add_equation_satisf. eapply add_equations_args_satisf; eauto. 
+  eapply add_equation_lessdef with (q := Eq Low r1 l2). 
+  eapply add_equations_args_satisf; eauto.
+- destruct H1. constructor; auto. 
+  eapply add_equation_lessdef with (q := Eq Full r1 l1). eapply add_equations_args_satisf; eauto.
+- destruct H1. constructor; auto. 
+  eapply add_equation_lessdef with (q := Eq Full r1 l1). eapply add_equations_args_satisf; eauto.
+- discriminate.
+Qed.
 
-Require Import LTL.
-Module RegsetP := Properties(Regset).
-
-Section AGREE.
-
-Variable f: RTL.function.
-Variable env: regenv.
-Variable flive: PMap.t Regset.t.
-Variable assign: reg -> loc.
-Hypothesis REGALLOC: regalloc f flive (live0 f flive) env = Some assign.
-
-(** Remember the core of the code transformation performed in module
-  [Allocation]: every reference to register [r] is replaced by
-  a reference to location [assign r].  We will shortly prove
-  the semantic equivalence between the original code and the transformed code.
-  The key tool to do this is the following relation between
-  a register set [rs] in the original RTL program and a location set
-  [ls] in the transformed LTL program.  The two sets agree if
-  they assign identical values to matching registers and locations,
-  that is, the value of register [r] in [rs] is the same as
-  the value of location [assign r] in [ls].  However, this equality
-  needs to hold only for live registers [r].  If [r] is dead at
-  the current point, its value is never used later, hence the value
-  of [assign r] can be arbitrary. *)
-
-Definition agree (live: Regset.t) (rs: regset) (ls: locset) : Prop :=
-  forall (r: reg), Regset.In r live -> rs#r = ls (assign r).
-
-(** What follows is a long list of lemmas expressing properties
-  of the [agree_live_regs] predicate that are useful for the 
-  semantic equivalence proof.  First: two register sets that agree
-  on a given set of live registers also agree on a subset of
-  those live registers. *)
-
-Lemma agree_increasing:
-  forall live1 live2 rs ls,
-  Regset.Subset live2 live1 -> agree live1 rs ls ->
-  agree live2 rs ls.
-Proof.
-  unfold agree; intros. 
-  apply H0. apply H. auto.
-Qed.
-
-Lemma agree_succ:
-  forall n s rs ls live i,
-  analyze f = Some live ->
-  f.(RTL.fn_code)!n = Some i ->
-  In s (RTL.successors_instr i) ->
-  agree live!!n rs ls ->
-  agree (transfer f s live!!s) rs ls.
-Proof.
-  intros.
-  apply agree_increasing with (live!!n).
-  eapply Liveness.analyze_solution; eauto. 
+Lemma add_equation_ros_satisf:
+  forall rs ls ros mos e e',
+  add_equation_ros ros mos e = Some e' ->
+  satisf rs ls e' -> satisf rs ls e.
+Proof.
+  unfold add_equation_ros; intros. destruct ros; destruct mos; MonadInv.
+  eapply add_equation_satisf; eauto.
   auto.
 Qed.
 
-(** Some useful special cases of [agree_increasing]. *)
+Lemma remove_equation_satisf:
+  forall rs ls q e,
+  satisf rs ls e -> satisf rs ls (remove_equation q e).
+Proof.
+  intros. eapply satisf_incr; eauto. unfold remove_equation; simpl. ESD.fsetdec. 
+Qed.
 
-Lemma agree_reg_live:
-  forall r live rs ls,
-  agree (reg_live r live) rs ls -> agree live rs ls.
+Lemma remove_equation_res_satisf:
+  forall rs ls r oty ll e e',
+  remove_equations_res r oty ll e = Some e' ->
+  satisf rs ls e -> satisf rs ls e'.
 Proof.
-  intros. apply agree_increasing with (reg_live r live); auto.
-  red. apply RegsetP.subset_add_2. apply RegsetP.subset_refl.
+  intros. functional inversion H. 
+  apply remove_equation_satisf. apply remove_equation_satisf; auto.
+  apply remove_equation_satisf; auto.
 Qed.
 
-Lemma agree_reg_list_live:
-  forall rl live rs ls,
-  agree (reg_list_live rl live) rs ls -> agree live rs ls.
+Remark select_reg_l_monotone:
+  forall r q1 q2,
+  OrderedEquation.eq q1 q2 \/ OrderedEquation.lt q1 q2 ->
+  select_reg_l r q1 = true -> select_reg_l r q2 = true.
 Proof.
-  induction rl; simpl; intros.
-  assumption. 
-  apply agree_reg_live with a. apply IHrl. assumption.
+  unfold select_reg_l; intros. destruct H.
+  red in H. congruence.
+  rewrite Pos.leb_le in *. red in H. destruct H as [A | [A B]]. 
+  red in A. zify; omega.
+  rewrite <- A; auto.
 Qed.
 
-Lemma agree_reg_sum_live:
-  forall ros live rs ls,
-  agree (reg_sum_live ros live) rs ls -> agree live rs ls.
+Remark select_reg_h_monotone:
+  forall r q1 q2,
+  OrderedEquation.eq q1 q2 \/ OrderedEquation.lt q2 q1 ->
+  select_reg_h r q1 = true -> select_reg_h r q2 = true.
 Proof.
-  intros. destruct ros; simpl in H.
-  apply agree_reg_live with r; auto.
-  auto.  
+  unfold select_reg_h; intros. destruct H.
+  red in H. congruence.
+  rewrite Pos.leb_le in *. red in H. destruct H as [A | [A B]]. 
+  red in A. zify; omega.
+  rewrite A; auto.
 Qed.
 
-(** Agreement over a set of live registers just extended with [r]
-  implies equality of the values of [r] and [assign r]. *)
+Remark select_reg_charact:
+  forall r q, select_reg_l r q = true /\ select_reg_h r q = true <-> ereg q = r.
+Proof.
+  unfold select_reg_l, select_reg_h; intros; split.
+  rewrite ! Pos.leb_le. unfold reg; zify; omega.
+  intros. rewrite H. rewrite ! Pos.leb_refl; auto. 
+Qed.
 
-Lemma agree_eval_reg:
-  forall r live rs ls,
-  agree (reg_live r live) rs ls -> rs#r = ls (assign r).
+Lemma reg_unconstrained_sound:
+  forall r e q,
+  reg_unconstrained r e = true ->
+  EqSet.In q e ->
+  ereg q <> r.
 Proof.
-  intros. apply H. apply Regset.add_1. auto. 
+  unfold reg_unconstrained; intros. red; intros.
+  apply select_reg_charact in H1. 
+  assert (EqSet.mem_between (select_reg_l r) (select_reg_h r) e = true).
+  {
+    apply EqSet.mem_between_2 with q; auto.
+    exact (select_reg_l_monotone r).
+    exact (select_reg_h_monotone r).
+    tauto.
+    tauto.
+  }
+  rewrite H2 in H; discriminate.
 Qed.
 
-(** Same, for a list of registers. *)
+Lemma reg_unconstrained_satisf:
+  forall r e rs ls v,
+  reg_unconstrained r e = true ->
+  satisf rs ls e ->
+  satisf (rs#r <- v) ls e.
+Proof.
+  red; intros. rewrite PMap.gso. auto. eapply reg_unconstrained_sound; eauto. 
+Qed.
 
-Lemma agree_eval_regs:
-  forall rl live rs ls,
-  agree (reg_list_live rl live) rs ls ->
-  rs##rl = List.map ls (List.map assign rl).
+Remark select_loc_l_monotone:
+  forall l q1 q2,
+  OrderedEquation'.eq q1 q2 \/ OrderedEquation'.lt q1 q2 ->
+  select_loc_l l q1 = true -> select_loc_l l q2 = true.
 Proof.
-  induction rl; simpl; intros.
+  unfold select_loc_l; intros. set (lb := OrderedLoc.diff_low_bound l) in *.
+  destruct H.
+  red in H. subst q2; auto. 
+  assert (eloc q1 = eloc q2 \/ OrderedLoc.lt (eloc q1) (eloc q2)).
+    red in H. tauto. 
+  destruct H1. rewrite <- H1; auto. 
+  destruct (OrderedLoc.compare (eloc q2) lb); auto. 
+  assert (OrderedLoc.lt (eloc q1) lb) by (eapply OrderedLoc.lt_trans; eauto). 
+  destruct (OrderedLoc.compare (eloc q1) lb). 
   auto.
-  f_equal.
-  apply agree_eval_reg with live. 
-  apply agree_reg_list_live with rl. auto.
-  eapply IHrl. eexact H.
-Qed.
-
-(** Agreement is insensitive to the current values of the temporary
-  machine registers. *)
-
-Lemma agree_exten:
-  forall live rs ls ls',
-  agree live rs ls ->
-  (forall l, Loc.notin l temporaries -> ls' l = ls l) ->
-  agree live rs ls'.
-Proof.
-  unfold agree; intros. 
-  rewrite H0. apply H. auto. eapply regalloc_not_temporary; eauto.
-Qed.
-
-Lemma agree_undef_temps:
-  forall live rs ls,
-  agree live rs ls -> agree live rs (undef_temps ls).
-Proof.
-  intros. apply agree_exten with ls; auto. 
-  intros. apply Locmap.guo; auto. 
-Qed.
-
-(** If a register is dead, assigning it an arbitrary value in [rs]
-    and leaving [ls] unchanged preserves agreement.  (This corresponds
-    to an operation over a dead register in the original program
-    that is turned into a no-op in the transformed program.) *)
-
-Lemma agree_assign_dead:
-  forall live r rs ls v,
-  ~Regset.In r live ->
-  agree live rs ls ->
-  agree live (rs#r <- v) ls.
-Proof.
-  unfold agree; intros.
-  case (Reg.eq r r0); intro.
-  subst r0. contradiction.
-  rewrite Regmap.gso; auto. 
-Qed.
-
-(** Setting [r] to value [v] in [rs]
-  and simultaneously setting [assign r] to value [v] in [ls]
-  preserves agreement, provided that all live registers except [r]
-  are mapped to locations other than that of [r]. *)
-
-Lemma agree_assign_live:
-  forall live r rs ls v,
-  (forall s,
-     Regset.In s live -> s <> r -> assign s <> assign r) ->
-  agree (reg_dead r live) rs ls ->
-  agree live (rs#r <- v) (Locmap.set (assign r) v ls).
-Proof.
-  unfold agree; intros. rewrite Regmap.gsspec.
-  destruct (peq r0 r).
-  subst r0. rewrite Locmap.gss. auto.
-  rewrite Locmap.gso. apply H0. apply Regset.remove_2; auto.
-  eapply regalloc_noteq_diff. eauto. apply sym_not_equal. apply H. auto.  auto.
-Qed.
-
-(** This is a special case of the previous lemma where the value [v]
-  being stored is not arbitrary, but is the value of
-  another register [arg].  (This corresponds to a register-register move
-  instruction.)  In this case, the condition can be weakened:
-  it suffices that all live registers except [arg] and [res] 
-  are mapped to locations other than that of [res]. *)
-
-Lemma agree_move_live:
-  forall live arg res rs (ls: locset),
-  (forall r,
-     Regset.In r live -> r <> res -> r <> arg -> 
-     assign r <> assign res) ->
-  agree (reg_live arg (reg_dead res live)) rs ls ->
-  agree live (rs#res <- (rs#arg)) (Locmap.set (assign res) (ls (assign arg)) ls).
-Proof.
-  unfold agree; intros. rewrite Regmap.gsspec. destruct (peq r res). 
-  subst r. rewrite Locmap.gss. apply H0.
-  apply Regset.add_1; auto.
-  destruct (Reg.eq r arg).
-  subst r.
-  replace (Locmap.set (assign res) (ls (assign arg)) ls (assign arg))
-     with (ls (assign arg)).
-  apply H0. apply Regset.add_1. auto.
-  symmetry. destruct (Loc.eq (assign arg) (assign res)).
-  rewrite <- e. apply Locmap.gss.
-  apply Locmap.gso. eapply regalloc_noteq_diff; eauto.
-
-  rewrite Locmap.gso. apply H0. apply Regset.add_2. apply Regset.remove_2. auto. auto.
-  eapply regalloc_noteq_diff; eauto. apply sym_not_equal. apply H; auto.
-Qed.
-
-(** Yet another special case corresponding to the case of 
-  a redundant move. *)
-
-Lemma agree_redundant_move_live:
-  forall live arg res rs (ls: locset),
-  (forall r,
-     Regset.In r live -> r <> res -> r <> arg -> 
-     assign r <> assign res) ->
-  agree (reg_live arg (reg_dead res live)) rs ls ->
-  assign res = assign arg ->
-  agree live (rs#res <- (rs#arg)) ls.
+  eelim OrderedLoc.lt_not_eq; eauto.
+  eelim OrderedLoc.lt_not_eq. eapply OrderedLoc.lt_trans. eexact l1. eexact H2. red; auto.
+Qed.
+
+Remark select_loc_h_monotone:
+  forall l q1 q2,
+  OrderedEquation'.eq q1 q2 \/ OrderedEquation'.lt q2 q1 ->
+  select_loc_h l q1 = true -> select_loc_h l q2 = true.
 Proof.
-  intros. 
-  apply agree_exten with (Locmap.set (assign res) (ls (assign arg)) ls).
-  eapply agree_move_live; eauto. 
-  intros. symmetry. rewrite H1. destruct (Loc.eq l (assign arg)).
-  subst l. apply Locmap.gss.
-  apply Locmap.gso. eapply regalloc_noteq_diff; eauto. 
-Qed.
-
-(** This complicated lemma states agreement between the states after
-  a function call, provided that the states before the call agree
-  and that calling conventions are respected. *)
-
-Lemma agree_postcall:
-  forall live args ros res rs v (ls: locset),
-  (forall r,
-    Regset.In r live -> r <> res ->
-    ~(In (assign r) destroyed_at_call)) ->
-  (forall r,
-    Regset.In r live -> r <> res -> assign r <> assign res) ->
-  agree (reg_list_live args (reg_sum_live ros (reg_dead res live))) rs ls ->
-  agree live (rs#res <- v) (Locmap.set (assign res) v (postcall_locs ls)).
+  unfold select_loc_h; intros. set (lb := OrderedLoc.diff_high_bound l) in *.
+  destruct H.
+  red in H. subst q2; auto. 
+  assert (eloc q2 = eloc q1 \/ OrderedLoc.lt (eloc q2) (eloc q1)).
+    red in H. tauto. 
+  destruct H1. rewrite H1; auto. 
+  destruct (OrderedLoc.compare (eloc q2) lb); auto. 
+  assert (OrderedLoc.lt lb (eloc q1)) by (eapply OrderedLoc.lt_trans; eauto). 
+  destruct (OrderedLoc.compare (eloc q1) lb). 
+  eelim OrderedLoc.lt_not_eq. eapply OrderedLoc.lt_trans. eexact l1. eexact H2. red; auto.
+  eelim OrderedLoc.lt_not_eq. eexact H2. apply OrderedLoc.eq_sym; auto.
+  auto.
+Qed.
+
+Remark select_loc_charact:
+  forall l q,
+  select_loc_l l q = false \/ select_loc_h l q = false <-> Loc.diff l (eloc q).
 Proof.
-  intros. 
-  assert (agree (reg_dead res live) rs ls).
-  apply agree_reg_sum_live with ros. 
-  apply agree_reg_list_live with args. assumption.
-  red; intros. rewrite Regmap.gsspec. destruct (peq r res). 
-  subst r. rewrite Locmap.gss. auto.
-  rewrite Locmap.gso. transitivity (ls (assign r)). 
-  apply H2. apply Regset.remove_2; auto. 
-  unfold postcall_locs.
-  assert (~In (assign r) temporaries).
-    apply Loc.notin_not_in. eapply regalloc_not_temporary; eauto.
-  assert (~In (assign r) destroyed_at_call).
-    apply H. auto. auto.
-  caseEq (assign r); auto. intros m ASG. rewrite <- ASG. 
-  destruct (In_dec Loc.eq (assign r) temporaries). contradiction.
-  destruct (In_dec Loc.eq (assign r) destroyed_at_call). contradiction.
+  unfold select_loc_l, select_loc_h; intros; split; intros.
+  apply OrderedLoc.outside_interval_diff. 
+  destruct H.
+  left. destruct (OrderedLoc.compare (eloc q) (OrderedLoc.diff_low_bound l)); assumption || discriminate.
+  right. destruct (OrderedLoc.compare (eloc q) (OrderedLoc.diff_high_bound l)); assumption || discriminate.
+  exploit OrderedLoc.diff_outside_interval. eauto. 
+  intros [A | A].
+  left. destruct (OrderedLoc.compare (eloc q) (OrderedLoc.diff_low_bound l)).
+  auto.
+  eelim OrderedLoc.lt_not_eq; eauto. 
+  eelim OrderedLoc.lt_not_eq. eapply OrderedLoc.lt_trans; eauto. red; auto.
+  right. destruct (OrderedLoc.compare (eloc q) (OrderedLoc.diff_high_bound l)).
+  eelim OrderedLoc.lt_not_eq. eapply OrderedLoc.lt_trans; eauto. red; auto.
+  eelim OrderedLoc.lt_not_eq; eauto. apply OrderedLoc.eq_sym; auto. 
   auto.
-  eapply regalloc_noteq_diff; eauto. apply sym_not_eq. auto.
 Qed.
 
-(** Agreement between the initial register set at RTL function entry
-  and the location set at LTL function entry. *)
+Lemma loc_unconstrained_sound:
+  forall l e q,
+  loc_unconstrained l e = true ->
+  EqSet.In q e ->
+  Loc.diff l (eloc q).
+Proof.
+  unfold loc_unconstrained; intros. 
+  destruct (select_loc_l l q) eqn:SL.
+  destruct (select_loc_h l q) eqn:SH.
+  assert (EqSet2.mem_between (select_loc_l l) (select_loc_h l) (eqs2 e) = true).
+  {
+    apply EqSet2.mem_between_2 with q; auto.
+    exact (select_loc_l_monotone l).
+    exact (select_loc_h_monotone l).
+    apply eqs_same. auto. 
+  }
+  rewrite H1 in H; discriminate.
+  apply select_loc_charact; auto.
+  apply select_loc_charact; auto.
+Qed.
+
+Lemma loc_unconstrained_satisf:
+  forall rs ls k r l e v,
+  satisf rs ls (remove_equation (Eq k r l) e) ->
+  loc_unconstrained l (remove_equation (Eq k r l) e) = true ->
+  Val.lessdef (sel_val k rs#r) v ->
+  satisf rs (Locmap.set l v ls) e.
+Proof.
+  intros; red; intros. 
+  destruct (OrderedEquation.eq_dec q (Eq k r l)). 
+  subst q; simpl. rewrite Locmap.gss. auto.
+  assert (EqSet.In q (remove_equation (Eq k r l) e)).
+    simpl. ESD.fsetdec.  
+  rewrite Locmap.gso. apply H; auto. eapply loc_unconstrained_sound; eauto. 
+Qed.
+
+Lemma reg_loc_unconstrained_sound:
+  forall r l e q,
+  reg_loc_unconstrained r l e = true ->
+  EqSet.In q e ->
+  ereg q <> r /\ Loc.diff l (eloc q).
+Proof.
+  intros. destruct (andb_prop _ _ H). 
+  split. eapply reg_unconstrained_sound; eauto. eapply loc_unconstrained_sound; eauto.
+Qed.
+
+Lemma parallel_assignment_satisf:
+  forall k r l e rs ls v v',
+  Val.lessdef (sel_val k v) v' ->
+  reg_loc_unconstrained r l (remove_equation (Eq k r l) e) = true ->
+  satisf rs ls (remove_equation (Eq k r l) e) ->
+  satisf (rs#r <- v) (Locmap.set l v' ls) e.
+Proof.
+  intros; red; intros.
+  destruct (OrderedEquation.eq_dec q (Eq k r l)).
+  subst q; simpl. rewrite Regmap.gss; rewrite Locmap.gss; auto.
+  assert (EqSet.In q (remove_equation {| ekind := k; ereg := r; eloc := l |} e)).
+    simpl. ESD.fsetdec.
+  exploit reg_loc_unconstrained_sound; eauto. intros [A B].
+  rewrite Regmap.gso; auto. rewrite Locmap.gso; auto. 
+Qed.
+
+Lemma parallel_assignment_satisf_2:
+  forall rs ls res res' oty e e' v v',
+  remove_equations_res res oty res' e = Some e' ->
+  satisf rs ls e' ->
+  reg_unconstrained res e' = true ->
+  forallb (fun l => loc_unconstrained l e') res' = true ->
+  Val.lessdef v v' ->
+  satisf (rs#res <- v) (Locmap.setlist res' (encode_long oty v') ls) e.
+Proof.
+  intros; red; intros.
+  functional inversion H.
+- (* Two 32-bit halves *)
+  subst.
+  set (e' := remove_equation {| ekind := Low; ereg := res; eloc := l2 |}
+          (remove_equation {| ekind := High; ereg := res; eloc := l1 |} e)) in *.
+  simpl in H2. InvBooleans. simpl.
+  destruct (OrderedEquation.eq_dec q (Eq Low res l2)).
+  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gss.
+  apply Val.loword_lessdef; auto. 
+  destruct (OrderedEquation.eq_dec q (Eq High res l1)).
+  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gso by auto. rewrite Locmap.gss.
+  apply Val.hiword_lessdef; auto.
+  assert (EqSet.In q e'). unfold e', remove_equation; simpl; ESD.fsetdec. 
+  rewrite Regmap.gso. rewrite ! Locmap.gso. auto. 
+  eapply loc_unconstrained_sound; eauto.
+  eapply loc_unconstrained_sound; eauto.
+  eapply reg_unconstrained_sound; eauto.
+- (* One location *)
+  subst. simpl in H2. InvBooleans.
+  replace (encode_long oty v') with (v' :: nil).
+  set (e' := remove_equation {| ekind := Full; ereg := res; eloc := l1 |} e) in *.
+  destruct (OrderedEquation.eq_dec q (Eq Full res l1)). 
+  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gss. auto.
+  assert (EqSet.In q e'). unfold e', remove_equation; simpl. ESD.fsetdec. 
+  simpl. rewrite Regmap.gso. rewrite Locmap.gso. auto.
+  eapply loc_unconstrained_sound; eauto.
+  eapply reg_unconstrained_sound; eauto.
+  destruct oty as [[]|]; reflexivity || contradiction.
+Qed.
+
+Lemma in_subst_reg:
+  forall r1 r2 q (e: eqs),
+  EqSet.In q e ->
+  ereg q = r1 /\ EqSet.In (Eq (ekind q) r2 (eloc q)) (subst_reg r1 r2 e)
+  \/ ereg q <> r1 /\ EqSet.In q (subst_reg r1 r2 e).
+Proof.
+  intros r1 r2 q e0 IN0. unfold subst_reg.
+  set (f := fun (q: EqSet.elt) e => add_equation (Eq (ekind q) r2 (eloc q)) (remove_equation q e)).
+  set (elt := EqSet.elements_between (select_reg_l r1) (select_reg_h r1) e0).
+  assert (IN_ELT: forall q, EqSet.In q elt <-> EqSet.In q e0 /\ ereg q = r1).
+  {
+    intros. unfold elt. rewrite EqSet.elements_between_iff.
+    rewrite select_reg_charact. tauto. 
+    exact (select_reg_l_monotone r1).
+    exact (select_reg_h_monotone r1).
+  }
+  set (P := fun e1 e2 =>
+         EqSet.In q e1 ->
+         EqSet.In (Eq (ekind q) r2 (eloc q)) e2).
+  assert (P elt (EqSet.fold f elt e0)).
+  {
+    apply ESP.fold_rec; unfold P; intros.
+    - ESD.fsetdec.
+    - simpl. red in H1. apply H1 in H3. destruct H3. 
+      + subst x. ESD.fsetdec. 
+      + rewrite ESF.add_iff. rewrite ESF.remove_iff. 
+        destruct (OrderedEquation.eq_dec x {| ekind := ekind q; ereg := r2; eloc := eloc q |}); auto.
+        left. subst x; auto. 
+  }
+  set (Q := fun e1 e2 =>
+         ~EqSet.In q e1 ->
+         EqSet.In q e2).
+  assert (Q elt (EqSet.fold f elt e0)).
+  {
+    apply ESP.fold_rec; unfold Q; intros.
+    - auto.
+    - simpl. red in H2. rewrite H2 in H4.
+      rewrite ESF.add_iff. rewrite ESF.remove_iff.
+      right. split. apply H3. tauto. tauto. 
+  }
+  destruct (ESP.In_dec q elt).
+  left. split. apply IN_ELT. auto. apply H. auto.
+  right. split. red; intros. elim n. rewrite IN_ELT. auto. apply H0. auto. 
+Qed.
+
+Lemma subst_reg_satisf:
+  forall src dst rs ls e,
+  satisf rs ls (subst_reg dst src e) ->
+  satisf (rs#dst <- (rs#src)) ls e.
+Proof.
+  intros; red; intros.
+  destruct (in_subst_reg dst src q e H0) as [[A B] | [A B]].
+  subst dst. rewrite Regmap.gss. exploit H; eauto.
+  rewrite Regmap.gso; auto.
+Qed.
+
+Lemma in_subst_reg_kind:
+  forall r1 k1 r2 k2 q (e: eqs),
+  EqSet.In q e ->
+  (ereg q, ekind q) = (r1, k1) /\ EqSet.In (Eq k2 r2 (eloc q)) (subst_reg_kind r1 k1 r2 k2 e)
+  \/ EqSet.In q (subst_reg_kind r1 k1 r2 k2 e).
+Proof.
+  intros r1 k1 r2 k2 q e0 IN0. unfold subst_reg.
+  set (f := fun (q: EqSet.elt) e =>
+      if IndexedEqKind.eq (ekind q) k1
+      then add_equation (Eq k2 r2 (eloc q)) (remove_equation q e)
+      else e).
+  set (elt := EqSet.elements_between (select_reg_l r1) (select_reg_h r1) e0).
+  assert (IN_ELT: forall q, EqSet.In q elt <-> EqSet.In q e0 /\ ereg q = r1).
+  {
+    intros. unfold elt. rewrite EqSet.elements_between_iff.
+    rewrite select_reg_charact. tauto. 
+    exact (select_reg_l_monotone r1).
+    exact (select_reg_h_monotone r1).
+  }
+  set (P := fun e1 e2 =>
+         EqSet.In q e1 -> ekind q = k1 ->
+         EqSet.In (Eq k2 r2 (eloc q)) e2).
+  assert (P elt (EqSet.fold f elt e0)).
+  {
+    intros; apply ESP.fold_rec; unfold P; intros.
+    - ESD.fsetdec.
+    - simpl. red in H1. apply H1 in H3. destruct H3. 
+      + subst x. unfold f. destruct (IndexedEqKind.eq (ekind q) k1).
+        simpl. ESD.fsetdec. contradiction.
+      + unfold f. destruct (IndexedEqKind.eq (ekind x) k1).
+        simpl. rewrite ESF.add_iff. rewrite ESF.remove_iff. 
+        destruct (OrderedEquation.eq_dec x {| ekind := k2; ereg := r2; eloc := eloc q |}); auto.
+        left. subst x; auto.
+        auto. 
+  }
+  set (Q := fun e1 e2 =>
+         ~EqSet.In q e1 \/ ekind q <> k1 ->
+         EqSet.In q e2).
+  assert (Q elt (EqSet.fold f elt e0)).
+  {
+    apply ESP.fold_rec; unfold Q; intros.
+    - auto.
+    - unfold f. red in H2. rewrite H2 in H4.
+      destruct (IndexedEqKind.eq (ekind x) k1).
+      simpl. rewrite ESF.add_iff. rewrite ESF.remove_iff.
+      right. split. apply H3. tauto. intuition congruence. 
+      apply H3. intuition. 
+  }
+  destruct (ESP.In_dec q elt).
+  destruct (IndexedEqKind.eq (ekind q) k1).
+  left. split. f_equal. apply IN_ELT. auto. auto. apply H. auto. auto.
+  right. apply H0. auto.
+  right. apply H0. auto.
+Qed.
+
+Lemma subst_reg_kind_satisf_makelong:
+  forall src1 src2 dst rs ls e,
+  let e1 := subst_reg_kind dst High src1 Full e in
+  let e2 := subst_reg_kind dst Low src2 Full e1 in
+  reg_unconstrained dst e2 = true ->
+  satisf rs ls e2 ->
+  satisf (rs#dst <- (Val.longofwords rs#src1 rs#src2)) ls e.
+Proof.
+  intros; red; intros.
+  destruct (in_subst_reg_kind dst High src1 Full q e H1) as [[A B] | B]; fold e1 in B.
+  destruct (in_subst_reg_kind dst Low src2 Full _ e1 B) as [[C D] | D]; fold e2 in D.
+  simpl in C; simpl in D. inv C.
+  inversion A. rewrite H3; rewrite H4. rewrite Regmap.gss.
+  apply Val.lessdef_trans with (rs#src1). 
+  simpl. destruct (rs#src1); simpl; auto. destruct (rs#src2); simpl; auto. 
+  rewrite Int64.hi_ofwords. auto.
+  exploit H0. eexact D. simpl. auto. 
+  destruct (in_subst_reg_kind dst Low src2 Full q e1 B) as [[C D] | D]; fold e2 in D.
+  inversion C. rewrite H3; rewrite H4. rewrite Regmap.gss. 
+  apply Val.lessdef_trans with (rs#src2). 
+  simpl. destruct (rs#src1); simpl; auto. destruct (rs#src2); simpl; auto. 
+  rewrite Int64.lo_ofwords. auto.
+  exploit H0. eexact D. simpl. auto.
+  rewrite Regmap.gso. apply H0; auto. eapply reg_unconstrained_sound; eauto. 
+Qed.
+
+Lemma subst_reg_kind_satisf_lowlong:
+  forall src dst rs ls e,
+  let e1 := subst_reg_kind dst Full src Low e in
+  reg_unconstrained dst e1 = true ->
+  satisf rs ls e1 ->
+  satisf (rs#dst <- (Val.loword rs#src)) ls e.
+Proof.
+  intros; red; intros.
+  destruct (in_subst_reg_kind dst Full src Low q e H1) as [[A B] | B]; fold e1 in B.
+  inversion A. rewrite H3; rewrite H4. simpl. rewrite Regmap.gss. 
+  exploit H0. eexact B. simpl. auto. 
+  rewrite Regmap.gso. apply H0; auto. eapply reg_unconstrained_sound; eauto.
+Qed.
+
+Lemma subst_reg_kind_satisf_highlong:
+  forall src dst rs ls e,
+  let e1 := subst_reg_kind dst Full src High e in
+  reg_unconstrained dst e1 = true ->
+  satisf rs ls e1 ->
+  satisf (rs#dst <- (Val.hiword rs#src)) ls e.
+Proof.
+  intros; red; intros.
+  destruct (in_subst_reg_kind dst Full src High q e H1) as [[A B] | B]; fold e1 in B.
+  inversion A. rewrite H3; rewrite H4. simpl. rewrite Regmap.gss. 
+  exploit H0. eexact B. simpl. auto. 
+  rewrite Regmap.gso. apply H0; auto. eapply reg_unconstrained_sound; eauto.
+Qed.
+
+Module ESF2 := FSetFacts.Facts(EqSet2).
+Module ESP2 := FSetProperties.Properties(EqSet2).
+Module ESD2 := FSetDecide.Decide(EqSet2).
+
+Lemma in_subst_loc:
+  forall l1 l2 q (e e': eqs),
+  EqSet.In q e ->
+  subst_loc l1 l2 e = Some e' ->
+  (eloc q = l1 /\ EqSet.In (Eq (ekind q) (ereg q) l2) e') \/ (Loc.diff l1 (eloc q) /\ EqSet.In q e').
+Proof.
+  intros l1 l2 q e0 e0'.
+  unfold subst_loc. 
+  set (f := fun (q0 : EqSet2.elt) (opte : option eqs) =>
+   match opte with
+   | Some e =>
+       if Loc.eq l1 (eloc q0)
+       then
+        Some
+          (add_equation {| ekind := ekind q0; ereg := ereg q0; eloc := l2 |}
+             (remove_equation q0 e))
+       else None
+   | None => None
+   end).
+  set (elt := EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e0)).
+  intros IN SUBST.
+  set (P := fun e1 (opte: option eqs) =>
+          match opte with
+          | None => True
+          | Some e2 =>
+              EqSet2.In q e1 ->
+              eloc q = l1 /\ EqSet.In (Eq (ekind q) (ereg q) l2) e2
+          end).
+  assert (P elt (EqSet2.fold f elt (Some e0))).
+  {
+    apply ESP2.fold_rec; unfold P; intros.
+    - ESD2.fsetdec. 
+    - destruct a as [e2|]; simpl; auto. 
+      destruct (Loc.eq l1 (eloc x)); auto.
+      unfold add_equation, remove_equation; simpl.
+      red in H1. rewrite H1. intros [A|A]. 
+      + subst x. split. auto. ESD.fsetdec.
+      + exploit H2; eauto. intros [B C]. split. auto.
+        rewrite ESF.add_iff. rewrite ESF.remove_iff. 
+        destruct (OrderedEquation.eq_dec x {| ekind := ekind q; ereg := ereg q; eloc := l2 |}).
+        left. rewrite e1; auto. 
+        right; auto. 
+  }
+  set (Q := fun e1 (opte: option eqs) =>
+          match opte with
+          | None => True
+          | Some e2 => ~EqSet2.In q e1 -> EqSet.In q e2
+          end).
+  assert (Q elt (EqSet2.fold f elt (Some e0))).
+  {
+    apply ESP2.fold_rec; unfold Q; intros.
+    - auto. 
+    - destruct a as [e2|]; simpl; auto. 
+      destruct (Loc.eq l1 (eloc x)); auto. 
+      red in H2. rewrite H2; intros. 
+      unfold add_equation, remove_equation; simpl.
+      rewrite ESF.add_iff. rewrite ESF.remove_iff.
+      right; split. apply H3. tauto. tauto.
+  }
+  rewrite SUBST in H; rewrite SUBST in H0; simpl in *. 
+  destruct (ESP2.In_dec q elt). 
+  left. apply H; auto.
+  right. split; auto. 
+  rewrite <- select_loc_charact.
+  destruct (select_loc_l l1 q) eqn: LL; auto.
+  destruct (select_loc_h l1 q) eqn: LH; auto.
+  elim n. eapply EqSet2.elements_between_iff. 
+  exact (select_loc_l_monotone l1).
+  exact (select_loc_h_monotone l1).
+  split. apply eqs_same; auto. auto. 
+Qed.
+
+Lemma subst_loc_satisf:
+  forall src dst rs ls e e',
+  subst_loc dst src e = Some e' ->
+  satisf rs ls e' ->
+  satisf rs (Locmap.set dst (ls src) ls) e.
+Proof.
+  intros; red; intros.
+  exploit in_subst_loc; eauto. intros [[A B] | [A B]].
+  subst dst. rewrite Locmap.gss. apply (H0 _ B). 
+  rewrite Locmap.gso; auto.
+Qed.
+
+Lemma can_undef_sound:
+  forall e ml q,
+  can_undef ml e = true -> EqSet.In q e -> Loc.notin (eloc q) (map R ml).
+Proof.
+  induction ml; simpl; intros.
+  tauto.
+  InvBooleans. split.
+  apply Loc.diff_sym. eapply loc_unconstrained_sound; eauto. 
+  eauto.
+Qed.
+
+Lemma undef_regs_outside:
+  forall ml ls l,
+  Loc.notin l (map R ml) -> undef_regs ml ls l = ls l.
+Proof.
+  induction ml; simpl; intros. auto.
+  rewrite Locmap.gso. apply IHml. tauto. apply Loc.diff_sym. tauto. 
+Qed.
+
+Lemma can_undef_satisf:
+  forall ml e rs ls,
+  can_undef ml e = true ->
+  satisf rs ls e ->
+  satisf rs (undef_regs ml ls) e.
+Proof.
+  intros; red; intros. rewrite undef_regs_outside. eauto.
+  eapply can_undef_sound; eauto.
+Qed.
+
+Lemma can_undef_except_sound:
+  forall lx e ml q,
+  can_undef_except lx ml e = true -> EqSet.In q e -> Loc.diff (eloc q) lx -> Loc.notin (eloc q) (map R ml).
+Proof.
+  induction ml; simpl; intros.
+  tauto.
+  InvBooleans. split.
+  destruct (orb_true_elim _ _ H2). 
+  apply proj_sumbool_true in e0. congruence.
+  apply Loc.diff_sym. eapply loc_unconstrained_sound; eauto. 
+  eapply IHml; eauto.
+Qed.
+
+Lemma subst_loc_undef_satisf:
+  forall src dst rs ls ml e e',
+  subst_loc dst src e = Some e' ->
+  can_undef_except dst ml e = true ->
+  satisf rs ls e' ->
+  satisf rs (Locmap.set dst (ls src) (undef_regs ml ls)) e.
+Proof.
+  intros; red; intros.
+  exploit in_subst_loc; eauto. intros [[A B] | [A B]].
+  rewrite A. rewrite Locmap.gss. apply (H1 _ B). 
+  rewrite Locmap.gso; auto. rewrite undef_regs_outside. eauto. 
+  eapply can_undef_except_sound; eauto. apply Loc.diff_sym; auto.
+Qed.
+
+Lemma transfer_use_def_satisf:
+  forall args res args' res' und e e' rs ls,
+  transfer_use_def args res args' res' und e = Some e' ->
+  satisf rs ls e' ->
+  Val.lessdef_list rs##args (reglist ls args') /\
+  (forall v v', Val.lessdef v v' ->
+    satisf (rs#res <- v) (Locmap.set (R res') v' (undef_regs und ls)) e).
+Proof.
+  unfold transfer_use_def; intros. MonadInv. 
+  split. eapply add_equations_lessdef; eauto.
+  intros. eapply parallel_assignment_satisf; eauto. assumption. 
+  eapply can_undef_satisf; eauto. 
+  eapply add_equations_satisf; eauto. 
+Qed.
+
+Lemma add_equations_res_lessdef:
+  forall r oty ll e e' rs ls,
+  add_equations_res r oty ll e = Some e' ->
+  satisf rs ls e' ->
+  Val.lessdef_list (encode_long oty rs#r) (map ls ll).
+Proof.
+  intros. functional inversion H.
+- subst. simpl. constructor. 
+  eapply add_equation_lessdef with (q := Eq High r l1).
+  eapply add_equation_satisf. eauto. 
+  constructor.
+  eapply add_equation_lessdef with (q := Eq Low r l2). eauto.
+  constructor.
+- subst. replace (encode_long oty rs#r) with (rs#r :: nil). simpl. constructor; auto.
+  eapply add_equation_lessdef with (q := Eq Full r l1); eauto.
+  destruct oty as [[]|]; reflexivity || contradiction.
+Qed.
+
+Definition callee_save_loc (l: loc) :=
+  match l with
+  | R r => ~(In r destroyed_at_call)
+  | S sl ofs ty => sl <> Outgoing
+  end.
+
+Definition agree_callee_save (ls1 ls2: locset) : Prop :=
+  forall l, callee_save_loc l -> ls1 l = ls2 l.
+
+Lemma return_regs_agree_callee_save:
+  forall caller callee,
+  agree_callee_save caller (return_regs caller callee).
+Proof.
+  intros; red; intros. unfold return_regs. red in H. 
+  destruct l.
+  rewrite pred_dec_false; auto. 
+  destruct sl; auto || congruence. 
+Qed.
+
+Lemma no_caller_saves_sound:
+  forall e q,
+  no_caller_saves e = true ->
+  EqSet.In q e ->
+  callee_save_loc (eloc q).
+Proof.
+  unfold no_caller_saves, callee_save_loc; intros.
+  exploit EqSet.for_all_2; eauto.
+  hnf. intros. simpl in H1. rewrite H1. auto.
+  lazy beta. destruct (eloc q). 
+  intros; red; intros. destruct (orb_true_elim _ _ H1); InvBooleans.
+  eapply int_callee_save_not_destroyed; eauto.
+  apply index_int_callee_save_pos2. omega.
+  eapply float_callee_save_not_destroyed; eauto.
+  apply index_float_callee_save_pos2. omega.
+  destruct sl; congruence.
+Qed.
+
+Lemma function_return_satisf:
+  forall rs ls_before ls_after res res' sg e e' v,
+  res' = map R (loc_result sg) ->
+  remove_equations_res res (sig_res sg) res' e = Some e' ->
+  satisf rs ls_before e' ->
+  forallb (fun l => reg_loc_unconstrained res l e') res' = true ->
+  no_caller_saves e' = true ->
+  Val.lessdef_list (encode_long (sig_res sg) v) (map ls_after res') ->
+  agree_callee_save ls_before ls_after ->
+  satisf (rs#res <- v) ls_after e.
+Proof.
+  intros; red; intros.
+  functional inversion H0.
+- subst. rewrite <- H11 in *. unfold encode_long in H4. rewrite <- H7 in H4. 
+  simpl in H4. inv H4. inv H14. 
+  set (e' := remove_equation {| ekind := Low; ereg := res; eloc := l2 |}
+          (remove_equation {| ekind := High; ereg := res; eloc := l1 |} e)) in *.
+  simpl in H2. InvBooleans.
+  destruct (OrderedEquation.eq_dec q (Eq Low res l2)).
+  subst q; simpl. rewrite Regmap.gss. auto.
+  destruct (OrderedEquation.eq_dec q (Eq High res l1)).
+  subst q; simpl. rewrite Regmap.gss. auto.
+  assert (EqSet.In q e'). unfold e', remove_equation; simpl; ESD.fsetdec. 
+  exploit reg_loc_unconstrained_sound. eexact H. eauto. intros [A B].
+  exploit reg_loc_unconstrained_sound. eexact H2. eauto. intros [C D].
+  rewrite Regmap.gso; auto.
+  exploit no_caller_saves_sound; eauto. intros.
+  red in H5. rewrite <- H5; auto. 
+- subst. rewrite <- H11 in *.
+  replace (encode_long (sig_res sg) v) with (v :: nil) in H4.
+  simpl in H4. inv H4.
+  simpl in H2. InvBooleans.
+  set (e' := remove_equation {| ekind := Full; ereg := res; eloc := l1 |} e) in *.
+  destruct (OrderedEquation.eq_dec q (Eq Full res l1)). 
+  subst q; simpl. rewrite Regmap.gss; auto. 
+  assert (EqSet.In q e'). unfold e', remove_equation; simpl. ESD.fsetdec. 
+  exploit reg_loc_unconstrained_sound; eauto. intros [A B].
+  rewrite Regmap.gso; auto.
+  exploit no_caller_saves_sound; eauto. intros.
+  red in H5. rewrite <- H5; auto.
+  destruct (sig_res sg) as [[]|]; reflexivity || contradiction.
+Qed.
 
-Lemma agree_init_regs:
-  forall live rl vl,
-  (forall r1 r2,
-    In r1 rl -> Regset.In r2 live -> r1 <> r2 ->
-    assign r1 <> assign r2) ->
-  agree live (RTL.init_regs vl rl) 
-             (LTL.init_locs vl (List.map assign rl)).
+Lemma compat_left_sound:
+  forall r l e q,
+  compat_left r l e = true -> EqSet.In q e -> ereg q = r -> ekind q = Full /\ eloc q = l.
 Proof.
-  intro live.
-  assert (agree live (Regmap.init Vundef) (Locmap.init Vundef)).
-    red; intros. rewrite Regmap.gi. auto.
-  induction rl; simpl; intros.
+  unfold compat_left; intros.
+  rewrite EqSet.for_all_between_iff in H. 
+  apply select_reg_charact in H1. destruct H1. 
+  exploit H; eauto. intros. 
+  destruct (ekind q); try discriminate. 
+  destruct (Loc.eq l (eloc q)); try discriminate. 
   auto.
-  destruct vl. auto.
-  assert (agree live (init_regs vl rl) (init_locs vl (map assign rl))).
-  apply IHrl. intros. apply H0. tauto. auto. auto.
-  red; intros. rewrite Regmap.gsspec. destruct (peq r a). 
-  subst r. rewrite Locmap.gss. auto.
-  rewrite Locmap.gso. apply H1; auto. 
-  eapply regalloc_noteq_diff; eauto. 
+  intros. subst x2. auto.
+  exact (select_reg_l_monotone r).
+  exact (select_reg_h_monotone r).
 Qed.
 
-Lemma agree_parameters:
-  forall vl,
-  let params := f.(RTL.fn_params) in
-  agree (live0 f flive)
-        (RTL.init_regs vl params) 
-        (LTL.init_locs vl (List.map assign params)).
+Lemma compat_left2_sound:
+  forall r l1 l2 e q,
+  compat_left2 r l1 l2 e = true -> EqSet.In q e -> ereg q = r ->
+  (ekind q = High /\ eloc q = l1) \/ (ekind q = Low /\ eloc q = l2).
 Proof.
-  intros. apply agree_init_regs. 
-  intros. eapply regalloc_correct_3; eauto.
+  unfold compat_left2; intros.
+  rewrite EqSet.for_all_between_iff in H. 
+  apply select_reg_charact in H1. destruct H1. 
+  exploit H; eauto. intros. 
+  destruct (ekind q); try discriminate. 
+  InvBooleans. auto.
+  InvBooleans. auto.
+  intros. subst x2. auto.
+  exact (select_reg_l_monotone r).
+  exact (select_reg_h_monotone r).
 Qed.
 
-End AGREE.
+Lemma val_hiword_longofwords:
+  forall v1 v2, Val.lessdef (Val.hiword (Val.longofwords v1 v2)) v1.
+Proof.
+  intros. destruct v1; simpl; auto. destruct v2; auto. unfold Val.hiword.
+  rewrite Int64.hi_ofwords. auto.
+Qed.
 
-(** * Preservation of semantics *)
+Lemma val_loword_longofwords:
+  forall v1 v2, Val.lessdef (Val.loword (Val.longofwords v1 v2)) v2.
+Proof.
+  intros. destruct v1; simpl; auto. destruct v2; auto. unfold Val.loword.
+  rewrite Int64.lo_ofwords. auto.
+Qed.
 
-(** We now show that the LTL code reflecting register allocation has
-  the same semantics as the original RTL code.  We start with
-  standard properties of translated functions and 
-  global environments in the original and translated code. *)
+Lemma compat_entry_satisf:
+  forall rl tyl ll e,
+  compat_entry rl tyl ll e = true ->
+  forall vl ls,
+  Val.lessdef_list vl (decode_longs tyl (map ls ll)) ->
+  satisf (init_regs vl rl) ls e.
+Proof.
+  intros until e. functional induction (compat_entry rl tyl ll e); intros. 
+- (* no params *)
+  simpl. red; intros. rewrite Regmap.gi. destruct (ekind q); simpl; auto.
+- (* a param of type Tlong *)
+  InvBooleans. simpl in H0. inv H0. simpl.
+  red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r1). 
+  exploit compat_left2_sound; eauto.
+  intros [[A B] | [A B]]; rewrite A; rewrite B; simpl.
+  apply Val.lessdef_trans with (Val.hiword (Val.longofwords (ls l1) (ls l2))).
+  apply Val.hiword_lessdef; auto. apply val_hiword_longofwords.
+  apply Val.lessdef_trans with (Val.loword (Val.longofwords (ls l1) (ls l2))).
+  apply Val.loword_lessdef; auto. apply val_loword_longofwords.
+  eapply IHb; eauto.
+- (* a param of type Tint *)
+  InvBooleans. simpl in H0. inv H0. simpl.
+  red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r1). 
+  exploit compat_left_sound; eauto. intros [A B]. rewrite A; rewrite B; auto.
+  eapply IHb; eauto.
+- (* a param of type Tfloat *)
+  InvBooleans. simpl in H0. inv H0. simpl.
+  red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r1). 
+  exploit compat_left_sound; eauto. intros [A B]. rewrite A; rewrite B; auto.
+  eapply IHb; eauto.
+- (* error case *)
+  discriminate.
+Qed.
+
+Lemma call_regs_param_values:
+  forall sg ls,
+  map (call_regs ls) (loc_parameters sg) = map ls (loc_arguments sg).
+Proof.
+  intros. unfold loc_parameters. rewrite list_map_compose. 
+  apply list_map_exten; intros. unfold call_regs, parameter_of_argument.
+  exploit loc_arguments_acceptable; eauto. unfold loc_argument_acceptable. 
+  destruct x; auto. destruct sl; tauto. 
+Qed.
+
+Lemma return_regs_arg_values:
+  forall sg ls1 ls2,
+  tailcall_is_possible sg = true ->
+  map (return_regs ls1 ls2) (loc_arguments sg) = map ls2 (loc_arguments sg).
+Proof.
+  intros. apply list_map_exten; intros. 
+  exploit loc_arguments_acceptable; eauto.
+  exploit tailcall_is_possible_correct; eauto. 
+  unfold loc_argument_acceptable, return_regs.
+  destruct x; intros.
+  rewrite pred_dec_true; auto. 
+  contradiction.
+Qed.
+
+Lemma find_function_tailcall:
+  forall tge ros ls1 ls2,
+  ros_compatible_tailcall ros = true ->
+  find_function tge ros (return_regs ls1 ls2) = find_function tge ros ls2.
+Proof.
+  unfold ros_compatible_tailcall, find_function; intros. 
+  destruct ros as [r|id]; auto.
+  unfold return_regs. destruct (in_dec mreg_eq r destroyed_at_call); simpl in H.
+  auto. congruence.
+Qed.
+
+(** * Properties of the dataflow analysis *)
+
+Lemma analyze_successors:
+  forall f bsh an pc bs s e,
+  analyze f bsh = Some an ->
+  bsh!pc = Some bs ->
+  In s (successors_block_shape bs) ->
+  an!!pc = OK e ->
+  exists e', transfer f bsh s an!!s = OK e' /\ EqSet.Subset e' e.
+Proof.
+  unfold analyze; intros. exploit DS.fixpoint_solution; eauto.
+  instantiate (1 := pc). instantiate (1 := s). 
+  unfold successors_list. rewrite PTree.gmap1. rewrite H0. simpl. auto. 
+  rewrite H2. unfold DS.L.ge. destruct (transfer f bsh s an#s); intros.
+  exists e0; auto. 
+  contradiction.
+Qed.
+
+Lemma satisf_successors:
+  forall f bsh an pc bs s e rs ls,
+  analyze f bsh = Some an ->
+  bsh!pc = Some bs ->
+  In s (successors_block_shape bs) ->
+  an!!pc = OK e ->
+  satisf rs ls e ->
+  exists e', transfer f bsh s an!!s = OK e' /\ satisf rs ls e'.
+Proof.
+  intros. exploit analyze_successors; eauto. intros [e' [A B]]. 
+  exists e'; split; auto. eapply satisf_incr; eauto.
+Qed.
+
+(** Inversion on [transf_function] *)
+
+Inductive transf_function_spec (f: RTL.function) (tf: LTL.function) : Prop :=
+  | transf_function_spec_intro:  
+  forall tyenv an mv k e1 e2,
+  wt_function f tyenv ->
+  analyze f (pair_codes f tf) = Some an ->
+  (LTL.fn_code tf)!(LTL.fn_entrypoint tf) = Some(expand_moves mv (Lbranch (RTL.fn_entrypoint f) :: k)) ->
+  wf_moves mv ->
+  transfer f (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
+  track_moves mv e1 = Some e2 ->
+  compat_entry (RTL.fn_params f) (sig_args (RTL.fn_sig f)) (loc_parameters (fn_sig tf)) e2 = true ->
+  can_undef destroyed_at_function_entry e2 = true ->
+  RTL.fn_stacksize f = LTL.fn_stacksize tf ->
+  RTL.fn_sig f = LTL.fn_sig tf ->
+  transf_function_spec f tf.
+
+Lemma transf_function_inv:
+  forall f tf,
+  transf_function f = OK tf ->
+  transf_function_spec f tf.
+Proof.
+  unfold transf_function; intros.
+  destruct (type_function f) as [tyenv|] eqn:TY; try discriminate.
+  destruct (regalloc f); try discriminate.
+  destruct (check_function f f0) as [] eqn:?; inv H.
+  unfold check_function in Heqr. 
+  destruct (analyze f (pair_codes f tf)) as [an|] eqn:?; try discriminate.
+  monadInv Heqr. 
+  destruct (check_entrypoints_aux f tf x) as [y|] eqn:?; try discriminate.
+  unfold check_entrypoints_aux, pair_entrypoints in Heqo0. MonadInv.
+  exploit extract_moves_sound; eauto. intros [A B]. subst b.
+  exploit check_succ_sound; eauto. intros [k EQ1]. subst b0.
+  econstructor; eauto. eapply type_function_correct; eauto. congruence. 
+Qed.
+
+Lemma invert_code:
+  forall f tf pc i opte e,
+  (RTL.fn_code f)!pc = Some i ->
+  transfer f (pair_codes f tf) pc opte = OK e ->
+  exists eafter, exists bsh, exists bb,
+  opte = OK eafter /\ 
+  (pair_codes f tf)!pc = Some bsh /\
+  (LTL.fn_code tf)!pc = Some bb /\
+  expand_block_shape bsh i bb /\
+  transfer_aux f bsh eafter = Some e.
+Proof.
+  intros. destruct opte as [eafter|]; simpl in H0; try discriminate. exists eafter.
+  destruct (pair_codes f tf)!pc as [bsh|] eqn:?; try discriminate. exists bsh. 
+  exploit matching_instr_block; eauto. intros [bb [A B]].
+  destruct (transfer_aux f bsh eafter) as [e1|] eqn:?; inv H0. 
+  exists bb. auto.
+Qed.
+
+(** * Semantic preservation *)
 
 Section PRESERVATION.
 
@@ -452,348 +1384,726 @@ Lemma sig_function_translated:
   transf_fundef f = OK tf ->
   LTL.funsig tf = RTL.funsig f.
 Proof.
-  intros f tf. destruct f; simpl.
-  unfold transf_function.
-  destruct (type_function f).
-  destruct (analyze f).
-  destruct (regalloc f t).
-  intro. monadInv H. inv EQ. auto.  
-  simpl; congruence. simpl; congruence. simpl; congruence.
-  intro EQ; inv EQ. auto.
-Qed.
-
-(** The proof of semantic preservation is a simulation argument
-  based on diagrams of the following form:
-<<
-           st1 --------------- st2
-            |                   |
-           t|                   |t
-            |                   |
-            v                   v
-           st1'--------------- st2'
->>
-  Hypotheses: the left vertical arrow represents a transition in the
-  original RTL code.  The top horizontal bar is the [match_states]
-  relation defined below.  It implies agreement between
-  the RTL register map [rs] and the LTL location map [ls]
-  over the pseudo-registers live before the RTL instruction at [pc].  
-
-  Conclusions: the right vertical arrow is an [exec_instrs] transition
-  in the LTL code generated by translation of the current function.
-  The bottom horizontal bar is the [match_states] relation.
-*)
-
-Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> Prop :=
-  | match_stackframes_nil:
-      match_stackframes nil nil
+  intros; destruct f; monadInv H.
+  destruct (transf_function_inv _ _ EQ). simpl; auto.
+  auto.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd ros' e e' ls,
+  RTL.find_function ge ros rs = Some fd ->
+  add_equation_ros ros ros' e = Some e' ->
+  satisf rs ls e' ->
+  exists tfd,
+  LTL.find_function tge ros' ls = Some tfd /\ transf_fundef fd = OK tfd.
+Proof.
+  unfold RTL.find_function, LTL.find_function; intros.
+  destruct ros as [r|id]; destruct ros' as [r'|id']; simpl in H0; MonadInv.
+  (* two regs *)
+  exploit add_equation_lessdef; eauto. intros LD. inv LD.
+  eapply functions_translated; eauto.
+  rewrite <- H2 in H. simpl in H. congruence.
+  (* two symbols *)
+  rewrite symbols_preserved. rewrite Heqo. 
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma exec_moves:
+  forall mv rs s f sp bb m e e' ls,
+  track_moves mv e = Some e' ->
+  wf_moves mv ->
+  satisf rs ls e' ->
+  exists ls',
+    star step tge (Block s f sp (expand_moves mv bb) ls m)
+               E0 (Block s f sp bb ls' m)
+  /\ satisf rs ls' e.
+Proof.
+Opaque destroyed_by_op.
+  induction mv; simpl; intros.
+  (* base *)
+- unfold expand_moves; simpl. inv H. exists ls; split. apply star_refl. auto.
+  (* step *)
+- destruct a as [src dst]. unfold expand_moves. simpl. 
+  destruct (track_moves mv e) as [e1|] eqn:?; MonadInv.
+  assert (wf_moves mv). red; intros. apply H0; auto with coqlib. 
+  destruct src as [rsrc | ssrc]; destruct dst as [rdst | sdst].
+  (* reg-reg *)
++ exploit IHmv; eauto. eapply subst_loc_undef_satisf; eauto. 
+  intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto. 
+  econstructor. simpl. eauto. auto. auto.
+  (* reg->stack *)
++ exploit IHmv; eauto. eapply subst_loc_undef_satisf; eauto. 
+  intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto. 
+  econstructor. simpl. eauto. auto.
+  (* stack->reg *)
++ simpl in Heqb. exploit IHmv; eauto. eapply subst_loc_undef_satisf; eauto. 
+  intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto. 
+  econstructor. auto. auto. 
+  (* stack->stack *)
++ exploit H0; auto with coqlib. unfold wf_move. tauto.
+Qed.
+
+(** The simulation relation *)
+
+Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signature -> Prop :=
+  | match_stackframes_nil: forall sg,
+      sg.(sig_res) = Some Tint ->
+      match_stackframes nil nil sg
   | match_stackframes_cons:
-      forall s ts res f sp pc rs ls env live assign,
-      match_stackframes s ts ->
-      wt_function f env ->
-      analyze f = Some live ->
-      regalloc f live (live0 f live) env = Some assign ->
-      (forall rv,
-        agree assign (transfer f pc live!!pc)
-              (rs#res <- rv)
-              (Locmap.set (assign res) rv ls)) ->
+      forall res f sp pc rs s tf bb ls ts sg an e tyenv
+        (STACKS: match_stackframes s ts (fn_sig tf))
+        (FUN: transf_function f = OK tf)
+        (ANL: analyze f (pair_codes f tf) = Some an)
+        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
+        (WTF: wt_function f tyenv)
+        (WTRS: wt_regset tyenv rs)
+        (WTRES: tyenv res = proj_sig_res sg)
+        (STEPS: forall v ls1 m,
+           Val.lessdef_list (encode_long (sig_res sg) v) (map ls1 (map R (loc_result sg))) ->
+           agree_callee_save ls ls1 ->
+           exists ls2,
+           star LTL.step tge (Block ts tf sp bb ls1 m)
+                          E0 (State ts tf sp pc ls2 m)
+           /\ satisf (rs#res <- v) ls2 e),
       match_stackframes
         (RTL.Stackframe res f sp pc rs :: s)
-        (LTL.Stackframe (assign res) (transf_fun f live assign) sp ls pc :: ts).
+        (LTL.Stackframe tf sp ls bb :: ts)
+        sg.
 
 Inductive match_states: RTL.state -> LTL.state -> Prop :=
   | match_states_intro:
-      forall s f sp pc rs m ts ls live assign env
-      (STACKS: match_stackframes s ts)
-      (WT: wt_function f env)
-      (ANL: analyze f = Some live)
-      (ASG: regalloc f live (live0 f live) env = Some assign)
-      (AG: agree assign (transfer f pc live!!pc) rs ls),
+      forall s f sp pc rs m ts tf ls m' an e tyenv
+        (STACKS: match_stackframes s ts (fn_sig tf))
+        (FUN: transf_function f = OK tf)
+        (ANL: analyze f (pair_codes f tf) = Some an)
+        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
+        (SAT: satisf rs ls e)
+        (MEM: Mem.extends m m')
+        (WTF: wt_function f tyenv)
+        (WTRS: wt_regset tyenv rs),
       match_states (RTL.State s f sp pc rs m)
-                   (LTL.State ts (transf_fun f live assign) sp pc ls m)
+                   (LTL.State ts tf sp pc ls m')
   | match_states_call:
-      forall s f args m ts tf,
-      match_stackframes s ts ->
-      transf_fundef f = OK tf ->
+      forall s f args m ts tf ls m'
+        (STACKS: match_stackframes s ts (funsig tf))
+        (FUN: transf_fundef f = OK tf)
+        (ARGS: Val.lessdef_list args (decode_longs (sig_args (funsig tf)) (map ls (loc_arguments (funsig tf)))))
+        (AG: agree_callee_save (parent_locset ts) ls)
+        (MEM: Mem.extends m m')
+        (WTARGS: Val.has_type_list args (sig_args (funsig tf))),
       match_states (RTL.Callstate s f args m)
-                   (LTL.Callstate ts tf args m)
+                   (LTL.Callstate ts tf ls m')
   | match_states_return:
-      forall s v m ts,
-      match_stackframes s ts ->
-      match_states (RTL.Returnstate s v m)
-                   (LTL.Returnstate ts v m).
-
-(** The simulation proof is by case analysis over the RTL transition
-  taken in the source program. *)
-
-Ltac CleanupHyps :=
-  match goal with
-  | H: (match_states _ _) |- _ =>
-      inv H; CleanupHyps
-  | H1: (PTree.get _ _ = Some _),
-    H2: (agree _ (transfer _ _ _) _ _) |- _ =>
-      unfold transfer in H2; rewrite H1 in H2; simpl in H2; CleanupHyps
-  | _ => idtac
-  end.
-
-Ltac WellTypedHyp :=
-  match goal with
-  | H1: (PTree.get _ _ = Some _),
-    H2: (wt_function _ _) |- _ =>
-      let R := fresh "WTI" in (
-      generalize (wt_instrs _ _ H2 _ _ H1); intro R)
-  | _ => idtac
-  end.
+      forall s res m ts ls m' sg
+        (STACKS: match_stackframes s ts sg)
+        (RES: Val.lessdef_list (encode_long (sig_res sg) res) (map ls (map R (loc_result sg))))
+        (AG: agree_callee_save (parent_locset ts) ls)
+        (MEM: Mem.extends m m')
+        (WTRES: Val.has_type res (proj_sig_res sg)),
+      match_states (RTL.Returnstate s res m)
+                   (LTL.Returnstate ts ls m').
+
+Lemma match_stackframes_change_sig:
+  forall s ts sg sg',
+  match_stackframes s ts sg ->
+  sg'.(sig_res) = sg.(sig_res) ->
+  match_stackframes s ts sg'.
+Proof.
+  intros. inv H. 
+  constructor. congruence.
+  econstructor; eauto.
+  unfold proj_sig_res in *. rewrite H0; auto. 
+  intros. unfold loc_result in H; rewrite H0 in H; eauto. 
+Qed.
 
-Ltac TranslInstr :=
+Ltac UseShape :=
   match goal with
-  | H: (PTree.get _ _ = Some _) |- _ =>
-      simpl in H; simpl; rewrite PTree.gmap; rewrite H; simpl; auto
+  | [ CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ = OK _ |- _ ] =>
+      destruct (invert_code _ _ _ _ _ _ CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR); 
+      inv EBS; unfold transfer_aux in TR; MonadInv
   end.
 
-Ltac MatchStates :=
+Ltac UseType :=
   match goal with
-  | |- match_states (RTL.State _ _ _ _ _ _) (LTL.State _ _ _ _ _ _) =>
-      eapply match_states_intro; eauto; MatchStates
-  | H: (PTree.get ?pc _ = Some _) |- agree _ _ _ _ =>
-      eapply agree_succ with (n := pc); eauto; MatchStates
-  | |- In _ (RTL.successors_instr _) =>
-      unfold RTL.successors_instr; auto with coqlib
-  | _ => idtac
+  | [ CODE: (RTL.fn_code _)!_ = Some _, WTF: wt_function _ _ |- _ ] =>
+      generalize (wt_instrs _ _ WTF _ _ CODE); intro WTI
   end.
 
-Lemma transl_find_function:
-  forall ros f args lv rs ls alloc,
-  RTL.find_function ge ros rs = Some f ->
-  agree alloc (reg_list_live args (reg_sum_live ros lv)) rs ls ->
-  exists tf,
-    LTL.find_function tge (sum_left_map alloc ros) ls = Some tf /\
-    transf_fundef f = OK tf.
+Remark addressing_not_long:
+  forall env f addr args dst s r,
+  wt_instr env f (Iload Mint64 addr args dst s) ->
+  In r args -> r <> dst.
 Proof.
-  intros; destruct ros; simpl in *.
-  assert (rs#r = ls (alloc r)).
-    eapply agree_eval_reg. eapply agree_reg_list_live; eauto.
-  rewrite <- H1. apply functions_translated. auto.
-  rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
-  apply function_ptr_translated. auto. discriminate.
+  intros. 
+  assert (forall ty, In ty (type_of_addressing addr) -> ty = Tint).
+  { intros. destruct addr; simpl in H1; intuition. }
+  inv H. 
+  assert (env r = Tint).
+  { apply H1. rewrite <- H5. apply in_map; auto. }
+  simpl in H8; congruence.
 Qed.
 
-Theorem transl_step_correct:
-  forall s1 t s2, RTL.step ge s1 t s2 ->
-  forall s1', match_states s1 s1' ->
-  exists s2', LTL.step tge s1' t s2' /\ match_states s2 s2'.
+(** The proof of semantic preservation is a simulation argument of the
+    "plus" kind. *)
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+  exists S2', plus LTL.step tge S1' t S2' /\ match_states S2 S2'.
 Proof.
-  induction 1; intros; CleanupHyps; WellTypedHyp.
+  induction 1; intros S1' MS; inv MS; try UseType; try UseShape.
+
+(* nop *)
+  exploit exec_moves; eauto. intros [ls1 [X Y]]. 
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact X. econstructor; eauto. 
+  eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]]. 
+  econstructor; eauto. 
+
+(* op move *)
+- simpl in H0. inv H0.  
+  exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact X. econstructor; eauto. 
+  eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. eapply subst_reg_satisf; eauto. 
+  intros [enext [U V]]. 
+  econstructor; eauto.
+  inv WTI. apply wt_regset_assign; auto. rewrite <- H2. apply WTRS. congruence.
 
-  (* Inop *)
+(* op makelong *)
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
+  exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
-  eapply exec_Lnop. TranslInstr. MatchStates.
-
-  (* Iop *)
-  generalize (PTree.gmap (transf_instr f live assign) pc (RTL.fn_code f)).
-  rewrite H. simpl. 
-  caseEq (Regset.mem res live!!pc); intro LV;
-  rewrite LV in AG.
-  generalize (Regset.mem_2 LV). intro LV'.
-  generalize (regalloc_correct_1 f env live _ _ _ _ ASG H).
-  unfold correct_alloc_instr, is_redundant_move.
-  caseEq (is_move_operation op args).
-  (* Special case for moves *)
-  intros arg IMO CORR.
-  generalize (is_move_operation_correct _ _ IMO).
-  intros [EQ1 EQ2]. subst op; subst args. 
-  injection H0; intro.
-  destruct (Loc.eq (assign arg) (assign res)); intro CODE.
-  (* sub-case: redundant move *)
-  econstructor; split. eapply exec_Lnop; eauto.
-  MatchStates. 
-  rewrite <- H1. eapply agree_redundant_move_live; eauto.
-  (* sub-case: non-redundant move *)
-  econstructor; split. eapply exec_Lop; eauto. simpl. eauto.
-  MatchStates.
-  rewrite <- H1. set (ls1 := undef_temps ls).
-  replace (ls (assign arg)) with (ls1 (assign arg)).
-  eapply agree_move_live; eauto.
-  unfold ls1. eapply agree_undef_temps; eauto.
-  unfold ls1. simpl. apply Locmap.guo. eapply regalloc_not_temporary; eauto. 
-  (* Not a move *)
-  intros INMO CORR CODE.
-  assert (eval_operation tge sp op (map ls (map assign args)) m = Some v).
-    replace (map ls (map assign args)) with (rs##args).
-    rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved. 
-    eapply agree_eval_regs; eauto.
-  econstructor; split. eapply exec_Lop; eauto. MatchStates. 
-  apply agree_assign_live with f env live; auto.
-  eapply agree_undef_temps; eauto.
-  eapply agree_reg_list_live; eauto.
-  (* Result is not live, instruction turned into a nop *)
-  intro CODE. econstructor; split. eapply exec_Lnop; eauto.
-  MatchStates. apply agree_assign_dead; auto. 
-  red; intro. exploit Regset.mem_1; eauto. congruence.
-
-  (* Iload *)
-  caseEq (Regset.mem dst live!!pc); intro LV;
-  rewrite LV in AG.
-  (* dst is live *)
-  exploit Regset.mem_2; eauto. intro LV'.
-  assert (eval_addressing tge sp addr (map ls (map assign args)) = Some a).
-    replace (map ls (map assign args)) with (rs##args).
-    rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
-    eapply agree_eval_regs; eauto.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact X. econstructor; eauto. 
+  eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto.
+  eapply subst_reg_kind_satisf_makelong. eauto. eauto. 
+  intros [enext [U V]]. 
+  econstructor; eauto.
+
+(* op lowlong *)
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
+  exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
-  eapply exec_Lload; eauto. TranslInstr. rewrite LV; auto.
-  generalize (regalloc_correct_1 f env live _ _ _ _ ASG H).
-  unfold correct_alloc_instr. intro CORR.
-  MatchStates. 
-  eapply agree_assign_live; eauto.
-  eapply agree_undef_temps; eauto.
-  eapply agree_reg_list_live; eauto.
-  (* dst is dead *)
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact X. econstructor; eauto. 
+  eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto.
+  eapply subst_reg_kind_satisf_lowlong. eauto. eauto. 
+  intros [enext [U V]]. 
+  econstructor; eauto.
+
+(* op highlong *)
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
+  exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
-  eapply exec_Lnop. TranslInstr. rewrite LV; auto.
-  MatchStates. apply agree_assign_dead; auto.
-  red; intro; exploit Regset.mem_1; eauto. congruence.
-
-  (* Istore *)
-  assert (eval_addressing tge sp addr (map ls (map assign args)) = Some a).
-    replace (map ls (map assign args)) with (rs##args).
-    rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
-    eapply agree_eval_regs; eauto.
-  assert (ESRC: rs#src = ls (assign src)).
-    eapply agree_eval_reg. eapply agree_reg_list_live. eauto.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact X. econstructor; eauto. 
+  eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto.
+  eapply subst_reg_kind_satisf_highlong. eauto. eauto. 
+  intros [enext [U V]]. 
+  econstructor; eauto.
+
+(* op regular *)
+- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+  exploit transfer_use_def_satisf; eauto. intros [X Y].
+  exploit eval_operation_lessdef; eauto. intros [v' [F G]]. 
+  exploit (exec_moves mv2); eauto. intros [ls2 [A2 B2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact A1. 
+  eapply star_left. econstructor. instantiate (1 := v'). rewrite <- F. 
+  apply eval_operation_preserved. exact symbols_preserved.
+  eauto. eapply star_right. eexact A2. constructor. 
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]]. 
+  econstructor; eauto.
+  eapply wt_exec_Iop; eauto. 
+
+(* op dead *)
+- exploit exec_moves; eauto. intros [ls1 [X Y]]. 
   econstructor; split.
-  eapply exec_Lstore; eauto. TranslInstr.
-  rewrite <- ESRC. eauto.
-  MatchStates.
-  eapply agree_undef_temps; eauto.
-  eapply agree_reg_live. eapply agree_reg_list_live. eauto.
-
-  (* Icall *)
-  exploit transl_find_function; eauto.  intros [tf [TFIND TF]].
-  generalize (regalloc_correct_1 f env live _ _ _ _ ASG H).
-  unfold correct_alloc_instr. intros [CORR1 [CORR2 CORR3]].
-  assert (rs##args = map ls (map assign args)).
-    eapply agree_eval_regs; eauto. 
-  econstructor; split. 
-  eapply exec_Lcall; eauto. TranslInstr.
-  rewrite (sig_function_translated _ _ TF). eauto.
-  rewrite H1. 
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact X. econstructor; eauto. 
+  eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto. 
+  eapply reg_unconstrained_satisf; eauto. 
+  intros [enext [U V]]. 
   econstructor; eauto.
+  eapply wt_exec_Iop; eauto. 
+
+(* load regular *)
+- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+  exploit transfer_use_def_satisf; eauto. intros [X Y].
+  exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
+  exploit Mem.loadv_extends; eauto. intros [v' [P Q]]. 
+  exploit (exec_moves mv2); eauto. intros [ls2 [A2 B2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact A1. 
+  eapply star_left. econstructor. instantiate (1 := a'). rewrite <- F. 
+  apply eval_addressing_preserved. exact symbols_preserved. eauto. eauto.
+  eapply star_right. eexact A2. constructor. 
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]]. 
+  econstructor; eauto.
+  eapply wt_exec_Iload; eauto.
+
+(* load pair *)
+- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+  set (v2' := if big_endian then v2 else v1) in *.
+  set (v1' := if big_endian then v1 else v2) in *.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+  assert (LD1: Val.lessdef_list rs##args (reglist ls1 args1')).
+  { eapply add_equations_lessdef; eauto. }
+  exploit eval_addressing_lessdef. eexact LD1. eauto. intros [a1' [F1 G1]].
+  exploit Mem.loadv_extends. eauto. eexact LOAD1. eexact G1. intros (v1'' & LOAD1' & LD2).
+  set (ls2 := Locmap.set (R dst1') v1'' (undef_regs (destroyed_by_load Mint32 addr) ls1)).
+  assert (SAT2: satisf (rs#dst <- v) ls2 e2).
+  { eapply loc_unconstrained_satisf. eapply can_undef_satisf; eauto. 
+    eapply reg_unconstrained_satisf. eauto. 
+    eapply add_equations_satisf; eauto. assumption.
+    rewrite Regmap.gss. apply Val.lessdef_trans with v1'; auto. 
+    subst v. unfold v1', kind_first_word.
+    destruct big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
+  }
+  exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
+  assert (LD3: Val.lessdef_list rs##args (reglist ls3 args2')).
+  { replace (rs##args) with ((rs#dst<-v)##args). 
+    eapply add_equations_lessdef; eauto. 
+    apply list_map_exten; intros. rewrite Regmap.gso; auto. 
+    eapply addressing_not_long; eauto. 
+  }
+  exploit eval_addressing_lessdef. eexact LD3.
+  eapply eval_offset_addressing; eauto. intros [a2' [F2 G2]].
+  exploit Mem.loadv_extends. eauto. eexact LOAD2. eexact G2. intros (v2'' & LOAD2' & LD4).
+  set (ls4 := Locmap.set (R dst2') v2'' (undef_regs (destroyed_by_load Mint32 addr2) ls3)).
+  assert (SAT4: satisf (rs#dst <- v) ls4 e0).
+  { eapply loc_unconstrained_satisf. eapply can_undef_satisf; eauto. 
+    eapply add_equations_satisf; eauto. assumption.
+    apply Val.lessdef_trans with v2'; auto. 
+    rewrite Regmap.gss. subst v. unfold v2', kind_second_word.
+    destruct big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
+  }
+  exploit (exec_moves mv3); eauto. intros [ls5 [A5 B5]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact A1. 
+  eapply star_left. econstructor. 
+  instantiate (1 := a1'). rewrite <- F1. apply eval_addressing_preserved. exact symbols_preserved.
+  eexact LOAD1'. instantiate (1 := ls2); auto. 
+  eapply star_trans. eexact A3.
+  eapply star_left. econstructor.
+  instantiate (1 := a2'). rewrite <- F2. apply eval_addressing_preserved. exact symbols_preserved.
+  eexact LOAD2'. instantiate (1 := ls4); auto.
+  eapply star_right. eexact A5.
+  constructor. 
+  eauto. eauto. eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. 
   econstructor; eauto.
-  intros. eapply agree_succ with (n := pc); eauto.
-  simpl. auto.
-  eapply agree_postcall; eauto.
-
-  (* Itailcall *)
-  exploit transl_find_function; eauto.  intros [tf [TFIND TF]].
-  assert (rs##args = map ls (map assign args)).
-    eapply agree_eval_regs; eauto. 
-  econstructor; split. 
-  eapply exec_Ltailcall; eauto. TranslInstr.
-  rewrite (sig_function_translated _ _ TF). eauto.
-  rewrite H1. econstructor; eauto.
-
-  (* Ibuiltin *)
+  eapply wt_exec_Iload; eauto.
+
+(* load first word of a pair *)
+- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+  set (v2' := if big_endian then v2 else v1) in *.
+  set (v1' := if big_endian then v1 else v2) in *.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+  assert (LD1: Val.lessdef_list rs##args (reglist ls1 args')).
+  { eapply add_equations_lessdef; eauto. }
+  exploit eval_addressing_lessdef. eexact LD1. eauto. intros [a1' [F1 G1]].
+  exploit Mem.loadv_extends. eauto. eexact LOAD1. eexact G1. intros (v1'' & LOAD1' & LD2).
+  set (ls2 := Locmap.set (R dst') v1'' (undef_regs (destroyed_by_load Mint32 addr) ls1)).
+  assert (SAT2: satisf (rs#dst <- v) ls2 e0).
+  { eapply parallel_assignment_satisf; eauto.
+    apply Val.lessdef_trans with v1'; auto. 
+    subst v. unfold v1', kind_first_word.
+    destruct big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
+    eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto.
+  }
+  exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
   econstructor; split.
-  eapply exec_Lbuiltin; eauto. TranslInstr.
-  replace (map ls (@map reg loc assign args)) with (rs##args).
-  eapply external_call_symbols_preserved; eauto.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact A1. 
+  eapply star_left. econstructor. 
+  instantiate (1 := a1'). rewrite <- F1. apply eval_addressing_preserved. exact symbols_preserved.
+  eexact LOAD1'. instantiate (1 := ls2); auto. 
+  eapply star_right. eexact A3.
+  constructor. 
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. 
+  econstructor; eauto.
+  eapply wt_exec_Iload; eauto.
+
+(* load second word of a pair *)
+- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+  set (v2' := if big_endian then v2 else v1) in *.
+  set (v1' := if big_endian then v1 else v2) in *.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+  assert (LD1: Val.lessdef_list rs##args (reglist ls1 args')).
+  { eapply add_equations_lessdef; eauto. }
+  exploit eval_addressing_lessdef. eexact LD1.
+  eapply eval_offset_addressing; eauto. intros [a1' [F1 G1]].
+  exploit Mem.loadv_extends. eauto. eexact LOAD2. eexact G1. intros (v2'' & LOAD2' & LD2).
+  set (ls2 := Locmap.set (R dst') v2'' (undef_regs (destroyed_by_load Mint32 addr2) ls1)).
+  assert (SAT2: satisf (rs#dst <- v) ls2 e0).
+  { eapply parallel_assignment_satisf; eauto.
+    apply Val.lessdef_trans with v2'; auto. 
+    subst v. unfold v2', kind_second_word.
+    destruct big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
+    eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto.
+  }
+  exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact A1. 
+  eapply star_left. econstructor. 
+  instantiate (1 := a1'). rewrite <- F1. apply eval_addressing_preserved. exact symbols_preserved.
+  eexact LOAD2'. instantiate (1 := ls2); auto. 
+  eapply star_right. eexact A3.
+  constructor. 
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. 
+  econstructor; eauto.
+  eapply wt_exec_Iload; eauto.
+
+(* load dead *)
+- exploit exec_moves; eauto. intros [ls1 [X Y]]. 
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact X. econstructor; eauto. 
+  eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto. 
+  eapply reg_unconstrained_satisf; eauto. 
+  intros [enext [U V]]. 
+  econstructor; eauto.
+  eapply wt_exec_Iload; eauto.
+
+(* store *)
+- exploit exec_moves; eauto. intros [ls1 [X Y]].
+  exploit add_equations_lessdef; eauto. intros LD. simpl in LD. inv LD. 
+  exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
+  exploit Mem.storev_extends; eauto. intros [m'' [P Q]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact X.
+  eapply star_two. econstructor. instantiate (1 := a'). rewrite <- F. 
+  apply eval_addressing_preserved. exact symbols_preserved. eauto. eauto.
+  constructor. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto.
+  eapply can_undef_satisf; eauto. eapply add_equations_satisf; eauto. intros [enext [U V]]. 
+  econstructor; eauto.
+
+(* store 2 *)
+- exploit Mem.storev_int64_split; eauto. 
+  replace (if big_endian then Val.hiword rs#src else Val.loword rs#src)
+     with (sel_val kind_first_word rs#src)
+       by (unfold kind_first_word; destruct big_endian; reflexivity).
+  replace (if big_endian then Val.loword rs#src else Val.hiword rs#src)
+     with (sel_val kind_second_word rs#src)
+       by (unfold kind_second_word; destruct big_endian; reflexivity).
+  intros [m1 [STORE1 STORE2]].
+  exploit (exec_moves mv1); eauto. intros [ls1 [X Y]].
+  exploit add_equations_lessdef. eexact Heqo1. eexact Y. intros LD1.
+  exploit add_equation_lessdef. eapply add_equations_satisf. eexact Heqo1. eexact Y. 
+  simpl. intros LD2.
+  set (ls2 := undef_regs (destroyed_by_store Mint32 addr) ls1).
+  assert (SAT2: satisf rs ls2 e1).
+    eapply can_undef_satisf. eauto. 
+    eapply add_equation_satisf. eapply add_equations_satisf; eauto.
+  exploit eval_addressing_lessdef. eexact LD1. eauto. intros [a1' [F1 G1]].
+  assert (F1': eval_addressing tge sp addr (reglist ls1 args1') = Some a1').
+    rewrite <- F1. apply eval_addressing_preserved. exact symbols_preserved.
+  exploit Mem.storev_extends. eauto. eexact STORE1. eexact G1. eauto. 
+  intros [m1' [STORE1' EXT1]].
+  exploit (exec_moves mv2); eauto. intros [ls3 [U V]].
+  exploit add_equations_lessdef. eexact Heqo. eexact V. intros LD3.
+  exploit add_equation_lessdef. eapply add_equations_satisf. eexact Heqo. eexact V.
+  simpl. intros LD4.
+  exploit eval_addressing_lessdef. eexact LD3. eauto. intros [a2' [F2 G2]].
+  assert (F2': eval_addressing tge sp addr (reglist ls3 args2') = Some a2').
+    rewrite <- F2. apply eval_addressing_preserved. exact symbols_preserved.
+  exploit eval_offset_addressing. eauto. eexact F2'. intros F2''.
+  exploit Mem.storev_extends. eexact EXT1. eexact STORE2. 
+  apply Val.add_lessdef. eexact G2. eauto. eauto. 
+  intros [m2' [STORE2' EXT2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact X.
+  eapply star_left. 
+  econstructor. eexact F1'. eexact STORE1'. instantiate (1 := ls2). auto.
+  eapply star_trans. eexact U.
+  eapply star_two.
+  econstructor. eexact F2''. eexact STORE2'. eauto. 
+  constructor. eauto. eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto.
+  eapply can_undef_satisf. eauto.
+  eapply add_equation_satisf. eapply add_equations_satisf; eauto.
+  intros [enext [P Q]]. 
+  econstructor; eauto.
+
+(* call *)
+- set (sg := RTL.funsig fd) in *.
+  set (args' := loc_arguments sg) in *.
+  set (res' := map R (loc_result sg)) in *.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+  exploit find_function_translated. eauto. eauto. eapply add_equations_args_satisf; eauto. 
+  intros [tfd [E F]].
+  assert (SIG: funsig tfd = sg). eapply sig_function_translated; eauto.
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact A1. econstructor; eauto.
+  eauto. traceEq.
+  exploit analyze_successors; eauto. simpl. left; eauto. intros [enext [U V]].
+  econstructor; eauto.
+  econstructor; eauto.
+  inv WTI. rewrite SIG. auto. 
+  intros. exploit (exec_moves mv2); eauto.
+  eapply function_return_satisf with (v := v) (ls_before := ls1) (ls_after := ls0); eauto.
+  eapply add_equation_ros_satisf; eauto. 
+  eapply add_equations_args_satisf; eauto. 
+  congruence. intros [ls2 [A2 B2]].
+  exists ls2; split. 
+  eapply star_right. eexact A2. constructor. traceEq.
+  apply satisf_incr with eafter; auto. 
+  rewrite SIG. eapply add_equations_args_lessdef; eauto.
+  inv WTI. rewrite <- H7. apply wt_regset_list; auto. 
+  simpl. red; auto.
+  rewrite SIG. inv WTI. rewrite <- H7.  apply wt_regset_list; auto. 
+
+(* tailcall *)
+- set (sg := RTL.funsig fd) in *.
+  set (args' := loc_arguments sg) in *.
+  exploit Mem.free_parallel_extends; eauto. intros [m'' [P Q]]. 
+  exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. 
+  exploit find_function_translated. eauto. eauto. eapply add_equations_args_satisf; eauto. 
+  intros [tfd [E F]].
+  assert (SIG: funsig tfd = sg). eapply sig_function_translated; eauto.
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact A1. econstructor; eauto.
+  rewrite <- E. apply find_function_tailcall; auto. 
+  replace (fn_stacksize tf) with (RTL.fn_stacksize f); eauto.
+  destruct (transf_function_inv _ _ FUN); auto.
+  eauto. traceEq.
+  econstructor; eauto.
+  eapply match_stackframes_change_sig; eauto. rewrite SIG. rewrite e0. decEq.
+  destruct (transf_function_inv _ _ FUN); auto.
+  rewrite SIG. rewrite return_regs_arg_values; auto. eapply add_equations_args_lessdef; eauto.
+  inv WTI. rewrite <- H8. apply wt_regset_list; auto. 
+  apply return_regs_agree_callee_save.
+  rewrite SIG. inv WTI. rewrite <- H8. apply wt_regset_list; auto. 
+
+(* builtin *)
+- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+  exploit external_call_mem_extends; eauto.
+  eapply add_equations_args_lessdef; eauto.
+  inv WTI. rewrite <- H4. apply wt_regset_list; auto. 
+  intros [v' [m'' [F [G [J K]]]]].
+  assert (E: map ls1 (map R args') = reglist ls1 args').
+  { unfold reglist. rewrite list_map_compose. auto. }
+  rewrite E in F. clear E.
+  set (vl' := encode_long (sig_res (ef_sig ef)) v').
+  set (ls2 := Locmap.setlist (map R res') vl' (undef_regs (destroyed_by_builtin ef) ls1)).
+  assert (satisf (rs#res <- v) ls2 e0).
+  { eapply parallel_assignment_satisf_2; eauto. 
+    eapply can_undef_satisf; eauto.
+    eapply add_equations_args_satisf; eauto. }
+  exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact A1. 
+  eapply star_left. econstructor. 
+  econstructor. unfold reglist. eapply external_call_symbols_preserved; eauto. 
   exact symbols_preserved. exact varinfo_preserved.
-  eapply agree_eval_regs; eauto.
-  generalize (regalloc_correct_1 f env live _ _ _ _ ASG H).
-  unfold correct_alloc_instr. intro CORR.
-  MatchStates. 
-  eapply agree_assign_live; eauto.
-  eapply agree_reg_list_live; eauto.
-
-  (* Icond *)
-  assert (COND: eval_condition cond (map ls (map assign args)) m = Some b).
-    replace (map ls (map assign args)) with (rs##args). auto.
-    eapply agree_eval_regs; eauto.
+  instantiate (1 := vl'); auto. 
+  instantiate (1 := ls2); auto. 
+  eapply star_right. eexact A3.
+  econstructor. 
+  reflexivity. reflexivity. reflexivity. traceEq. 
+  exploit satisf_successors; eauto. simpl; eauto.
+  intros [enext [U V]]. 
+  econstructor; eauto.
+  inv WTI. apply wt_regset_assign; auto. rewrite H9. 
+  eapply external_call_well_typed; eauto. 
+ 
+(* annot *)
+- exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. 
+  exploit external_call_mem_extends; eauto. eapply add_equations_args_lessdef; eauto.
+  inv WTI. simpl in H4. rewrite <- H4. apply wt_regset_list; auto. 
+  intros [v' [m'' [F [G [J K]]]]].
+  assert (v = Vundef). red in H0; inv H0. auto.
   econstructor; split.
-  eapply exec_Lcond; eauto. TranslInstr.
-  MatchStates. destruct b; simpl; auto.
-  eapply agree_undef_temps; eauto.
-  eapply agree_reg_list_live. eauto.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_trans. eexact A1. 
+  eapply star_two. econstructor.
+  eapply external_call_symbols_preserved' with (ge1 := ge).
+  econstructor; eauto. 
+  exact symbols_preserved. exact varinfo_preserved.
+  eauto. constructor. eauto. eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto. 
+  eapply satisf_undef_reg with (r := res).
+  eapply add_equations_args_satisf; eauto. 
+  intros [enext [U V]]. 
+  econstructor; eauto.
+  change (destroyed_by_builtin (EF_annot txt typ)) with (@nil mreg). 
+  simpl. subst v. assumption.
+  apply wt_regset_assign; auto. subst v. constructor. 
 
-  (* Ijumptable *)
-  assert (rs#arg = ls (assign arg)). apply AG. apply Regset.add_1. auto. 
+(* cond *)
+- exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
   econstructor; split.
-  eapply exec_Ljumptable; eauto. TranslInstr. congruence. 
-  MatchStates. eapply list_nth_z_in; eauto.
-  eapply agree_undef_temps; eauto.
-  eapply agree_reg_live; eauto. 
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact A1.
+  econstructor. eapply eval_condition_lessdef; eauto. eapply add_equations_lessdef; eauto. 
+  eauto. eauto. eauto. traceEq. 
+  exploit satisf_successors; eauto.
+  instantiate (1 := if b then ifso else ifnot). simpl. destruct b; auto.
+  eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto.
+  intros [enext [U V]]. 
+  econstructor; eauto.
 
-  (* Ireturn *)
+(* jumptable *)
+- exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
+  assert (Val.lessdef (Vint n) (ls1 (R arg'))).
+    rewrite <- H0. eapply add_equation_lessdef with (q := Eq Full arg (R arg')); eauto.
+  inv H2.  
   econstructor; split.
-  eapply exec_Lreturn; eauto. TranslInstr; eauto.
-  replace (regmap_optget or Vundef rs)
-     with (locmap_optget (option_map assign or) Vundef ls).
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact A1.
+  econstructor. eauto. eauto. eauto. eauto. traceEq. 
+  exploit satisf_successors; eauto.
+  instantiate (1 := pc'). simpl. eapply list_nth_z_in; eauto. 
+  eapply can_undef_satisf. eauto. eapply add_equation_satisf; eauto.
+  intros [enext [U V]]. 
   econstructor; eauto.
-  inv WTI. destruct or; simpl in *.
-  symmetry; eapply agree_eval_reg; eauto. 
-  auto.
 
-  (* internal function *)
-  generalize H7. simpl.  unfold transf_function.
-  caseEq (type_function f); simpl; try congruence. intros env TYP.
-  assert (WTF: wt_function f env). apply type_function_correct; auto.
-  caseEq (analyze f); simpl; try congruence. intros live ANL.
-  caseEq (regalloc f live (live0 f live) env); simpl; try congruence.
-  intros alloc ALLOC EQ. inv EQ. simpl in *.
+(* return *)
+- destruct (transf_function_inv _ _ FUN). 
+  exploit Mem.free_parallel_extends; eauto. rewrite H10. intros [m'' [P Q]].
+  destruct or as [r|]; MonadInv.
+  (* with an argument *)
++ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
   econstructor; split.
-  eapply exec_function_internal; simpl; eauto.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact A1.
+  econstructor. eauto. eauto. traceEq.
+  simpl. econstructor; eauto. rewrite <- H11.
+  replace (map (return_regs (parent_locset ts) ls1) (map R (loc_result (RTL.fn_sig f))))
+     with (map ls1 (map R (loc_result (RTL.fn_sig f)))).
+  eapply add_equations_res_lessdef; eauto.
+  rewrite !list_map_compose. apply list_map_exten; intros.
+  unfold return_regs. apply pred_dec_true. eapply loc_result_caller_save; eauto.
+  apply return_regs_agree_callee_save.
+  inv WTI. simpl in H13. unfold proj_sig_res. rewrite <- H11; rewrite <- H13. apply WTRS. 
+  (* without an argument *)
++ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact A1.
+  econstructor. eauto. eauto. traceEq.
   simpl. econstructor; eauto.
-  change (transfer f (RTL.fn_entrypoint f) live !! (RTL.fn_entrypoint f))
-    with (live0 f live).
-  eapply agree_parameters; eauto. 
+  unfold encode_long, loc_result. destruct (sig_res (fn_sig tf)) as [[]|]; simpl; auto.
+  apply return_regs_agree_callee_save.
+  constructor.
+
+(* internal function *)
+- monadInv FUN. simpl in *.
+  destruct (transf_function_inv _ _ EQ). 
+  exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H8; apply Zle_refl. 
+  intros [m'' [U V]].
+  exploit (exec_moves mv). eauto. eauto.
+    eapply can_undef_satisf; eauto. eapply compat_entry_satisf; eauto. 
+    rewrite call_regs_param_values. rewrite H9. eexact ARGS. 
+  intros [ls1 [A B]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_left. econstructor; eauto. 
+  eapply star_right. eexact A. 
+  econstructor; eauto. 
+  eauto. eauto. traceEq.
+  econstructor; eauto.
+  apply wt_init_regs. inv H0. rewrite wt_params. congruence.
 
-  (* external function *)
-  injection H7; intro EQ; inv EQ.
+(* external function *)
+- exploit external_call_mem_extends; eauto. intros [v' [m'' [F [G [J K]]]]].
+  simpl in FUN; inv FUN.
   econstructor; split.
-  eapply exec_function_external; eauto.
-  eapply external_call_symbols_preserved; eauto.
+  apply plus_one. econstructor; eauto. 
+  eapply external_call_symbols_preserved' with (ge1 := ge). 
+  econstructor; eauto. 
   exact symbols_preserved. exact varinfo_preserved.
-  eapply match_states_return; eauto.
-
-  (* return *)
-  inv H4. 
+  econstructor; eauto. simpl.
+  replace (map
+           (Locmap.setlist (map R (loc_result (ef_sig ef)))
+                (encode_long (sig_res (ef_sig ef)) v') ls)
+           (map R (loc_result (ef_sig ef))))
+  with (encode_long (sig_res (ef_sig ef)) v').
+  apply encode_long_lessdef; auto.
+  unfold encode_long, loc_result.
+  destruct (sig_res (ef_sig ef)) as [[]|]; simpl; symmetry; f_equal; auto.
+  red; intros. rewrite Locmap.gsetlisto. apply AG; auto.
+  apply Loc.notin_iff. intros. 
+  exploit list_in_map_inv; eauto. intros [r [A B]]; subst l'. 
+  destruct l; simpl; auto. red; intros; subst r0; elim H0.
+  eapply loc_result_caller_save; eauto.
+  simpl. eapply external_call_well_typed; eauto. 
+ 
+(* return *)
+- inv STACKS.
+  exploit STEPS; eauto. intros [ls2 [A B]].
   econstructor; split.
-  eapply exec_return; eauto.
+  eapply plus_left. constructor. eexact A. traceEq.
   econstructor; eauto.
+  apply wt_regset_assign; auto. congruence.  
 Qed.
 
-(** The semantic equivalence between the original and transformed programs
-  follows easily. *)
-
-Lemma transf_initial_states:
+Lemma initial_states_simulation:
   forall st1, RTL.initial_state prog st1 ->
   exists st2, LTL.initial_state tprog st2 /\ match_states st1 st2.
 Proof.
-  intros. inversion H.
+  intros. inv H.
   exploit function_ptr_translated; eauto. intros [tf [FIND TR]].
-  exists (LTL.Callstate nil tf nil m0); split.
+  exploit sig_function_translated; eauto. intros SIG.
+  exists (LTL.Callstate nil tf (Locmap.init Vundef) m0); split.
   econstructor; eauto.
   eapply Genv.init_mem_transf_partial; eauto. 
   rewrite symbols_preserved. 
   rewrite (transform_partial_program_main _ _ TRANSF).  auto.
-  rewrite <- H3. apply sig_function_translated; auto. 
-  constructor; auto. constructor.
+  congruence.
+  constructor; auto.
+  constructor. rewrite SIG; rewrite H3; auto. 
+  rewrite SIG; rewrite H3; simpl; auto.
+  red; auto.
+  apply Mem.extends_refl.
+  rewrite SIG; rewrite H3; simpl; auto.
 Qed.
 
-Lemma transf_final_states:
+Lemma final_states_simulation:
   forall st1 st2 r, 
   match_states st1 st2 -> RTL.final_state st1 r -> LTL.final_state st2 r.
 Proof.
-  intros. inv H0. inv H. inv H4. econstructor.
+  intros. inv H0. inv H. inv STACKS.
+  econstructor. simpl; reflexivity.  
+  unfold loc_result in RES; rewrite H in RES. simpl in RES. inv RES. inv H3; auto. 
 Qed.
 
 Theorem transf_program_correct:
   forward_simulation (RTL.semantics prog) (LTL.semantics tprog).
 Proof.
-  eapply forward_simulation_step.
+  eapply forward_simulation_plus.
   eexact symbols_preserved.
-  eexact transf_initial_states.
-  eexact transf_final_states.
-  exact transl_step_correct. 
+  eexact initial_states_simulation.
+  eexact final_states_simulation.
+  exact step_simulation. 
 Qed.
 
 End PRESERVATION.
diff --git a/backend/Alloctyping.v b/backend/Alloctyping.v
deleted file mode 100644
index 59bf621..0000000
--- a/backend/Alloctyping.v
+++ /dev/null
@@ -1,205 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Preservation of typing during register allocation. *)
-
-Require Import Coqlib.
-Require Import Maps.
-Require Import Errors.
-Require Import AST.
-Require Import Op.
-Require Import Registers.
-Require Import RTL.
-Require Import Liveness.
-Require Import Locations.
-Require Import LTL.
-Require Import Coloring.
-Require Import Coloringproof.
-Require Import Allocation.
-Require Import Allocproof.
-Require Import RTLtyping.
-Require Import LTLtyping.
-Require Import Conventions.
-
-(** This file proves that register allocation (the translation from
-  RTL to LTL defined in file [Allocation]) preserves typing:
-  given a well-typed RTL input, it produces LTL code that is
-  well-typed. *)
-
-Section TYPING_FUNCTION.
-
-Variable f: RTL.function.
-Variable env: regenv.
-Variable live: PMap.t Regset.t.
-Variable alloc: reg -> loc.
-Variable tf: LTL.function.
-
-Hypothesis TYPE_RTL: type_function f = OK env.
-Hypothesis LIVE: analyze f = Some live.
-Hypothesis ALLOC: regalloc f live (live0 f live) env = Some alloc.
-Hypothesis TRANSL: transf_function f = OK tf.
-
-Lemma wt_rtl_function: RTLtyping.wt_function f env.
-Proof.
-  apply type_function_correct; auto.
-Qed.
-
-Lemma alloc_type: forall r, Loc.type (alloc r) = env r.
-Proof.
-  intro. eapply regalloc_preserves_types; eauto.
-Qed.
-
-Lemma alloc_types:
-  forall rl, List.map Loc.type (List.map alloc rl) = List.map env rl.
-Proof.
-  intros. rewrite list_map_compose. apply list_map_exten.
-  intros. symmetry. apply alloc_type. 
-Qed.
-
-Lemma alloc_acceptable: 
-  forall r, loc_acceptable (alloc r).
-Proof.
-  intros. eapply regalloc_acceptable; eauto.
-Qed.
-
-Lemma allocs_acceptable:
-  forall rl, locs_acceptable (List.map alloc rl).
-Proof.
-  intros. eapply regsalloc_acceptable; eauto.
-Qed.
-
-Remark transf_unroll:
-  tf = transf_fun f live alloc.
-Proof.
-  generalize TRANSL. unfold transf_function.
-  rewrite TYPE_RTL. rewrite LIVE. rewrite ALLOC. congruence.
-Qed.
-
-Lemma valid_successor_transf:
-  forall s,
-  RTLtyping.valid_successor f s ->
-  LTLtyping.valid_successor tf s.
-Proof.
-  unfold RTLtyping.valid_successor, LTLtyping.valid_successor.
-  intros s [i AT].
-  rewrite transf_unroll; simpl. rewrite PTree.gmap. 
-  rewrite AT. exists (transf_instr f live alloc s i). auto.
-Qed.  
-
-Hint Resolve alloc_acceptable allocs_acceptable: allocty.
-Hint Rewrite alloc_type alloc_types: allocty.
-Hint Resolve valid_successor_transf: allocty.
-
-(** * Type preservation during translation from RTL to LTL *)
-
-Ltac WT := 
-  constructor; auto with allocty; autorewrite with allocty; auto.
-
-Lemma wt_transf_instr:
-  forall pc instr,
-  RTLtyping.wt_instr env f instr ->
-  f.(RTL.fn_code)!pc = Some instr ->
-  wt_instr tf (transf_instr f live alloc pc instr).
-Proof.
-  intros. inv H; simpl.
-  (* nop *)
-  WT.
-  (* move *)
-  destruct (Regset.mem r live!!pc).
-  destruct (is_redundant_move Omove (r1 :: nil) r alloc); WT.
-  WT.
-  (* other ops *)
-  destruct (Regset.mem res live!!pc).
-  destruct (is_redundant_move op args res alloc); WT.
-  WT.
-  (* load *)
-  destruct (Regset.mem dst live!!pc); WT.
-  (* store *)
-  WT.
-  (* call *)
-  exploit regalloc_correct_1; eauto. unfold correct_alloc_instr. 
-  intros [A1 [A2 A3]]. 
-  WT.
-  destruct ros; simpl; auto. 
-  split. autorewrite with allocty; auto.
-  split. auto with allocty. auto.
-  (* tailcall *)
-  exploit regalloc_correct_1; eauto. unfold correct_alloc_instr. 
-  intro A1.
-  WT.
-  destruct ros; simpl; auto. 
-  split. autorewrite with allocty; auto.
-  split. auto with allocty. auto.
-  rewrite transf_unroll; auto.
-  (* builtin *)
-  WT.
-  (* cond *)
-  WT.
-  (* jumptable *)
-  WT. 
-  (* return *)
-  WT.
-  rewrite transf_unroll; simpl. 
-  destruct optres; simpl. autorewrite with allocty. auto. auto.
-  destruct optres; simpl; auto with allocty.
-Qed.
-
-End TYPING_FUNCTION.
-
-Lemma wt_transf_function:
-  forall f tf,
-  transf_function f = OK tf -> wt_function tf.
-Proof.
-  intros. generalize H; unfold transf_function.
-  caseEq (type_function f). intros env TYP. 
-  caseEq (analyze f). intros live ANL.
-  change (transfer f (RTL.fn_entrypoint f)
-                     live!!(RTL.fn_entrypoint f))
-    with (live0 f live).
-  caseEq (regalloc f live (live0 f live) env).
-  intros alloc ALLOC.
-  intro EQ; injection EQ; intro.
-  assert (RTLtyping.wt_function f env). apply type_function_correct; auto.
-  inversion H1.
-  constructor; rewrite <- H0; simpl.
-  rewrite (alloc_types _ _ _ _ ALLOC). auto.
-  eapply regsalloc_acceptable; eauto.
-  eapply regalloc_norepet_norepet; eauto.
-  eapply regalloc_correct_2; eauto.
-  intros until instr. rewrite PTree.gmap. 
-  caseEq (RTL.fn_code f)!pc; simpl; intros.
-  inversion H3. eapply wt_transf_instr; eauto. congruence. discriminate.
-  eapply valid_successor_transf; eauto. congruence. 
-  congruence. congruence. congruence.
-Qed.
-
-Lemma wt_transf_fundef:
-  forall f tf,
-  transf_fundef f = OK tf -> wt_fundef tf.
-Proof.
-  intros until tf; destruct f; simpl. 
-  caseEq (transf_function f); simpl. intros g TF EQ. inversion EQ.
-  constructor. eapply wt_transf_function; eauto.
-  congruence.
-  intros. inversion H. constructor. 
-Qed.
-
-Lemma program_typing_preserved:
-  forall (p: RTL.program) (tp: LTL.program),
-  transf_program p = OK tp ->
-  LTLtyping.wt_program tp.
-Proof.
-  intros; red; intros.
-  generalize (transform_partial_program_function transf_fundef p i f H H0).
-  intros [f0 [IN TRANSF]].
-  apply wt_transf_fundef with f0; auto.
-Qed.
diff --git a/backend/Asmgenproof0.v b/backend/Asmgenproof0.v
index 72de80a..f74fba8 100644
--- a/backend/Asmgenproof0.v
+++ b/backend/Asmgenproof0.v
@@ -80,29 +80,6 @@ Qed.
 
 Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen.
 
-Lemma nontemp_diff:
-  forall r r',
-  nontemp_preg r = true -> nontemp_preg r' = false -> r <> r'.
-Proof.
-  congruence.
-Qed.
-Hint Resolve nontemp_diff: asmgen.
-
-Lemma temporaries_temp_preg:
-  forall r, In r temporary_regs -> nontemp_preg (preg_of r) = false.
-Proof.
-  assert (List.forallb (fun r => negb (nontemp_preg (preg_of r))) temporary_regs = true) by reflexivity.
-  rewrite List.forallb_forall in H. intros. generalize (H r H0). 
-  destruct (nontemp_preg (preg_of r)); simpl; congruence.
-Qed.
-
-Lemma nontemp_data_preg:
-  forall r, nontemp_preg r = true -> data_preg r = true.
-Proof.
-  destruct r; try (destruct i); try (destruct f); simpl; congruence.
-Qed.
-Hint Resolve nontemp_data_preg: asmgen.
-
 Lemma nextinstr_pc:
   forall rs, (nextinstr rs)#PC = Val.add rs#PC Vone.
 Proof.
@@ -121,12 +98,6 @@ Proof.
   intros. apply nextinstr_inv. red; intro; subst; discriminate.
 Qed.
 
-Lemma nextinstr_inv2:
-  forall r rs, nontemp_preg r = true -> (nextinstr rs)#r = rs#r.
-Proof.
-  intros. apply nextinstr_inv1; auto with asmgen.
-Qed.
-
 Lemma nextinstr_set_preg:
   forall rs m v,
   (nextinstr (rs#(preg_of m) <- v))#PC = Val.add rs#PC Vone.
@@ -135,6 +106,58 @@ Proof.
   rewrite Pregmap.gso. auto. apply sym_not_eq. apply preg_of_not_PC. 
 Qed.
 
+Lemma undef_regs_other:
+  forall r rl rs, 
+  (forall r', In r' rl -> r <> r') ->
+  undef_regs rl rs r = rs r.
+Proof.
+  induction rl; simpl; intros. auto. 
+  rewrite IHrl by auto. rewrite Pregmap.gso; auto.
+Qed.
+
+Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop :=
+  match rl with
+  | nil => True
+  | r1 :: nil => r <> preg_of r1
+  | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl
+  end.
+
+Remark preg_notin_charact:
+  forall r rl,
+  preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr).
+Proof.
+  induction rl; simpl; intros.
+  tauto.
+  destruct rl.
+  simpl. split. intros. intuition congruence. auto. 
+  rewrite IHrl. split. 
+  intros [A B]. intros. destruct H. congruence. auto. 
+  auto.
+Qed.
+
+Lemma undef_regs_other_2:
+  forall r rl rs,
+  preg_notin r rl ->
+  undef_regs (map preg_of rl) rs r = rs r.
+Proof.
+  intros. apply undef_regs_other. intros. 
+  exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+  rewrite preg_notin_charact in H. auto.
+Qed.
+
+Lemma set_pregs_other_2:
+  forall r rl vl rs,
+  preg_notin r rl ->
+  set_regs (map preg_of rl) vl rs r = rs r.
+Proof.
+  induction rl; simpl; intros. 
+  auto.
+  destruct vl; auto.
+  assert (r <> preg_of a) by (destruct rl; tauto).
+  assert (preg_notin r rl) by (destruct rl; simpl; tauto).
+  rewrite IHrl by auto. apply Pregmap.gso; auto. 
+Qed.
+
 (** * Agreement between Mach registers and processor registers *)
 
 Record agree (ms: Mach.regset) (sp: val) (rs: Asm.regset) : Prop := mkagree {
@@ -224,7 +247,21 @@ Proof.
   intros. unfold nextinstr. apply agree_set_other. auto. auto.
 Qed.
 
-Lemma agree_undef_regs:
+Lemma agree_set_mregs:
+  forall sp rl vl vl' ms rs,
+  agree ms sp rs ->
+  Val.lessdef_list vl vl' ->
+  agree (Mach.set_regs rl vl ms) sp (set_regs (map preg_of rl) vl' rs).
+Proof.
+  induction rl; simpl; intros. 
+  auto.
+  inv H0. auto. apply IHrl; auto. 
+  eapply agree_set_mreg. eexact H. 
+  rewrite Pregmap.gss. auto.
+  intros. apply Pregmap.gso; auto. 
+Qed.
+
+Lemma agree_undef_nondata_regs:
   forall ms sp rl rs,
   agree ms sp rs ->
   (forall r, In r rl -> data_preg r = false) ->
@@ -237,31 +274,33 @@ Proof.
   intros. apply H0; auto.
 Qed.
 
-Lemma agree_exten_temps:
-  forall ms sp rs rs',
+Lemma agree_undef_regs:
+  forall ms sp rl rs rs',
   agree ms sp rs ->
-  (forall r, nontemp_preg r = true -> rs'#r = rs#r) ->
-  agree (undef_temps ms) sp rs'.
+  (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') ->
+  agree (Mach.undef_regs rl ms) sp rs'.
 Proof.
   intros. destruct H. split; auto.
-  rewrite H0; auto. auto. 
-  intros. unfold undef_temps. 
-  destruct (In_dec mreg_eq r temporary_regs).
-  rewrite Mach.undef_regs_same; auto. 
-  rewrite Mach.undef_regs_other; auto. rewrite H0; auto.
-  simpl in n. destruct r; auto; intuition.
+  rewrite <- agree_sp0. apply H0; auto. 
+  rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP. 
+  intros. destruct (In_dec mreg_eq r rl).
+  rewrite Mach.undef_regs_same; auto.
+  rewrite Mach.undef_regs_other; auto. rewrite H0; auto. 
+  apply preg_of_data.
+  rewrite preg_notin_charact. intros; red; intros. elim n. 
+  exploit preg_of_injective; eauto. congruence.
 Qed.
 
 Lemma agree_set_undef_mreg:
-  forall ms sp rs r v rs',
+  forall ms sp rs r v rl rs',
   agree ms sp rs ->
   Val.lessdef v (rs'#(preg_of r)) ->
-  (forall r', nontemp_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r') ->
-  agree (Regmap.set r v (undef_temps ms)) sp rs'.
+  (forall r', data_preg r' = true -> r' <> preg_of r -> preg_notin r' rl -> rs'#r' = rs#r') ->
+  agree (Regmap.set r v (Mach.undef_regs rl ms)) sp rs'.
 Proof.
   intros. apply agree_set_mreg with (rs'#(preg_of r) <- (rs#(preg_of r))); auto.
-  eapply agree_exten_temps; eauto. 
-  intros. unfold Pregmap.set. destruct (PregEq.eq r0 (preg_of r)). 
+  apply agree_undef_regs with rs; auto. 
+  intros. unfold Pregmap.set. destruct (PregEq.eq r' (preg_of r)). 
   congruence. auto. 
   intros. rewrite Pregmap.gso; auto. 
 Qed.
@@ -291,9 +330,7 @@ Proof.
   exploit Mem.loadv_extends; eauto. intros [v' [A B]].
   rewrite (sp_val _ _ _ H) in A.
   exists v'; split; auto.
-  destruct ty; econstructor.
-  reflexivity. assumption.
-  reflexivity. assumption.
+  econstructor. eauto. assumption. 
 Qed.
 
 Lemma extcall_args_match:
@@ -667,6 +704,7 @@ Ltac TailNoLabel :=
   match goal with
   | [ |- tail_nolabel _ (_ :: _) ] => apply tail_nolabel_cons; [auto; exact I | TailNoLabel]
   | [ H: Error _ = OK _ |- _ ] => discriminate
+  | [ H: assertion_failed = OK _ |- _ ] => discriminate
   | [ H: OK _ = OK _ |- _ ] => inv H; TailNoLabel
   | [ H: bind _ _ = OK _ |- _ ] => monadInv H;  TailNoLabel
   | [ H: (if ?x then _ else _) = OK _ |- _ ] => destruct x; TailNoLabel
diff --git a/backend/Bounds.v b/backend/Bounds.v
index ef78b2e..bcd2848 100644
--- a/backend/Bounds.v
+++ b/backend/Bounds.v
@@ -17,7 +17,6 @@ Require Import AST.
 Require Import Op.
 Require Import Locations.
 Require Import Linear.
-Require Import Lineartyping.
 Require Import Conventions.
 
 (** * Resource bounds for a function *)
@@ -30,14 +29,12 @@ Require Import Conventions.
   the activation record. *)
 
 Record bounds : Type := mkbounds {
-  bound_int_local: Z;
-  bound_float_local: Z;
+  bound_local: Z;
   bound_int_callee_save: Z;
   bound_float_callee_save: Z;
   bound_outgoing: Z;
   bound_stack_data: Z;
-  bound_int_local_pos: bound_int_local >= 0;
-  bound_float_local_pos: bound_float_local >= 0;
+  bound_local_pos: bound_local >= 0;
   bound_int_callee_save_pos: bound_int_callee_save >= 0;
   bound_float_callee_save_pos: bound_float_callee_save >= 0;
   bound_outgoing_pos: bound_outgoing >= 0;
@@ -47,41 +44,39 @@ Record bounds : Type := mkbounds {
 (** The following predicates define the correctness of a set of bounds
     for the code of a function. *)
 
-Section BELOW.
+Section WITHIN_BOUNDS.
 
-Variable funct: function.
 Variable b: bounds.
 
 Definition mreg_within_bounds (r: mreg) :=
-  match mreg_type r with
-  | Tint => index_int_callee_save r < bound_int_callee_save b
-  | Tfloat => index_float_callee_save r < bound_float_callee_save b
-  end.
-
-Definition slot_within_bounds (s: slot) :=
-  match s with
-  | Local ofs Tint => 0 <= ofs < bound_int_local b
-  | Local ofs Tfloat => 0 <= ofs < bound_float_local b
-  | Outgoing ofs ty => 0 <= ofs /\ ofs + typesize ty <= bound_outgoing b
-  | Incoming ofs ty => In (S s) (loc_parameters funct.(fn_sig))
+  index_int_callee_save r < bound_int_callee_save b
+  /\ index_float_callee_save r < bound_float_callee_save b.
+
+Definition slot_within_bounds (sl: slot) (ofs: Z) (ty: typ) :=
+  match sl with
+  | Local => ofs + typesize ty <= bound_local b
+  | Outgoing => ofs + typesize ty <= bound_outgoing b
+  | Incoming => True
   end.
 
 Definition instr_within_bounds (i: instruction) :=
   match i with
-  | Lgetstack s r => slot_within_bounds s /\ mreg_within_bounds r
-  | Lsetstack r s => slot_within_bounds s
+  | Lgetstack sl ofs ty r => slot_within_bounds sl ofs ty /\ mreg_within_bounds r
+  | Lsetstack r sl ofs ty => slot_within_bounds sl ofs ty
   | Lop op args res => mreg_within_bounds res
   | Lload chunk addr args dst => mreg_within_bounds dst
   | Lcall sig ros => size_arguments sig <= bound_outgoing b
-  | Lbuiltin ef args res => mreg_within_bounds res
-  | Lannot ef args => forall s, In (S s) args -> slot_within_bounds s
+  | Lbuiltin ef args res =>
+      forall r, In r res \/ In r (destroyed_by_builtin ef) -> mreg_within_bounds r
+  | Lannot ef args =>
+      forall sl ofs ty, In (S sl ofs ty) args -> slot_within_bounds sl ofs ty
   | _ => True
   end.
 
-End BELOW.
+End WITHIN_BOUNDS.
 
 Definition function_within_bounds (f: function) (b: bounds) : Prop :=
-  forall instr, In instr f.(fn_code) -> instr_within_bounds f b instr.
+  forall instr, In instr f.(fn_code) -> instr_within_bounds b instr.
 
 (** * Inference of resource bounds for a function *)
 
@@ -99,14 +94,14 @@ Variable f: function.
 
 Definition regs_of_instr (i: instruction) : list mreg :=
   match i with
-  | Lgetstack s r => r :: nil
-  | Lsetstack r s => r :: nil
+  | Lgetstack sl ofs ty r => r :: nil
+  | Lsetstack r sl ofs ty => r :: nil
   | Lop op args res => res :: nil
   | Lload chunk addr args dst => dst :: nil
   | Lstore chunk addr args src => nil
   | Lcall sig ros => nil
   | Ltailcall sig ros => nil
-  | Lbuiltin ef args res => res :: nil
+  | Lbuiltin ef args res => res ++ destroyed_by_builtin ef
   | Lannot ef args => nil
   | Llabel lbl => nil
   | Lgoto lbl => nil
@@ -115,58 +110,55 @@ Definition regs_of_instr (i: instruction) : list mreg :=
   | Lreturn => nil
   end.
 
-Fixpoint slots_of_locs (l: list loc) : list slot :=
+Fixpoint slots_of_locs (l: list loc) : list (slot * Z * typ) :=
   match l with
   | nil => nil
-  | S s :: l' => s :: slots_of_locs l'
+  | S sl ofs ty :: l' => (sl, ofs, ty) :: slots_of_locs l'
   | R r :: l' => slots_of_locs l'
   end.
 
-Definition slots_of_instr (i: instruction) : list slot :=
+Definition slots_of_instr (i: instruction) : list (slot * Z * typ) :=
   match i with
-  | Lgetstack s r => s :: nil
-  | Lsetstack r s => s :: nil
+  | Lgetstack sl ofs ty r => (sl, ofs, ty) :: nil
+  | Lsetstack r sl ofs ty => (sl, ofs, ty) :: nil
   | Lannot ef args => slots_of_locs args
   | _ => nil
   end.
 
-Definition max_over_list (A: Type) (valu: A -> Z) (l: list A) : Z :=
+Definition max_over_list {A: Type} (valu: A -> Z) (l: list A) : Z :=
   List.fold_left (fun m l => Zmax m (valu l)) l 0.
 
 Definition max_over_instrs (valu: instruction -> Z) : Z :=
-  max_over_list instruction valu f.(fn_code).
+  max_over_list valu f.(fn_code).
 
 Definition max_over_regs_of_instr (valu: mreg -> Z) (i: instruction) : Z :=
-  max_over_list mreg valu (regs_of_instr i).
+  max_over_list valu (regs_of_instr i).
 
-Definition max_over_slots_of_instr (valu: slot -> Z) (i: instruction) : Z :=
-  max_over_list slot valu (slots_of_instr i).
+Definition max_over_slots_of_instr (valu: slot * Z * typ -> Z) (i: instruction) : Z :=
+  max_over_list valu (slots_of_instr i).
 
 Definition max_over_regs_of_funct (valu: mreg -> Z) : Z :=
   max_over_instrs (max_over_regs_of_instr valu).
 
-Definition max_over_slots_of_funct (valu: slot -> Z) : Z :=
+Definition max_over_slots_of_funct (valu: slot * Z * typ -> Z) : Z :=
   max_over_instrs (max_over_slots_of_instr valu).
 
 Definition int_callee_save (r: mreg) := 1 + index_int_callee_save r.
 
 Definition float_callee_save (r: mreg) := 1 + index_float_callee_save r.
 
-Definition int_local (s: slot) :=
-  match s with Local ofs Tint => 1 + ofs | _ => 0 end.
-
-Definition float_local (s: slot) :=
-  match s with Local ofs Tfloat => 1 + ofs | _ => 0 end.
+Definition local_slot (s: slot * Z * typ) :=
+  match s with (Local, ofs, ty) => ofs + typesize ty | _ => 0 end.
 
-Definition outgoing_slot (s: slot) :=
-  match s with Outgoing ofs ty => ofs + typesize ty | _ => 0 end.
+Definition outgoing_slot (s: slot * Z * typ) :=
+  match s with (Outgoing, ofs, ty) => ofs + typesize ty | _ => 0 end.
 
 Definition outgoing_space (i: instruction) :=
   match i with Lcall sig _ => size_arguments sig | _ => 0 end.
 
 Lemma max_over_list_pos:
   forall (A: Type) (valu: A -> Z) (l: list A),
-  max_over_list A valu l >= 0.
+  max_over_list valu l >= 0.
 Proof.
   intros until valu. unfold max_over_list.
   assert (forall l z, fold_left (fun x y => Zmax x (valu y)) l z >= z).
@@ -176,7 +168,7 @@ Proof.
 Qed.
 
 Lemma max_over_slots_of_funct_pos:
-  forall (valu: slot -> Z), max_over_slots_of_funct valu >= 0.
+  forall (valu: slot * Z * typ -> Z), max_over_slots_of_funct valu >= 0.
 Proof.
   intros. unfold max_over_slots_of_funct.
   unfold max_over_instrs. apply max_over_list_pos.
@@ -188,18 +180,16 @@ Proof.
   intros. unfold max_over_regs_of_funct.
   unfold max_over_instrs. apply max_over_list_pos.
 Qed.
- 
+
 Program Definition function_bounds :=
   mkbounds
-    (max_over_slots_of_funct int_local)
-    (max_over_slots_of_funct float_local)
+    (max_over_slots_of_funct local_slot)
     (max_over_regs_of_funct int_callee_save)
     (max_over_regs_of_funct float_callee_save)
     (Zmax (max_over_instrs outgoing_space)
           (max_over_slots_of_funct outgoing_slot))
     (Zmax f.(fn_stacksize) 0)
-    (max_over_slots_of_funct_pos int_local)
-    (max_over_slots_of_funct_pos float_local)
+    (max_over_slots_of_funct_pos local_slot)
     (max_over_regs_of_funct_pos int_callee_save)
     (max_over_regs_of_funct_pos float_callee_save)
     _ _.
@@ -215,7 +205,7 @@ Qed.
 
 Lemma max_over_list_bound:
   forall (A: Type) (valu: A -> Z) (l: list A) (x: A),
-  In x l -> valu x <= max_over_list A valu l.
+  In x l -> valu x <= max_over_list valu l.
 Proof.
   intros until x. unfold max_over_list.
   assert (forall c z,
@@ -250,7 +240,7 @@ Proof.
 Qed.
 
 Lemma max_over_slots_of_funct_bound:
-  forall (valu: slot -> Z) i s,
+  forall (valu: slot * Z * typ -> Z) i s,
   In i f.(fn_code) -> In s (slots_of_instr i) ->
   valu s <= max_over_slots_of_funct valu.
 Proof.
@@ -282,34 +272,23 @@ Proof.
   eapply max_over_regs_of_funct_bound; eauto.
 Qed.
 
-Lemma int_local_slot_bound:
-  forall i ofs,
-  In i f.(fn_code) -> In (Local ofs Tint) (slots_of_instr i) ->
-  ofs < bound_int_local function_bounds.
-Proof.
-  intros. apply Zlt_le_trans with (int_local (Local ofs Tint)).
-  unfold int_local. omega.
-  unfold function_bounds, bound_int_local.
-  eapply max_over_slots_of_funct_bound; eauto.
-Qed.
-
-Lemma float_local_slot_bound:
-  forall i ofs,
-  In i f.(fn_code) -> In (Local ofs Tfloat) (slots_of_instr i) ->
-  ofs < bound_float_local function_bounds.
+Lemma local_slot_bound:
+  forall i ofs ty,
+  In i f.(fn_code) -> In (Local, ofs, ty) (slots_of_instr i) ->
+  ofs + typesize ty <= bound_local function_bounds.
 Proof.
-  intros. apply Zlt_le_trans with (float_local (Local ofs Tfloat)).
-  unfold float_local. omega.
-  unfold function_bounds, bound_float_local.
+  intros.
+  unfold function_bounds, bound_local.
+  change (ofs + typesize ty) with (local_slot (Local, ofs, ty)).
   eapply max_over_slots_of_funct_bound; eauto.
 Qed.
 
 Lemma outgoing_slot_bound:
   forall i ofs ty,
-  In i f.(fn_code) -> In (Outgoing ofs ty) (slots_of_instr i) ->
+  In i f.(fn_code) -> In (Outgoing, ofs, ty) (slots_of_instr i) ->
   ofs + typesize ty <= bound_outgoing function_bounds.
 Proof.
-  intros. change (ofs + typesize ty) with (outgoing_slot (Outgoing ofs ty)).
+  intros. change (ofs + typesize ty) with (outgoing_slot (Outgoing, ofs, ty)).
   unfold function_bounds, bound_outgoing.
   apply Zmax_bound_r. eapply max_over_slots_of_funct_bound; eauto.
 Qed.
@@ -332,28 +311,25 @@ Lemma mreg_is_within_bounds:
   forall r, In r (regs_of_instr i) ->
   mreg_within_bounds function_bounds r.
 Proof.
-  intros. unfold mreg_within_bounds. 
-  case (mreg_type r).
+  intros. unfold mreg_within_bounds. split.
   eapply int_callee_save_bound; eauto.
   eapply float_callee_save_bound; eauto.
 Qed.
 
 Lemma slot_is_within_bounds:
   forall i, In i f.(fn_code) -> 
-  forall s, In s (slots_of_instr i) -> Lineartyping.slot_valid f s ->
-  slot_within_bounds f function_bounds s.
+  forall sl ty ofs, In (sl, ofs, ty) (slots_of_instr i) ->
+  slot_within_bounds function_bounds sl ofs ty.
 Proof.
   intros. unfold slot_within_bounds. 
-  destruct s.
-  destruct t.
-  split. exact H1. eapply int_local_slot_bound; eauto.
-  split. exact H1. eapply float_local_slot_bound; eauto.
-  exact H1.
-  split. simpl in H1. exact H1. eapply outgoing_slot_bound; eauto.
+  destruct sl.
+  eapply local_slot_bound; eauto.
+  auto.
+  eapply outgoing_slot_bound; eauto.
 Qed.
 
 Lemma slots_of_locs_charact:
-  forall s l, In s (slots_of_locs l) <-> In (S s) l.
+  forall sl ofs ty l, In (sl, ofs, ty) (slots_of_locs l) <-> In (S sl ofs ty) l.
 Proof.
   induction l; simpl; intros. 
   tauto.
@@ -366,27 +342,21 @@ Qed.
 Lemma instr_is_within_bounds:
   forall i,
   In i f.(fn_code) ->
-  Lineartyping.wt_instr f i ->
-  instr_within_bounds f function_bounds i.
+  instr_within_bounds function_bounds i.
 Proof.
   intros; 
   destruct i;
   generalize (mreg_is_within_bounds _ H); generalize (slot_is_within_bounds _ H); 
   simpl; intros; auto.
-(* getstack *)
-  inv H0. split; auto.
-(* setstack *)
-  inv H0; auto.
 (* call *)
   eapply size_arguments_bound; eauto.
+(* builtin *)
+  apply H1. apply in_or_app; auto. 
 (* annot *)
-  inv H0. apply H1. rewrite slots_of_locs_charact; auto. 
-  generalize (H8 _ H3). unfold loc_acceptable, slot_valid. 
-  destruct s; (contradiction || omega).
+  apply H0. rewrite slots_of_locs_charact; auto. 
 Qed.
 
 Lemma function_is_within_bounds:
-  Lineartyping.wt_code f f.(fn_code) ->
   function_within_bounds f function_bounds.
 Proof.
   intros; red; intros. apply instr_is_within_bounds; auto.
diff --git a/backend/CMtypecheck.ml b/backend/CMtypecheck.ml
index 39e8c51..1a19f71 100644
--- a/backend/CMtypecheck.ml
+++ b/backend/CMtypecheck.ml
@@ -24,15 +24,16 @@ open Cminor
 
 exception Error of string
 
-let name_of_typ = function Tint -> "int" | Tfloat -> "float"
+let name_of_typ = function Tint -> "int" | Tfloat -> "float" | Tlong -> "long"
 
 type ty = Base of typ | Var of ty option ref
 
 let newvar () = Var (ref None)
 let tint = Base Tint
 let tfloat = Base Tfloat
+let tlong = Base Tlong
 
-let ty_of_typ = function Tint -> tint | Tfloat -> tfloat
+let ty_of_typ = function Tint -> tint | Tfloat -> tfloat | Tlong -> tlong
 
 let ty_of_sig_args tyl = List.map ty_of_typ tyl
 
@@ -81,6 +82,7 @@ let name_of_comparison = function
 let type_constant = function
   | Ointconst _ -> tint
   | Ofloatconst _ -> tfloat
+  | Olongconst _ -> tlong
   | Oaddrsymbol _ -> tint
   | Oaddrstack _ -> tint
 
@@ -98,6 +100,15 @@ let type_unary_operation = function
   | Ointuoffloat -> tfloat, tint
   | Ofloatofint -> tint, tfloat
   | Ofloatofintu -> tint, tfloat
+  | Onegl -> tlong, tlong
+  | Onotl -> tlong, tlong
+  | Ointoflong -> tlong, tint
+  | Olongofint -> tint, tlong
+  | Olongofintu -> tint, tlong
+  | Olongoffloat -> tfloat, tlong
+  | Olonguoffloat -> tfloat, tlong
+  | Ofloatoflong -> tlong, tfloat
+  | Ofloatoflongu -> tlong, tfloat
 
 let type_binary_operation = function
   | Oadd -> tint, tint, tint
@@ -117,52 +128,28 @@ let type_binary_operation = function
   | Osubf -> tfloat, tfloat, tfloat
   | Omulf -> tfloat, tfloat, tfloat
   | Odivf -> tfloat, tfloat, tfloat
+  | Oaddl -> tlong, tlong, tlong
+  | Osubl -> tlong, tlong, tlong
+  | Omull -> tlong, tlong, tlong
+  | Odivl -> tlong, tlong, tlong
+  | Odivlu -> tlong, tlong, tlong
+  | Omodl -> tlong, tlong, tlong
+  | Omodlu -> tlong, tlong, tlong
+  | Oandl -> tlong, tlong, tlong
+  | Oorl -> tlong, tlong, tlong
+  | Oxorl -> tlong, tlong, tlong
+  | Oshll -> tlong, tint, tlong
+  | Oshrl -> tlong, tint, tlong
+  | Oshrlu -> tlong, tint, tlong
   | Ocmp _ -> tint, tint, tint
   | Ocmpu _ -> tint, tint, tint
   | Ocmpf _ -> tfloat, tfloat, tint
+  | Ocmpl _ -> tlong, tlong, tint
+  | Ocmplu _ -> tlong, tlong, tint
 
-let name_of_constant = function
-  | Ointconst n -> sprintf "intconst %ld" (camlint_of_coqint n)
-  | Ofloatconst n -> sprintf "floatconst %g" (camlfloat_of_coqfloat n)
-  | Oaddrsymbol (s, ofs) -> sprintf "addrsymbol %s %ld" (extern_atom s) (camlint_of_coqint ofs)
-  | Oaddrstack n -> sprintf "addrstack %ld" (camlint_of_coqint n)
+let name_of_unary_operation = PrintCminor.name_of_unop
 
-let name_of_unary_operation = function
-  | Ocast8signed -> "cast8signed"
-  | Ocast16signed -> "cast16signed"
-  | Ocast8unsigned -> "cast8unsigned"
-  | Ocast16unsigned -> "cast16unsigned"
-  | Onegint -> "negint"
-  | Onotint -> "notint"
-  | Onegf -> "negf"
-  | Oabsf -> "absf"
-  | Osingleoffloat -> "singleoffloat"
-  | Ointoffloat -> "intoffloat"
-  | Ointuoffloat -> "intuoffloat"
-  | Ofloatofint -> "floatofint"
-  | Ofloatofintu -> "floatofintu"
-
-let name_of_binary_operation = function
-  | Oadd -> "add"
-  | Osub -> "sub"
-  | Omul -> "mul"
-  | Odiv -> "div"
-  | Odivu -> "divu"
-  | Omod -> "mod"
-  | Omodu -> "modu"
-  | Oand -> "and"
-  | Oor -> "or"
-  | Oxor -> "xor"
-  | Oshl -> "shl"
-  | Oshr -> "shr"
-  | Oshru -> "shru"
-  | Oaddf -> "addf"
-  | Osubf -> "subf"
-  | Omulf -> "mulf"
-  | Odivf -> "divf"
-  | Ocmp c -> sprintf "cmp %s" (name_of_comparison c)
-  | Ocmpu c -> sprintf "cmpu %s" (name_of_comparison c)
-  | Ocmpf c -> sprintf "cmpf %s" (name_of_comparison c)
+let name_of_binary_operation = PrintCminor.name_of_binop
 
 let type_chunk = function
   | Mint8signed -> tint
@@ -170,19 +157,12 @@ let type_chunk = function
   | Mint16signed -> tint
   | Mint16unsigned -> tint
   | Mint32 -> tint
+  | Mint64 -> tlong
   | Mfloat32 -> tfloat
   | Mfloat64 -> tfloat
   | Mfloat64al32 -> tfloat
 
-let name_of_chunk = function
-  | Mint8signed -> "int8signed"
-  | Mint8unsigned -> "int8unsigned"
-  | Mint16signed -> "int16signed"
-  | Mint16unsigned -> "int16unsigned"
-  | Mint32 -> "int32"
-  | Mfloat32 -> "float32"
-  | Mfloat64 -> "float64"
-  | Mfloat64al32 -> "float64al32"
+let name_of_chunk = PrintAST.name_of_chunk
 
 let rec type_expr env lenv e =
   match e with
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 8fc9407..1e269f8 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -996,12 +996,7 @@ Proof.
   rewrite <- RES. apply eval_operation_preserved. exact symbols_preserved.
   (* state matching *)
   econstructor; eauto.
-  apply wt_regset_assign; auto. 
-  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
-  simpl in H0. inv H0. rewrite <- H3. apply WTREGS.
-  replace (tyenv res) with (snd (type_of_operation op)).
-  eapply type_of_operation_sound; eauto.
-  rewrite <- H6. reflexivity.
+  eapply wt_exec_Iop; eauto. eapply wt_instrs; eauto.  
   eapply analysis_correct_1; eauto. simpl; auto.
   unfold transfer; rewrite H. 
   eapply add_op_satisfiable; eauto. eapply wf_analyze; eauto.
@@ -1028,8 +1023,7 @@ Proof.
   eapply exec_Iload; eauto.
   (* state matching *)
   econstructor; eauto.
-  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
-  apply wt_regset_assign. auto. rewrite H8. eapply type_of_chunk_correct; eauto.
+  eapply wt_exec_Iload; eauto. eapply wt_instrs; eauto.
   eapply analysis_correct_1; eauto. simpl; auto. 
   unfold transfer; rewrite H. 
   eapply add_load_satisfiable; eauto. eapply wf_analyze; eauto.
diff --git a/backend/CleanupLabels.v b/backend/CleanupLabels.v
index 8db871e..5eaa81e 100644
--- a/backend/CleanupLabels.v
+++ b/backend/CleanupLabels.v
@@ -16,15 +16,15 @@
   control-flow graph.  Many of these labels are never branched to,
   which can complicate further optimizations over linearized code.
   (There are no such optimizations yet.)  In preparation for these
-  further optimizations, and to make the generated LTLin code 
+  further optimizations, and to make the generated Linear code 
   better-looking, the present pass removes labels that cannot be
   branched to. *)
 
-Require Import Coqlib.
-Require Import Ordered.
 Require Import FSets.
 Require FSetAVL.
-Require Import LTLin.
+Require Import Coqlib.
+Require Import Ordered.
+Require Import Linear.
 
 Module Labelset := FSetAVL.Make(OrderedPositive).
 
@@ -63,7 +63,6 @@ Definition cleanup_labels (c: code) :=
 Definition transf_function (f: function) : function :=
   mkfunction
      (fn_sig f)
-     (fn_params f)
      (fn_stacksize f)
      (cleanup_labels (fn_code f)).
 
diff --git a/backend/CleanupLabelsproof.v b/backend/CleanupLabelsproof.v
index 70f0eb3..65ba61c 100644
--- a/backend/CleanupLabelsproof.v
+++ b/backend/CleanupLabelsproof.v
@@ -24,7 +24,7 @@ Require Import Globalenvs.
 Require Import Smallstep.
 Require Import Op.
 Require Import Locations.
-Require Import LTLin.
+Require Import Linear.
 Require Import CleanupLabels.
 
 Module LabelsetFacts := FSetFacts.Facts(Labelset).
@@ -103,7 +103,7 @@ Proof.
   intros; red; intros; destruct i; simpl; auto.
   apply Labelset.add_2; auto.
   apply Labelset.add_2; auto.
-  revert H; induction l0; simpl. auto. intros; apply Labelset.add_2; auto.
+  revert H; induction l; simpl. auto. intros; apply Labelset.add_2; auto.
 Qed.
 
 Remark add_label_branched_to_contains:
@@ -114,7 +114,7 @@ Proof.
   destruct i; simpl; intros; try contradiction.
   apply Labelset.add_1; auto.
   apply Labelset.add_1; auto.
-  revert H. induction l0; simpl; intros. 
+  revert H. induction l; simpl; intros. 
   contradiction.
   destruct H. apply Labelset.add_1; auto. apply Labelset.add_2; auto.
 Qed.
@@ -200,11 +200,11 @@ Qed.
 
 Inductive match_stackframes: stackframe -> stackframe -> Prop :=
   | match_stackframe_intro:
-      forall res f sp ls c,
+      forall f sp ls c,
       incl c f.(fn_code) ->
       match_stackframes
-        (Stackframe res f sp ls c)
-        (Stackframe res (transf_function f) sp ls 
+        (Stackframe f sp ls c)
+        (Stackframe (transf_function f) sp ls 
           (remove_unused_labels (labels_branched_to f.(fn_code)) c)).
 
 Inductive match_states: state -> state -> Prop :=
@@ -231,6 +231,14 @@ Definition measure (st: state) : nat :=
   | _ => O
   end.
 
+Lemma match_parent_locset:
+  forall s ts,
+  list_forall2 match_stackframes s ts ->
+  parent_locset ts = parent_locset s.
+Proof.
+  induction 1; simpl. auto. inv H; auto.
+Qed.
+
 Theorem transf_step_correct:
   forall s1 t s2, step ge s1 t s2 ->
   forall s1' (MS: match_states s1 s1'),
@@ -238,19 +246,27 @@ Theorem transf_step_correct:
   \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat.
 Proof.
   induction 1; intros; inv MS; try rewrite remove_unused_labels_cons. 
+(* Lgetstack *)
+  left; econstructor; split.
+  econstructor; eauto. 
+  econstructor; eauto with coqlib.
+(* Lsetstack *)
+  left; econstructor; split.
+  econstructor; eauto. 
+  econstructor; eauto with coqlib.
 (* Lop *)
   left; econstructor; split.
   econstructor; eauto. instantiate (1 := v). rewrite <- H.
   apply eval_operation_preserved. exact symbols_preserved.
   econstructor; eauto with coqlib.
 (* Lload *)
-  assert (eval_addressing tge sp addr (map rs args) = Some a).
+  assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
   left; econstructor; split.
   econstructor; eauto. 
   econstructor; eauto with coqlib.
 (* Lstore *)
-  assert (eval_addressing tge sp addr (map rs args) = Some a).
+  assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
   left; econstructor; split.
   econstructor; eauto.
@@ -262,13 +278,18 @@ Proof.
   econstructor; eauto. constructor; auto. constructor; eauto with coqlib.
 (* Ltailcall *)
   left; econstructor; split.
-  econstructor. eapply find_function_translated; eauto. 
+  econstructor. erewrite match_parent_locset; eauto. eapply find_function_translated; eauto. 
   symmetry; apply sig_function_translated.
   simpl. eauto. 
   econstructor; eauto.
 (* Lbuiltin *)
   left; econstructor; split.
-  econstructor; eauto. eapply external_call_symbols_preserved; eauto.
+  econstructor; eauto. eapply external_call_symbols_preserved'; eauto.
+  exact symbols_preserved.  exact varinfo_preserved.
+  econstructor; eauto with coqlib.
+(* Lannot *)
+  left; econstructor; split.
+  econstructor; eauto. eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved.  exact varinfo_preserved.
   econstructor; eauto with coqlib.
 (* Llabel *)
@@ -285,7 +306,7 @@ Proof.
   econstructor; eauto. eapply find_label_incl; eauto.
 (* Lcond taken *)
   left; econstructor; split.
-  econstructor. auto. eapply find_label_translated; eauto. red; auto. 
+  econstructor. auto. eauto. eapply find_label_translated; eauto. red; auto. 
   econstructor; eauto. eapply find_label_incl; eauto.
 (* Lcond not taken *)
   left; econstructor; split.
@@ -294,11 +315,12 @@ Proof.
 (* Ljumptable *)
   left; econstructor; split.
   econstructor. eauto. eauto. eapply find_label_translated; eauto. 
-  red. eapply list_nth_z_in; eauto. 
+  red. eapply list_nth_z_in; eauto. eauto. 
   econstructor; eauto. eapply find_label_incl; eauto.
 (* Lreturn *)
   left; econstructor; split.
   econstructor; eauto.
+  erewrite <- match_parent_locset; eauto.
   econstructor; eauto with coqlib.
 (* internal function *)
   left; econstructor; split.
@@ -306,7 +328,7 @@ Proof.
   econstructor; eauto with coqlib.
 (* external function *)
   left; econstructor; split.
-  econstructor; eauto. eapply external_call_symbols_preserved; eauto.
+  econstructor; eauto. eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved.  exact varinfo_preserved.
   econstructor; eauto with coqlib.
 (* return *)
@@ -333,11 +355,11 @@ Lemma transf_final_states:
   forall st1 st2 r, 
   match_states st1 st2 -> final_state st1 r -> final_state st2 r.
 Proof.
-  intros. inv H0. inv H. inv H4. constructor.
+  intros. inv H0. inv H. inv H6. econstructor; eauto.
 Qed.
 
 Theorem transf_program_correct:
-  forward_simulation (LTLin.semantics prog) (LTLin.semantics tprog).
+  forward_simulation (Linear.semantics prog) (Linear.semantics tprog).
 Proof.
   eapply forward_simulation_opt.
   eexact symbols_preserved.
diff --git a/backend/CleanupLabelstyping.v b/backend/CleanupLabelstyping.v
deleted file mode 100644
index 11b516f..0000000
--- a/backend/CleanupLabelstyping.v
+++ /dev/null
@@ -1,59 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Type preservation for the CleanupLabels pass *)
-
-Require Import Coqlib.
-Require Import Maps.
-Require Import AST.
-Require Import Op.
-Require Import Locations.
-Require Import LTLin.
-Require Import CleanupLabels.
-Require Import LTLintyping.
-
-Lemma in_remove_unused_labels:
-  forall bto i c, In i (remove_unused_labels bto c) -> In i c.
-Proof.
-  unfold remove_unused_labels, remove_unused. induction c; simpl.
-  auto.
-  rewrite list_fold_right_eq. destruct a; simpl; intuition.
-  destruct (Labelset.mem l bto); simpl in H; intuition.
-Qed.
-
-Lemma wt_transf_function:
-  forall f,
-  wt_function f ->
-  wt_function (transf_function f).
-Proof.
-  intros. inv H. constructor; simpl; auto.
-  unfold cleanup_labels; red; intros.
-  apply wt_instrs. eapply in_remove_unused_labels; eauto.
-Qed.
-
-Lemma wt_transf_fundef:
-  forall f,
-  wt_fundef f ->
-  wt_fundef (transf_fundef f). 
-Proof.
-  induction 1. constructor. constructor. apply wt_transf_function; auto.
-Qed.
-
-Lemma program_typing_preserved:
-  forall p,
-  wt_program p ->
-  wt_program (transf_program p).
-Proof.
-  intros; red; intros.
-  exploit transform_program_function; eauto. intros [f1 [A B]]. subst f.
-  apply wt_transf_fundef. eapply H; eauto.
-Qed.
diff --git a/backend/Cminor.v b/backend/Cminor.v
index 3d177e4..3963e76 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -36,6 +36,7 @@ Require Import Switch.
 Inductive constant : Type :=
   | Ointconst: int -> constant     (**r integer constant *)
   | Ofloatconst: float -> constant (**r floating-point constant *)
+  | Olongconst: int64 -> constant  (**r long integer constant *)
   | Oaddrsymbol: ident -> int -> constant (**r address of the symbol plus the offset *)
   | Oaddrstack: int -> constant.   (**r stack pointer plus the given offset *)
 
@@ -52,7 +53,16 @@ Inductive unary_operation : Type :=
   | Ointoffloat: unary_operation           (**r signed integer to float *)
   | Ointuoffloat: unary_operation          (**r unsigned integer to float *)
   | Ofloatofint: unary_operation           (**r float to signed integer *)
-  | Ofloatofintu: unary_operation.         (**r float to unsigned integer *)
+  | Ofloatofintu: unary_operation          (**r float to unsigned integer *)
+  | Onegl: unary_operation                 (**r long integer opposite *)
+  | Onotl: unary_operation                 (**r long bitwise complement *)
+  | Ointoflong: unary_operation            (**r long to int *)
+  | Olongofint: unary_operation            (**r signed int to long *)
+  | Olongofintu: unary_operation           (**r unsigned int to long *)
+  | Olongoffloat: unary_operation          (**r signed long to float *)
+  | Olonguoffloat: unary_operation         (**r unsigned long to float *)
+  | Ofloatoflong: unary_operation          (**r float to signed long *)
+  | Ofloatoflongu: unary_operation.        (**r float to unsigned long *)
 
 Inductive binary_operation : Type :=
   | Oadd: binary_operation                 (**r integer addition *)
@@ -62,19 +72,34 @@ Inductive binary_operation : Type :=
   | Odivu: binary_operation                (**r integer unsigned division *)
   | Omod: binary_operation                 (**r integer signed modulus *)
   | Omodu: binary_operation                (**r integer unsigned modulus *)
-  | Oand: binary_operation                 (**r bitwise ``and'' *)
-  | Oor: binary_operation                  (**r bitwise ``or'' *)
-  | Oxor: binary_operation                 (**r bitwise ``xor'' *)
-  | Oshl: binary_operation                 (**r left shift *)
-  | Oshr: binary_operation                 (**r right signed shift *)
-  | Oshru: binary_operation                (**r right unsigned shift *)
+  | Oand: binary_operation                 (**r integer bitwise ``and'' *)
+  | Oor: binary_operation                  (**r integer bitwise ``or'' *)
+  | Oxor: binary_operation                 (**r integer bitwise ``xor'' *)
+  | Oshl: binary_operation                 (**r integer left shift *)
+  | Oshr: binary_operation                 (**r integer right signed shift *)
+  | Oshru: binary_operation                (**r integer right unsigned shift *)
   | Oaddf: binary_operation                (**r float addition *)
   | Osubf: binary_operation                (**r float subtraction *)
   | Omulf: binary_operation                (**r float multiplication *)
   | Odivf: binary_operation                (**r float division *)
+  | Oaddl: binary_operation                (**r long addition *)
+  | Osubl: binary_operation                (**r long subtraction *)
+  | Omull: binary_operation                (**r long multiplication *)
+  | Odivl: binary_operation                (**r long signed division *)
+  | Odivlu: binary_operation               (**r long unsigned division *)
+  | Omodl: binary_operation                (**r long signed modulus *)
+  | Omodlu: binary_operation               (**r long unsigned modulus *)
+  | Oandl: binary_operation                (**r long bitwise ``and'' *)
+  | Oorl: binary_operation                 (**r long bitwise ``or'' *)
+  | Oxorl: binary_operation                (**r long bitwise ``xor'' *)
+  | Oshll: binary_operation                (**r long left shift *)
+  | Oshrl: binary_operation                (**r long right signed shift *)
+  | Oshrlu: binary_operation               (**r long right unsigned shift *)
   | Ocmp: comparison -> binary_operation   (**r integer signed comparison *)
   | Ocmpu: comparison -> binary_operation  (**r integer unsigned comparison *)
-  | Ocmpf: comparison -> binary_operation. (**r float comparison *)
+  | Ocmpf: comparison -> binary_operation  (**r float comparison *)
+  | Ocmpl: comparison -> binary_operation  (**r long signed comparison *)
+  | Ocmplu: comparison -> binary_operation. (**r long unsigned comparison *)
 
 (** Expressions include reading local variables, constants and
   arithmetic operations, reading store locations, and conditional
@@ -214,6 +239,7 @@ Definition eval_constant (sp: val) (cst: constant) : option val :=
   match cst with
   | Ointconst n => Some (Vint n)
   | Ofloatconst n => Some (Vfloat n)
+  | Olongconst n => Some (Vlong n)
   | Oaddrsymbol s ofs =>
       Some(match Genv.find_symbol ge s with
            | None => Vundef
@@ -236,6 +262,15 @@ Definition eval_unop (op: unary_operation) (arg: val) : option val :=
   | Ointuoffloat => Val.intuoffloat arg
   | Ofloatofint => Val.floatofint arg
   | Ofloatofintu => Val.floatofintu arg
+  | Onegl => Some (Val.negl arg)
+  | Onotl => Some (Val.notl arg)
+  | Ointoflong => Some (Val.loword arg)
+  | Olongofint => Some (Val.longofint arg)
+  | Olongofintu => Some (Val.longofintu arg)
+  | Olongoffloat => Val.longoffloat arg
+  | Olonguoffloat => Val.longuoffloat arg
+  | Ofloatoflong => Val.floatoflong arg
+  | Ofloatoflongu => Val.floatoflongu arg
   end.
 
 Definition eval_binop
@@ -258,9 +293,24 @@ Definition eval_binop
   | Osubf => Some (Val.subf arg1 arg2)
   | Omulf => Some (Val.mulf arg1 arg2)
   | Odivf => Some (Val.divf arg1 arg2)
+  | Oaddl => Some (Val.addl arg1 arg2)
+  | Osubl => Some (Val.subl arg1 arg2)
+  | Omull => Some (Val.mull arg1 arg2)
+  | Odivl => Val.divls arg1 arg2
+  | Odivlu => Val.divlu arg1 arg2
+  | Omodl => Val.modls arg1 arg2
+  | Omodlu => Val.modlu arg1 arg2
+  | Oandl => Some (Val.andl arg1 arg2)
+  | Oorl => Some (Val.orl arg1 arg2)
+  | Oxorl => Some (Val.xorl arg1 arg2)
+  | Oshll => Some (Val.shll arg1 arg2)
+  | Oshrl => Some (Val.shrl arg1 arg2)
+  | Oshrlu => Some (Val.shrlu arg1 arg2)
   | Ocmp c => Some (Val.cmp c arg1 arg2)
   | Ocmpu c => Some (Val.cmpu (Mem.valid_pointer m) c arg1 arg2)
   | Ocmpf c => Some (Val.cmpf c arg1 arg2)
+  | Ocmpl c => Some (Val.cmpl c arg1 arg2)
+  | Ocmplu c => Some (Val.cmplu c arg1 arg2)
   end.
 
 (** Evaluation of an expression: [eval_expr ge sp e m a v]
diff --git a/backend/CminorSel.v b/backend/CminorSel.v
index b5a0d39..3538dda 100644
--- a/backend/CminorSel.v
+++ b/backend/CminorSel.v
@@ -43,6 +43,8 @@ Inductive expr : Type :=
   | Econdition : condition -> exprlist -> expr -> expr -> expr
   | Elet : expr -> expr -> expr
   | Eletvar : nat -> expr
+  | Ebuiltin : external_function -> exprlist -> expr
+  | Eexternal : ident -> signature -> exprlist -> expr
 
 with exprlist : Type :=
   | Enil: exprlist
@@ -171,6 +173,17 @@ Inductive eval_expr: letenv -> expr -> val -> Prop :=
   | eval_Eletvar: forall le n v,
       nth_error le n = Some v ->
       eval_expr le (Eletvar n) v
+  | eval_Ebuiltin: forall le ef al vl v,
+      eval_exprlist le al vl ->
+      external_call ef ge vl m E0 v m ->
+      eval_expr le (Ebuiltin ef al) v
+  | eval_Eexternal: forall le id sg al b ef vl v,
+      Genv.find_symbol ge id = Some b ->
+      Genv.find_funct_ptr ge b = Some (External ef) ->
+      ef_sig ef = sg ->
+      eval_exprlist le al vl ->
+      external_call ef ge vl m E0 v m ->
+      eval_expr le (Eexternal id sg al) v
 
 with eval_exprlist: letenv -> exprlist -> list val -> Prop :=
   | eval_Enil: forall le,
@@ -388,6 +401,8 @@ Fixpoint lift_expr (p: nat) (a: expr) {struct a}: expr :=
   | Elet b c => Elet (lift_expr p b) (lift_expr (S p) c)
   | Eletvar n =>
       if le_gt_dec p n then Eletvar (S n) else Eletvar n
+  | Ebuiltin ef bl => Ebuiltin ef (lift_exprlist p bl)
+  | Eexternal id sg bl => Eexternal id sg (lift_exprlist p bl)
   end
 
 with lift_exprlist (p: nat) (a: exprlist) {struct a}: exprlist :=
diff --git a/backend/Constprop.v b/backend/Constprop.v
index f85405d..0575079 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -47,6 +47,7 @@ Module Approx <: SEMILATTICE_WITH_TOP.
     decide equality.
     apply Int.eq_dec.
     apply Float.eq_dec.
+    apply Int64.eq_dec.
     apply Int.eq_dec.
     apply ident_eq.
     apply Int.eq_dec.
@@ -139,6 +140,10 @@ Fixpoint eval_load_init (chunk: memory_chunk) (pos: Z) (il: list init_data): app
       if zeq pos 0 
       then match chunk with Mint32 => I n | _ => Unknown end
       else eval_load_init chunk (pos - 4) il'
+  | Init_int64 n :: il' =>
+      if zeq pos 0 
+      then match chunk with Mint64 => L n | _ => Unknown end
+      else eval_load_init chunk (pos - 8) il'
   | Init_float32 n :: il' =>
       if zeq pos 0
       then match chunk with 
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index 580d551..a6385f4 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -186,6 +186,11 @@ Proof.
   destruct chunk; simpl; auto.
   congruence.
   eapply IHil; eauto. omega.
+  (* Init_int64 *)
+  destruct H. destruct (zeq pos 0). subst. rewrite Zplus_0_r in H0.
+  destruct chunk; simpl; auto.
+  congruence.
+  eapply IHil; eauto. omega.
   (* Init_float32 *)
   destruct H. destruct (zeq pos 0). subst.  rewrite Zplus_0_r in H0.
   destruct chunk; simpl; auto. destruct (propagate_float_constants tt); simpl; auto.
diff --git a/backend/Conventions.v b/backend/Conventions.v
index c11bf47..abfe4ee 100644
--- a/backend/Conventions.v
+++ b/backend/Conventions.v
@@ -22,110 +22,6 @@ Require Export Conventions1.
     [arch/abi/Conventions1.v].  This file adds various processor-independent
     definitions and lemmas. *)
 
-(** * Acceptable locations for register allocation *)
-
-(** The following predicate describes the locations that can be assigned
-  to an RTL pseudo-register during register allocation: a non-temporary
-  machine register or a [Local] stack slot are acceptable. *)
-
-Definition loc_acceptable (l: loc) : Prop :=
-  match l with
-  | R r => ~(In l temporaries)
-  | S (Local ofs ty) => ofs >= 0
-  | S (Incoming _ _) => False
-  | S (Outgoing _ _) => False
-  end.
-
-Definition locs_acceptable (ll: list loc) : Prop :=
-  forall l, In l ll -> loc_acceptable l.
-
-Lemma temporaries_not_acceptable:
-  forall l, loc_acceptable l -> Loc.notin l temporaries.
-Proof.
-  unfold loc_acceptable; destruct l.
-  simpl. intuition congruence.
-  destruct s; try contradiction. 
-  intro. simpl. tauto.
-Qed.
-Hint Resolve temporaries_not_acceptable: locs.
-
-Lemma locs_acceptable_disj_temporaries:
-  forall ll, locs_acceptable ll -> Loc.disjoint ll temporaries.
-Proof.
-  intros. apply Loc.notin_disjoint. intros.
-  apply temporaries_not_acceptable. auto. 
-Qed.
-
-Lemma loc_acceptable_noteq_diff:
-  forall l1 l2,
-  loc_acceptable l1 -> l1 <> l2 -> Loc.diff l1 l2.
-Proof.
-  unfold loc_acceptable, Loc.diff; destruct l1; destruct l2;
-  try (destruct s); try (destruct s0); intros; auto; try congruence.
-  case (zeq z z0); intro. 
-  compare t t0; intro.
-  subst z0; subst t0; tauto.
-  tauto. tauto.
-  contradiction. contradiction.
-Qed.
-
-Lemma loc_acceptable_notin_notin:
-  forall r ll,
-  loc_acceptable r ->
-  ~(In r ll) -> Loc.notin r ll.
-Proof.
-  induction ll; simpl; intros.
-  auto.
-  split. apply loc_acceptable_noteq_diff. assumption. 
-  apply sym_not_equal. tauto. 
-  apply IHll. assumption. tauto. 
-Qed.
-
-(** * Additional properties of result and argument locations *)
-
-(** The result location is not a callee-save register. *)
-
-Lemma loc_result_not_callee_save:
-  forall (s: signature),
-  ~(In (loc_result s) int_callee_save_regs \/ In (loc_result s) float_callee_save_regs).
-Proof.
-  intros. generalize (loc_result_caller_save s).
-  generalize (int_callee_save_not_destroyed (loc_result s)).
-  generalize (float_callee_save_not_destroyed (loc_result s)).
-  tauto.
-Qed.
-
-(** Callee-save registers do not overlap with argument locations. *)
-
-Lemma arguments_not_preserved:
-  forall sig l,
-  Loc.notin l destroyed_at_call -> loc_acceptable l ->
-  Loc.notin l (loc_arguments sig).
-Proof.
-  intros. destruct l; red in H0. 
-  apply Loc.reg_notin. red; intros. 
-  exploit Loc.notin_not_in; eauto. eapply arguments_caller_save; eauto.
-  destruct s; try contradiction.
-  unfold loc_arguments. apply loc_arguments_rec_notin_local. 
-Qed.
-
-(** There is no partial overlap between arguments and acceptable locations. *)
-
-Lemma no_overlap_arguments:
-  forall args sg,
-  locs_acceptable args ->
-  Loc.no_overlap args (loc_arguments sg).
-Proof.
-  unfold Loc.no_overlap; intros.
-  generalize (H r H0).
-  generalize (loc_arguments_acceptable _ _ H1).
-  destruct s; destruct r; simpl.
-  intros. case (mreg_eq m0 m); intro. left; congruence. tauto.
-  intros. right; destruct s; auto.
-  intros. right. auto.
-  destruct s; try tauto. destruct s0; tauto.
-Qed.
-
 (** ** Location of function parameters *)
 
 (** A function finds the values of its parameter in the same locations
@@ -135,86 +31,39 @@ Qed.
 
 Definition parameter_of_argument (l: loc) : loc :=
   match l with
-  | S (Outgoing n ty) => S (Incoming n ty)
+  | S Outgoing n ty => S Incoming n ty
   | _ => l
   end.
 
 Definition loc_parameters (s: signature) :=
   List.map parameter_of_argument (loc_arguments s).
 
-Lemma loc_parameters_type:
-  forall sig, List.map Loc.type (loc_parameters sig) = sig.(sig_args).
-Proof.
-  intros. unfold loc_parameters.
-  rewrite list_map_compose. 
-  rewrite <- loc_arguments_type.
-  apply list_map_exten.
-  intros. destruct x; simpl. auto. 
-  destruct s; reflexivity.
-Qed.
-
-Lemma loc_parameters_length:
-  forall sg, List.length (loc_parameters sg) = List.length sg.(sig_args).
-Proof.
-  intros. unfold loc_parameters. rewrite list_length_map. 
-  apply loc_arguments_length.
-Qed.
-
-Lemma loc_parameters_not_temporaries:
-  forall sig, Loc.disjoint (loc_parameters sig) temporaries.
-Proof.
-  intro; red; intros.
-  unfold loc_parameters in H. 
-  elim (list_in_map_inv _ _ _ H). intros y [EQ IN].
-  generalize (loc_arguments_not_temporaries sig y x2 IN H0).
-  subst x1. destruct x2.
-  destruct y; simpl. auto. destruct s; auto.
-  byContradiction. generalize H0. simpl. NotOrEq.
-Qed.
-
-Lemma no_overlap_parameters:
-  forall params sg,
-  locs_acceptable params ->
-  Loc.no_overlap (loc_parameters sg) params.
-Proof.
-  unfold Loc.no_overlap; intros.
-  unfold loc_parameters in H0. 
-  elim (list_in_map_inv _ _ _ H0). intros t [EQ IN].
-  rewrite EQ. 
-  generalize (loc_arguments_acceptable _ _ IN).
-  generalize (H s H1).
-  destruct s; destruct t; simpl.
-  intros. case (mreg_eq m0 m); intro. left; congruence. tauto.
-  intros. right; destruct s; simpl; auto.
-  intros; right; auto.
-  destruct s; try tauto. destruct s0; try tauto. 
-  intros; simpl. tauto.
-Qed.
-
 Lemma incoming_slot_in_parameters:
   forall ofs ty sg,
-  In (S (Incoming ofs ty)) (loc_parameters sg) ->
-  In (S (Outgoing ofs ty)) (loc_arguments sg).
+  In (S Incoming ofs ty) (loc_parameters sg) ->
+  In (S Outgoing ofs ty) (loc_arguments sg).
 Proof.
   intros.
   unfold loc_parameters in H. 
-  change (S (Incoming ofs ty)) with (parameter_of_argument (S (Outgoing ofs ty))) in H.
+  change (S Incoming ofs ty) with (parameter_of_argument (S Outgoing ofs ty)) in H.
   exploit list_in_map_inv. eexact H. intros [x [A B]]. simpl in A.
   exploit loc_arguments_acceptable; eauto. unfold loc_argument_acceptable; intros.
   destruct x; simpl in A; try discriminate.
-  destruct s; try contradiction. 
+  destruct sl; try contradiction. 
   inv A. auto.
 Qed. 
 
-
 (** * Tail calls *)
 
 (** A tail-call is possible for a signature if the corresponding
     arguments are all passed in registers. *)
 
+(** A tail-call is possible for a signature if the corresponding
+    arguments are all passed in registers. *)
+
 Definition tailcall_possible (s: signature) : Prop :=
   forall l, In l (loc_arguments s) ->
-  match l with R _ => True | S _ => False end.
+  match l with R _ => True | S _ _ _ => False end.
 
 (** Decide whether a tailcall is possible. *)
 
@@ -223,7 +72,7 @@ Definition tailcall_is_possible (sg: signature) : bool :=
     match l with
     | nil => true
     | R _ :: l' => tcisp l'
-    | S _ :: l' => false
+    | S _ _ _ :: l' => false
     end
   in tcisp (loc_arguments sg).
 
@@ -237,31 +86,12 @@ Proof.
   destruct H0. subst l0. auto. apply IHl. auto. auto. discriminate.
 Qed.
 
-(** * Counting temporaries *)
-
-(** Given a list [tys] of types representing arguments to an operator,
-  [arity_ok tys] returns [true] if there are enough temporaries to
-  reload all arguments into temporaries. *)
-
-Fixpoint arity_ok_rec (tys: list typ) (itmps ftmps: list mreg)
-                      {struct tys} : bool :=
-  match tys with
-  | nil => true
-  | Tint :: ts =>
-      match itmps with
-      | nil => false
-      | it1 :: its => arity_ok_rec ts its ftmps
-      end
-  | Tfloat :: ts =>
-      match ftmps with
-      | nil => false
-      | ft1 :: fts => arity_ok_rec ts itmps fts
-      end
-  end.
-
-Definition arity_ok (tys: list typ) :=
-  arity_ok_rec tys int_temporaries float_temporaries.
-
-
-
-
+Lemma zero_size_arguments_tailcall_possible:
+  forall sg, size_arguments sg = 0 -> tailcall_possible sg.
+Proof.
+  intros; red; intros. exploit loc_arguments_acceptable; eauto. 
+  unfold loc_argument_acceptable. 
+  destruct l; intros. auto. destruct sl; try contradiction. destruct H1. 
+  generalize (loc_arguments_bounded _ _ _ H0).
+  generalize (typesize_pos ty). omega. 
+Qed.
diff --git a/backend/IRC.ml b/backend/IRC.ml
new file mode 100644
index 0000000..573c3d7
--- /dev/null
+++ b/backend/IRC.ml
@@ -0,0 +1,894 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+open Printf
+open Camlcoq
+open Datatypes
+open AST
+open Registers
+open Machregs
+open Locations
+open Conventions1
+open Conventions
+open XTL
+
+(* Iterated Register Coalescing: George and Appel's graph coloring algorithm *)
+
+type var_stats = {
+  mutable cost: int;             (* estimated cost of a spill *)
+  mutable usedefs: int           (* number of uses and defs *)
+}
+
+(* Representation of the interference graph.  Each node of the graph
+   (i.e. each variable) is represented as follows. *)
+
+type node =
+  { ident: int;                         (*r unique identifier *)
+    typ: typ;                           (*r its type *)
+    var: var;                           (*r the XTL variable it comes from *)
+    regclass: int;                      (*r identifier of register class *)
+    mutable accesses: int;              (*r number of defs and uses *)
+    mutable spillcost: float;           (*r estimated cost of spilling *)
+    mutable adjlist: node list;         (*r all nodes it interferes with *)
+    mutable degree: int;                (*r number of adjacent nodes *)
+    mutable movelist: move list;        (*r list of moves it is involved in *)
+    mutable extra_adj: node list;       (*r extra interferences (see below) *)
+    mutable extra_pref: move list;      (*r extra preferences (see below) *)
+    mutable alias: node option;         (*r [Some n] if coalesced with [n] *)
+    mutable color: loc option;          (*r chosen color *)
+    mutable nstate: nodestate;          (*r in which set of nodes it is *)
+    mutable nprev: node;                (*r for double linking *)
+    mutable nnext: node                 (*r for double linking *)
+  }
+
+(* These are the possible states for nodes. *)
+
+and nodestate =
+  | Colored
+  | Initial
+  | SimplifyWorklist
+  | FreezeWorklist
+  | SpillWorklist
+  | CoalescedNodes
+  | SelectStack
+
+(* Each move (i.e. wish to be put in the same location) is represented
+   as follows. *)
+
+and move =
+  { src: node;                          (*r source of the move *)
+    dst: node;                          (*r destination of the move *)
+    mutable mstate: movestate;          (*r in which set of moves it is *)
+    mutable mprev: move;                (*r for double linking *)
+    mutable mnext: move                 (*r for double linking *)
+  }
+
+(* These are the possible states for moves *)
+
+and movestate =
+  | CoalescedMoves
+  | ConstrainedMoves
+  | FrozenMoves
+  | WorklistMoves
+  | ActiveMoves
+
+(* Note on "precolored" nodes and how they are handled:
+
+The register allocator can express interferences and preferences between
+any two values of type [var]: either pseudoregisters, to be colored by IRC,
+or fixed, "precolored" locations.
+
+I and P between two pseudoregisters are recorded in the graph that IRC
+modifies, via the [adjlist] and [movelist] fields.
+
+I and P between a pseudoregister and a machine register are also
+recorded in the IRC graph, but only in the [adjlist] and [movelist]
+fields of the pseudoregister.  This is the special case described
+in George and Appel's papers.
+
+I and P between a pseudoregister and a stack slot
+are omitted from the IRC graph, as they contribute nothing to the
+simplification and coalescing process.  We record them in the
+[extra_adj] and [extra_pref] fields, where they can be honored
+after IRC elimination, when assigning a stack slot to a spilled variable. *)
+
+let name_of_loc = function
+  | R r ->
+      begin match Machregsaux.name_of_register r with
+                | None -> "fixed-reg"
+                | Some s -> s
+      end
+  | S (Local, ofs, ty) ->
+      sprintf "L%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs)
+  | S (Incoming, ofs, ty) ->
+      sprintf "I%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs)
+  | S (Outgoing, ofs, ty) ->
+      sprintf "O%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs)
+
+let name_of_node n =
+  match n.var with
+  | V(r, ty) -> sprintf "x%ld" (P.to_int32 r)
+  | L l -> name_of_loc l
+
+(* The algorithm manipulates partitions of the nodes and of the moves
+   according to their states, frequently moving a node or a move from
+   a state to another, and frequently enumerating all nodes or all moves
+   of a given state.  To support these operations efficiently,
+   nodes or moves having the same state are put into imperative doubly-linked
+   lists, allowing for constant-time insertion and removal, and linear-time
+   scanning.  We now define the operations over these doubly-linked lists. *)
+
+module DLinkNode = struct
+  type t = node
+  let make state =
+    let rec empty =
+      { ident = 0; typ = Tint; var = V(P.one, Tint); regclass = 0;
+        adjlist = []; degree = 0; accesses = 0; spillcost = 0.0;
+        movelist = []; extra_adj = []; extra_pref = [];
+        alias = None; color = None;
+        nstate = state; nprev = empty; nnext = empty }
+    in empty
+  let dummy = make Colored
+  let clear dl = dl.nnext <- dl; dl.nprev <- dl
+  let notempty dl = dl.nnext != dl
+  let insert n dl =
+    n.nstate <- dl.nstate;
+    n.nnext <- dl.nnext; n.nprev <- dl;
+    dl.nnext.nprev <- n; dl.nnext <- n
+  let remove n dl =
+    assert (n.nstate = dl.nstate);
+    n.nnext.nprev <- n.nprev; n.nprev.nnext <- n.nnext
+  let move n dl1 dl2 =
+    remove n dl1; insert n dl2
+  let pick dl =
+    let n = dl.nnext in remove n dl; n
+  let iter f dl =
+    let rec iter n = if n != dl then (f n; iter n.nnext)
+    in iter dl.nnext
+  let fold f dl accu =
+    let rec fold n accu = if n == dl then accu else fold n.nnext (f n accu)
+    in fold dl.nnext accu
+end
+
+module DLinkMove = struct
+  type t = move
+  let make state =
+    let rec empty =
+      { src = DLinkNode.dummy; dst = DLinkNode.dummy; 
+        mstate = state; mprev = empty; mnext = empty }
+    in empty
+  let dummy = make CoalescedMoves
+  let clear dl = dl.mnext <- dl; dl.mprev <- dl
+  let notempty dl = dl.mnext != dl
+  let insert m dl =
+    m.mstate <- dl.mstate;
+    m.mnext <- dl.mnext; m.mprev <- dl;
+    dl.mnext.mprev <- m; dl.mnext <- m
+  let remove m dl =
+    assert (m.mstate = dl.mstate);
+    m.mnext.mprev <- m.mprev; m.mprev.mnext <- m.mnext
+  let move m dl1 dl2 =
+    remove m dl1; insert m dl2
+  let pick dl =
+    let m = dl.mnext in remove m dl; m
+  let iter f dl =
+    let rec iter m = if m != dl then (f m; iter m.mnext)
+    in iter dl.mnext
+  let fold f dl accu =
+    let rec fold m accu = if m == dl then accu else fold m.mnext (f m accu)
+    in fold dl.mnext accu
+end
+
+(* Auxiliary data structures *)
+
+module IntSet = Set.Make(struct
+  type t = int
+  let compare (x:int) (y:int) = compare x y
+end)
+
+module IntPairSet = Set.Make(struct
+  type t = int * int
+  let compare ((x1, y1): (int * int)) (x2, y2) =
+    if x1 < x2 then -1 else
+    if x1 > x2 then 1 else
+    if y1 < y2 then -1 else
+    if y1 > y2 then 1 else
+    0
+  end)
+
+(* The global state of the algorithm *)
+
+type graph = {
+  (* Machine registers available for allocation *)
+  caller_save_registers: mreg array array;
+  callee_save_registers: mreg array array;
+  num_available_registers: int array;
+  start_points: int array;
+  allocatable_registers: mreg list;
+  (* Costs for pseudo-registers *)
+  stats_of_reg: reg -> var_stats;
+  (* Mapping from XTL variables to nodes *)
+  varTable: (var, node) Hashtbl.t;
+  mutable nextIdent: int;
+  (* The adjacency set  *)
+  mutable adjSet: IntPairSet.t;
+  (* Low-degree, non-move-related nodes *)
+  simplifyWorklist: DLinkNode.t;
+  (* Low-degree, move-related nodes *)
+  freezeWorklist: DLinkNode.t;
+  (* High-degree nodes *)
+  spillWorklist: DLinkNode.t;
+  (* Nodes that have been coalesced *)
+  coalescedNodes: DLinkNode.t;
+  (* Moves that have been coalesced *)
+  coalescedMoves: DLinkMove.t;
+  (* Moves whose source and destination interfere *)
+  constrainedMoves: DLinkMove.t;
+  (* Moves that will no longer be considered for coalescing *)
+  frozenMoves: DLinkMove.t;
+  (* Moves enabled for possible coalescing *)
+  worklistMoves: DLinkMove.t;
+  (* Moves not yet ready for coalescing *)
+  activeMoves: DLinkMove.t
+}
+
+(* Register classes and reserved registers *)
+
+let num_register_classes = 2
+
+let class_of_type = function Tint -> 0 | Tfloat -> 1 | Tlong -> assert false
+
+let reserved_registers = ref ([]: mreg list)
+
+let rec remove_reserved = function
+  | [] -> []
+  | hd :: tl ->
+      if List.mem hd !reserved_registers
+      then remove_reserved tl
+      else hd :: remove_reserved tl
+
+(* Initialize and return an empty graph *)
+
+let init costs =
+  let int_caller_save = remove_reserved int_caller_save_regs
+  and float_caller_save = remove_reserved float_caller_save_regs
+  and int_callee_save = remove_reserved int_callee_save_regs
+  and float_callee_save = remove_reserved float_callee_save_regs in
+  {
+    caller_save_registers =
+      [| Array.of_list int_caller_save; Array.of_list float_caller_save |];
+    callee_save_registers =
+      [| Array.of_list int_callee_save; Array.of_list float_callee_save |];
+    num_available_registers =
+      [| List.length int_caller_save + List.length int_callee_save;
+         List.length float_caller_save + List.length float_callee_save |];
+    start_points =
+      [| 0; 0 |];
+    allocatable_registers =
+      int_caller_save @ int_callee_save @ float_caller_save @ float_callee_save;
+    stats_of_reg = costs;
+    varTable = Hashtbl.create 253;
+    nextIdent = 0;
+    adjSet = IntPairSet.empty;
+    simplifyWorklist = DLinkNode.make SimplifyWorklist;
+    freezeWorklist = DLinkNode.make FreezeWorklist;
+    spillWorklist = DLinkNode.make SpillWorklist;
+    coalescedNodes = DLinkNode.make CoalescedNodes;
+    coalescedMoves = DLinkMove.make CoalescedMoves;
+    constrainedMoves = DLinkMove.make ConstrainedMoves;
+    frozenMoves = DLinkMove.make FrozenMoves;
+    worklistMoves = DLinkMove.make WorklistMoves;
+    activeMoves = DLinkMove.make ActiveMoves
+  }
+  
+(* Create nodes corresponding to XTL variables *)
+
+let weightedSpillCost st =
+  if st.cost < max_int
+  then float_of_int st.cost
+  else infinity
+
+let newNodeOfReg g r ty =
+  let st = g.stats_of_reg r in
+  g.nextIdent <- g.nextIdent + 1;
+  { ident = g.nextIdent; typ = ty; 
+    var = V(r, ty); regclass = class_of_type ty;
+    accesses = st.usedefs;
+    spillcost = weightedSpillCost st;
+    adjlist = []; degree = 0; movelist = []; extra_adj = []; extra_pref = [];
+    alias = None;
+    color = None;
+    nstate = Initial;
+    nprev = DLinkNode.dummy; nnext = DLinkNode.dummy }
+
+let newNodeOfLoc g l =
+  let ty = Loc.coq_type l in
+  g.nextIdent <- g.nextIdent + 1;
+  { ident = g.nextIdent; typ = ty; 
+    var = L l; regclass = class_of_type ty;
+    accesses = 0; spillcost = 0.0;
+    adjlist = []; degree = 0; movelist = []; extra_adj = []; extra_pref = [];
+    alias = None;
+    color = Some l;
+    nstate = Colored;
+    nprev = DLinkNode.dummy; nnext = DLinkNode.dummy }
+
+let nodeOfVar g v =
+  try
+    Hashtbl.find g.varTable v
+  with Not_found ->
+    let n =
+      match v with V(r, ty) -> newNodeOfReg g r ty | L l -> newNodeOfLoc g l in
+    Hashtbl.add g.varTable v n;
+    n
+
+(* Determine if two nodes interfere *)
+
+let interfere g n1 n2 =
+  let i1 = n1.ident and i2 = n2.ident in
+  let p = if i1 < i2 then (i1, i2) else (i2, i1) in
+  IntPairSet.mem p g.adjSet
+
+(* Add an edge to the graph. *)
+
+let recordInterf n1 n2 =
+  match n2.color with
+  | None | Some (R _) ->
+      if n1.regclass = n2.regclass then begin
+        n1.adjlist <- n2 :: n1.adjlist;
+        n1.degree  <- 1 + n1.degree
+      end else begin
+        n1.extra_adj <- n2 :: n1.extra_adj
+      end
+  | Some (S _) ->
+      (*i printf "extra adj %s to %s\n" (name_of_node n1) (name_of_node n2); *)
+      n1.extra_adj <- n2 :: n1.extra_adj
+
+let addEdge g n1 n2 =
+  (*i printf "edge  %s -- %s;\n" (name_of_node n1) (name_of_node n2);*)
+  assert (n1 != n2);
+  if not (interfere g n1 n2) then begin
+    let i1 = n1.ident and i2 = n2.ident in
+    let p = if i1 < i2 then (i1, i2) else (i2, i1) in
+    g.adjSet <- IntPairSet.add p g.adjSet;
+    if n1.nstate <> Colored then recordInterf n1 n2;
+    if n2.nstate <> Colored then recordInterf n2 n1
+  end
+
+(* Add a move preference. *)
+
+let recordMove g n1 n2 =
+  let m =
+    { src = n1; dst = n2; mstate = WorklistMoves;
+      mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in
+  n1.movelist <- m :: n1.movelist;
+  n2.movelist <- m :: n2.movelist;
+  DLinkMove.insert m g.worklistMoves
+
+let recordExtraPref n1 n2 =
+  let m =
+    { src = n1; dst = n2; mstate = FrozenMoves;
+      mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in
+  n1.extra_pref <- m :: n1.extra_pref
+
+let addMovePref g n1 n2 =
+  assert (n1.regclass = n2.regclass);
+  match n1.color, n2.color with
+  | None, None ->
+      recordMove g n1 n2
+  | Some (R mr1), None ->
+      if List.mem mr1 g.allocatable_registers then recordMove g n1 n2
+  | None, Some (R mr2) ->
+      if List.mem mr2 g.allocatable_registers then recordMove g n1 n2
+  | Some (S _), None ->
+      recordExtraPref n2 n1
+  | None, Some (S _) ->
+      recordExtraPref n1 n2
+  | _, _ ->
+      ()
+
+(* Apply the given function to the relevant adjacent nodes of a node *)
+
+let iterAdjacent f n =
+  List.iter
+    (fun n ->
+      match n.nstate with
+      | SelectStack | CoalescedNodes -> ()
+      | _ -> f n)
+    n.adjlist
+
+(* Determine the moves affecting a node *)
+
+let moveIsActiveOrWorklist m =
+  match m.mstate with
+  | ActiveMoves | WorklistMoves -> true
+  | _ -> false
+
+let nodeMoves n =
+  List.filter moveIsActiveOrWorklist n.movelist
+
+(* Determine whether a node is involved in a move *)
+
+let moveRelated n =
+  List.exists moveIsActiveOrWorklist n.movelist
+
+(* Initial partition of nodes into spill / freeze / simplify *)
+
+let initialNodePartition g =
+  let part_node v n =
+    match n.nstate with
+    | Initial ->
+        let k = g.num_available_registers.(n.regclass) in
+        if n.degree >= k then
+          DLinkNode.insert n g.spillWorklist
+        else if moveRelated n then
+          DLinkNode.insert n g.freezeWorklist
+        else
+          DLinkNode.insert n g.simplifyWorklist
+    | Colored -> ()
+    | _ -> assert false in
+  Hashtbl.iter part_node g.varTable
+
+
+(* Check invariants *)
+
+let degreeInvariant g n =
+  let c = ref 0 in
+  iterAdjacent (fun n -> incr c) n;
+  if !c <> n.degree then
+    failwith("degree invariant violated by " ^ name_of_node n)
+
+let simplifyWorklistInvariant g n =
+  if n.degree < g.num_available_registers.(n.regclass)
+  && not (moveRelated n)
+  then ()
+  else failwith("simplify worklist invariant violated by " ^ name_of_node n)
+
+let freezeWorklistInvariant g n =
+  if n.degree < g.num_available_registers.(n.regclass)
+  && moveRelated n
+  then ()
+  else failwith("freeze worklist invariant violated by " ^ name_of_node n)
+
+let spillWorklistInvariant g n =
+  if n.degree >= g.num_available_registers.(n.regclass)
+  then ()
+  else failwith("spill worklist invariant violated by " ^ name_of_node n)
+
+let checkInvariants g =
+  DLinkNode.iter
+    (fun n -> degreeInvariant g n; simplifyWorklistInvariant g n)
+    g.simplifyWorklist;
+  DLinkNode.iter
+    (fun n -> degreeInvariant g n; freezeWorklistInvariant g n)
+    g.freezeWorklist;
+  DLinkNode.iter
+    (fun n -> degreeInvariant g n; spillWorklistInvariant g n)
+    g.spillWorklist
+
+(* Enable moves that have become low-degree related *)
+
+let enableMoves g n =
+  List.iter
+    (fun m ->
+      if m.mstate = ActiveMoves
+      then DLinkMove.move m g.activeMoves g.worklistMoves)
+    (nodeMoves n)
+
+(* Simulate the removal of a node from the graph *)
+
+let decrementDegree g n =
+  let k = g.num_available_registers.(n.regclass) in
+  let d = n.degree in
+  n.degree <- d - 1;
+  if d = k then begin
+    enableMoves g n;
+    iterAdjacent (enableMoves g) n;
+    if moveRelated n
+    then DLinkNode.move n g.spillWorklist g.freezeWorklist
+    else DLinkNode.move n g.spillWorklist g.simplifyWorklist
+  end
+
+(* Simulate the effect of combining nodes [n1] and [n3] on [n2],
+   where [n2] is a node adjacent to [n3]. *)
+
+let combineEdge g n1 n2 =
+  assert (n1 != n2);
+  if interfere g n1 n2 then begin
+    (* The two edges n2--n3 and n2--n1 become one, so degree of n2 decreases *)
+    decrementDegree g n2
+  end else begin
+    (* Add new edge *)
+    let i1 = n1.ident and i2 = n2.ident in
+    let p = if i1 < i2 then (i1, i2) else (i2, i1) in
+    g.adjSet <- IntPairSet.add p g.adjSet;
+    if n1.nstate <> Colored then begin
+      n1.adjlist <- n2 :: n1.adjlist;
+      n1.degree  <- 1 + n1.degree
+    end;
+    if n2.nstate <> Colored then begin
+      n2.adjlist <- n1 :: n2.adjlist;
+      (* n2's degree stays the same because the old edge n2--n3 disappears
+         and becomes the new edge n2--n1 *)
+    end
+  end
+
+(* Simplification of a low-degree node *)
+
+let simplify g =
+  let n = DLinkNode.pick g.simplifyWorklist in
+  (*i printf "Simplifying %s\n" (name_of_node n); *)
+  n.nstate <- SelectStack;
+  iterAdjacent (decrementDegree g) n;
+  n
+
+(* Briggs's conservative coalescing criterion.  In the terminology of
+   Hailperin, "Comparing Conservative Coalescing Criteria",
+   TOPLAS 27(3) 2005, this is the full Briggs criterion, slightly
+   more powerful than the one in George and Appel's paper. *)
+
+let canCoalesceBriggs g u v =
+  let seen = ref IntSet.empty in
+  let k = g.num_available_registers.(u.regclass) in
+  let c = ref 0 in
+  let consider other n =
+    if not (IntSet.mem n.ident !seen) then begin
+      seen := IntSet.add n.ident !seen;
+      (* if n interferes with both u and v, its degree will decrease by one
+         after coalescing *)
+      let degree_after_coalescing =
+        if interfere g n other then n.degree - 1 else n.degree in
+      if degree_after_coalescing >= k || n.nstate = Colored then begin
+        incr c;
+        if !c >= k then raise Exit
+      end
+    end in
+  try
+    iterAdjacent (consider v) u;
+    iterAdjacent (consider u) v;
+    (*i printf "  Briggs: OK for %s and %s\n" (name_of_node u) (name_of_node v); *)
+    true
+  with Exit ->
+    (*i printf "  Briggs: no\n"; *)
+    false
+
+(* George's conservative coalescing criterion: all high-degree neighbors
+   of [v] are neighbors of [u]. *)
+
+let canCoalesceGeorge g u v =
+  let k = g.num_available_registers.(u.regclass) in
+  let isOK t =
+    if t.nstate = Colored then
+      if u.nstate = Colored || interfere g t u then () else raise Exit
+    else
+      if t.degree < k || interfere g t u then () else raise Exit
+  in
+  try
+    iterAdjacent isOK v;
+    (*i printf "  George: OK for %s and %s\n" (name_of_node u) (name_of_node v); *)
+    true
+  with Exit ->
+    (*i printf "  George: no\n"; *)
+    false
+
+(* The combined coalescing criterion.  [u] can be precolored, but
+   [v] is not.  According to George and Appel's paper:
+-  If [u] is precolored, use George's criterion.
+-  If [u] is not precolored, use Briggs's criterion.
+
+   As noted by Hailperin, for non-precolored nodes, George's criterion 
+   is incomparable with Briggs's: there are cases where G says yes
+   and B says no.  Typically, [u] is a long-lived variable with many 
+   interferences, and [v] is a short-lived temporary copy of [u]
+   that has no more interferences than [u].  Coalescing [u] and [v]
+   is "weakly safe" in Hailperin's terminology: [u] is no harder to color,
+   [u]'s neighbors are no harder to color either, but if we end up
+   spilling [u], we'll spill [v] as well.  So, we restrict this heuristic
+   to [v] having a small number of uses.
+*)
+
+let thresholdGeorge = 3
+
+let canCoalesce g u v =
+  (*i printf "canCoalesce %s[%.2f] %s[%.2f]\n"
+     (name_of_node u) u.spillcost (name_of_node v) v.spillcost; *)
+  if u.nstate = Colored
+  then canCoalesceGeorge g u v
+  else canCoalesceBriggs g u v
+       || (u.spillcost < infinity && v.spillcost < infinity &&
+            ((v.accesses <= thresholdGeorge && canCoalesceGeorge g u v)
+             || (u.accesses <= thresholdGeorge && canCoalesceGeorge g v u)))
+
+(* Update worklists after a move was processed *)
+
+let addWorkList g u =
+  if (not (u.nstate = Colored))
+  && u.degree < g.num_available_registers.(u.regclass)
+  && (not (moveRelated u))
+  then DLinkNode.move u g.freezeWorklist g.simplifyWorklist
+
+(* Return the canonical representative of a possibly coalesced node *)
+
+let rec getAlias n =
+  match n.alias with None -> n | Some n' -> getAlias n'
+
+(* Combine two nodes *)
+
+let combine g u v =
+  (*i printf "Combining %s and %s\n" (name_of_node u) (name_of_node v); *)
+  (*i if u.spillcost = infinity then
+    printf "Warning: combining unspillable %s\n" (name_of_node u);
+  if v.spillcost = infinity then
+    printf "Warning: combining unspillable %s\n" (name_of_node v);*)
+  if v.nstate = FreezeWorklist
+  then DLinkNode.move v g.freezeWorklist g.coalescedNodes
+  else DLinkNode.move v g.spillWorklist g.coalescedNodes;
+  v.alias <- Some u;
+  (* Precolored nodes often have big movelists, and if one of [u] and [v]
+     is precolored, it is []u.  So, append [v.movelist] to [u.movelist]
+     instead of the other way around. *)
+  u.movelist <- List.rev_append v.movelist u.movelist;
+  u.spillcost <- u.spillcost +. v.spillcost;
+  iterAdjacent (combineEdge g u) v;  (*r original code using [decrementDegree] is buggy *)
+  u.extra_adj <- u.extra_adj @ v.extra_adj;
+  u.extra_pref <- u.extra_pref @ v.extra_pref;
+  enableMoves g v;                   (*r added as per Appel's book erratum *)
+  if u.degree >= g.num_available_registers.(u.regclass)
+  && u.nstate = FreezeWorklist
+  then DLinkNode.move u g.freezeWorklist g.spillWorklist
+
+(* Attempt coalescing *)
+
+let coalesce g =
+  let m = DLinkMove.pick g.worklistMoves in
+  let x = getAlias m.src and y = getAlias m.dst in
+  let (u, v) = if y.nstate = Colored then (y, x) else (x, y) in
+  (*i printf "Attempt coalescing %s and %s\n" (name_of_node u) (name_of_node v);*)
+  if u == v then begin
+    DLinkMove.insert m g.coalescedMoves;
+    addWorkList g u
+  end else if v.nstate = Colored || interfere g u v then begin
+    DLinkMove.insert m g.constrainedMoves;
+    addWorkList g u;
+    addWorkList g v
+  end else if canCoalesce g u v then begin
+    DLinkMove.insert m g.coalescedMoves;
+    combine g u v;
+    addWorkList g u
+  end else begin
+    DLinkMove.insert m g.activeMoves    
+  end
+
+(* Freeze moves associated with node [u] *)
+
+let freezeMoves g u =
+  let u' = getAlias u in
+  let freeze m =
+    let y = getAlias m.src in
+    let v = if y == u' then getAlias m.dst else y in
+    DLinkMove.move m g.activeMoves g.frozenMoves;
+    if not (moveRelated v)
+    && v.degree < g.num_available_registers.(v.regclass)
+    && v.nstate <> Colored
+    then DLinkNode.move v g.freezeWorklist g.simplifyWorklist in
+  List.iter freeze (nodeMoves u)
+
+(* Pick a move and freeze it *)
+
+let freeze g =
+  let u = DLinkNode.pick g.freezeWorklist in
+  (*i printf "Freezing %s\n" (name_of_node u); *)
+  DLinkNode.insert u g.simplifyWorklist;
+  freezeMoves g u
+
+(* This is the original spill cost function from Chaitin 1982 *)
+
+(*
+let spillCost n =
+(*i
+  printf "spillCost %s: cost = %.2f  degree = %d  rank = %.2f\n"
+                (name_of_node n) n.spillcost n.degree 
+                (n.spillcost /. float n.degree);
+*)
+  n.spillcost /. float n.degree
+*)
+
+(* This is spill cost function h_0 from Bernstein et al 1989.  It performs
+   slightly better than Chaitin's and than functions h_1 and h_2. *)
+
+let spillCost n =
+  let deg = float n.degree in n.spillcost /. (deg *. deg)
+
+(* Spill a node *)
+
+let selectSpill g =
+  (*i printf "Attempt spilling\n"; *)
+  (* Find a spillable node of minimal cost *)
+  let (n, cost) =
+    DLinkNode.fold
+      (fun n (best_node, best_cost as best) ->
+        let cost = spillCost n in
+        if cost <= best_cost then (n, cost) else best)
+      g.spillWorklist (DLinkNode.dummy, infinity) in
+  assert (n != DLinkNode.dummy);
+  if cost = infinity then begin
+    printf "Warning: spilling unspillable %s\n" (name_of_node n);
+    printf "  spill queue is:";
+    DLinkNode.iter (fun n -> printf " %s" (name_of_node n)) g.spillWorklist;
+    printf "\n"
+  end;
+  DLinkNode.remove n g.spillWorklist;
+  (*i printf "Spilling %s\n" (name_of_node n); *)
+  freezeMoves g n;
+  n.nstate <- SelectStack;
+  iterAdjacent (decrementDegree g) n;
+  n
+
+(* Produce the order of nodes that we'll use for coloring *)
+
+let rec nodeOrder g stack =
+  (*i checkInvariants g; *)
+  if DLinkNode.notempty g.simplifyWorklist then
+    (let n = simplify g in nodeOrder g (n :: stack))
+  else if DLinkMove.notempty g.worklistMoves then
+    (coalesce g; nodeOrder g stack)
+  else if DLinkNode.notempty g.freezeWorklist then
+    (freeze g; nodeOrder g stack)
+  else if DLinkNode.notempty g.spillWorklist then
+    (let n = selectSpill g in nodeOrder g (n :: stack))
+  else
+    stack
+
+(* Assign a color (i.e. a hardware register or a stack location)
+   to a node.  The color is chosen among the colors that are not
+   assigned to nodes with which this node interferes.  The choice
+   is guided by the following heuristics: consider first caller-save
+   hardware register of the correct type; second, callee-save registers;
+   third, a stack location.  Callee-save registers and stack locations
+   are ``expensive'' resources, so we try to minimize their number
+   by picking the smallest available callee-save register or stack location.
+   In contrast, caller-save registers are ``free'', so we pick an
+   available one pseudo-randomly. *)
+
+module Regset =
+  Set.Make(struct type t = mreg let compare = compare end)
+
+let find_reg g conflicts regclass =
+  let rec find avail curr last =
+    if curr >= last then None else begin
+      let r = avail.(curr) in
+      if Regset.mem r conflicts
+      then find avail (curr + 1) last
+      else Some (R r)
+    end in
+  let caller_save = g.caller_save_registers.(regclass)
+  and callee_save = g.callee_save_registers.(regclass)
+  and start = g.start_points.(regclass) in
+  match find caller_save start (Array.length caller_save) with
+  | Some _ as res ->
+      g.start_points.(regclass) <-
+        (if start + 1 < Array.length caller_save then start + 1 else 0);
+      res
+  | None ->
+      match find caller_save 0 start with
+      | Some _ as res ->
+          g.start_points.(regclass) <-
+            (if start + 1 < Array.length caller_save then start + 1 else 0);
+          res
+      | None ->
+          find callee_save 0 (Array.length callee_save)
+
+(* Aggressive coalescing of stack slots.  When assigning a slot,
+   try first the slots assigned to the pseudoregs for which we
+   have a preference, provided no conflict occurs. *)
+
+let rec reuse_slot conflicts n mvlist =
+  match mvlist with
+  | [] -> None
+  | mv :: rem ->
+      let attempt_reuse n' =
+        match n'.color with
+        | Some(S(Local, _, _) as l)
+          when List.for_all (Loc.diff_dec l) conflicts -> Some l
+        | _ -> reuse_slot conflicts n rem in
+      let src = getAlias mv.src and dst = getAlias mv.dst in
+      if n == src then attempt_reuse dst
+      else if n == dst then attempt_reuse src
+      else reuse_slot conflicts n rem (* should not happen? *)
+
+(* If no reuse possible, assign lowest nonconflicting stack slot. *)
+
+let compare_slots s1 s2 =
+  match s1, s2 with
+  | S(_, ofs1, _), S(_, ofs2, _) -> Z.compare ofs1 ofs2
+  | _, _ -> assert false
+
+let find_slot conflicts typ =
+  let rec find curr = function
+  | [] ->
+      S(Local, curr, typ)
+  | S(Local, ofs, typ') :: l ->
+      if Z.le (Z.add curr (typesize typ)) ofs then
+        S(Local, curr, typ)
+      else begin
+        let ofs' = Z.add ofs (typesize typ') in
+        find (if Z.le ofs' curr then curr else ofs') l
+      end
+  | _ :: l ->
+      find curr l
+  in find Z.zero (List.stable_sort compare_slots conflicts)
+
+(* Record locations assigned to interfering nodes *)
+
+let record_reg_conflict cnf n =
+  match (getAlias n).color with
+  | Some (R r) -> Regset.add r cnf
+  | _ -> cnf
+
+let record_slot_conflict cnf n =
+  match (getAlias n).color with
+  | Some (S _ as l) -> l :: cnf
+  | _ -> cnf
+
+(* Assign a location, the best we can *)
+
+let assign_color g n =
+  let reg_conflicts =
+    List.fold_left record_reg_conflict Regset.empty n.adjlist in
+  (* First, try to assign a register *)
+  match find_reg g reg_conflicts n.regclass with
+  | Some loc ->
+      n.color <- Some loc
+  | None ->
+      (* Add extra conflicts for nonallocatable and preallocated stack slots *)
+      let slot_conflicts =
+        List.fold_left record_slot_conflict
+          (List.fold_left record_slot_conflict [] n.adjlist)
+            n.extra_adj in
+      (* Second, try to coalesce stack slots *)
+      match reuse_slot slot_conflicts n (n.extra_pref @ n.movelist) with
+      | Some loc ->
+          n.color <- Some loc
+      | None ->
+          (* Last, pick a Local stack slot *)
+          n.color <- Some (find_slot slot_conflicts n.typ)
+
+(* Extract the location of a variable *)
+
+let location_of_var g v =
+  match v with
+  | L l -> l
+  | V(r, ty) ->
+      try
+        let n = Hashtbl.find g.varTable v in
+        let n' = getAlias n in
+        match n'.color with
+        | None -> assert false
+        | Some l -> l
+      with Not_found ->
+        match ty with
+        | Tint -> R dummy_int_reg
+        | Tfloat -> R dummy_float_reg
+        | Tlong -> assert false
+
+(* The exported interface *)
+
+let add_interf g v1 v2 =
+  addEdge g (nodeOfVar g v1) (nodeOfVar g v2)
+
+let add_pref g v1 v2 =
+  addMovePref g (nodeOfVar g v1) (nodeOfVar g v2)
+
+let coloring g =
+  initialNodePartition g;
+  List.iter (assign_color g) (nodeOrder g []);
+  location_of_var g  (* total function var -> location *)
diff --git a/backend/IRC.mli b/backend/IRC.mli
new file mode 100644
index 0000000..e81b6dc
--- /dev/null
+++ b/backend/IRC.mli
@@ -0,0 +1,44 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Iterated Register Coalescing: George and Appel's graph coloring algorithm *)
+
+open AST
+open Registers
+open Machregs
+open Locations
+open XTL
+
+(* The abstract type of interference and preference graphs. *)
+type graph
+
+(* Information associated to every variable. *)
+type var_stats = {
+  mutable cost: int;             (* estimated cost of a spill *)
+  mutable usedefs: int           (* number of uses and defs *)
+}
+
+(* Create an empty graph.  The given function associates statistics to
+   every variable. *)
+val init: (reg -> var_stats) -> graph
+
+(* Add an interference between two variables. *)
+val add_interf: graph -> var -> var -> unit
+
+(* Add a preference between two variables. *)
+val add_pref: graph -> var -> var -> unit
+
+(* Color the graph.  Return an assignment of locations to variables. *)
+val coloring: graph -> (var -> loc)
+
+(* Machine registers that are reserved and not available for allocation. *)
+val reserved_registers: mreg list ref
diff --git a/backend/Inlining.v b/backend/Inlining.v
index a9b1e7e..7b19f80 100644
--- a/backend/Inlining.v
+++ b/backend/Inlining.v
@@ -180,11 +180,17 @@ Next Obligation.
   intros; constructor; simpl; xomega.
 Qed.
 
-Fixpoint mlist_iter2 {A B: Type} (f: A -> B -> mon unit) (l: list (A*B)): mon unit :=
-  match l with
-  | nil => ret tt
-  | (x,y) :: l' => do z <- f x y; mlist_iter2 f l'
-  end.
+Program Definition ptree_mfold {A: Type} (f: positive -> A -> mon unit) (t: PTree.t A): mon unit :=
+  fun s =>
+    R tt 
+      (PTree.fold (fun s1 k v => match f k v s1 return _ with R _ s2 _ => s2 end) t s)
+      _.
+Next Obligation.
+  apply PTree_Properties.fold_rec.
+  auto.
+  apply sincr_refl.
+  intros. destruct (f k v a). eapply sincr_trans; eauto. 
+Qed.
 
 (** ** Inlining contexts *)
 
@@ -422,7 +428,7 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
 
 Definition expand_cfg_rec (ctx: context) (f: function): mon unit :=
   do x <- request_stack (ctx.(dstk) + ctx.(mstk));
-  mlist_iter2 (expand_instr ctx) (PTree.elements f.(fn_code)).
+  ptree_mfold (expand_instr ctx) f.(fn_code).
 
 End EXPAND_CFG.
 
diff --git a/backend/Inliningspec.v b/backend/Inliningspec.v
index 82ef9cf..e8dba67 100644
--- a/backend/Inliningspec.v
+++ b/backend/Inliningspec.v
@@ -180,6 +180,35 @@ Ltac monadInv H :=
       ((progress simpl in H) || unfold F in H); monadInv1 H
   end.
 
+Fixpoint mlist_iter2 {A B: Type} (f: A -> B -> mon unit) (l: list (A*B)): mon unit :=
+  match l with
+  | nil => ret tt
+  | (x,y) :: l' => do z <- f x y; mlist_iter2 f l'
+  end.
+
+Remark mlist_iter2_fold:
+  forall (A B: Type) (f: A -> B -> mon unit) l s,
+  exists i,
+  mlist_iter2 f l s =
+  R tt (fold_left (fun a p => match f (fst p) (snd p) a with R _ s2 _ => s2 end) l s) i.
+Proof.
+  induction l; simpl; intros.
+  exists (sincr_refl s); auto.
+  destruct a as [x y]. unfold bind. simpl. destruct (f x y s) as [xx s1 i1].
+  destruct (IHl s1) as [i2 EQ]. rewrite EQ. econstructor; eauto. 
+Qed.
+
+Lemma ptree_mfold_spec:
+  forall (A: Type) (f: positive -> A -> mon unit) t s x s' i,
+  ptree_mfold f t s = R x s' i ->
+  exists i', mlist_iter2 f (PTree.elements t) s = R tt s' i'.
+Proof.
+  intros. 
+  destruct (mlist_iter2_fold _ _ f (PTree.elements t) s) as [i' EQ].
+  unfold ptree_mfold in H. inv H. rewrite PTree.fold_spec.
+  econstructor. eexact EQ.
+Qed.
+
 (** ** Relational specification of the translation of moves *)
 
 Inductive tr_moves (c: code) : node -> list reg -> list reg -> node -> Prop :=
@@ -416,6 +445,7 @@ Lemma expand_cfg_rec_unchanged:
 Proof.
   intros. unfold expand_cfg_rec in H. monadInv H. inversion EQ.
   transitivity ((st_code s0)!pc).
+  exploit ptree_mfold_spec; eauto. intros [INCR' ITER].  
   eapply iter_expand_instr_unchanged; eauto. 
     subst s0; auto. 
     subst s0; simpl. xomega.
@@ -596,7 +626,8 @@ Proof.
   intros. unfold expand_cfg_rec in H. monadInv H. inversion EQ. 
   constructor. 
   intros. rewrite H1. eapply max_def_function_params; eauto. 
-  intros. eapply iter_expand_instr_spec; eauto. 
+  intros. exploit ptree_mfold_spec; eauto. intros [INCR' ITER].
+  eapply iter_expand_instr_spec; eauto. 
     apply PTree.elements_keys_norepet. 
     intros. rewrite H1. eapply max_def_function_instr; eauto. 
     eapply PTree.elements_complete; eauto.
diff --git a/backend/LTL.v b/backend/LTL.v
index 422b0e0..de10845 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -30,27 +30,33 @@ Require Import Conventions.
 
 (** * Abstract syntax *)
 
-(** LTL is close to RTL, but uses locations instead of pseudo-registers. *)
+(** LTL is close to RTL, but uses machine registers and stack slots
+  instead of pseudo-registers.  Also, the nodes of the control-flow
+  graph are basic blocks instead of single instructions. *)
 
 Definition node := positive.
 
 Inductive instruction: Type :=
-  | Lnop: node -> instruction
-  | Lop: operation -> list loc -> loc -> node -> instruction
-  | Lload: memory_chunk -> addressing -> list loc -> loc -> node -> instruction
-  | Lstore: memory_chunk -> addressing -> list loc -> loc -> node -> instruction
-  | Lcall: signature -> loc + ident -> list loc -> loc -> node -> instruction
-  | Ltailcall: signature -> loc + ident -> list loc -> instruction
-  | Lbuiltin: external_function -> list loc -> loc -> node -> instruction
-  | Lcond: condition -> list loc -> node -> node -> instruction
-  | Ljumptable: loc -> list node -> instruction
-  | Lreturn: option loc -> instruction.
-
-Definition code: Type := PTree.t instruction.
+  | Lop (op: operation) (args: list mreg) (res: mreg)
+  | Lload (chunk: memory_chunk) (addr: addressing) (args: list mreg) (dst: mreg)
+  | Lgetstack (sl: slot) (ofs: Z) (ty: typ) (dst: mreg)
+  | Lsetstack (src: mreg) (sl: slot) (ofs: Z) (ty: typ)
+  | Lstore (chunk: memory_chunk) (addr: addressing) (args: list mreg) (src: mreg)
+  | Lcall (sg: signature) (ros: mreg + ident)
+  | Ltailcall (sg: signature) (ros: mreg + ident)
+  | Lbuiltin (ef: external_function) (args: list mreg) (res: list mreg)
+  | Lannot (ef: external_function) (args: list loc)
+  | Lbranch (s: node)
+  | Lcond (cond: condition) (args: list mreg) (s1 s2: node)
+  | Ljumptable (arg: mreg) (tbl: list node)
+  | Lreturn.
+
+Definition bblock := list instruction.
+
+Definition code: Type := PTree.t bblock.
 
 Record function: Type := mkfunction {
   fn_sig: signature;
-  fn_params: list loc;
   fn_stacksize: Z;
   fn_code: code;
   fn_entrypoint: node
@@ -71,49 +77,56 @@ Definition funsig (fd: fundef) :=
 Definition genv := Genv.t fundef unit.
 Definition locset := Locmap.t.
 
-Definition locmap_optget (ol: option loc) (dfl: val) (ls: locset) : val :=
-  match ol with
-  | None => dfl
-  | Some l => ls l
-  end.
+(** Calling conventions are reflected at the level of location sets
+  (environments mapping locations to values) by the following two 
+  functions.  
 
-Fixpoint init_locs (vl: list val) (rl: list loc) {struct rl} : locset :=
-  match rl, vl with
-  | r1 :: rs, v1 :: vs => Locmap.set r1 v1 (init_locs vs rs)
-  | _, _ => Locmap.init Vundef
-  end.
+  [call_regs caller] returns the location set at function entry,
+  as a function of the location set [caller] of the calling function.
+- Machine registers have the same values as in the caller.
+- Incoming stack slots (used for parameter passing) have the same
+  values as the corresponding outgoing stack slots (used for argument
+  passing) in the caller.
+- Local and outgoing stack slots are initialized to undefined values.
+*) 
 
-(** [postcall_locs ls] returns the location set [ls] after a function
-  call.  Caller-save registers and temporary registers
-  are set to [undef], reflecting the fact that the called
-  function can modify them freely. *)
+Definition call_regs (caller: locset) : locset :=
+  fun (l: loc) =>
+    match l with
+    | R r => caller (R r)
+    | S Local ofs ty => Vundef
+    | S Incoming ofs ty => caller (S Outgoing ofs ty)
+    | S Outgoing ofs ty => Vundef
+    end.
 
-Definition postcall_locs (ls: locset) : locset :=
+(** [return_regs caller callee] returns the location set after
+  a call instruction, as a function of the location set [caller]
+  of the caller before the call instruction and of the location
+  set [callee] of the callee at the return instruction.
+- Callee-save machine registers have the same values as in the caller
+  before the call.
+- Caller-save machine registers have the same values as in the callee.
+- Stack slots have the same values as in the caller.
+*)
+
+Definition return_regs (caller callee: locset) : locset :=
   fun (l: loc) =>
     match l with
     | R r =>
-        if In_dec Loc.eq (R r) temporaries then
-          Vundef
-        else if In_dec Loc.eq (R r) destroyed_at_call then
-          Vundef
-        else
-          ls (R r)
-    | S s => ls (S s)
+        if In_dec mreg_eq r destroyed_at_call
+        then callee (R r)
+        else caller (R r)
+    | S sl ofs ty => caller (S sl ofs ty)
     end.
 
-(** Temporaries destroyed across instructions *)
-
-Definition undef_temps (ls: locset) := Locmap.undef temporaries ls.
-
 (** LTL execution states. *)
 
 Inductive stackframe : Type :=
   | Stackframe:
-      forall (res: loc)         (**r where to store the result *)
-             (f: function)      (**r calling function *)
+      forall (f: function)      (**r calling function *)
              (sp: val)          (**r stack pointer in calling function *)
              (ls: locset)       (**r location state in calling function *)
-             (pc: node),        (**r program point in calling function *)
+             (bb: bblock),      (**r continuation in calling function *)
       stackframe.
 
 Inductive state : Type :=
@@ -125,15 +138,23 @@ Inductive state : Type :=
              (ls: locset)             (**r location state *)
              (m: mem),                (**r memory state *)
       state
+  | Block:
+      forall (stack: list stackframe) (**r call stack *)
+             (f: function)            (**r function currently executing *)
+             (sp: val)                (**r stack pointer *)
+             (bb: bblock)             (**r current basic block *)
+             (ls: locset)             (**r location state *)
+             (m: mem),                (**r memory state *)
+      state
   | Callstate:
       forall (stack: list stackframe) (**r call stack *)
              (f: fundef)              (**r function to call *)
-             (args: list val)         (**r arguments to the call *)
+             (ls: locset)             (**r location state of caller *)
              (m: mem),                (**r memory state *)
       state
   | Returnstate:
       forall (stack: list stackframe) (**r call stack *)
-             (v: val)                 (**r return value for the call *)
+             (ls: locset)             (**r location state of callee *)
              (m: mem),                (**r memory state *)
       state.
 
@@ -142,9 +163,24 @@ Section RELSEM.
 
 Variable ge: genv.
 
-Definition find_function (los: loc + ident) (rs: locset) : option fundef :=
-  match los with
-  | inl l => Genv.find_funct ge (rs l)
+Definition reglist (rs: locset) (rl: list mreg) : list val :=
+  List.map (fun r => rs (R r)) rl.
+
+Fixpoint undef_regs (rl: list mreg) (rs: locset) : locset :=
+  match rl with
+  | nil => rs
+  | r1 :: rl => Locmap.set (R r1) Vundef (undef_regs rl rs)
+  end.
+
+Definition destroyed_by_getstack (s: slot) : list mreg :=
+  match s with
+  | Incoming => temp_for_parent_frame :: nil
+  | _        => nil
+  end.
+
+Definition find_function (ros: mreg + ident) (rs: locset) : option fundef :=
+  match ros with
+  | inl r => Genv.find_funct ge (rs (R r))
   | inr symb =>
       match Genv.find_symbol ge symb with
       | None => None
@@ -152,95 +188,109 @@ Definition find_function (los: loc + ident) (rs: locset) : option fundef :=
       end
   end.
 
-(** The LTL transition relation is very similar to that of RTL,
-  with locations being used in place of pseudo-registers.
-  The main difference is for the [call] instruction: caller-save
-  registers are set to [Vundef] across the call, using the [postcall_locs]
-  function defined above.  This forces the LTL producer to avoid
-  storing values live across the call in a caller-save register. *)
+(** [parent_locset cs] returns the mapping of values for locations
+  of the caller function. *)
+
+Definition parent_locset (stack: list stackframe) : locset :=
+  match stack with
+  | nil => Locmap.init Vundef
+  | Stackframe f sp ls bb :: stack' => ls
+  end.
+
+(* REVISE
+(** [getslot sl ofs ty rs] looks up the value of location [S sl ofs ty] in [rs],
+  and normalizes it to the type [ty] of this location. *)
+
+Definition getslot (sl: slot) (ofs: Z) (ty: typ) (rs: locset) : val :=
+  Val.load_result
+    (match ty with Tint => Mint32 | Tfloat => Mfloat64 | Tlong => Mint64 end)
+    (rs (S sl ofs ty)).
+*)
 
 Inductive step: state -> trace -> state -> Prop :=
-  | exec_Lnop:
-      forall s f sp pc rs m pc',
-      (fn_code f)!pc = Some(Lnop pc') ->
+  | exec_start_block: forall s f sp pc rs m bb,
+      (fn_code f)!pc = Some bb ->
       step (State s f sp pc rs m)
-        E0 (State s f sp pc' rs m)
-  | exec_Lop:
-      forall s f sp pc rs m op args res pc' v,
-      (fn_code f)!pc = Some(Lop op args res pc') ->
-      eval_operation ge sp op (map rs args) m = Some v ->
-      step (State s f sp pc rs m)
-        E0 (State s f sp pc' (Locmap.set res v (undef_temps rs)) m)
-  | exec_Lload:
-      forall s f sp pc rs m chunk addr args dst pc' a v,
-      (fn_code f)!pc = Some(Lload chunk addr args dst pc') ->
-      eval_addressing ge sp addr (map rs args) = Some a ->
+        E0 (Block s f sp bb rs m)
+  | exec_Lop: forall s f sp op args res bb rs m v rs',
+      eval_operation ge sp op (reglist rs args) m = Some v ->
+      rs' = Locmap.set (R res) v (undef_regs (destroyed_by_op op) rs) ->
+      step (Block s f sp (Lop op args res :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lload: forall s f sp chunk addr args dst bb rs m a v rs',
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.loadv chunk m a = Some v ->
-      step (State s f sp pc rs m)
-        E0 (State s f sp pc' (Locmap.set dst v (undef_temps rs)) m)
-  | exec_Lstore:
-      forall s f sp pc rs m chunk addr args src pc' a m',
-      (fn_code f)!pc = Some(Lstore chunk addr args src pc') ->
-      eval_addressing ge sp addr (map rs args) = Some a ->
-      Mem.storev chunk m a (rs src) = Some m' ->
-      step (State s f sp pc rs m)
-        E0 (State s f sp pc' (undef_temps rs) m')
-  | exec_Lcall:
-      forall s f sp pc rs m sig ros args res pc' f',
-      (fn_code f)!pc = Some(Lcall sig ros args res pc') ->
-      find_function ros rs = Some f' ->
-      funsig f' = sig ->
-      step (State s f sp pc rs m)
-        E0 (Callstate (Stackframe res f sp (postcall_locs rs) pc' :: s)
-                      f' (List.map rs args) m)
-  | exec_Ltailcall:
-      forall s f stk pc rs m sig ros args f' m',
-      (fn_code f)!pc = Some(Ltailcall sig ros args) ->
-      find_function ros rs = Some f' ->
-      funsig f' = sig ->
-      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
-      step (State s f (Vptr stk Int.zero) pc rs m)
-        E0 (Callstate s f' (List.map rs args) m')
-  | exec_Lbuiltin:
-      forall s f sp pc rs m ef args res pc' t v m',
-      (fn_code f)!pc = Some(Lbuiltin ef args res pc') ->
-      external_call ef ge (map rs args) m t v m' ->
-      step (State s f sp pc rs m)
-         t (State s f sp pc' (Locmap.set res v rs) m')
-  | exec_Lcond:
-      forall s f sp pc rs m cond args ifso ifnot b pc',
-      (fn_code f)!pc = Some(Lcond cond args ifso ifnot) ->
-      eval_condition cond (map rs args) m = Some b ->
-      pc' = (if b then ifso else ifnot) ->
-      step (State s f sp pc rs m)
-        E0 (State s f sp pc' (undef_temps rs) m)
-  | exec_Ljumptable:
-      forall s f sp pc rs m arg tbl n pc',
-      (fn_code f)!pc = Some(Ljumptable arg tbl) ->
-      rs arg = Vint n ->
-      list_nth_z tbl (Int.unsigned n) = Some pc' ->
-      step (State s f sp pc rs m)
-        E0 (State s f sp pc' (undef_temps rs) m)
-  | exec_Lreturn:
-      forall s f stk pc rs m or m',
-      (fn_code f)!pc = Some(Lreturn or) ->
-      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
-      step (State s f (Vptr stk Int.zero) pc rs m)
-        E0 (Returnstate s (locmap_optget or Vundef rs) m')
-  | exec_function_internal:
-      forall s f args m m' stk,
-      Mem.alloc m 0 f.(fn_stacksize) = (m', stk) ->
-      step (Callstate s (Internal f) args m)
-        E0 (State s f (Vptr stk Int.zero) f.(fn_entrypoint) (init_locs args f.(fn_params)) m')
-  | exec_function_external:
-      forall s ef t args res m m',
-      external_call ef ge args m t res m' ->
-      step (Callstate s (External ef) args m)
-         t (Returnstate s res m')
-  | exec_return:
-      forall res f sp rs pc s vres m,
-      step (Returnstate (Stackframe res f sp rs pc :: s) vres m)
-        E0 (State s f sp pc (Locmap.set res vres rs) m).
+      rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (Block s f sp (Lload chunk addr args dst :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lgetstack: forall s f sp sl ofs ty dst bb rs m rs',
+      rs' = Locmap.set (R dst) (rs (S sl ofs ty)) (undef_regs (destroyed_by_getstack sl) rs) ->
+      step (Block s f sp (Lgetstack sl ofs ty dst :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lsetstack: forall s f sp src sl ofs ty bb rs m rs',
+      rs' = Locmap.set (S sl ofs ty) (rs (R src)) (undef_regs (destroyed_by_op Omove) rs) ->
+      step (Block s f sp (Lsetstack src sl ofs ty :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lstore: forall s f sp chunk addr args src bb rs m a rs' m',
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
+      Mem.storev chunk m a (rs (R src)) = Some m' ->
+      rs' = undef_regs (destroyed_by_store chunk addr) rs ->
+      step (Block s f sp (Lstore chunk addr args src :: bb) rs m)
+        E0 (Block s f sp bb rs' m')
+  | exec_Lcall: forall s f sp sig ros bb rs m fd,
+      find_function ros rs = Some fd ->
+      funsig fd = sig ->
+      step (Block s f sp (Lcall sig ros :: bb) rs m)
+        E0 (Callstate (Stackframe f sp rs bb :: s) fd rs m)
+  | exec_Ltailcall: forall s f sp sig ros bb rs m fd rs' m',
+      rs' = return_regs (parent_locset s) rs ->
+      find_function ros rs' = Some fd ->
+      funsig fd = sig ->
+      Mem.free m sp 0 f.(fn_stacksize) = Some m' ->
+      step (Block s f (Vptr sp Int.zero) (Ltailcall sig ros :: bb) rs m)
+        E0 (Callstate s fd rs' m')
+  | exec_Lbuiltin: forall s f sp ef args res bb rs m t vl rs' m',
+      external_call' ef ge (reglist rs args) m t vl m' ->
+      rs' = Locmap.setlist (map R res) vl (undef_regs (destroyed_by_builtin ef) rs) ->
+      step (Block s f sp (Lbuiltin ef args res :: bb) rs m)
+         t (Block s f sp bb rs' m')
+  | exec_Lannot: forall s f sp ef args bb rs m t vl m',
+      external_call' ef ge (map rs args) m t vl m' ->
+      step (Block s f sp (Lannot ef args :: bb) rs m)
+         t (Block s f sp bb rs m')
+  | exec_Lbranch: forall s f sp pc bb rs m,
+      step (Block s f sp (Lbranch pc :: bb) rs m)
+        E0 (State s f sp pc rs m)
+  | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m,
+      eval_condition cond (reglist rs args) m = Some b ->
+      pc = (if b then pc1 else pc2) ->
+      rs' = undef_regs (destroyed_by_cond cond) rs ->
+      step (Block s f sp (Lcond cond args pc1 pc2 :: bb) rs m)
+        E0 (State s f sp pc rs' m)
+  | exec_Ljumptable: forall s f sp arg tbl bb rs m n pc rs',
+      rs (R arg) = Vint n ->
+      list_nth_z tbl (Int.unsigned n) = Some pc ->
+      rs' = undef_regs (destroyed_by_jumptable) rs ->
+      step (Block s f sp (Ljumptable arg tbl :: bb) rs m)
+        E0 (State s f sp pc rs' m)
+  | exec_Lreturn: forall s f sp bb rs m m',
+      Mem.free m sp 0 f.(fn_stacksize) = Some m' ->
+      step (Block s f (Vptr sp Int.zero) (Lreturn :: bb) rs m)
+        E0 (Returnstate s (return_regs (parent_locset s) rs) m')
+  | exec_function_internal: forall s f rs m m' sp rs',
+      Mem.alloc m 0 f.(fn_stacksize) = (m', sp) ->
+      rs' = undef_regs destroyed_at_function_entry (call_regs rs) ->
+      step (Callstate s (Internal f) rs m)
+        E0 (State s f (Vptr sp Int.zero) f.(fn_entrypoint) rs' m')
+  | exec_function_external: forall s ef t args res rs m rs' m',
+      args = map rs (loc_arguments (ef_sig ef)) ->
+      external_call' ef ge args m t res m' ->
+      rs' = Locmap.setlist (map R (loc_result (ef_sig ef))) res rs ->
+      step (Callstate s (External ef) rs m)
+         t (Returnstate s rs' m')
+  | exec_return: forall f sp rs1 bb s rs m,
+      step (Returnstate (Stackframe f sp rs1 bb :: s) rs m)
+        E0 (Block s f sp bb rs m).
 
 End RELSEM.
 
@@ -256,33 +306,32 @@ Inductive initial_state (p: program): state -> Prop :=
       Genv.find_symbol ge p.(prog_main) = Some b ->
       Genv.find_funct_ptr ge b = Some f ->
       funsig f = mksignature nil (Some Tint) ->
-      initial_state p (Callstate nil f nil m0).
+      initial_state p (Callstate nil f (Locmap.init Vundef) m0).
 
 Inductive final_state: state -> int -> Prop :=
-  | final_state_intro: forall n m,
-      final_state (Returnstate nil (Vint n) m) n.
+  | final_state_intro: forall rs m r retcode,
+      loc_result (mksignature nil (Some Tint)) = r :: nil ->
+      rs (R r) = Vint retcode ->
+      final_state (Returnstate nil rs m) retcode.
 
 Definition semantics (p: program) :=
   Semantics step (initial_state p) final_state (Genv.globalenv p).
 
 (** * Operations over LTL *)
 
-(** Computation of the possible successors of an instruction.
+(** Computation of the possible successors of a block.
   This is used in particular for dataflow analyses. *)
 
-Definition successors_instr (i: instruction) : list node :=
-  match i with
-  | Lnop s => s :: nil
-  | Lop op args res s => s :: nil
-  | Lload chunk addr args dst s => s :: nil
-  | Lstore chunk addr args src s => s :: nil
-  | Lcall sig ros args res s => s :: nil
-  | Ltailcall sig ros args => nil
-  | Lbuiltin ef args res s => s :: nil
-  | Lcond cond args ifso ifnot => ifso :: ifnot :: nil
-  | Ljumptable arg tbl => tbl
-  | Lreturn optarg => nil
+Fixpoint successors_block (b: bblock) : list node :=
+  match b with
+  | nil => nil                          (**r should never happen *)
+  | Ltailcall _ _ :: _ => nil
+  | Lbranch s :: _ => s :: nil
+  | Lcond _ _ s1 s2 :: _ => s1 :: s2 :: nil
+  | Ljumptable _ tbl :: _ => tbl
+  | Lreturn :: _ => nil
+  | instr :: b' => successors_block b'
   end.
 
 Definition successors (f: function): PTree.t (list node) :=
-  PTree.map1 successors_instr f.(fn_code).
+  PTree.map1 successors_block f.(fn_code).
diff --git a/backend/LTLin.v b/backend/LTLin.v
deleted file mode 100644
index e0d5ca2..0000000
--- a/backend/LTLin.v
+++ /dev/null
@@ -1,268 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** The LTLin intermediate language: abstract syntax and semantcs *)
-
-(** The LTLin language is a variant of LTL where control-flow is not
-    expressed as a graph of basic blocks, but as a linear list of
-    instructions with explicit labels and ``goto'' instructions. *)
-
-Require Import Coqlib.
-Require Import AST.
-Require Import Integers.
-Require Import Values.
-Require Import Memory.
-Require Import Events.
-Require Import Globalenvs.
-Require Import Smallstep.
-Require Import Op.
-Require Import Locations.
-Require Import LTL.
-
-(** * Abstract syntax *)
-
-Definition label := positive.
-
-(** LTLin instructions are similar to those of LTL.
-  Except the last three, these instructions continue in sequence
-  with the next instruction in the linear list of instructions.
-  Unconditional branches [Lgoto] and conditional branches [Lcond]
-  transfer control to the given code label.  Labels are explicitly
-  inserted in the instruction list as pseudo-instructions [Llabel]. *)
-
-Inductive instruction: Type :=
-  | Lop: operation -> list loc -> loc -> instruction
-  | Lload: memory_chunk -> addressing -> list loc -> loc -> instruction
-  | Lstore: memory_chunk -> addressing -> list loc -> loc -> instruction
-  | Lcall: signature -> loc + ident -> list loc -> loc -> instruction
-  | Ltailcall: signature -> loc + ident -> list loc -> instruction
-  | Lbuiltin: external_function -> list loc -> loc -> instruction
-  | Llabel: label -> instruction
-  | Lgoto: label -> instruction
-  | Lcond: condition -> list loc -> label -> instruction
-  | Ljumptable: loc -> list label -> instruction
-  | Lreturn: option loc -> instruction.
-
-Definition code: Type := list instruction.
-
-Record function: Type := mkfunction {
-  fn_sig: signature;
-  fn_params: list loc;
-  fn_stacksize: Z;
-  fn_code: code
-}.
-
-Definition fundef := AST.fundef function.
-
-Definition program := AST.program fundef unit.
-
-Definition funsig (fd: fundef) :=
-  match fd with
-  | Internal f => fn_sig f
-  | External ef => ef_sig ef
-  end.
-
-Definition genv := Genv.t fundef unit.
-Definition locset := Locmap.t.
-
-(** * Operational semantics *)
-
-(** Looking up labels in the instruction list.  *)
-
-Definition is_label (lbl: label) (instr: instruction) : bool :=
-  match instr with
-  | Llabel lbl' => if peq lbl lbl' then true else false
-  | _ => false
-  end.
-
-Lemma is_label_correct:
-  forall lbl instr,
-  if is_label lbl instr then instr = Llabel lbl else instr <> Llabel lbl.
-Proof.
-  intros.  destruct instr; simpl; try discriminate.
-  case (peq lbl l); intro; congruence.
-Qed.
-
-(** [find_label lbl c] returns a list of instruction, suffix of the
-  code [c], that immediately follows the [Llabel lbl] pseudo-instruction.
-  If the label [lbl] is multiply-defined, the first occurrence is
-  retained.  If the label [lbl] is not defined, [None] is returned. *)
-
-Fixpoint find_label (lbl: label) (c: code) {struct c} : option code :=
-  match c with
-  | nil => None
-  | i1 :: il => if is_label lbl i1 then Some il else find_label lbl il
-  end.
-
-(** The states of the dynamic semantics are similar to those used
-  in the LTL semantics (see module [LTL]).  The only difference
-  is that program points [pc] (nodes of the CFG in LTL) become
-  code sequences [c] (suffixes of the code of the current function).
-*)
-
-Inductive stackframe : Type :=
-  | Stackframe:
-      forall (res: loc)       (**r where to store the result *)
-             (f: function)    (**r calling function *)
-             (sp: val)        (**r stack pointer in calling function *)
-             (ls: locset)     (**r location state in calling function *)
-             (c: code),       (**r program point in calling function *)
-      stackframe.
-
-Inductive state : Type :=
-  | State:
-      forall (stack: list stackframe) (**r call stack *)
-             (f: function)            (**r function currently executing *)
-             (sp: val)                (**r stack pointer *)
-             (c: code)                (**r current program point *)
-             (ls: locset)             (**r location state *)
-             (m: mem),                (**r memory state *)
-      state
-  | Callstate:
-      forall (stack: list stackframe) (**r call stack *)
-             (f: fundef)              (**r function to call *)
-             (args: list val)         (**r arguments to the call *)
-             (m: mem),                (**r memory state *)
-      state
-  | Returnstate:
-      forall (stack: list stackframe) (**r call stack *)
-             (v: val)                 (**r return value for the call *)
-             (m: mem),                (**r memory state *)
-      state.
-
-Section RELSEM.
-
-Variable ge: genv.
-
-Definition find_function (ros: loc + ident) (rs: locset) : option fundef :=
-  match ros with
-  | inl r => Genv.find_funct ge (rs r)
-  | inr symb =>
-      match Genv.find_symbol ge symb with
-      | None => None
-      | Some b => Genv.find_funct_ptr ge b
-      end
-  end.
-
-Inductive step: state -> trace -> state -> Prop :=
-  | exec_Lop:
-      forall s f sp op args res b rs m v,
-      eval_operation ge sp op (map rs args) m = Some v ->
-      step (State s f sp (Lop op args res :: b) rs m)
-        E0 (State s f sp b (Locmap.set res v (undef_temps rs)) m)
-  | exec_Lload:
-      forall s f sp chunk addr args dst b rs m a v,
-      eval_addressing ge sp addr (map rs args) = Some a ->
-      Mem.loadv chunk m a = Some v ->
-      step (State s f sp (Lload chunk addr args dst :: b) rs m)
-        E0 (State s f sp b (Locmap.set dst v (undef_temps rs)) m)
-  | exec_Lstore:
-      forall s f sp chunk addr args src b rs m m' a,
-      eval_addressing ge sp addr (map rs args) = Some a ->
-      Mem.storev chunk m a (rs src) = Some m' ->
-      step (State s f sp (Lstore chunk addr args src :: b) rs m)
-        E0 (State s f sp b (undef_temps rs) m')
-  | exec_Lcall:
-      forall s f sp sig ros args res b rs m f',
-      find_function ros rs = Some f' ->
-      sig = funsig f' ->
-      step (State s f sp (Lcall sig ros args res :: b) rs m)
-        E0 (Callstate (Stackframe res f sp (postcall_locs rs) b :: s)
-                      f' (List.map rs args) m)
-  | exec_Ltailcall:
-      forall s f stk sig ros args b rs m f' m',
-      find_function ros rs = Some f' ->
-      sig = funsig f' ->
-      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
-      step (State s f (Vptr stk Int.zero) (Ltailcall sig ros args :: b) rs m)
-        E0 (Callstate s f' (List.map rs args) m')
-  | exec_Lbuiltin:
-      forall s f sp rs m ef args res b t v m',
-      external_call ef ge (map rs args) m t v m' ->
-      step (State s f sp (Lbuiltin ef args res :: b) rs m)
-         t (State s f sp b (Locmap.set res v rs) m')
-  | exec_Llabel:
-      forall s f sp lbl b rs m,
-      step (State s f sp (Llabel lbl :: b) rs m)
-        E0 (State s f sp b rs m)
-  | exec_Lgoto:
-      forall s f sp lbl b rs m b',
-      find_label lbl f.(fn_code) = Some b' ->
-      step (State s f sp (Lgoto lbl :: b) rs m)
-        E0 (State s f sp b' rs m)
-  | exec_Lcond_true:
-      forall s f sp cond args lbl b rs m b',
-      eval_condition cond (map rs args) m = Some true ->
-      find_label lbl f.(fn_code) = Some b' ->
-      step (State s f sp (Lcond cond args lbl :: b) rs m)
-        E0 (State s f sp b' (undef_temps rs) m)
-  | exec_Lcond_false:
-      forall s f sp cond args lbl b rs m,
-      eval_condition cond (map rs args) m = Some false ->
-      step (State s f sp (Lcond cond args lbl :: b) rs m)
-        E0 (State s f sp b (undef_temps rs) m)
-  | exec_Ljumptable:
-      forall s f sp arg tbl b rs m n lbl b',
-      rs arg = Vint n ->
-      list_nth_z tbl (Int.unsigned n) = Some lbl ->
-      find_label lbl f.(fn_code) = Some b' ->
-      step (State s f sp (Ljumptable arg tbl :: b) rs m)
-        E0 (State s f sp b' (undef_temps rs) m)
-  | exec_Lreturn:
-      forall s f stk rs m or b m',
-      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
-      step (State s f (Vptr stk Int.zero) (Lreturn or :: b) rs m)
-        E0 (Returnstate s (locmap_optget or Vundef rs) m')
-  | exec_function_internal:
-      forall s f args m m' stk,
-      Mem.alloc m 0 f.(fn_stacksize) = (m', stk) ->
-      step (Callstate s (Internal f) args m)
-        E0 (State s f (Vptr stk Int.zero) f.(fn_code) (init_locs args f.(fn_params)) m')
-  | exec_function_external:
-      forall s ef args t res m m',
-      external_call ef ge args m t res m' ->
-      step (Callstate s (External ef) args m)
-         t (Returnstate s res m')
-  | exec_return:
-      forall res f sp rs b s vres m,
-      step (Returnstate (Stackframe res f sp rs b :: s) vres m)
-        E0 (State s f sp b (Locmap.set res vres rs) m).
-
-End RELSEM.
-
-Inductive initial_state (p: program): state -> Prop :=
-  | initial_state_intro: forall b f m0,
-      let ge := Genv.globalenv p in
-      Genv.init_mem p = Some m0 ->
-      Genv.find_symbol ge p.(prog_main) = Some b ->
-      Genv.find_funct_ptr ge b = Some f ->
-      funsig f = mksignature nil (Some Tint) ->
-      initial_state p (Callstate nil f nil m0).
-
-Inductive final_state: state -> int -> Prop :=
-  | final_state_intro: forall n m,
-      final_state (Returnstate nil (Vint n) m) n.
-
-Definition semantics (p: program) :=
-  Semantics step (initial_state p) final_state (Genv.globalenv p).
-
-(** * Properties of the operational semantics *)
-
-Lemma find_label_is_tail:
-  forall lbl c c', find_label lbl c = Some c' -> is_tail c' c.
-Proof.
-  induction c; simpl; intros.
-  discriminate.
-  destruct (is_label lbl a). inv H. constructor. constructor.
-  constructor. auto.
-Qed.
-  
diff --git a/backend/LTLintyping.v b/backend/LTLintyping.v
deleted file mode 100644
index 0338667..0000000
--- a/backend/LTLintyping.v
+++ /dev/null
@@ -1,122 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Typing rules for LTLin. *)
-
-Require Import Coqlib.
-Require Import AST.
-Require Import Integers.
-Require Import Op.
-Require Import Locations.
-Require Import LTLin.
-Require LTLtyping.
-Require Import Conventions.
-
-(** The following predicates define a type system for LTLin similar to that
-  of LTL. *)
-
-Section WT_INSTR.
-
-Variable funsig: signature.
-
-Inductive wt_instr : instruction -> Prop :=
-  | wt_Lopmove:
-      forall r1 r,
-      Loc.type r1 = Loc.type r -> loc_acceptable r1 -> loc_acceptable r ->
-      wt_instr (Lop Omove (r1 :: nil) r)
-  | wt_Lop:
-      forall op args res,
-      op <> Omove ->
-      (List.map Loc.type args, Loc.type res) = type_of_operation op ->
-      locs_acceptable args -> loc_acceptable res ->
-      wt_instr (Lop op args res)
-  | wt_Lload:
-      forall chunk addr args dst,
-      List.map Loc.type args = type_of_addressing addr ->
-      Loc.type dst = type_of_chunk chunk ->
-      locs_acceptable args -> loc_acceptable dst ->
-      wt_instr (Lload chunk addr args dst)
-  | wt_Lstore:
-      forall chunk addr args src,
-      List.map Loc.type args = type_of_addressing addr ->
-      Loc.type src = type_of_chunk chunk ->
-      locs_acceptable args -> loc_acceptable src ->
-      wt_instr (Lstore chunk addr args src)
-  | wt_Lcall:
-      forall sig ros args res,
-      List.map Loc.type args = sig.(sig_args) ->
-      Loc.type res = proj_sig_res sig ->
-      LTLtyping.call_loc_acceptable sig ros ->
-      locs_acceptable args -> loc_acceptable res ->
-      wt_instr (Lcall sig ros args res)
-  | wt_Ltailcall:
-      forall sig ros args,
-      List.map Loc.type args = sig.(sig_args) ->
-      LTLtyping.call_loc_acceptable sig ros ->
-      locs_acceptable args -> 
-      sig.(sig_res) = funsig.(sig_res) ->
-      tailcall_possible sig ->
-      wt_instr (Ltailcall sig ros args)
-  | wt_Lbuiltin:
-      forall ef args res,
-      List.map Loc.type args = (ef_sig ef).(sig_args) ->
-      Loc.type res = proj_sig_res (ef_sig ef) ->
-      arity_ok (ef_sig ef).(sig_args) = true \/ ef_reloads ef = false ->
-      locs_acceptable args -> loc_acceptable res ->
-       wt_instr (Lbuiltin ef args res)
-  | wt_Llabel: forall lbl,
-      wt_instr (Llabel lbl)
-  | wt_Lgoto: forall lbl,
-      wt_instr (Lgoto lbl)
-  | wt_Lcond:
-      forall cond args lbl,
-      List.map Loc.type args = type_of_condition cond ->
-      locs_acceptable args ->
-      wt_instr (Lcond cond args lbl)
-  | wt_Ljumptable:
-      forall arg tbl,
-      Loc.type arg = Tint ->
-      loc_acceptable arg ->
-      list_length_z tbl * 4 <= Int.max_unsigned ->
-      wt_instr (Ljumptable arg tbl)
-  | wt_Lreturn: 
-      forall optres,
-      option_map Loc.type optres = funsig.(sig_res) ->
-      match optres with None => True | Some r => loc_acceptable r end ->
-      wt_instr (Lreturn optres).
-
-Definition wt_code (c: code) : Prop :=
-  forall i, In i c -> wt_instr i.
-
-End WT_INSTR.
-
-Record wt_function (f: function): Prop :=
-  mk_wt_function {
-    wt_params:
-      List.map Loc.type f.(fn_params) = f.(fn_sig).(sig_args);
-    wt_acceptable:
-      locs_acceptable f.(fn_params);
-    wt_norepet:
-      Loc.norepet f.(fn_params);
-    wt_instrs:
-      wt_code f.(fn_sig) f.(fn_code)
-}.
-
-Inductive wt_fundef: fundef -> Prop :=
-  | wt_fundef_external: forall ef,
-      wt_fundef (External ef)
-  | wt_function_internal: forall f,
-      wt_function f ->
-      wt_fundef (Internal f).
-
-Definition wt_program (p: program): Prop :=
-  forall i f, In (i, Gfun f) (prog_defs p) -> wt_fundef f.
diff --git a/backend/LTLtyping.v b/backend/LTLtyping.v
deleted file mode 100644
index 0c90514..0000000
--- a/backend/LTLtyping.v
+++ /dev/null
@@ -1,143 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Typing rules for LTL. *)
-
-Require Import Coqlib.
-Require Import Maps.
-Require Import AST.
-Require Import Integers.
-Require Import Op.
-Require Import Locations.
-Require Import LTL.
-Require Import Conventions.
-
-(** The following predicates define a type system for LTL similar to that
-  of [RTL] (see file [RTLtyping]): it statically guarantees that operations
-  and addressing modes are applied to the right number of arguments
-  and that the arguments are of the correct types.  Moreover, it also
-  guarantees that the locations of arguments and results are "acceptable",
-  i.e. either non-temporary registers or [Local] stack locations. *)
-
-Section WT_INSTR.
-
-Variable funct: function.
-
-Definition valid_successor (s: node) : Prop :=
-  exists i, funct.(fn_code)!s = Some i.
-
-Definition call_loc_acceptable (sig: signature) (los: loc + ident) : Prop :=
-  match los with
-  | inl l => Loc.type l = Tint /\ loc_acceptable l /\ ~In l (loc_arguments sig)
-  | inr s => True
-  end.
-
-Inductive wt_instr : instruction -> Prop :=
-  | wt_Lnop:
-      forall s,
-      valid_successor s ->
-      wt_instr (Lnop s)
-  | wt_Lopmove:
-      forall r1 r s,
-      Loc.type r1 = Loc.type r -> loc_acceptable r1 -> loc_acceptable r ->
-      valid_successor s ->
-      wt_instr (Lop Omove (r1 :: nil) r s)
-  | wt_Lop:
-      forall op args res s,
-      op <> Omove ->
-      (List.map Loc.type args, Loc.type res) = type_of_operation op ->
-      locs_acceptable args -> loc_acceptable res ->
-      valid_successor s ->
-      wt_instr (Lop op args res s)
-  | wt_Lload:
-      forall chunk addr args dst s,
-      List.map Loc.type args = type_of_addressing addr ->
-      Loc.type dst = type_of_chunk chunk ->
-      locs_acceptable args -> loc_acceptable dst ->
-      valid_successor s ->
-      wt_instr (Lload chunk addr args dst s)
-  | wt_Lstore:
-      forall chunk addr args src s,
-      List.map Loc.type args = type_of_addressing addr ->
-      Loc.type src = type_of_chunk chunk ->
-      locs_acceptable args -> loc_acceptable src ->
-      valid_successor s ->
-      wt_instr (Lstore chunk addr args src s)
-  | wt_Lcall:
-      forall sig ros args res s,
-      List.map Loc.type args = sig.(sig_args) ->
-      Loc.type res = proj_sig_res sig ->
-      call_loc_acceptable sig ros ->
-      locs_acceptable args -> loc_acceptable res ->
-      valid_successor s ->
-      wt_instr (Lcall sig ros args res s)
-  | wt_Ltailcall:
-      forall sig ros args,
-      List.map Loc.type args = sig.(sig_args) ->
-      call_loc_acceptable sig ros ->
-      locs_acceptable args ->
-      sig.(sig_res) = funct.(fn_sig).(sig_res) ->
-      tailcall_possible sig ->
-      wt_instr (Ltailcall sig ros args)
-  | wt_Lbuiltin:
-      forall ef args res s,
-      List.map Loc.type args = (ef_sig ef).(sig_args) ->
-      Loc.type res = proj_sig_res (ef_sig ef) ->
-      arity_ok (ef_sig ef).(sig_args) = true \/ ef_reloads ef = false ->
-      locs_acceptable args -> loc_acceptable res ->
-      valid_successor s ->
-      wt_instr (Lbuiltin ef args res s)
-  | wt_Lcond:
-      forall cond args s1 s2,
-      List.map Loc.type args = type_of_condition cond ->
-      locs_acceptable args ->
-      valid_successor s1 -> valid_successor s2 ->
-      wt_instr (Lcond cond args s1 s2)
-  | wt_Ljumptable:
-      forall arg tbl,
-      Loc.type arg = Tint ->
-      loc_acceptable arg ->
-      (forall lbl, In lbl tbl -> valid_successor lbl) ->
-      list_length_z tbl * 4 <= Int.max_unsigned ->
-      wt_instr (Ljumptable arg tbl)
-  | wt_Lreturn: 
-      forall optres,
-      option_map Loc.type optres = funct.(fn_sig).(sig_res) ->
-      match optres with None => True | Some r => loc_acceptable r end ->
-      wt_instr (Lreturn optres).
-
-End WT_INSTR.
-
-Record wt_function (f: function): Prop :=
-  mk_wt_function {
-    wt_params:
-      List.map Loc.type f.(fn_params) = f.(fn_sig).(sig_args);
-    wt_acceptable:
-      locs_acceptable f.(fn_params);
-    wt_norepet:
-      Loc.norepet f.(fn_params);
-    wt_instrs:
-      forall pc instr, 
-      f.(fn_code)!pc = Some instr -> wt_instr f instr;
-    wt_entrypoint:
-      valid_successor f f.(fn_entrypoint)
-}.
-
-Inductive wt_fundef: fundef -> Prop :=
-  | wt_fundef_external: forall ef,
-      wt_fundef (External ef)
-  | wt_function_internal: forall f,
-      wt_function f ->
-      wt_fundef (Internal f).
-
-Definition wt_program (p: program): Prop :=
-  forall i f, In (i, Gfun f) (prog_defs p) -> wt_fundef f.
diff --git a/backend/Linear.v b/backend/Linear.v
index 52f5fd7..bdb08b4 100644
--- a/backend/Linear.v
+++ b/backend/Linear.v
@@ -12,10 +12,9 @@
 
 (** The Linear intermediate language: abstract syntax and semantcs *)
 
-(** The Linear language is a variant of LTLin where arithmetic
-    instructions operate on machine registers (type [mreg]) instead
-    of arbitrary locations.  Special instructions [Lgetstack] and
-    [Lsetstack] are provided to access stack slots. *)
+(** The Linear language is a variant of LTL where control-flow is not
+    expressed as a graph of basic blocks, but as a linear list of
+    instructions with explicit labels and ``goto'' instructions. *)
 
 Require Import Coqlib.
 Require Import AST.
@@ -35,14 +34,14 @@ Require Import Conventions.
 Definition label := positive.
 
 Inductive instruction: Type :=
-  | Lgetstack: slot -> mreg -> instruction
-  | Lsetstack: mreg -> slot -> instruction
+  | Lgetstack: slot -> Z -> typ -> mreg -> instruction
+  | Lsetstack: mreg -> slot -> Z -> typ -> instruction
   | Lop: operation -> list mreg -> mreg -> instruction
   | Lload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lcall: signature -> mreg + ident -> instruction
   | Ltailcall: signature -> mreg + ident -> instruction
-  | Lbuiltin: external_function -> list mreg -> mreg -> instruction
+  | Lbuiltin: external_function -> list mreg -> list mreg -> instruction
   | Lannot: external_function -> list loc -> instruction
   | Llabel: label -> instruction
   | Lgoto: label -> instruction
@@ -114,73 +113,6 @@ Definition find_function (ros: mreg + ident) (rs: locset) : option fundef :=
       end
   end.
 
-Definition reglist (rs: locset) (rl: list mreg) : list val :=
-  List.map (fun r => rs (R r)) rl.
-
-(** Calling conventions are reflected at the level of location sets
-  (environments mapping locations to values) by the following two 
-  functions.  
-
-  [call_regs caller] returns the location set at function entry,
-  as a function of the location set [caller] of the calling function.
-- Temporary registers are undefined.
-- Other machine registers have the same values as in the caller.
-- Incoming stack slots (used for parameter passing) have the same
-  values as the corresponding outgoing stack slots (used for argument
-  passing) in the caller.
-- Local and outgoing stack slots are initialized to undefined values.
-*) 
-
-Definition call_regs (caller: locset) : locset :=
-  fun (l: loc) =>
-    match l with
-    | R r => if In_dec Loc.eq (R r) temporaries then Vundef else caller (R r)
-    | S (Local ofs ty) => Vundef
-    | S (Incoming ofs ty) => caller (S (Outgoing ofs ty))
-    | S (Outgoing ofs ty) => Vundef
-    end.
-
-(** [return_regs caller callee] returns the location set after
-  a call instruction, as a function of the location set [caller]
-  of the caller before the call instruction and of the location
-  set [callee] of the callee at the return instruction.
-- Callee-save machine registers have the same values as in the caller
-  before the call.
-- Caller-save machine registers have the same values
-  as in the callee at the return point.
-- Stack slots have the same values as in the caller before the call.
-*)
-
-Definition return_regs (caller callee: locset) : locset :=
-  fun (l: loc) =>
-    match l with
-    | R r =>
-        if In_dec Loc.eq (R r) temporaries then
-          callee (R r)
-        else if In_dec Loc.eq (R r) destroyed_at_call then
-          callee (R r)
-        else
-          caller (R r)
-    | S s => caller (S s)
-    end.
-
-(** Temporaries destroyed across operations *)
-
-Definition undef_op (op: operation) (rs: locset) :=
-  match op with
-  | Omove => Locmap.undef destroyed_at_move rs
-  | _ => Locmap.undef temporaries rs
-  end.
-
-Definition undef_getstack (s: slot) (rs: locset) :=
-  match s with
-  | Incoming _ _ => Locmap.set (R IT1) Vundef rs
-  | _ => rs
-  end.
-
-Definition undef_setstack (rs: locset) :=
-  Locmap.undef destroyed_at_move rs.
-
 (** Linear execution states. *)
 
 Inductive stackframe: Type :=
@@ -214,73 +146,43 @@ Inductive state: Type :=
 
 (** [parent_locset cs] returns the mapping of values for locations
   of the caller function. *)
-
 Definition parent_locset (stack: list stackframe) : locset :=
   match stack with
   | nil => Locmap.init Vundef
   | Stackframe f sp ls c :: stack' => ls
   end.
 
-(** The main difference between the Linear transition relation
-  and the LTL transition relation is the handling of function calls.
-  In LTL, arguments and results to calls are transmitted via
-  [vargs] and [vres] components of [Callstate] and [Returnstate],
-  respectively.  The semantics takes care of transferring these values
-  between the locations of the caller and of the callee.
-  
-  In Linear and lower-level languages (Mach, PPC), arguments and
-  results are transmitted implicitly: the generated code for the
-  caller arranges for arguments to be left in conventional registers
-  and stack locations, as determined by the calling conventions, where
-  the function being called will find them.  Similarly, conventional
-  registers will be used to pass the result value back to the caller.
-  This is reflected in the definition of [Callstate] and [Returnstate] 
-  above, where a whole location state [rs] is passed instead of just
-  the values of arguments or returns as in LTL.
-
-  These location states passed across calls are treated in a way that
-  reflects the callee-save/caller-save convention:
-- The [exec_Lcall] transition from [State] to [Callstate]
-  saves the current location state [ls] in the call stack,
-  and passes it to the callee.
-- The [exec_function_internal] transition from [Callstate] to [State]
-  changes the view of stack slots ([Outgoing] slots slide to
-  [Incoming] slots as per [call_regs]).
-- The [exec_Lreturn] transition from [State] to [Returnstate]
-  restores the values of callee-save locations from
-  the location state of the caller, using [return_regs].
-
-This protocol makes it much easier to later prove the correctness of
-the [Stacking] pass, which inserts actual code that performs the
-saving and restoring of callee-save registers described above.
-*)
-
 Inductive step: state -> trace -> state -> Prop :=
   | exec_Lgetstack:
-      forall s f sp sl r b rs m,
-      step (State s f sp (Lgetstack sl r :: b) rs m)
-        E0 (State s f sp b (Locmap.set (R r) (rs (S sl)) (undef_getstack sl rs)) m)
+      forall s f sp sl ofs ty dst b rs m rs',
+      rs' = Locmap.set (R dst) (rs (S sl ofs ty)) (undef_regs (destroyed_by_getstack sl) rs) ->
+      step (State s f sp (Lgetstack sl ofs ty dst :: b) rs m)
+        E0 (State s f sp b rs' m)
   | exec_Lsetstack:
-      forall s f sp r sl b rs m,
-      step (State s f sp (Lsetstack r sl :: b) rs m)
-        E0 (State s f sp b (Locmap.set (S sl) (rs (R r)) (undef_setstack rs)) m)
+      forall s f sp src sl ofs ty b rs m rs',
+      rs' = Locmap.set (S sl ofs ty) (rs (R src)) (undef_regs (destroyed_by_op Omove) rs) ->
+      step (State s f sp (Lsetstack src sl ofs ty :: b) rs m)
+        E0 (State s f sp b rs' m)
   | exec_Lop:
-      forall s f sp op args res b rs m v,
+      forall s f sp op args res b rs m v rs',
       eval_operation ge sp op (reglist rs args) m = Some v ->
+      rs' = Locmap.set (R res) v (undef_regs (destroyed_by_op op) rs) ->
       step (State s f sp (Lop op args res :: b) rs m)
-        E0 (State s f sp b (Locmap.set (R res) v (undef_op op rs)) m)
+        E0 (State s f sp b rs' m)
   | exec_Lload:
-      forall s f sp chunk addr args dst b rs m a v,
+      forall s f sp chunk addr args dst b rs m a v rs',
       eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.loadv chunk m a = Some v ->
+      rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) ->
       step (State s f sp (Lload chunk addr args dst :: b) rs m)
-        E0 (State s f sp b (Locmap.set (R dst) v (undef_temps rs)) m)
+        E0 (State s f sp b rs' m)
   | exec_Lstore:
-      forall s f sp chunk addr args src b rs m m' a,
+      forall s f sp chunk addr args src b rs m m' a rs',
       eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.storev chunk m a (rs (R src)) = Some m' ->
+      rs' = undef_regs (destroyed_by_store chunk addr) rs ->
       step (State s f sp (Lstore chunk addr args src :: b) rs m)
-        E0 (State s f sp b (undef_temps rs) m')
+        E0 (State s f sp b rs' m')
   | exec_Lcall:
       forall s f sp sig ros b rs m f',
       find_function ros rs = Some f' ->
@@ -288,20 +190,22 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Lcall sig ros :: b) rs m)
         E0 (Callstate (Stackframe f sp rs b:: s) f' rs m)
   | exec_Ltailcall:
-      forall s f stk sig ros b rs m f' m',
-      find_function ros rs = Some f' ->
+      forall s f stk sig ros b rs m rs' f' m',
+      rs' = return_regs (parent_locset s) rs ->
+      find_function ros rs' = Some f' ->
       sig = funsig f' ->
       Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
       step (State s f (Vptr stk Int.zero) (Ltailcall sig ros :: b) rs m)
-        E0 (Callstate s f' (return_regs (parent_locset s) rs) m')
+        E0 (Callstate s f' rs' m')
   | exec_Lbuiltin:
-      forall s f sp rs m ef args res b t v m',
-      external_call ef ge (reglist rs args) m t v m' ->
+      forall s f sp rs m ef args res b t vl rs' m',
+      external_call' ef ge (reglist rs args) m t vl m' ->
+      rs' = Locmap.setlist (map R res) vl (undef_regs (destroyed_by_builtin ef) rs) ->
       step (State s f sp (Lbuiltin ef args res :: b) rs m)
-         t (State s f sp b (Locmap.set (R res) v (undef_temps rs)) m')
+         t (State s f sp b rs' m')
   | exec_Lannot:
       forall s f sp rs m ef args b t v m',
-      external_call ef ge (map rs args) m t v m' ->
+      external_call' ef ge (map rs args) m t v m' ->
       step (State s f sp (Lannot ef args :: b) rs m)
          t (State s f sp b rs m')
   | exec_Llabel:
@@ -314,38 +218,42 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Lgoto lbl :: b) rs m)
         E0 (State s f sp b' rs m)
   | exec_Lcond_true:
-      forall s f sp cond args lbl b rs m b',
+      forall s f sp cond args lbl b rs m rs' b',
       eval_condition cond (reglist rs args) m = Some true ->
+      rs' = undef_regs (destroyed_by_cond cond) rs ->
       find_label lbl f.(fn_code) = Some b' ->
       step (State s f sp (Lcond cond args lbl :: b) rs m)
-        E0 (State s f sp b' (undef_temps rs) m)
+        E0 (State s f sp b' rs' m)
   | exec_Lcond_false:
-      forall s f sp cond args lbl b rs m,
+      forall s f sp cond args lbl b rs m rs',
       eval_condition cond (reglist rs args) m = Some false ->
+      rs' = undef_regs (destroyed_by_cond cond) rs ->
       step (State s f sp (Lcond cond args lbl :: b) rs m)
-        E0 (State s f sp b (undef_temps rs) m)
+        E0 (State s f sp b rs' m)
   | exec_Ljumptable:
-      forall s f sp arg tbl b rs m n lbl b',
+      forall s f sp arg tbl b rs m n lbl b' rs',
       rs (R arg) = Vint n ->
       list_nth_z tbl (Int.unsigned n) = Some lbl ->
       find_label lbl f.(fn_code) = Some b' ->
+      rs' = undef_regs (destroyed_by_jumptable) rs ->
       step (State s f sp (Ljumptable arg tbl :: b) rs m)
-        E0 (State s f sp b' (undef_temps rs) m)
+        E0 (State s f sp b' rs' m)
   | exec_Lreturn:
       forall s f stk b rs m m',
       Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
       step (State s f (Vptr stk Int.zero) (Lreturn :: b) rs m)
         E0 (Returnstate s (return_regs (parent_locset s) rs) m')
   | exec_function_internal:
-      forall s f rs m m' stk,
+      forall s f rs m rs' m' stk,
       Mem.alloc m 0 f.(fn_stacksize) = (m', stk) ->
+      rs' = undef_regs destroyed_at_function_entry (call_regs rs) ->
       step (Callstate s (Internal f) rs m)
-        E0 (State s f (Vptr stk Int.zero) f.(fn_code) (call_regs rs) m')
+        E0 (State s f (Vptr stk Int.zero) f.(fn_code) rs' m')
   | exec_function_external:
       forall s ef args res rs1 rs2 m t m',
-      external_call ef ge args m t res m' ->
-      args = List.map rs1 (loc_arguments (ef_sig ef)) ->
-      rs2 = Locmap.set (R (loc_result (ef_sig ef))) res rs1 ->
+      args = map rs1 (loc_arguments (ef_sig ef)) ->
+      external_call' ef ge args m t res m' ->
+      rs2 = Locmap.setlist (map R (loc_result (ef_sig ef))) res rs1 ->
       step (Callstate s (External ef) rs1 m)
          t (Returnstate s rs2 m')
   | exec_return:
@@ -365,9 +273,10 @@ Inductive initial_state (p: program): state -> Prop :=
       initial_state p (Callstate nil f (Locmap.init Vundef) m0).
 
 Inductive final_state: state -> int -> Prop :=
-  | final_state_intro: forall rs m r,
-      rs (R (loc_result (mksignature nil (Some Tint)))) = Vint r ->
-      final_state (Returnstate nil rs m) r.
+  | final_state_intro: forall rs m r retcode,
+      loc_result (mksignature nil (Some Tint)) = r :: nil ->
+      rs (R r) = Vint retcode ->
+      final_state (Returnstate nil rs m) retcode.
 
 Definition semantics (p: program) :=
   Semantics step (initial_state p) final_state (Genv.globalenv p).
diff --git a/backend/Linearize.v b/backend/Linearize.v
index 6fc8e48..388f459 100644
--- a/backend/Linearize.v
+++ b/backend/Linearize.v
@@ -10,8 +10,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Linearization of the control-flow graph: 
-    translation from LTL to LTLin *)
+(** Linearization of the control-flow graph: translation from LTL to Linear *)
 
 Require Import Coqlib.
 Require Import Maps.
@@ -23,26 +22,26 @@ Require Import Errors.
 Require Import Op.
 Require Import Locations.
 Require Import LTL.
-Require Import LTLin.
+Require Import Linear.
 Require Import Kildall.
 Require Import Lattice.
 
 Open Scope error_monad_scope.
 
-(** To translate from LTL to LTLin, we must lay out the nodes
+(** To translate from LTL to Linear, we must lay out the nodes
   of the LTL control-flow graph in some linear order, and insert
   explicit branches and conditional branches to make sure that
   each node jumps to its successors as prescribed by the
   LTL control-flow graph.  However, branches are not necessary
   if the fall-through behaviour of LTLin instructions already
   implements the desired flow of control.  For instance,
-  consider the two LTL instructions
+  consider the two LTL blocks
 <<
-    L1: Lop op args res L2
+    L1: Lop op args res; Lbranch L2
     L2: ...
 >>
   If the instructions [L1] and [L2] are laid out consecutively in the LTLin
-  code, we can generate the following LTLin code:
+  code, we can generate the following Linear code:
 <<
     L1: Lop op args res
     L2: ...
@@ -65,7 +64,7 @@ Open Scope error_monad_scope.
 - Choosing an order for the nodes.  This returns an enumeration of CFG
   nodes stating that they must be laid out in the order shown in the
   list.
-- Generate LTLin code where each node branches explicitly to its
+- Generate Linear code where each node branches explicitly to its
   successors, except if one of these successors is the immediately
   following instruction.
 
@@ -102,7 +101,7 @@ Definition reachable (f: LTL.function) : PMap.t bool :=
   | Some rs => rs
   end.
 
-(** We then enumerate the nodes of reachable instructions.
+(** We then enumerate the nodes of reachable blocks.
   This task is performed by external, untrusted Caml code. *)
 
 Parameter enumerate_aux: LTL.function -> PMap.t bool -> list node.
@@ -126,7 +125,7 @@ Fixpoint nodeset_of_list (l: list node) (s: Nodeset.t)
 
 Definition check_reachable_aux
      (reach: PMap.t bool) (s: Nodeset.t)
-     (ok: bool) (pc: node) (i: LTL.instruction) : bool :=
+     (ok: bool) (pc: node) (bb: LTL.bblock) : bool :=
   if reach!!pc then ok && Nodeset.mem pc s else ok.
 
 Definition check_reachable
@@ -141,10 +140,10 @@ Definition enumerate (f: LTL.function) : res (list node) :=
   then OK enum
   else Error (msg "Linearize: wrong enumeration").
 
-(** * Translation from LTL to LTLin *)
+(** * Translation from LTL to Linear *)
 
 (** We now flatten the structure of the CFG graph, laying out
-  LTL instructions consecutively in the order computed by [enumerate],
+  LTL blocks consecutively in the order computed by [enumerate],
   and inserting branches to the labels of sucessors if necessary.
   Whether to insert a branch or not is determined by
   the [starts_with] function below.
@@ -169,31 +168,38 @@ Fixpoint starts_with (lbl: label) (k: code) {struct k} : bool :=
 Definition add_branch (s: label) (k: code) : code :=
   if starts_with s k then k else Lgoto s :: k.
 
-Definition linearize_instr (b: LTL.instruction) (k: code) : code :=
+Fixpoint linearize_block (b: LTL.bblock) (k: code) : code :=
   match b with
-  | LTL.Lnop s =>
+  | nil => k
+  | LTL.Lop op args res :: b' =>
+      Lop op args res :: linearize_block b' k
+  | LTL.Lload chunk addr args dst :: b' =>
+      Lload chunk addr args dst :: linearize_block b' k
+  | LTL.Lgetstack sl ofs ty dst :: b' =>
+      Lgetstack sl ofs ty dst :: linearize_block b' k
+  | LTL.Lsetstack src sl ofs ty :: b' =>
+      Lsetstack src sl ofs ty :: linearize_block b' k
+  | LTL.Lstore chunk addr args src :: b' =>
+      Lstore chunk addr args src :: linearize_block b' k
+  | LTL.Lcall sig ros :: b' =>
+      Lcall sig ros :: linearize_block b' k
+  | LTL.Ltailcall sig ros :: b' =>
+      Ltailcall sig ros :: k
+  | LTL.Lbuiltin ef args res :: b' =>
+      Lbuiltin ef args res :: linearize_block b' k
+  | LTL.Lannot ef args :: b' =>
+      Lannot ef args :: linearize_block b' k
+  | LTL.Lbranch s :: b' =>
       add_branch s k
-  | LTL.Lop op args res s =>
-      Lop op args res :: add_branch s k
-  | LTL.Lload chunk addr args dst s =>
-      Lload chunk addr args dst :: add_branch s k
-  | LTL.Lstore chunk addr args src s =>
-      Lstore chunk addr args src :: add_branch s k
-  | LTL.Lcall sig ros args res s =>
-      Lcall sig ros args res :: add_branch s k
-  | LTL.Ltailcall sig ros args =>
-      Ltailcall sig ros args :: k
-  | LTL.Lbuiltin ef args res s =>
-      Lbuiltin ef args res :: add_branch s k
-  | LTL.Lcond cond args s1 s2 =>
+  | LTL.Lcond cond args s1 s2 :: b' =>
       if starts_with s1 k then
         Lcond (negate_condition cond) args s2 :: add_branch s1 k
       else
         Lcond cond args s1 :: add_branch s2 k
-  | LTL.Ljumptable arg tbl =>
+  | LTL.Ljumptable arg tbl :: b' =>
       Ljumptable arg tbl :: k
-  | LTL.Lreturn or =>
-      Lreturn or :: k
+  | LTL.Lreturn :: b' =>
+      Lreturn :: k
   end.
 
 (** Linearize a function body according to an enumeration of its nodes.  *)
@@ -201,7 +207,7 @@ Definition linearize_instr (b: LTL.instruction) (k: code) : code :=
 Definition linearize_node (f: LTL.function) (pc: node) (k: code) : code :=
   match f.(LTL.fn_code)!pc with
   | None => k
-  | Some b => Llabel pc :: linearize_instr b k
+  | Some b => Llabel pc :: linearize_block b k
   end.
 
 Definition linearize_body (f: LTL.function) (enum: list node) : code :=
@@ -209,16 +215,15 @@ Definition linearize_body (f: LTL.function) (enum: list node) : code :=
 
 (** * Entry points for code linearization *)
 
-Definition transf_function (f: LTL.function) : res LTLin.function :=
+Definition transf_function (f: LTL.function) : res Linear.function :=
   do enum <- enumerate f;
   OK (mkfunction
        (LTL.fn_sig f)
-       (LTL.fn_params f)
        (LTL.fn_stacksize f)
        (add_branch (LTL.fn_entrypoint f) (linearize_body f enum))).
 
-Definition transf_fundef (f: LTL.fundef) : res LTLin.fundef :=
+Definition transf_fundef (f: LTL.fundef) : res Linear.fundef :=
   AST.transf_partial_fundef transf_function f.
 
-Definition transf_program (p: LTL.program) : res LTLin.program :=
+Definition transf_program (p: LTL.program) : res Linear.program :=
   transform_partial_program transf_fundef p.
diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml
index ac47ae8..5bb5838 100644
--- a/backend/Linearizeaux.ml
+++ b/backend/Linearizeaux.ml
@@ -47,8 +47,12 @@ let join_points f =
         reached_twice := IntSet.add npc !reached_twice
     end else begin
       reached := IntSet.add npc !reached;
-      List.iter traverse (Kildall.successors_list succs pc)
+      traverse_succs (Kildall.successors_list succs pc)
     end
+  and traverse_succs = function
+    | [] -> ()
+    | [pc] -> traverse pc
+    | pc :: l -> traverse pc; traverse_succs l
   in traverse f.fn_entrypoint; !reached_twice
 
 (* Cut into reachable basic blocks, annotated with the min value of the PC *)
@@ -73,20 +77,18 @@ let basic_blocks f joins =
     let minpc = min npc minpc in
     match PTree.get pc f.fn_code with
     | None -> assert false
-    | Some i ->
-       match i with
-         | Lnop s -> next_in_block blk minpc s
-         | Lop (op, args, res, s) -> next_in_block blk minpc s
-         | Lload (chunk, addr, args, dst, s) -> next_in_block blk minpc s
-         | Lstore (chunk, addr, args, src, s) -> next_in_block blk minpc s
-         | Lcall (sig0, ros, args, res, s) -> next_in_block blk minpc s
-         | Ltailcall (sig0, ros, args) -> end_block blk minpc
-         | Lbuiltin (ef, args, res, s) -> next_in_block blk minpc s
-         | Lcond (cond, args, ifso, ifnot) ->
+    | Some b ->
+       let rec do_instr_list = function
+       | [] -> assert false
+       | Lbranch s :: _ -> next_in_block blk minpc s
+       | Ltailcall (sig0, ros) :: _ -> end_block blk minpc
+       | Lcond (cond, args, ifso, ifnot) :: _ ->
              end_block blk minpc; start_block ifso; start_block ifnot
-         | Ljumptable(arg, tbl) ->
+       | Ljumptable(arg, tbl) :: _ ->
              end_block blk minpc; List.iter start_block tbl
-         | Lreturn optarg -> end_block blk minpc
+       | Lreturn :: _ -> end_block blk minpc
+       | instr :: b' -> do_instr_list b' in
+       do_instr_list b
   (* next_in_block: check if join point and either extend block
      or start block *)
   and next_in_block blk minpc pc =
diff --git a/backend/Linearizeproof.v b/backend/Linearizeproof.v
index d368311..2548580 100644
--- a/backend/Linearizeproof.v
+++ b/backend/Linearizeproof.v
@@ -12,10 +12,11 @@
 
 (** Correctness proof for code linearization *)
 
+Require Import FSets.
 Require Import Coqlib.
 Require Import Maps.
 Require Import Ordered.
-Require Import FSets.
+Require Import Lattice.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
@@ -27,17 +28,15 @@ Require Import Smallstep.
 Require Import Op.
 Require Import Locations.
 Require Import LTL.
-Require Import LTLtyping.
-Require Import LTLin.
+Require Import Linear.
 Require Import Linearize.
-Require Import Lattice.
 
 Module NodesetFacts := FSetFacts.Facts(Nodeset).
 
 Section LINEARIZATION.
 
 Variable prog: LTL.program.
-Variable tprog: LTLin.program.
+Variable tprog: Linear.program.
 
 Hypothesis TRANSF: transf_program prog = OK tprog.
 
@@ -70,7 +69,7 @@ Proof (Genv.find_var_info_transf_partial transf_fundef _ TRANSF).
 Lemma sig_preserved:
   forall f tf,
   transf_fundef f = OK tf ->
-  LTLin.funsig tf = LTL.funsig f.
+  Linear.funsig tf = LTL.funsig f.
 Proof.
   unfold transf_fundef, transf_partial_fundef; intros.
   destruct f. monadInv H. monadInv EQ. reflexivity.
@@ -80,7 +79,7 @@ Qed.
 Lemma stacksize_preserved:
   forall f tf,
   transf_function f = OK tf ->
-  LTLin.fn_stacksize tf = LTL.fn_stacksize f.
+  Linear.fn_stacksize tf = LTL.fn_stacksize f.
 Proof.
   intros. monadInv H. auto.
 Qed.
@@ -117,8 +116,8 @@ Qed.
 (** The successors of a reachable instruction are reachable. *)
 
 Lemma reachable_successors:
-  forall f pc pc' i,
-  f.(LTL.fn_code)!pc = Some i -> In pc' (successors_instr i) ->
+  forall f pc pc' b,
+  f.(LTL.fn_code)!pc = Some b -> In pc' (successors_block b) ->
   (reachable f)!!pc = true ->
   (reachable f)!!pc' = true.
 Proof.
@@ -222,7 +221,7 @@ Qed.
 
 (** * Properties related to labels *)
 
-(** If labels are globally unique and the LTLin code [c] contains
+(** If labels are globally unique and the Linear code [c] contains
   a subsequence [Llabel lbl :: c1], then [find_label lbl c] returns [c1].
 *)
 
@@ -284,13 +283,13 @@ Proof.
   intros. unfold add_branch. destruct (starts_with s k); auto.
 Qed.
 
-Lemma find_label_lin_instr:
+Lemma find_label_lin_block:
   forall lbl k b,
-  find_label lbl (linearize_instr b k) = find_label lbl k.
+  find_label lbl (linearize_block b k) = find_label lbl k.
 Proof.
   intros lbl k. generalize (find_label_add_branch lbl k); intro.
-  induction b; simpl; auto.
-  case (starts_with n k); simpl; auto.
+  induction b; simpl; auto. destruct a; simpl; auto. 
+  case (starts_with s1 k); simpl; auto.
 Qed.
 
 Remark linearize_body_cons:
@@ -298,7 +297,7 @@ Remark linearize_body_cons:
   linearize_body f (pc :: enum) =
   match f.(LTL.fn_code)!pc with
   | None => linearize_body f enum
-  | Some b => Llabel pc :: linearize_instr b (linearize_body f enum)
+  | Some b => Llabel pc :: linearize_block b (linearize_body f enum)
   end.
 Proof.
   intros. unfold linearize_body. rewrite list_fold_right_eq. 
@@ -309,7 +308,7 @@ Lemma find_label_lin_rec:
   forall f enum pc b,
   In pc enum ->
   f.(LTL.fn_code)!pc = Some b ->
-  exists k, find_label pc (linearize_body f enum) = Some (linearize_instr b k).
+  exists k, find_label pc (linearize_body f enum) = Some (linearize_block b k).
 Proof.
   induction enum; intros.
   elim H.
@@ -320,7 +319,7 @@ Proof.
   assert (In pc enum). simpl in H. tauto.
   destruct (IHenum pc b H1 H0) as [k FIND].
   exists k. destruct (LTL.fn_code f)!a. 
-  simpl. rewrite peq_false. rewrite find_label_lin_instr. auto. auto.
+  simpl. rewrite peq_false. rewrite find_label_lin_block. auto. auto.
   auto.
 Qed.
 
@@ -330,7 +329,7 @@ Lemma find_label_lin:
   f.(LTL.fn_code)!pc = Some b ->
   (reachable f)!!pc = true ->
   exists k,
-  find_label pc (fn_code tf) = Some (linearize_instr b k).
+  find_label pc (fn_code tf) = Some (linearize_block b k).
 Proof.
   intros. monadInv H. simpl. 
   rewrite find_label_add_branch. apply find_label_lin_rec.
@@ -343,25 +342,12 @@ Lemma find_label_lin_inv:
   f.(LTL.fn_code)!pc = Some b ->
   (reachable f)!!pc = true ->
   find_label pc (fn_code tf) = Some k ->
-  exists k', k = linearize_instr b k'.
+  exists k', k = linearize_block b k'.
 Proof.
   intros. exploit find_label_lin; eauto. intros [k' FIND].
   exists k'. congruence.
 Qed.
 
-Lemma find_label_lin_succ:
-  forall f tf s,
-  transf_function f = OK tf ->
-  valid_successor f s ->
-  (reachable f)!!s = true ->
-  exists k,
-  find_label s (fn_code tf) = Some k.
-Proof.
-  intros. destruct H0 as [i AT]. 
-  exploit find_label_lin; eauto. intros [k FIND].
-  exists (linearize_instr i k); auto.
-Qed.
-
 (** Unique label property for linearized code. *)
 
 Lemma label_in_add_branch:
@@ -372,16 +358,16 @@ Proof.
   destruct (starts_with s k); simpl; intuition congruence.
 Qed.
 
-Lemma label_in_lin_instr:
+Lemma label_in_lin_block:
   forall lbl k b,
-  In (Llabel lbl) (linearize_instr b k) -> In (Llabel lbl) k.
+  In (Llabel lbl) (linearize_block b k) -> In (Llabel lbl) k.
 Proof.
-  induction b; simpl; intros;
-  try (apply label_in_add_branch with n; intuition congruence);
-  try (intuition congruence).
-  destruct (starts_with n k); simpl in H.
-  apply label_in_add_branch with n; intuition congruence.
-  apply label_in_add_branch with n0; intuition congruence.
+  induction b; simpl; intros. auto.
+  destruct a; simpl in H; try (intuition congruence).
+  apply label_in_add_branch with s; intuition congruence.
+  destruct (starts_with s1 k); simpl in H.
+  apply label_in_add_branch with s1; intuition congruence.
+  apply label_in_add_branch with s2; intuition congruence.
 Qed.
 
 Lemma label_in_lin_rec:
@@ -392,7 +378,7 @@ Proof.
   simpl; auto.
   rewrite linearize_body_cons. destruct (LTL.fn_code f)!a. 
   simpl. intros [A|B]. left; congruence. 
-  right. apply IHenum. eapply label_in_lin_instr; eauto.
+  right. apply IHenum. eapply label_in_lin_block; eauto.
   intro; right; auto.
 Qed.
 
@@ -404,12 +390,13 @@ Proof.
   destruct (starts_with lbl k); simpl; intuition.
 Qed.
 
-Lemma unique_labels_lin_instr:
+Lemma unique_labels_lin_block:
   forall k b,
-  unique_labels k -> unique_labels (linearize_instr b k).
+  unique_labels k -> unique_labels (linearize_block b k).
 Proof.
-  induction b; intro; simpl; auto; try (apply unique_labels_add_branch; auto).
-  case (starts_with n k); simpl; apply unique_labels_add_branch; auto.
+  induction b; intros; simpl. auto.
+  destruct a; auto; try (apply unique_labels_add_branch; auto).
+  case (starts_with s1 k); simpl; apply unique_labels_add_branch; auto.
 Qed.
 
 Lemma unique_labels_lin_rec:
@@ -423,8 +410,8 @@ Proof.
   intro. destruct (LTL.fn_code f)!a. 
   simpl. split. red. intro. inversion H. elim H3.
   apply label_in_lin_rec with f. 
-  apply label_in_lin_instr with i. auto.
-  apply unique_labels_lin_instr. apply IHenum. inversion H; auto.
+  apply label_in_lin_block with b. auto.
+  apply unique_labels_lin_block. apply IHenum. inversion H; auto.
   apply IHenum. inversion H; auto.
 Qed.
 
@@ -458,6 +445,17 @@ Proof.
   auto. eauto with coqlib.
 Qed.
 
+Lemma is_tail_lin_block:
+  forall b c1 c2,
+  is_tail (linearize_block b c1) c2 -> is_tail c1 c2.
+Proof.
+  induction b; simpl; intros.
+  auto.
+  destruct a; eauto with coqlib. 
+  eapply is_tail_add_branch; eauto.
+  destruct (starts_with s1 c1); eapply is_tail_add_branch; eauto with coqlib.
+Qed.
+
 Lemma add_branch_correct:
   forall lbl c k s f tf sp ls m,
   transf_function f = OK tf ->
@@ -475,12 +473,11 @@ Qed.
 
 (** * Correctness of linearization *)
 
-(** The proof of semantic preservation is a simulation argument
-  based on diagrams of the following form:
+(** The proof of semantic preservation is a simulation argument of the "star" kind:
 <<
            st1 --------------- st2
             |                   |
-           t|                  +|t
+           t|                  t| + or ( 0 \/ |st1'| < |st1| )
             |                   |
             v                   v
            st1'--------------- st2'
@@ -492,273 +489,260 @@ Qed.
   control-flow graph, the transformed state is at a code
   sequence [c] that starts with the label [pc]. *)
 
-Inductive match_stackframes: LTL.stackframe -> LTLin.stackframe -> Prop :=
+Inductive match_stackframes: LTL.stackframe -> Linear.stackframe -> Prop :=
   | match_stackframe_intro:
-      forall res f sp pc ls tf c,
+      forall f sp bb ls tf c,
       transf_function f = OK tf ->
-      (reachable f)!!pc = true ->
-      valid_successor f pc ->
-      is_tail c (fn_code tf) ->
-      wt_function f ->
+      (forall pc, In pc (successors_block bb) -> (reachable f)!!pc = true) ->
+      is_tail c tf.(fn_code) ->
       match_stackframes
-        (LTL.Stackframe res f sp ls pc)
-        (LTLin.Stackframe res tf sp ls (add_branch pc c)).
+        (LTL.Stackframe f sp ls bb)
+        (Linear.Stackframe tf sp ls (linearize_block bb c)).
 
-Inductive match_states: LTL.state -> LTLin.state -> Prop :=
-  | match_states_intro:
+Inductive match_states: LTL.state -> Linear.state -> Prop :=
+  | match_states_add_branch:
       forall s f sp pc ls m tf ts c
         (STACKS: list_forall2 match_stackframes s ts)
         (TRF: transf_function f = OK tf)
         (REACH: (reachable f)!!pc = true)
-        (AT: find_label pc (fn_code tf) = Some c)
-        (WTF: wt_function f),
+        (TAIL: is_tail c tf.(fn_code)),
       match_states (LTL.State s f sp pc ls m)
-                   (LTLin.State ts tf sp c ls m)
+                   (Linear.State ts tf sp (add_branch pc c) ls m)
+  | match_states_cond_taken:
+      forall s f sp pc ls m tf ts cond args c
+        (STACKS: list_forall2 match_stackframes s ts)
+        (TRF: transf_function f = OK tf)
+        (REACH: (reachable f)!!pc = true)
+        (JUMP: eval_condition cond (reglist ls args) m = Some true),
+      match_states (LTL.State s f sp pc (undef_regs (destroyed_by_cond cond) ls) m)
+                   (Linear.State ts tf sp (Lcond cond args pc :: c) ls m)
+  | match_states_jumptable:
+      forall s f sp pc ls m tf ts arg tbl c n
+        (STACKS: list_forall2 match_stackframes s ts)
+        (TRF: transf_function f = OK tf)
+        (REACH: (reachable f)!!pc = true)
+        (ARG: ls (R arg) = Vint n)
+        (JUMP: list_nth_z tbl (Int.unsigned n) = Some pc),
+      match_states (LTL.State s f sp pc (undef_regs destroyed_by_jumptable ls) m)
+                   (Linear.State ts tf sp (Ljumptable arg tbl :: c) ls m)
+  | match_states_block:
+      forall s f sp bb ls m tf ts c
+        (STACKS: list_forall2 match_stackframes s ts)
+        (TRF: transf_function f = OK tf)
+        (REACH: forall pc, In pc (successors_block bb) -> (reachable f)!!pc = true)
+        (TAIL: is_tail c tf.(fn_code)),
+      match_states (LTL.Block s f sp bb ls m)
+                   (Linear.State ts tf sp (linearize_block bb c) ls m)
   | match_states_call:
       forall s f ls m tf ts,
       list_forall2 match_stackframes s ts ->
       transf_fundef f = OK tf ->
-      wt_fundef f ->
       match_states (LTL.Callstate s f ls m)
-                   (LTLin.Callstate ts tf ls m)
+                   (Linear.Callstate ts tf ls m)
   | match_states_return:
       forall s ls m ts,
       list_forall2 match_stackframes s ts ->
       match_states (LTL.Returnstate s ls m)
-                   (LTLin.Returnstate ts ls m).
+                   (Linear.Returnstate ts ls m).
 
-Hypothesis wt_prog: wt_program prog.
+Definition measure (S: LTL.state) : nat :=
+  match S with
+  | LTL.State s f sp pc ls m => 0%nat
+  | LTL.Block s f sp bb ls m => 1%nat
+  | _ => 0%nat
+  end.
+
+Remark match_parent_locset:
+  forall s ts, list_forall2 match_stackframes s ts -> parent_locset ts = LTL.parent_locset s.
+Proof.
+  induction 1; simpl. auto. inv H; auto. 
+Qed.
 
 Theorem transf_step_correct:
   forall s1 t s2, LTL.step ge s1 t s2 ->
   forall s1' (MS: match_states s1 s1'),
-  exists s2', plus LTLin.step tge s1' t s2' /\ match_states s2 s2'.
-Proof.
-  induction 1; intros; try (inv MS);
-  try (generalize (wt_instrs _ WTF _ _ H); intro WTI).
-  (* Lnop *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  assert (REACH': (reachable f)!!pc' = true).
-  eapply reachable_successors; eauto. simpl; auto.
-  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
-  econstructor; split.
-  eapply add_branch_correct; eauto.
-  eapply is_tail_add_branch. eapply is_tail_find_label. eauto.
+  (exists s2', plus Linear.step tge s1' t s2' /\ match_states s2 s2')
+  \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat.
+Proof.
+  induction 1; intros; try (inv MS).
+
+  (* start of block, at an [add_branch] *)
+  exploit find_label_lin; eauto. intros [k F]. 
+  left; econstructor; split.
+  eapply add_branch_correct; eauto. 
+  econstructor; eauto. 
+  intros; eapply reachable_successors; eauto.
+  eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto.
+
+  (* start of block, target of an [Lcond] *)
+  exploit find_label_lin; eauto. intros [k F]. 
+  left; econstructor; split.
+  apply plus_one. eapply exec_Lcond_true; eauto. 
+  econstructor; eauto. 
+  intros; eapply reachable_successors; eauto.
+  eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto.
+
+  (* start of block, target of an [Ljumptable] *)
+  exploit find_label_lin; eauto. intros [k F]. 
+  left; econstructor; split.
+  apply plus_one. eapply exec_Ljumptable; eauto. 
   econstructor; eauto. 
+  intros; eapply reachable_successors; eauto.
+  eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto.
+
   (* Lop *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  assert (REACH': (reachable f)!!pc' = true).
-  eapply reachable_successors; eauto. simpl; auto.
-  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
-  econstructor; split.
-  eapply plus_left'.
-  eapply exec_Lop with (v := v); eauto.
-  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
-  eapply add_branch_correct; eauto.
-  eapply is_tail_add_branch. eapply is_tail_cons_left. 
-  eapply is_tail_find_label. eauto.
-  traceEq.
-  econstructor; eauto.
+  left; econstructor; split. simpl.
+  apply plus_one. econstructor; eauto. 
+  instantiate (1 := v); rewrite <- H; apply eval_operation_preserved. 
+  exact symbols_preserved.
+  econstructor; eauto. 
+
   (* Lload *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  assert (REACH': (reachable f)!!pc' = true).
-  eapply reachable_successors; eauto. simpl; auto.
-  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
-  econstructor; split.
-  eapply plus_left'.
-  apply exec_Lload with a.
-  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto.
-  eapply add_branch_correct; eauto.
-  eapply is_tail_add_branch. eapply is_tail_cons_left. 
-  eapply is_tail_find_label. eauto.
-  traceEq.
+  left; econstructor; split. simpl.
+  apply plus_one. econstructor. 
+  instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved. 
+  exact symbols_preserved. eauto. eauto. 
+  econstructor; eauto.
+
+  (* Lgetstack *)
+  left; econstructor; split. simpl.
+  apply plus_one. econstructor; eauto.
+  econstructor; eauto.
+
+  (* Lsetstack *)
+  left; econstructor; split. simpl.
+  apply plus_one. econstructor; eauto. 
   econstructor; eauto.
+
   (* Lstore *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  assert (REACH': (reachable f)!!pc' = true).
-  eapply reachable_successors; eauto. simpl; auto.
-  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
-  econstructor; split.
-  eapply plus_left'.
-  apply exec_Lstore with a. 
-  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto.
-  eapply add_branch_correct; eauto.
-  eapply is_tail_add_branch. eapply is_tail_cons_left. 
-  eapply is_tail_find_label. eauto.
-  traceEq.
+  left; econstructor; split. simpl.
+  apply plus_one. econstructor. 
+  instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved. 
+  exact symbols_preserved. eauto. eauto. 
   econstructor; eauto.
+
   (* Lcall *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  assert (REACH': (reachable f)!!pc' = true).
-  eapply reachable_successors; eauto. simpl; auto.
-  assert (VALID: valid_successor f pc'). inv WTI; auto.
-  exploit find_function_translated; eauto. intros [tf' [A B]].
-  econstructor; split.
-  apply plus_one. eapply exec_Lcall with (f' := tf'); eauto.
-  symmetry; apply sig_preserved; auto.
-  econstructor; eauto.
-  constructor; auto. econstructor; eauto.
-  eapply is_tail_add_branch. eapply is_tail_cons_left. 
-  eapply is_tail_find_label. eauto.
-  destruct ros; simpl in H0.
-  eapply Genv.find_funct_prop; eauto.
-  destruct (Genv.find_symbol ge i); try discriminate.  
-  eapply Genv.find_funct_ptr_prop; eauto.
+  exploit find_function_translated; eauto. intros [tfd [A B]].
+  left; econstructor; split. simpl.
+  apply plus_one. econstructor; eauto.
+  symmetry; eapply sig_preserved; eauto.
+  econstructor; eauto. constructor; auto. econstructor; eauto. 
 
   (* Ltailcall *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  exploit find_function_translated; eauto. intros [tf' [A B]].
-  econstructor; split.
-  apply plus_one. eapply exec_Ltailcall with (f' := tf'); eauto.
-  symmetry; apply sig_preserved; auto.
-  rewrite (stacksize_preserved _ _ TRF). eauto.
+  exploit find_function_translated; eauto. intros [tfd [A B]].
+  left; econstructor; split. simpl.
+  apply plus_one. econstructor; eauto.
+  rewrite (match_parent_locset _ _ STACKS). eauto.
+  symmetry; eapply sig_preserved; eauto.
+  rewrite (stacksize_preserved _ _ TRF); eauto. 
+  rewrite (match_parent_locset _ _ STACKS).
   econstructor; eauto.
-  destruct ros; simpl in H0.
-  eapply Genv.find_funct_prop; eauto.
-  destruct (Genv.find_symbol ge i); try discriminate.  
-  eapply Genv.find_funct_ptr_prop; eauto.
 
   (* Lbuiltin *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  assert (REACH': (reachable f)!!pc' = true).
-  eapply reachable_successors; eauto. simpl; auto.
-  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
-  econstructor; split.
-  eapply plus_left'.
-  eapply exec_Lbuiltin. 
-  eapply external_call_symbols_preserved; eauto.
+  left; econstructor; split. simpl.
+  apply plus_one. eapply exec_Lbuiltin; eauto.
+  eapply external_call_symbols_preserved'; eauto.
+  exact symbols_preserved. exact varinfo_preserved.
+  econstructor; eauto.
+
+  (* Lannot *)
+  left; econstructor; split. simpl.
+  apply plus_one. eapply exec_Lannot; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
-  eapply add_branch_correct; eauto.
-  eapply is_tail_add_branch. eapply is_tail_cons_left. 
-  eapply is_tail_find_label. eauto.
-  traceEq.
   econstructor; eauto.
 
+  (* Lbranch *)
+  assert ((reachable f)!!pc = true). apply REACH; simpl; auto.
+  right; split. simpl; omega. split. auto. simpl. econstructor; eauto.
+
   (* Lcond *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
+  assert (REACH1: (reachable f)!!pc1 = true) by (apply REACH; simpl; auto).
+  assert (REACH2: (reachable f)!!pc2 = true) by (apply REACH; simpl; auto).
+  simpl linearize_block.
+  destruct (starts_with pc1 c).
+  (* branch if cond is false *)
+  assert (DC: destroyed_by_cond (negate_condition cond) = destroyed_by_cond cond).
+    destruct cond; reflexivity.
   destruct b.
-  (* true *)
-  assert (REACH': (reachable f)!!ifso = true).
-  eapply reachable_successors; eauto. simpl; auto.
-  exploit find_label_lin_succ; eauto. inv WTI; eauto. intros [c'' AT'].
-  destruct (starts_with ifso c').
-  econstructor; split.
-  eapply plus_left'.  
-  eapply exec_Lcond_false; eauto.
-  rewrite eval_negate_condition; rewrite H0; auto.
-  eapply add_branch_correct; eauto.
-  eapply is_tail_add_branch. eapply is_tail_cons_left. 
-  eapply is_tail_find_label. eauto.
-  traceEq.
-  econstructor; eauto.
-  econstructor; split.
-  apply plus_one. eapply exec_Lcond_true; eauto.
-  econstructor; eauto.
-  (* false *)
-  assert (REACH': (reachable f)!!ifnot = true).
-  eapply reachable_successors; eauto. simpl; auto.
-  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
-  destruct (starts_with ifso c').
-  econstructor; split.
-  apply plus_one. eapply exec_Lcond_true; eauto.
-  rewrite eval_negate_condition; rewrite H0; auto.
-  econstructor; eauto.
-  econstructor; split.
-  eapply plus_left'. 
-  eapply exec_Lcond_false; eauto.
-  eapply add_branch_correct; eauto.
-  eapply is_tail_add_branch. eapply is_tail_cons_left. 
-  eapply is_tail_find_label. eauto.
-  traceEq.
+  (* cond is true: no branch *)
+  left; econstructor; split.
+  apply plus_one. eapply exec_Lcond_false. 
+  rewrite eval_negate_condition. rewrite H. auto. eauto.
+  rewrite DC. econstructor; eauto.
+  (* cond is false: branch is taken *)
+  right; split. simpl; omega. split. auto.  rewrite <- DC. econstructor; eauto. 
+  rewrite eval_negate_condition. rewrite H. auto.
+  (* branch if cond is true *)
+  destruct b.
+  (* cond is true: branch is taken *)
+  right; split. simpl; omega. split. auto. econstructor; eauto. 
+  (* cond is false: no branch *)
+  left; econstructor; split.
+  apply plus_one. eapply exec_Lcond_false. eauto. eauto. 
   econstructor; eauto.
 
   (* Ljumptable *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  assert (REACH': (reachable f)!!pc' = true).
-  eapply reachable_successors; eauto. simpl. eapply list_nth_z_in; eauto.
-  exploit find_label_lin_succ; eauto.
-  inv WTI. apply H6. eapply list_nth_z_in; eauto. 
-  intros [c'' AT'].
-  econstructor; split.
-  apply plus_one. eapply exec_Ljumptable; eauto. 
-  econstructor; eauto.
+  assert (REACH': (reachable f)!!pc = true).
+    apply REACH. simpl. eapply list_nth_z_in; eauto. 
+  right; split. simpl; omega. split. auto. econstructor; eauto. 
 
   (* Lreturn *)
-  destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ].
-  simpl in EQ. subst c.
-  econstructor; split.
-  apply plus_one. eapply exec_Lreturn; eauto.
+  left; econstructor; split.
+  simpl. apply plus_one. econstructor; eauto.
   rewrite (stacksize_preserved _ _ TRF). eauto.
-  econstructor; eauto.
- 
-  (* internal function *)
+  rewrite (match_parent_locset _ _ STACKS). econstructor; eauto.
+
+  (* internal functions *)
   assert (REACH: (reachable f)!!(LTL.fn_entrypoint f) = true).
     apply reachable_entrypoint.
-  inv H7. monadInv H6.   
-  exploit find_label_lin_succ; eauto. inv H1; auto. intros [c'' AT'].
-  generalize EQ; intro. monadInv EQ0. econstructor; simpl; split.
-  eapply plus_left'.  
-  eapply exec_function_internal; eauto.
-  simpl. eapply add_branch_correct. eauto.  
-  simpl. eapply is_tail_add_branch. constructor. eauto.
-  traceEq.
-  econstructor; eauto.
+  monadInv H7.
+  left; econstructor; split.
+  apply plus_one. eapply exec_function_internal; eauto. 
+  rewrite (stacksize_preserved _ _ EQ). eauto.
+  generalize EQ; intro EQ'; monadInv EQ'. simpl. 
+  econstructor; eauto. simpl. eapply is_tail_add_branch. constructor.
 
   (* external function *)
-  monadInv H6. econstructor; split.
+  monadInv H8. left; econstructor; split.
   apply plus_one. eapply exec_function_external; eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto.
 
   (* return *)
   inv H3. inv H1.
-  exploit find_label_lin_succ; eauto. intros [c' AT].
-  econstructor; split.
-  eapply plus_left'.
-  eapply exec_return; eauto.
-  eapply add_branch_correct; eauto. traceEq.
-  econstructor; eauto.
+  left; econstructor; split.
+  apply plus_one. econstructor. 
+  econstructor; eauto. 
 Qed.
 
 Lemma transf_initial_states:
   forall st1, LTL.initial_state prog st1 ->
-  exists st2, LTLin.initial_state tprog st2 /\ match_states st1 st2.
+  exists st2, Linear.initial_state tprog st2 /\ match_states st1 st2.
 Proof.
   intros. inversion H.
   exploit function_ptr_translated; eauto. intros [tf [A B]].  
-  exists (Callstate nil tf nil m0); split.
+  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
   econstructor; eauto. eapply Genv.init_mem_transf_partial; eauto. 
   replace (prog_main tprog) with (prog_main prog).
   rewrite symbols_preserved. eauto.
   symmetry. apply (transform_partial_program_main transf_fundef _ TRANSF). 
   rewrite <- H3. apply sig_preserved. auto.
   constructor. constructor. auto.
-  eapply Genv.find_funct_ptr_prop; eauto.
 Qed.
 
 Lemma transf_final_states:
   forall st1 st2 r, 
-  match_states st1 st2 -> LTL.final_state st1 r -> LTLin.final_state st2 r.
+  match_states st1 st2 -> LTL.final_state st1 r -> Linear.final_state st2 r.
 Proof.
-  intros. inv H0. inv H. inv H4. constructor.
+  intros. inv H0. inv H. inv H6. econstructor; eauto.
 Qed.
 
 Theorem transf_program_correct:
-  forward_simulation (LTL.semantics prog) (LTLin.semantics tprog).
+  forward_simulation (LTL.semantics prog) (Linear.semantics tprog).
 Proof.
-  eapply forward_simulation_plus.
+  eapply forward_simulation_star.
   eexact symbols_preserved.
   eexact transf_initial_states.
   eexact transf_final_states.
diff --git a/backend/Linearizetyping.v b/backend/Linearizetyping.v
deleted file mode 100644
index b4e25de..0000000
--- a/backend/Linearizetyping.v
+++ /dev/null
@@ -1,112 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Type preservation for the Linearize pass *)
-
-Require Import Coqlib.
-Require Import Maps.
-Require Import Errors.
-Require Import AST.
-Require Import Op.
-Require Import Locations.
-Require Import LTL.
-Require Import LTLtyping.
-Require Import LTLin.
-Require Import Linearize.
-Require Import LTLintyping.
-Require Import Conventions.
-
-(** * Type preservation for the linearization pass *)
-
-Lemma wt_add_instr:
-  forall f i k, wt_instr f i -> wt_code f k -> wt_code f (i :: k).
-Proof.
-  intros; red; intros. elim H1; intro. subst i0; auto. auto.
-Qed.
-
-Lemma wt_add_branch:
-  forall f s k, wt_code f k -> wt_code f (add_branch s k).
-Proof.
-  intros; unfold add_branch. destruct (starts_with s k).
-  auto. apply wt_add_instr; auto. constructor.
-Qed.
-
-Lemma wt_linearize_instr:
-  forall f instr,
-  LTLtyping.wt_instr f instr ->
-  forall k,
-  wt_code f.(LTL.fn_sig) k ->
-  wt_code f.(LTL.fn_sig) (linearize_instr instr k).
-Proof.
-  induction 1; simpl; intros.
-  apply wt_add_branch; auto.
-  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
-  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
-  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
-  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
-  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
-  apply wt_add_instr. constructor; auto. auto.
-  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
-  destruct (starts_with s1 k); apply wt_add_instr.
-  constructor; auto. rewrite H. destruct cond; auto. 
-  apply wt_add_branch; auto.
-  constructor; auto. apply wt_add_branch; auto.
-  apply wt_add_instr. constructor; auto. auto.
-  apply wt_add_instr. constructor; auto. auto.
-Qed.
-
-Lemma wt_linearize_body:
-  forall f,
-  (forall pc instr, 
-     f.(LTL.fn_code)!pc = Some instr -> LTLtyping.wt_instr f instr) ->
-  forall enum,
-  wt_code f.(LTL.fn_sig) (linearize_body f enum).
-Proof.
-  unfold linearize_body; induction enum; rewrite list_fold_right_eq.
-  red; simpl; intros; contradiction.
-  unfold linearize_node. caseEq ((LTL.fn_code f)!a); intros.
-  apply wt_add_instr. constructor. apply wt_linearize_instr; eauto with coqlib.
-  auto.
-Qed.
-
-Lemma wt_transf_function:
-  forall f tf,
-  LTLtyping.wt_function f ->
-  transf_function f = OK tf ->
-  wt_function tf. 
-Proof.
-  intros. inv H. monadInv H0. constructor; auto.
-  simpl. apply wt_add_branch. 
-  apply wt_linearize_body. auto. 
-Qed.
-
-Lemma wt_transf_fundef:
-  forall f tf,
-  LTLtyping.wt_fundef f ->
-  transf_fundef f = OK tf ->
-  wt_fundef tf. 
-Proof.
-  induction 1; intros. monadInv H. constructor. 
-  monadInv H0. constructor; eapply wt_transf_function; eauto.
-Qed.
-
-Lemma program_typing_preserved:
-  forall (p: LTL.program) (tp: LTLin.program),
-  LTLtyping.wt_program p ->
-  transf_program p = OK tp ->
-  LTLintyping.wt_program tp.
-Proof.
-  intros; red; intros.
-  generalize (transform_partial_program_function transf_fundef _ _ _ H0 H1).
-  intros [f0 [IN TR]].
-  eapply wt_transf_fundef; eauto. 
-Qed.
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index 32d6045..c51db6f 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -16,124 +16,87 @@ Require Import Coqlib.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
+Require Import Events.
 Require Import Op.
 Require Import Locations.
+Require Import Conventions.
 Require Import LTL.
 Require Import Linear.
-Require Import Conventions.
 
-(** The typing rules for Linear are similar to those for LTLin: we check
-  that instructions receive the right number of arguments,
-  and that the types of the argument and result registers agree
-  with what the instruction expects.  Moreover, we  also
-  enforces some correctness conditions on the offsets of stack slots
-  accessed through [Lgetstack] and [Lsetstack] Linear instructions. *)
+(** The typing rules for Linear enforce several properties useful for
+  the proof of the [Stacking] pass:
+- for each instruction, the type of the result register or slot
+  agrees with the type of values produced by the instruction;
+- correctness conditions on the offsets of stack slots
+  accessed through [Lgetstack] and [Lsetstack] Linear instructions.
+*)
+
+(** The rules are presented as boolean-valued functions so that we
+  get an executable type-checker for free. *)
 
 Section WT_INSTR.
 
 Variable funct: function.
 
-Definition slot_valid (s: slot) :=
-  match s with
-  | Local ofs ty => 0 <= ofs
-  | Outgoing ofs ty => 0 <= ofs
-  | Incoming ofs ty => In (S s) (loc_parameters funct.(fn_sig))
+Definition slot_valid (sl: slot) (ofs: Z) (ty: typ): bool :=
+  match sl with
+  | Local => zle 0 ofs
+  | Outgoing => zle 0 ofs
+  | Incoming => In_dec Loc.eq (S Incoming ofs ty) (loc_parameters funct.(fn_sig))
+  end &&
+  match ty with
+  | Tint | Tfloat => true
+  | Tlong => false
   end.
 
-Definition slot_writable (s: slot) :=
-  match s with
-  | Local ofs ty => True
-  | Outgoing ofs ty => True
-  | Incoming ofs ty => False
+Definition slot_writable (sl: slot) : bool :=
+  match sl with
+  | Local => true
+  | Outgoing => true
+  | Incoming => false
   end.
 
-Inductive wt_instr : instruction -> Prop :=
-  | wt_Lgetstack:
-      forall s r,
-      slot_type s = mreg_type r ->
-      slot_valid s ->
-      wt_instr (Lgetstack s r)
-  | wt_Lsetstack:
-      forall s r,
-      slot_type s = mreg_type r ->
-      slot_valid s -> slot_writable s ->
-      wt_instr (Lsetstack r s)
-  | wt_Lopmove:
-      forall r1 r,
-      mreg_type r1 = mreg_type r ->
-      wt_instr (Lop Omove (r1 :: nil) r)
-  | wt_Lop:
-      forall op args res,
-      op <> Omove ->
-      (List.map mreg_type args, mreg_type res) = type_of_operation op ->
-      wt_instr (Lop op args res)
-  | wt_Lload:
-      forall chunk addr args dst,
-      List.map mreg_type args = type_of_addressing addr ->
-      mreg_type dst = type_of_chunk chunk ->
-      wt_instr (Lload chunk addr args dst)
-  | wt_Lstore:
-      forall chunk addr args src,
-      List.map mreg_type args = type_of_addressing addr ->
-      mreg_type src = type_of_chunk chunk ->
-      wt_instr (Lstore chunk addr args src)
-  | wt_Lcall:
-      forall sig ros,
-      match ros with inl r => mreg_type r = Tint | _ => True end ->
-      wt_instr (Lcall sig ros)
-  | wt_Ltailcall:
-      forall sig ros,
-      tailcall_possible sig ->
-      match ros with inl r => r = IT1 | _ => True end ->
-      wt_instr (Ltailcall sig ros)
-  | wt_Lbuiltin:
-      forall ef args res,
-      List.map mreg_type args = (ef_sig ef).(sig_args) ->
-      mreg_type res = proj_sig_res (ef_sig ef) ->
-      arity_ok (ef_sig ef).(sig_args) = true ->
-      wt_instr (Lbuiltin ef args res)
-  | wt_Lannot:
-      forall ef args,
-      List.map Loc.type args = (ef_sig ef).(sig_args) ->
-      ef_reloads ef = false ->
-      locs_acceptable args ->
-      wt_instr (Lannot ef args)
-  | wt_Llabel:
-      forall lbl,
-      wt_instr (Llabel lbl)
-  | wt_Lgoto:
-      forall lbl,
-      wt_instr (Lgoto lbl)
-  | wt_Lcond:
-      forall cond args lbl,
-      List.map mreg_type args = type_of_condition cond ->
-      wt_instr (Lcond cond args lbl)
-  | wt_Ljumptable:
-      forall arg tbl,
-      mreg_type arg = Tint ->
-      list_length_z tbl * 4 <= Int.max_unsigned ->
-      wt_instr (Ljumptable arg tbl)
-  | wt_Lreturn: 
-      wt_instr (Lreturn).
+Definition loc_valid (l: loc) : bool :=
+  match l with
+  | R r => true
+  | S Local ofs ty => slot_valid Local ofs ty
+  | S _ _ _ => false
+  end.
+
+Definition wt_instr (i: instruction) : bool :=
+  match i with
+  | Lgetstack sl ofs ty r =>
+      typ_eq ty (mreg_type r) && slot_valid sl ofs ty
+  | Lsetstack r sl ofs ty =>
+      typ_eq ty (mreg_type r) && slot_valid sl ofs ty && slot_writable sl
+  | Lop op args res =>
+      match is_move_operation op args with
+      | Some arg =>
+          typ_eq (mreg_type arg) (mreg_type res)
+      | None => 
+          let (targs, tres) := type_of_operation op in
+          typ_eq (mreg_type res) tres
+      end
+  | Lload chunk addr args dst =>
+      typ_eq (mreg_type dst) (type_of_chunk chunk)
+  | Ltailcall sg ros =>
+      zeq (size_arguments sg) 0
+  | Lbuiltin ef args res =>
+      list_typ_eq (map mreg_type res) (proj_sig_res' (ef_sig ef))
+  | Lannot ef args =>
+      forallb loc_valid args
+  | _ =>
+      true
+  end.
 
 End WT_INSTR.
 
-Definition wt_code (f: function) (c: code) : Prop :=
-  forall instr, In instr c -> wt_instr f instr.
+Definition wt_code (f: function) (c: code) : bool :=
+  forallb (wt_instr f) c.
 
-Definition wt_function (f: function) : Prop :=
+Definition wt_function (f: function) : bool :=
   wt_code f f.(fn_code).
 
-Inductive wt_fundef: fundef -> Prop :=
-  | wt_fundef_external: forall ef,
-      wt_fundef (External ef)
-  | wt_function_internal: forall f,
-      wt_function f ->
-      wt_fundef (Internal f).
-
-Definition wt_program (p: program) : Prop :=
-  forall i f, In (i, Gfun f) (prog_defs p) -> wt_fundef f.
-
 (** Typing the run-time state.  These definitions are used in [Stackingproof]. *)
 
 Require Import Values.
@@ -148,48 +111,30 @@ Proof.
   intros; red; intros.
   unfold Locmap.set. 
   destruct (Loc.eq l l0). congruence.
-  destruct (Loc.overlap l l0).  red. auto.
-  auto.
-Qed.
-
-Lemma wt_undef_locs:
-  forall locs ls, wt_locset ls -> wt_locset (Locmap.undef locs ls).
-Proof.
-  induction locs; simpl; intros. auto. apply IHlocs. apply wt_setloc; auto. red; auto.
+  destruct (Loc.diff_dec l l0). auto. red. auto.
 Qed.
 
-Lemma wt_undef_temps:
-  forall ls, wt_locset ls -> wt_locset (undef_temps ls).
+Lemma wt_setlocs:
+  forall ll vl ls,
+  Val.has_type_list vl (map Loc.type ll) -> wt_locset ls -> wt_locset (Locmap.setlist ll vl ls).
 Proof.
-  intros; unfold undef_temps. apply wt_undef_locs; auto.
+  induction ll; destruct vl; simpl; intuition. 
+  apply IHll; auto. apply wt_setloc; auto.
 Qed.
 
-Lemma wt_undef_op:
-  forall op ls, wt_locset ls -> wt_locset (undef_op op ls).
+Lemma wt_undef_regs:
+  forall rs ls, wt_locset ls -> wt_locset (undef_regs rs ls).
 Proof.
-  intros. generalize (wt_undef_temps ls H); intro. 
-  unfold undef_op; destruct op; auto; apply wt_undef_locs; auto.
-Qed.
-
-Lemma wt_undef_getstack:
-  forall s ls, wt_locset ls -> wt_locset (undef_getstack s ls).
-Proof.
-  intros. unfold undef_getstack. destruct s; auto. apply wt_setloc; auto. red; auto.
-Qed.
-
-Lemma wt_undef_setstack:
-  forall ls, wt_locset ls -> wt_locset (undef_setstack ls).
-Proof.
-  intros. unfold undef_setstack. apply wt_undef_locs; auto.
+  induction rs; simpl; intros. auto. apply wt_setloc; auto. red; auto.
 Qed.
 
 Lemma wt_call_regs:
   forall ls, wt_locset ls -> wt_locset (call_regs ls).
 Proof.
-  intros; red; intros. unfold call_regs. destruct l. auto. 
-  destruct (in_dec Loc.eq (R m) temporaries). red; auto. auto.
-  destruct s. red; auto. 
-  change (Loc.type (S (Incoming z t))) with (Loc.type (S (Outgoing z t))). auto.
+  intros; red; intros. unfold call_regs. destruct l. auto.
+  destruct sl.
+  red; auto.
+  change (Loc.type (S Incoming pos ty)) with (Loc.type (S Outgoing pos ty)). auto.
   red; auto.
 Qed.
 
@@ -198,9 +143,8 @@ Lemma wt_return_regs:
   wt_locset caller -> wt_locset callee -> wt_locset (return_regs caller callee).
 Proof.
   intros; red; intros.
-  unfold return_regs. destruct l; auto. 
-  destruct (in_dec Loc.eq (R m) temporaries); auto.
-  destruct (in_dec Loc.eq (R m) destroyed_at_call); auto.
+  unfold return_regs. destruct l; auto.
+  destruct (in_dec mreg_eq r destroyed_at_call); auto.
 Qed.
 
 Lemma wt_init:
@@ -208,3 +152,19 @@ Lemma wt_init:
 Proof.
   red; intros. unfold Locmap.init. red; auto.
 Qed.
+
+Lemma wt_setlist_result:
+  forall sg v rs,
+  Val.has_type v (proj_sig_res sg) ->
+  wt_locset rs ->
+  wt_locset (Locmap.setlist (map R (loc_result sg)) (encode_long (sig_res sg) v) rs).
+Proof.
+  unfold loc_result, encode_long, proj_sig_res; intros.
+  destruct (sig_res sg) as [[] | ]; simpl.
+  apply wt_setloc; auto.
+  apply wt_setloc; auto.
+  destruct v; simpl in H; try contradiction;
+  simpl; apply wt_setloc; auto; apply wt_setloc; auto.
+  apply wt_setloc; auto.
+Qed.
+
diff --git a/backend/Locations.v b/backend/Locations.v
index 2381fea..2f2dae8 100644
--- a/backend/Locations.v
+++ b/backend/Locations.v
@@ -13,7 +13,10 @@
 (** Locations are a refinement of RTL pseudo-registers, used to reflect
   the results of register allocation (file [Allocation]). *)
 
+Require Import OrderedType.
 Require Import Coqlib.
+Require Import Maps.
+Require Import Ordered.
 Require Import AST.
 Require Import Values.
 Require Export Machregs.
@@ -42,9 +45,9 @@ Require Export Machregs.
   calling conventions. *)
 
 Inductive slot: Type :=
-  | Local: Z -> typ -> slot
-  | Incoming: Z -> typ -> slot
-  | Outgoing: Z -> typ -> slot.
+  | Local
+  | Incoming
+  | Outgoing.
 
 (** Morally, the [Incoming] slots of a function are the [Outgoing]
 slots of its caller function.
@@ -56,46 +59,17 @@ as 32-bit integers/pointers (type [Tint]) or as 64-bit floats (type [Tfloat]).
 The offset of a slot, combined with its type and its kind, identifies
 uniquely the slot and will determine later where it resides within the
 memory-allocated activation record.  Offsets are always positive.
-
-Conceptually, slots will be mapped to four non-overlapping memory areas
-within activation records:
-- The area for [Local] slots of type [Tint].  The offset is interpreted
-  as a 4-byte word index.
-- The area for [Local] slots of type [Tfloat].  The offset is interpreted
-  as a 8-byte word index.  Thus, two [Local] slots always refer either
-  to the same memory chunk (if they have the same types and offsets)
-  or to non-overlapping memory chunks (if the types or offsets differ).
-- The area for [Outgoing] slots.  The offset is a 4-byte word index.
-  Unlike [Local] slots, the PowerPC calling conventions demand that
-  integer and float [Outgoing] slots reside in the same memory area.
-  Therefore, [Outgoing Tint 0] and [Outgoing Tfloat 0] refer to
-  overlapping memory chunks and cannot be used simultaneously: one will
-  lose its value when the other is assigned.  We will reflect this
-  overlapping behaviour in the environments mapping locations to values
-  defined later in this file.
-- The area for [Incoming] slots.  Same structure as the [Outgoing] slots.
 *)
 
-Definition slot_type (s: slot): typ :=
-  match s with
-  | Local ofs ty => ty
-  | Incoming ofs ty => ty
-  | Outgoing ofs ty => ty
-  end.
-
 Lemma slot_eq: forall (p q: slot), {p = q} + {p <> q}.
 Proof.
-  assert (typ_eq: forall (t1 t2: typ), {t1 = t2} + {t1 <> t2}).
-  decide equality.
-  generalize zeq; intro.
   decide equality.
 Defined.
-Global Opaque slot_eq.
 
 Open Scope Z_scope.
 
 Definition typesize (ty: typ) : Z :=
-  match ty with Tint => 1 | Tfloat => 2 end.
+  match ty with Tint => 1 | Tlong => 2 | Tfloat => 2 end.
 
 Lemma typesize_pos:
   forall (ty: typ), typesize ty > 0.
@@ -109,20 +83,24 @@ Qed.
   activation record slots. *)
 
 Inductive loc : Type :=
-  | R: mreg -> loc
-  | S: slot -> loc.
+  | R (r: mreg)
+  | S (sl: slot) (pos: Z) (ty: typ).
 
 Module Loc.
 
   Definition type (l: loc) : typ :=
     match l with
     | R r => mreg_type r
-    | S s => slot_type s
+    | S sl pos ty => ty
     end.
 
   Lemma eq: forall (p q: loc), {p = q} + {p <> q}.
   Proof.
-    decide equality. apply mreg_eq. apply slot_eq.
+    decide equality.
+    apply mreg_eq.
+    apply typ_eq.
+    apply zeq.
+    apply slot_eq.
   Defined.
 
 (** As mentioned previously, two locations can be different (in the sense
@@ -138,13 +116,10 @@ Module Loc.
 *)
   Definition diff (l1 l2: loc) : Prop :=
     match l1, l2 with
-    | R r1, R r2 => r1 <> r2
-    | S (Local d1 t1), S (Local d2 t2) =>
-        d1 <> d2 \/ t1 <> t2
-    | S (Incoming d1 t1), S (Incoming d2 t2) =>
-        d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1
-    | S (Outgoing d1 t1), S (Outgoing d2 t2) =>
-        d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1
+    | R r1, R r2 => 
+        r1 <> r2
+    | S s1 d1 t1, S s2 d2 t2 =>
+        s1 <> s2 \/ d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1
     | _, _ =>
         True
     end.
@@ -152,11 +127,8 @@ Module Loc.
   Lemma same_not_diff:
     forall l, ~(diff l l).
   Proof.
-    destruct l; unfold diff; try tauto.
-    destruct s.
-    tauto.
-    generalize (typesize_pos t); omega. 
-    generalize (typesize_pos t); omega. 
+    destruct l; unfold diff; auto.
+    red; intros. destruct H; auto. generalize (typesize_pos ty); omega. 
   Qed.
 
   Lemma diff_not_eq:
@@ -169,124 +141,22 @@ Module Loc.
     forall l1 l2, diff l1 l2 -> diff l2 l1.
   Proof.
     destruct l1; destruct l2; unfold diff; auto.
-    destruct s; auto.
-    destruct s; destruct s0; intuition auto.
-  Qed.
-
-  Lemma diff_reg_right:
-    forall l r, l <> R r -> diff (R r) l.
-  Proof.
-    intros. simpl. destruct l. congruence. auto.
-  Qed.
-
-  Lemma diff_reg_left:
-    forall l r, l <> R r -> diff l (R r).
-  Proof.
-    intros. apply diff_sym. apply diff_reg_right. auto.
-  Qed.
-
-(** [Loc.overlap l1 l2] returns [false] if [l1] and [l2] are different and
-  non-overlapping, and [true] otherwise: either [l1 = l2] or they partially 
-  overlap. *)
-
-  Definition overlap_aux (t1: typ) (d1 d2: Z) : bool :=
-    if zeq d1 d2 then true else
-    match t1 with
-    | Tint => false
-    | Tfloat => if zeq (d1 + 1) d2 then true else false
-    end.    
-
-  Definition overlap (l1 l2: loc) : bool :=
-    match l1, l2 with
-    | S (Incoming d1 t1), S (Incoming d2 t2) =>
-        overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1
-    | S (Outgoing d1 t1), S (Outgoing d2 t2) =>
-        overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1
-    | _, _ => false
-    end.
-
-  Lemma overlap_aux_true_1:
-    forall d1 t1 d2 t2,
-    overlap_aux t1 d1 d2 = true ->
-    ~(d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1).
-  Proof.
-    intros until t2.
-    generalize (typesize_pos t1); intro.
-    generalize (typesize_pos t2); intro.
-    unfold overlap_aux.
-    case (zeq d1 d2).
-    intros. omega.
-    case t1. intros; discriminate.
-    case (zeq (d1 + 1) d2); intros.
-    subst d2. simpl. omega.
-    discriminate.
-  Qed.
-
-  Lemma overlap_aux_true_2:
-    forall d1 t1 d2 t2,
-    overlap_aux t2 d2 d1 = true ->
-    ~(d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1).
-  Proof.
-    intros. generalize (overlap_aux_true_1 d2 t2 d1 t1 H).
-    tauto.
+    intuition.
   Qed.
 
-  Lemma overlap_not_diff:
-    forall l1 l2, overlap l1 l2 = true -> ~(diff l1 l2).
+  Definition diff_dec (l1 l2: loc) : { Loc.diff l1 l2 } + { ~Loc.diff l1 l2 }.
   Proof.
-    unfold overlap, diff; destruct l1; destruct l2; intros; try discriminate.
-    destruct s; discriminate.
-    destruct s; destruct s0; try discriminate.
-    elim (orb_true_elim _ _ H); intro.
-    apply overlap_aux_true_1; auto.
-    apply overlap_aux_true_2; auto.
-    elim (orb_true_elim _ _ H); intro.
-    apply overlap_aux_true_1; auto.
-    apply overlap_aux_true_2; auto.
-  Qed.
-
-  Lemma overlap_aux_false_1:
-    forall t1 d1 t2 d2,
-    overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1 = false ->
-    d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1.
-  Proof.
-    intros until d2. intro OV. 
-    generalize (orb_false_elim _ _ OV). intro OV'. elim OV'.
-    unfold overlap_aux. 
-    case (zeq d1 d2); intro.
-    intros; discriminate.
-    case (zeq d2 d1); intro.
-    intros; discriminate.
-    case t1; case t2; simpl.
-    intros; omega.
-    case (zeq (d2 + 1) d1); intros. discriminate. omega.
-    case (zeq (d1 + 1) d2); intros. discriminate. omega.
-    case (zeq (d1 + 1) d2); intros H1 H2. discriminate. 
-    case (zeq (d2 + 1) d1); intros. discriminate. omega.
-  Qed.
-
-  Lemma non_overlap_diff:
-    forall l1 l2, l1 <> l2 -> overlap l1 l2 = false -> diff l1 l2.
-  Proof.
-    intros. unfold diff; destruct l1; destruct l2. 
-    congruence.
-    auto.
-    destruct s; auto.
-    destruct s; destruct s0; auto. 
-    case (zeq z z0); intro.
-    compare t t0; intro.
-    congruence. tauto. tauto.
-    apply overlap_aux_false_1. exact H0.
-    apply overlap_aux_false_1. exact H0.
-  Qed.
-
-  Definition diff_dec (l1 l2: loc) : { Loc.diff l1 l2 } + {~Loc.diff l1 l2}.
-  Proof.
-    intros. case (eq l1 l2); intros. 
-    right. rewrite e. apply same_not_diff.
-    case_eq (overlap l1 l2); intros.
-    right. apply overlap_not_diff; auto.
-    left. apply non_overlap_diff; auto.
+    intros. destruct l1; destruct l2; simpl.
+  - destruct (mreg_eq r r0). right; tauto. left; auto.
+  - left; auto.
+  - left; auto.
+  - destruct (slot_eq sl sl0).
+    destruct (zle (pos + typesize ty) pos0).
+    left; auto.
+    destruct (zle (pos0 + typesize ty0) pos).
+    left; auto.
+    right; red; intros [P | [P | P]]. congruence. omega. omega. 
+    left; auto.
   Defined.
 
 (** We now redefine some standard notions over lists, using the [Loc.diff]
@@ -316,12 +186,16 @@ Module Loc.
     elim (diff_not_eq l l); auto.
   Qed.
 
-  Lemma reg_notin:
-    forall r ll, ~(In (R r) ll) -> notin (R r) ll.
+  Lemma notin_dec (l: loc) (ll: list loc) : {notin l ll} + {~notin l ll}.
   Proof.
-    intros. rewrite notin_iff; intros. 
-    destruct l'; simpl. congruence. auto.
-  Qed.
+    induction ll; simpl.
+    left; auto.
+    destruct (diff_dec l a).
+    destruct IHll.
+    left; auto.
+    right; tauto.
+    right; tauto.
+  Defined.
 
 (** [Loc.disjoint l1 l2] is true if the locations in list [l1]
   are different from all locations in list [l2]. *)
@@ -376,6 +250,17 @@ Module Loc.
   | norepet_cons:
       forall hd tl, notin hd tl -> norepet tl -> norepet (hd :: tl).
 
+  Lemma norepet_dec (ll: list loc) : {norepet ll} + {~norepet ll}.
+  Proof.
+    induction ll.
+    left; constructor.
+    destruct (notin_dec a ll).
+    destruct IHll.
+    left; constructor; auto.
+    right; red; intros P; inv P; contradiction.
+    right; red; intros P; inv P; contradiction.
+  Defined.
+
 (** [Loc.no_overlap l1 l2] holds if elements of [l1] never overlap partially
   with elements of [l2]. *)
 
@@ -384,8 +269,6 @@ Module Loc.
 
 End Loc.
 
-Global Opaque Loc.eq Loc.diff_dec.
-
 (** * Mappings from locations to values *)
 
 (** The [Locmap] module defines mappings from locations to values,
@@ -413,20 +296,20 @@ Module Locmap.
 
   Definition set (l: loc) (v: val) (m: t) : t :=
     fun (p: loc) =>
-      if Loc.eq l p then v else if Loc.overlap l p then Vundef else m p.
+      if Loc.eq l p then v else if Loc.diff_dec l p then m p else Vundef.
 
   Lemma gss: forall l v m, (set l v m) l = v.
   Proof.
-    intros. unfold set. case (Loc.eq l l); tauto.
+    intros. unfold set. rewrite dec_eq_true. auto.
   Qed.
 
   Lemma gso: forall l v m p, Loc.diff l p -> (set l v m) p = m p.
   Proof.
-    intros. unfold set. case (Loc.eq l p); intro.
-    subst p. elim (Loc.same_not_diff _ H). 
-    caseEq (Loc.overlap l p); intro.
-    elim (Loc.overlap_not_diff _ _ H0 H).
+    intros. unfold set. destruct (Loc.eq l p).
+    subst p. elim (Loc.same_not_diff _ H).
+    destruct (Loc.diff_dec l p).
     auto.
+    contradiction.
   Qed.
 
   Fixpoint undef (ll: list loc) (m: t) {struct ll} : t :=
@@ -446,10 +329,172 @@ Module Locmap.
     assert (P: forall ll l m, m l = Vundef -> (undef ll m) l = Vundef).
       induction ll; simpl; intros. auto. apply IHll. 
       unfold set. destruct (Loc.eq a l); auto. 
-      destruct (Loc.overlap a l); auto.
+      destruct (Loc.diff_dec a l); auto.
     induction ll; simpl; intros. contradiction. 
     destruct H. apply P. subst a. apply gss. 
     auto.
   Qed.
 
+  Fixpoint setlist (ll: list loc) (vl: list val) (m: t) {struct ll} : t :=
+    match ll, vl with
+    | l1 :: ls, v1 :: vs => setlist ls vs (set l1 v1 m)
+    | _, _ => m
+    end.
+
+  Lemma gsetlisto: forall l ll vl m, Loc.notin l ll -> (setlist ll vl m) l = m l.
+  Proof.
+    induction ll; simpl; intros. 
+    auto.
+    destruct vl; auto. destruct H. rewrite IHll; auto. apply gso; auto. apply Loc.diff_sym; auto.
+  Qed.
+
 End Locmap.
+
+(** * Total ordering over locations *)
+
+Module IndexedTyp <: INDEXED_TYPE.
+  Definition t := typ.
+  Definition index (x: t) :=
+    match x with Tint => 1%positive | Tfloat => 2%positive | Tlong => 3%positive end.
+  Lemma index_inj: forall x y, index x = index y -> x = y.
+  Proof. destruct x; destruct y; simpl; congruence. Qed.
+  Definition eq := typ_eq.
+End IndexedTyp.
+
+Module OrderedTyp := OrderedIndexed(IndexedTyp).
+
+Module IndexedSlot <: INDEXED_TYPE.
+  Definition t := slot.
+  Definition index (x: t) :=
+    match x with Local => 1%positive | Incoming => 2%positive | Outgoing => 3%positive end.
+  Lemma index_inj: forall x y, index x = index y -> x = y.
+  Proof. destruct x; destruct y; simpl; congruence. Qed.
+  Definition eq := slot_eq.
+End IndexedSlot.
+
+Module OrderedSlot := OrderedIndexed(IndexedSlot).
+
+Module OrderedLoc <: OrderedType.
+  Definition t := loc.
+  Definition eq (x y: t) := x = y.
+  Definition lt (x y: t) :=
+    match x, y with
+    | R r1, R r2 => Plt (IndexedMreg.index r1) (IndexedMreg.index r2)
+    | R _, S _ _ _ => True
+    | S _ _ _, R _ => False
+    | S sl1 ofs1 ty1, S sl2 ofs2 ty2 =>
+        OrderedSlot.lt sl1 sl2 \/ (sl1 = sl2 /\
+        (ofs1 < ofs2 \/ (ofs1 = ofs2 /\ OrderedTyp.lt ty1 ty2)))
+    end.
+  Lemma eq_refl : forall x : t, eq x x.
+  Proof (@refl_equal t). 
+  Lemma eq_sym : forall x y : t, eq x y -> eq y x.
+  Proof (@sym_equal t).
+  Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z.
+  Proof (@trans_equal t).
+  Lemma lt_trans : forall x y z : t, lt x y -> lt y z -> lt x z.
+  Proof.
+    unfold lt; intros. 
+    destruct x; destruct y; destruct z; try tauto.
+    eapply Plt_trans; eauto.
+    destruct H. 
+    destruct H0. left; eapply OrderedSlot.lt_trans; eauto.
+    destruct H0. subst sl0. auto. 
+    destruct H. subst sl.
+    destruct H0. auto.
+    destruct H. 
+    right.  split. auto.
+    intuition.
+    right; split. congruence. eapply OrderedTyp.lt_trans; eauto. 
+  Qed.
+  Lemma lt_not_eq : forall x y : t, lt x y -> ~ eq x y.
+  Proof.
+    unfold lt, eq; intros; red; intros. subst y. 
+    destruct x. 
+    eelim Plt_strict; eauto.
+    destruct H. eelim OrderedSlot.lt_not_eq; eauto. red; auto. 
+    destruct H. destruct H0. omega. 
+    destruct H0. eelim OrderedTyp.lt_not_eq; eauto. red; auto.
+  Qed.
+  Definition compare : forall x y : t, Compare lt eq x y.
+  Proof.
+    intros. destruct x; destruct y.
+  - destruct (OrderedPositive.compare (IndexedMreg.index r) (IndexedMreg.index r0)).
+    + apply LT. red. auto. 
+    + apply EQ. red. f_equal. apply IndexedMreg.index_inj. auto. 
+    + apply GT. red. auto.
+  - apply LT. red; auto. 
+  - apply GT. red; auto.
+  - destruct (OrderedSlot.compare sl sl0).
+    + apply LT. red; auto.
+    + destruct (OrderedZ.compare pos pos0).
+      * apply LT. red. auto. 
+      * destruct (OrderedTyp.compare ty ty0).
+        apply LT. red; auto.
+        apply EQ. red; red in e; red in e0; red in e1. congruence. 
+        apply GT. red; auto.
+      * apply GT. red. auto. 
+    + apply GT. red; auto.
+  Defined.
+  Definition eq_dec := Loc.eq.
+
+(** Connection between the ordering defined here and the [Loc.diff] predicate. *)
+
+  Definition diff_low_bound (l: loc) : loc :=
+    match l with
+    | R mr => l
+    | S sl ofs ty => S sl (ofs - 1) Tfloat
+    end.
+
+  Definition diff_high_bound (l: loc) : loc :=
+    match l with
+    | R mr => l
+    | S sl ofs ty => S sl (ofs + typesize ty - 1) Tlong
+    end.
+
+  Lemma outside_interval_diff:
+    forall l l', lt l' (diff_low_bound l) \/ lt (diff_high_bound l) l' -> Loc.diff l l'.
+  Proof.
+    intros. 
+    destruct l as [mr | sl ofs ty]; destruct l' as [mr' | sl' ofs' ty']; simpl in *; auto.
+    - assert (IndexedMreg.index mr <> IndexedMreg.index mr').
+      { destruct H. apply sym_not_equal. apply Plt_ne; auto. apply Plt_ne; auto. }
+      congruence.
+    - assert (RANGE: forall ty, 1 <= typesize ty <= 2).
+      { intros; unfold typesize. destruct ty0; omega.  }
+      destruct H. 
+      + destruct H. left. apply sym_not_equal. apply OrderedSlot.lt_not_eq; auto. 
+        destruct H. right.
+        destruct H0. right. generalize (RANGE ty'); omega. 
+        destruct H0. 
+        assert (ty' = Tint). 
+        { unfold OrderedTyp.lt in H1. destruct ty'; compute in H1; congruence. }
+        subst ty'. right. simpl typesize. omega. 
+      + destruct H. left. apply OrderedSlot.lt_not_eq; auto.
+        destruct H. right.
+        destruct H0. left; omega.
+        destruct H0. exfalso. destruct ty'; compute in H1; congruence.
+  Qed.
+
+  Lemma diff_outside_interval:
+    forall l l', Loc.diff l l' -> lt l' (diff_low_bound l) \/ lt (diff_high_bound l) l'.
+  Proof.
+    intros. 
+    destruct l as [mr | sl ofs ty]; destruct l' as [mr' | sl' ofs' ty']; simpl in *; auto.
+    - unfold Plt, Pos.lt. destruct (Pos.compare (IndexedMreg.index mr) (IndexedMreg.index mr')) eqn:C.
+      elim H. apply IndexedMreg.index_inj. apply Pos.compare_eq_iff. auto.
+      auto. 
+      rewrite Pos.compare_antisym. rewrite C. auto. 
+    - destruct (OrderedSlot.compare sl sl'); auto.
+      destruct H. contradiction. 
+      destruct H.
+      right; right; split; auto. left; omega. 
+      left; right; split; auto. destruct ty'; simpl in *.
+      destruct (zlt ofs' (ofs - 1)). left; auto. 
+      right; split. omega. compute. auto. 
+      left; omega. 
+      left; omega.
+  Qed.
+
+End OrderedLoc.
+
diff --git a/backend/Mach.v b/backend/Mach.v
index 0728c4d..5030de1 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -60,7 +60,7 @@ Inductive instruction: Type :=
   | Mstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mcall: signature -> mreg + ident -> instruction
   | Mtailcall: signature -> mreg + ident -> instruction
-  | Mbuiltin: external_function -> list mreg -> mreg -> instruction
+  | Mbuiltin: external_function -> list mreg -> list mreg -> instruction
   | Mannot: external_function -> list annot_param -> instruction
   | Mlabel: label -> instruction
   | Mgoto: label -> instruction
@@ -124,7 +124,7 @@ store in the reserved location.
 *)
 
 Definition chunk_of_type (ty: typ) :=
-  match ty with Tint => Mint32 | Tfloat => Mfloat64al32 end.
+  match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end.
 
 Definition load_stack (m: mem) (sp: val) (ty: typ) (ofs: int) :=
   Mem.loadv (chunk_of_type ty) m (Val.add sp (Vint ofs)).
@@ -147,38 +147,29 @@ Notation "a # b <- c" := (Regmap.set b c a) (at level 1, b at next level).
 Fixpoint undef_regs (rl: list mreg) (rs: regset) {struct rl} : regset :=
   match rl with
   | nil => rs
-  | r1 :: rl' => undef_regs rl' (Regmap.set r1 Vundef rs)
+  | r1 :: rl' => Regmap.set r1 Vundef (undef_regs rl' rs)
   end.
 
 Lemma undef_regs_other:
   forall r rl rs, ~In r rl -> undef_regs rl rs r = rs r.
 Proof.
-  induction rl; simpl; intros. auto. rewrite IHrl. apply Regmap.gso. intuition. intuition.
+  induction rl; simpl; intros. auto. rewrite Regmap.gso. apply IHrl. intuition. intuition.
 Qed.
 
 Lemma undef_regs_same:
-  forall r rl rs, In r rl \/ rs r = Vundef -> undef_regs rl rs r = Vundef.
+  forall r rl rs, In r rl -> undef_regs rl rs r = Vundef.
 Proof.
   induction rl; simpl; intros. tauto.
-  destruct H. destruct H. apply IHrl. right. subst; apply Regmap.gss.
-  auto.
-  apply IHrl. right. unfold Regmap.set. destruct (RegEq.eq r a); auto. 
+  destruct H. subst a. apply Regmap.gss.
+  unfold Regmap.set. destruct (RegEq.eq r a); auto. 
 Qed.
 
-Definition undef_temps (rs: regset) := 
-  undef_regs temporary_regs rs.
-
-Definition undef_move (rs: regset) := 
-  undef_regs destroyed_at_move_regs rs.
-
-Definition undef_op (op: operation) (rs: regset) :=
-  match op with
-  | Omove => undef_move rs
-  | _ => undef_temps rs
+Fixpoint set_regs (rl: list mreg) (vl: list val) (rs: regset) : regset :=
+  match rl, vl with
+  | r1 :: rl', v1 :: vl' => set_regs rl' vl' (Regmap.set r1 v1 rs)
+  | _, _ => rs
   end.
 
-Definition undef_setstack (rs: regset) := undef_move rs.
-
 Definition is_label (lbl: label) (instr: instruction) : bool :=
   match instr with
   | Mlabel lbl' => if peq lbl lbl' then true else false
@@ -231,7 +222,7 @@ Inductive extcall_arg: regset -> mem -> val -> loc -> val -> Prop :=
       extcall_arg rs m sp (R r) (rs r)
   | extcall_arg_stack: forall rs m sp ofs ty v,
       load_stack m sp ty (Int.repr (Stacklayout.fe_ofs_arg + 4 * ofs)) = Some v ->
-      extcall_arg rs m sp (S (Outgoing ofs ty)) v.
+      extcall_arg rs m sp (S Outgoing ofs ty) v.
 
 Definition extcall_arguments
     (rs: regset) (m: mem) (sp: val) (sg: signature) (args: list val) : Prop :=
@@ -306,34 +297,39 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Mgetstack ofs ty dst :: c) rs m)
         E0 (State s f sp c (rs#dst <- v) m)
   | exec_Msetstack:
-      forall s f sp src ofs ty c rs m m',
+      forall s f sp src ofs ty c rs m m' rs',
       store_stack m sp ty ofs (rs src) = Some m' ->
+      rs' = undef_regs (destroyed_by_op Omove) rs ->
       step (State s f sp (Msetstack src ofs ty :: c) rs m)
-        E0 (State s f sp c (undef_setstack rs) m')
+        E0 (State s f sp c rs' m')
   | exec_Mgetparam:
-      forall s fb f sp ofs ty dst c rs m v,
+      forall s fb f sp ofs ty dst c rs m v rs',
       Genv.find_funct_ptr ge fb = Some (Internal f) ->
       load_stack m sp Tint f.(fn_link_ofs) = Some (parent_sp s) ->
       load_stack m (parent_sp s) ty ofs = Some v ->
+      rs' = (rs # temp_for_parent_frame <- Vundef # dst <- v) ->
       step (State s fb sp (Mgetparam ofs ty dst :: c) rs m)
-        E0 (State s fb sp c (rs # IT1 <- Vundef # dst <- v) m)
+        E0 (State s fb sp c rs' m)
   | exec_Mop:
-      forall s f sp op args res c rs m v,
+      forall s f sp op args res c rs m v rs',
       eval_operation ge sp op rs##args m = Some v ->
+      rs' = ((undef_regs (destroyed_by_op op) rs)#res <- v) ->
       step (State s f sp (Mop op args res :: c) rs m)
-        E0 (State s f sp c ((undef_op op rs)#res <- v) m)
+        E0 (State s f sp c rs' m)
   | exec_Mload:
-      forall s f sp chunk addr args dst c rs m a v,
+      forall s f sp chunk addr args dst c rs m a v rs',
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- v) ->
       step (State s f sp (Mload chunk addr args dst :: c) rs m)
-        E0 (State s f sp c ((undef_temps rs)#dst <- v) m)
+        E0 (State s f sp c rs' m)
   | exec_Mstore:
-      forall s f sp chunk addr args src c rs m m' a,
+      forall s f sp chunk addr args src c rs m m' a rs',
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.storev chunk m a (rs src) = Some m' ->
+      rs' = undef_regs (destroyed_by_store chunk addr) rs ->
       step (State s f sp (Mstore chunk addr args src :: c) rs m)
-        E0 (State s f sp c (undef_temps rs) m')
+        E0 (State s f sp c rs' m')
   | exec_Mcall:
       forall s fb sp sig ros c rs m f f' ra,
       find_function_ptr ge ros rs = Some f' ->
@@ -352,14 +348,15 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s fb (Vptr stk soff) (Mtailcall sig ros :: c) rs m)
         E0 (Callstate s f' rs m')
   | exec_Mbuiltin:
-      forall s f sp rs m ef args res b t v m',
-      external_call ef ge rs##args m t v m' ->
+      forall s f sp rs m ef args res b t vl rs' m',
+      external_call' ef ge rs##args m t vl m' ->
+      rs' = set_regs res vl (undef_regs (destroyed_by_builtin ef) rs) ->
       step (State s f sp (Mbuiltin ef args res :: b) rs m)
-         t (State s f sp b ((undef_temps rs)#res <- v) m')
+         t (State s f sp b rs' m')
   | exec_Mannot:
       forall s f sp rs m ef args b vargs t v m',
       annot_arguments rs m sp args vargs ->
-      external_call ef ge vargs m t v m' ->
+      external_call' ef ge vargs m t v m' ->
       step (State s f sp (Mannot ef args :: b) rs m)
          t (State s f sp b rs m')
   | exec_Mgoto:
@@ -369,25 +366,28 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s fb sp (Mgoto lbl :: c) rs m)
         E0 (State s fb sp c' rs m)
   | exec_Mcond_true:
-      forall s fb f sp cond args lbl c rs m c',
+      forall s fb f sp cond args lbl c rs m c' rs',
       eval_condition cond rs##args m = Some true ->
       Genv.find_funct_ptr ge fb = Some (Internal f) ->
       find_label lbl f.(fn_code) = Some c' ->
+      rs' = undef_regs (destroyed_by_cond cond) rs ->
       step (State s fb sp (Mcond cond args lbl :: c) rs m)
-        E0 (State s fb sp c' (undef_temps rs) m)
+        E0 (State s fb sp c' rs' m)
   | exec_Mcond_false:
-      forall s f sp cond args lbl c rs m,
+      forall s f sp cond args lbl c rs m rs',
       eval_condition cond rs##args m = Some false ->
+      rs' = undef_regs (destroyed_by_cond cond) rs ->
       step (State s f sp (Mcond cond args lbl :: c) rs m)
-        E0 (State s f sp c (undef_temps rs) m)
+        E0 (State s f sp c rs' m)
   | exec_Mjumptable:
-      forall s fb f sp arg tbl c rs m n lbl c',
+      forall s fb f sp arg tbl c rs m n lbl c' rs',
       rs arg = Vint n ->
       list_nth_z tbl (Int.unsigned n) = Some lbl ->
       Genv.find_funct_ptr ge fb = Some (Internal f) ->
       find_label lbl f.(fn_code) = Some c' ->
+      rs' = undef_regs destroyed_by_jumptable rs ->
       step (State s fb sp (Mjumptable arg tbl :: c) rs m)
-        E0 (State s fb sp c' (undef_temps rs) m)
+        E0 (State s fb sp c' rs' m)
   | exec_Mreturn:
       forall s fb stk soff c rs m f m',
       Genv.find_funct_ptr ge fb = Some (Internal f) ->
@@ -397,20 +397,21 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s fb (Vptr stk soff) (Mreturn :: c) rs m)
         E0 (Returnstate s rs m')
   | exec_function_internal:
-      forall s fb rs m f m1 m2 m3 stk,
+      forall s fb rs m f m1 m2 m3 stk rs',
       Genv.find_funct_ptr ge fb = Some (Internal f) ->
       Mem.alloc m 0 f.(fn_stacksize) = (m1, stk) ->
       let sp := Vptr stk Int.zero in
       store_stack m1 sp Tint f.(fn_link_ofs) (parent_sp s) = Some m2 ->
       store_stack m2 sp Tint f.(fn_retaddr_ofs) (parent_ra s) = Some m3 ->
+      rs' = undef_regs destroyed_at_function_entry rs ->
       step (Callstate s fb rs m)
-        E0 (State s fb sp f.(fn_code) (undef_temps rs) m3)
+        E0 (State s fb sp f.(fn_code) rs' m3)
   | exec_function_external:
       forall s fb rs m t rs' ef args res m',
       Genv.find_funct_ptr ge fb = Some (External ef) ->
-      external_call ef ge args m t res m' ->
       extcall_arguments rs m (parent_sp s) (ef_sig ef) args ->
-      rs' = (rs#(loc_result (ef_sig ef)) <- res) ->
+      external_call' ef ge args m t res m' ->
+      rs' = set_regs (loc_result (ef_sig ef)) res rs ->
       step (Callstate s fb rs m)
          t (Returnstate s rs' m')
   | exec_return:
@@ -428,9 +429,10 @@ Inductive initial_state (p: program): state -> Prop :=
       initial_state p (Callstate nil fb (Regmap.init Vundef) m0).
 
 Inductive final_state: state -> int -> Prop :=
-  | final_state_intro: forall rs m r,
-      rs (loc_result (mksignature nil (Some Tint))) = Vint r ->
-      final_state (Returnstate nil rs m) r.
+  | final_state_intro: forall rs m r retcode,
+      loc_result (mksignature nil (Some Tint)) = r :: nil ->
+      rs r = Vint retcode ->
+      final_state (Returnstate nil rs m) retcode.
 
 Definition semantics (rao: function -> code -> int -> Prop) (p: program) :=
   Semantics (step rao) (initial_state p) final_state (Genv.globalenv p).
diff --git a/backend/Machtyping.v b/backend/Machtyping.v
deleted file mode 100644
index 2dc19be..0000000
--- a/backend/Machtyping.v
+++ /dev/null
@@ -1,108 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Type system for the Mach intermediate language. *)
-
-Require Import Coqlib.
-Require Import AST.
-Require Import Integers.
-Require Import Op.
-Require Import Locations.
-Require Import Conventions.
-Require Import Mach.
-
-(** * Typing rules *)
-
-Inductive wt_instr : instruction -> Prop :=
-  | wt_Mlabel:
-     forall lbl,
-     wt_instr (Mlabel lbl)
-  | wt_Mgetstack:
-     forall ofs ty r,
-     mreg_type r = ty ->
-     wt_instr (Mgetstack ofs ty r)
-  | wt_Msetstack:
-     forall ofs ty r,
-     mreg_type r = ty ->
-     wt_instr (Msetstack r ofs ty)
-  | wt_Mgetparam:
-     forall ofs ty r,
-     mreg_type r = ty ->
-     wt_instr (Mgetparam ofs ty r)
-  | wt_Mopmove:
-     forall r1 r,
-     mreg_type r1 = mreg_type r ->
-     wt_instr (Mop Omove (r1 :: nil) r)
-  | wt_Mop:
-     forall op args res,
-      op <> Omove ->
-      (List.map mreg_type args, mreg_type res) = type_of_operation op ->
-      wt_instr (Mop op args res)
-  | wt_Mload:
-      forall chunk addr args dst,
-      List.map mreg_type args = type_of_addressing addr ->
-      mreg_type dst = type_of_chunk chunk ->
-      wt_instr (Mload chunk addr args dst)
-  | wt_Mstore:
-      forall chunk addr args src,
-      List.map mreg_type args = type_of_addressing addr ->
-      mreg_type src = type_of_chunk chunk ->
-      wt_instr (Mstore chunk addr args src)
-  | wt_Mcall:
-      forall sig ros,
-      match ros with inl r => mreg_type r = Tint | inr s => True end ->
-      wt_instr (Mcall sig ros)
-  | wt_Mtailcall:
-      forall sig ros,
-      tailcall_possible sig ->
-      match ros with inl r => mreg_type r = Tint | inr s => True end ->
-      wt_instr (Mtailcall sig ros)
-  | wt_Mbuiltin:
-     forall ef args res,
-      List.map mreg_type args = (ef_sig ef).(sig_args) ->
-      mreg_type res = proj_sig_res (ef_sig ef) ->
-      wt_instr (Mbuiltin ef args res)
-  | wt_Mannot:
-      forall ef args,
-      ef_reloads ef = false ->
-      wt_instr (Mannot ef args)
-  | wt_Mgoto:
-      forall lbl,
-      wt_instr (Mgoto lbl)
-  | wt_Mcond:
-      forall cond args lbl,
-      List.map mreg_type args = type_of_condition cond ->
-      wt_instr (Mcond cond args lbl)
-  | wt_Mjumptable:
-      forall arg tbl,
-      mreg_type arg = Tint ->
-      list_length_z tbl * 4 <= Int.max_unsigned ->
-      wt_instr (Mjumptable arg tbl)
-  | wt_Mreturn: 
-      wt_instr Mreturn.
-
-Record wt_function (f: function) : Prop := mk_wt_function {
-  wt_function_instrs:
-    forall instr, In instr f.(fn_code) -> wt_instr instr;
-  wt_function_stacksize:
-    0 <= f.(fn_stacksize) <= Int.max_unsigned
-}.
-
-Inductive wt_fundef: fundef -> Prop :=
-  | wt_fundef_external: forall ef,
-      wt_fundef (External ef)
-  | wt_function_internal: forall f,
-      wt_function f ->
-      wt_fundef (Internal f).
-
-Definition wt_program (p: program) : Prop :=
-  forall i f, In (i, Gfun f) (prog_defs p) -> wt_fundef f.
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index 01ea1e6..8612b73 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -31,13 +31,13 @@ let rec precedence = function
   | Evar _ -> (16, NA)
   | Econst _ -> (16, NA)
   | Eunop _ -> (15, RtoL)
-  | Ebinop((Omul|Odiv|Odivu|Omod|Omodu|Omulf|Odivf), _, _) -> (13, LtoR)
-  | Ebinop((Oadd|Osub|Oaddf|Osubf), _, _) -> (12, LtoR)
-  | Ebinop((Oshl|Oshr|Oshru), _, _) -> (11, LtoR)
-  | Ebinop((Ocmp _|Ocmpu _|Ocmpf _), _, _) -> (10, LtoR)
-  | Ebinop(Oand, _, _) -> (8, LtoR)
-  | Ebinop(Oxor, _, _) -> (7, LtoR)
-  | Ebinop(Oor, _, _) -> (6, LtoR)
+  | Ebinop((Omul|Odiv|Odivu|Omod|Omodu|Omulf|Odivf|Omull|Odivl|Odivlu|Omodl|Omodlu), _, _) -> (13, LtoR)
+  | Ebinop((Oadd|Osub|Oaddf|Osubf|Oaddl|Osubl), _, _) -> (12, LtoR)
+  | Ebinop((Oshl|Oshr|Oshru|Oshll|Oshrl|Oshrlu), _, _) -> (11, LtoR)
+  | Ebinop((Ocmp _|Ocmpu _|Ocmpf _|Ocmpl _|Ocmplu _), _, _) -> (10, LtoR)
+  | Ebinop((Oand|Oandl), _, _) -> (8, LtoR)
+  | Ebinop((Oxor|Oxorl), _, _) -> (7, LtoR)
+  | Ebinop((Oor|Oorl), _, _) -> (6, LtoR)
   | Eload _ -> (15, RtoL)
 
 (* Naming idents. *)
@@ -60,6 +60,15 @@ let name_of_unop = function
   | Ointuoffloat -> "intuoffloat"
   | Ofloatofint -> "floatofint"
   | Ofloatofintu -> "floatofintu"
+  | Onegl -> "-l"
+  | Onotl -> "~l"
+  | Ointoflong -> "intofflong"
+  | Olongofint -> "longofint"
+  | Olongofintu -> "longofintu"
+  | Olongoffloat -> "longoffloat"
+  | Olonguoffloat -> "longuoffloat"
+  | Ofloatoflong -> "floatoflong"
+  | Ofloatoflongu -> "floatoflongu"
 
 let comparison_name = function
   | Ceq -> "=="
@@ -87,9 +96,24 @@ let name_of_binop = function
   | Osubf -> "-f"
   | Omulf -> "*f"
   | Odivf -> "/f"
+  | Oaddl -> "+l"
+  | Osubl -> "-l"
+  | Omull -> "*l"
+  | Odivl -> "/l"
+  | Odivlu -> "/lu"
+  | Omodl -> "%l"
+  | Omodlu -> "%lu"
+  | Oandl -> "&l"
+  | Oorl -> "|l"
+  | Oxorl -> "^l"
+  | Oshll -> "<<l"
+  | Oshrl -> ">>l"
+  | Oshrlu -> ">>lu"
   | Ocmp c -> comparison_name c
   | Ocmpu c -> comparison_name c ^ "u"
   | Ocmpf c -> comparison_name c ^ "f"
+  | Ocmpl c -> comparison_name c ^ "l"
+  | Ocmplu c -> comparison_name c ^ "lu"
 
 (* Expressions *)
 
@@ -109,6 +133,8 @@ let rec expr p (prec, e) =
       fprintf p "%ld" (camlint_of_coqint n)
   | Econst(Ofloatconst f) ->
       fprintf p "%F" (camlfloat_of_coqfloat f)
+  | Econst(Olongconst n) ->
+      fprintf p "%LdLL" (camlint64_of_coqint n)
   | Econst(Oaddrsymbol(id, ofs)) ->
       let ofs = camlint_of_coqint ofs in
       if ofs = 0l
@@ -141,6 +167,7 @@ let rec print_expr_list p (first, rl) =
 let name_of_type = function
   | Tint -> "int"
   | Tfloat -> "float"
+  | Tlong -> "long"
 
 let rec print_sig p = function
   | {sig_args = []; sig_res = None} -> fprintf p "void"
@@ -262,6 +289,7 @@ let print_init_data p = function
   | Init_int8 i -> fprintf p "int8 %ld" (camlint_of_coqint i)
   | Init_int16 i -> fprintf p "int16 %ld" (camlint_of_coqint i)
   | Init_int32 i -> fprintf p "%ld" (camlint_of_coqint i)
+  | Init_int64 i -> fprintf p "%Ld" (camlint64_of_coqint i)
   | Init_float32 f -> fprintf p "float32 %F" (camlfloat_of_coqfloat f)
   | Init_float64 f -> fprintf p "%F" (camlfloat_of_coqfloat f)
   | Init_space i -> fprintf p "[%ld]" (camlint_of_coqint i)
diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml
index 94f5af0..1149dd0 100644
--- a/backend/PrintLTL.ml
+++ b/backend/PrintLTL.ml
@@ -22,21 +22,30 @@ open Locations
 open LTL
 open PrintAST
 open PrintOp
-open PrintRTL
+
+let mreg pp r =
+  match Machregsaux.name_of_register r with
+  | Some s -> fprintf pp "%s" s
+  | None -> fprintf pp "<unknown machreg>"
+
+let rec mregs pp = function
+  | [] -> ()
+  | [r] -> mreg pp r
+  | r1::rl -> fprintf pp "%a, %a" mreg r1 mregs rl
+
+let slot pp (sl, ofs, ty) =
+  match sl with
+  | Local ->
+      fprintf pp "local(%ld,%s)" (camlint_of_coqint ofs) (name_of_type ty)
+  | Incoming ->
+      fprintf pp "incoming(%ld,%s)" (camlint_of_coqint ofs) (name_of_type ty)
+  | Outgoing ->
+      fprintf pp "outgoing(%ld,%s)" (camlint_of_coqint ofs) (name_of_type ty)
 
 let loc pp l =
   match l with
-  | R r ->
-      begin match Machregsaux.name_of_register r with
-      | Some s -> fprintf pp "%s" s
-      | None -> fprintf pp "<unknown machreg>"
-      end
-  | S(Local(ofs, ty)) ->
-      fprintf pp "local(%ld,%s)" (camlint_of_coqint ofs) (name_of_typ ty)
-  | S(Incoming(ofs, ty)) ->
-      fprintf pp "incoming(%ld,%s)" (camlint_of_coqint ofs) (name_of_typ ty)
-  | S(Outgoing(ofs, ty)) ->
-      fprintf pp "outgoing(%ld,%s)" (camlint_of_coqint ofs) (name_of_typ ty)
+  | R r -> mreg pp r
+  | S(sl, ofs, ty) -> slot pp (sl, ofs, ty)
 
 let rec locs pp = function
   | [] -> ()
@@ -44,77 +53,93 @@ let rec locs pp = function
   | r1::rl -> fprintf pp "%a, %a" loc r1 locs rl
 
 let ros pp = function
-  | Coq_inl r -> loc pp r
+  | Coq_inl r -> mreg pp r
   | Coq_inr s -> fprintf pp "\"%s\"" (extern_atom s)
 
 let print_succ pp s dfl =
-  let s = camlint_of_positive s in
-  if s <> dfl then fprintf pp "       goto %ld@ " s
-
-let print_instruction pp (pc, i) =
-  fprintf pp "%5ld: " pc;
-  match i with
-  | Lnop s ->
-      let s = camlint_of_positive s in
-      if s = Int32.pred pc
-      then fprintf pp "nop@ "
-      else fprintf pp "goto %ld@ " s
-  | Lop(op, args, res, s) ->
-      fprintf pp "%a = %a@ " loc res (print_operator loc) (op, args);
-      print_succ pp s (Int32.pred pc)
-  | Lload(chunk, addr, args, dst, s) ->
-      fprintf pp "%a = %s[%a]@ "
-         loc dst (name_of_chunk chunk) (print_addressing loc) (addr, args);
-      print_succ pp s (Int32.pred pc)
-  | Lstore(chunk, addr, args, src, s) ->
-      fprintf pp "%s[%a] = %a@ "
-         (name_of_chunk chunk) (print_addressing loc) (addr, args) loc src;
-      print_succ pp s (Int32.pred pc)
-  | Lcall(sg, fn, args, res, s) ->
-      fprintf pp "%a = %a(%a)@ "
-        loc res ros fn locs args;
-      print_succ pp s (Int32.pred pc)
-  | Ltailcall(sg, fn, args) ->
-      fprintf pp "tailcall %a(%a)@ "
-        ros fn locs args
-  | Lbuiltin(ef, args, res, s) ->
-      fprintf pp "%a = builtin %s(%a)@ "
-        loc res (name_of_external ef) locs args;
-      print_succ pp s (Int32.pred pc)
+  let s = P.to_int32 s in
+  if s <> dfl then fprintf pp "goto %ld" s
+
+let print_instruction pp succ = function
+  | Lop(op, args, res) ->
+      fprintf pp "%a = %a;" mreg res (print_operation mreg) (op, args)
+  | Lload(chunk, addr, args, dst) ->
+      fprintf pp "%a = %s[%a];"
+         mreg dst (name_of_chunk chunk) (print_addressing mreg) (addr, args)
+  | Lgetstack(sl, ofs, ty, dst) ->
+      fprintf pp "%a = %a;" mreg dst slot (sl, ofs, ty)
+  | Lsetstack(src, sl, ofs, ty) ->
+      fprintf pp "%a = %a;" slot (sl, ofs, ty) mreg src
+  | Lstore(chunk, addr, args, src) ->
+      fprintf pp "%s[%a] = %a;"
+         (name_of_chunk chunk) (print_addressing mreg) (addr, args) mreg src
+  | Lcall(sg, fn) ->
+      fprintf pp "call %a;" ros fn
+  | Ltailcall(sg, fn) ->
+      fprintf pp "tailcall %a;" ros fn
+  | Lbuiltin(ef, args, res) ->
+      fprintf pp "%a = builtin %s(%a);"
+        mregs res (name_of_external ef) mregs args
+  | Lannot(ef, args) ->
+      fprintf pp "builtin %s(%a);"
+        (name_of_external ef) locs args
+  | Lbranch s ->
+      print_succ pp s succ
   | Lcond(cond, args, s1, s2) ->
-      fprintf pp "if (%a) goto %ld else goto %ld@ "
-        (print_condition loc) (cond, args)
-        (camlint_of_positive s1) (camlint_of_positive s2)
+      fprintf pp "if (%a) goto %ld else goto %ld"
+        (print_condition mreg) (cond, args)
+        (P.to_int32 s1) (P.to_int32 s2)
   | Ljumptable(arg, tbl) ->
       let tbl = Array.of_list tbl in
-      fprintf pp "@[<v 2>jumptable (%a)" loc arg;
+      fprintf pp "@[<v 2>jumptable (%a)" mreg arg;
       for i = 0 to Array.length tbl - 1 do
-        fprintf pp "@ case %d: goto %ld" i (camlint_of_positive tbl.(i))
+        fprintf pp "@ case %d: goto %ld" i (P.to_int32 tbl.(i))
       done;
-      fprintf pp "@]@ "
-  | Lreturn None ->
-      fprintf pp "return@ "
-  | Lreturn (Some arg) ->
-      fprintf pp "return %a@ " loc arg
-
-let print_function pp f =
-  fprintf pp "@[<v 2>f(%a) {@ " locs f.fn_params;
+      fprintf pp "@]"
+  | Lreturn ->
+      fprintf pp "return"
+
+let rec print_instructions pp succ = function
+  | [] -> ()
+  | [i] -> print_instruction pp succ i
+  | i :: il ->
+      print_instruction pp succ i;
+      fprintf pp "@ ";
+      print_instructions pp succ il
+
+let print_block pp (pc, blk) =
+  fprintf pp "%5ld: @[<hov 0>" pc;
+  print_instructions pp (Int32.pred pc) blk;
+  fprintf pp "@]@ "
+
+let print_function pp id f =
+  fprintf pp "@[<v 2>%s() {@ " (extern_atom id);
   let instrs =
     List.sort
       (fun (pc1, _) (pc2, _) -> Pervasives.compare pc2 pc1)
       (List.map
-        (fun (pc, i) -> (camlint_of_positive pc, i))
+        (fun (pc, i) -> (P.to_int32 pc, i))
         (PTree.elements f.fn_code)) in
   print_succ pp f.fn_entrypoint 
     (match instrs with (pc1, _) :: _ -> pc1 | [] -> -1l);
-  List.iter (print_instruction pp) instrs;
+  List.iter (print_block pp) instrs;
   fprintf pp "@;<0 -2>}@]@."
 
-let print_fundef fd =
-  begin match fd with
-  | Internal f -> print_function std_formatter f
-  | External _ -> ()
-  end;
-  fd
+let print_globdef pp (id, gd) =
+  match gd with
+  | Gfun(Internal f) -> print_function pp id f
+  | _ -> ()
+
+let print_program pp (prog: LTL.program) =
+  List.iter (print_globdef pp) prog.prog_defs
 
+let destination : string option ref = ref None
 
+let print_if prog =
+  match !destination with
+  | None -> ()
+  | Some f ->
+      let oc = open_out f in
+      let pp = formatter_of_out_channel oc in
+      print_program pp prog;
+      close_out oc
diff --git a/backend/PrintMach.ml b/backend/PrintMach.ml
index 7e6c343..e7cb947 100644
--- a/backend/PrintMach.ml
+++ b/backend/PrintMach.ml
@@ -76,7 +76,7 @@ let print_instruction pp i =
       fprintf pp "tailcall %a@ " ros fn
   | Mbuiltin(ef, args, res) ->
       fprintf pp "%a = builtin %s(%a)@ "
-        reg res (name_of_external ef) regs args
+        regs res (name_of_external ef) regs args
   | Mannot(ef, args) ->
       fprintf pp "%s(%a)@ " (name_of_external ef) annot_params args
   | Mlabel lbl ->
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
new file mode 100644
index 0000000..756fc58
--- /dev/null
+++ b/backend/PrintXTL.ml
@@ -0,0 +1,147 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Pretty-printer for XTL *)
+
+open Format
+open Camlcoq
+open Datatypes
+open Maps
+open AST
+open Registers
+open Op
+open Locations
+open PrintAST
+open PrintOp
+open XTL
+
+let mreg pp r =
+  match Machregsaux.name_of_register r with
+  | Some s -> fprintf pp "%s" s
+  | None -> fprintf pp "<unknown machreg>"
+
+let short_name_of_type = function Tint -> 'i' | Tfloat -> 'f' | Tlong -> 'l'
+
+let loc pp = function
+  | R r -> mreg pp r
+  | S(Local, ofs, ty) ->
+      fprintf pp "L%c%ld" (short_name_of_type ty) (camlint_of_coqint ofs)
+  | S(Incoming, ofs, ty) ->
+      fprintf pp "I%c%ld" (short_name_of_type ty) (camlint_of_coqint ofs)
+  | S(Outgoing, ofs, ty) ->
+      fprintf pp "O%c%ld" (short_name_of_type ty) (camlint_of_coqint ofs)
+
+let current_alloc = ref (None: (var -> loc) option)
+let current_liveness = ref (None: VSet.t PMap.t option)
+
+let reg pp r ty =
+  match !current_alloc with
+  | None -> fprintf pp "x%ld" (P.to_int32 r)
+  | Some alloc -> fprintf pp "x%ld{%a}" (P.to_int32 r) loc (alloc (V(r, ty)))
+
+let var pp = function
+  | V(r, ty) -> reg pp r ty
+  | L l -> loc pp l
+
+let rec vars pp = function
+  | [] -> ()
+  | [r] -> var pp r
+  | r1::rl -> fprintf pp "%a, %a" var r1 vars rl
+
+let ros pp = function
+  | Coq_inl r -> var pp r
+  | Coq_inr s -> fprintf pp "\"%s\"" (extern_atom s)
+
+let liveset pp lv =
+  fprintf pp "@[<hov 2>{";
+  VSet.iter (function V(r, ty) -> fprintf pp "@ x%ld" (P.to_int32 r)
+                    | L l -> ())
+    lv;                  
+  fprintf pp " }@]"
+
+let print_succ pp s dfl =
+  let s = P.to_int32 s in
+  if s <> dfl then fprintf pp "goto %ld" s
+
+let print_instruction pp succ = function
+  | Xmove(src, dst) ->
+      fprintf pp "%a = %a;" var dst var src
+  | Xreload(src, dst) ->
+      fprintf pp "%a =r %a;" var dst var src
+  | Xspill(src, dst) ->
+      fprintf pp "%a =s %a;" var dst var src
+  | Xparmove(srcs, dsts, t1, t2) ->
+      fprintf pp "(%a) = (%a) using %a, %a;" vars dsts vars srcs var t1 var t2
+  | Xop(op, args, res) ->
+      fprintf pp "%a = %a;" var res (print_operation var) (op, args)
+  | Xload(chunk, addr, args, dst) ->
+      fprintf pp "%a = %s[%a];"
+         var dst (name_of_chunk chunk) (print_addressing var) (addr, args)
+  | Xstore(chunk, addr, args, src) ->
+      fprintf pp "%s[%a] = %a;"
+         (name_of_chunk chunk) (print_addressing var) (addr, args) var src
+  | Xcall(sg, fn, args, res) ->
+      fprintf pp "%a = call %a(%a);" vars res ros fn vars args
+  | Xtailcall(sg, fn, args) ->
+      fprintf pp "tailcall %a(%a);" ros fn vars args
+  | Xbuiltin(ef, args, res) ->
+      fprintf pp "%a = builtin %s(%a);"
+        vars res (name_of_external ef) vars args
+  | Xbranch s ->
+      print_succ pp s succ
+  | Xcond(cond, args, s1, s2) ->
+      fprintf pp "if (%a) goto %ld else goto %ld"
+        (print_condition var) (cond, args)
+        (P.to_int32 s1) (P.to_int32 s2)
+  | Xjumptable(arg, tbl) ->
+      let tbl = Array.of_list tbl in
+      fprintf pp "@[<v 2>jumptable (%a)" var arg;
+      for i = 0 to Array.length tbl - 1 do
+        fprintf pp "@ case %d: goto %ld" i (P.to_int32 tbl.(i))
+      done;
+      fprintf pp "@]"
+  | Xreturn args ->
+      fprintf pp "return %a" vars args
+
+let rec print_instructions pp succ = function
+  | [] -> ()
+  | [i] -> print_instruction pp succ i
+  | i :: il ->
+      print_instruction pp succ i;
+      fprintf pp "@ ";
+      print_instructions pp succ il
+
+let print_block pp (pc, blk) =
+  fprintf pp "%5ld: @[<hov 0>" pc;
+  print_instructions pp (Int32.pred pc) blk;
+  fprintf pp "@]@ ";
+  match !current_liveness with
+  | None -> ()
+  | Some liveness -> fprintf pp "%a@ " liveset (PMap.get (P.of_int32 pc) liveness)
+
+let print_function pp ?alloc ?live f =
+  current_alloc := alloc;
+  current_liveness := live;
+  fprintf pp "@[<v 2>f() {@ ";
+  let instrs =
+    List.sort
+      (fun (pc1, _) (pc2, _) -> Pervasives.compare pc2 pc1)
+      (List.map
+        (fun (pc, i) -> (P.to_int32 pc, i))
+        (PTree.elements f.fn_code)) in
+  print_succ pp f.fn_entrypoint 
+    (match instrs with (pc1, _) :: _ -> pc1 | [] -> -1l);
+  List.iter (print_block pp) instrs;
+  fprintf pp "@;<0 -2>}@]@.";
+  current_alloc := None;
+  current_liveness := None
+
diff --git a/backend/RRE.v b/backend/RRE.v
deleted file mode 100644
index bee57f6..0000000
--- a/backend/RRE.v
+++ /dev/null
@@ -1,173 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Redundant Reloads Elimination *)
-
-Require Import Coqlib.
-Require Import AST.
-Require Import Op.
-Require Import Locations.
-Require Import Conventions.
-Require Import Linear.
-
-(** * Equations between slots and machine registers *)
-
-(** The RRE pass keeps track of which register holds the value of which
-  stack slot, using sets of equations like the following. *)
-
-Record equation := mkeq { e_reg: mreg; e_slot: slot }.
-
-Definition equations : Type := list equation.
-
-Fixpoint find_reg_containing (s: slot) (eqs: equations) : option mreg :=
-  match eqs with
-  | nil => None
-  | e :: eqs' => if slot_eq (e_slot e) s then Some (e_reg e) else find_reg_containing s eqs'
-  end.
-
-Definition eq_equation (eq1 eq2: equation) : {eq1=eq2} + {eq1<>eq2}.
-Proof.
-  generalize slot_eq mreg_eq. decide equality.
-Defined.
-
-Definition contains_equation (s: slot) (r: mreg) (eqs: equations) : bool :=
-  In_dec eq_equation (mkeq r s) eqs.
-
-(** Remove equations that are invalidated by an assignment to location [l]. *)
-
-Fixpoint kill_loc (l: loc) (eqs: equations) : equations :=
-  match eqs with
-  | nil => nil
-  | e :: eqs' =>
-      if Loc.diff_dec (S (e_slot e)) l && Loc.diff_dec (R (e_reg e)) l
-      then e :: kill_loc l eqs'
-      else kill_loc l eqs'
-  end.
-
-(** Same, for a list of locations [ll]. *)
-
-Definition kill_locs (ll: list loc) (eqs: equations) : equations :=
-  List.fold_left (fun eqs l => kill_loc l eqs) ll eqs.
-
-Definition kill_temps (eqs: equations) : equations :=
-  kill_locs temporaries eqs.
-
-Definition kill_at_move (eqs: equations) : equations :=
-  kill_locs destroyed_at_move eqs.
-
-Definition kill_op (op: operation) (eqs: equations) : equations :=
-  match op with Omove => kill_at_move eqs | _ => kill_temps eqs end.
-
-(** * Safety criterion *)
-
-Definition is_incoming (s: slot) : bool :=
-  match s with
-  | Incoming _ _ => true
-  | _ => false
-  end.
-
-(** Turning a [Lgetstack] into a register-to-register move is not always
-  safe: at least on x86, the move destroys some registers 
-  (those from [destroyed_at_move] list) while the [Lgetstack] does not.
-  Therefore, we perform this transformation only if the destroyed
-  registers are not used before being destroyed by a later
-  [Lop], [Lload], [Lstore], [Lbuiltin], [Lcond] or [Ljumptable] operation. *)
-
-Fixpoint safe_move_insertion (c: code) : bool :=
-  match c with
-  | Lgetstack s r :: c' =>
-      negb(In_dec mreg_eq r destroyed_at_move_regs) && safe_move_insertion c'
-  | Lsetstack r s :: c' =>
-      negb(In_dec mreg_eq r destroyed_at_move_regs)
-  | Lop op args res :: c' =>
-      list_disjoint_dec mreg_eq args destroyed_at_move_regs
-  | Lload chunk addr args res :: c' =>
-      list_disjoint_dec mreg_eq args destroyed_at_move_regs
-  | Lstore chunk addr args src :: c' =>
-      list_disjoint_dec mreg_eq (src :: args) destroyed_at_move_regs
-  | Lbuiltin ef args res :: c' =>
-      list_disjoint_dec mreg_eq args destroyed_at_move_regs
-  | Lcond cond args lbl :: c' =>
-      list_disjoint_dec mreg_eq args destroyed_at_move_regs
-  | Ljumptable arg tbl :: c' =>
-      negb(In_dec mreg_eq arg destroyed_at_move_regs)
-  | _ => false
-  end.
-
-(** * Code transformation *)
-
-(** The following function eliminates [Lgetstack] instructions or turn them
-  into register-to-register move whenever possible.  Simultaneously,
-  it propagates valid (register, slot) equations across basic blocks. *)
-
-(** [transf_code] is written in accumulator-passing style so that it runs
-  in constant stack space.  The [k] parameter accumulates the instructions
-  to be generated, in reverse order, and is then reversed at the end *)
-
-Fixpoint transf_code (eqs: equations) (c: code) (k: code) : code :=
-  match c with
-  | nil => List.rev' k
-  | Lgetstack s r :: c =>
-      if is_incoming s then
-        transf_code (kill_loc (R r) (kill_loc (R IT1) eqs)) c (Lgetstack s r :: k)
-      else if contains_equation s r eqs then
-        transf_code eqs c k
-      else 
-        match find_reg_containing s eqs with
-        | Some r' => 
-            if safe_move_insertion c then
-              transf_code (kill_at_move (mkeq r s :: kill_loc (R r) eqs)) c (Lop Omove (r' :: nil) r :: k)
-            else
-              transf_code (mkeq r s :: kill_loc (R r) eqs) c (Lgetstack s r :: k)
-        | None =>
-            transf_code (mkeq r s :: kill_loc (R r) eqs) c (Lgetstack s r :: k)
-        end
-  | Lsetstack r s :: c =>
-      transf_code (kill_at_move (mkeq r s :: kill_loc (S s) eqs)) c (Lsetstack r s :: k)
-  | Lop op args res :: c =>
-      transf_code (kill_loc (R res) (kill_op op eqs)) c (Lop op args res :: k)
-  | Lload chunk addr args res :: c =>
-      transf_code (kill_loc (R res) (kill_temps eqs)) c (Lload chunk addr args res :: k)
-  | Lstore chunk addr args src :: c =>
-      transf_code (kill_temps eqs) c (Lstore chunk addr args src :: k)
-  | Lcall sg ros :: c =>
-      transf_code nil c (Lcall sg ros :: k)
-  | Ltailcall sg ros :: c =>
-      transf_code nil c (Ltailcall sg ros :: k)
-  | Lbuiltin ef args res :: c =>
-      transf_code (kill_loc (R res) (kill_temps eqs)) c (Lbuiltin ef args res :: k)
-  | Lannot ef args :: c =>
-      transf_code eqs c (Lannot ef args :: k)
-  | Llabel lbl :: c =>
-      transf_code nil c (Llabel lbl :: k)
-  | Lgoto lbl :: c =>
-      transf_code nil c (Lgoto lbl :: k)
-  | Lcond cond args lbl :: c =>
-      transf_code (kill_temps eqs) c (Lcond cond args lbl :: k)
-  | Ljumptable arg lbls :: c =>
-      transf_code nil c (Ljumptable arg lbls :: k)
-  | Lreturn :: c =>
-      transf_code nil c (Lreturn :: k)
-  end.
-
-Definition transf_function (f: function) : function :=
-  mkfunction
-    (fn_sig f)
-    (fn_stacksize f)
-    (transf_code nil (fn_code f) nil).
-
-Definition transf_fundef (fd: fundef) : fundef :=
-  transf_fundef transf_function fd.
-
-Definition transf_program (p: program) : program :=
-  transform_program transf_fundef p.
-
diff --git a/backend/RREproof.v b/backend/RREproof.v
deleted file mode 100644
index 98de7a8..0000000
--- a/backend/RREproof.v
+++ /dev/null
@@ -1,658 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Correctness proof for the [RRE] pass. *)
-
-Require Import Axioms.
-Require Import Coqlib.
-Require Import AST.
-Require Import Values.
-Require Import Globalenvs.
-Require Import Events.
-Require Import Smallstep.
-Require Import Op.
-Require Import Locations.
-Require Import Conventions.
-Require Import Linear.
-Require Import RRE.
-
-(** * Operations over equations *)
-
-Lemma find_reg_containing_sound:
-  forall s r eqs, find_reg_containing s eqs = Some r -> In (mkeq r s) eqs.
-Proof.
-  induction eqs; simpl; intros.
-  congruence.
-  destruct (slot_eq (e_slot a) s). inv H. left; destruct a; auto. right; eauto.
-Qed.
-
-Definition equations_hold (ls: locset) (eqs: equations) : Prop :=
-  forall e, In e eqs -> ls (S (e_slot e)) = ls (R (e_reg e)).
-
-Lemma nil_hold:
-  forall ls, equations_hold ls nil.
-Proof.
-  red; intros; contradiction.
-Qed.
-
-Lemma In_kill_loc:
-  forall e l eqs,
-  In e (kill_loc l eqs) ->
-  In e eqs /\ Loc.diff (S (e_slot e)) l /\ Loc.diff (R (e_reg e)) l.
-Proof.
-  induction eqs; simpl kill_loc; simpl In; intros.
-  tauto.
-  destruct (Loc.diff_dec (S (e_slot a)) l).
-  destruct (Loc.diff_dec (R (e_reg a)) l).
-  simpl in H.  intuition congruence. 
-  simpl in H.  intuition.
-  simpl in H.  intuition.
-Qed.
-
-Lemma kill_loc_hold:
-  forall ls eqs l v,
-  equations_hold ls eqs ->
-  equations_hold (Locmap.set l v ls) (kill_loc l eqs).
-Proof.
-  intros; red; intros. 
-  exploit In_kill_loc; eauto. intros [A [B C]].
-  repeat rewrite Locmap.gso; auto; apply Loc.diff_sym; auto.
-Qed.
-
-Lemma In_kill_locs:
-  forall e ll eqs,
-  In e (kill_locs ll eqs) ->
-  In e eqs /\ Loc.notin (S (e_slot e)) ll /\ Loc.notin (R (e_reg e)) ll.
-Proof.
-Opaque Loc.diff.
-  induction ll; simpl; intros.
-  tauto.
-  exploit IHll; eauto. intros [A [B C]]. exploit In_kill_loc; eauto. intros [D [E F]]. 
-  tauto.
-Qed.
-
-Lemma kill_locs_hold:
-  forall ll ls eqs,
-  equations_hold ls eqs ->
-  equations_hold (Locmap.undef ll ls) (kill_locs ll eqs).
-Proof.
-  intros; red; intros. exploit In_kill_locs; eauto. intros [A [B C]]. 
-  repeat rewrite Locmap.guo; auto. 
-Qed.
-
-Lemma kill_temps_hold:
-  forall ls eqs,
-  equations_hold ls eqs ->
-  equations_hold (LTL.undef_temps ls) (kill_temps eqs).
-Proof.
-  exact (kill_locs_hold temporaries).
-Qed.
-
-Lemma kill_at_move_hold:
-  forall ls eqs,
-  equations_hold ls eqs ->
-  equations_hold (undef_setstack ls) (kill_at_move eqs).
-Proof.
-  exact (kill_locs_hold destroyed_at_move).
-Qed.
-
-Lemma kill_at_op_hold:
-  forall op ls eqs,
-  equations_hold ls eqs ->
-  equations_hold (undef_op op ls) (kill_op op eqs).
-Proof.
-  intros op. 
-  destruct op; exact kill_temps_hold || exact kill_at_move_hold.
-Qed.
-
-Lemma eqs_getstack_hold:
-  forall rs r s eqs,
-  equations_hold rs eqs ->
-  equations_hold (Locmap.set (R r) (rs (S s)) rs)
-                 (mkeq r s :: kill_loc (R r) eqs).
-Proof.
-Transparent Loc.diff.
-  intros; red; intros. simpl in H0; destruct H0.
-  subst e. simpl. rewrite Locmap.gss; rewrite Locmap.gso; auto. red; auto.
-  exploit In_kill_loc; eauto. intros [D [E F]].
-  repeat rewrite Locmap.gso. auto. 
-  apply Loc.diff_sym; auto. apply Loc.diff_sym; auto.
-Qed.
-
-Lemma eqs_movestack_hold:
-  forall rs r s eqs,
-  equations_hold rs eqs ->
-  equations_hold (Locmap.set (R r) (rs (S s)) (undef_setstack rs))
-                 (kill_at_move (mkeq r s :: kill_loc (R r) eqs)).
-Proof.
-  unfold undef_setstack, kill_at_move; intros; red; intros.
-  exploit In_kill_locs; eauto. intros [A [B C]].
-  simpl in A; destruct A.
-  subst e. rewrite Locmap.gss. rewrite Locmap.gso. apply Locmap.guo. auto. 
-  simpl; auto.
-  exploit In_kill_loc; eauto. intros [D [E F]].
-  repeat rewrite Locmap.gso. repeat rewrite Locmap.guo; auto.
-  apply Loc.diff_sym; auto. apply Loc.diff_sym; auto.
-Qed.
-
-Lemma eqs_setstack_hold:
-  forall rs r s eqs,
-  equations_hold rs eqs ->
-  equations_hold (Locmap.set (S s) (rs (R r)) (undef_setstack rs))
-                 (kill_at_move (mkeq r s :: kill_loc (S s) eqs)).
-Proof.
-  unfold undef_setstack, kill_at_move; intros; red; intros.
-  exploit In_kill_locs; eauto. intros [A [B C]].
-  simpl in A; destruct A.
-  subst e. rewrite Locmap.gss. rewrite Locmap.gso. rewrite Locmap.guo. auto. 
-  auto. simpl. destruct s; auto.
-  exploit In_kill_loc; eauto. intros [D [E F]].
-  repeat rewrite Locmap.gso. repeat rewrite Locmap.guo; auto.
-  apply Loc.diff_sym; auto. apply Loc.diff_sym; auto.
-Qed.
-
-Lemma locmap_set_reg_same:
-  forall rs r,
-  Locmap.set (R r) (rs (R r)) rs = rs.
-Proof.
-  intros. apply extensionality; intros. 
-  destruct (Loc.eq x (R r)).
-  subst x. apply Locmap.gss.
-  apply Locmap.gso. apply Loc.diff_reg_right; auto.
-Qed.
-
-(** * Agreement between values of locations *)
-
-(** Values of locations may differ between the original and transformed
-  program: after a [Lgetstack] is optimized to a [Lop Omove], 
-  the values of [destroyed_at_move] temporaries differ.  This
-  can only happen in parts of the code where the [safe_move_insertion]
-  function returns [true].  *)
-
-Definition agree (sm: bool) (rs rs': locset) : Prop :=
-  forall l, sm = false \/ Loc.notin l destroyed_at_move -> rs' l = rs l.
-
-Lemma agree_false:
-  forall rs rs',
-  agree false rs rs' <-> rs' = rs.
-Proof.
-  intros; split; intros.
-  apply extensionality; intros. auto.
-  subst rs'. red; auto. 
-Qed.
-
-Lemma agree_slot:
-  forall sm rs rs' s,
-  agree sm rs rs' -> rs' (S s) = rs (S s).
-Proof.
-Transparent Loc.diff.
-  intros. apply H. right. simpl; destruct s; tauto.
-Qed.
-
-Lemma agree_reg:
-  forall sm rs rs' r,
-  agree sm rs rs' ->
-  sm = false \/ ~In r destroyed_at_move_regs -> rs' (R r) = rs (R r).
-Proof.
-  intros. apply H. destruct H0; auto. right. 
-  simpl in H0; simpl; intuition congruence.
-Qed.
-
-Lemma agree_regs:
-  forall sm rs rs' rl,
-  agree sm rs rs' ->
-  sm = false \/ list_disjoint rl destroyed_at_move_regs -> reglist rs' rl = reglist rs rl.
-Proof.
-  induction rl; intros; simpl. 
-  auto.
-  decEq. apply agree_reg with sm. auto.
-    destruct H0. auto. right. eapply list_disjoint_notin; eauto with coqlib.
-  apply IHrl; auto. destruct H0; auto. right. eapply list_disjoint_cons_left; eauto.
-Qed.
-
-Lemma agree_set:
-  forall sm rs rs' l v,
-  agree sm rs rs' ->
-  agree sm (Locmap.set l v rs) (Locmap.set l v rs').
-Proof.
-  intros; red; intros.
-  unfold Locmap.set. 
-  destruct (Loc.eq l l0). auto. 
-  destruct (Loc.overlap l l0). auto.
-  apply H; auto.
-Qed. 
-
-Lemma agree_undef_move_1:
-  forall sm rs rs',
-  agree sm rs rs' ->
-  agree true rs (undef_setstack rs').
-Proof.
-  intros. unfold undef_setstack. red; intros.
-  destruct H0. congruence. rewrite Locmap.guo; auto.
-Qed.
-
-Remark locmap_undef_equal:
-  forall x ll rs rs',
-  (forall l, Loc.notin l ll -> rs' l = rs l) ->
-  Locmap.undef ll rs' x = Locmap.undef ll rs x.
-Proof.
-  induction ll; intros; simpl.
-  apply H. simpl. auto.
-  apply IHll. intros. unfold Locmap.set. 
-  destruct (Loc.eq a l). auto. destruct (Loc.overlap a l) eqn:?. auto. 
-  apply H. simpl. split; auto. apply Loc.diff_sym. apply Loc.non_overlap_diff; auto.
-Qed. 
-
-Lemma agree_undef_move_2:
-  forall sm rs rs',
-  agree sm rs rs' ->
-  agree false (undef_setstack rs) (undef_setstack rs').
-Proof.
-  intros. rewrite agree_false.
-  apply extensionality; intros. unfold undef_setstack. apply locmap_undef_equal. auto.
-Qed.
-
-Lemma agree_undef_temps:
-  forall sm rs rs',
-  agree sm rs rs' ->
-  agree false (LTL.undef_temps rs) (LTL.undef_temps rs').
-Proof.
-  intros. rewrite agree_false.
-  apply extensionality; intros. unfold LTL.undef_temps. apply locmap_undef_equal.
-  intros. apply H. right. simpl in H0; simpl; tauto.
-Qed.
-
-Lemma agree_undef_op:
-  forall op sm rs rs',
-  agree sm rs rs' ->
-  agree false (undef_op op rs) (undef_op op rs').
-Proof.
-  intros op.
-  destruct op; exact agree_undef_temps || exact agree_undef_move_2.
-Qed.
-
-Lemma transf_code_cont:
-  forall c eqs k1 k2,
-  transf_code eqs c (k1 ++ k2) = List.rev k2 ++ transf_code eqs c k1.
-Proof.
-  induction c; simpl; intros.
-  unfold rev'; rewrite <- ! rev_alt; apply rev_app_distr. 
-  destruct a; try (rewrite <- IHc; reflexivity).
-  destruct (is_incoming s).
-  rewrite <- IHc; reflexivity.
-  destruct (contains_equation s m eqs). 
-  auto.
-  destruct (find_reg_containing s eqs).
-  destruct (safe_move_insertion c).
-  rewrite <- IHc; reflexivity.
-  rewrite <- IHc; reflexivity.
-  rewrite <- IHc; reflexivity.
-Qed.
-
-Corollary transf_code_eq:
-  forall eqs c i, transf_code eqs c (i :: nil) = i :: transf_code eqs c nil.
-Proof.
-  intros. change (i :: nil) with (nil ++ (i :: nil)). 
-  rewrite transf_code_cont. auto.
-Qed. 
-
-Lemma transl_find_label:
-  forall lbl c eqs,
-  find_label lbl (transf_code eqs c nil) =
-  option_map (fun c => transf_code nil c nil) (find_label lbl c).
-Proof.
-  induction c; intros.
-  auto.
-  destruct a; simpl; try (rewrite transf_code_eq; simpl; auto).
-  destruct (is_incoming s); simpl; auto.
-  destruct (contains_equation s m eqs). auto.
-  destruct (find_reg_containing s eqs); rewrite !transf_code_eq. 
-  destruct (safe_move_insertion c); simpl; auto.
-  simpl; auto.
-  destruct (peq lbl l); simpl; auto.
-Qed.
-
-(** * Semantic preservation *)
-
-Section PRESERVATION.
-
-Variable prog: program.
-Let tprog := transf_program prog.
-
-Let ge := Genv.globalenv prog.
-Let tge := Genv.globalenv tprog.
-
-Lemma functions_translated:
-  forall v f,
-  Genv.find_funct ge v = Some f ->
-  Genv.find_funct tge v = Some (transf_fundef f).
-Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog).
-
-Lemma function_ptr_translated:
-  forall v f,
-  Genv.find_funct_ptr ge v = Some f ->
-  Genv.find_funct_ptr tge v = Some (transf_fundef f).
-Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog).
-
-Lemma symbols_preserved:
-  forall id,
-  Genv.find_symbol tge id = Genv.find_symbol ge id.
-Proof (@Genv.find_symbol_transf _ _ _ transf_fundef prog).
-
-Lemma varinfo_preserved:
-  forall b, Genv.find_var_info tge b = Genv.find_var_info ge b.
-Proof (@Genv.find_var_info_transf _ _ _ transf_fundef prog).
-
-Lemma sig_preserved:
-  forall f, funsig (transf_fundef f) = funsig f.
-Proof.
-  destruct f; reflexivity.
-Qed.
-
-Lemma find_function_translated:
-  forall ros rs fd,
-  find_function ge ros rs = Some fd ->
-  find_function tge ros rs = Some (transf_fundef fd).
-Proof.
-  intros. destruct ros; simpl in *. 
-  apply functions_translated; auto.
-  rewrite symbols_preserved. destruct (Genv.find_symbol ge i). 
-  apply function_ptr_translated; auto. 
-  congruence.
-Qed.
-
-Inductive match_frames: stackframe -> stackframe -> Prop :=
-  | match_frames_intro:
-      forall f sp rs c,
-      match_frames (Stackframe f sp rs c)
-                   (Stackframe (transf_function f) sp rs (transf_code nil c nil)).
-
-Inductive match_states: state -> state -> Prop :=
-  | match_states_regular:
-      forall sm stk f sp c rs m stk' rs' eqs
-        (STK: list_forall2 match_frames stk stk')
-        (EQH: equations_hold rs' eqs)
-        (AG: agree sm rs rs')
-        (SAFE: sm = false \/ safe_move_insertion c = true),
-      match_states (State stk f sp c rs m)
-                   (State stk' (transf_function f) sp (transf_code eqs c nil) rs' m)
-  | match_states_call:
-      forall stk f rs m stk'
-        (STK: list_forall2 match_frames stk stk'),
-      match_states (Callstate stk f rs m)
-                   (Callstate stk' (transf_fundef f) rs m)
-  | match_states_return:
-      forall stk rs m stk'
-        (STK: list_forall2 match_frames stk stk'),
-      match_states (Returnstate stk rs m)
-                   (Returnstate stk' rs m).
-
-Definition measure (S: state) : nat :=
-  match S with
-  | State s f sp c rs m => List.length c
-  | _ => 0%nat
-  end.
-
-Remark match_parent_locset:
-  forall stk stk',
-  list_forall2 match_frames stk stk' ->
-  return_regs (parent_locset stk') = return_regs (parent_locset stk).
-Proof.
-  intros. inv H; auto. inv H0; auto.
-Qed.
-
-Theorem transf_step_correct:
-  forall S1 t S2, step ge S1 t S2 ->
-  forall S1' (MS: match_states S1 S1'),
-  (exists S2', step tge S1' t S2' /\ match_states S2 S2')
-  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
-Proof.
-Opaque destroyed_at_move_regs.
-  induction 1; intros; inv MS; simpl.
-(** getstack *)
-  simpl in SAFE. 
-  assert (SAFE': sm = false \/ ~In r destroyed_at_move_regs /\ safe_move_insertion b = true).
-    destruct (in_dec mreg_eq r destroyed_at_move_regs); simpl in SAFE; intuition congruence. 
-  destruct (is_incoming sl) eqn:?.
-  (* incoming, stays as getstack *)
-  assert (UGS: forall rs, undef_getstack sl rs = Locmap.set (R IT1) Vundef rs).
-    destruct sl; simpl in Heqb0; discriminate || auto.
-  left; econstructor; split.
-  rewrite transf_code_eq; constructor.
-  repeat rewrite UGS.
-  apply match_states_regular with sm. auto.
-  apply kill_loc_hold. apply kill_loc_hold; auto.
-  rewrite (agree_slot _ _ _ sl AG). apply agree_set. apply agree_set. auto.
-  tauto.
-  (* not incoming *)
-  assert (UGS: forall rs, undef_getstack sl rs = rs). 
-    destruct sl; simpl in Heqb0; discriminate || auto.
-  unfold contains_equation. 
-  destruct (in_dec eq_equation (mkeq r sl) eqs); simpl.
-  (* eliminated *)
-  right.  split. omega. split. auto. rewrite UGS.  
-  exploit EQH; eauto. simpl. intro EQ.
-  assert (EQ1: rs' (S sl) = rs (S sl)) by (eapply agree_slot; eauto).
-  assert (EQ2: rs' (R r) = rs (R r)) by (eapply agree_reg; eauto; tauto).
-  rewrite <- EQ1; rewrite EQ; rewrite EQ2. rewrite locmap_set_reg_same.
-  apply match_states_regular with sm; auto; tauto. 
-  (* found an equation *)
-  destruct (find_reg_containing sl eqs) as [r'|] eqn:?.
-  exploit EQH. eapply find_reg_containing_sound; eauto. 
-  simpl; intro EQ.
-  (* turned into a move *)
-  destruct (safe_move_insertion b) eqn:?. 
-  left; econstructor; split.
-  rewrite transf_code_eq. constructor. simpl; eauto. 
-  rewrite UGS. rewrite <- EQ.
-  apply match_states_regular with true; auto. 
-  apply eqs_movestack_hold; auto. 
-  rewrite (agree_slot _ _ _ sl AG). apply agree_set. eapply agree_undef_move_1; eauto.
-  (* left as a getstack *)
-  left; econstructor; split.
-  rewrite transf_code_eq. constructor.
-  repeat rewrite UGS.
-  apply match_states_regular with sm. auto. 
-  apply eqs_getstack_hold; auto. 
-  rewrite (agree_slot _ _ _ sl AG). apply agree_set. auto. 
-  intuition congruence. 
-  (* no equation, left as a getstack *)
-  left; econstructor; split.
-  rewrite transf_code_eq; constructor.
-  repeat rewrite UGS.
-  apply match_states_regular with sm. auto. 
-  apply eqs_getstack_hold; auto. 
-  rewrite (agree_slot _ _ _ sl AG). apply agree_set. auto.
-  tauto.
-
-(* setstack *)
-  left; econstructor; split. rewrite transf_code_eq; constructor.
-  apply match_states_regular with false; auto.
-  apply eqs_setstack_hold; auto.
-  rewrite (agree_reg _ _ _ r AG). apply agree_set. eapply agree_undef_move_2; eauto. 
-  simpl in SAFE. destruct (in_dec mreg_eq r destroyed_at_move_regs); simpl in SAFE; intuition congruence.
-
-(* op *)
-  left; econstructor; split. rewrite transf_code_eq; constructor.
-  instantiate (1 := v). rewrite <- H.
-  rewrite (agree_regs _ _ _ args AG). 
-  apply eval_operation_preserved. exact symbols_preserved.
-  simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence.
-  apply match_states_regular with false; auto.
-  apply kill_loc_hold; apply kill_at_op_hold; auto.
-  apply agree_set. eapply agree_undef_op; eauto.
-
-(* load *)
-  left; econstructor; split.
-  rewrite transf_code_eq; econstructor.  instantiate (1 := a).  rewrite <- H.
-  rewrite (agree_regs _ _ _ args AG). 
-  apply eval_addressing_preserved.  exact symbols_preserved.
-  simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence.
-  eauto.
-  apply match_states_regular with false; auto.
-  apply kill_loc_hold; apply kill_temps_hold; auto.
-  apply agree_set. eapply agree_undef_temps; eauto.
-
-(* store *)
-Opaque list_disjoint_dec.
-  simpl in SAFE.
-  assert (sm = false \/ ~In src destroyed_at_move_regs /\ list_disjoint args destroyed_at_move_regs).
-    destruct SAFE. auto. right. 
-    destruct (list_disjoint_dec mreg_eq (src :: args) destroyed_at_move_regs); try discriminate.
-    split. eapply list_disjoint_notin; eauto with coqlib. eapply list_disjoint_cons_left; eauto. 
-  left; econstructor; split.
-  rewrite transf_code_eq; econstructor.  instantiate (1 := a).  rewrite <- H.
-  rewrite (agree_regs _ _ _ args AG). 
-  apply eval_addressing_preserved.  exact symbols_preserved.
-  tauto.
-  rewrite (agree_reg _ _ _ src AG).
-  eauto.
-  tauto.
-  apply match_states_regular with false; auto.
-  apply kill_temps_hold; auto.
-  eapply agree_undef_temps; eauto.
-
-(* call *)
-  simpl in SAFE. assert (sm = false) by intuition congruence. 
-  subst sm. rewrite agree_false in AG. subst rs'. 
-  left; econstructor; split.
-  rewrite transf_code_eq; constructor. eapply find_function_translated; eauto. 
-  symmetry; apply sig_preserved.
-  constructor. constructor; auto. constructor. 
-
-(* tailcall *)
-  simpl in SAFE. assert (sm = false) by intuition congruence. 
-  subst sm. rewrite agree_false in AG. subst rs'. 
-  left; econstructor; split.
-  rewrite transf_code_eq; constructor. eapply find_function_translated; eauto. 
-  symmetry; apply sig_preserved. eauto. 
-  rewrite (match_parent_locset _ _ STK). constructor; auto.
-
-(* builtin *)
-  left; econstructor; split.
-  rewrite transf_code_eq; constructor.
-  rewrite (agree_regs _ _ _ args AG). 
-  eapply external_call_symbols_preserved; eauto. 
-  exact symbols_preserved. exact varinfo_preserved.
-  simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence.
-  apply match_states_regular with false; auto.
-  apply kill_loc_hold; apply kill_temps_hold; auto.
-  apply agree_set. eapply agree_undef_temps; eauto.
-
-(* annot *)
-  simpl in SAFE. assert (sm = false) by intuition congruence. 
-  subst sm. rewrite agree_false in AG. subst rs'. 
-  left; econstructor; split.
-  rewrite transf_code_eq; econstructor. eapply external_call_symbols_preserved; eauto. 
-  exact symbols_preserved. exact varinfo_preserved.
-  apply match_states_regular with false; auto.
-  rewrite agree_false; auto. 
-
-(* label *)
-  left; econstructor; split. rewrite transf_code_eq; constructor.
-  apply match_states_regular with false; auto. 
-  apply nil_hold.
-  simpl in SAFE. destruct SAFE. subst sm. auto. congruence.
-
-(* goto *)
-  generalize (transl_find_label lbl (fn_code f) nil).  rewrite H. simpl. intros.
-  left; econstructor; split. rewrite transf_code_eq; constructor; eauto.
-  apply match_states_regular with false; auto. 
-  apply nil_hold.
-  simpl in SAFE. destruct SAFE. subst sm. auto. congruence.
-
-(* cond true *)
-  generalize (transl_find_label lbl (fn_code f) nil).  rewrite H0. simpl. intros.
-  left; econstructor; split.
-  rewrite transf_code_eq; apply exec_Lcond_true; auto.
-  rewrite (agree_regs _ _ _ args AG). auto.
-  simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence.
-  eauto.
-  apply match_states_regular with false; auto.
-  apply nil_hold. 
-  eapply agree_undef_temps; eauto.
-
-(* cond false *)
-  left; econstructor; split. rewrite transf_code_eq; apply exec_Lcond_false; auto.
-  rewrite (agree_regs _ _ _ args AG). auto.
-  simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence.
-  apply match_states_regular with false; auto.
-  apply kill_temps_hold; auto.
-  eapply agree_undef_temps; eauto.
-
-(* jumptable *)
-  generalize (transl_find_label lbl (fn_code f) nil).  rewrite H1. simpl. intros.
-  left; econstructor; split. rewrite transf_code_eq; econstructor; eauto.
-  rewrite (agree_reg _ _ _ arg AG). auto.
-  simpl in SAFE. destruct (in_dec mreg_eq arg destroyed_at_move_regs); simpl in SAFE; intuition congruence.
-  apply match_states_regular with false; auto.
-  apply nil_hold.
-  eapply agree_undef_temps; eauto.
-
-(* return *)
-  simpl in SAFE. destruct SAFE; try discriminate. subst sm. rewrite agree_false in AG. subst rs'.
-  left; econstructor; split.
-  rewrite transf_code_eq; constructor. simpl. eauto. 
-  rewrite (match_parent_locset _ _ STK). 
-  constructor; auto.
-
-(* internal *)
-  left; econstructor; split.
-  constructor. simpl; eauto.
-  simpl. apply match_states_regular with false; auto. apply nil_hold. rewrite agree_false; auto.
-
-(* external *)
-  left; econstructor; split.
-  econstructor. eapply external_call_symbols_preserved; eauto. 
-  exact symbols_preserved. exact varinfo_preserved. 
-  auto. eauto. 
-  constructor; auto. 
-
-(* return *)
-  inv STK. inv H1. left; econstructor; split. constructor.
-  apply match_states_regular with false; auto.
-  apply nil_hold.
-  rewrite agree_false; auto.
-Qed.
-
-Lemma transf_initial_states:
-  forall st1, initial_state prog st1 ->
-  exists st2, initial_state tprog st2 /\ match_states st1 st2.
-Proof.
-  intros. inversion H.
-  econstructor; split.
-  econstructor.
-  apply Genv.init_mem_transf; eauto.
-  rewrite symbols_preserved. eauto.
-  apply function_ptr_translated; eauto. 
-  rewrite sig_preserved. auto.
-  econstructor; eauto. constructor.
-Qed.
-
-Lemma transf_final_states:
-  forall st1 st2 r, 
-  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
-Proof.
-  intros. inv H0. inv H. inv STK. econstructor. auto.
-Qed.
-
-Theorem transf_program_correct:
-  forward_simulation (Linear.semantics prog) (Linear.semantics tprog).
-Proof.
-  eapply forward_simulation_opt.
-  eexact symbols_preserved.
-  eexact transf_initial_states.
-  eexact transf_final_states.
-  eexact transf_step_correct. 
-Qed.
-
-End PRESERVATION.
diff --git a/backend/RREtyping.v b/backend/RREtyping.v
deleted file mode 100644
index 170d8ad..0000000
--- a/backend/RREtyping.v
+++ /dev/null
@@ -1,110 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Proof of type preservation for the [RRE] pass. *)
-
-Require Import Coqlib.
-Require Import AST.
-Require Import Locations.
-Require Import Linear.
-Require Import Lineartyping.
-Require Import Conventions.
-Require Import RRE.
-Require Import RREproof.
-
-Remark wt_cons:
-  forall f c i, wt_instr f i -> wt_code f c -> wt_code f (i::c).
-Proof.
-  intros; red; intros. simpl in H1; destruct H1. congruence. auto.
-Qed.
-
-Hint Constructors wt_instr : linearty.
-Hint Resolve wt_cons: linearty.
-
-Definition wt_eqs (eqs: equations) :=
-  forall e, In e eqs -> slot_type (e_slot e) = mreg_type (e_reg e).
-
-Lemma wt_eqs_nil:
-  wt_eqs nil.
-Proof. red; simpl; tauto. Qed.
-
-Lemma wt_eqs_cons:
-  forall r s eqs,
-  slot_type s = mreg_type r -> wt_eqs eqs -> wt_eqs (mkeq r s :: eqs).
-Proof.
-  intros; red; simpl; intros. destruct H1. subst; simpl; auto. auto.
-Qed.
-  
-Lemma wt_kill_loc:
-  forall l eqs,
-  wt_eqs eqs -> wt_eqs (kill_loc l eqs).
-Proof.
-  intros; red; intros. exploit In_kill_loc; eauto. intros [A B]. auto.
-Qed.
-
-Lemma wt_kill_locs:
-  forall ll eqs,
-  wt_eqs eqs -> wt_eqs (kill_locs ll eqs).
-Proof.
-  intros; red; intros. exploit In_kill_locs; eauto. intros [A B]. auto.
-Qed.
-
-Lemma wt_kill_temps:
-  forall eqs, wt_eqs eqs -> wt_eqs (kill_temps eqs).
-Proof.
-  exact (wt_kill_locs temporaries).
-Qed.
-
-Lemma wt_kill_at_move:
-  forall eqs, wt_eqs eqs -> wt_eqs (kill_at_move eqs).
-Proof.
-  exact (wt_kill_locs destroyed_at_move).
-Qed.
-
-Hint Resolve wt_eqs_nil wt_eqs_cons wt_kill_loc wt_kill_locs
-             wt_kill_temps wt_kill_at_move: linearty.
-
-Lemma wt_kill_op:
-  forall op eqs, wt_eqs eqs -> wt_eqs (kill_op op eqs).
-Proof.
-  intros; destruct op; simpl; apply wt_kill_locs; auto.
-Qed.
-
-Hint Resolve wt_kill_op: linearty.
-
-Lemma wt_transf_code:
-  forall f c eqs, wt_code f c -> wt_eqs eqs ->
-  wt_code (transf_function f) (transf_code eqs c nil).
-Proof.
-  induction c; intros; simpl.
-  red; simpl; tauto.
-  assert (WI: wt_instr f a) by auto with coqlib.
-  assert (WC: wt_code f c) by (red; auto with coqlib).
-  clear H.
-  inv WI; rewrite ? transf_code_eq; auto 10 with linearty.
-  destruct (is_incoming s) eqn:?. auto with linearty.
-  destruct (contains_equation s r eqs). auto with linearty.
-  destruct (find_reg_containing s eqs) as [r'|] eqn:?; auto with linearty.
-  assert (mreg_type r' = mreg_type r).
-  exploit H0. eapply find_reg_containing_sound; eauto. simpl. congruence.
-  rewrite ! transf_code_eq.
-  destruct (safe_move_insertion c); auto 10 with linearty.
-Qed.
-
-Lemma program_typing_preserved:
-  forall p, wt_program p -> wt_program (transf_program p).
-Proof.
-  intros. red; intros. exploit transform_program_function; eauto. 
-  intros [f0 [A B]]. subst f. exploit H; eauto. intros WTFD.
-  inv WTFD; simpl; constructor.  red; simpl. 
-  apply wt_transf_code; auto with linearty.
-Qed.
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index 007191a..f5e34e4 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -362,18 +362,6 @@ Fixpoint alloc_regs (map: mapping) (al: exprlist)
       ret (r :: rl)
   end.
 
-(** A variant of [alloc_regs] for two-address instructions:
-  reuse the result register as destination for the first argument. *)
-
-Definition alloc_regs_2addr (map: mapping) (al: exprlist) (rd: reg)
-                            : mon (list reg) :=
-  match al with
-  | Enil =>
-      ret nil
-  | Econs a bl =>
-      do rl <- alloc_regs map bl; ret (rd :: rl)
-  end.
-
 (** [alloc_optreg] is used for function calls.  If a destination is
   specified for the call, it is returned.  Otherwise, a fresh
   register is returned. *)
@@ -406,9 +394,7 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node)
   | Evar v =>
       do r <- find_var map v; add_move r rd nd
   | Eop op al =>
-      do rl <- if two_address_op op
-               then alloc_regs_2addr map al rd
-               else alloc_regs map al;
+      do rl <- alloc_regs map al;
       do no <- add_instr (Iop op rl rd nd);
       transl_exprlist map al rl no
   | Eload chunk addr al =>
@@ -427,6 +413,14 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node)
          transl_expr map b r nc
   | Eletvar n =>
       do r <- find_letvar map n; add_move r rd nd
+  | Ebuiltin ef al =>
+      do rl <- alloc_regs map al;
+      do no <- add_instr (Ibuiltin ef rl rd nd);
+      transl_exprlist map al rl no
+  | Eexternal id sg al =>
+      do rl <- alloc_regs map al;
+      do no <- add_instr (Icall sg (inr id) rl rd nd);
+      transl_exprlist map al rl no
   end
 
 (** Translation of a list of expressions.  The expressions are evaluated
@@ -495,16 +489,6 @@ Fixpoint transl_switch (r: reg) (nexits: list node) (t: comptree)
       add_instr (Iop op (r :: nil) rt n3)
   end.
 
-(** Detect a two-address operator at the top of an expression. *)
-
-Fixpoint expr_is_2addr_op (e: expr) : bool :=
-  match e with
-  | Eop op _ => two_address_op op
-  | Econdition cond el e2 e3 => expr_is_2addr_op e2 || expr_is_2addr_op e3
-  | Elet e1 e2 => expr_is_2addr_op e2
-  | _ => false
-  end.
-
 (** Translation of statements.  [transl_stmt map s nd nexits nret rret]
   enriches the current CFG with the RTL instructions necessary to
   execute the CminorSel statement [s], and returns the node of the first
@@ -524,12 +508,7 @@ Fixpoint transl_stmt (map: mapping) (s: stmt) (nd: node)
       ret nd
   | Sassign v b =>
       do r <- find_var map v;
-      if expr_is_2addr_op b then
-        do rd <- new_reg;
-        do n1 <- add_move rd r nd;
-        transl_expr map b rd n1
-      else
-        transl_expr map b r nd
+      transl_expr map b r nd
   | Sstore chunk addr al b =>
       do rl <- alloc_regs map al;
       do r <- alloc_reg map b;
diff --git a/backend/RTLgenaux.ml b/backend/RTLgenaux.ml
index 363bc2b..150de5a 100644
--- a/backend/RTLgenaux.ml
+++ b/backend/RTLgenaux.ml
@@ -34,6 +34,8 @@ let rec size_expr = function
       1 + size_exprs args + max (size_expr e1) (size_expr e2)
   | Elet(e1, e2) -> size_expr e1 + size_expr e2
   | Eletvar n -> 0
+  | Ebuiltin(ef, el) -> 2 + size_exprs el
+  | Eexternal(id, sg, el) -> 5 + size_exprs el
 
 and size_exprs = function
   | Enil -> 0
diff --git a/backend/RTLgenproof.v b/backend/RTLgenproof.v
index 690611c..c3cae28 100644
--- a/backend/RTLgenproof.v
+++ b/backend/RTLgenproof.v
@@ -485,19 +485,18 @@ Section CORRECTNESS_EXPR.
 
 Variable sp: val.
 Variable e: env.
-Variable m tm: mem.
-Hypothesis mem_extends: Mem.extends m tm.
+Variable m: mem.
 
 (** The proof of semantic preservation for the translation of expressions
   is a simulation argument based on diagrams of the following form:
 <<
                     I /\ P
-    e, le, m, a ------------- State cs code sp ns rs m
+    e, le, m, a ------------- State cs code sp ns rs tm
          ||                      |
          ||                      |*
          ||                      |
          \/                      v
-    e, le, m', v ------------ State cs code sp nd rs' m'
+    e, le, m, v ------------ State cs code sp nd rs' tm'
                     I /\ Q
 >>
   where [tr_expr code map pr a ns nd rd] is assumed to hold.
@@ -521,27 +520,31 @@ Hypothesis mem_extends: Mem.extends m tm.
 
 Definition transl_expr_prop 
      (le: letenv) (a: expr) (v: val) : Prop :=
-  forall cs f map pr ns nd rd rs dst
+  forall tm cs f map pr ns nd rd rs dst
     (MWF: map_wf map)
     (TE: tr_expr f.(fn_code) map pr a ns nd rd dst)
-    (ME: match_env map e le rs),
-  exists rs',
-     star step tge (State cs f sp ns rs tm) E0 (State cs f sp nd rs' tm)
+    (ME: match_env map e le rs)
+    (EXT: Mem.extends m tm),
+  exists rs', exists tm',
+     star step tge (State cs f sp ns rs tm) E0 (State cs f sp nd rs' tm')
   /\ match_env map (set_optvar dst v e) le rs'
   /\ Val.lessdef v rs'#rd
-  /\ (forall r, In r pr -> rs'#r = rs#r).
+  /\ (forall r, In r pr -> rs'#r = rs#r)
+  /\ Mem.extends m tm'.
 
 Definition transl_exprlist_prop 
      (le: letenv) (al: exprlist) (vl: list val) : Prop :=
-  forall cs f map pr ns nd rl rs
+  forall tm cs f map pr ns nd rl rs
     (MWF: map_wf map)
     (TE: tr_exprlist f.(fn_code) map pr al ns nd rl)
-    (ME: match_env map e le rs),
-  exists rs',
-     star step tge (State cs f sp ns rs tm) E0 (State cs f sp nd rs' tm)
+    (ME: match_env map e le rs)
+    (EXT: Mem.extends m tm),
+  exists rs', exists tm',
+     star step tge (State cs f sp ns rs tm) E0 (State cs f sp nd rs' tm')
   /\ match_env map e le rs'
   /\ Val.lessdef_list vl rs'##rl
-  /\ (forall r, In r pr -> rs'#r = rs#r).
+  /\ (forall r, In r pr -> rs'#r = rs#r)
+  /\ Mem.extends m tm'.
 
 (** The correctness of the translation is a huge induction over
   the Cminor evaluation derivation for the source program.  To keep
@@ -559,21 +562,23 @@ Proof.
   intros; red; intros. inv TE.
   exploit match_env_find_var; eauto. intro EQ.
   exploit tr_move_correct; eauto. intros [rs' [A [B C]]]. 
-  exists rs'; split. eauto.
+  exists rs'; exists tm; split. eauto.
   destruct H2 as [[D E] | [D E]].
   (* optimized case *)
   subst r dst. simpl.
   assert (forall r, rs'#r = rs#r).
     intros. destruct (Reg.eq r rd). subst r. auto. auto. 
   split. eapply match_env_invariant; eauto.
-  split. congruence. auto.
+  split. congruence.
+  split; auto.
   (* general case *)
   split.
   apply match_env_invariant with (rs#rd <- (rs#r)).
   apply match_env_update_dest; auto. 
   intros. rewrite Regmap.gsspec. destruct (peq r0 rd). congruence. auto. 
   split. congruence. 
-  intros. apply C. intuition congruence.
+  split. intros. apply C. intuition congruence.
+  auto.
 Qed.
 
 Lemma transl_expr_Eop_correct:
@@ -586,9 +591,9 @@ Lemma transl_expr_Eop_correct:
 Proof.
   intros; red; intros. inv TE.
 (* normal case *) 
-  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RR1 RO1]]]].
+  exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RR1 [RO1 EXT1]]]]]].
   edestruct eval_operation_lessdef as [v' []]; eauto.
-  exists (rs1#rd <- v').
+  exists (rs1#rd <- v'); exists tm1.
 (* Exec *)
   split. eapply star_right. eexact EX1.
   eapply exec_Iop; eauto.
@@ -599,7 +604,9 @@ Proof.
 (* Result reg *)
   split. rewrite Regmap.gss. auto.
 (* Other regs *)
-  intros. rewrite Regmap.gso. auto. intuition congruence.
+  split. intros. rewrite Regmap.gso. auto. intuition congruence.
+(* Mem *)
+  auto.
 Qed.
 
 Lemma transl_expr_Eload_correct:
@@ -612,10 +619,10 @@ Lemma transl_expr_Eload_correct:
   transl_expr_prop le (Eload chunk addr args) v.
 Proof.
   intros; red; intros. inv TE.
-  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]].
   edestruct eval_addressing_lessdef as [vaddr' []]; eauto.
   edestruct Mem.loadv_extends as [v' []]; eauto.
-  exists (rs1#rd <- v').
+  exists (rs1#rd <- v'); exists tm1.
 (* Exec *)
   split. eapply star_right. eexact EX1. eapply exec_Iload. eauto.
   instantiate (1 := vaddr'). rewrite <- H3.
@@ -626,7 +633,9 @@ Proof.
 (* Result *)
   split. rewrite Regmap.gss. auto.
 (* Other regs *)
-  intros. rewrite Regmap.gso. auto. intuition congruence. 
+  split. intros. rewrite Regmap.gso. auto. intuition congruence.
+(* Mem *)
+  auto. 
 Qed.
 
 Lemma transl_expr_Econdition_correct:
@@ -640,11 +649,11 @@ Lemma transl_expr_Econdition_correct:
   transl_expr_prop le (Econdition cond al ifso ifnot) v.
 Proof.
   intros; red; intros; inv TE. inv H14.
-  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. 
+  exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]]. 
   assert (tr_expr f.(fn_code) map pr (if vcond then ifso else ifnot) (if vcond then ntrue else nfalse) nd rd dst).
     destruct vcond; auto.
-  exploit H3; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
-  exists rs2.
+  exploit H3; eauto. intros [rs2 [tm2 [EX2 [ME2 [RES2 [OTHER2 EXT2]]]]]].
+  exists rs2; exists tm2.
 (* Exec *)
   split. eapply star_trans. eexact EX1.
   eapply star_left. eapply exec_Icond. eauto. eapply eval_condition_lessdef; eauto. reflexivity. 
@@ -654,7 +663,9 @@ Proof.
 (* Result value *)
   split. assumption.
 (* Other regs *)
-  intros. transitivity (rs1#r); auto.
+  split. intros. transitivity (rs1#r); auto.
+(* Mem *)
+  auto.
 Qed.
 
 Lemma transl_expr_Elet_correct:
@@ -666,12 +677,12 @@ Lemma transl_expr_Elet_correct:
   transl_expr_prop le (Elet a1 a2) v2.
 Proof.
   intros; red; intros; inv TE. 
-  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]].
   assert (map_wf (add_letvar map r)).
     eapply add_letvar_wf; eauto. 
   exploit H2; eauto. eapply match_env_bind_letvar; eauto.
-  intros [rs2 [EX2 [ME3 [RES2 OTHER2]]]].
-  exists rs2.
+  intros [rs2 [tm2 [EX2 [ME3 [RES2 [OTHER2 EXT2]]]]]].
+  exists rs2; exists tm2.
 (* Exec *)
   split. eapply star_trans. eexact EX1. eexact EX2. auto.
 (* Match-env *)
@@ -679,7 +690,9 @@ Proof.
 (* Result *)
   split. assumption.
 (* Other regs *)
-  intros. transitivity (rs1#r0); auto.
+  split. intros. transitivity (rs1#r0); auto.
+(* Mem *)
+  auto.
 Qed.
 
 Lemma transl_expr_Eletvar_correct:
@@ -689,7 +702,7 @@ Lemma transl_expr_Eletvar_correct:
 Proof.
   intros; red; intros; inv TE.
   exploit tr_move_correct; eauto. intros [rs1 [EX1 [RES1 OTHER1]]].
-  exists rs1.
+  exists rs1; exists tm.
 (* Exec *)
   split. eexact EX1.
 (* Match-env *)
@@ -706,10 +719,73 @@ Proof.
 (* Result *)
   split. rewrite RES1. eapply match_env_find_letvar; eauto. 
 (* Other regs *)
-  intros. 
+  split. intros. 
   destruct H2 as [[A B] | [A B]].
   destruct (Reg.eq r0 rd); subst; auto.
   apply OTHER1. intuition congruence.
+(* Mem *)
+  auto.
+Qed.
+
+Lemma transl_expr_Ebuiltin_correct:
+  forall le ef al vl v,
+  eval_exprlist ge sp e m le al vl ->
+  transl_exprlist_prop le al vl ->
+  external_call ef ge vl m E0 v m ->
+  transl_expr_prop le (Ebuiltin ef al) v.
+Proof.
+  intros; red; intros. inv TE.
+  exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RR1 [RO1 EXT1]]]]]].
+  exploit external_call_mem_extends; eauto. 
+  intros [v' [tm2 [A [B [C [D E]]]]]].
+  exists (rs1#rd <- v'); exists tm2.
+(* Exec *)
+  split. eapply star_right. eexact EX1.
+  eapply exec_Ibuiltin; eauto.
+  eapply external_call_symbols_preserved; eauto. exact symbols_preserved. exact varinfo_preserved.
+  reflexivity.
+(* Match-env *)
+  split. eauto with rtlg.
+(* Result reg *)
+  split. rewrite Regmap.gss. auto.
+(* Other regs *)
+  split. intros. rewrite Regmap.gso. auto. intuition congruence.
+(* Mem *)
+  auto.
+Qed.
+
+Lemma transl_expr_Eexternal_correct:
+  forall le id sg al b ef vl v,
+  Genv.find_symbol ge id = Some b ->
+  Genv.find_funct_ptr ge b = Some (External ef) ->
+  ef_sig ef = sg ->
+  eval_exprlist ge sp e m le al vl ->
+  transl_exprlist_prop le al vl ->
+  external_call ef ge vl m E0 v m ->
+  transl_expr_prop le (Eexternal id sg al) v.
+Proof.
+  intros; red; intros. inv TE.
+  exploit H3; eauto. intros [rs1 [tm1 [EX1 [ME1 [RR1 [RO1 EXT1]]]]]].
+  exploit external_call_mem_extends; eauto. 
+  intros [v' [tm2 [A [B [C [D E]]]]]].
+  exploit function_ptr_translated; eauto. simpl. intros [tf [P Q]]. inv Q. 
+  exists (rs1#rd <- v'); exists tm2.
+(* Exec *)
+  split. eapply star_trans. eexact EX1.
+  eapply star_left. eapply exec_Icall; eauto.
+  simpl. rewrite symbols_preserved. rewrite H. eauto. auto.
+  eapply star_left. eapply exec_function_external.
+  eapply external_call_symbols_preserved; eauto. exact symbols_preserved. exact varinfo_preserved.
+  apply star_one. apply exec_return. 
+  reflexivity. reflexivity. reflexivity.
+(* Match-env *)
+  split. eauto with rtlg.
+(* Result reg *)
+  split. rewrite Regmap.gss. auto.
+(* Other regs *)
+  split. intros. rewrite Regmap.gso. auto. intuition congruence.
+(* Mem *)
+  auto.
 Qed.
 
 Lemma transl_exprlist_Enil_correct:
@@ -717,7 +793,7 @@ Lemma transl_exprlist_Enil_correct:
   transl_exprlist_prop le Enil nil.
 Proof.
   intros; red; intros; inv TE.
-  exists rs.
+  exists rs; exists tm.
   split. apply star_refl.
   split. assumption.
   split. constructor.
@@ -734,9 +810,9 @@ Lemma transl_exprlist_Econs_correct:
   transl_exprlist_prop le (Econs a1 al) (v1 :: vl).
 Proof.
   intros; red; intros; inv TE.
-  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
-  exploit H2; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
-  exists rs2.
+  exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]].
+  exploit H2; eauto. intros [rs2 [tm2 [EX2 [ME2 [RES2 [OTHER2 EXT2]]]]]].
+  exists rs2; exists tm2.
 (* Exec *)
   split. eapply star_trans. eexact EX1. eexact EX2. auto. 
 (* Match-env *)
@@ -746,9 +822,11 @@ Proof.
   simpl; tauto.
   auto.
 (* Other regs *)
-  intros. transitivity (rs1#r).
+  split. intros. transitivity (rs1#r).
   apply OTHER2; auto. simpl; tauto. 
   apply OTHER1; auto.
+(* Mem *)
+  auto.
 Qed.
 
 Theorem transl_expr_correct:
@@ -765,6 +843,8 @@ Proof
      transl_expr_Econdition_correct
      transl_expr_Elet_correct
      transl_expr_Eletvar_correct
+     transl_expr_Ebuiltin_correct
+     transl_expr_Eexternal_correct
      transl_exprlist_Enil_correct
      transl_exprlist_Econs_correct).
 
@@ -782,6 +862,8 @@ Proof
      transl_expr_Econdition_correct
      transl_expr_Elet_correct
      transl_expr_Eletvar_correct
+     transl_expr_Ebuiltin_correct
+     transl_expr_Eexternal_correct
      transl_exprlist_Enil_correct
      transl_exprlist_Econs_correct).
 
@@ -1019,37 +1101,25 @@ Proof.
   
   (* assign *)
   inv TS.
-  (* optimized translation (not 2 addr) *)
   exploit transl_expr_correct; eauto. 
-  intros [rs' [A [B [C D]]]].
+  intros [rs' [tm' [A [B [C [D E]]]]]].
   econstructor; split.
   right; split. eauto. Lt_state.
   econstructor; eauto. constructor.
-  (* alternate translation (2 addr) *)
-  exploit transl_expr_correct; eauto.
-  intros [rs' [A [B [C D]]]].
-  exploit tr_move_correct; eauto. 
-  intros [rs'' [P [Q R]]].
-  econstructor; split.
-  right; split. eapply star_trans. eexact A. eexact P. traceEq. Lt_state.
-  econstructor; eauto. constructor. 
-  simpl in B. apply match_env_invariant with (rs'#r <- (rs'#rd)). 
-  apply match_env_update_var; auto. 
-  intros. rewrite Regmap.gsspec. destruct (peq r0 r). congruence. auto. 
 
   (* store *)
   inv TS.
   exploit transl_exprlist_correct; eauto.
-  intros [rs' [A [B [C D]]]].
+  intros [rs' [tm' [A [B [C [D E]]]]]].
   exploit transl_expr_correct; eauto.
-  intros [rs'' [E [F [G J]]]].
+  intros [rs'' [tm'' [F [G [J [K L]]]]]].
   assert (Val.lessdef_list vl rs''##rl).
     replace (rs'' ## rl) with (rs' ## rl). auto.
-    apply list_map_exten. intros. apply J. auto.
+    apply list_map_exten. intros. apply K. auto.
   edestruct eval_addressing_lessdef as [vaddr' []]; eauto.
-  edestruct Mem.storev_extends as [tm' []]; eauto.
+  edestruct Mem.storev_extends as [tm''' []]; eauto.
   econstructor; split.
-  left; eapply plus_right. eapply star_trans. eexact A. eexact E. reflexivity.
+  left; eapply plus_right. eapply star_trans. eexact A. eexact F. reflexivity.
   eapply exec_Istore with (a := vaddr'). eauto.
   rewrite <- H4. apply eval_addressing_preserved. exact symbols_preserved.
   eauto. traceEq.
@@ -1059,9 +1129,9 @@ Proof.
   inv TS; inv H.
   (* indirect *)
   exploit transl_expr_correct; eauto.
-  intros [rs' [A [B [C D]]]].
+  intros [rs' [tm' [A [B [C [D X]]]]]].
   exploit transl_exprlist_correct; eauto.
-  intros [rs'' [E [F [G J]]]].
+  intros [rs'' [tm'' [E [F [G [J Y]]]]]].
   exploit functions_translated; eauto. intros [tf' [P Q]].
   econstructor; split.
   left; eapply plus_right. eapply star_trans. eexact A. eexact E. reflexivity.
@@ -1071,7 +1141,7 @@ Proof.
   constructor; auto. econstructor; eauto.
   (* direct *)
   exploit transl_exprlist_correct; eauto.
-  intros [rs'' [E [F [G J]]]].
+  intros [rs'' [tm'' [E [F [G [J Y]]]]]].
   exploit functions_translated; eauto. intros [tf' [P Q]].
   econstructor; split.
   left; eapply plus_right. eexact E.
@@ -1085,13 +1155,13 @@ Proof.
   inv TS; inv H.
   (* indirect *)
   exploit transl_expr_correct; eauto.
-  intros [rs' [A [B [C D]]]].
+  intros [rs' [tm' [A [B [C [D X]]]]]].
   exploit transl_exprlist_correct; eauto.
-  intros [rs'' [E [F [G J]]]].
+  intros [rs'' [tm'' [E [F [G [J Y]]]]]].
   exploit functions_translated; eauto. intros [tf' [P Q]].
   exploit match_stacks_call_cont; eauto. intros [U V].
   assert (fn_stacksize tf = fn_stackspace f). inv TF; auto.
-  edestruct Mem.free_parallel_extends as [tm' []]; eauto.
+  edestruct Mem.free_parallel_extends as [tm''' []]; eauto.
   econstructor; split.
   left; eapply plus_right. eapply star_trans. eexact A. eexact E. reflexivity.
   eapply exec_Itailcall; eauto. simpl. rewrite J. destruct C. eauto. discriminate P. simpl; auto.
@@ -1101,11 +1171,11 @@ Proof.
   constructor; auto.
   (* direct *)
   exploit transl_exprlist_correct; eauto.
-  intros [rs'' [E [F [G J]]]].
+  intros [rs'' [tm'' [E [F [G [J Y]]]]]].
   exploit functions_translated; eauto. intros [tf' [P Q]].
   exploit match_stacks_call_cont; eauto. intros [U V].
   assert (fn_stacksize tf = fn_stackspace f). inv TF; auto.
-  edestruct Mem.free_parallel_extends as [tm' []]; eauto.
+  edestruct Mem.free_parallel_extends as [tm''' []]; eauto.
   econstructor; split.
   left; eapply plus_right. eexact E.
   eapply exec_Itailcall; eauto. simpl. rewrite symbols_preserved. rewrite H5. 
@@ -1118,8 +1188,8 @@ Proof.
   (* builtin *)
   inv TS. 
   exploit transl_exprlist_correct; eauto.
-  intros [rs' [E [F [G J]]]].
-  edestruct external_call_mem_extends as [tv [tm' [A [B [C D]]]]]; eauto.
+  intros [rs' [tm' [E [F [G [J K]]]]]].
+  edestruct external_call_mem_extends as [tv [tm'' [A [B [C D]]]]]; eauto.
   econstructor; split.
   left. eapply plus_right. eexact E.
   eapply exec_Ibuiltin. eauto. 
@@ -1137,7 +1207,7 @@ Proof.
 
   (* ifthenelse *)
   inv TS. inv H13.
-  exploit transl_exprlist_correct; eauto. intros [rs' [A [B [C D]]]].
+  exploit transl_exprlist_correct; eauto. intros [rs' [tm' [A [B [C [D E]]]]]].
   econstructor; split.
   left. eapply plus_right. eexact A. eapply exec_Icond; eauto.
   eapply eval_condition_lessdef; eauto. traceEq.
@@ -1179,7 +1249,7 @@ Proof.
   inv TS.
   exploit validate_switch_correct; eauto. intro CTM.
   exploit transl_expr_correct; eauto. 
-  intros [rs' [A [B [C D]]]].
+  intros [rs' [tm' [A [B [C [D X]]]]]].
   exploit transl_switch_correct; eauto. inv C. auto.
   intros [nd [rs'' [E [F G]]]].
   econstructor; split.
@@ -1200,10 +1270,10 @@ Proof.
   (* return some *)
   inv TS.
   exploit transl_expr_correct; eauto.
-  intros [rs' [A [B [C D]]]].
+  intros [rs' [tm' [A [B [C [D E]]]]]].
   exploit match_stacks_call_cont; eauto. intros [U V].
   inversion TF.
-  edestruct Mem.free_parallel_extends as [tm' []]; eauto.
+  edestruct Mem.free_parallel_extends as [tm'' []]; eauto.
   econstructor; split.
   left; eapply plus_right. eexact A. eapply exec_Ireturn; eauto.
   rewrite H4; eauto. traceEq.
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
index c50c702..d8d5dc8 100644
--- a/backend/RTLgenspec.v
+++ b/backend/RTLgenspec.v
@@ -500,32 +500,6 @@ Proof.
   right; eauto with rtlg.
 Qed.
 
-Lemma alloc_regs_2addr_valid:
-  forall al rd s1 s2 map rl i,
-  map_valid map s1 ->
-  reg_valid rd s1 ->
-  alloc_regs_2addr map al rd s1 = OK rl s2 i ->
-  regs_valid rl s2.
-Proof.
-  unfold alloc_regs_2addr; intros. 
-  destruct al; monadInv H1.
-  apply regs_valid_nil.
-  apply regs_valid_cons. eauto with rtlg. eauto with rtlg. 
-Qed.
-Hint Resolve alloc_regs_2addr_valid: rtlg.
-
-Lemma alloc_regs_2addr_fresh_or_in_map:
-  forall map al rd s rl s' i,
-  map_valid map s ->
-  alloc_regs_2addr map al rd s = OK rl s' i ->
-  forall r, In r rl -> r = rd \/ reg_in_map map r \/ reg_fresh r s.
-Proof.
-  unfold alloc_regs_2addr; intros. 
-  destruct al; monadInv H0.
-  elim H1.
-  simpl in H1; destruct H1. auto. right. eapply alloc_regs_fresh_or_in_map; eauto.
-Qed.
-
 (** A register is an adequate target for holding the value of an
   expression if
 - either the register is associated with a Cminor let-bound variable
@@ -628,24 +602,8 @@ Proof.
   apply regs_valid_cons; eauto with rtlg.
 Qed.
 
-Lemma alloc_regs_2addr_target_ok:
-  forall map al rd pr s1 rl s2 i,
-  map_valid map s1 ->
-  regs_valid pr s1 ->
-  reg_valid rd s1 -> 
-  ~(reg_in_map map rd) -> ~In rd pr ->
-  alloc_regs_2addr map al rd s1 = OK rl s2 i ->
-  target_regs_ok map pr al rl.
-Proof.
-  unfold alloc_regs_2addr; intros. destruct al; monadInv H4. 
-  constructor.
-  constructor. constructor; auto. 
-  eapply alloc_regs_target_ok; eauto.
-  apply regs_valid_cons; auto. 
-Qed.
-
 Hint Resolve new_reg_target_ok alloc_reg_target_ok 
-             alloc_regs_target_ok alloc_regs_2addr_target_ok: rtlg.  
+             alloc_regs_target_ok: rtlg.  
 
 (** The following predicate is a variant of [target_reg_ok] used
   to characterize registers that are adequate for holding the return
@@ -760,6 +718,17 @@ Inductive tr_expr (c: code):
       ((rd = r /\ dst = None) \/ (reg_map_ok map rd dst /\ ~In rd pr)) ->
       tr_move c ns r nd rd ->
       tr_expr c map pr (Eletvar n) ns nd rd dst
+  | tr_Ebuiltin: forall map pr ef al ns nd rd dst n1 rl,
+      tr_exprlist c map pr al ns n1 rl ->
+      c!n1 = Some (Ibuiltin ef rl rd nd) ->
+      reg_map_ok map rd dst -> ~In rd pr ->
+      tr_expr c map pr (Ebuiltin ef al) ns nd rd dst
+  | tr_Eexternal: forall map pr id sg al ns nd rd dst n1 rl,
+      tr_exprlist c map pr al ns n1 rl ->
+      c!n1 = Some (Icall sg (inr _ id) rl rd nd) ->
+      reg_map_ok map rd dst -> ~In rd pr ->
+      tr_expr c map pr (Eexternal id sg al) ns nd rd dst
+
 
 (** [tr_condition c map pr cond al ns ntrue nfalse rd] holds if the graph [c],
   starting at node [ns], contains instructions that compute the truth
@@ -834,13 +803,8 @@ Inductive tr_stmt (c: code) (map: mapping):
   | tr_Sskip: forall ns nexits ngoto nret rret,
      tr_stmt c map Sskip ns ns nexits ngoto nret rret
   | tr_Sassign: forall id a ns nd nexits ngoto nret rret r,
-     map.(map_vars)!id = Some r -> expr_is_2addr_op a = false ->
-     tr_expr c map nil a ns nd r (Some id) ->
-     tr_stmt c map (Sassign id a) ns nd nexits ngoto nret rret
-  | tr_Sassign_2: forall id a ns n1 nd nexits ngoto nret rret rd r,
      map.(map_vars)!id = Some r ->
-     tr_expr c map nil a ns n1 rd None ->
-     tr_move c n1 rd nd r ->
+     tr_expr c map nil a ns nd r (Some id) ->
      tr_stmt c map (Sassign id a) ns nd nexits ngoto nret rret
   | tr_Sstore: forall chunk addr al b ns nd nexits ngoto nret rret rd n1 rl n2,
      tr_exprlist c map nil al ns n1 rl ->
@@ -1008,9 +972,7 @@ Proof.
   inv OK. left; split; congruence. right; eauto with rtlg.
   eapply add_move_charact; eauto.
   (* Eop *)
-  inv OK. destruct (two_address_op o). 
-  econstructor; eauto with rtlg.
-  eapply transl_exprlist_charact; eauto with rtlg.
+  inv OK.
   econstructor; eauto with rtlg.
   eapply transl_exprlist_charact; eauto with rtlg.
   (* Eload *)
@@ -1045,6 +1007,14 @@ Proof.
   inv OK. left; split; congruence. right; eauto with rtlg.
   eapply add_move_charact; eauto.
   monadInv EQ1.
+  (* Ebuiltin *)
+  inv OK.
+  econstructor; eauto with rtlg.
+  eapply transl_exprlist_charact; eauto with rtlg.
+  (* Eexternal *)
+  inv OK.
+  econstructor; eauto with rtlg.
+  eapply transl_exprlist_charact; eauto with rtlg.
 
 (* Lists *)
 
@@ -1070,25 +1040,21 @@ Lemma transl_expr_assign_charact:
   forall id a map rd nd s ns s' INCR
      (TR: transl_expr map a rd nd s = OK ns s' INCR)
      (WF: map_valid map s)
-     (OK: reg_map_ok map rd (Some id))
-     (NOT2ADDR: expr_is_2addr_op a = false),
+     (OK: reg_map_ok map rd (Some id)),
    tr_expr s'.(st_code) map nil a ns nd rd (Some id).
 Proof.
-Opaque two_address_op.
   induction a; intros; monadInv TR; saturateTrans.
   (* Evar *)
   generalize EQ; unfold find_var. caseEq (map_vars map)!i; intros; inv EQ1.
   econstructor; eauto.
   eapply add_move_charact; eauto.
   (* Eop *)
-  simpl in NOT2ADDR. rewrite NOT2ADDR in EQ. 
   econstructor; eauto with rtlg. 
   eapply transl_exprlist_charact; eauto with rtlg.
   (* Eload *)
   econstructor; eauto with rtlg.
   eapply transl_exprlist_charact; eauto with rtlg. 
   (* Econdition *)
-  simpl in NOT2ADDR. destruct (orb_false_elim _ _ NOT2ADDR).
   econstructor; eauto with rtlg.
   econstructor; eauto with rtlg. eapply transl_exprlist_charact; eauto with rtlg. 
   apply tr_expr_incr with s2; auto. 
@@ -1096,7 +1062,6 @@ Opaque two_address_op.
   apply tr_expr_incr with s1; auto. 
   eapply IHa2; eauto 2 with rtlg.
   (* Elet *)
-  simpl in NOT2ADDR.
   econstructor. eapply new_reg_not_in_map; eauto with rtlg.  
   eapply transl_expr_charact; eauto 3 with rtlg.
   apply tr_expr_incr with s1; auto.
@@ -1109,6 +1074,12 @@ Opaque two_address_op.
   econstructor; eauto with rtlg. 
   eapply add_move_charact; eauto.
   monadInv EQ1.
+  (* Ebuiltin *)
+  econstructor; eauto with rtlg. 
+  eapply transl_exprlist_charact; eauto with rtlg.
+  (* Eexternal *)
+  econstructor; eauto with rtlg. 
+  eapply transl_exprlist_charact; eauto with rtlg.
 Qed.
 
 Lemma alloc_optreg_map_ok:
@@ -1141,7 +1112,6 @@ Proof.
   generalize tr_expr_incr tr_condition_incr tr_exprlist_incr; intros I1 I2 I3.
   pose (AT := fun pc i => instr_at_incr s1 s2 pc i EXT).
   induction 1; try (econstructor; eauto; fail).
-  eapply tr_Sassign_2; eauto. eapply tr_move_incr; eauto.
   econstructor; eauto. eapply tr_switch_incr; eauto.
 Qed.
 
@@ -1210,10 +1180,6 @@ Proof.
   constructor.
   (* Sassign *)
   revert EQ. unfold find_var. case_eq (map_vars map)!i; intros; monadInv EQ.
-  remember (expr_is_2addr_op e) as is2a. destruct is2a.
-  monadInv EQ0. eapply tr_Sassign_2; eauto. 
-  eapply transl_expr_charact; eauto with rtlg.
-  apply tr_move_incr with s1; auto. eapply add_move_charact; eauto. 
   eapply tr_Sassign; eauto.
   eapply transl_expr_assign_charact; eauto with rtlg.
   constructor. auto.
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index a1f9518..7ed7344 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -22,6 +22,7 @@ Require Import Registers.
 Require Import Globalenvs.
 Require Import Values.
 Require Import Integers.
+Require Import Memory.
 Require Import Events.
 Require Import RTL.
 Require Import Conventions.
@@ -109,7 +110,6 @@ Inductive wt_instr : instruction -> Prop :=
       forall ef args res s,
       List.map env args = (ef_sig ef).(sig_args) ->
       env res = proj_sig_res (ef_sig ef) ->
-      arity_ok (ef_sig ef).(sig_args) = true \/ ef_reloads ef = false ->
       valid_successor s ->
       wt_instr (Ibuiltin ef args res s)
   | wt_Icond:
@@ -282,10 +282,7 @@ Definition type_instr (e: typenv) (i: instruction) : res typenv :=
       let sig := ef_sig ef in
       do x <- check_successor s;
       do e1 <- type_regs e args sig.(sig_args);
-      do e2 <- type_reg e1 res (proj_sig_res sig);
-      if (negb (ef_reloads ef)) || arity_ok sig.(sig_args)
-      then OK e2
-      else Error(msg "cannot reload builtin")
+      type_reg e1 res (proj_sig_res sig)
   | Icond cond args s1 s2 =>
       do x1 <- check_successor s1;
       do x2 <- check_successor s2;
@@ -518,9 +515,6 @@ Proof.
   destruct (opt_typ_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
   destruct (tailcall_is_possible s) eqn:TCIP; inv EQ2.
   eauto with ty.
-- (* builtin *)
-  destruct (negb (ef_reloads e0) || arity_ok (sig_args (ef_sig e0))) eqn:E; inv EQ3.
-  eauto with ty.
 - (* jumptable *)
   destruct (zle (list_length_z l * 4) Int.max_unsigned); inv EQ2.
   eauto with ty.
@@ -572,11 +566,9 @@ Proof.
   eapply type_regs_sound; eauto with ty.
   apply tailcall_is_possible_correct; auto.
 - (* builtin *)
-  destruct (negb (ef_reloads e0) || arity_ok (sig_args (ef_sig e0))) eqn:E; inv EQ3.
   constructor.
   eapply type_regs_sound; eauto with ty.
   eapply type_reg_sound; eauto.
-  destruct (ef_reloads e0); auto.
   eauto with ty.
 - (* cond *)
   constructor.
@@ -833,10 +825,7 @@ Proof.
   exploit type_reg_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
-  rewrite A; simpl; rewrite C; simpl.
-  destruct H2; rewrite H2. 
-  rewrite orb_true_r. auto. 
-  auto. 
+  rewrite A; simpl; rewrite C; auto.
 - (* cond *)
   exploit type_regs_complete. eauto. eauto. intros [e1 [A B]].
   exists e1; split; auto.
@@ -982,6 +971,34 @@ Proof.
   apply wt_regset_assign; auto.
 Qed.
 
+Lemma wt_exec_Iop:
+  forall (ge: genv) env f sp op args res s rs m v,
+  wt_instr env f (Iop op args res s) ->
+  eval_operation ge sp op rs##args m = Some v ->
+  wt_regset env rs ->
+  wt_regset env (rs#res <- v).
+Proof.
+  intros. inv H. 
+  simpl in H0. inv H0. apply wt_regset_assign; auto. rewrite <- H4; apply H1.
+  apply wt_regset_assign; auto. 
+  replace (env res) with (snd (type_of_operation op)).
+  eapply type_of_operation_sound; eauto.
+  rewrite <- H7; auto.
+Qed.
+
+Lemma wt_exec_Iload:
+  forall env f chunk addr args dst s m a v rs,
+  wt_instr env f (Iload chunk addr args dst s) ->
+  Mem.loadv chunk m a = Some v ->
+  wt_regset env rs ->
+  wt_regset env (rs#dst <- v).
+Proof.
+  intros. destruct a; simpl in H0; try discriminate.
+  apply wt_regset_assign; auto. 
+  inv H. rewrite H8. 
+  eapply Mem.load_type; eauto.
+Qed.
+
 Inductive wt_stackframes: list stackframe -> option typ -> Prop :=
   | wt_stackframes_nil:
       wt_stackframes nil (Some Tint)
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
new file mode 100644
index 0000000..fe981e3
--- /dev/null
+++ b/backend/Regalloc.ml
@@ -0,0 +1,986 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Register allocation by coloring of an interference graph *)
+
+(* The algorithm in a nutshell:
+
+   - Split live ranges
+   - Convert from RTL to XTL
+   - Eliminate dead code
+   - Repeat:
+       . Construct interference graph
+       . Color interference graph using IRC algorithm
+       . Check for variables that were spilled and must be in registers
+       . If none, convert to LTL and exit.
+       . If some, insert spill and reload instructions and try again
+     End Repeat
+*)
+
+open Format
+open Clflags
+open Camlcoq
+open Datatypes
+open Coqlib
+open Maps
+open AST
+open Memdata
+open Kildall
+open Registers
+open Op
+open Machregs
+open Locations
+open Conventions1
+open Conventions
+open IRC
+open XTL
+
+(* Detection of 2-address operations *)
+
+let is_two_address op args =
+  if two_address_op op then
+    match args with
+    | [] -> assert false
+    | arg1 :: argl -> Some(arg1, argl)
+  else None
+
+(* For tracing *)
+
+let destination_alloctrace : string option ref = ref None
+let pp = ref std_formatter
+
+let init_trace () =
+  if !option_dalloctrace && !pp == std_formatter then begin
+    match !destination_alloctrace with
+    | None -> ()  (* should not happen *)
+    | Some f -> pp := formatter_of_out_channel (open_out f)
+  end
+
+
+(**************** Initial conversion from RTL to XTL **************)
+
+let vreg tyenv r = V(r, tyenv r)
+
+let vregs tyenv rl = List.map (vreg tyenv) rl
+
+let rec expand_regs tyenv = function
+  | [] -> []
+  | r :: rl ->
+      match tyenv r with
+      | Tlong -> V(r, Tint) :: V(twin_reg r, Tint) :: expand_regs tyenv rl
+      | ty    -> V(r, ty) :: expand_regs tyenv rl
+
+let constrain_reg v c =
+  match c with
+  | None -> v
+  | Some mr -> L(R mr)
+
+let rec constrain_regs vl cl =
+  match vl, cl with
+  | [], _ -> []
+  | v1 :: vl', [] -> vl
+  | v1 :: vl', Some mr1 :: cl' -> L(R mr1) :: constrain_regs vl' cl'
+  | v1 :: vl', None :: cl' -> v1 :: constrain_regs vl' cl'
+
+let move v1 v2 k =
+  if v1 = v2 then k else Xmove(v1, v2) :: k
+
+let rec movelist vl1 vl2 k =
+  match vl1, vl2 with
+  | [], [] -> k
+  | v1 :: vl1, v2 :: vl2 -> move v1 v2 (movelist vl1 vl2 k)
+  | _, _ -> assert false
+
+let xparmove srcs dsts k =
+  assert (List.length srcs = List.length dsts);
+  match srcs, dsts with
+  | [], [] -> k
+  | [src], [dst] -> Xmove(src, dst) :: k
+  | _, _ -> Xparmove(srcs, dsts, new_temp Tint, new_temp Tfloat) :: k
+
+(* Return the XTL basic block corresponding to the given RTL instruction.
+   Move and parallel move instructions are introduced to honor calling
+   conventions and register constraints on some operations.
+   64-bit integer variables are split in two 32-bit halves. *)
+
+let block_of_RTL_instr funsig tyenv = function
+  | RTL.Inop s ->
+      [Xbranch s]
+  | RTL.Iop(Omove, [arg], res, s) ->
+      if tyenv arg = Tlong then
+        [Xmove(V(arg, Tint), V(res, Tint));
+         Xmove(V(twin_reg arg, Tint), V(twin_reg res, Tint));
+         Xbranch s]
+      else
+        [Xmove(vreg tyenv arg, vreg tyenv res); Xbranch s]
+  | RTL.Iop(Omakelong, [arg1; arg2], res, s) ->
+      [Xmove(V(arg1, Tint), V(res, Tint));
+       Xmove(V(arg2, Tint), V(twin_reg res, Tint));
+       Xbranch s]
+  | RTL.Iop(Olowlong, [arg], res, s) ->
+      [Xmove(V(twin_reg arg, Tint), V(res, Tint)); Xbranch s]
+  | RTL.Iop(Ohighlong, [arg], res, s) ->
+      [Xmove(V(arg, Tint), V(res, Tint)); Xbranch s]
+  | RTL.Iop(op, args, res, s) ->
+      let (cargs, cres) = mregs_for_operation op in
+      let args1 = vregs tyenv args and res1 = vreg tyenv res in
+      let args2 = constrain_regs args1 cargs and res2 = constrain_reg res1 cres in
+      let (args3, res3) =
+        match is_two_address op args2 with
+        | None ->
+            (args2, res2)
+        | Some(arg, args2') ->
+            if arg = res2 || not (List.mem res2 args2') then
+              (args2, res2)
+            else
+              let t = new_temp (tyenv res) in (t :: args2', t) in
+      movelist args1 args3 (Xop(op, args3, res3) :: move res3 res1 [Xbranch s])
+  | RTL.Iload(chunk, addr, args, dst, s) ->
+      if chunk = Mint64 then begin
+        match offset_addressing addr (coqint_of_camlint 4l) with
+        | None -> assert false
+        | Some addr' ->
+            [Xload(Mint32, addr, vregs tyenv args,
+                   V((if big_endian then dst else twin_reg dst), Tint));
+             Xload(Mint32, addr', vregs tyenv args,
+                   V((if big_endian then twin_reg dst else dst), Tint));
+             Xbranch s]
+      end else
+        [Xload(chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s]
+  | RTL.Istore(chunk, addr, args, src, s) ->
+      if chunk = Mint64 then begin
+        match offset_addressing addr (coqint_of_camlint 4l) with
+        | None -> assert false
+        | Some addr' ->
+            [Xstore(Mint32, addr, vregs tyenv args,
+                   V((if big_endian then src else twin_reg src), Tint));
+             Xstore(Mint32, addr', vregs tyenv args,
+                   V((if big_endian then twin_reg src else src), Tint));
+             Xbranch s]
+      end else
+        [Xstore(chunk, addr, vregs tyenv args, vreg tyenv src); Xbranch s]
+  | RTL.Icall(sg, ros, args, res, s) ->
+      let args' = vlocs (loc_arguments sg)
+      and res' = vmregs (loc_result sg) in
+      xparmove (expand_regs tyenv args) args'
+       (Xcall(sg, sum_left_map (vreg tyenv) ros, args', res') ::
+         xparmove res' (expand_regs tyenv [res]) 
+           [Xbranch s])
+  | RTL.Itailcall(sg, ros, args) ->
+      let args' = vlocs (loc_arguments sg) in
+      xparmove (expand_regs tyenv args) args'
+        [Xtailcall(sg, sum_left_map (vreg tyenv) ros, args')]
+  | RTL.Ibuiltin(ef, args, res, s) ->
+      let (cargs, cres) = mregs_for_builtin ef in
+      let args1 = expand_regs tyenv args and res1 = expand_regs tyenv [res] in
+      let args2 = constrain_regs args1 cargs and res2 = constrain_regs res1 cres in
+      movelist args1 args2
+         (Xbuiltin(ef, args2, res2) :: movelist res2 res1 [Xbranch s])
+  | RTL.Icond(cond, args, s1, s2) ->
+      [Xcond(cond, vregs tyenv args, s1, s2)]
+  | RTL.Ijumptable(arg, tbl) ->
+      [Xjumptable(vreg tyenv arg, tbl)]
+  | RTL.Ireturn None ->
+      [Xreturn []]
+  | RTL.Ireturn (Some arg) ->
+      let args' = vmregs (loc_result funsig) in
+      xparmove (expand_regs tyenv [arg]) args' [Xreturn args']
+
+(* One above the [pc] nodes of the given RTL function *)
+
+let next_pc f =
+  PTree.fold
+    (fun npc pc i -> if P.lt pc npc then npc else P.succ pc)
+    f.RTL.fn_code P.one
+
+(* Translate an RTL function to an XTL function *)
+
+let function_of_RTL_function f tyenv =
+  let xc = PTree.map1 (block_of_RTL_instr f.RTL.fn_sig tyenv) f.RTL.fn_code in
+  (* Add moves for function parameters *)
+  let pc_entrypoint = next_pc f in
+  let b_entrypoint = 
+     xparmove (vlocs (loc_parameters f.RTL.fn_sig))
+              (expand_regs tyenv f.RTL.fn_params)
+              [Xbranch f.RTL.fn_entrypoint] in
+  { fn_sig = f.RTL.fn_sig;
+    fn_stacksize = f.RTL.fn_stacksize;
+    fn_entrypoint = pc_entrypoint;
+    fn_code = PTree.set pc_entrypoint b_entrypoint xc }
+
+
+(***************** Liveness analysis *****************)
+
+let vset_removelist vl after = List.fold_right VSet.remove vl after
+let vset_addlist vl after = List.fold_right VSet.add vl after
+let vset_addros vos after =
+  match vos with Coq_inl v -> VSet.add v after | Coq_inr id -> after
+
+let live_before instr after =
+  match instr with
+  | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) ->
+      if VSet.mem dst after
+      then VSet.add src (VSet.remove dst after)
+      else after
+  | Xparmove(srcs, dsts, itmp, ftmp) ->
+      vset_addlist srcs (vset_removelist dsts after)
+  | Xop(op, args, res) ->
+      if VSet.mem res after
+      then vset_addlist args (VSet.remove res after)
+      else after
+  | Xload(chunk, addr, args, dst) ->
+      if VSet.mem dst after
+      then vset_addlist args (VSet.remove dst after)
+      else after
+  | Xstore(chunk, addr, args, src) ->
+      vset_addlist args (VSet.add src after)
+  | Xcall(sg, ros, args, res) ->
+      vset_addlist args (vset_addros ros (vset_removelist res after))
+  | Xtailcall(sg, ros, args) ->
+      vset_addlist args (vset_addros ros VSet.empty)
+  | Xbuiltin(ef, args, res) ->
+      vset_addlist args (vset_removelist res after)
+  | Xbranch s ->
+      after
+  | Xcond(cond, args, s1, s2) ->
+      List.fold_right VSet.add args after
+  | Xjumptable(arg, tbl) ->
+      VSet.add arg after
+  | Xreturn args ->
+      vset_addlist args VSet.empty
+
+let rec live_before_block blk after =
+  match blk with
+  | [] -> after
+  | instr :: blk -> live_before instr (live_before_block blk after)
+
+let transfer_live f pc after =
+  match PTree.get pc f.fn_code with
+  | None -> VSet.empty
+  | Some blk -> live_before_block blk after
+
+module VSetLat = struct
+  type t = VSet.t
+  let beq = VSet.equal
+  let bot = VSet.empty
+  let lub = VSet.union
+end
+
+module Liveness_Solver = Backward_Dataflow_Solver(VSetLat)(NodeSetBackward)
+
+let liveness_analysis f =
+  match Liveness_Solver.fixpoint (successors f) (transfer_live f) [] with
+  | None -> assert false
+  | Some lv -> lv
+
+(* Pair the instructions of a block with their live-before sets *)
+
+let pair_block_live blk after =
+  let rec pair_rec accu after = function
+  | [] -> accu
+  | instr :: blk ->
+      let before = live_before instr after in
+      pair_rec ((instr, before) :: accu) before blk in
+  pair_rec [] after (List.rev blk)
+
+
+(**************** Dead code elimination **********************)
+
+(* Eliminate pure instructions whose results are not used later. *)
+
+let rec dce_parmove srcs dsts after =
+  match srcs, dsts with
+  | [], [] -> [], []
+  | src1 :: srcs, dst1 :: dsts ->
+      let (srcs', dsts') = dce_parmove srcs dsts after in
+      if VSet.mem dst1 after
+      then (src1 :: srcs', dst1 :: dsts')
+      else (srcs', dsts')
+  | _, _ -> assert false
+
+let dce_instr instr after k =
+  match instr with
+  | Xmove(src, dst) ->
+      if VSet.mem dst after
+      then instr :: k
+      else k
+  | Xparmove(srcs, dsts, itmp, ftmp) ->
+      let (srcs', dsts') = dce_parmove srcs dsts after in
+      Xparmove(srcs', dsts', itmp, ftmp) :: k
+  | Xop(op, args, res) ->
+      if VSet.mem res after
+      then instr :: k
+      else k
+  | Xload(chunk, addr, args, dst) ->
+      if VSet.mem dst after
+      then instr :: k
+      else k
+  | _ ->
+      instr :: k
+
+let rec dce_block blk after =
+  match blk with
+  | [] -> (after, [])
+  | instr :: blk' ->
+      let (after', tblk') = dce_block blk' after in
+      (live_before instr after', dce_instr instr after' tblk')
+
+let dead_code_elimination f liveness =
+  { f with fn_code =
+      PTree.map (fun pc blk -> snd(dce_block blk (PMap.get pc liveness)))
+                 f.fn_code }
+
+
+(*********************** Spill costs ****************************)
+
+(* Estimate spill costs and count other statistics for every variable.
+   Variables that must not be spilled are given infinite costs. *)
+
+let spill_costs f =
+
+  let costs = ref PTree.empty in
+
+  let get_stats r =
+    match PTree.get r !costs with
+    | Some st -> st
+    | None ->
+        let st = {cost = 0; usedefs = 0} in
+        costs := PTree.set r st !costs;
+        st in
+
+  let charge amount uses v =
+    match v with
+    | L l -> ()
+    | V(r, ty) ->
+        let st = get_stats r in
+        let c1 = st.cost + amount in
+        let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in
+        st.cost <- c2;
+        st.usedefs <- st.usedefs + uses in
+
+  let charge_list amount uses vl =
+    List.iter (charge amount uses) vl in
+
+  let charge_ros amount ros =
+    match ros with Coq_inl v -> charge amount 1 v | Coq_inr id -> () in
+
+  let charge_instr = function
+    | Xmove(src, dst) ->
+        charge 1 1 src; charge 1 1 dst
+    | Xreload(src, dst) ->
+        charge 1 1 src; charge max_int 1 dst
+        (* dest must not be spilled! *)
+    | Xspill(src, dst) ->
+        charge max_int 1 src; charge 1 1 dst
+        (* source must not be spilled! *)
+    | Xparmove(srcs, dsts, itmp, ftmp) ->
+        charge_list 1 1 srcs; charge_list 1 1 dsts;
+        charge max_int 0 itmp; charge max_int 0 ftmp
+        (* temps must not be spilled *)
+    | Xop(op, args, res) ->
+        charge_list 10 1 args; charge 10 1 res
+    | Xload(chunk, addr, args, dst) ->
+        charge_list 10 1 args; charge 10 1 dst
+    | Xstore(chunk, addr, args, src) ->
+        charge_list 10 1 args; charge 10 1 src
+    | Xcall(sg, vos, args, res) ->
+        charge_ros 10 vos
+    | Xtailcall(sg, vos, args) ->
+        charge_ros 10 vos
+    | Xbuiltin(ef, args, res) ->
+        begin match ef with
+        | EF_vstore _ | EF_vstore_global _ | EF_memcpy _ ->
+            (* result is not used but should not be spilled *)
+            charge_list 10 1 args; charge_list max_int 0 res
+        | EF_annot _ ->
+            (* arguments are not actually used, so don't charge;
+               result is never used but should not be spilled *)
+            charge_list max_int 0 res
+        | EF_annot_val _ ->
+            (* like a move *)
+            charge_list 1 1 args; charge_list 1 1 res
+        | _ ->
+            charge_list 10 1 args; charge_list 10 1 res
+        end
+    | Xbranch _ -> ()
+    | Xcond(cond, args, _, _) ->
+        charge_list 10 1 args
+    | Xjumptable(arg, _) ->
+        charge 10 1 arg
+    | Xreturn optarg ->
+        () in
+
+  let charge_block blk = List.iter charge_instr blk in
+
+  PTree.fold
+    (fun () pc blk -> charge_block blk)
+    f.fn_code ();
+  if !option_dalloctrace then begin
+    fprintf !pp "------------------ Unspillable variables --------------@ @.";
+    fprintf !pp "@[<hov 1>";
+    PTree.fold
+      (fun () r st ->
+        if st.cost = max_int then fprintf !pp "@ x%ld" (P.to_int32 r))
+      !costs ();
+    fprintf !pp "@]@ @."
+  end;
+  (* Result is cost function: pseudoreg -> stats *)
+  get_stats
+
+
+(********* Construction and coloring of the interference graph **************)
+
+let add_interfs_def g res live =
+  VSet.iter (fun v -> if v <> res then IRC.add_interf g v res) live
+
+let add_interfs_move g src dst live =
+  VSet.iter (fun v -> if v <> src && v <> dst then IRC.add_interf g v dst) live
+
+let add_interfs_destroyed g live mregs =
+  List.iter
+    (fun mr -> VSet.iter (IRC.add_interf g (L (R mr))) live)
+    mregs
+
+let add_interfs_live g live v =
+  VSet.iter (fun v' -> IRC.add_interf g v v') live
+
+let add_interfs_list g v vl =
+  List.iter (IRC.add_interf g v) vl
+
+let rec add_interfs_pairwise g = function
+  | [] -> ()
+  | v1 :: vl -> add_interfs_list g v1 vl; add_interfs_pairwise g vl
+
+let add_interfs_instr g instr live =
+  match instr with
+  | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) ->
+      IRC.add_pref g src dst;
+      add_interfs_move g src dst live
+  | Xparmove(srcs, dsts, itmp, ftmp) ->
+      List.iter2 (IRC.add_pref g) srcs dsts;
+      (* Interferences with live across *)
+      let across = vset_removelist dsts live in
+      List.iter (add_interfs_live g across) dsts;
+      add_interfs_live g across itmp; add_interfs_live g across ftmp;
+      (* All destinations must be pairwise different *)
+      add_interfs_pairwise g dsts;
+      (* The temporaries must be different from sources and dests *)
+      add_interfs_list g itmp srcs; add_interfs_list g itmp dsts;
+      add_interfs_list g ftmp srcs; add_interfs_list g ftmp dsts;
+      (* Take into account destroyed reg when accessing Incoming param *)
+      if List.exists (function (L(S(Incoming, _, _))) -> true | _ -> false) srcs
+      then add_interfs_list g (vmreg temp_for_parent_frame) dsts
+  | Xop(op, args, res) ->
+      begin match is_two_address op args with
+      | None ->
+          add_interfs_def g res live;
+      | Some(arg1, argl) ->
+          (* Treat as "res := arg1; res := op(res, argl)" *)
+          add_interfs_def g res live;
+          IRC.add_pref g arg1 res;
+          add_interfs_move g arg1 res
+            (vset_addlist (res :: argl) (VSet.remove res live))
+      end;
+      add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op);
+  | Xload(chunk, addr, args, dst) ->
+      add_interfs_def g dst live;
+      add_interfs_destroyed g (VSet.remove dst live) 
+                              (destroyed_by_load chunk addr)
+  | Xstore(chunk, addr, args, src) ->
+      add_interfs_destroyed g live (destroyed_by_store chunk addr)
+  | Xcall(sg, vos, args, res) ->
+      add_interfs_destroyed g (vset_removelist res live) destroyed_at_call
+  | Xtailcall(sg, Coq_inl v, args) ->
+      List.iter (fun r -> IRC.add_interf g (vmreg r) v) int_callee_save_regs
+  | Xtailcall(sg, Coq_inr id, args) ->
+      ()
+  | Xbuiltin(ef, args, res) ->
+      (* Interferences with live across *)
+      let across = vset_removelist res live in
+      List.iter (add_interfs_live g across) res;
+      (* All results must be pairwise different *)
+      add_interfs_pairwise g res;
+      add_interfs_destroyed g across (destroyed_by_builtin ef);
+      begin match ef, args, res with
+      | EF_annot_val _, [arg], [res] -> IRC.add_pref g arg res (* like a move *)
+      | _ -> ()
+      end
+  | Xbranch s ->
+      ()
+  | Xcond(cond, args, s1, s2) ->
+      add_interfs_destroyed g live (destroyed_by_cond cond)
+  | Xjumptable(arg, tbl) ->
+      add_interfs_destroyed g live destroyed_by_jumptable
+  | Xreturn optarg ->
+      ()
+
+let rec add_interfs_block g blk live =
+  match blk with
+  | [] -> live
+  | instr :: blk' ->
+      let live' = add_interfs_block g blk' live in
+      add_interfs_instr g instr live';
+      live_before instr live'
+
+let find_coloring f liveness =
+  (*type_function f;  (* for debugging *)*)
+  let g = IRC.init (spill_costs f) in
+  PTree.fold
+    (fun () pc blk -> ignore (add_interfs_block g blk (PMap.get pc liveness)))
+    f.fn_code ();
+  IRC.coloring g
+
+
+(*********** Determination of variables that need spill code insertion *****)
+
+let is_reg alloc v =
+  match alloc v with R _ -> true | S _ -> false
+
+let add_tospill alloc v ts =
+  match alloc v with R _ -> ts | S _ -> VSet.add v ts
+
+let addlist_tospill alloc vl ts =
+  List.fold_right (add_tospill alloc) vl ts
+
+let addros_tospill alloc ros ts =
+  match ros with Coq_inl v -> add_tospill alloc v ts | Coq_inr s -> ts
+
+let tospill_instr alloc instr ts =
+  match instr with
+  | Xmove(src, dst) ->
+      if is_reg alloc src || is_reg alloc dst || alloc src = alloc dst
+      then ts
+      else VSet.add src (VSet.add dst ts)
+  | Xreload(src, dst) ->
+      assert (is_reg alloc dst);
+      ts
+  | Xspill(src, dst) ->
+      assert (is_reg alloc src);
+      ts
+  | Xparmove(srcs, dsts, itmp, ftmp) ->
+      assert (is_reg alloc itmp && is_reg alloc ftmp);
+      ts
+  | Xop(op, args, res) ->
+      addlist_tospill alloc args (add_tospill alloc res ts)
+  | Xload(chunk, addr, args, dst) ->
+      addlist_tospill alloc args (add_tospill alloc dst ts)
+  | Xstore(chunk, addr, args, src) ->
+      addlist_tospill alloc args (add_tospill alloc src ts)
+  | Xcall(sg, vos, args, res) ->
+      addros_tospill alloc vos ts
+  | Xtailcall(sg, vos, args) ->
+      addros_tospill alloc vos ts
+  | Xbuiltin(ef, args, res) ->
+      begin match ef with
+      | EF_annot _ -> ts
+      |      _     -> addlist_tospill alloc args (addlist_tospill alloc res ts)
+      end
+  | Xbranch s ->
+      ts
+  | Xcond(cond, args, s1, s2) ->
+      addlist_tospill alloc args ts
+  | Xjumptable(arg, tbl) ->
+      add_tospill alloc arg ts
+  | Xreturn optarg ->
+      ts
+
+let rec tospill_block alloc blk ts =
+  match blk with
+  | [] -> ts
+  | instr :: blk' -> tospill_block alloc blk' (tospill_instr alloc instr ts)
+
+let tospill_function f alloc =
+  PTree.fold
+    (fun ts pc blk -> tospill_block alloc blk ts)
+    f.fn_code VSet.empty
+
+
+(********************* Spilling ***********************)
+
+(* We follow a semi-naive spilling strategy.  By default, we spill at
+  every definition of a variable that could not be allocated a register,
+  and we reload before every use.  However, we also maintain a list of
+  equations of the form [spilled-var = temp] that keep track of
+  variables that were recently spilled or reloaded.  Based on these
+  equations, we can avoid reloading a spilled variable if its value
+  is still available in a temporary register. *)
+
+let rec find_reg_containing v = function
+  | [] -> None
+  | (var, temp, date) :: eqs ->
+      if var = v then Some temp else find_reg_containing v eqs
+
+let add v t eqs = (v, t, 0) :: eqs
+
+let kill x eqs =
+  List.filter (fun (v, t, date) -> v <> x && t <> x) eqs
+  
+let reload_var tospill eqs v =
+  if not (VSet.mem v tospill) then
+    (v, [], eqs)
+  else
+    match find_reg_containing v eqs with
+    | Some t ->
+        (*printf "Reusing %a for %a@ @." PrintXTL.var t PrintXTL.var v;*)
+        (t, [], eqs)
+    | None ->
+        let t = new_temp (typeof v) in (t, [Xreload(v, t)], add v t eqs)
+
+let rec reload_vars tospill eqs vl =
+  match vl with
+  | [] -> ([], [], eqs)
+  | v1 :: vs ->
+      let (t1, c1, eqs1) = reload_var tospill eqs v1 in
+      let (ts, cs, eqs2) = reload_vars tospill eqs1 vs in
+      (t1 :: ts, c1 @ cs, eqs2)
+
+let save_var tospill eqs v =
+  if not (VSet.mem v tospill) then
+    (v, [], kill v eqs)
+  else begin
+    let t = new_temp (typeof v) in
+    (t, [Xspill(t, v)], add v t (kill v eqs))
+  end
+
+let rec save_vars tospill eqs vl =
+  match vl with
+  | [] -> ([], [], eqs)
+  | v1 :: vs ->
+      let (t1, c1, eqs1) = save_var tospill eqs v1 in
+      let (ts, cs, eqs2) = save_vars tospill eqs1 vs in
+      (t1 :: ts, c1 @ cs, eqs2)
+
+(* Trimming equations when we have too many or when they are too old.
+   The goal is to limit the live range of unspillable temporaries.
+   By setting [max_age] to zero, we can effectively deactivate
+   the reuse strategy and fall back to a naive "reload at every use"
+   strategy. *)
+
+let max_age = ref 3
+let max_num_eqs = ref 3
+
+let rec trim count eqs =
+  if count <= 0 then [] else
+    match eqs with
+    | [] -> []
+    | (v, t, date) :: eqs' -> 
+        if date <= !max_age
+        then (v, t, date + 1) :: trim (count - 1) eqs'
+        else []
+
+(* Insertion of spill and reload instructions. *)
+
+let spill_instr tospill eqs instr =
+  let eqs = trim !max_num_eqs eqs in
+  match instr with
+  | Xmove(src, dst) ->
+      if VSet.mem src tospill && VSet.mem dst tospill then begin
+        let (src', c1, eqs1) = reload_var tospill eqs src in
+        (c1 @ [Xspill(src', dst)], add dst src' (kill dst eqs1))
+      end else begin
+        ([instr], kill dst eqs)
+      end
+  | Xreload(src, dst) ->
+      assert false
+  | Xspill(src, dst) ->
+      assert false
+  | Xparmove(srcs, dsts, itmp, ftmp) ->
+      ([instr], List.fold_right kill dsts eqs)
+  | Xop(op, args, res) ->
+      begin match is_two_address op args with
+      | None ->
+          let (args', c1, eqs1) = reload_vars tospill eqs args in
+          let (res', c2, eqs2) = save_var tospill eqs1 res in
+          (c1 @ Xop(op, args', res') :: c2, eqs2)
+      | Some(arg1, argl) ->
+          begin match VSet.mem res tospill, VSet.mem arg1 tospill with
+          | false, false ->
+              let (argl', c1, eqs1) = reload_vars tospill eqs argl in
+              (c1 @ [Xop(op, arg1 :: argl', res)], kill res eqs1)
+          | true, false ->
+              let tmp = new_temp (typeof res) in
+              let (argl', c1, eqs1) = reload_vars tospill eqs argl in
+              (c1 @ [Xmove(arg1, tmp); Xop(op, tmp :: argl', tmp); Xspill(tmp, res)], 
+               add res tmp (kill res eqs1))
+          | false, true ->
+              let eqs1 = add arg1 res (kill res eqs) in
+              let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in
+              (Xreload(arg1, res) :: c1 @ [Xop(op, res :: argl', res)],
+               kill res eqs2)
+          | true, true ->
+              let tmp = new_temp (typeof res) in              
+              let eqs1 = add arg1 tmp eqs in
+              let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in
+              (Xreload(arg1, tmp) :: c1 @ [Xop(op, tmp :: argl', tmp); Xspill(tmp, res)],
+               add res tmp (kill tmp (kill res eqs2)))
+          end
+      end          
+  | Xload(chunk, addr, args, dst) ->
+      let (args', c1, eqs1) = reload_vars tospill eqs args in
+      let (dst', c2, eqs2) = save_var tospill eqs1 dst in
+      (c1 @ Xload(chunk, addr, args', dst') :: c2, eqs2)
+  | Xstore(chunk, addr, args, src) ->
+      let (args', c1, eqs1) = reload_vars tospill eqs args in
+      let (src', c2, eqs2) = reload_var tospill eqs1 src in
+      (c1 @ c2 @ [Xstore(chunk, addr, args', src')], eqs2)
+  | Xcall(sg, Coq_inl v, args, res) ->
+      let (v', c1, eqs1) = reload_var tospill eqs v in
+      (c1 @ [Xcall(sg, Coq_inl v', args, res)], [])
+  | Xcall(sg, Coq_inr id, args, res) ->
+      ([instr], [])
+  | Xtailcall(sg, Coq_inl v, args) ->
+      let (v', c1, eqs1) = reload_var tospill eqs v in
+      (c1 @ [Xtailcall(sg, Coq_inl v', args)], [])
+  | Xtailcall(sg, Coq_inr id, args) ->
+      ([instr], [])
+  | Xbuiltin(ef, args, res) ->
+      begin match ef with
+      | EF_annot _ ->
+          ([instr], eqs)
+      | _ ->
+          let (args', c1, eqs1) = reload_vars tospill eqs args in
+          let (res', c2, eqs2) = save_vars tospill eqs1 res in
+          (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2)
+      end
+  | Xbranch s ->
+      ([instr], eqs)
+  | Xcond(cond, args, s1, s2) ->
+     let (args', c1, eqs1) = reload_vars tospill eqs args in
+     (c1 @ [Xcond(cond, args', s1, s2)], eqs1)
+  | Xjumptable(arg, tbl) ->
+      let (arg', c1, eqs1) = reload_var tospill eqs arg in
+      (c1 @ [Xjumptable(arg', tbl)], eqs1)
+  | Xreturn optarg ->
+      ([instr], [])
+
+let rec spill_block tospill pc blk eqs =
+  match blk with
+  | [] -> ([], eqs)
+  | instr :: blk' ->
+      let (c1, eqs1) = spill_instr tospill eqs instr in
+      let (c2, eqs2) = spill_block tospill pc blk' eqs1 in
+      (c1 @ c2, eqs2)
+
+(*
+let spill_block tospill pc blk eqs =
+  printf "@[<hov 2>spill_block: at %ld: " (camlint_of_positive pc);
+  List.iter (fun (x,y,d) -> printf "@ %a=%a" PrintXTL.var x PrintXTL.var y) eqs;
+  printf "@]@.";
+  spill_block tospill pc blk eqs
+*)
+
+let spill_function f tospill round =
+  max_num_eqs := 3;
+  max_age := (if round <= 10 then 3 else if round <= 20 then 1 else 0);
+  transform_basic_blocks (spill_block tospill) [] f
+
+
+(***************** Generation of LTL from XTL ***********************)
+
+(** Apply a register allocation to an XTL function, producing an LTL function.
+  Raise [Bad_LTL] if some pseudoregisters were mapped to stack locations
+  while machine registers were expected, or in other words if spilling
+  and reloading code must be inserted. *)
+
+exception Bad_LTL
+
+let mreg_of alloc v = match alloc v with R mr -> mr | S _ -> raise Bad_LTL
+
+let mregs_of alloc vl = List.map (mreg_of alloc) vl
+
+let mros_of alloc vos = sum_left_map (mreg_of alloc) vos
+
+let make_move src dst k =
+  match src, dst with
+  | R rsrc, R rdst ->
+      if rsrc = rdst then k else LTL.Lop(Omove, [rsrc], rdst) :: k
+  | R rsrc, S(sl, ofs, ty) ->
+      LTL.Lsetstack(rsrc, sl, ofs, ty) :: k
+  | S(sl, ofs, ty), R rdst ->
+      LTL.Lgetstack(sl, ofs, ty, rdst) :: k
+  | S _, S _ ->
+      if src = dst then k else raise Bad_LTL
+
+type parmove_status = To_move | Being_moved | Moved
+
+let make_parmove srcs dsts itmp ftmp k =
+  let src = Array.of_list srcs
+  and dst = Array.of_list dsts in
+  let n = Array.length src in
+  assert (Array.length dst = n);
+  let status = Array.make n To_move in
+  let temp_for =
+    function Tint -> itmp | Tfloat -> ftmp | Tlong -> assert false in
+  let code = ref [] in
+  let add_move s d =
+    match s, d with
+    | R rs, R rd ->
+        code := LTL.Lop(Omove, [rs], rd) :: !code
+    | R rs, S(sl, ofs, ty) ->
+        code := LTL.Lsetstack(rs, sl, ofs, ty) :: !code
+    | S(sl, ofs, ty), R rd ->
+        code := LTL.Lgetstack(sl, ofs, ty, rd) :: !code
+    | S(sls, ofss, tys), S(sld, ofsd, tyd) ->
+        let tmp = temp_for tys in
+        (* code will be reversed at the end *)
+        code := LTL.Lsetstack(tmp, sld, ofsd, tyd) ::
+                LTL.Lgetstack(sls, ofss, tys, tmp) :: !code
+    in
+  let rec move_one i =
+    if src.(i) <> dst.(i) then begin
+      status.(i) <- Being_moved;
+      for j = 0 to n - 1 do
+        if src.(j) = dst.(i) then
+          match status.(j) with
+          | To_move ->
+              move_one j
+          | Being_moved ->
+              let tmp = R (temp_for (Loc.coq_type src.(j))) in
+              add_move src.(j) tmp;
+              src.(j) <- tmp
+          | Moved ->
+              ()
+      done;
+      add_move src.(i) dst.(i);
+      status.(i) <- Moved
+    end in
+  for i = 0 to n - 1 do
+    if status.(i) = To_move then move_one i
+  done;
+  List.rev_append !code k
+
+let transl_instr alloc instr k =
+  match instr with
+  | Xmove(src, dst) | Xreload(src, dst) | Xspill(src, dst) ->
+      make_move (alloc src) (alloc dst) k
+  | Xparmove(srcs, dsts, itmp, ftmp) ->
+      make_parmove (List.map alloc srcs) (List.map alloc dsts)
+                   (mreg_of alloc itmp) (mreg_of alloc ftmp) k
+  | Xop(op, args, res) ->
+      let rargs = mregs_of alloc args
+      and rres  = mreg_of alloc res in
+      begin match is_two_address op rargs with
+      | None ->
+          LTL.Lop(op, rargs, rres) :: k
+      | Some(rarg1, rargl) ->
+          if rarg1 = rres then
+            LTL.Lop(op, rargs, rres) :: k
+          else
+            LTL.Lop(Omove, [rarg1], rres) :: 
+            LTL.Lop(op, rres :: rargl, rres) :: k
+      end
+  | Xload(chunk, addr, args, dst) ->
+      LTL.Lload(chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k
+  | Xstore(chunk, addr, args, src) ->
+      LTL.Lstore(chunk, addr, mregs_of alloc args, mreg_of alloc src) :: k
+  | Xcall(sg, vos, args, res) ->
+      LTL.Lcall(sg, mros_of alloc vos) :: k
+  | Xtailcall(sg, vos, args) ->
+      LTL.Ltailcall(sg, mros_of alloc vos) :: []
+  | Xbuiltin(ef, args, res) ->
+      begin match ef with
+      | EF_annot _ ->
+          LTL.Lannot(ef, List.map alloc args) :: k
+      | _ ->
+          LTL.Lbuiltin(ef, mregs_of alloc args, mregs_of alloc res) :: k
+      end
+  | Xbranch s ->
+      LTL.Lbranch s :: []
+  | Xcond(cond, args, s1, s2) ->
+      LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: []
+  | Xjumptable(arg, tbl) ->
+      LTL.Ljumptable(mreg_of alloc arg, tbl) :: []
+  | Xreturn optarg ->
+      LTL.Lreturn :: []
+
+let rec transl_block alloc blk =
+  match blk with
+  | [] -> []
+  | instr :: blk' -> transl_instr alloc instr (transl_block alloc blk')
+
+let transl_function fn alloc =
+  { LTL.fn_sig = fn.fn_sig;
+    LTL.fn_stacksize = fn.fn_stacksize;
+    LTL.fn_entrypoint = fn.fn_entrypoint;
+    LTL.fn_code = PTree.map1 (transl_block alloc) fn.fn_code 
+  }
+
+
+(******************* All together *********************)
+
+exception Timeout
+
+let rec first_round f liveness =
+  let alloc = find_coloring f liveness in
+  if !option_dalloctrace then begin
+    fprintf !pp "-------------- After initial register allocation@ @.";
+    PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f
+  end;
+  let ts = tospill_function f alloc in
+  if VSet.is_empty ts then success f alloc else more_rounds f ts 1
+
+and more_rounds f ts count =
+  if count >= 40 then raise Timeout;
+  let f' = spill_function f ts count in
+  let liveness = liveness_analysis f' in
+  let alloc = find_coloring f' liveness in
+  if !option_dalloctrace then begin
+    fprintf !pp "-------------- After register allocation (round %d)@ @." count;
+    PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f'
+  end;
+  let ts' = tospill_function f' alloc in
+  if VSet.is_empty ts'
+  then success f' alloc
+  else begin
+    if !option_dalloctrace then begin
+      fprintf !pp "--- Remain to be spilled:@ @.";
+      VSet.iter (fun v -> fprintf !pp "%a " PrintXTL.var v) ts';
+      fprintf !pp "@ @."
+    end;      
+    more_rounds f (VSet.union ts ts') (count + 1)
+  end
+
+and success f alloc =
+  let f' = transl_function f alloc in
+  if !option_dalloctrace then begin
+    fprintf !pp "-------------- Candidate allocation@ @.";
+    PrintLTL.print_function !pp P.one f'
+  end;
+  f'
+
+open Errors
+
+let regalloc f =
+  init_trace();
+  reset_temps();
+  let f1 = Splitting.rename_function f in
+  match RTLtyping.type_function f1 with
+  | Error msg ->
+      Error(MSG (coqstring_of_camlstring "RTL code after splitting is ill-typed:") :: msg)
+  | OK tyenv ->
+      let f2 = function_of_RTL_function f1 tyenv in
+      let liveness = liveness_analysis f2 in
+      let f3 = dead_code_elimination f2 liveness in
+      if !option_dalloctrace then begin
+        fprintf !pp "-------------- Initial XTL@ @.";
+        PrintXTL.print_function !pp f3
+      end;
+      try
+        OK(first_round f3 liveness)
+      with 
+      | Timeout ->
+          Error(msg (coqstring_of_camlstring "Spilling fails to converge"))
+      | Type_error_at pc ->
+          Error [MSG(coqstring_of_camlstring "Ill-typed XTL code at PC "); 
+                 POS pc]
+      | Bad_LTL ->
+          Error(msg (coqstring_of_camlstring "Bad LTL after spilling"))
diff --git a/backend/Reload.v b/backend/Reload.v
deleted file mode 100644
index be844b3..0000000
--- a/backend/Reload.v
+++ /dev/null
@@ -1,274 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Reloading, spilling, and explication of calling conventions. *)
-
-Require Import Coqlib.
-Require Import AST.
-Require Import Integers.
-Require Import Op.
-Require Import Locations.
-Require Import LTLin.
-Require Import Conventions.
-Require Import Parallelmove.
-Require Import Linear.
-
-Open Local Scope error_monad_scope.
-
-(** * Spilling and reloading *)
-
-(** Operations in the Linear language, like those of the target processor,
-  operate only over machine registers, but not over stack slots.
-  Consider the LTLin instruction
-<<
-        r1 <- Lop(Oadd, r1 :: r2 :: nil)
->>
-  and assume that [r1] and [r2] are assigned to stack locations [S s1]
-  and [S s2], respectively.  The translated LTL code must load these
-  stack locations into temporary integer registers (this is called
-  ``reloading''), perform the [Oadd] operation over these temporaries,
-  leave the result in a temporary, then store the temporary back to
-  stack location [S s1] (this is called ``spilling'').  In other term,
-  the generated Linear code has the following shape:
-<<
-        IT1 <- Lgetstack s1;
-        IT2 <- Lgetstack s2;
-        IT1 <- Lop(Oadd, IT1 :: IT2 :: nil);
-        Lsetstack s1 IT1;
->>
-  This section provides functions that assist in choosing appropriate
-  temporaries and inserting the required spilling and reloading
-  operations. *)
-
-(** ** Allocation of temporary registers for reloading and spilling. *)
-
-(** [reg_for l] returns a machine register appropriate for working
-    over the location [l]: either the machine register [m] if [l = R m],
-    or a temporary register of [l]'s type if [l] is a stack slot. *)
-
-Definition reg_for (l: loc) : mreg :=
-  match l with
-  | R r => r
-  | S s => match slot_type s with Tint => IT1 | Tfloat => FT1 end
-  end.
-
-(** [regs_for ll] is similar, for a list of locations [ll].
-    We ensure that distinct temporaries are used for
-    different elements of [ll].
-    The result is correct only if [enough_temporaries ll = true]
-    (see below). *)
-
-Fixpoint regs_for_rec (locs: list loc) (itmps ftmps: list mreg)
-                      {struct locs} : list mreg :=
-  match locs with
-  | nil => nil
-  | R r :: ls => r :: regs_for_rec ls itmps ftmps
-  | S s :: ls =>
-      match slot_type s with
-      | Tint =>
-          match itmps with
-          | nil => nil
-          | it1 :: its => it1 :: regs_for_rec ls its ftmps
-          end
-      | Tfloat =>
-          match ftmps with
-          | nil => nil
-          | ft1 :: fts => ft1 :: regs_for_rec ls itmps fts
-          end
-      end
-  end.
-
-Definition regs_for (locs: list loc) :=
-  regs_for_rec locs int_temporaries float_temporaries.
-
-(** Detect the situations where not enough temporaries are available
-    for reloading. *)
-
-Fixpoint enough_temporaries_rec (locs: list loc) (itmps ftmps: list mreg)
-                                {struct locs} : bool :=
-  match locs with
-  | nil => true
-  | R r :: ls => enough_temporaries_rec ls itmps ftmps
-  | S s :: ls =>
-      match slot_type s with
-      | Tint =>
-          match itmps with
-          | nil => false
-          | it1 :: its => enough_temporaries_rec ls its ftmps
-          end
-      | Tfloat =>
-          match ftmps with
-          | nil => false
-          | ft1 :: fts => enough_temporaries_rec ls itmps fts
-          end
-      end
-  end.
-
-Definition enough_temporaries (locs: list loc) :=
-  enough_temporaries_rec locs int_temporaries float_temporaries.
-
-(** ** Insertion of Linear reloads, stores and moves *)
-
-(** [add_spill src dst k] prepends to [k] the instructions needed
-  to assign location [dst] the value of machine register [mreg]. *)
-
-Definition add_spill (src: mreg) (dst: loc) (k: code) :=
-  match dst with
-  | R rd => if mreg_eq src rd then k else Lop Omove (src :: nil) rd :: k
-  | S sl => Lsetstack src sl :: k
-  end.
-
-(** [add_reload src dst k] prepends to [k] the instructions needed
-  to assign machine register [mreg] the value of the location [src]. *)
-
-Definition add_reload (src: loc) (dst: mreg) (k: code) :=
-  match src with
-  | R rs => if mreg_eq rs dst then k else Lop Omove (rs :: nil) dst :: k
-  | S sl => Lgetstack sl dst :: k
-  end.
-
-(** [add_reloads] is similar for a list of locations (as sources)
-  and a list of machine registers (as destinations).  *)
-
-Fixpoint add_reloads (srcs: list loc) (dsts: list mreg) (k: code)
-                                            {struct srcs} : code :=
-  match srcs, dsts with
-  | s1 :: sl, t1 :: tl => add_reload s1 t1 (add_reloads sl tl k)
-  | _, _ => k
-  end.
-
-(** [add_move src dst k] prepends to [k] the instructions that copy
-  the value of location [src] into location [dst]. *)
-
-Definition add_move (src dst: loc) (k: code) :=
-  if Loc.eq src dst then k else
-    match src, dst with
-    | R rs, _ =>
-        add_spill rs dst k
-    | _, R rd =>
-        add_reload src rd k
-    | S ss, S sd =>
-        let tmp :=
-          match slot_type ss with Tint => IT1 | Tfloat => FT1 end in
-        add_reload src tmp (add_spill tmp dst k)
-    end.
-
-(** [parallel_move srcs dsts k] is similar, but for a list of
-  source locations and a list of destination locations of the same
-  length.  This is a parallel move, meaning that some of the 
-  destinations can also occur as sources. *)
-
-Definition parallel_move (srcs dsts: list loc) (k: code) : code :=
-  List.fold_right
-    (fun p k => add_move (fst p) (snd p) k)
-     k (parmove srcs dsts).
-
-(** * Code transformation *)
-
-(** We insert appropriate reload, spill and parallel move operations
-  around each of the instructions of the source code. *)
-
-Definition transf_instr 
-     (f: LTLin.function) (instr: LTLin.instruction) (k: code) : code :=
-  match instr with
-  | LTLin.Lop op args res =>
-      match is_move_operation op args with
-      | Some src =>
-          add_move src res k
-      | None =>
-          let rargs := regs_for args in
-          let rres := reg_for res in
-          add_reloads args rargs (Lop op rargs rres :: add_spill rres res k)
-      end
-  | LTLin.Lload chunk addr args dst =>
-      let rargs := regs_for args in
-      let rdst := reg_for dst in
-      add_reloads args rargs
-        (Lload chunk addr rargs rdst :: add_spill rdst dst k)
-  | LTLin.Lstore chunk addr args src =>
-      if enough_temporaries (src :: args) then
-        match regs_for (src :: args) with
-        | nil => k                       (* never happens *)
-        | rsrc :: rargs =>
-            add_reloads (src :: args) (rsrc :: rargs)
-              (Lstore chunk addr rargs rsrc :: k)
-        end
-      else
-        let rargs := regs_for args in
-        let rsrc := reg_for src in
-        add_reloads args rargs
-          (Lop (op_for_binary_addressing addr) rargs IT2 ::
-           add_reload src rsrc
-             (Lstore chunk (Aindexed Int.zero) (IT2 :: nil) rsrc :: k))
-  | LTLin.Lcall sig los args res =>
-      let largs := loc_arguments sig in
-      let rres  := loc_result sig in
-      match los with
-      | inl fn =>
-          parallel_move args largs
-            (add_reload fn (reg_for fn)
-              (Lcall sig (inl _ (reg_for fn)) :: add_spill rres res k))
-      | inr id =>
-          parallel_move args largs
-            (Lcall sig (inr _ id) :: add_spill rres res k)
-      end
-  | LTLin.Ltailcall sig los args =>
-      let largs := loc_arguments sig in
-      match los with
-      | inl fn =>
-          parallel_move args largs
-            (add_reload fn IT1
-              (Ltailcall sig (inl _ IT1) :: k))
-      | inr id =>
-          parallel_move args largs
-            (Ltailcall sig (inr _ id) :: k)
-      end
-  | LTLin.Lbuiltin ef args dst =>
-      if ef_reloads ef then
-       (let rargs := regs_for args in
-        let rdst := reg_for dst in
-        add_reloads args rargs
-          (Lbuiltin ef rargs rdst :: add_spill rdst dst k))
-      else
-        Lannot ef args :: k
-  | LTLin.Llabel lbl =>
-      Llabel lbl :: k
-  | LTLin.Lgoto lbl =>
-      Lgoto lbl :: k
-  | LTLin.Lcond cond args lbl =>
-      let rargs := regs_for args in
-      add_reloads args rargs (Lcond cond rargs lbl :: k)
-  | LTLin.Ljumptable arg tbl =>
-      let rarg := reg_for arg in
-      add_reload arg rarg (Ljumptable rarg tbl :: k)
-  | LTLin.Lreturn None =>
-      Lreturn :: k
-  | LTLin.Lreturn (Some loc) =>
-      add_reload loc (loc_result (LTLin.fn_sig f)) (Lreturn :: k)
-  end.
-
-Definition transf_code (f: LTLin.function) (c: LTLin.code) : code :=
-  list_fold_right (transf_instr f) c nil.
-
-Definition transf_function (f: LTLin.function) : function :=
-  mkfunction
-    (LTLin.fn_sig f)
-    (LTLin.fn_stacksize f)
-    (parallel_move (loc_parameters (LTLin.fn_sig f)) (LTLin.fn_params f)
-       (transf_code f (LTLin.fn_code f))).
-
-Definition transf_fundef (fd: LTLin.fundef) : Linear.fundef :=
-  transf_fundef transf_function fd.
-
-Definition transf_program (p: LTLin.program) : Linear.program :=
-  transform_program transf_fundef p.
-
diff --git a/backend/Reloadproof.v b/backend/Reloadproof.v
deleted file mode 100644
index fe6e475..0000000
--- a/backend/Reloadproof.v
+++ /dev/null
@@ -1,1487 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Correctness proof for the [Reload] pass. *)
-
-Require Import Coqlib.
-Require Import AST.
-Require Import Integers.
-Require Import Values.
-Require Import Memory.
-Require Import Events.
-Require Import Globalenvs.
-Require Import Smallstep.
-Require Import Op.
-Require Import Locations.
-Require Import Conventions.
-Require Import RTLtyping.
-Require Import LTLin.
-Require Import LTLintyping.
-Require Import Linear.
-Require Import Parallelmove.
-Require Import Reload.
-
-(** * Exploitation of the typing hypothesis *)
-
-Remark arity_ok_rec_incr_1:
-  forall tys it itmps ftmps,
-  arity_ok_rec tys itmps ftmps = true ->
-  arity_ok_rec tys (it :: itmps) ftmps = true.
-Proof.
-  induction tys; intros until ftmps; simpl.
-  tauto.
-  destruct a. 
-  destruct itmps. congruence. auto.
-  destruct ftmps. congruence. auto.
-Qed.
-
-Remark arity_ok_rec_incr_2:
-  forall tys ft itmps ftmps,
-  arity_ok_rec tys itmps ftmps = true ->
-  arity_ok_rec tys itmps (ft :: ftmps) = true.
-Proof.
-  induction tys; intros until ftmps; simpl.
-  tauto.
-  destruct a. 
-  destruct itmps. congruence. auto.
-  destruct ftmps. congruence. auto.
-Qed.
-
-Remark arity_ok_rec_decr:
-  forall tys ty itmps ftmps,
-  arity_ok_rec (ty :: tys) itmps ftmps = true ->
-  arity_ok_rec tys itmps ftmps = true.
-Proof.
-  intros until ftmps. simpl. destruct ty. 
-  destruct itmps. congruence. intros. apply arity_ok_rec_incr_1; auto.
-  destruct ftmps. congruence. intros. apply arity_ok_rec_incr_2; auto.
-Qed.
-
-Lemma arity_ok_enough_rec:
-  forall locs itmps ftmps,
-  arity_ok_rec (List.map Loc.type locs) itmps ftmps = true ->
-  enough_temporaries_rec locs itmps ftmps = true.
-Proof.
-  induction locs; intros until ftmps.
-  simpl. auto.
-  simpl enough_temporaries_rec. simpl map. 
-  destruct a. intros. apply IHlocs. eapply arity_ok_rec_decr; eauto. 
-  simpl. destruct (slot_type s). 
-  destruct itmps; auto.
-  destruct ftmps; auto.
-Qed.
-
-Lemma arity_ok_enough:
-  forall locs,
-  arity_ok (List.map Loc.type locs) = true ->
-  enough_temporaries locs = true.
-Proof.
-  unfold arity_ok, enough_temporaries. intros. 
-  apply arity_ok_enough_rec; auto.
-Qed.
-
-Lemma enough_temporaries_op_args:
-  forall (op: operation) (args: list loc) (res: loc),
-  (List.map Loc.type args, Loc.type res) = type_of_operation op ->
-  enough_temporaries args = true.
-Proof.
-  intros. apply arity_ok_enough.
-  replace (map Loc.type args) with (fst (type_of_operation op)).
-  destruct op; try (destruct c); try (destruct a); compute; reflexivity.
-  rewrite <- H. auto.
-Qed.
-
-Lemma enough_temporaries_addr:
-  forall (addr: addressing) (args: list loc),
-  List.map Loc.type args = type_of_addressing addr ->
-  enough_temporaries args = true.
-Proof.
-  intros. apply arity_ok_enough. rewrite H. 
-  destruct addr; compute; reflexivity.
-Qed.
-
-Lemma enough_temporaries_cond:
-  forall (cond: condition) (args: list loc),
-  List.map Loc.type args = type_of_condition cond ->
-  enough_temporaries args = true.
-Proof.
-  intros. apply arity_ok_enough. rewrite H.
-  destruct cond; compute; reflexivity.
-Qed.
-
-Lemma arity_ok_rec_length:
-  forall tys itmps ftmps,
-  (length tys <= length itmps)%nat ->
-  (length tys <= length ftmps)%nat ->
-  arity_ok_rec tys itmps ftmps = true.
-Proof.
-  induction tys; intros until ftmps; simpl.
-  auto.
-  intros. destruct a.
-  destruct itmps; simpl in H. omegaContradiction. apply IHtys; omega.
-  destruct ftmps; simpl in H0. omegaContradiction. apply IHtys; omega.
-Qed.
-
-Lemma enough_temporaries_length:
-  forall args,
-  (length args <= 2)%nat -> enough_temporaries args = true.
-Proof.
-  intros. apply arity_ok_enough. unfold arity_ok.
-  apply arity_ok_rec_length. 
-  rewrite list_length_map. simpl. omega.
-  rewrite list_length_map. simpl. omega.
-Qed.
-
-Lemma not_enough_temporaries_length:
-  forall src args,
-  enough_temporaries (src :: args) = false ->
-  (length args >= 2)%nat.
-Proof.
-  intros.
-  assert (length (src :: args) <= 2 \/ length (src :: args) > 2)%nat by omega.
-  destruct H0.
-  exploit enough_temporaries_length. eauto. congruence. 
-  simpl in H0. omega.
-Qed.
-
-Lemma not_enough_temporaries_addr:
-  forall (ge: genv) sp addr src args ls v m,
-  enough_temporaries (src :: args) = false ->
-  eval_addressing ge sp addr (List.map ls args) = Some v ->
-  eval_operation ge sp (op_for_binary_addressing addr) (List.map ls args) m = Some v.
-Proof.
-  intros.
-  apply eval_op_for_binary_addressing; auto.
-  rewrite list_length_map. eapply not_enough_temporaries_length; eauto.
-Qed.
-
-(** Some additional properties of [reg_for] and [regs_for]. *)
-
-Lemma regs_for_cons:
-  forall src args,
-  exists rsrc, exists rargs, regs_for (src :: args) = rsrc :: rargs.
-Proof.
-  intros. unfold regs_for. simpl. 
-  destruct src. econstructor; econstructor; reflexivity. 
-  destruct (slot_type s); econstructor; econstructor; reflexivity.
-Qed.
-
-Lemma reg_for_not_IT2:
-  forall l, loc_acceptable l -> reg_for l <> IT2.
-Proof.
-  intros. destruct l; simpl. 
-  red; intros; subst m. simpl in H. intuition congruence.
-  destruct (slot_type s); congruence.
-Qed.
-
-(** * Correctness of the Linear constructors *)
-
-(** This section proves theorems that establish the correctness of the
-  Linear constructor functions such as [add_move].  The theorems are of
-  the general form ``the generated Linear instructions execute and
-  modify the location set in the expected way: the result location(s)
-  contain the expected values; other, non-temporary locations keep
-  their values''. *)
-
-Section LINEAR_CONSTRUCTORS.
-
-Variable ge: genv.
-Variable stk: list stackframe.
-Variable f: function.
-Variable sp: val.
-
-Lemma reg_for_spec:
-  forall l,
-  R(reg_for l) = l \/ In (R (reg_for l)) temporaries.
-Proof.
-  intros. unfold reg_for. destruct l. tauto.
-  case (slot_type s); simpl; tauto.
-Qed.
-
-Lemma reg_for_diff:
-  forall l l',
-  Loc.diff l l' -> Loc.notin l' temporaries ->
-  Loc.diff (R (reg_for l)) l'.
-Proof.
-  intros. destruct (reg_for_spec l). 
-  rewrite H1; auto.
-  apply Loc.diff_sym. eapply Loc.in_notin_diff; eauto.
-Qed.
-
-Lemma add_reload_correct:
-  forall src dst k rs m,
-  exists rs',
-  star step ge (State stk f sp (add_reload src dst k) rs m)
-            E0 (State stk f sp k rs' m) /\
-  rs' (R dst) = rs src /\
-  forall l, 
-    Loc.diff (R dst) l -> 
-    loc_acceptable src \/ Loc.diff (R IT1) l ->
-    Loc.notin l destroyed_at_move ->
-    rs' l = rs l.
-Proof.
-  intros. unfold add_reload. destruct src.
-  destruct (mreg_eq m0 dst).
-  subst dst. exists rs. split. apply star_refl. tauto.
-  econstructor.
-  split. apply star_one; apply exec_Lop. simpl; reflexivity.
-  unfold undef_op. split. apply Locmap.gss. 
-  intros. rewrite Locmap.gso; auto; apply Locmap.guo; auto.
-  econstructor.
-  split. apply star_one; apply exec_Lgetstack. 
-  split. apply Locmap.gss.
-  intros. rewrite Locmap.gso; auto. 
-  destruct s; unfold undef_getstack; unfold loc_acceptable in H0; auto.
-  apply Locmap.gso. tauto.
-Qed.
-
-Lemma add_reload_correct_2:
-  forall src k rs m,
-  loc_acceptable src ->
-  exists rs',
-  star step ge (State stk f sp (add_reload src (reg_for src) k) rs m)
-            E0 (State stk f sp k rs' m) /\
-  rs' (R (reg_for src)) = rs src /\
-  (forall l, Loc.notin l temporaries -> rs' l = rs l) /\
-  rs' (R IT2) = rs (R IT2).
-Proof.
-  intros. unfold reg_for, add_reload; destruct src.
-  rewrite dec_eq_true. exists rs; split. constructor. auto.
-  set (t := match slot_type s with
-                    | Tint => IT1
-                    | Tfloat => FT1
-                    end).
-  exists (Locmap.set (R t) (rs (S s)) (undef_getstack s rs)).
-  split. apply star_one; apply exec_Lgetstack. 
-  split. apply Locmap.gss.
-  split. intros. rewrite Locmap.gso; auto. 
-  destruct s; unfold undef_getstack; unfold loc_acceptable in H; auto.
-  apply Locmap.gso. tauto.
-  apply Loc.diff_sym. simpl in H0; unfold t; destruct (slot_type s); tauto.
-  rewrite Locmap.gso. unfold undef_getstack. 
-  destruct s; simpl in H; reflexivity || contradiction. 
-  unfold t; destruct (slot_type s); red; congruence.
-Qed.
-
-Lemma add_spill_correct:
-  forall src dst k rs m,
-  exists rs',
-  star step ge (State stk f sp (add_spill src dst k) rs m)
-            E0 (State stk f sp k rs' m) /\
-  rs' dst = rs (R src) /\
-  forall l, Loc.diff dst l -> Loc.notin l destroyed_at_move -> rs' l = rs l.
-Proof.
-  intros. unfold add_spill. destruct dst.
-  destruct (mreg_eq src m0).
-  subst src. exists rs. split. apply star_refl. tauto.
-  econstructor.
-  split. apply star_one. apply exec_Lop. simpl; reflexivity.
-  split. apply Locmap.gss.
-  intros. rewrite Locmap.gso; auto; unfold undef_op; apply Locmap.guo; auto. 
-  econstructor.
-  split. apply star_one. apply exec_Lsetstack. 
-  split. apply Locmap.gss.
-  intros. rewrite Locmap.gso; auto; unfold undef_setstack; apply Locmap.guo; auto.
-Qed.
-
-Remark notin_destroyed_move_1:
-  forall r, ~In r destroyed_at_move_regs -> Loc.notin (R r) destroyed_at_move.
-Proof.
-  intros. simpl in *. intuition congruence.
-Qed.
-
-Remark notin_destroyed_move_2:
-  forall s, Loc.notin (S s) destroyed_at_move.
-Proof.
-  intros. simpl in *. destruct s; auto.
-Qed.
-
-Lemma add_reloads_correct_rec:
-  forall srcs itmps ftmps k rs m,
-  locs_acceptable srcs ->
-  enough_temporaries_rec srcs itmps ftmps = true ->
-  (forall r, In (R r) srcs -> In r itmps -> False) ->
-  (forall r, In (R r) srcs -> In r ftmps -> False) ->
-  (forall r, In (R r) srcs -> ~In r destroyed_at_move_regs) ->
-  list_disjoint itmps ftmps ->
-  list_norepet itmps ->
-  list_norepet ftmps ->
-  list_disjoint itmps destroyed_at_move_regs ->
-  list_disjoint ftmps destroyed_at_move_regs ->
-  exists rs',
-  star step ge 
-      (State stk f sp (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m)
-   E0 (State stk f sp k rs' m) /\
-  reglist rs' (regs_for_rec srcs itmps ftmps) = map rs srcs /\
-  (forall r, ~(In r itmps) -> ~(In r ftmps) -> ~(In r destroyed_at_move_regs) -> rs' (R r) = rs (R r)) /\
-  (forall s, rs' (S s) = rs (S s)).
-Proof.
-Opaque destroyed_at_move_regs.
-  induction srcs; simpl; intros.
-  (* base case *)
-  exists rs. split. apply star_refl. tauto.
-  (* inductive case *)
-  simpl in H0. 
-  assert (ACC1: loc_acceptable a) by (auto with coqlib).
-  assert (ACC2: locs_acceptable srcs) by (red; auto with coqlib).
-  destruct a as [r | s].
-  (* a is a register *)
-  simpl add_reload. rewrite dec_eq_true. 
-  exploit IHsrcs; eauto.
-  intros [rs' [EX [RES [OTH1 OTH2]]]].
-  exists rs'. split. eauto.
-  split. simpl. decEq.
-    apply OTH1. red; intros; eauto. red; intros; eauto. auto.
-    auto.
-  auto.
-  (* a is a stack slot *)
-  destruct (slot_type s).
-  (* ... of integer type *)
-  destruct itmps as [ | it1 itmps ]. discriminate. inv H5.
-  destruct (add_reload_correct (S s) it1 (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m)
-  as [rs1 [A [B C]]].
-  exploit IHsrcs; eauto with coqlib.
-  eapply list_disjoint_cons_left; eauto.
-  eapply list_disjoint_cons_left; eauto. 
-  intros [rs' [P [Q [T U]]]]. 
-  exists rs'. split. eapply star_trans; eauto. 
-  split. simpl. decEq. rewrite <- B. apply T.
-    auto.
-    eapply list_disjoint_notin. eexact H4. eauto with coqlib.
-    eapply list_disjoint_notin. eapply H7. auto with coqlib.
-    rewrite Q. apply list_map_exten. intros. symmetry. apply C. 
-    simpl. destruct x; auto. red; intro; subst m0. apply H1 with it1; auto with coqlib.
-    auto.
-    destruct x. apply notin_destroyed_move_1. auto. apply notin_destroyed_move_2.
-  split. simpl. intros. transitivity (rs1 (R r)).
-    apply T; tauto. apply C. simpl. tauto. auto.
-    apply notin_destroyed_move_1; auto.
-  intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. auto. apply notin_destroyed_move_2.
-  (* ... of float type *)
-  destruct ftmps as [ | ft1 ftmps ]. discriminate. inv H6.
-  destruct (add_reload_correct (S s) ft1 (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m)
-  as [rs1 [A [B C]]].
-  exploit IHsrcs; eauto with coqlib.
-  eapply list_disjoint_cons_right; eauto.
-  eapply list_disjoint_cons_left; eauto.
-  intros [rs' [P [Q [T U]]]]. 
-  exists rs'. split. eapply star_trans; eauto.
-  split. simpl. decEq. rewrite <- B. apply T. 
-    eapply list_disjoint_notin; eauto. apply list_disjoint_sym. apply H4. auto with coqlib.
-    auto.
-    eapply list_disjoint_notin. eexact H8. auto with coqlib.
-    rewrite Q. apply list_map_exten. intros. symmetry. apply C. 
-    simpl. destruct x; auto. red; intro; subst m0. apply H2 with ft1; auto with coqlib. auto.
-    destruct x. apply notin_destroyed_move_1. auto. apply notin_destroyed_move_2.
-  split. simpl. intros. transitivity (rs1 (R r)).
-    apply T; tauto. apply C. simpl. tauto. auto.  
-    apply notin_destroyed_move_1; auto.
-  intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. auto. apply notin_destroyed_move_2; auto.
-Qed.
-
-Lemma add_reloads_correct:
-  forall srcs k rs m,
-  enough_temporaries srcs = true ->
-  locs_acceptable srcs ->
-  exists rs',
-  star step ge (State stk f sp (add_reloads srcs (regs_for srcs) k) rs m)
-            E0 (State stk f sp k rs' m) /\
-  reglist rs' (regs_for srcs) = List.map rs srcs /\
-  forall l, Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-Transparent destroyed_at_move_regs.
-  intros.
-  unfold enough_temporaries in H.
-  exploit add_reloads_correct_rec. eauto. eauto. 
-    intros. generalize (H0 _ H1). unfold loc_acceptable. generalize H2.  
-    simpl. intuition congruence. 
-    intros. generalize (H0 _ H1). unfold loc_acceptable. generalize H2. 
-    simpl. intuition congruence. 
-    intros. generalize (H0 _ H1). unfold loc_acceptable.
-    simpl. intuition congruence. 
-    red; simpl; intros. intuition congruence.
-    unfold int_temporaries. NoRepet.
-    unfold float_temporaries. NoRepet.
-    red; simpl; intros. intuition congruence.
-    red; simpl; intros. intuition congruence.
-  intros [rs' [EX [RES [OTH1 OTH2]]]].
-  exists rs'. split. eexact EX. 
-  split. exact RES.
-  intros. destruct l. generalize (Loc.notin_not_in _ _ H1); simpl; intro.
-  apply OTH1; simpl; intuition congruence.
-  apply OTH2.
-Qed.
-
-Lemma add_move_correct:
-  forall src dst k rs m,
-  exists rs',
-  star step ge (State stk f sp (add_move src dst k) rs m)
-            E0 (State stk f sp k rs' m) /\
-  rs' dst = rs src /\
-  forall l, 
-    Loc.diff l dst -> Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> Loc.notin l destroyed_at_move ->
-    rs' l = rs l.
-Proof.
-  intros; unfold add_move.
-  destruct (Loc.eq src dst).
-  subst dst. exists rs. split. apply star_refl. tauto.
-  destruct src.
-  (* src is a register *)
-  generalize (add_spill_correct m0 dst k rs m); intros [rs' [EX [RES OTH]]].
-  exists rs'; intuition. apply OTH. apply Loc.diff_sym; auto. auto.
-  destruct dst.
-  (* src is a stack slot, dst a register *)
-  generalize (add_reload_correct (S s) m0 k rs m); intros [rs' [EX [RES OTH]]].
-  exists rs'; intuition. apply OTH. apply Loc.diff_sym; auto. right; apply Loc.diff_sym; auto. auto.
-  (* src and dst are stack slots *)
-  set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end).
-  generalize (add_reload_correct (S s) tmp (add_spill tmp (S s0) k) rs m);
-  intros [rs1 [EX1 [RES1 OTH1]]].
-  generalize (add_spill_correct tmp (S s0) k rs1 m);
-  intros [rs2 [EX2 [RES2 OTH2]]].
-  exists rs2. split.
-  eapply star_trans; eauto. traceEq.
-  split. congruence.
-  intros. rewrite OTH2. apply OTH1. 
-  apply Loc.diff_sym. unfold tmp; case (slot_type s); auto.
-  right. apply Loc.diff_sym; auto. auto. 
-  apply Loc.diff_sym; auto. auto.
-Qed.
-
-Lemma effect_move_sequence:
-  forall k moves rs m,
-  let k' := List.fold_right (fun p k => add_move (fst p) (snd p) k) k moves in
-  exists rs',
-  star step ge (State stk f sp k' rs m)
-            E0 (State stk f sp k rs' m) /\
-  effect_seqmove moves rs rs'.
-Proof.
-  induction moves; intros until m; simpl.
-  exists rs; split. constructor. constructor.
-  destruct a as [src dst]; simpl.
-  set (k1 := fold_right
-              (fun (p : loc * loc) (k : code) => add_move (fst p) (snd p) k)
-              k moves) in *.
-  destruct (add_move_correct src dst k1 rs m) as [rs1 [A [B C]]].
-  destruct (IHmoves rs1 m) as [rs' [D E]].
-  exists rs'; split. 
-  eapply star_trans; eauto.
-  econstructor; eauto. red. tauto. 
-Qed.
-
-Lemma parallel_move_correct:
-  forall srcs dsts k rs m,
-  List.length srcs = List.length dsts ->
-  Loc.no_overlap srcs dsts ->
-  Loc.norepet dsts ->
-  Loc.disjoint srcs temporaries ->
-  Loc.disjoint dsts temporaries ->
-  exists rs',
-  star step ge (State stk f sp (parallel_move srcs dsts k) rs m)
-               E0 (State stk f sp k rs' m) /\
-  List.map rs' dsts = List.map rs srcs /\
-  forall l, Loc.notin l dsts -> Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros. 
-  generalize (effect_move_sequence k (parmove srcs dsts) rs m).
-  intros [rs' [EXEC EFFECT]].
-  exists rs'. split. exact EXEC. 
-  apply effect_parmove; auto.
-Qed.
-
-Lemma parallel_move_arguments_correct:
-  forall args sg k rs m,
-  List.map Loc.type args = sg.(sig_args) ->
-  locs_acceptable args ->
-  exists rs',
-  star step ge (State stk f sp (parallel_move args (loc_arguments sg) k) rs m)
-            E0 (State stk f sp k rs' m) /\
-  List.map rs' (loc_arguments sg) = List.map rs args /\
-  forall l, Loc.notin l (loc_arguments sg) -> Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros. apply parallel_move_correct.
-  transitivity (length sg.(sig_args)).
-  rewrite <- H. symmetry; apply list_length_map. 
-  symmetry. apply loc_arguments_length.
-  apply no_overlap_arguments; auto.
-  apply loc_arguments_norepet.
-  apply locs_acceptable_disj_temporaries; auto.
-  apply loc_arguments_not_temporaries.
-Qed.
-
-Lemma parallel_move_parameters_correct:
-  forall params sg k rs m,
-  List.map Loc.type params = sg.(sig_args) ->
-  locs_acceptable params ->
-  Loc.norepet params ->
-  exists rs',
-  star step ge (State stk f sp (parallel_move (loc_parameters sg) params k) rs m)
-            E0 (State stk f sp k rs' m) /\
-  List.map rs' params = List.map rs (loc_parameters sg) /\
-  forall l, Loc.notin l params -> Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros. apply parallel_move_correct.
-  transitivity (length sg.(sig_args)).
-  apply loc_parameters_length.
-  rewrite <- H. apply list_length_map.
-  apply no_overlap_parameters; auto.
-  auto. apply loc_parameters_not_temporaries. 
-  apply locs_acceptable_disj_temporaries; auto.
-Qed.
-
-End LINEAR_CONSTRUCTORS.
-
-(** * Agreement between values of locations *)
-
-(** The predicate [agree] states that two location maps
-  give compatible values to all acceptable locations,
-  that is, non-temporary registers and [Local] stack slots.
-  The notion of compatibility used is the [Val.lessdef] ordering,
-  which enables a [Vundef] value in the original program to be refined
-  into any value in the transformed program.  
-
-  A typical situation where this refinement of values occurs is at
-  function entry point.  In LTLin, all registers except those
-  belonging to the function parameters are set to [Vundef].  In
-  Linear, these registers have whatever value they had in the caller
-  function.  This difference is harmless: if the original LTLin code
-  does not get stuck, we know that it does not use any of these
-  [Vundef] values. *)
-
-Definition agree (rs1 rs2: locset) : Prop :=
-  forall l, loc_acceptable l -> Val.lessdef (rs1 l) (rs2 l).
-
-Lemma agree_loc:
-  forall rs1 rs2 l,
-  agree rs1 rs2 -> loc_acceptable l -> Val.lessdef (rs1 l) (rs2 l).
-Proof.
-  auto.
-Qed.
-
-Lemma agree_locs:
-  forall rs1 rs2 ll,
-  agree rs1 rs2 -> locs_acceptable ll -> 
-  Val.lessdef_list (map rs1 ll) (map rs2 ll).
-Proof.
-  induction ll; simpl; intros.
-  constructor.
-  constructor. apply H. apply H0; auto with coqlib.
-  apply IHll; auto. red; intros. apply H0; auto with coqlib.
-Qed.
-
-Lemma agree_exten:
-  forall rs ls1 ls2,
-  agree rs ls1 ->
-  (forall l, Loc.notin l temporaries -> ls2 l = ls1 l) ->
-  agree rs ls2.
-Proof.
-  intros; red; intros. rewrite H0. auto. 
-  apply temporaries_not_acceptable; auto.
-Qed.
-
-Remark undef_temps_others:
-  forall rs l,
-  Loc.notin l temporaries -> LTL.undef_temps rs l = rs l.
-Proof.
-  intros. apply Locmap.guo; auto. 
-Qed.
-
-Remark undef_op_others:
-  forall op rs l,
-  Loc.notin l temporaries -> undef_op op rs l = rs l.
-Proof.
-  intros. generalize (undef_temps_others rs l H); intro. 
-  unfold undef_op; destruct op; auto; apply Locmap.guo; simpl in *; tauto.
-Qed.
-
-Lemma agree_undef_temps:
-  forall rs1 rs2,
-  agree rs1 rs2 -> agree (LTL.undef_temps rs1) rs2.
-Proof.
-  intros; red; intros. rewrite undef_temps_others; auto. 
-  apply Conventions.temporaries_not_acceptable. auto.
-Qed.
-
-Lemma agree_undef_temps2:
-  forall rs1 rs2,
-  agree rs1 rs2 -> agree (LTL.undef_temps rs1) (LTL.undef_temps rs2).
-Proof.
-  intros. apply agree_exten with rs2. apply agree_undef_temps; auto.
-  intros. apply undef_temps_others; auto.
-Qed.
-
-Lemma agree_set:
-  forall rs1 rs2 rs2' l v,
-  loc_acceptable l ->
-  Val.lessdef v (rs2' l) ->
-  (forall l', Loc.diff l l' -> Loc.notin l' temporaries -> rs2' l' = rs2 l') ->
-  agree rs1 rs2 -> agree (Locmap.set l v rs1) rs2'.
-Proof.
-  intros; red; intros.
-  destruct (Loc.eq l l0).
-  subst l0. rewrite Locmap.gss. auto.
-  assert (Loc.diff l l0). eapply loc_acceptable_noteq_diff; eauto.
-  rewrite Locmap.gso; auto. rewrite H1. auto. auto.
-  apply temporaries_not_acceptable; auto. 
-Qed.
-
-Lemma agree_set2:
-  forall rs1 rs2 rs2' l v,
-  loc_acceptable l ->
-  Val.lessdef v (rs2' l) ->
-  (forall l', Loc.diff l l' -> Loc.notin l' temporaries -> rs2' l' = rs2 l') ->
-  agree rs1 rs2 -> agree (Locmap.set l v (LTL.undef_temps rs1)) rs2'.
-Proof.
-  intros. eapply agree_set; eauto. apply agree_undef_temps; auto.
-Qed.
-
-Lemma agree_find_funct:
-  forall (ge: Linear.genv) rs ls r f,
-  Genv.find_funct ge (rs r) = Some f ->
-  agree rs ls ->
-  loc_acceptable r ->
-  Genv.find_funct ge (ls r) = Some f.
-Proof.
-  intros. 
-  assert (Val.lessdef (rs r) (ls r)). eapply agree_loc; eauto.
-  exploit Genv.find_funct_inv; eauto. intros [b EQ]. rewrite EQ in H2.
-  inv H2. rewrite <- EQ. auto.
-Qed.
-
-Lemma agree_postcall_1:
-  forall rs ls,
-  agree rs ls ->
-  agree (LTL.postcall_locs rs) ls.
-Proof.
-  intros; red; intros. unfold LTL.postcall_locs.
-  destruct l; auto. 
-  destruct (In_dec Loc.eq (R m) temporaries). constructor.
-  destruct (In_dec Loc.eq (R m) destroyed_at_call). constructor.
-  auto.
-Qed.
-
-Lemma agree_postcall_2:
-  forall rs ls ls',
-  agree (LTL.postcall_locs rs) ls ->
-  (forall l,
-      loc_acceptable l -> ~In l destroyed_at_call -> ~In l temporaries ->
-      ls' l = ls l) ->
-  agree (LTL.postcall_locs rs) ls'.
-Proof.
-  intros; red; intros. generalize (H l H1). unfold LTL.postcall_locs. 
-  destruct l. 
-  destruct (In_dec Loc.eq (R m) temporaries). intro; constructor.
-  destruct (In_dec Loc.eq (R m) destroyed_at_call). intro; constructor.
-  intro. rewrite H0; auto. 
-  intro. rewrite H0; auto.
-  simpl. intuition congruence.
-  simpl. intuition congruence.
-Qed.
-
-Lemma agree_postcall_call:
-  forall rs ls ls' sig,
-  agree rs ls ->
-  (forall l,
-     Loc.notin l (loc_arguments sig) -> Loc.notin l temporaries -> 
-     ls' l = ls l) ->
-  agree (LTL.postcall_locs rs) ls'.
-Proof.
-  intros. apply agree_postcall_2 with ls. apply agree_postcall_1; auto.
-  intros. apply H0.
-  apply arguments_not_preserved; auto.
-  destruct l; simpl. simpl in H2. intuition congruence. 
-  destruct s; tauto.
-  apply temporaries_not_acceptable; auto.
-Qed.
-
-Lemma agree_init_locs:
-  forall ls dsts vl,
-  locs_acceptable dsts ->
-  Loc.norepet dsts ->
-  Val.lessdef_list vl (map ls dsts) ->
-  agree (LTL.init_locs vl dsts) ls.
-Proof.
-  induction dsts; intros; simpl.
-  red; intros. unfold Locmap.init. constructor.
-  simpl in H1. inv H1. inv H0. 
-  apply agree_set with ls. apply H; auto with coqlib. auto. auto.
-  apply IHdsts; auto. red; intros; apply H; auto with coqlib.
-Qed.
-
-Lemma call_regs_parameters:
-  forall ls sig,
-  map (call_regs ls) (loc_parameters sig) = map ls (loc_arguments sig).
-Proof.
-  intros. unfold loc_parameters. rewrite list_map_compose. 
-  apply list_map_exten; intros. 
-  unfold call_regs, parameter_of_argument. 
-  generalize (loc_arguments_acceptable _ _ H). 
-  unfold loc_argument_acceptable. 
-  destruct x.
-  intros. destruct (in_dec Loc.eq (R m) temporaries). contradiction. auto.
-  destruct s; intros; try contradiction. auto.
-Qed. 
-
-Lemma return_regs_preserve:
-  forall ls1 ls2 l,
-  ~ In l temporaries ->
-  ~ In l destroyed_at_call ->
-  return_regs ls1 ls2 l = ls1 l.
-Proof.
-  intros. unfold return_regs. destruct l; auto.
-  destruct (In_dec Loc.eq (R m) temporaries). contradiction.
-  destruct (In_dec Loc.eq (R m) destroyed_at_call). contradiction.
-  auto.
-Qed.
-
-Lemma return_regs_arguments:
-  forall ls1 ls2 sig,
-  tailcall_possible sig ->
-  map (return_regs ls1 ls2) (loc_arguments sig) = map ls2 (loc_arguments sig).
-Proof.
-  intros. apply list_map_exten; intros. 
-  unfold return_regs. generalize (H x H0). destruct x; intros.
-  destruct (In_dec Loc.eq (R m) temporaries). auto.
-  destruct (In_dec Loc.eq (R m) destroyed_at_call). auto.
-  elim n0. eapply arguments_caller_save; eauto.
-  contradiction.
-Qed.
-
-Lemma return_regs_result:
-  forall ls1 ls2 sig,
-  return_regs ls1 ls2 (R (loc_result sig)) = ls2 (R (loc_result sig)).
-Proof.
-  intros. unfold return_regs. 
-  destruct (In_dec Loc.eq (R (loc_result sig)) temporaries). auto.
-  destruct (In_dec Loc.eq (R (loc_result sig)) destroyed_at_call). auto.
-  generalize (loc_result_caller_save sig). tauto.
-Qed.
-
-(** * Preservation of labels and gotos *)
-
-Lemma find_label_add_spill:
-  forall lbl src dst k,
-  find_label lbl (add_spill src dst k) = find_label lbl k.
-Proof.
-  intros. destruct dst; simpl; auto.
-  destruct (mreg_eq src m); auto.
-Qed.
-
-Lemma find_label_add_reload:
-  forall lbl src dst k,
-  find_label lbl (add_reload src dst k) = find_label lbl k.
-Proof.
-  intros. destruct src; simpl; auto.
-  destruct (mreg_eq m dst); auto.
-Qed.
-
-Lemma find_label_add_reloads:
-  forall lbl srcs dsts k,
-  find_label lbl (add_reloads srcs dsts k) = find_label lbl k.
-Proof.
-  induction srcs; intros; simpl. auto.
-  destruct dsts; auto. rewrite find_label_add_reload. auto.
-Qed.
-
-Lemma find_label_add_move:
-  forall lbl src dst k,
-  find_label lbl (add_move src dst k) = find_label lbl k.
-Proof.
-  intros; unfold add_move.
-  destruct (Loc.eq src dst); auto.
-  destruct src. apply find_label_add_spill.
-  destruct dst. apply find_label_add_reload.
-  rewrite find_label_add_reload. apply find_label_add_spill.
-Qed.
-
-Lemma find_label_parallel_move:
-  forall lbl srcs dsts k,
-  find_label lbl (parallel_move srcs dsts k) = find_label lbl k.
-Proof.
-  intros. unfold parallel_move. generalize (parmove srcs dsts). 
-  induction m; simpl. auto.
-  rewrite find_label_add_move. auto.
-Qed.
-
-Hint Rewrite find_label_add_spill find_label_add_reload
-             find_label_add_reloads find_label_add_move
-             find_label_parallel_move: labels.
-
-Opaque reg_for.
-
-Ltac FL := simpl; autorewrite with labels; auto.
-
-Lemma find_label_transf_instr:
-  forall lbl sg instr k,
-  find_label lbl (transf_instr sg instr k) =
-  if LTLin.is_label lbl instr then Some k else find_label lbl k.
-Proof.
-  intros. destruct instr; FL.
-  destruct (is_move_operation o l); FL; FL.
-  FL.
-  destruct (enough_temporaries (l0 :: l)).
-    destruct (regs_for (l0 :: l)); FL.
-    FL. FL. 
-  destruct s0; FL; FL; FL.
-  destruct s0; FL; FL; FL.
-  destruct (ef_reloads e). FL. FL. FL.
-  destruct o; FL.
-Qed.
-
-Lemma transf_code_cons:
-  forall f i c, transf_code f (i :: c) = transf_instr f i (transf_code f c).
-Proof.
-  unfold transf_code; intros. rewrite list_fold_right_eq; auto. 
-Qed.
-
-Lemma find_label_transf_code:
-  forall sg lbl c,
-  find_label lbl (transf_code sg c) =
-  option_map (transf_code sg) (LTLin.find_label lbl c).
-Proof.
-  induction c; simpl.
-  auto.
-  rewrite transf_code_cons.
-  rewrite find_label_transf_instr.
-  destruct (LTLin.is_label lbl a); auto.
-Qed.
-
-Lemma find_label_transf_function:
-  forall lbl f c,
-  LTLin.find_label lbl (LTLin.fn_code f) = Some c ->
-  find_label lbl (Linear.fn_code (transf_function f)) =
-  Some (transf_code f c).
-Proof.
-  intros. destruct f; simpl in *. FL. 
-  rewrite find_label_transf_code. rewrite H; auto.
-Qed.
-
-(** * Semantic preservation *)
-
-Section PRESERVATION.
-
-Variable prog: LTLin.program.
-Let tprog := transf_program prog.
-Hypothesis WT_PROG: LTLintyping.wt_program prog.
-
-Let ge := Genv.globalenv prog.
-Let tge := Genv.globalenv tprog.
-
-Lemma functions_translated:
-  forall v f,
-  Genv.find_funct ge v = Some f ->
-  Genv.find_funct tge v = Some (transf_fundef f).
-Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog).
-
-Lemma function_ptr_translated:
-  forall v f,
-  Genv.find_funct_ptr ge v = Some f ->
-  Genv.find_funct_ptr tge v = Some (transf_fundef f).
-Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog).
-
-Lemma symbols_preserved:
-  forall id,
-  Genv.find_symbol tge id = Genv.find_symbol ge id.
-Proof (@Genv.find_symbol_transf _ _ _ transf_fundef prog).
-
-Lemma varinfo_preserved:
-  forall b, Genv.find_var_info tge b = Genv.find_var_info ge b.
-Proof (@Genv.find_var_info_transf _ _ _ transf_fundef prog).
-
-Lemma sig_preserved:
-  forall f, funsig (transf_fundef f) = LTLin.funsig f.
-Proof.
-  destruct f; reflexivity.
-Qed.
-
-Lemma find_function_wt:
-  forall ros rs f,
-  LTLin.find_function ge ros rs = Some f -> wt_fundef f.
-Proof.
-  intros until f. destruct ros; simpl.
-  intro. eapply Genv.find_funct_prop with (p := prog); eauto. 
-  caseEq (Genv.find_symbol ge i); intros. 
-  eapply Genv.find_funct_ptr_prop with (p := prog); eauto. 
-  congruence.
-Qed.
-
-(** The [match_state] predicate relates states in the original LTLin
-  program and the transformed Linear program.  The main property
-  it enforces are:
-- Agreement between the values of locations in the two programs,
-  according to the [agree] or [agree_arguments] predicates.
-- Agreement between the memory states of the two programs,
-  according to the [Mem.lessdef] predicate.
-- Lists of LTLin instructions appearing in the source state
-  are always suffixes of the code for the corresponding functions.
-- Well-typedness of the source code, which ensures that
-  only acceptable locations are accessed by this code.
-- Agreement over return types during calls: the return type of a function
-  is always equal to the return type of the signature of the corresponding
-  call.  This invariant is necessary since the conventional location
-  used for passing return values depend on the return type.  This invariant
-  is enforced through the third parameter of the [match_stackframes]
-  predicate, which is the signature of the called function.
-*)
-
-Inductive match_stackframes:
-       list LTLin.stackframe -> list Linear.stackframe -> signature -> Prop :=
-  | match_stackframes_nil:
-      forall sig,
-      sig.(sig_res) = Some Tint ->
-      match_stackframes nil nil sig
-  | match_stackframes_cons:
-      forall res f sp c rs s s' c' ls sig,
-      match_stackframes s s' (LTLin.fn_sig f) ->
-      c' = add_spill (loc_result sig) res (transf_code f c) ->      
-      agree (LTL.postcall_locs rs) ls ->
-      loc_acceptable res ->
-      wt_function f ->
-      is_tail c (LTLin.fn_code f) ->
-      match_stackframes
-         (LTLin.Stackframe res f sp (LTL.postcall_locs rs) c :: s)
-         (Linear.Stackframe (transf_function f) sp ls c' :: s')
-         sig.
-
-Inductive match_states: LTLin.state -> Linear.state -> Prop :=
-  | match_states_intro:
-      forall s f sp c rs m s' ls tm
-        (STACKS: match_stackframes s s' (LTLin.fn_sig f))
-        (AG: agree rs ls)
-        (WT: wt_function f)
-        (TL: is_tail c (LTLin.fn_code f))
-        (MMD: Mem.extends m tm),
-      match_states (LTLin.State s f sp c rs m)
-                   (Linear.State s' (transf_function f) sp (transf_code f c) ls tm)
-  | match_states_call:
-      forall s f args m s' ls tm
-        (STACKS: match_stackframes s s' (LTLin.funsig f))
-        (AG: Val.lessdef_list args (map ls (loc_arguments (LTLin.funsig f))))
-        (PRES: forall l, ~In l temporaries -> ~In l destroyed_at_call ->
-                 ls l = parent_locset s' l)
-        (WT: wt_fundef f)
-        (MMD: Mem.extends m tm),
-      match_states (LTLin.Callstate s f args m)
-                   (Linear.Callstate s' (transf_fundef f) ls tm)
-  | match_states_return:
-      forall s res m s' ls tm sig
-        (STACKS: match_stackframes s s' sig)
-        (AG: Val.lessdef res (ls (R (loc_result sig))))
-        (PRES: forall l, ~In l temporaries -> ~In l destroyed_at_call ->
-                 ls l = parent_locset s' l)
-        (MMD: Mem.extends m tm),
-      match_states (LTLin.Returnstate s res m)
-                   (Linear.Returnstate s' ls tm).
-
-Lemma match_stackframes_change_sig:
-  forall s s' sig1 sig2,
-  match_stackframes s s' sig1 ->
-  sig2.(sig_res) = sig1.(sig_res) ->
-  match_stackframes s s' sig2.
-Proof.
-  intros. inv H. constructor. congruence.
-  econstructor; eauto. unfold loc_result. rewrite H0. auto.
-Qed.
-
-Ltac ExploitWT :=
-  match goal with
-  | [ WT: wt_function _, TL: is_tail _ _ |- _ ] =>
-       generalize (wt_instrs _ WT _ (is_tail_in TL)); intro WTI
-  end.
-
-(** The proof of semantic preservation is a simulation argument
-  based on diagrams of the following form:
-<<
-           st1 --------------- st2
-            |                   |
-           t|                  *|t
-            |                   |
-            v                   v
-           st1'--------------- st2'
->>
-  It is possible for the transformed code to take no transition,
-  remaining in the same state; for instance, if the source transition
-  corresponds to a move instruction that was eliminated.  
-  To ensure that this cannot occur infinitely often in a row,
-  we use the following [measure] function that just counts the 
-  remaining number of instructions in the source code sequence. *)
-
-Definition measure (st: LTLin.state) : nat :=
-  match st with
-  | LTLin.State s f sp c ls m => List.length c
-  | LTLin.Callstate s f ls m => 0%nat
-  | LTLin.Returnstate s ls m => 0%nat
-  end.
-
-Theorem transf_step_correct:
-  forall s1 t s2, LTLin.step ge s1 t s2 ->
-  forall s1' (MS: match_states s1 s1'),
-  (exists s2', plus Linear.step tge s1' t s2' /\ match_states s2 s2')
-  \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat.
-Proof.
-  Opaque regs_for. Opaque add_reloads.
-  induction 1; intros; try inv MS; try rewrite transf_code_cons; simpl.
-
-  (* Lop *)
-  ExploitWT. inv WTI. 
-  (* move *)
-  simpl.
-  destruct (add_move_correct tge s' (transf_function f) sp r1 res (transf_code f b) ls tm)
-        as [ls2 [A [B C]]].
-  inv A. 
-  right. split. omega. split. auto.
-  rewrite H1. rewrite H1. econstructor; eauto with coqlib. 
-  apply agree_set2 with ls2; auto. 
-  rewrite B. simpl in H; inversion H. auto.
-  left; econstructor; split. eapply plus_left; eauto. 
-  econstructor; eauto with coqlib. 
-  apply agree_set2 with ls; auto.
-  rewrite B. simpl in H; inversion H. auto.
-  intros. apply C. apply Loc.diff_sym; auto.
-  simpl in H7; tauto. simpl in H7; tauto. simpl in *; tauto.
-  (* other ops *)
-  assert (is_move_operation op args = None).
-    caseEq (is_move_operation op args); intros.
-    destruct (is_move_operation_correct _ _ H0). congruence.
-    auto.
-  rewrite H0.
-  exploit add_reloads_correct. 
-    eapply enough_temporaries_op_args; eauto. auto.
-  intros [ls2 [A [B C]]]. instantiate (1 := ls) in B. 
-  assert (exists tv, eval_operation tge sp op (reglist ls2 (regs_for args)) tm = Some tv
-                     /\ Val.lessdef v tv).
-    apply eval_operation_lessdef with (map rs args) m; auto.
-    rewrite B. eapply agree_locs; eauto. 
-    rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
-  destruct H1 as [tv [P Q]].
-  exploit add_spill_correct.
-  intros [ls3 [D [E F]]].
-  left; econstructor; split.
-  eapply star_plus_trans. eexact A. 
-  eapply plus_left. eapply exec_Lop with (v := tv); eauto.
-  eexact D. eauto. traceEq.
-  econstructor; eauto with coqlib.
-  apply agree_set2 with ls; auto.
-  rewrite E. rewrite Locmap.gss. auto.
-  intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_op_others; auto. 
-  apply reg_for_diff; auto. simpl in *; tauto.
-
-  (* Lload *)
-  ExploitWT; inv WTI.
-  exploit add_reloads_correct. 
-    eapply enough_temporaries_addr; eauto. auto.
-  intros [ls2 [A [B C]]].
-  assert (exists ta, eval_addressing tge sp addr (reglist ls2 (regs_for args)) = Some ta
-                     /\ Val.lessdef a ta).
-    apply eval_addressing_lessdef with (map rs args).
-    rewrite B. eapply agree_locs; eauto. 
-    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
-  destruct H1 as [ta [P Q]].
-  exploit Mem.loadv_extends; eauto. intros [tv [R S]].
-  exploit add_spill_correct.
-  intros [ls3 [D [E F]]].
-  left; econstructor; split.
-  eapply star_plus_trans. eauto. 
-  eapply plus_left. eapply exec_Lload; eauto.
-  eauto. auto. traceEq.
-  econstructor; eauto with coqlib.
-  apply agree_set2 with ls; auto.
-  rewrite E. rewrite Locmap.gss. auto.
-  intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_temps_others; auto. 
-  apply reg_for_diff; auto. simpl in *; tauto.
-
-  (* Lstore *)
-  ExploitWT; inv WTI.
-  caseEq (enough_temporaries (src :: args)); intro ENOUGH. 
-  destruct (regs_for_cons src args) as [rsrc [rargs EQ]]. rewrite EQ.
-  exploit add_reloads_correct.
-    eauto. red; simpl; intros. destruct H1. congruence. auto. 
-  intros [ls2 [A [B C]]]. rewrite EQ in A. rewrite EQ in B.
-  injection B. intros D E. 
-  simpl in B.
-  assert (exists ta, eval_addressing tge sp addr (reglist ls2 rargs) = Some ta
-                     /\ Val.lessdef a ta).
-    apply eval_addressing_lessdef with (map rs args).
-    rewrite D. eapply agree_locs; eauto. 
-    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
-  destruct H1 as [ta [P Q]].
-  assert (X: Val.lessdef (rs src) (ls2 (R rsrc))).
-    rewrite E. eapply agree_loc; eauto.
-  exploit Mem.storev_extends. eexact MMD. eauto. eexact Q. eexact X. 
-  intros [tm2 [Y Z]].
-  left; econstructor; split. 
-  eapply plus_right. eauto. 
-  eapply exec_Lstore with (a := ta); eauto.
-  traceEq.
-  econstructor; eauto with coqlib.
-  apply agree_undef_temps2. apply agree_exten with ls; auto.
-  (* not enough temporaries *)
-  destruct (add_reloads_correct tge s' (transf_function f) sp args
-             (Lop (op_for_binary_addressing addr) (regs_for args) IT2
-            :: add_reload src (reg_for src)
-                 (Lstore chunk (Aindexed Int.zero) (IT2 :: nil) (reg_for src)
-                  :: transf_code f b)) ls tm)
-  as [ls2 [A [B C]]].
-    eapply enough_temporaries_addr; eauto. auto.
-  assert (exists ta, eval_addressing tge sp addr (reglist ls2 (regs_for args)) = Some ta
-                     /\ Val.lessdef a ta).
-    apply eval_addressing_lessdef with (map rs args).
-    rewrite B. eapply agree_locs; eauto. 
-    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
-  destruct H1 as [ta [P Q]].
-  set (ls3 := Locmap.set (R IT2) ta (undef_op (op_for_binary_addressing addr) ls2)).
-  destruct (add_reload_correct_2 tge s' (transf_function f) sp src
-              (Lstore chunk (Aindexed Int.zero) (IT2 :: nil) (reg_for src)
-                  :: transf_code f b)
-              ls3 tm H8)
-  as [ls4 [D [E [F G]]]].
-  assert (NT: Loc.notin src temporaries) by (apply temporaries_not_acceptable; auto).
-  assert (X: Val.lessdef (rs src) (ls4 (R (reg_for src)))).
-    rewrite E. unfold ls3. rewrite Locmap.gso.
-    rewrite undef_op_others; auto. rewrite C; auto.
-    apply Loc.diff_sym. simpl in NT; tauto.
-  exploit Mem.storev_extends. eexact MMD. eauto. eexact Q. eexact X.
-  intros [tm2 [Y Z]].
-  left; econstructor; split.
-  eapply star_plus_trans. eauto. 
-  eapply plus_left. eapply exec_Lop with (v := ta). 
-    rewrite B. eapply not_enough_temporaries_addr; eauto.
-    rewrite <- B; auto.
-  eapply star_right. eauto. 
-  eapply exec_Lstore with (a := ta); eauto.
-    simpl reglist. rewrite G. unfold ls3. rewrite Locmap.gss. simpl. 
-    destruct ta; simpl in Y; try discriminate. simpl; rewrite Int.add_zero; auto.
-  reflexivity. reflexivity. traceEq. 
-  econstructor; eauto with coqlib.
-  apply agree_undef_temps2. apply agree_exten with ls; auto.
-  intros. rewrite F; auto. unfold ls3. rewrite Locmap.gso.
-  rewrite undef_op_others; auto. 
-  apply Loc.diff_sym. simpl in H1; tauto.
-
-  (* Lcall *)
-  ExploitWT. inversion WTI. subst ros0 args0 res0. rewrite <- H0.  
-  assert (WTF': wt_fundef f'). eapply find_function_wt; eauto.
-  destruct ros as [fn | id].
-  (* indirect call *)
-  red in H6. destruct H6 as [OK1 [OK2 OK3]].
-  rewrite <- H0 in H4. rewrite <- H0 in OK3.
-  destruct (parallel_move_arguments_correct tge s' (transf_function f) sp 
-              args sig
-              (add_reload fn (reg_for fn)
-                (Lcall sig (inl ident (reg_for fn))
-                  :: add_spill (loc_result sig) res (transf_code f b)))
-              ls tm H4 H7)
-  as [ls2 [A [B C]]].
-  destruct (add_reload_correct_2 tge s' (transf_function f) sp fn
-             (Lcall sig (inl ident (reg_for fn))
-              :: add_spill (loc_result sig) res (transf_code f b))
-             ls2 tm OK2)
-  as [ls3 [D [E [F G]]]].
-  assert (ARGS: Val.lessdef_list (map rs args) 
-                                 (map ls3 (loc_arguments sig))).
-    replace (map ls3 (loc_arguments sig)) with (map ls2 (loc_arguments sig)).
-    rewrite B. apply agree_locs; auto. 
-    apply list_map_exten; intros. apply F.
-    apply Loc.disjoint_notin with (loc_arguments sig).
-    apply loc_arguments_not_temporaries. auto.
-  left; econstructor; split.
-  eapply star_plus_trans. eexact A. eapply plus_right. eexact D. 
-  eapply exec_Lcall; eauto.
-    simpl. rewrite E. rewrite C. eapply agree_find_funct; eauto. 
-    apply functions_translated. eauto.
-    apply loc_acceptable_notin_notin; auto. 
-    apply temporaries_not_acceptable; auto.
-    rewrite H0; symmetry; apply sig_preserved.
-  eauto. traceEq.
-  econstructor; eauto. 
-  econstructor; eauto with coqlib.
-  rewrite H0. auto.
-  apply agree_postcall_call with ls sig; auto.
-  intros. rewrite F; auto. congruence. 
-  (* direct call *)
-  rewrite <- H0 in H4.
-  destruct (parallel_move_arguments_correct tge s' (transf_function f) sp
-              args sig
-              (Lcall sig (inr mreg id)
-                :: add_spill (loc_result sig) res (transf_code f b))
-              ls tm H4 H7)
-  as [ls3 [D [E F]]].
-  assert (ARGS: Val.lessdef_list (map rs args) (map ls3 (loc_arguments sig))).
-    rewrite E. apply agree_locs; auto.
-  left; econstructor; split.
-  eapply plus_right. eauto.
-  eapply exec_Lcall; eauto.
-    simpl. rewrite symbols_preserved. 
-    generalize H; simpl. destruct (Genv.find_symbol ge id).
-    apply function_ptr_translated; auto. congruence.
-    rewrite H0. symmetry; apply sig_preserved.
-  traceEq.
-  econstructor; eauto.
-  econstructor; eauto with coqlib. rewrite H0; auto. 
-  apply agree_postcall_call with ls sig; auto.
-  congruence.
-
-  (* Ltailcall *)
-  ExploitWT. inversion WTI. subst ros0 args0.
-  assert (WTF': wt_fundef f'). eapply find_function_wt; eauto.
-  rewrite <- H0.
-  exploit Mem.free_parallel_extends; eauto. intros [tm' [FREE MMD']].
-  destruct ros as [fn | id].
-  (* indirect call *)
-  red in H5. destruct H5 as [OK1 [OK2 OK3]].
-  rewrite <- H0 in H4. rewrite <- H0 in OK3.
-  destruct (parallel_move_arguments_correct tge s' (transf_function f) (Vptr stk Int.zero)
-              args sig
-              (add_reload fn IT1
-                (Ltailcall sig (inl ident IT1) :: transf_code f b))
-              ls tm H4 H6)
-  as [ls2 [A [B C]]].
-  destruct (add_reload_correct tge s' (transf_function f) (Vptr stk Int.zero) fn IT1
-             (Ltailcall sig (inl ident IT1) :: transf_code f b)
-             ls2 tm)
-  as [ls3 [D [E F]]].
-  assert (ARGS: Val.lessdef_list (map rs args) 
-                                 (map ls3 (loc_arguments sig))).
-    replace (map ls3 (loc_arguments sig)) with (map ls2 (loc_arguments sig)).
-    rewrite B. apply agree_locs; auto. 
-    apply list_map_exten; intros.
-    exploit Loc.disjoint_notin. apply loc_arguments_not_temporaries. eauto. simpl; intros.
-    apply F.
-    apply Loc.diff_sym; tauto.
-    auto.
-    simpl; tauto.
-  left; econstructor; split.
-  eapply star_plus_trans. eexact A. eapply plus_right. eexact D. 
-  eapply exec_Ltailcall; eauto.
-    simpl. rewrite E. rewrite C. eapply agree_find_funct; eauto. 
-    apply functions_translated. eauto.
-    apply loc_acceptable_notin_notin; auto. 
-    apply temporaries_not_acceptable; auto.
-    rewrite H0; symmetry; apply sig_preserved.
-  eauto. traceEq.
-  econstructor; eauto. 
-  eapply match_stackframes_change_sig; eauto.
-  rewrite return_regs_arguments; auto. congruence.
-  exact (return_regs_preserve (parent_locset s') ls3).
-  (* direct call *)
-  rewrite <- H0 in H4.
-  destruct (parallel_move_arguments_correct tge s' (transf_function f) (Vptr stk Int.zero)
-              args sig
-              (Ltailcall sig (inr mreg id) :: transf_code f b)
-              ls tm H4 H6)
-  as [ls3 [D [E F]]].
-  assert (ARGS: Val.lessdef_list (map rs args) 
-                                 (map ls3 (loc_arguments sig))).
-    rewrite E. apply agree_locs. apply agree_exten with ls; auto. auto.
-  left; econstructor; split.
-  eapply plus_right. eauto.
-  eapply exec_Ltailcall; eauto.
-    simpl. rewrite symbols_preserved. 
-    generalize H; simpl. destruct (Genv.find_symbol ge id).
-    apply function_ptr_translated; auto. congruence.
-    rewrite H0. symmetry; apply sig_preserved.
-  traceEq.
-  econstructor; eauto.
-  eapply match_stackframes_change_sig; eauto.
-  rewrite return_regs_arguments; auto. congruence.
-  exact (return_regs_preserve (parent_locset s') ls3).
-
-  (* Lbuiltin *)
-  ExploitWT; inv WTI.
-  case_eq (ef_reloads ef); intro RELOADS.
-  exploit add_reloads_correct.
-    instantiate (1 := args). apply arity_ok_enough. rewrite H3. destruct H5. auto. congruence. auto.
-  intros [ls2 [A [B C]]].
-  exploit external_call_mem_extends; eauto. 
-  apply agree_locs; eauto. 
-  intros [v' [tm' [P [Q [R S]]]]].
-  exploit add_spill_correct.
-  intros [ls3 [D [E F]]].
-  left; econstructor; split.
-  eapply star_plus_trans. eauto. 
-  eapply plus_left. eapply exec_Lbuiltin. rewrite B. 
-  eapply external_call_symbols_preserved; eauto.
-  exact symbols_preserved. exact varinfo_preserved.
-  eauto. reflexivity. traceEq. 
-  econstructor; eauto with coqlib.
-  apply agree_set with ls; auto.
-  rewrite E. rewrite Locmap.gss. auto.
-  intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_temps_others; auto. 
-  apply reg_for_diff; auto. simpl in *; tauto.
-  (* no reload *)
-  exploit external_call_mem_extends; eauto. 
-  apply agree_locs; eauto. 
-  intros [v' [tm' [P [Q [R S]]]]].
-  assert (EQ: v = Vundef).
-    destruct ef; simpl in RELOADS; try congruence. simpl in H; inv H. auto.
-  subst v.
-  left; econstructor; split.
-  apply plus_one. eapply exec_Lannot.
-  eapply external_call_symbols_preserved; eauto.
-  exact symbols_preserved. exact varinfo_preserved.
-  econstructor; eauto with coqlib.
-  apply agree_set with ls; auto.
-
-  (* Llabel *)
-  left; econstructor; split.
-  apply plus_one. apply exec_Llabel.
-  econstructor; eauto with coqlib.
-
-  (* Lgoto *)
-  left; econstructor; split.
-  apply plus_one. apply exec_Lgoto. apply find_label_transf_function; eauto.
-  econstructor; eauto.
-  eapply LTLin.find_label_is_tail; eauto.
-
-  (* Lcond true *)
-  ExploitWT; inv WTI.
-  exploit add_reloads_correct.
-    eapply enough_temporaries_cond; eauto. auto.
-  intros [ls2 [A [B C]]].
-  left; econstructor; split.
-  eapply plus_right. eauto. eapply exec_Lcond_true; eauto.
-  rewrite B. apply eval_condition_lessdef with (map rs args) m; auto.
-  eapply agree_locs; eauto. 
-  apply find_label_transf_function; eauto.
-  traceEq.
-  econstructor; eauto.
-  apply agree_undef_temps2. apply agree_exten with ls; auto.
-  eapply LTLin.find_label_is_tail; eauto.
-
-  (* Lcond false *)
-  ExploitWT; inv WTI.
-  exploit add_reloads_correct.
-    eapply enough_temporaries_cond; eauto. auto.
-  intros [ls2 [A [B C]]].
-  left; econstructor; split.
-  eapply plus_right. eauto. eapply exec_Lcond_false; eauto.
-  rewrite B. apply eval_condition_lessdef with (map rs args) m; auto.
-  eapply agree_locs; eauto. 
-  traceEq.
-  econstructor; eauto with coqlib.
-  apply agree_undef_temps2. apply agree_exten with ls; auto.
-
-  (* Ljumptable *)
-  ExploitWT; inv WTI.
-  exploit add_reload_correct_2; eauto.
-  intros [ls2 [A [B [C D]]]].
-  left; econstructor; split.
-  eapply plus_right. eauto. eapply exec_Ljumptable; eauto. 
-  assert (Val.lessdef (rs arg) (ls arg)). apply AG. auto.
-  rewrite H in H2. inv H2. congruence.
-  apply find_label_transf_function; eauto.
-  traceEq.
-  econstructor; eauto with coqlib.
-  apply agree_undef_temps2. apply agree_exten with ls; auto.
-  eapply LTLin.find_label_is_tail; eauto.
-
-  (* Lreturn *)
-  ExploitWT; inv WTI.
-  exploit Mem.free_parallel_extends; eauto. intros [tm' [FREE MMD']]. 
-  destruct or; simpl.
-  (* with an argument *)
-  exploit add_reload_correct.
-  intros [ls2 [A [B C]]].
-  left; econstructor; split.
-  eapply plus_right. eauto. eapply exec_Lreturn; eauto.
-  traceEq.
-  econstructor; eauto.
-  rewrite return_regs_result. rewrite B. apply agree_loc; auto.
-  apply return_regs_preserve.
-  (* without an argument *)
-  left; econstructor; split.
-  apply plus_one. eapply exec_Lreturn; eauto.  
-  econstructor; eauto.
-  apply return_regs_preserve.
-
-  (* internal function *)
-  simpl in WT. inversion_clear WT. inversion H0. simpl in AG.
-  exploit Mem.alloc_extends. eauto. eauto. 
-  instantiate (1 := 0); omega. instantiate (1 := LTLin.fn_stacksize f); omega.
-  intros [tm' [ALLOC MMD']].
-  destruct (parallel_move_parameters_correct tge s' (transf_function f)
-               (Vptr stk Int.zero) (LTLin.fn_params f) (LTLin.fn_sig f)
-               (transf_code f (LTLin.fn_code f)) (call_regs ls) tm'
-               wt_params wt_acceptable wt_norepet)
-  as [ls2 [A [B C]]].
-  assert (AG2: agree (LTL.init_locs args (fn_params f)) ls2).
-    apply agree_init_locs; auto.
-    rewrite B. rewrite call_regs_parameters. auto. 
-  left; econstructor; split.
-  eapply plus_left.
-  eapply exec_function_internal; eauto.
-  simpl. eauto. traceEq.
-  econstructor; eauto with coqlib.
-
-  (* external function *)
-  exploit external_call_mem_extends; eauto.
-  intros [res' [tm' [A [B [C D]]]]]. 
-  left; econstructor; split.
-  apply plus_one. eapply exec_function_external; eauto. 
-  eapply external_call_symbols_preserved; eauto.
-  exact symbols_preserved. exact varinfo_preserved.
-  econstructor; eauto.
-  simpl. rewrite Locmap.gss. auto.
-  intros. rewrite Locmap.gso. auto. 
-  simpl. destruct l; auto. red; intro. elim H1. subst m0. 
-  generalize (loc_result_caller_save (ef_sig ef)). tauto.
-
-  (* return *)
-  inv STACKS. 
-  exploit add_spill_correct. intros [ls2 [A [B C]]].
-  left; econstructor; split.
-  eapply plus_left. eapply exec_return; eauto.
-  eauto. traceEq.
-  econstructor; eauto. 
-  apply agree_set with ls; auto.
-  rewrite B. auto.
-  intros. apply C; auto. simpl in *; tauto.
-  unfold parent_locset in PRES.
-  apply agree_postcall_2 with ls0; auto. 
-Qed.
-
-Lemma transf_initial_states:
-  forall st1, LTLin.initial_state prog st1 ->
-  exists st2, Linear.initial_state tprog st2 /\ match_states st1 st2.
-Proof.
-  intros. inversion H.
-  econstructor; split.
-  econstructor.
-  apply Genv.init_mem_transf; eauto.
-  rewrite symbols_preserved. eauto.
-  apply function_ptr_translated; eauto. 
-  rewrite sig_preserved. auto.
-  econstructor; eauto. constructor. rewrite H3; auto.
-  rewrite H3. simpl. constructor.
-  eapply Genv.find_funct_ptr_prop; eauto.
-  apply Mem.extends_refl.
-Qed.
-
-Lemma transf_final_states:
-  forall st1 st2 r, 
-  match_states st1 st2 -> LTLin.final_state st1 r -> Linear.final_state st2 r.
-Proof.
-  intros. inv H0. inv H. inv STACKS. econstructor.
-  unfold loc_result in AG. rewrite H in AG. inv AG. auto. 
-Qed.
-
-Theorem transf_program_correct:
-  forward_simulation (LTLin.semantics prog) (Linear.semantics tprog).
-Proof.
-  eapply forward_simulation_star.
-  eexact symbols_preserved.
-  eexact transf_initial_states.
-  eexact transf_final_states.
-  eexact transf_step_correct. 
-Qed.
-
-End PRESERVATION.
diff --git a/backend/Reloadtyping.v b/backend/Reloadtyping.v
deleted file mode 100644
index 70d79bc..0000000
--- a/backend/Reloadtyping.v
+++ /dev/null
@@ -1,353 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Proof of type preservation for Reload and of
-    correctness of computation of stack bounds for Linear. *)
-
-Require Import Coqlib.
-Require Import AST.
-Require Import Op.
-Require Import Locations.
-Require Import LTLin.
-Require Import LTLintyping.
-Require Import Linear.
-Require Import Lineartyping.
-Require Import Conventions.
-Require Import Parallelmove.
-Require Import Reload.
-Require Import Reloadproof.
-
-(** * Typing Linear constructors *)
-
-(** We show that the Linear constructor functions defined in [Reload]
-  generate well-typed instruction sequences,
-  given sufficient typing and well-formedness hypotheses over the locations involved. *)
-
-Hint Constructors wt_instr: reloadty.
-
-Remark wt_code_cons:
-  forall f i c, wt_instr f i -> wt_code f c -> wt_code f (i :: c).
-Proof.
-  intros; red; simpl; intros. elim H1; intro.
-  subst i; auto. auto.
-Qed.
-
-Hint Resolve wt_code_cons: reloadty.
-
-Definition loc_valid (f: function) (l: loc) :=
-  match l with R _ => True | S s => slot_valid f s end.
-
-Lemma loc_acceptable_valid:
-  forall f l, loc_acceptable l -> loc_valid f l.
-Proof.
-  destruct l; simpl; intro. auto.
-  destruct s; simpl. omega. tauto. tauto.
-Qed.
-
-Definition loc_writable (l: loc) :=
-  match l with R _ => True | S s => slot_writable s end.
-
-Lemma loc_acceptable_writable:
-  forall l, loc_acceptable l -> loc_writable l.
-Proof.
-  destruct l; simpl; intro. auto.
-  destruct s; simpl; tauto.
-Qed.
-
-Hint Resolve loc_acceptable_valid loc_acceptable_writable: reloadty.
-
-Definition locs_valid (f: function) (ll: list loc) :=
-  forall l, In l ll -> loc_valid f l.
-Definition locs_writable (ll: list loc) :=
-  forall l, In l ll -> loc_writable l.
-
-Lemma locs_acceptable_valid:
-  forall f ll, locs_acceptable ll -> locs_valid f ll.
-Proof.
-  unfold locs_acceptable, locs_valid. auto with reloadty.
-Qed.
-
-Lemma locs_acceptable_writable:
-  forall ll, locs_acceptable ll -> locs_writable ll.
-Proof.
-  unfold locs_acceptable, locs_writable. auto with reloadty.
-Qed.
-
-Hint Resolve locs_acceptable_valid locs_acceptable_writable: reloadty.
-
-Lemma wt_add_reload:
-  forall f src dst k,
-  loc_valid f src -> Loc.type src = mreg_type dst ->
-  wt_code f k -> wt_code f (add_reload src dst k).
-Proof.
-  intros; unfold add_reload.
-  destruct src; eauto with reloadty.
-  destruct (mreg_eq m dst); eauto with reloadty.
-Qed.
-
-Hint Resolve wt_add_reload: reloadty.
-
-Lemma wt_add_reloads:
-  forall f srcs dsts k,
-  locs_valid f srcs -> map Loc.type srcs = map mreg_type dsts ->
-  wt_code f k -> wt_code f (add_reloads srcs dsts k).
-Proof.
-  induction srcs; destruct dsts; simpl; intros; try congruence.
-  auto. inv H0. apply wt_add_reload; auto with coqlib reloadty.
-  apply IHsrcs; auto. red; intros; auto with coqlib.
-Qed.
-
-Hint Resolve wt_add_reloads: reloadty.
-
-Lemma wt_add_spill:
-  forall f src dst k,
-  loc_valid f dst -> loc_writable dst -> Loc.type dst = mreg_type src ->
-  wt_code f k -> wt_code f (add_spill src dst k).
-Proof.
-  intros; unfold add_spill.
-  destruct dst; eauto with reloadty.
-  destruct (mreg_eq src m); eauto with reloadty.
-Qed.
-
-Hint Resolve wt_add_spill: reloadty.
-
-Lemma wt_add_move:
-  forall f src dst k,
-  loc_valid f src -> loc_valid f dst -> loc_writable dst ->
-  Loc.type dst = Loc.type src ->
-  wt_code f k -> wt_code f (add_move src dst k).
-Proof.
-  intros. unfold add_move.
-  destruct (Loc.eq src dst); auto.
-  destruct src; auto with reloadty.
-  destruct dst; auto with reloadty.
-  set (tmp := match slot_type s with
-                    | Tint => IT1
-                    | Tfloat => FT1
-                    end).
-  assert (mreg_type tmp = Loc.type (S s)).
-    simpl. destruct (slot_type s); reflexivity.
-  apply wt_add_reload; auto with reloadty. 
-  apply wt_add_spill; auto. congruence.
-Qed.
-
-Hint Resolve wt_add_move: reloadty.
-
-Lemma wt_add_moves:
-  forall f b moves,
-  (forall s d, In (s, d) moves ->
-      loc_valid f s /\ loc_valid f d /\ loc_writable d /\ Loc.type s = Loc.type d) ->
-  wt_code f b ->
-  wt_code f 
-    (List.fold_right (fun p k => add_move (fst p) (snd p) k) b moves).
-Proof.
-  induction moves; simpl; intros.
-  auto.
-  destruct a as [s d]. simpl.
-  destruct (H s d) as [A [B [C D]]]. auto.
-  auto with reloadty.
-Qed.
-
-Theorem wt_parallel_move:
- forall f srcs dsts b,
- List.map Loc.type srcs = List.map Loc.type dsts ->
- locs_valid f srcs -> locs_valid f dsts -> locs_writable dsts ->
- wt_code f b ->  wt_code f (parallel_move srcs dsts b).
-Proof.
-  intros. unfold parallel_move. apply wt_add_moves; auto.
-  intros. 
-  elim (parmove_prop_2 _ _ _ _ H4); intros A B.
-  split. destruct A as [C|[C|C]]; auto; subst s; exact I.
-  split. destruct B as [C|[C|C]]; auto; subst d; exact I.
-  split. destruct B as [C|[C|C]]; auto; subst d; exact I.
-  eapply parmove_prop_3; eauto.
-Qed.
-Hint Resolve wt_parallel_move: reloadty.
-
-Lemma wt_reg_for:
-  forall l, mreg_type (reg_for l) = Loc.type l.
-Proof.
-  intros. destruct l; simpl. auto.
-  case (slot_type s); reflexivity.
-Qed.
-Hint Resolve wt_reg_for: reloadty.
-
-Lemma wt_regs_for_rec:
-  forall locs itmps ftmps,
-  enough_temporaries_rec locs itmps ftmps = true ->
-  (forall r, In r itmps -> mreg_type r = Tint) ->
-  (forall r, In r ftmps -> mreg_type r = Tfloat) ->
-  List.map mreg_type (regs_for_rec locs itmps ftmps) = List.map Loc.type locs.
-Proof.
-  induction locs; intros.
-  simpl. auto.
-  simpl in H. simpl. 
-  destruct a. 
-  simpl. decEq. eauto. 
-  caseEq (slot_type s); intro SLOTTYPE; rewrite SLOTTYPE in H.
-  destruct itmps. discriminate.
-  simpl. decEq. 
-  rewrite SLOTTYPE. auto with coqlib.
-  apply IHlocs; auto with coqlib.
-  destruct ftmps. discriminate. simpl. decEq.
-  rewrite SLOTTYPE. auto with coqlib.
-  apply IHlocs; auto with coqlib.
-Qed.
-
-Lemma wt_regs_for:
-  forall locs,
-  enough_temporaries locs = true ->
-  List.map mreg_type (regs_for locs) = List.map Loc.type locs.
-Proof.
-  intros. unfold regs_for. apply wt_regs_for_rec.
-  auto.
-  simpl; intros; intuition; subst r; reflexivity.
-  simpl; intros; intuition; subst r; reflexivity.
-Qed.
-Hint Resolve wt_regs_for: reloadty.
-
-Hint Resolve enough_temporaries_op_args enough_temporaries_cond enough_temporaries_addr: reloadty.
-
-Hint Extern 4 (_ = _) => congruence : reloadty.
-
-Lemma wt_transf_instr:
-  forall f instr k,
-  LTLintyping.wt_instr (LTLin.fn_sig f) instr ->
-  wt_code (transf_function f) k ->
-  wt_code (transf_function f) (transf_instr f instr k).
-Proof.
-  Opaque reg_for regs_for.
-  intros. inv H; simpl; auto with reloadty.
-  caseEq (is_move_operation op args); intros.
-  destruct (is_move_operation_correct _ _ H). congruence.
-  assert (map mreg_type (regs_for args) = map Loc.type args).
-    eauto with reloadty.
-  assert (mreg_type (reg_for res) = Loc.type res). eauto with reloadty.
-  auto with reloadty.
-
-  assert (map mreg_type (regs_for args) = map Loc.type args).
-    eauto with reloadty.
-  assert (mreg_type (reg_for dst) = Loc.type dst). eauto with reloadty.
-  auto with reloadty.
-
-  caseEq (enough_temporaries (src :: args)); intro ENOUGH.
-  caseEq (regs_for (src :: args)); intros. auto.
-  assert (map mreg_type (regs_for (src :: args)) = map Loc.type (src :: args)).
-    apply wt_regs_for. auto. 
-  rewrite H in H5. injection H5; intros.
-  auto with reloadty.
-  apply wt_add_reloads; auto with reloadty.
-  symmetry. apply wt_regs_for. eauto with reloadty.
-  assert (op_for_binary_addressing addr <> Omove).
-    unfold op_for_binary_addressing; destruct addr; congruence.
-  assert (type_of_operation (op_for_binary_addressing addr) = (type_of_addressing addr, Tint)).
-    apply type_op_for_binary_addressing. 
-    rewrite <- H1. rewrite list_length_map.
-    eapply not_enough_temporaries_length; eauto. 
-  apply wt_code_cons.
-  constructor; auto. rewrite H5. decEq. rewrite <- H1. eauto with reloadty.
-  apply wt_add_reload; auto with reloadty. 
-  apply wt_code_cons; auto. constructor. reflexivity.
-  rewrite <- H2. eauto with reloadty.
-
-  assert (locs_valid (transf_function f) (loc_arguments sig)).
-    red; intros. generalize (loc_arguments_acceptable sig l H).
-    destruct l; simpl; auto. destruct s; simpl; intuition.
-  assert (locs_writable (loc_arguments sig)).
-    red; intros. generalize (loc_arguments_acceptable sig l H6).
-    destruct l; simpl; auto. destruct s; simpl; intuition.
-  assert (map Loc.type args = map Loc.type (loc_arguments sig)).
-    rewrite loc_arguments_type; auto.
-  assert (Loc.type res = mreg_type (loc_result sig)).
-    rewrite H2. unfold loc_result. unfold proj_sig_res. 
-    destruct (sig_res sig); auto. destruct t; auto.
-  destruct ros.
-  destruct H3 as [A [B C]].
-  apply wt_parallel_move; eauto with reloadty.
-  apply wt_add_reload; eauto with reloadty. 
-  apply wt_code_cons; eauto with reloadty. 
-  constructor. rewrite <- A. auto with reloadty. 
-  auto with reloadty.
-
-  assert (locs_valid (transf_function f) (loc_arguments sig)).
-    red; intros. generalize (loc_arguments_acceptable sig l H).
-    destruct l; simpl; auto. destruct s; simpl; intuition.
-  assert (locs_writable (loc_arguments sig)).
-    red; intros. generalize (loc_arguments_acceptable sig l H6).
-    destruct l; simpl; auto. destruct s; simpl; intuition.
-  assert (map Loc.type args = map Loc.type (loc_arguments sig)).
-    rewrite loc_arguments_type; auto.
-  destruct ros. destruct H2 as [A [B C]]. auto 10 with reloadty. 
-  auto 10 with reloadty.
-  destruct (ef_reloads ef) eqn:?.
-  assert (arity_ok (sig_args (ef_sig ef)) = true) by intuition congruence.
-  assert (map mreg_type (regs_for args) = map Loc.type args).
-    apply wt_regs_for. apply arity_ok_enough. congruence.
-  assert (mreg_type (reg_for res) = Loc.type res). eauto with reloadty.
-  auto 10 with reloadty.
-  auto with reloadty.
-
-  assert (map mreg_type (regs_for args) = map Loc.type args).
-    eauto with reloadty.
-  auto with reloadty.
-
-  assert (mreg_type (reg_for arg) = Loc.type arg).
-    eauto with reloadty.
-  auto with reloadty.
-
-  destruct optres; simpl in *; auto with reloadty.
-  apply wt_add_reload; auto with reloadty. 
-  unfold loc_result. rewrite <- H1.
-  destruct (Loc.type l); reflexivity.
-Qed.
-
-Lemma wt_transf_code:
-  forall f  c,
-  LTLintyping.wt_code (LTLin.fn_sig f) c ->
-  Lineartyping.wt_code (transf_function f) (transf_code f c).
-Proof.
-  induction c; simpl; intros.
-  red; simpl; tauto.
-  rewrite transf_code_cons.
-  apply wt_transf_instr; auto with coqlib. 
-  apply IHc. red; auto with coqlib. 
-Qed.
-
-Lemma wt_transf_fundef:
-  forall fd,
-  LTLintyping.wt_fundef fd ->
-  Lineartyping.wt_fundef (transf_fundef fd).
-Proof.
-  intros. destruct fd; simpl.
-  inv H. inv H1. constructor. unfold wt_function. simpl. 
-  apply wt_parallel_move; auto with reloadty.
-    rewrite loc_parameters_type. auto.
-    red; intros.
-    destruct (list_in_map_inv _ _ _ H) as [r [A B]].
-    generalize (loc_arguments_acceptable _ _ B).
-    destruct r; intro.
-    rewrite A. simpl. auto.
-    red in H0. destruct s; try tauto.
-    simpl in A. subst l. simpl. auto.
-  apply wt_transf_code; auto.
-  constructor.
-Qed.
-
-Lemma program_typing_preserved:
-  forall p,
-  LTLintyping.wt_program p ->
-  Lineartyping.wt_program (transf_program p).
-Proof.
-  intros; red; intros. 
-  destruct (transform_program_function _ _ _ _ H0) as [f0 [A B]].
-  subst f; apply wt_transf_fundef. eauto.
-Qed.
diff --git a/backend/SelectLong.vp b/backend/SelectLong.vp
new file mode 100644
index 0000000..8eb32c3
--- /dev/null
+++ b/backend/SelectLong.vp
@@ -0,0 +1,368 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Instruction selection for 64-bit integer operations *)
+
+Require Import Coqlib.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Op.
+Require Import CminorSel.
+Require Import SelectOp.
+
+Open Local Scope cminorsel_scope.
+
+(** Some operations on 64-bit integers are transformed into calls to
+  runtime library functions.  The following record type collects
+  the names of these functions. *)
+
+Record helper_functions : Type := mk_helper_functions {
+  i64_dtos: ident;                      (**r float -> signed long *)
+  i64_dtou: ident;                      (**r float -> unsigned long *)
+  i64_stod: ident;                      (**r signed long -> float *)
+  i64_utod: ident;                      (**r unsigned long -> float *)
+  i64_neg: ident;                       (**r opposite *)
+  i64_add: ident;                       (**r addition *)
+  i64_sub: ident;                       (**r subtraction *)
+  i64_mul: ident;                       (**r multiplication 32x32->64 *)
+  i64_sdiv: ident;                      (**r signed division *)
+  i64_udiv: ident;                      (**r unsigned division *)
+  i64_smod: ident;                      (**r signed remainder *)
+  i64_umod: ident;                      (**r unsigned remainder *)
+  i64_shl: ident;                       (**r shift left *)
+  i64_shr: ident;                       (**r shift right unsigned *)
+  i64_sar: ident;                       (**r shift right signed *)
+  i64_scmp: ident;                      (**r signed comparison *)
+  i64_ucmp: ident                       (**r unsigned comparison *)
+}.
+
+Definition sig_l_l := mksignature (Tlong :: nil) (Some Tlong).
+Definition sig_l_f := mksignature (Tlong :: nil) (Some Tfloat).
+Definition sig_f_l := mksignature (Tfloat :: nil) (Some Tlong).
+Definition sig_ll_l := mksignature (Tlong :: Tlong :: nil) (Some Tlong).
+Definition sig_li_l := mksignature (Tlong :: Tint :: nil) (Some Tlong).
+Definition sig_ll_i := mksignature (Tlong :: Tlong :: nil) (Some Tint).
+Definition sig_ii_l := mksignature (Tint :: Tint :: nil) (Some Tlong).
+
+Section SELECT.
+
+Variable hf: helper_functions.
+
+Definition makelong (h l: expr): expr :=
+  Eop Omakelong (h ::: l ::: Enil).
+
+Nondetfunction splitlong (e: expr) (f: expr -> expr -> expr) :=
+  match e with
+  | Eop Omakelong (h ::: l ::: Enil) => f h l
+  | _ => Elet e (f (Eop Ohighlong (Eletvar O ::: Enil)) (Eop Olowlong (Eletvar O ::: Enil)))
+  end.
+
+Nondetfunction splitlong2 (e1 e2: expr) (f: expr -> expr -> expr -> expr -> expr) :=
+  match e1, e2 with
+  | Eop Omakelong (h1 ::: l1 ::: Enil), Eop Omakelong (h2 ::: l2 ::: Enil) =>
+      f h1 l1 h2 l2
+  | Eop Omakelong (h1 ::: l1 ::: Enil), t2 =>
+      Elet t2 (f (lift h1) (lift l1)
+                 (Eop Ohighlong (Eletvar O ::: Enil)) (Eop Olowlong (Eletvar O ::: Enil)))
+  | t1, Eop Omakelong (h2 ::: l2 ::: Enil) =>
+      Elet t1 (f (Eop Ohighlong (Eletvar O ::: Enil)) (Eop Olowlong (Eletvar O ::: Enil))
+                 (lift h2) (lift l2))
+  | _, _ =>
+      Elet e1 (Elet (lift e2)
+        (f (Eop Ohighlong (Eletvar 1 ::: Enil)) (Eop Olowlong (Eletvar 1 ::: Enil))
+           (Eop Ohighlong (Eletvar O ::: Enil)) (Eop Olowlong (Eletvar O ::: Enil))))
+  end.
+
+Nondetfunction lowlong (e: expr) :=
+  match e with
+  | Eop Omakelong (e1 ::: e2 ::: Enil) => e2
+  | _ => Eop Olowlong (e ::: Enil)
+  end.
+
+Nondetfunction highlong (e: expr) :=
+  match e with
+  | Eop Omakelong (e1 ::: e2 ::: Enil) => e1
+  | _ => Eop Ohighlong (e ::: Enil)
+  end.
+
+Definition longconst (n: int64) : expr :=
+  makelong (Eop (Ointconst (Int64.hiword n)) Enil)
+           (Eop (Ointconst (Int64.loword n)) Enil).
+
+Nondetfunction is_longconst (e: expr) :=
+  match e with
+  | Eop Omakelong (Eop (Ointconst h) Enil ::: Eop (Ointconst l) Enil ::: Enil) =>
+      Some(Int64.ofwords h l)
+  | _ =>
+      None
+  end.
+
+Definition is_longconst_zero (e: expr) :=
+  match is_longconst e with
+  | Some n => Int64.eq n Int64.zero
+  | None => false
+  end.
+
+Definition intoflong (e: expr) := lowlong e.
+
+Definition longofint (e: expr) :=
+  Elet e (makelong (shrimm (Eletvar O) (Int.repr 31)) (Eletvar O)).
+
+Definition longofintu (e: expr) :=
+  makelong (Eop (Ointconst Int.zero) Enil) e.
+
+Definition negl (e: expr) :=
+  match is_longconst e with
+  | Some n => longconst (Int64.neg n)
+  | None => Ebuiltin (EF_builtin hf.(i64_neg) sig_l_l) (e ::: Enil)
+  end.
+
+Definition notl (e: expr) :=
+  splitlong e (fun h l => makelong (notint h) (notint l)).
+
+Definition longoffloat (arg: expr) := 
+  Eexternal hf.(i64_dtos) sig_f_l (arg ::: Enil).
+Definition longuoffloat (arg: expr) :=
+  Eexternal hf.(i64_dtou) sig_f_l (arg ::: Enil).
+Definition floatoflong (arg: expr) :=
+  Eexternal hf.(i64_stod) sig_l_f (arg ::: Enil).
+Definition floatoflongu (arg: expr) :=
+  Eexternal hf.(i64_utod) sig_l_f (arg ::: Enil).
+
+Definition andl (e1 e2: expr) :=
+  splitlong2 e1 e2 (fun h1 l1 h2 l2 => makelong (and h1 h2) (and l1 l2)).
+
+Definition orl (e1 e2: expr) :=
+  splitlong2 e1 e2 (fun h1 l1 h2 l2 => makelong (or h1 h2) (or l1 l2)).
+
+Definition xorl (e1 e2: expr) :=
+  splitlong2 e1 e2 (fun h1 l1 h2 l2 => makelong (xor h1 h2) (xor l1 l2)).
+
+Definition shllimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then e1 else
+  if Int.ltu n Int.iwordsize then
+   splitlong e1 (fun h l =>
+     makelong (or (shlimm h n) (shruimm l (Int.sub Int.iwordsize n)))
+              (shlimm l n))
+  else if Int.ltu n Int64.iwordsize' then
+    makelong (shlimm (lowlong e1) (Int.sub n Int.iwordsize))
+             (Eop (Ointconst Int.zero) Enil)
+  else
+    Eexternal hf.(i64_shl) sig_li_l (e1 ::: Eop (Ointconst n) Enil ::: Enil).
+
+Definition shrluimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then e1 else
+  if Int.ltu n Int.iwordsize then
+    splitlong e1 (fun h l =>
+      makelong (shruimm h n)
+               (or (shruimm l n) (shlimm h (Int.sub Int.iwordsize n))))
+  else if Int.ltu n Int64.iwordsize' then
+    makelong (Eop (Ointconst Int.zero) Enil)
+             (shruimm (highlong e1) (Int.sub n Int.iwordsize))
+  else
+    Eexternal hf.(i64_shr) sig_li_l (e1 ::: Eop (Ointconst n) Enil ::: Enil).
+
+Definition shrlimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then e1 else
+  if Int.ltu n Int.iwordsize then
+    splitlong e1 (fun h l =>
+      makelong (shrimm h n)
+               (or (shruimm l n) (shlimm h (Int.sub Int.iwordsize n))))
+  else if Int.ltu n Int64.iwordsize' then
+    Elet (highlong e1)
+      (makelong (shrimm (Eletvar 0) (Int.repr 31))
+                (shrimm (Eletvar 0) (Int.sub n Int.iwordsize)))
+  else
+    Eexternal hf.(i64_sar) sig_li_l (e1 ::: Eop (Ointconst n) Enil ::: Enil).
+
+Definition is_intconst (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Some n
+  | _ => None
+  end.
+
+Definition shll (e1 e2: expr) :=
+  match is_intconst e2 with
+  | Some n => shllimm e1 n
+  | None => Eexternal hf.(i64_shl) sig_li_l (e1 ::: e2 ::: Enil)
+  end.
+
+Definition shrlu (e1 e2: expr) :=
+  match is_intconst e2 with
+  | Some n => shrluimm e1 n
+  | None => Eexternal hf.(i64_shr) sig_li_l (e1 ::: e2 ::: Enil)
+  end.
+
+Definition shrl (e1 e2: expr) :=
+  match is_intconst e2 with
+  | Some n => shrlimm e1 n
+  | None => Eexternal hf.(i64_sar) sig_li_l (e1 ::: e2 ::: Enil)
+  end.
+
+Definition addl (e1 e2: expr) :=
+  let default := Ebuiltin (EF_builtin hf.(i64_add) sig_ll_l) (e1 ::: e2 ::: Enil) in
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 => longconst (Int64.add n1 n2)
+  | Some n1, _ => if Int64.eq n1 Int64.zero then e2 else default
+  | _, Some n2 => if Int64.eq n2 Int64.zero then e1 else default
+  | _, _ => default
+  end.
+
+Definition subl (e1 e2: expr) :=
+  let default := Ebuiltin (EF_builtin hf.(i64_sub) sig_ll_l) (e1 ::: e2 ::: Enil) in
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 => longconst (Int64.sub n1 n2)
+  | Some n1, _ => if Int64.eq n1 Int64.zero then negl e2 else default
+  | _, Some n2 => if Int64.eq n2 Int64.zero then e1 else default
+  | _, _ => default
+  end.
+
+Definition mull_base (e1 e2: expr) :=
+  splitlong2 e1 e2 (fun h1 l1 h2 l2 =>
+    Elet (Ebuiltin (EF_builtin hf.(i64_mul) sig_ii_l) (l1 ::: l2 ::: Enil))
+      (makelong
+        (add (add (Eop Ohighlong (Eletvar O ::: Enil))
+                  (mul (lift l1) (lift h2)))
+             (mul (lift h1) (lift l2)))
+        (Eop Olowlong (Eletvar O ::: Enil)))).
+
+Definition mullimm (e: expr) (n: int64) :=
+  if Int64.eq n Int64.zero then longconst Int64.zero else
+  if Int64.eq n Int64.one then e else
+  match Int64.is_power2 n with
+  | Some l => shllimm e (Int.repr (Int64.unsigned l))
+  | None   => mull_base e (longconst n)
+  end.
+
+Definition mull (e1 e2: expr) :=
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 => longconst (Int64.mul n1 n2)
+  | Some n1, _ => mullimm e2 n1
+  | _, Some n2 => mullimm e1 n2
+  | _, _ => mull_base e1 e2
+  end.
+
+Definition binop_long (id: ident) (sem: int64 -> int64 -> int64) (e1 e2: expr) :=
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 => longconst (sem n1 n2)
+  | _, _ => Eexternal id sig_ll_l (e1 ::: e2 ::: Enil)
+  end.
+
+Definition divl := binop_long hf.(i64_sdiv) Int64.divs.
+Definition modl := binop_long hf.(i64_smod) Int64.mods.
+
+Definition divlu (e1 e2: expr) :=
+  let default := Eexternal hf.(i64_udiv) sig_ll_l (e1 ::: e2 ::: Enil) in
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 => longconst (Int64.divu n1 n2)
+  | _, Some n2 =>
+      match Int64.is_power2 n2 with
+      | Some l => shrluimm e1 (Int.repr (Int64.unsigned l))
+      | None   => default
+      end
+  | _, _ => default
+  end.
+
+Definition modlu (e1 e2: expr) :=
+  let default := Eexternal hf.(i64_umod) sig_ll_l (e1 ::: e2 ::: Enil) in
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 => longconst (Int64.modu n1 n2)
+  | _, Some n2 =>
+      match Int64.is_power2 n2 with
+      | Some l => andl e1 (longconst (Int64.sub n2 Int64.one))
+      | None   => default
+      end
+  | _, _ => default
+  end.
+
+Definition cmpl_eq_zero (e: expr) :=
+  splitlong e (fun h l => comp Ceq (or h l) (Eop (Ointconst Int.zero) Enil)).
+
+Definition cmpl_ne_zero (e: expr) :=
+  splitlong e (fun h l => comp Cne (or h l) (Eop (Ointconst Int.zero) Enil)).
+
+Definition cmplu (c: comparison) (e1 e2: expr) :=
+  match c with
+  | Ceq =>
+      if is_longconst_zero e2
+      then cmpl_eq_zero e1
+      else cmpl_eq_zero (xorl e1 e2)
+  | Cne =>
+      if is_longconst_zero e2
+      then cmpl_ne_zero e1
+      else cmpl_ne_zero (xorl e1 e2)
+  | _ =>
+      comp c (Eexternal hf.(i64_ucmp) sig_ll_i (e1 ::: e2 ::: Enil))
+             (Eop (Ointconst Int.zero) Enil)
+  end.
+
+Definition cmpl (c: comparison) (e1 e2: expr) :=
+  let default := 
+    comp c (Eexternal hf.(i64_scmp) sig_ll_i (e1 ::: e2 ::: Enil))
+           (Eop (Ointconst Int.zero) Enil) in
+  match c with
+  | Ceq =>
+      if is_longconst_zero e2
+      then cmpl_eq_zero e1
+      else cmpl_eq_zero (xorl e1 e2)
+  | Cne =>
+      if is_longconst_zero e2
+      then cmpl_ne_zero e1
+      else cmpl_ne_zero (xorl e1 e2)
+  | Clt =>
+      if is_longconst_zero e2
+      then comp Clt (highlong e1) (Eop (Ointconst Int.zero) Enil)
+      else default
+  | Cge =>
+      if is_longconst_zero e2
+      then comp Cge (highlong e1) (Eop (Ointconst Int.zero) Enil)
+      else default
+  | _ =>
+      default
+  end.
+
+End SELECT.
+
+(** Setting up the helper functions *)
+
+Require Import Errors.
+
+Local Open Scope string_scope.
+Local Open Scope error_monad_scope.
+
+Parameter get_helper: Cminor.genv -> String.string -> signature -> res ident.
+Parameter get_builtin: String.string -> signature -> res ident.
+
+Definition get_helpers (ge: Cminor.genv): res helper_functions :=
+  do i64_dtos <- get_helper ge "__i64_dtos" sig_f_l ;
+  do i64_dtou <- get_helper ge "__i64_dtou" sig_f_l ;
+  do i64_stod <- get_helper ge "__i64_stod" sig_l_f ;
+  do i64_utod <- get_helper ge "__i64_utod" sig_l_f ;
+  do i64_neg <- get_builtin "__builtin_negl" sig_l_l ;
+  do i64_add <- get_builtin "__builtin_addl" sig_ll_l ;
+  do i64_sub <- get_builtin "__builtin_subl" sig_ll_l ;
+  do i64_mul <- get_builtin "__builtin_mull" sig_ll_l ;
+  do i64_sdiv <- get_helper ge "__i64_sdiv" sig_ll_l ;
+  do i64_udiv <- get_helper ge "__i64_udiv" sig_ll_l ;
+  do i64_smod <- get_helper ge "__i64_smod" sig_ll_l ;
+  do i64_umod <- get_helper ge "__i64_umod" sig_ll_l ;
+  do i64_shl <- get_helper ge "__i64_shl" sig_li_l ;
+  do i64_shr <- get_helper ge "__i64_shr" sig_li_l ;
+  do i64_sar <- get_helper ge "__i64_sar" sig_li_l ;
+  do i64_scmp <- get_helper ge "__i64_scmp" sig_ll_i ;
+  do i64_ucmp <- get_helper ge "__i64_ucmp" sig_ll_i ;
+  OK (mk_helper_functions
+     i64_dtos i64_dtou i64_stod i64_utod
+     i64_neg i64_add i64_sub i64_mul i64_sdiv i64_udiv i64_smod i64_umod
+     i64_shl i64_shr i64_sar
+     i64_scmp i64_ucmp).
diff --git a/backend/SelectLongproof.v b/backend/SelectLongproof.v
new file mode 100644
index 0000000..aca05bf
--- /dev/null
+++ b/backend/SelectLongproof.v
@@ -0,0 +1,1063 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness of instruction selection for 64-bit integer operations *)
+
+Require Import Coqlib.
+Require Import AST.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Events.
+Require Import Cminor.
+Require Import Op.
+Require Import CminorSel.
+Require Import SelectOp.
+Require Import SelectOpproof.
+Require Import SelectLong.
+
+Open Local Scope cminorsel_scope.
+
+(** * Axiomatization of the helper functions *)
+
+Section HELPERS.
+
+Context {F V: Type} (ge: Genv.t (AST.fundef F) V).
+
+Definition helper_implements (id: ident) (sg: signature) (vargs: list val) (vres: val) : Prop :=
+  exists b, exists ef,
+     Genv.find_symbol ge id = Some b
+  /\ Genv.find_funct_ptr ge b = Some (External ef)
+  /\ ef_sig ef = sg
+  /\ forall m, external_call ef ge vargs m E0 vres m.
+
+Definition builtin_implements (id: ident) (sg: signature) (vargs: list val) (vres: val) : Prop :=
+  forall m, external_call (EF_builtin id sg) ge vargs m E0 vres m.
+
+Definition i64_helpers_correct (hf: helper_functions) : Prop :=
+    (forall x z, Val.longoffloat x = Some z -> helper_implements hf.(i64_dtos) sig_f_l (x::nil) z)
+  /\(forall x z, Val.longuoffloat x = Some z -> helper_implements hf.(i64_dtou) sig_f_l (x::nil) z)
+  /\(forall x z, Val.floatoflong x = Some z -> helper_implements hf.(i64_stod) sig_l_f (x::nil) z)
+  /\(forall x z, Val.floatoflongu x = Some z -> helper_implements hf.(i64_utod) sig_l_f (x::nil) z)
+  /\(forall x, builtin_implements hf.(i64_neg) sig_l_l (x::nil) (Val.negl x))
+  /\(forall x y, builtin_implements hf.(i64_add) sig_ll_l (x::y::nil) (Val.addl x y))
+  /\(forall x y, builtin_implements hf.(i64_sub) sig_ll_l (x::y::nil) (Val.subl x y))
+  /\(forall x y, builtin_implements hf.(i64_mul) sig_ii_l (x::y::nil) (Val.mull' x y))
+  /\(forall x y z, Val.divls x y = Some z -> helper_implements hf.(i64_sdiv) sig_ll_l (x::y::nil) z)
+  /\(forall x y z, Val.divlu x y = Some z -> helper_implements hf.(i64_udiv) sig_ll_l (x::y::nil) z)
+  /\(forall x y z, Val.modls x y = Some z -> helper_implements hf.(i64_smod) sig_ll_l (x::y::nil) z)
+  /\(forall x y z, Val.modlu x y = Some z -> helper_implements hf.(i64_umod) sig_ll_l (x::y::nil) z)
+  /\(forall x y, helper_implements hf.(i64_shl) sig_li_l (x::y::nil) (Val.shll x y))
+  /\(forall x y, helper_implements hf.(i64_shr) sig_li_l (x::y::nil) (Val.shrlu x y))
+  /\(forall x y, helper_implements hf.(i64_sar) sig_li_l (x::y::nil) (Val.shrl x y))
+  /\(forall c x y, exists z, helper_implements hf.(i64_scmp) sig_ll_i (x::y::nil) z
+                          /\ Val.cmpl c x y = Val.cmp c z Vzero)
+  /\(forall c x y, exists z, helper_implements hf.(i64_ucmp) sig_ll_i (x::y::nil) z
+                          /\ Val.cmplu c x y = Val.cmp c z Vzero).
+
+End HELPERS.
+
+(** * Correctness of the instruction selection functions for 64-bit operators *)
+
+Section CMCONSTR.
+
+Variable hf: helper_functions.
+Variable ge: genv.
+Hypothesis HELPERS: i64_helpers_correct ge hf.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
+Ltac UseHelper :=
+  red in HELPERS;
+  repeat (eauto; match goal with | [ H: _ /\ _ |- _ ] => destruct H end).
+
+Lemma eval_helper:
+  forall le id sg args vargs vres,
+  eval_exprlist ge sp e m le args vargs ->
+  helper_implements ge id sg vargs vres ->
+  eval_expr ge sp e m le (Eexternal id sg args) vres.
+Proof.
+  intros. destruct H0 as (b & ef & A & B & C & D). econstructor; eauto.
+Qed.
+
+Corollary eval_helper_1:
+  forall le id sg arg1 varg1 vres,
+  eval_expr ge sp e m le arg1 varg1 ->
+  helper_implements ge id sg (varg1::nil) vres ->
+  eval_expr ge sp e m le (Eexternal id sg (arg1 ::: Enil)) vres.
+Proof.
+  intros. eapply eval_helper; eauto. constructor; auto. constructor.
+Qed.
+
+Corollary eval_helper_2:
+  forall le id sg arg1 arg2 varg1 varg2 vres,
+  eval_expr ge sp e m le arg1 varg1 ->
+  eval_expr ge sp e m le arg2 varg2 ->
+  helper_implements ge id sg (varg1::varg2::nil) vres ->
+  eval_expr ge sp e m le (Eexternal id sg (arg1 ::: arg2 ::: Enil)) vres.
+Proof.
+  intros. eapply eval_helper; eauto. constructor; auto. constructor; auto. constructor.
+Qed.
+
+Remark eval_builtin_1:
+  forall le id sg arg1 varg1 vres,
+  eval_expr ge sp e m le arg1 varg1 ->
+  builtin_implements ge id sg (varg1::nil) vres ->
+  eval_expr ge sp e m le (Ebuiltin (EF_builtin id sg) (arg1 ::: Enil)) vres.
+Proof.
+  intros. econstructor. econstructor. eauto. constructor. apply H0. 
+Qed.
+
+Remark eval_builtin_2:
+  forall le id sg arg1 arg2 varg1 varg2 vres,
+  eval_expr ge sp e m le arg1 varg1 ->
+  eval_expr ge sp e m le arg2 varg2 ->
+  builtin_implements ge id sg (varg1::varg2::nil) vres ->
+  eval_expr ge sp e m le (Ebuiltin (EF_builtin id sg) (arg1 ::: arg2 ::: Enil)) vres.
+Proof.
+  intros. econstructor. constructor; eauto. constructor; eauto. constructor. apply H1.
+Qed.
+
+Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop :=
+  forall le a x,
+  eval_expr ge sp e m le a x ->
+  exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v.
+
+Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop :=
+  forall le a x b y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v.
+
+Ltac EvalOp :=
+  eauto;
+  match goal with
+  | [ |- eval_exprlist _ _ _ _ _ Enil _ ] => constructor
+  | [ |- eval_exprlist _ _ _ _ _ (_:::_) _ ] => econstructor; EvalOp
+  | [ |- eval_expr _ _ _ _ _ (Eletvar _) _ ] => constructor; simpl; eauto
+  | [ |- eval_expr _ _ _ _ _ (Elet _ _) _ ] => econstructor; EvalOp
+  | [ |- eval_expr _ _ _ _ _ (lift _) _ ] => apply eval_lift; EvalOp
+  | [ |- eval_expr _ _ _ _ _ _ _ ] => eapply eval_Eop; [EvalOp | simpl; eauto]
+  | _ => idtac
+  end.
+
+Lemma eval_splitlong:
+  forall le a f v sem,
+  (forall le a b x y,
+   eval_expr ge sp e m le a x ->
+   eval_expr ge sp e m le b y ->
+   exists v, eval_expr ge sp e m le (f a b) v /\
+             (forall p q, x = Vint p -> y = Vint q -> v = sem (Vlong (Int64.ofwords p q)))) ->
+  match v with Vlong _ => True | _ => sem v = Vundef end ->
+  eval_expr ge sp e m le a v ->
+  exists v', eval_expr ge sp e m le (splitlong a f) v' /\ Val.lessdef (sem v) v'.
+Proof.
+  intros until sem; intros EXEC UNDEF.
+  unfold splitlong. case (splitlong_match a); intros.
+- InvEval. subst v.
+  exploit EXEC. eexact H2. eexact H3. intros [v' [A B]]. 
+  exists v'; split. auto.
+  destruct v1; simpl in *; try (rewrite UNDEF; auto). 
+  destruct v0; simpl in *; try (rewrite UNDEF; auto). 
+  erewrite B; eauto.
+- exploit (EXEC (v :: le) (Eop Ohighlong (Eletvar 0 ::: Enil)) (Eop Olowlong (Eletvar 0 ::: Enil))).
+  EvalOp. EvalOp.
+  intros [v' [A B]].
+  exists v'; split. econstructor; eauto.
+  destruct v; try (rewrite UNDEF; auto). erewrite B; simpl; eauto. rewrite Int64.ofwords_recompose. auto.
+Qed.
+
+Lemma eval_splitlong2:
+  forall le a b f va vb sem,
+  (forall le a1 a2 b1 b2 x1 x2 y1 y2,
+   eval_expr ge sp e m le a1 x1 ->
+   eval_expr ge sp e m le a2 x2 ->
+   eval_expr ge sp e m le b1 y1 ->
+   eval_expr ge sp e m le b2 y2 ->
+   exists v,
+     eval_expr ge sp e m le (f a1 a2 b1 b2) v /\
+     (forall p1 p2 q1 q2,
+       x1 = Vint p1 -> x2 = Vint p2 -> y1 = Vint q1 -> y2 = Vint q2 ->
+       v = sem (Vlong (Int64.ofwords p1 p2)) (Vlong (Int64.ofwords q1 q2)))) ->
+  match va, vb with Vlong _, Vlong _ => True | _, _ => sem va vb = Vundef end ->
+  eval_expr ge sp e m le a va ->
+  eval_expr ge sp e m le b vb ->
+  exists v, eval_expr ge sp e m le (splitlong2 a b f) v /\ Val.lessdef (sem va vb) v.
+Proof.
+  intros until sem; intros EXEC UNDEF.
+  unfold splitlong2. case (splitlong2_match a); intros.
+- InvEval. subst va vb.
+  exploit (EXEC le h1 l1 h2 l2); eauto. intros [v [A B]].
+  exists v; split; auto.
+  destruct v1; simpl in *; try (rewrite UNDEF; auto).
+  destruct v0; try (rewrite UNDEF; auto).
+  destruct v2; simpl in *; try (rewrite UNDEF; auto).
+  destruct v3; try (rewrite UNDEF; auto).
+  erewrite B; eauto.
+- InvEval. subst va. 
+  exploit (EXEC (vb :: le) (lift h1) (lift l1) 
+                (Eop Ohighlong (Eletvar 0 ::: Enil)) (Eop Olowlong (Eletvar 0 ::: Enil))).
+  EvalOp. EvalOp. EvalOp. EvalOp. 
+  intros [v [A B]].
+  exists v; split.
+  econstructor; eauto.
+  destruct v1; simpl in *; try (rewrite UNDEF; auto).
+  destruct v0; try (rewrite UNDEF; auto).
+  destruct vb; try (rewrite UNDEF; auto).
+  erewrite B; simpl; eauto. rewrite Int64.ofwords_recompose. auto.
+- InvEval. subst vb. 
+  exploit (EXEC (va :: le)
+                (Eop Ohighlong (Eletvar 0 ::: Enil)) (Eop Olowlong (Eletvar 0 ::: Enil))
+                (lift h2) (lift l2)).
+  EvalOp. EvalOp. EvalOp. EvalOp.
+  intros [v [A B]].
+  exists v; split.
+  econstructor; eauto.
+  destruct va; try (rewrite UNDEF; auto).
+  destruct v1; simpl in *; try (rewrite UNDEF; auto). 
+  destruct v0; try (rewrite UNDEF; auto).
+  erewrite B; simpl; eauto. rewrite Int64.ofwords_recompose. auto. 
+- exploit (EXEC (vb :: va :: le)
+                (Eop Ohighlong (Eletvar 1 ::: Enil)) (Eop Olowlong (Eletvar 1 ::: Enil))
+                (Eop Ohighlong (Eletvar 0 ::: Enil)) (Eop Olowlong (Eletvar 0 ::: Enil))).
+  EvalOp. EvalOp. EvalOp. EvalOp.
+  intros [v [A B]].
+  exists v; split. EvalOp. 
+  destruct va; try (rewrite UNDEF; auto); destruct vb; try (rewrite UNDEF; auto).
+  erewrite B; simpl; eauto. rewrite ! Int64.ofwords_recompose; auto.
+Qed.
+
+Lemma is_longconst_sound:
+  forall le a x n,
+  is_longconst a = Some n ->
+  eval_expr ge sp e m le a x ->
+  x = Vlong n.
+Proof.
+  unfold is_longconst; intros until n; intros LC. 
+  destruct (is_longconst_match a); intros.
+  inv LC. InvEval. simpl in H5. inv H5. auto. 
+  discriminate.
+Qed.
+
+Lemma is_longconst_zero_sound:
+  forall le a x,
+  is_longconst_zero a = true ->
+  eval_expr ge sp e m le a x ->
+  x = Vlong Int64.zero.
+Proof.
+  unfold is_longconst_zero; intros. 
+  destruct (is_longconst a) as [n|] eqn:E; try discriminate.
+  revert H. predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  intros. subst. eapply is_longconst_sound; eauto. 
+  congruence.
+Qed.
+
+Lemma eval_lowlong: unary_constructor_sound lowlong Val.loword.
+Proof.
+  unfold lowlong; red. intros until x. destruct (lowlong_match a); intros.
+  InvEval. subst x. exists v0; split; auto. 
+  destruct v1; simpl; auto. destruct v0; simpl; auto. 
+  rewrite Int64.lo_ofwords. auto. 
+  exists (Val.loword x); split; auto. EvalOp. 
+Qed.
+
+Lemma eval_highlong: unary_constructor_sound highlong Val.hiword.
+Proof.
+  unfold highlong; red. intros until x. destruct (highlong_match a); intros.
+  InvEval. subst x. exists v1; split; auto. 
+  destruct v1; simpl; auto. destruct v0; simpl; auto. 
+  rewrite Int64.hi_ofwords. auto. 
+  exists (Val.hiword x); split; auto. EvalOp. 
+Qed.
+
+Lemma eval_longconst: 
+  forall le n, eval_expr ge sp e m le (longconst n) (Vlong n).
+Proof.
+  intros. EvalOp. rewrite Int64.ofwords_recompose; auto. 
+Qed.
+
+Theorem eval_intoflong: unary_constructor_sound intoflong Val.loword.
+Proof eval_lowlong.
+
+Theorem eval_longofintu: unary_constructor_sound longofintu Val.longofintu.
+Proof.
+  red; intros. unfold longofintu. econstructor; split. EvalOp. 
+  unfold Val.longofintu. destruct x; auto. 
+  replace (Int64.repr (Int.unsigned i)) with (Int64.ofwords Int.zero i); auto.
+  apply Int64.same_bits_eq; intros. 
+  rewrite Int64.testbit_repr by auto.
+  rewrite Int64.bits_ofwords by auto.
+  fold (Int.testbit i i0).
+  destruct (zlt i0 Int.zwordsize).
+  auto.
+  rewrite Int.bits_zero. rewrite Int.bits_above by omega. auto.
+Qed.
+
+Theorem eval_longofint: unary_constructor_sound longofint Val.longofint.
+Proof.
+  red; intros. unfold longofint.
+  exploit (eval_shrimm ge sp e m (Int.repr 31) (x :: le) (Eletvar 0)). EvalOp.
+  intros [v1 [A B]].
+  econstructor; split. EvalOp.
+  destruct x; simpl; auto. 
+  simpl in B. inv B. simpl. 
+  replace (Int64.repr (Int.signed i))
+     with (Int64.ofwords (Int.shr i (Int.repr 31)) i); auto.
+  apply Int64.same_bits_eq; intros. 
+  rewrite Int64.testbit_repr by auto.
+  rewrite Int64.bits_ofwords by auto.
+  rewrite Int.bits_signed by omega.
+  destruct (zlt i0 Int.zwordsize).
+  auto.
+  assert (Int64.zwordsize = 2 * Int.zwordsize) by reflexivity. 
+  rewrite Int.bits_shr by omega.
+  change (Int.unsigned (Int.repr 31)) with (Int.zwordsize - 1).
+  f_equal. destruct (zlt (i0 - Int.zwordsize + (Int.zwordsize - 1)) Int.zwordsize); omega.
+Qed.
+
+Theorem eval_negl: unary_constructor_sound (negl hf) Val.negl.
+Proof.
+  unfold negl; red; intros. destruct (is_longconst a) eqn:E.
+  econstructor; split. apply eval_longconst.  
+  exploit is_longconst_sound; eauto. intros EQ; subst x. simpl. auto.
+  econstructor; split. eapply eval_builtin_1; eauto. UseHelper. auto.
+Qed.
+
+Theorem eval_notl: unary_constructor_sound notl Val.notl.
+Proof.
+  red; intros. unfold notl. apply eval_splitlong; auto. 
+  intros. 
+  exploit eval_notint. eexact H0. intros [va [A B]].
+  exploit eval_notint. eexact H1. intros [vb [C D]].
+  exists (Val.longofwords va vb); split. EvalOp.
+  intros; subst. simpl in *. inv B; inv D. 
+  simpl. unfold Int.not. rewrite <- Int64.decompose_xor. auto.
+  destruct x; auto.
+Qed.
+
+Theorem eval_longoffloat:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.longoffloat x = Some y ->
+  exists v, eval_expr ge sp e m le (longoffloat hf a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold longoffloat. econstructor; split. 
+  eapply eval_helper_1; eauto. UseHelper. 
+  auto.
+Qed.
+
+Theorem eval_longuoffloat:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.longuoffloat x = Some y ->
+  exists v, eval_expr ge sp e m le (longuoffloat hf a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold longuoffloat. econstructor; split. 
+  eapply eval_helper_1; eauto. UseHelper. 
+  auto.
+Qed.
+
+Theorem eval_floatoflong:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.floatoflong x = Some y ->
+  exists v, eval_expr ge sp e m le (floatoflong hf a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold floatoflong. econstructor; split. 
+  eapply eval_helper_1; eauto. UseHelper. 
+  auto.
+Qed.
+
+Theorem eval_floatoflongu:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.floatoflongu x = Some y ->
+  exists v, eval_expr ge sp e m le (floatoflongu hf a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold floatoflongu. econstructor; split. 
+  eapply eval_helper_1; eauto. UseHelper. 
+  auto.
+Qed.
+
+Theorem eval_andl: binary_constructor_sound andl Val.andl.
+Proof.
+  red; intros. unfold andl. apply eval_splitlong2; auto.
+  intros. 
+  exploit eval_and. eexact H1. eexact H3. intros [va [A B]].
+  exploit eval_and. eexact H2. eexact H4. intros [vb [C D]].
+  exists (Val.longofwords va vb); split. EvalOp. 
+  intros; subst. simpl in B; inv B. simpl in D; inv D.
+  simpl. f_equal. rewrite Int64.decompose_and. auto.
+  destruct x; auto. destruct y; auto.
+Qed.
+
+Theorem eval_orl: binary_constructor_sound orl Val.orl.
+Proof.
+  red; intros. unfold orl. apply eval_splitlong2; auto.
+  intros. 
+  exploit eval_or. eexact H1. eexact H3. intros [va [A B]].
+  exploit eval_or. eexact H2. eexact H4. intros [vb [C D]].
+  exists (Val.longofwords va vb); split. EvalOp. 
+  intros; subst. simpl in B; inv B. simpl in D; inv D.
+  simpl. f_equal. rewrite Int64.decompose_or. auto.
+  destruct x; auto. destruct y; auto.
+Qed.
+
+Theorem eval_xorl: binary_constructor_sound xorl Val.xorl.
+Proof.
+  red; intros. unfold xorl. apply eval_splitlong2; auto.
+  intros. 
+  exploit eval_xor. eexact H1. eexact H3. intros [va [A B]].
+  exploit eval_xor. eexact H2. eexact H4. intros [vb [C D]].
+  exists (Val.longofwords va vb); split. EvalOp. 
+  intros; subst. simpl in B; inv B. simpl in D; inv D.
+  simpl. f_equal. rewrite Int64.decompose_xor. auto.
+  destruct x; auto. destruct y; auto.
+Qed.
+
+Lemma is_intconst_sound:
+  forall le a x n,
+  is_intconst a = Some n ->
+  eval_expr ge sp e m le a x ->
+  x = Vint n.
+Proof.
+  unfold is_intconst; intros until n; intros LC. 
+  destruct a; try discriminate. destruct o; try discriminate. destruct e0; try discriminate.
+  inv LC. intros. InvEval. auto.
+Qed.
+
+Remark eval_shift_imm:
+  forall (P: expr -> Prop) n a0 a1 a2 a3,
+  (n = Int.zero -> P a0) ->
+  (0 <= Int.unsigned n < Int.zwordsize ->
+   Int.ltu n Int.iwordsize = true ->
+   Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize = true ->
+   Int.ltu n Int64.iwordsize' = true ->
+   P a1) ->
+  (Int.zwordsize <= Int.unsigned n < Int64.zwordsize ->
+   Int.ltu (Int.sub n Int.iwordsize) Int.iwordsize = true ->
+   P a2) ->
+  P a3 ->
+  P (if Int.eq n Int.zero then a0
+     else if Int.ltu n Int.iwordsize then a1
+     else if Int.ltu n Int64.iwordsize' then a2
+     else a3).
+Proof.
+  intros until a3; intros A0 A1 A2 A3.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  apply A0; auto.
+  assert (NZ: Int.unsigned n <> 0). 
+  { red; intros. elim H. rewrite <- (Int.repr_unsigned n). rewrite H0. auto. }
+  destruct (Int.ltu n Int.iwordsize) eqn:LT.
+  exploit Int.ltu_iwordsize_inv; eauto. intros RANGE.
+  assert (0 <= Int.zwordsize - Int.unsigned n < Int.zwordsize) by omega.
+  apply A1. auto. auto. 
+  unfold Int.ltu, Int.sub. rewrite Int.unsigned_repr_wordsize. 
+  rewrite Int.unsigned_repr. rewrite zlt_true; auto. omega. 
+  generalize Int.wordsize_max_unsigned; omega. 
+  unfold Int.ltu. rewrite zlt_true; auto. 
+  change (Int.unsigned Int64.iwordsize') with 64.
+  change Int.zwordsize with 32 in RANGE. omega.
+  destruct (Int.ltu n Int64.iwordsize') eqn:LT'.
+  exploit Int.ltu_inv; eauto. 
+  change (Int.unsigned Int64.iwordsize') with (Int.zwordsize * 2).
+  intros RANGE.
+  assert (Int.zwordsize <= Int.unsigned n).
+    unfold Int.ltu in LT. rewrite Int.unsigned_repr_wordsize in LT. 
+    destruct (zlt (Int.unsigned n) Int.zwordsize). discriminate. omega. 
+  apply A2. tauto. unfold Int.ltu, Int.sub. rewrite Int.unsigned_repr_wordsize. 
+  rewrite Int.unsigned_repr. rewrite zlt_true; auto. omega. 
+  generalize Int.wordsize_max_unsigned; omega. 
+  auto.
+Qed.
+
+Lemma eval_shllimm:
+  forall n,
+  unary_constructor_sound (fun e => shllimm hf e n) (fun v => Val.shll v (Vint n)).
+Proof.
+  unfold shllimm; red; intros.
+  apply eval_shift_imm; intros.
+  + (* n = 0 *)
+    subst n. exists x; split; auto. destruct x; simpl; auto. 
+    change (Int64.shl' i Int.zero) with (Int64.shl i Int64.zero). 
+    rewrite Int64.shl_zero. auto.
+  + (* 0 < n < 32 *)
+    apply eval_splitlong with (sem := fun x => Val.shll x (Vint n)); auto.
+    intros.
+    exploit eval_shlimm. eexact H4. instantiate (1 := n). intros [v1 [A1 B1]].
+    exploit eval_shlimm. eexact H5. instantiate (1 := n). intros [v2 [A2 B2]].
+    exploit eval_shruimm. eexact H5. instantiate (1 := Int.sub Int.iwordsize n). intros [v3 [A3 B3]].
+    exploit eval_or. eexact A1. eexact A3. intros [v4 [A4 B4]].
+    econstructor; split. EvalOp. 
+    intros. subst. simpl in *. rewrite H1 in *. rewrite H2 in *. rewrite H3. 
+    inv B1; inv B2; inv B3. simpl in B4. inv B4.
+    simpl. rewrite Int64.decompose_shl_1; auto.
+    destruct x; auto.
+  + (* 32 <= n < 64 *)
+    exploit eval_lowlong. eexact H. intros [v1 [A1 B1]].
+    exploit eval_shlimm. eexact A1. instantiate (1 := Int.sub n Int.iwordsize). intros [v2 [A2 B2]].
+    econstructor; split. EvalOp.
+    destruct x; simpl; auto.
+    destruct (Int.ltu n Int64.iwordsize'); auto. 
+    simpl in B1; inv B1. simpl in B2. rewrite H1 in B2. inv B2. 
+    simpl. erewrite <- Int64.decompose_shl_2. instantiate (1 := Int64.hiword i). 
+    rewrite Int64.ofwords_recompose. auto. auto.
+  + (* n >= 64 *)
+    econstructor; split. eapply eval_helper_2; eauto. EvalOp. UseHelper. auto.
+Qed.
+
+Theorem eval_shll: binary_constructor_sound (shll hf) Val.shll.
+Proof.
+  unfold shll; red; intros.
+  destruct (is_intconst b) as [n|] eqn:IC.
+- (* Immediate *)
+  exploit is_intconst_sound; eauto. intros EQ; subst y; clear H0.
+  eapply eval_shllimm; eauto.
+- (* General case *)
+  econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto.
+Qed.
+
+Lemma eval_shrluimm:
+  forall n,
+  unary_constructor_sound (fun e => shrluimm hf e n) (fun v => Val.shrlu v (Vint n)).
+Proof.
+  unfold shrluimm; red; intros. apply eval_shift_imm; intros.
+  + (* n = 0 *)
+    subst n. exists x; split; auto. destruct x; simpl; auto. 
+    change (Int64.shru' i Int.zero) with (Int64.shru i Int64.zero). 
+    rewrite Int64.shru_zero. auto.
+  + (* 0 < n < 32 *)
+    apply eval_splitlong with (sem := fun x => Val.shrlu x (Vint n)); auto.
+    intros.
+    exploit eval_shruimm. eexact H5. instantiate (1 := n). intros [v1 [A1 B1]].
+    exploit eval_shruimm. eexact H4. instantiate (1 := n). intros [v2 [A2 B2]].
+    exploit eval_shlimm. eexact H4. instantiate (1 := Int.sub Int.iwordsize n). intros [v3 [A3 B3]].
+    exploit eval_or. eexact A1. eexact A3. intros [v4 [A4 B4]].
+    econstructor; split. EvalOp. 
+    intros. subst. simpl in *. rewrite H1 in *. rewrite H2 in *. rewrite H3. 
+    inv B1; inv B2; inv B3. simpl in B4. inv B4.
+    simpl. rewrite Int64.decompose_shru_1; auto.
+    destruct x; auto.
+  + (* 32 <= n < 64 *)
+    exploit eval_highlong. eexact H. intros [v1 [A1 B1]].
+    exploit eval_shruimm. eexact A1. instantiate (1 := Int.sub n Int.iwordsize). intros [v2 [A2 B2]].
+    econstructor; split. EvalOp.
+    destruct x; simpl; auto.
+    destruct (Int.ltu n Int64.iwordsize'); auto. 
+    simpl in B1; inv B1. simpl in B2. rewrite H1 in B2. inv B2. 
+    simpl. erewrite <- Int64.decompose_shru_2. instantiate (1 := Int64.loword i). 
+    rewrite Int64.ofwords_recompose. auto. auto.
+  + (* n >= 64 *)
+    econstructor; split. eapply eval_helper_2; eauto. EvalOp. UseHelper. auto.
+Qed.
+
+Theorem eval_shrlu: binary_constructor_sound (shrlu hf) Val.shrlu.
+Proof.
+  unfold shrlu; red; intros.
+  destruct (is_intconst b) as [n|] eqn:IC.
+- (* Immediate *)
+  exploit is_intconst_sound; eauto. intros EQ; subst y; clear H0.
+  eapply eval_shrluimm; eauto.
+- (* General case *)
+  econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto.
+Qed.
+
+Lemma eval_shrlimm:
+  forall n,
+  unary_constructor_sound (fun e => shrlimm hf e n) (fun v => Val.shrl v (Vint n)).
+Proof.
+  unfold shrlimm; red; intros. apply eval_shift_imm; intros.
+  + (* n = 0 *)
+    subst n. exists x; split; auto. destruct x; simpl; auto. 
+    change (Int64.shr' i Int.zero) with (Int64.shr i Int64.zero). 
+    rewrite Int64.shr_zero. auto.
+  + (* 0 < n < 32 *)
+    apply eval_splitlong with (sem := fun x => Val.shrl x (Vint n)); auto.
+    intros.
+    exploit eval_shruimm. eexact H5. instantiate (1 := n). intros [v1 [A1 B1]].
+    exploit eval_shrimm. eexact H4. instantiate (1 := n). intros [v2 [A2 B2]].
+    exploit eval_shlimm. eexact H4. instantiate (1 := Int.sub Int.iwordsize n). intros [v3 [A3 B3]].
+    exploit eval_or. eexact A1. eexact A3. intros [v4 [A4 B4]].
+    econstructor; split. EvalOp. 
+    intros. subst. simpl in *. rewrite H1 in *. rewrite H2 in *. rewrite H3. 
+    inv B1; inv B2; inv B3. simpl in B4. inv B4.
+    simpl. rewrite Int64.decompose_shr_1; auto.
+    destruct x; auto.
+  + (* 32 <= n < 64 *)
+    exploit eval_highlong. eexact H. intros [v1 [A1 B1]].
+    assert (eval_expr ge sp e m (v1 :: le) (Eletvar 0) v1) by EvalOp.
+    exploit eval_shrimm. eexact H2. instantiate (1 := Int.sub n Int.iwordsize). intros [v2 [A2 B2]].
+    exploit eval_shrimm. eexact H2. instantiate (1 := Int.repr 31). intros [v3 [A3 B3]].
+    econstructor; split. EvalOp.
+    destruct x; simpl; auto.
+    destruct (Int.ltu n Int64.iwordsize'); auto. 
+    simpl in B1; inv B1. simpl in B2. rewrite H1 in B2. inv B2.
+    simpl in B3. inv B3.
+    change (Int.ltu (Int.repr 31) Int.iwordsize) with true. simpl.
+    erewrite <- Int64.decompose_shr_2. instantiate (1 := Int64.loword i). 
+    rewrite Int64.ofwords_recompose. auto. auto.
+  + (* n >= 64 *)
+    econstructor; split. eapply eval_helper_2; eauto. EvalOp. UseHelper. auto.
+Qed.
+
+Theorem eval_shrl: binary_constructor_sound (shrl hf) Val.shrl.
+Proof.
+  unfold shrl; red; intros.
+  destruct (is_intconst b) as [n|] eqn:IC.
+- (* Immediate *)
+  exploit is_intconst_sound; eauto. intros EQ; subst y; clear H0.
+  eapply eval_shrlimm; eauto.
+- (* General case *)
+  econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto.
+Qed.
+
+Theorem eval_addl: binary_constructor_sound (addl hf) Val.addl.
+Proof.
+  unfold addl; red; intros.
+  set (default := Ebuiltin (EF_builtin (i64_add hf) sig_ll_l) (a ::: b ::: Enil)).
+  assert (DEFAULT:
+    exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.addl x y) v).
+  {
+    econstructor; split. eapply eval_builtin_2; eauto. UseHelper. auto. 
+  }
+  destruct (is_longconst a) as [p|] eqn:LC1;
+  destruct (is_longconst b) as [q|] eqn:LC2.
+- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. 
+  exploit (is_longconst_sound le b); eauto. intros EQ; subst y.
+  econstructor; split. apply eval_longconst. simpl; auto.
+- predSpec Int64.eq Int64.eq_spec p Int64.zero; auto.
+  subst p. exploit (is_longconst_sound le a); eauto. intros EQ; subst x. 
+  exists y; split; auto. simpl. destruct y; auto. rewrite Int64.add_zero_l; auto.
+- predSpec Int64.eq Int64.eq_spec q Int64.zero; auto.
+  subst q. exploit (is_longconst_sound le b); eauto. intros EQ; subst y. 
+  exists x; split; auto. destruct x; simpl; auto. rewrite Int64.add_zero; auto.
+- auto.
+Qed.
+
+Theorem eval_subl: binary_constructor_sound (subl hf) Val.subl.
+Proof.
+  unfold subl; red; intros.
+  set (default := Ebuiltin (EF_builtin (i64_sub hf) sig_ll_l) (a ::: b ::: Enil)).
+  assert (DEFAULT:
+    exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.subl x y) v).
+  {
+    econstructor; split. eapply eval_builtin_2; eauto. UseHelper. auto. 
+  }
+  destruct (is_longconst a) as [p|] eqn:LC1;
+  destruct (is_longconst b) as [q|] eqn:LC2.
+- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. 
+  exploit (is_longconst_sound le b); eauto. intros EQ; subst y.
+  econstructor; split. apply eval_longconst. simpl; auto.
+- predSpec Int64.eq Int64.eq_spec p Int64.zero; auto.
+  replace (Val.subl x y) with (Val.negl y). eapply eval_negl; eauto.
+  subst p. exploit (is_longconst_sound le a); eauto. intros EQ; subst x.
+  destruct y; simpl; auto. 
+- predSpec Int64.eq Int64.eq_spec q Int64.zero; auto.
+  subst q. exploit (is_longconst_sound le b); eauto. intros EQ; subst y. 
+  exists x; split; auto. destruct x; simpl; auto. rewrite Int64.sub_zero_l; auto.
+- auto.
+Qed.
+
+Lemma eval_mull_base: binary_constructor_sound (mull_base hf) Val.mull.
+Proof.
+  unfold mull_base; red; intros. apply eval_splitlong2; auto.
+- intros. 
+  set (p := Val.mull' x2 y2). set (le1 := p :: le0).
+  assert (E1: eval_expr ge sp e m le1 (Eop Olowlong (Eletvar O ::: Enil)) (Val.loword p)) by EvalOp.
+  assert (E2: eval_expr ge sp e m le1 (Eop Ohighlong (Eletvar O ::: Enil)) (Val.hiword p)) by EvalOp.
+  exploit eval_mul. apply eval_lift. eexact H2. apply eval_lift. eexact H3. 
+  instantiate (1 := p). fold le1. intros [v3 [E3 L3]].
+  exploit eval_mul. apply eval_lift. eexact H1. apply eval_lift. eexact H4. 
+  instantiate (1 := p). fold le1. intros [v4 [E4 L4]].
+  exploit eval_add. eexact E2. eexact E3. intros [v5 [E5 L5]].
+  exploit eval_add. eexact E5. eexact E4. intros [v6 [E6 L6]].
+  exists (Val.longofwords v6 (Val.loword p)); split.
+  EvalOp. eapply eval_builtin_2; eauto. UseHelper. 
+  intros. unfold le1, p in *; subst; simpl in *.
+  inv L3. inv L4. inv L5. simpl in L6. inv L6. 
+  simpl. f_equal. symmetry. apply Int64.decompose_mul. 
+- destruct x; auto; destruct y; auto. 
+Qed.
+
+Lemma eval_mullimm:
+  forall n, unary_constructor_sound (fun a => mullimm hf a n) (fun v => Val.mull v (Vlong n)).
+Proof.
+  unfold mullimm; red; intros.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  subst n. econstructor; split. apply eval_longconst. 
+  destruct x; simpl; auto. rewrite Int64.mul_zero. auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.one.
+  subst n. exists x; split; auto. 
+  destruct x; simpl; auto. rewrite Int64.mul_one. auto.
+  destruct (Int64.is_power2 n) as [l|] eqn:P2. 
+  exploit eval_shllimm. eauto. instantiate (1 := Int.repr (Int64.unsigned l)).
+  intros [v [A B]].
+  exists v; split; auto. 
+  destruct x; simpl; auto. 
+  erewrite Int64.mul_pow2 by eauto.
+  assert (EQ: Int.unsigned (Int.repr (Int64.unsigned l)) = Int64.unsigned l).
+  { apply Int.unsigned_repr.
+    exploit Int64.is_power2_rng; eauto.
+    assert (Int64.zwordsize < Int.max_unsigned) by (compute; auto). 
+    omega.
+  }
+  simpl in B. 
+  replace (Int.ltu (Int.repr (Int64.unsigned l)) Int64.iwordsize')
+     with (Int64.ltu l Int64.iwordsize) in B.
+  erewrite Int64.is_power2_range in B by eauto. 
+  unfold Int64.shl' in B. rewrite EQ in B. auto. 
+  unfold Int64.ltu, Int.ltu. rewrite EQ. auto.
+  apply eval_mull_base; auto. apply eval_longconst. 
+Qed.
+
+Theorem eval_mull: binary_constructor_sound (mull hf) Val.mull.
+Proof.
+  unfold mull; red; intros.
+  destruct (is_longconst a) as [p|] eqn:LC1;
+  destruct (is_longconst b) as [q|] eqn:LC2.
+- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. 
+  exploit (is_longconst_sound le b); eauto. intros EQ; subst y.
+  econstructor; split. apply eval_longconst. simpl; auto.
+- exploit (is_longconst_sound le a); eauto. intros EQ; subst x.
+  replace (Val.mull (Vlong p) y) with (Val.mull y (Vlong p)) in *.
+  eapply eval_mullimm; eauto. 
+  destruct y; simpl; auto. rewrite Int64.mul_commut; auto. 
+- exploit (is_longconst_sound le b); eauto. intros EQ; subst y.
+  eapply eval_mullimm; eauto.
+- apply eval_mull_base; auto.
+Qed.
+
+Lemma eval_binop_long:
+  forall id sem le a b x y z,
+  (forall p q, x = Vlong p -> y = Vlong q -> z = Vlong (sem p q)) ->
+  helper_implements ge id sig_ll_l (x::y::nil) z ->
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (binop_long id sem a b) v /\ Val.lessdef z v.
+Proof.
+  intros. unfold binop_long. 
+  destruct (is_longconst a) as [p|] eqn:LC1. 
+  destruct (is_longconst b) as [q|] eqn:LC2.
+  exploit is_longconst_sound. eexact LC1. eauto. intros EQ; subst x.
+  exploit is_longconst_sound. eexact LC2. eauto. intros EQ; subst y.
+  econstructor; split. EvalOp. erewrite H by eauto. rewrite Int64.ofwords_recompose. auto. 
+  econstructor; split. eapply eval_helper_2; eauto. auto.
+  econstructor; split. eapply eval_helper_2; eauto. auto.
+Qed.
+
+Theorem eval_divl:
+  forall le a b x y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.divls x y = Some z ->
+  exists v, eval_expr ge sp e m le (divl hf a b) v /\ Val.lessdef z v.
+Proof.
+  intros. eapply eval_binop_long; eauto. 
+  intros; subst; simpl in H1.
+  destruct (Int64.eq q Int64.zero
+         || Int64.eq p (Int64.repr Int64.min_signed) && Int64.eq q Int64.mone); inv H1. 
+  auto.
+  UseHelper. 
+Qed.
+
+Theorem eval_modl:
+  forall le a b x y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.modls x y = Some z ->
+  exists v, eval_expr ge sp e m le (modl hf a b) v /\ Val.lessdef z v.
+Proof.
+  intros. eapply eval_binop_long; eauto. 
+  intros; subst; simpl in H1.
+  destruct (Int64.eq q Int64.zero
+         || Int64.eq p (Int64.repr Int64.min_signed) && Int64.eq q Int64.mone); inv H1. 
+  auto.
+  UseHelper. 
+Qed.
+
+Theorem eval_divlu:
+  forall le a b x y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.divlu x y = Some z ->
+  exists v, eval_expr ge sp e m le (divlu hf a b) v /\ Val.lessdef z v.
+Proof.
+  intros. unfold divlu. 
+  set (default := Eexternal (i64_udiv hf) sig_ll_l (a ::: b ::: Enil)).
+  assert (DEFAULT:
+    exists v, eval_expr ge sp e m le default v /\ Val.lessdef z v).
+  {
+    econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto. 
+  }
+  destruct (is_longconst a) as [p|] eqn:LC1;
+  destruct (is_longconst b) as [q|] eqn:LC2.
+- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. 
+  exploit (is_longconst_sound le b); eauto. intros EQ; subst y.
+  econstructor; split. apply eval_longconst.
+  simpl in H1. destruct (Int64.eq q Int64.zero); inv H1. auto. 
+- auto.
+- destruct (Int64.is_power2 q) as [l|] eqn:P2; auto.
+  exploit (is_longconst_sound le b); eauto. intros EQ; subst y.
+  replace z with (Val.shrlu x (Vint (Int.repr (Int64.unsigned l)))).
+  apply eval_shrluimm. auto.
+  destruct x; simpl in H1; try discriminate. 
+  destruct (Int64.eq q Int64.zero); inv H1. 
+  simpl. 
+  assert (EQ: Int.unsigned (Int.repr (Int64.unsigned l)) = Int64.unsigned l).
+  { apply Int.unsigned_repr.
+    exploit Int64.is_power2_rng; eauto.
+    assert (Int64.zwordsize < Int.max_unsigned) by (compute; auto). 
+    omega.
+  }
+  replace (Int.ltu (Int.repr (Int64.unsigned l)) Int64.iwordsize')
+     with (Int64.ltu l Int64.iwordsize).
+  erewrite Int64.is_power2_range by eauto.
+  erewrite Int64.divu_pow2 by eauto. 
+  unfold Int64.shru', Int64.shru. rewrite EQ. auto. 
+  unfold Int64.ltu, Int.ltu. rewrite EQ. auto.
+- auto.
+Qed.
+
+Theorem eval_modlu:
+  forall le a b x y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.modlu x y = Some z ->
+  exists v, eval_expr ge sp e m le (modlu hf a b) v /\ Val.lessdef z v.
+Proof.
+  intros. unfold modlu. 
+  set (default := Eexternal (i64_umod hf) sig_ll_l (a ::: b ::: Enil)).
+  assert (DEFAULT:
+    exists v, eval_expr ge sp e m le default v /\ Val.lessdef z v).
+  {
+    econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto. 
+  }
+  destruct (is_longconst a) as [p|] eqn:LC1;
+  destruct (is_longconst b) as [q|] eqn:LC2.
+- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. 
+  exploit (is_longconst_sound le b); eauto. intros EQ; subst y.
+  econstructor; split. apply eval_longconst.
+  simpl in H1. destruct (Int64.eq q Int64.zero); inv H1. auto. 
+- auto.
+- destruct (Int64.is_power2 q) as [l|] eqn:P2; auto.
+  exploit (is_longconst_sound le b); eauto. intros EQ; subst y.
+  replace z with (Val.andl x (Vlong (Int64.sub q Int64.one))).
+  apply eval_andl. auto. apply eval_longconst. 
+  destruct x; simpl in H1; try discriminate. 
+  destruct (Int64.eq q Int64.zero); inv H1. 
+  simpl. 
+  erewrite Int64.modu_and by eauto. auto.
+- auto.
+Qed.
+
+Remark decompose_cmpl_eq_zero:
+  forall h l,
+  Int64.eq (Int64.ofwords h l) Int64.zero = Int.eq (Int.or h l) Int.zero.
+Proof.
+  intros. 
+  assert (Int64.zwordsize = Int.zwordsize * 2) by reflexivity.
+  predSpec Int64.eq Int64.eq_spec (Int64.ofwords h l) Int64.zero.
+  replace (Int.or h l) with Int.zero. rewrite Int.eq_true. auto.
+  apply Int.same_bits_eq; intros. 
+  rewrite Int.bits_zero. rewrite Int.bits_or by auto. 
+  symmetry. apply orb_false_intro. 
+  transitivity (Int64.testbit (Int64.ofwords h l) (i + Int.zwordsize)).
+  rewrite Int64.bits_ofwords by omega. rewrite zlt_false by omega. f_equal; omega.
+  rewrite H0. apply Int64.bits_zero. 
+  transitivity (Int64.testbit (Int64.ofwords h l) i).
+  rewrite Int64.bits_ofwords by omega. rewrite zlt_true by omega. auto. 
+  rewrite H0. apply Int64.bits_zero.
+  symmetry. apply Int.eq_false. red; intros; elim H0. 
+  apply Int64.same_bits_eq; intros. 
+  rewrite Int64.bits_zero. rewrite Int64.bits_ofwords by auto. 
+  destruct (zlt i Int.zwordsize).
+  assert (Int.testbit (Int.or h l) i = false) by (rewrite H1; apply Int.bits_zero). 
+  rewrite Int.bits_or in H3 by omega. exploit orb_false_elim; eauto. tauto. 
+  assert (Int.testbit (Int.or h l) (i - Int.zwordsize) = false) by (rewrite H1; apply Int.bits_zero). 
+  rewrite Int.bits_or in H3 by omega. exploit orb_false_elim; eauto. tauto.
+Qed.
+
+Lemma eval_cmpl_eq_zero:
+  unary_constructor_sound cmpl_eq_zero (fun v => Val.cmpl Ceq v (Vlong Int64.zero)).
+Proof.
+  red; intros. unfold cmpl_eq_zero. 
+  apply eval_splitlong with (sem := fun x => Val.cmpl Ceq x (Vlong Int64.zero)); auto.
+- intros. 
+  exploit eval_or. eexact H0. eexact H1. intros [v1 [A1 B1]].
+  exploit eval_comp. eexact A1. instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp.
+  instantiate (1 := Ceq). intros [v2 [A2 B2]].
+  exists v2; split. auto. intros; subst. 
+  simpl in B1; inv B1. unfold Val.cmp in B2; simpl in B2.
+  unfold Val.cmpl; simpl. rewrite decompose_cmpl_eq_zero. 
+  destruct (Int.eq (Int.or p q)); inv B2; auto.
+- destruct x; auto.
+Qed.
+
+Lemma eval_cmpl_ne_zero:
+  unary_constructor_sound cmpl_ne_zero (fun v => Val.cmpl Cne v (Vlong Int64.zero)).
+Proof.
+  red; intros. unfold cmpl_eq_zero. 
+  apply eval_splitlong with (sem := fun x => Val.cmpl Cne x (Vlong Int64.zero)); auto.
+- intros. 
+  exploit eval_or. eexact H0. eexact H1. intros [v1 [A1 B1]].
+  exploit eval_comp. eexact A1. instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp.
+  instantiate (1 := Cne). intros [v2 [A2 B2]].
+  exists v2; split. auto. intros; subst. 
+  simpl in B1; inv B1. unfold Val.cmp in B2; simpl in B2.
+  unfold Val.cmpl; simpl. rewrite decompose_cmpl_eq_zero. 
+  destruct (Int.eq (Int.or p q)); inv B2; auto.
+- destruct x; auto.
+Qed.
+
+Remark int64_eq_xor:
+  forall p q, Int64.eq p q = Int64.eq (Int64.xor p q) Int64.zero.
+Proof.
+  intros.
+  predSpec Int64.eq Int64.eq_spec p q.
+  subst q. rewrite Int64.xor_idem. rewrite Int64.eq_true. auto.
+  predSpec Int64.eq Int64.eq_spec (Int64.xor p q) Int64.zero.
+  elim H. apply Int64.xor_zero_equal; auto.
+  auto.
+Qed.
+
+Theorem eval_cmplu: forall c, binary_constructor_sound (cmplu hf c) (Val.cmplu c).
+Proof.
+  intros; red; intros. unfold cmplu. 
+  set (default := comp c (Eexternal (i64_ucmp hf) sig_ll_i (a ::: b ::: Enil))
+                         (Eop (Ointconst Int.zero) Enil)).
+  assert (DEFAULT:
+    exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.cmplu c x y) v).
+  {
+    assert (HELP: exists z, helper_implements ge hf.(i64_ucmp) sig_ll_i (x::y::nil) z
+                            /\ Val.cmplu c x y = Val.cmp c z Vzero)
+    by UseHelper.
+    destruct HELP as [z [A B]].
+    exploit eval_comp. eapply eval_helper_2. eexact H. eexact H0. eauto. 
+    instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp. 
+    instantiate (1 := c). fold Vzero. intros [v [C D]].
+    econstructor; split; eauto. congruence.
+  }
+  destruct c; auto.
+- (* Ceq *)
+  destruct (is_longconst_zero b) eqn:LC.
++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0.
+  apply eval_cmpl_eq_zero; auto.
++ exploit eval_xorl. eexact H. eexact H0. intros [v [A B]].
+  exploit eval_cmpl_eq_zero. eexact A. intros [v' [C D]].
+  exists v'; split; auto.
+  eapply Val.lessdef_trans; [idtac|eexact D]. 
+  destruct x; auto. destruct y; auto. simpl in B; inv B. 
+  unfold Val.cmplu, Val.cmpl; simpl. rewrite int64_eq_xor; auto.
+- (* Cne *)
+  destruct (is_longconst_zero b) eqn:LC.
++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0.
+  apply eval_cmpl_ne_zero; auto.
++ exploit eval_xorl. eexact H. eexact H0. intros [v [A B]].
+  exploit eval_cmpl_ne_zero. eexact A. intros [v' [C D]].
+  exists v'; split; auto.
+  eapply Val.lessdef_trans; [idtac|eexact D]. 
+  destruct x; auto. destruct y; auto. simpl in B; inv B. 
+  unfold Val.cmplu, Val.cmpl; simpl. rewrite int64_eq_xor; auto.
+Qed.
+
+Remark decompose_cmpl_lt_zero:
+  forall h l,
+  Int64.lt (Int64.ofwords h l) Int64.zero = Int.lt h Int.zero.
+Proof.
+  intros. 
+  generalize (Int64.shru_lt_zero (Int64.ofwords h l)).
+  change (Int64.shru (Int64.ofwords h l) (Int64.repr (Int64.zwordsize - 1)))
+    with (Int64.shru' (Int64.ofwords h l) (Int.repr 63)).
+  rewrite Int64.decompose_shru_2. 
+  change (Int.sub (Int.repr 63) Int.iwordsize)
+    with (Int.repr (Int.zwordsize - 1)). 
+  rewrite Int.shru_lt_zero. 
+  destruct (Int64.lt (Int64.ofwords h l) Int64.zero); destruct (Int.lt h Int.zero); auto; intros.
+  elim Int64.one_not_zero. auto. 
+  elim Int64.one_not_zero. auto.
+  vm_compute. intuition congruence.
+Qed.
+
+Theorem eval_cmpl: forall c, binary_constructor_sound (cmpl hf c) (Val.cmpl c).
+Proof.
+  intros; red; intros. unfold cmpl. 
+  set (default := comp c (Eexternal (i64_scmp hf) sig_ll_i (a ::: b ::: Enil))
+                         (Eop (Ointconst Int.zero) Enil)).
+  assert (DEFAULT:
+    exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.cmpl c x y) v).
+  {
+    assert (HELP: exists z, helper_implements ge hf.(i64_scmp) sig_ll_i (x::y::nil) z
+                            /\ Val.cmpl c x y = Val.cmp c z Vzero)
+    by UseHelper.
+    destruct HELP as [z [A B]].
+    exploit eval_comp. eapply eval_helper_2. eexact H. eexact H0. eauto. 
+    instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp. 
+    instantiate (1 := c). fold Vzero. intros [v [C D]].
+    econstructor; split; eauto. congruence.
+  }
+  destruct c; auto.
+- (* Ceq *)
+  destruct (is_longconst_zero b) eqn:LC.
++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0.
+  apply eval_cmpl_eq_zero; auto.
++ exploit eval_xorl. eexact H. eexact H0. intros [v [A B]].
+  exploit eval_cmpl_eq_zero. eexact A. intros [v' [C D]].
+  exists v'; split; auto.
+  eapply Val.lessdef_trans; [idtac|eexact D]. 
+  destruct x; auto. destruct y; auto. simpl in B; inv B. 
+  unfold Val.cmpl; simpl. rewrite int64_eq_xor; auto.
+- (* Cne *)
+  destruct (is_longconst_zero b) eqn:LC.
++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0.
+  apply eval_cmpl_ne_zero; auto.
++ exploit eval_xorl. eexact H. eexact H0. intros [v [A B]].
+  exploit eval_cmpl_ne_zero. eexact A. intros [v' [C D]].
+  exists v'; split; auto.
+  eapply Val.lessdef_trans; [idtac|eexact D]. 
+  destruct x; auto. destruct y; auto. simpl in B; inv B. 
+  unfold Val.cmpl; simpl. rewrite int64_eq_xor; auto.
+- (* Clt *)
+  destruct (is_longconst_zero b) eqn:LC.
++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0.
+  exploit eval_highlong. eexact H. intros [v1 [A1 B1]].
+  exploit eval_comp. eexact A1. 
+  instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp.
+  instantiate (1 := Clt). intros [v2 [A2 B2]].
+  econstructor; split. eauto. 
+  destruct x; simpl in *; auto. inv B1.
+  unfold Val.cmp, Val.cmp_bool, Val.of_optbool, Int.cmp in B2.
+  unfold Val.cmpl, Val.cmpl_bool, Val.of_optbool, Int64.cmp.
+  rewrite <- (Int64.ofwords_recompose i). rewrite decompose_cmpl_lt_zero.
+  auto. 
++ apply DEFAULT.
+- (* Cge *)
+  destruct (is_longconst_zero b) eqn:LC.
++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0.
+  exploit eval_highlong. eexact H. intros [v1 [A1 B1]].
+  exploit eval_comp. eexact A1. 
+  instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp.
+  instantiate (1 := Cge). intros [v2 [A2 B2]].
+  econstructor; split. eauto. 
+  destruct x; simpl in *; auto. inv B1.
+  unfold Val.cmp, Val.cmp_bool, Val.of_optbool, Int.cmp in B2.
+  unfold Val.cmpl, Val.cmpl_bool, Val.of_optbool, Int64.cmp.
+  rewrite <- (Int64.ofwords_recompose i). rewrite decompose_cmpl_lt_zero.
+  auto. 
++ apply DEFAULT.
+Qed.
+
+End CMCONSTR.
+
diff --git a/backend/Selection.v b/backend/Selection.v
index 29bdabc..7964feb 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -24,12 +24,14 @@
 
 Require Import Coqlib.
 Require Import AST.
+Require Import Errors.
 Require Import Integers.
 Require Import Globalenvs.
 Require Cminor.
 Require Import Op.
 Require Import CminorSel.
 Require Import SelectOp.
+Require Import SelectLong.
 
 Open Local Scope cminorsel_scope.
 
@@ -55,12 +57,17 @@ Definition store (chunk: memory_chunk) (e1 e2: expr) :=
 
 (** Instruction selection for operator applications.  Most of the work
     is done by the processor-specific smart constructors defined
-    in module [SelectOp]. *)
+    in modules [SelectOp] and [SelectLong]. *)
+
+Section SELECTION.
+
+Variable hf: helper_functions.
 
 Definition sel_constant (cst: Cminor.constant) : expr :=
   match cst with
   | Cminor.Ointconst n => Eop (Ointconst n) Enil
   | Cminor.Ofloatconst f => Eop (Ofloatconst f) Enil
+  | Cminor.Olongconst n => longconst n
   | Cminor.Oaddrsymbol id ofs => addrsymbol id ofs
   | Cminor.Oaddrstack ofs => addrstack ofs
   end.
@@ -80,6 +87,15 @@ Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
   | Cminor.Ointuoffloat => intuoffloat arg
   | Cminor.Ofloatofint => floatofint arg
   | Cminor.Ofloatofintu => floatofintu arg
+  | Cminor.Onegl => negl hf arg
+  | Cminor.Onotl => notl arg
+  | Cminor.Ointoflong => intoflong arg
+  | Cminor.Olongofint => longofint arg
+  | Cminor.Olongofintu => longofintu arg
+  | Cminor.Olongoffloat => longoffloat hf arg
+  | Cminor.Olonguoffloat => longuoffloat hf arg
+  | Cminor.Ofloatoflong => floatoflong hf arg
+  | Cminor.Ofloatoflongu => floatoflongu hf arg
   end.
 
 Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
@@ -101,9 +117,24 @@ Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
   | Cminor.Osubf => subf arg1 arg2
   | Cminor.Omulf => mulf arg1 arg2
   | Cminor.Odivf => divf arg1 arg2
+  | Cminor.Oaddl => addl hf arg1 arg2
+  | Cminor.Osubl => subl hf arg1 arg2
+  | Cminor.Omull => mull hf arg1 arg2
+  | Cminor.Odivl => divl hf arg1 arg2
+  | Cminor.Odivlu => divlu hf arg1 arg2
+  | Cminor.Omodl => modl hf arg1 arg2
+  | Cminor.Omodlu => modlu hf arg1 arg2
+  | Cminor.Oandl => andl arg1 arg2
+  | Cminor.Oorl => orl arg1 arg2
+  | Cminor.Oxorl => xorl arg1 arg2
+  | Cminor.Oshll => shll hf arg1 arg2
+  | Cminor.Oshrl => shrl hf arg1 arg2
+  | Cminor.Oshrlu => shrlu hf arg1 arg2
   | Cminor.Ocmp c => comp c arg1 arg2
   | Cminor.Ocmpu c => compu c arg1 arg2
   | Cminor.Ocmpf c => compf c arg1 arg2
+  | Cminor.Ocmpl c => cmpl hf c arg1 arg2
+  | Cminor.Ocmplu c => cmplu hf c arg1 arg2
   end.
 
 (** Conversion from Cminor expression to Cminorsel expressions *)
@@ -186,22 +217,26 @@ Fixpoint sel_stmt (ge: Cminor.genv) (s: Cminor.stmt) : stmt :=
   | Cminor.Sgoto lbl => Sgoto lbl
   end.
 
-(** Conversion of functions and programs. *)
+End SELECTION.
+
+(** Conversion of functions. *)
 
-Definition sel_function (ge: Cminor.genv) (f: Cminor.function) : function :=
+Definition sel_function (hf: helper_functions) (ge: Cminor.genv) (f: Cminor.function) : function :=
   mkfunction
     f.(Cminor.fn_sig)
     f.(Cminor.fn_params)
     f.(Cminor.fn_vars)
     f.(Cminor.fn_stackspace)
-    (sel_stmt ge f.(Cminor.fn_body)).
+    (sel_stmt hf ge f.(Cminor.fn_body)).
 
-Definition sel_fundef (ge: Cminor.genv) (f: Cminor.fundef) : fundef :=
-  transf_fundef (sel_function ge) f.
+Definition sel_fundef (hf: helper_functions) (ge: Cminor.genv) (f: Cminor.fundef) : fundef :=
+  transf_fundef (sel_function hf ge) f.
 
-Definition sel_program (p: Cminor.program) : program :=
-  let ge := Genv.globalenv p in
-  transform_program (sel_fundef ge) p.
+(** Conversion of programs. *)
 
+Local Open Scope error_monad_scope.
 
+Definition sel_program (p: Cminor.program) : res program :=
+  let ge := Genv.globalenv p in
+  do hf <- get_helpers ge; OK (transform_program (sel_fundef hf ge) p).
 
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 0269438..525a29d 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -15,6 +15,7 @@
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
+Require Import Errors.
 Require Import Integers.
 Require Import Values.
 Require Import Memory.
@@ -25,27 +26,114 @@ Require Import Cminor.
 Require Import Op.
 Require Import CminorSel.
 Require Import SelectOp.
+Require Import SelectLong.
 Require Import Selection.
 Require Import SelectOpproof.
+Require Import SelectLongproof.
 
 Open Local Scope cminorsel_scope.
 
+
 (** * Correctness of the instruction selection functions for expressions *)
 
+Section PRESERVATION.
+
+Variable prog: Cminor.program.
+Let ge := Genv.globalenv prog.
+Variable hf: helper_functions.
+Let tprog := transform_program (sel_fundef hf ge) prog.
+Let tge := Genv.globalenv tprog.
+Hypothesis HELPERS: i64_helpers_correct tge hf.
+
+Lemma symbols_preserved:
+  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  intros; unfold ge, tge, tprog. apply Genv.find_symbol_transf.
+Qed.
+
+Lemma function_ptr_translated:
+  forall (b: block) (f: Cminor.fundef),
+  Genv.find_funct_ptr ge b = Some f ->
+  Genv.find_funct_ptr tge b = Some (sel_fundef hf ge f).
+Proof.  
+  intros. 
+  exact (Genv.find_funct_ptr_transf (sel_fundef hf ge) _ _ H).
+Qed.
+
+Lemma functions_translated:
+  forall (v v': val) (f: Cminor.fundef),
+  Genv.find_funct ge v = Some f ->
+  Val.lessdef v v' ->
+  Genv.find_funct tge v' = Some (sel_fundef hf ge f).
+Proof.  
+  intros. inv H0.
+  exact (Genv.find_funct_transf (sel_fundef hf ge) _ _ H).
+  simpl in H. discriminate.
+Qed.
+
+Lemma sig_function_translated:
+  forall f,
+  funsig (sel_fundef hf ge f) = Cminor.funsig f.
+Proof.
+  intros. destruct f; reflexivity.
+Qed.
+
+Lemma varinfo_preserved:
+  forall b, Genv.find_var_info tge b = Genv.find_var_info ge b.
+Proof.
+  intros; unfold ge, tge, tprog, sel_program. 
+  apply Genv.find_var_info_transf.
+Qed.
+
+Lemma helper_implements_preserved:
+  forall id sg vargs vres,
+  helper_implements ge id sg vargs vres ->
+  helper_implements tge id sg vargs vres.
+Proof.
+  intros. destruct H as (b & ef & A & B & C & D).
+  exploit function_ptr_translated; eauto. simpl. intros. 
+  exists b; exists ef. 
+  split. rewrite symbols_preserved. auto.
+  split. auto.
+  split. auto.
+  intros. eapply external_call_symbols_preserved; eauto. 
+  exact symbols_preserved. exact varinfo_preserved.
+Qed.
+
+Lemma builtin_implements_preserved:
+  forall id sg vargs vres,
+  builtin_implements ge id sg vargs vres ->
+  builtin_implements tge id sg vargs vres.
+Proof.
+  unfold builtin_implements; intros.
+  eapply external_call_symbols_preserved; eauto. 
+  exact symbols_preserved. exact varinfo_preserved.
+Qed.
+
+Lemma helpers_correct_preserved:
+  forall h, i64_helpers_correct ge h -> i64_helpers_correct tge h.
+Proof.
+  unfold i64_helpers_correct; intros.
+  repeat (match goal with [ H: _ /\ _ |- _ /\ _ ] => destruct H; split end);
+  intros; try (eapply helper_implements_preserved; eauto);
+  try (eapply builtin_implements_preserved; eauto).
+  exploit H14; eauto. intros [z [A B]]. exists z; split; eauto. eapply helper_implements_preserved; eauto.
+  exploit H15; eauto. intros [z [A B]]. exists z; split; eauto. eapply helper_implements_preserved; eauto.
+Qed.
+
 Section CMCONSTR.
 
-Variable ge: genv.
 Variable sp: val.
 Variable e: env.
 Variable m: mem.
 
 Lemma eval_condition_of_expr:
   forall le a v b,
-  eval_expr ge sp e m le a v ->
+  eval_expr tge sp e m le a v ->
   Val.bool_of_val v b ->
   match condition_of_expr a with (cond, args) =>
     exists vl,
-    eval_exprlist ge sp e m le args vl /\
+    eval_exprlist tge sp e m le args vl /\
     eval_condition cond vl m = Some b
   end.
 Proof.
@@ -60,9 +148,9 @@ Qed.
 
 Lemma eval_load:
   forall le a v chunk v',
-  eval_expr ge sp e m le a v ->
+  eval_expr tge sp e m le a v ->
   Mem.loadv chunk m v = Some v' ->
-  eval_expr ge sp e m le (load chunk a) v'.
+  eval_expr tge sp e m le (load chunk a) v'.
 Proof.
   intros. generalize H0; destruct v; simpl; intro; try discriminate.
   unfold load. 
@@ -73,11 +161,11 @@ Qed.
 
 Lemma eval_store:
   forall chunk a1 a2 v1 v2 f k m',
-  eval_expr ge sp e m nil a1 v1 ->
-  eval_expr ge sp e m nil a2 v2 ->
+  eval_expr tge sp e m nil a1 v1 ->
+  eval_expr tge sp e m nil a2 v2 ->
   Mem.storev chunk m v1 v2 = Some m' ->
-  step ge (State f (store chunk a1 a2) k sp e m)
-       E0 (State f Sskip k sp e m').
+  step tge (State f (store chunk a1 a2) k sp e m)
+        E0 (State f Sskip k sp e m').
 Proof.
   intros. generalize H1; destruct v1; simpl; intro; try discriminate.
   unfold store.
@@ -90,9 +178,9 @@ Qed.
 
 Lemma eval_sel_unop:
   forall le op a1 v1 v,
-  eval_expr ge sp e m le a1 v1 ->
+  eval_expr tge sp e m le a1 v1 ->
   eval_unop op v1 = Some v ->
-  exists v', eval_expr ge sp e m le (sel_unop op a1) v' /\ Val.lessdef v v'.
+  exists v', eval_expr tge sp e m le (sel_unop hf op a1) v' /\ Val.lessdef v v'.
 Proof.
   destruct op; simpl; intros; FuncInv; try subst v.
   apply eval_cast8unsigned; auto.
@@ -108,14 +196,23 @@ Proof.
   eapply eval_intuoffloat; eauto.
   eapply eval_floatofint; eauto.
   eapply eval_floatofintu; eauto.
+  eapply eval_negl; eauto.
+  eapply eval_notl; eauto.
+  eapply eval_intoflong; eauto.
+  eapply eval_longofint; eauto.
+  eapply eval_longofintu; eauto.
+  eapply eval_longoffloat; eauto.
+  eapply eval_longuoffloat; eauto.
+  eapply eval_floatoflong; eauto.
+  eapply eval_floatoflongu; eauto.
 Qed.
 
 Lemma eval_sel_binop:
   forall le op a1 a2 v1 v2 v,
-  eval_expr ge sp e m le a1 v1 ->
-  eval_expr ge sp e m le a2 v2 ->
+  eval_expr tge sp e m le a1 v1 ->
+  eval_expr tge sp e m le a2 v2 ->
   eval_binop op v1 v2 m = Some v ->
-  exists v', eval_expr ge sp e m le (sel_binop op a1 a2) v' /\ Val.lessdef v v'.
+  exists v', eval_expr tge sp e m le (sel_binop hf op a1 a2) v' /\ Val.lessdef v v'.
 Proof.
   destruct op; simpl; intros; FuncInv; try subst v.
   apply eval_add; auto.
@@ -135,9 +232,24 @@ Proof.
   apply eval_subf; auto.
   apply eval_mulf; auto.
   apply eval_divf; auto.
+  eapply eval_addl; eauto.
+  eapply eval_subl; eauto.
+  eapply eval_mull; eauto.
+  eapply eval_divl; eauto.
+  eapply eval_divlu; eauto.
+  eapply eval_modl; eauto.
+  eapply eval_modlu; eauto.
+  eapply eval_andl; eauto.
+  eapply eval_orl; eauto.
+  eapply eval_xorl; eauto.
+  eapply eval_shll; eauto.
+  eapply eval_shrl; eauto.
+  eapply eval_shrlu; eauto.
   apply eval_comp; auto.
   apply eval_compu; auto.
   apply eval_compf; auto.
+  apply eval_cmpl; auto.
+  apply eval_cmplu; auto.
 Qed.
 
 End CMCONSTR.
@@ -156,7 +268,7 @@ Proof.
 Qed.
 
 Lemma classify_call_correct:
-  forall ge sp e m a v fd,
+  forall sp e m a v fd,
   Cminor.eval_expr ge sp e m a v ->
   Genv.find_funct ge v = Some fd ->
   match classify_call ge a with
@@ -215,57 +327,6 @@ Qed.
 
 (** * Semantic preservation for instruction selection. *)
 
-Section PRESERVATION.
-
-Variable prog: Cminor.program.
-Let tprog := sel_program prog.
-Let ge := Genv.globalenv prog.
-Let tge := Genv.globalenv tprog.
-
-(** Relationship between the global environments for the original
-  Cminor program and the generated CminorSel program. *)
-
-Lemma symbols_preserved:
-  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
-Proof.
-  intros; unfold ge, tge, tprog, sel_program. 
-  apply Genv.find_symbol_transf.
-Qed.
-
-Lemma function_ptr_translated:
-  forall (b: block) (f: Cminor.fundef),
-  Genv.find_funct_ptr ge b = Some f ->
-  Genv.find_funct_ptr tge b = Some (sel_fundef ge f).
-Proof.  
-  intros. 
-  exact (Genv.find_funct_ptr_transf (sel_fundef ge) _ _ H).
-Qed.
-
-Lemma functions_translated:
-  forall (v v': val) (f: Cminor.fundef),
-  Genv.find_funct ge v = Some f ->
-  Val.lessdef v v' ->
-  Genv.find_funct tge v' = Some (sel_fundef ge f).
-Proof.  
-  intros. inv H0.
-  exact (Genv.find_funct_transf (sel_fundef ge) _ _ H).
-  simpl in H. discriminate.
-Qed.
-
-Lemma sig_function_translated:
-  forall f,
-  funsig (sel_fundef ge f) = Cminor.funsig f.
-Proof.
-  intros. destruct f; reflexivity.
-Qed.
-
-Lemma varinfo_preserved:
-  forall b, Genv.find_var_info tge b = Genv.find_var_info ge b.
-Proof.
-  intros; unfold ge, tge, tprog, sel_program. 
-  apply Genv.find_var_info_transf.
-Qed.
-
 (** Relationship between the local environments. *)
 
 Definition env_lessdef (e1 e2: env) : Prop :=
@@ -305,7 +366,7 @@ Lemma sel_expr_correct:
   Cminor.eval_expr ge sp e m a v ->
   forall e' le m',
   env_lessdef e e' -> Mem.extends m m' ->
-  exists v', eval_expr tge sp e' m' le (sel_expr a) v' /\ Val.lessdef v v'.
+  exists v', eval_expr tge sp e' m' le (sel_expr hf a) v' /\ Val.lessdef v v'.
 Proof.
   induction 1; intros; simpl.
   (* Evar *)
@@ -314,6 +375,9 @@ Proof.
   destruct cst; simpl in *; inv H. 
   exists (Vint i); split; auto. econstructor. constructor. auto. 
   exists (Vfloat f); split; auto. econstructor. constructor. auto.
+  exists (Val.longofwords (Vint (Int64.hiword i)) (Vint (Int64.loword i))); split.
+  eapply eval_Eop. constructor. EvalOp. simpl; eauto. constructor. EvalOp. simpl; eauto. constructor. auto.
+  simpl. rewrite Int64.ofwords_recompose. auto.
   rewrite <- symbols_preserved. fold (symbol_address tge i i0). apply eval_addrsymbol.
   apply eval_addrstack.
   (* Eunop *)
@@ -338,7 +402,7 @@ Lemma sel_exprlist_correct:
   Cminor.eval_exprlist ge sp e m a v ->
   forall e' le m',
   env_lessdef e e' -> Mem.extends m m' ->
-  exists v', eval_exprlist tge sp e' m' le (sel_exprlist a) v' /\ Val.lessdef_list v v'.
+  exists v', eval_exprlist tge sp e' m' le (sel_exprlist hf a) v' /\ Val.lessdef_list v v'.
 Proof.
   induction 1; intros; simpl. 
   exists (@nil val); split; auto. constructor.
@@ -354,30 +418,30 @@ Inductive match_cont: Cminor.cont -> CminorSel.cont -> Prop :=
       match_cont Cminor.Kstop Kstop
   | match_cont_seq: forall s k k',
       match_cont k k' ->
-      match_cont (Cminor.Kseq s k) (Kseq (sel_stmt ge s) k')
+      match_cont (Cminor.Kseq s k) (Kseq (sel_stmt hf ge s) k')
   | match_cont_block: forall k k',
       match_cont k k' ->
       match_cont (Cminor.Kblock k) (Kblock k')
   | match_cont_call: forall id f sp e k e' k',
       match_cont k k' -> env_lessdef e e' ->
-      match_cont (Cminor.Kcall id f sp e k) (Kcall id (sel_function ge f) sp e' k').
+      match_cont (Cminor.Kcall id f sp e k) (Kcall id (sel_function hf ge f) sp e' k').
 
 Inductive match_states: Cminor.state -> CminorSel.state -> Prop :=
   | match_state: forall f s k s' k' sp e m e' m',
-      s' = sel_stmt ge s ->
+      s' = sel_stmt hf ge s ->
       match_cont k k' ->
       env_lessdef e e' ->
       Mem.extends m m' ->
       match_states
         (Cminor.State f s k sp e m)
-        (State (sel_function ge f) s' k' sp e' m')
+        (State (sel_function hf ge f) s' k' sp e' m')
   | match_callstate: forall f args args' k k' m m',
       match_cont k k' ->
       Val.lessdef_list args args' ->
       Mem.extends m m' ->
       match_states
         (Cminor.Callstate f args k m)
-        (Callstate (sel_fundef ge f) args' k' m')
+        (Callstate (sel_fundef hf ge f) args' k' m')
   | match_returnstate: forall v v' k k' m m',
       match_cont k k' ->
       Val.lessdef v v' ->
@@ -393,7 +457,7 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop :=
       eval_exprlist tge sp e' m' nil al args' ->
       match_states
         (Cminor.Callstate (External ef) args (Cminor.Kcall optid f sp e k) m)
-        (State (sel_function ge f) (Sbuiltin optid ef al) k' sp e' m')
+        (State (sel_function hf ge f) (Sbuiltin optid ef al) k' sp e' m')
   | match_builtin_2: forall v v' optid f sp e k m e' m' k',
       match_cont k k' ->
       Val.lessdef v v' ->
@@ -401,7 +465,7 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop :=
       Mem.extends m m' ->
       match_states
         (Cminor.Returnstate v (Cminor.Kcall optid f sp e k) m)
-        (State (sel_function ge f) Sskip k' sp (set_optvar optid v' e') m').
+        (State (sel_function hf ge f) Sskip k' sp (set_optvar optid v' e') m').
 
 Remark call_cont_commut:
   forall k k', match_cont k k' -> match_cont (Cminor.call_cont k) (call_cont k').
@@ -412,15 +476,15 @@ Qed.
 Remark find_label_commut:
   forall lbl s k k',
   match_cont k k' ->
-  match Cminor.find_label lbl s k, find_label lbl (sel_stmt ge s) k' with
+  match Cminor.find_label lbl s k, find_label lbl (sel_stmt hf ge s) k' with
   | None, None => True
-  | Some(s1, k1), Some(s1', k1') => s1' = sel_stmt ge s1 /\ match_cont k1 k1'
+  | Some(s1, k1), Some(s1', k1') => s1' = sel_stmt hf ge s1 /\ match_cont k1 k1'
   | _, _ => False
   end.
 Proof.
   induction s; intros; simpl; auto.
 (* store *)
-  unfold store. destruct (addressing m (sel_expr e)); simpl; auto.
+  unfold store. destruct (addressing m (sel_expr hf e)); simpl; auto.
 (* call *)
   destruct (classify_call ge e); simpl; auto.
 (* tailcall *)
@@ -428,13 +492,13 @@ Proof.
 (* seq *)
   exploit (IHs1 (Cminor.Kseq s2 k)). constructor; eauto. 
   destruct (Cminor.find_label lbl s1 (Cminor.Kseq s2 k)) as [[sx kx] | ];
-  destruct (find_label lbl (sel_stmt ge s1) (Kseq (sel_stmt ge s2) k')) as [[sy ky] | ];
+  destruct (find_label lbl (sel_stmt hf ge s1) (Kseq (sel_stmt hf ge s2) k')) as [[sy ky] | ];
   intuition. apply IHs2; auto.
 (* ifthenelse *)
-  destruct (condition_of_expr (sel_expr e)) as [cond args]. simpl.
+  destruct (condition_of_expr (sel_expr hf e)) as [cond args]. simpl.
   exploit (IHs1 k); eauto. 
   destruct (Cminor.find_label lbl s1 k) as [[sx kx] | ];
-  destruct (find_label lbl (sel_stmt ge s1) k') as [[sy ky] | ];
+  destruct (find_label lbl (sel_stmt hf ge s1) k') as [[sy ky] | ];
   intuition. apply IHs2; auto.
 (* loop *)
   apply IHs. constructor; auto.
@@ -531,9 +595,9 @@ Proof.
   exploit sel_expr_correct; eauto. intros [v' [A B]].
   assert (Val.bool_of_val v' b). inv B. auto. inv H0.
   exploit eval_condition_of_expr; eauto.
-  destruct (condition_of_expr (sel_expr a)) as [cond args].
+  destruct (condition_of_expr (sel_expr hf a)) as [cond args].
   intros [vl' [C D]].
-  left; exists (State (sel_function ge f) (if b then sel_stmt ge s1 else sel_stmt ge s2) k' sp e' m'); split.
+  left; exists (State (sel_function hf ge f) (if b then sel_stmt hf ge s1 else sel_stmt hf ge s2) k' sp e' m'); split.
   econstructor; eauto. 
   constructor; auto. destruct b; auto.
   (* Sloop *)
@@ -566,7 +630,7 @@ Proof.
   exploit (find_label_commut lbl (Cminor.fn_body f) (Cminor.call_cont k)).
     apply call_cont_commut; eauto.
   rewrite H. 
-  destruct (find_label lbl (sel_stmt ge (Cminor.fn_body f)) (call_cont k'0))
+  destruct (find_label lbl (sel_stmt hf ge (Cminor.fn_body f)) (call_cont k'0))
   as [[s'' k'']|] eqn:?; intros; try contradiction.
   destruct H0. 
   left; econstructor; split.
@@ -623,14 +687,22 @@ Proof.
   intros. inv H0. inv H. inv H3. inv H5. constructor.
 Qed.
 
+End PRESERVATION.
+
+Axiom get_helpers_correct:
+  forall ge hf, get_helpers ge = OK hf -> i64_helpers_correct ge hf.
+
 Theorem transf_program_correct:
+  forall prog tprog,
+  sel_program prog = OK tprog ->
   forward_simulation (Cminor.semantics prog) (CminorSel.semantics tprog).
 Proof.
+  intros. unfold sel_program in H. 
+  destruct (get_helpers (Genv.globalenv prog)) as [hf|] eqn:E; simpl in H; try discriminate.
+  inv H.
   eapply forward_simulation_opt.
-  eexact symbols_preserved.
-  eexact sel_initial_states.
-  eexact sel_final_states.
-  eexact sel_step_correct. 
+  apply symbols_preserved.
+  apply sel_initial_states.
+  apply sel_final_states.
+  apply sel_step_correct. apply helpers_correct_preserved. apply get_helpers_correct. auto.
 Qed.
-
-End PRESERVATION.
diff --git a/backend/Splitting.ml b/backend/Splitting.ml
new file mode 100644
index 0000000..60f6818
--- /dev/null
+++ b/backend/Splitting.ml
@@ -0,0 +1,184 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Live range splitting over RTL *)
+
+open Camlcoq
+open Datatypes
+open Coqlib
+open Maps
+open AST
+open Kildall
+open Registers
+open RTL
+
+(* We represent live ranges by the following unification variables.
+   Live range inference manipulates only variable live ranges.
+   Code rewriting assigns fresh RTL registers to live ranges. *)
+
+type live_range = { source: reg; mutable kind: live_range_kind }
+
+and live_range_kind =
+  | Link of live_range
+  | Var
+  | Reg of reg
+
+let new_range r = { source = r; kind = Var }
+
+let rec repr lr =
+  match lr.kind with
+  | Link lr' -> let lr'' = repr lr' in lr.kind <- Link lr''; lr''
+  | _ -> lr
+
+let same_range lr1 lr2 = 
+  lr1 == lr2 || (* quick test for speed *) 
+  repr lr1 == repr lr2 (* the real test *) 
+
+let unify lr1 lr2 =
+  let lr1 = repr lr1 and lr2 = repr lr2 in
+  if lr1 != lr2 then begin
+    match lr1.kind, lr2.kind with
+    | Var, _ -> lr1.kind <- Link lr2
+    | _, Var -> lr2.kind <- Link lr1
+    | _, _   -> assert false
+  end
+
+let reg_for lr =
+  let lr = repr lr in
+  match lr.kind with
+  | Link _ -> assert false
+  | Reg r -> r
+  | Var -> let r = XTL.new_reg() in lr.kind <- Reg r; r
+
+(* Live range inference is a variant on liveness analysis.
+   At each PC and for each register, liveness analysis determines
+   whether the reg is live or not.  Live range inference associates
+   a live range to the reg if it is live, and no live range if it
+   is dead. *)
+
+module LRMap = struct
+
+  type t = live_range PTree.t   (* live register -> live range *)
+
+  let beq m1 m2 = PTree.beq same_range m1 m2
+
+  let bot : t = PTree.empty
+
+  let lub_opt_range olr1 olr2 =
+    match olr1, olr2 with
+    | Some lr1, Some lr2 -> unify lr1 lr2; olr1
+    | Some _, None -> olr1
+    | None, _ -> olr2
+
+  let lub m1 m2 =
+    PTree.combine lub_opt_range m1 m2
+
+end
+
+module Solver = Backward_Dataflow_Solver(LRMap)(NodeSetBackward)
+
+(* A cache of live ranges associated to (pc, used reg) pairs. *)
+
+let live_range_cache =
+  (Hashtbl.create 123 : (int32 * int32, live_range) Hashtbl.t)
+
+let live_range_for pc r =
+  let pc' = P.to_int32 pc
+  and r' = P.to_int32 r in
+  try
+    Hashtbl.find live_range_cache (pc',r')
+  with Not_found ->
+    let lr = new_range r in
+    Hashtbl.add live_range_cache (pc', r') lr;
+    lr
+
+(* The transfer function *)
+
+let reg_live pc r map =
+  match PTree.get r map with
+  | Some lr -> map                                     (* already live *)
+  | None    -> PTree.set r (live_range_for pc r) map   (* becomes live *)
+
+let reg_list_live pc rl map = List.fold_right (reg_live pc) rl map
+
+let reg_dead r map =
+  PTree.remove r map
+
+let transfer f pc after =
+  match PTree.get pc f.fn_code with
+  | None ->
+      LRMap.bot
+  | Some i ->
+      let across =
+        match instr_defs i with
+        | None -> after
+        | Some r -> reg_dead r after in
+      reg_list_live pc (instr_uses i) across
+
+(* The live range analysis *)
+
+let analysis f = Solver.fixpoint (successors f) (transfer f) []
+
+(* Produce renamed registers for each instruction. *)
+
+let ren_reg map r =
+  match PTree.get r map with
+  | Some lr -> reg_for lr
+  | None -> XTL.new_reg()
+
+let ren_regs map rl =
+  List.map (ren_reg map) rl
+
+let ren_ros map ros =
+  sum_left_map (ren_reg map) ros
+
+(* Rename in an instruction *)
+
+let ren_instr f maps pc i =
+  let after = PMap.get pc maps in
+  let before = transfer f pc after in
+  match i with
+  | Inop s -> Inop s
+  | Iop(op, args, res, s) ->
+      Iop(op, ren_regs before args, ren_reg after res, s)
+  | Iload(chunk, addr, args, dst, s) ->
+      Iload(chunk, addr, ren_regs before args, ren_reg after dst, s)
+  | Istore(chunk, addr, args, src, s) ->
+      Istore(chunk, addr, ren_regs before args, ren_reg before src, s)
+  | Icall(sg, ros, args, res, s) ->
+      Icall(sg, ren_ros before ros, ren_regs before args, ren_reg after res, s)
+  | Itailcall(sg, ros, args) ->
+      Itailcall(sg, ren_ros before ros, ren_regs before args)
+  | Ibuiltin(ef, args, res, s) ->
+      Ibuiltin(ef, ren_regs before args, ren_reg after res, s)
+  | Icond(cond, args, s1, s2) ->
+      Icond(cond, ren_regs before args, s1, s2)
+  | Ijumptable(arg, tbl) ->
+      Ijumptable(ren_reg before arg, tbl)
+  | Ireturn optarg ->
+      Ireturn(option_map (ren_reg before) optarg)
+
+(* Rename live ranges in a function *)
+
+let rename_function f =
+  Hashtbl.clear live_range_cache;
+  let maps =
+    match analysis f with
+    | None -> assert false
+    | Some maps -> maps in
+  let before_entrypoint =
+    transfer f f.fn_entrypoint (PMap.get f.fn_entrypoint maps) in
+  { fn_sig = f.fn_sig;
+    fn_params = ren_regs before_entrypoint f.fn_params;
+    fn_stacksize = f.fn_stacksize;
+    fn_code = PTree.map (ren_instr f maps) f.fn_code;
+    fn_entrypoint = f.fn_entrypoint }
diff --git a/backend/Stacking.v b/backend/Stacking.v
index 03e882e..f7c16d1 100644
--- a/backend/Stacking.v
+++ b/backend/Stacking.v
@@ -23,6 +23,7 @@ Require Import Bounds.
 Require Import Mach.
 Require Import Conventions.
 Require Import Stacklayout.
+Require Import Lineartyping.
 
 (** * Layout of activation records *)
 
@@ -44,8 +45,7 @@ Definition offset_of_index (fe: frame_env) (idx: frame_index) :=
   match idx with
   | FI_link => fe.(fe_ofs_link)
   | FI_retaddr => fe.(fe_ofs_retaddr)
-  | FI_local x Tint => fe.(fe_ofs_int_local) + 4 * x
-  | FI_local x Tfloat => fe.(fe_ofs_float_local) + 8 * x
+  | FI_local x ty => fe.(fe_ofs_local) + 4 * x
   | FI_arg x ty => fe_ofs_arg + 4 * x
   | FI_saved_int x => fe.(fe_ofs_int_callee_save) + 4 * x
   | FI_saved_float x => fe.(fe_ofs_float_callee_save) + 8 * x
@@ -133,8 +133,8 @@ Definition transl_addr (fe: frame_env) (addr: addressing) :=
 Definition transl_annot_param (fe: frame_env) (l: loc) : annot_param :=
   match l with
   | R r => APreg r
-  | S (Local ofs ty) => APstack (chunk_of_type ty) (offset_of_index fe (FI_local ofs ty))
-  | S _ => APstack Mint32 (-1)     (**r never happens *)
+  | S Local ofs ty => APstack (chunk_of_type ty) (offset_of_index fe (FI_local ofs ty))
+  | S _ _ _ => APstack Mint32 (-1)     (**r never happens *)
   end.
 
 
@@ -150,22 +150,22 @@ Definition transl_annot_param (fe: frame_env) (l: loc) : annot_param :=
 Definition transl_instr
     (fe: frame_env) (i: Linear.instruction) (k: Mach.code) : Mach.code :=
   match i with
-  | Lgetstack s r =>
-      match s with
-      | Local ofs ty =>
+  | Lgetstack sl ofs ty r =>
+      match sl with
+      | Local =>
           Mgetstack (Int.repr (offset_of_index fe (FI_local ofs ty))) ty r :: k
-      | Incoming ofs ty =>
+      | Incoming =>
           Mgetparam (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty r :: k
-      | Outgoing ofs ty =>
+      | Outgoing =>
           Mgetstack (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty r :: k
       end
-  | Lsetstack r s =>
-      match s with
-      | Local ofs ty =>
+  | Lsetstack r sl ofs ty =>
+      match sl with
+      | Local =>
           Msetstack r (Int.repr (offset_of_index fe (FI_local ofs ty))) ty :: k
-      | Incoming ofs ty =>
+      | Incoming =>
           k (* should not happen *)
-      | Outgoing ofs ty =>
+      | Outgoing =>
           Msetstack r (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty :: k
       end
   | Lop op args res =>
@@ -216,7 +216,9 @@ Open Local Scope string_scope.
 
 Definition transf_function (f: Linear.function) : res Mach.function :=
   let fe := make_env (function_bounds f) in
-  if zlt Int.max_unsigned fe.(fe_size) then
+  if negb (wt_function f) then
+    Error (msg "Ill-formed Linear code")
+  else if zlt Int.max_unsigned fe.(fe_size) then
     Error (msg "Too many spilled variables, stack size exceeded")
   else
     OK (Mach.mkfunction
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index a73f0aa..1808402 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -25,7 +25,7 @@ Require Import Events.
 Require Import Globalenvs.
 Require Import Smallstep.
 Require Import Locations.
-Require LTL.
+Require Import LTL.
 Require Import Linear.
 Require Import Lineartyping.
 Require Import Mach.
@@ -83,17 +83,28 @@ Lemma unfold_transf_function:
          (Int.repr fe.(fe_ofs_retaddr)).
 Proof.
   generalize TRANSF_F. unfold transf_function.
+  destruct (wt_function f); simpl negb.
   destruct (zlt Int.max_unsigned (fe_size (make_env (function_bounds f)))).
   intros; discriminate.
   intros. unfold fe. unfold b. congruence.
+  intros; discriminate.
+Qed.
+
+Lemma transf_function_well_typed:
+  wt_function f = true.
+Proof.
+  generalize TRANSF_F. unfold transf_function.
+  destruct (wt_function f); simpl negb. auto. intros; discriminate.
 Qed.
 
 Lemma size_no_overflow: fe.(fe_size) <= Int.max_unsigned.
 Proof.
   generalize TRANSF_F. unfold transf_function.
+  destruct (wt_function f); simpl negb.
   destruct (zlt Int.max_unsigned (fe_size (make_env (function_bounds f)))).
   intros; discriminate.
   intros. unfold fe. unfold b. omega.
+  intros; discriminate.
 Qed.
 
 Remark bound_stack_data_stacksize:
@@ -109,9 +120,8 @@ Definition index_valid (idx: frame_index) :=
   match idx with
   | FI_link => True
   | FI_retaddr => True
-  | FI_local x Tint => 0 <= x < b.(bound_int_local)
-  | FI_local x Tfloat => 0 <= x < b.(bound_float_local)
-  | FI_arg x ty => 0 <= x /\ x + typesize ty <= b.(bound_outgoing)
+  | FI_local x ty => ty <> Tlong /\ 0 <= x /\ x + typesize ty <= b.(bound_local)
+  | FI_arg x ty => ty <> Tlong /\ 0 <= x /\ x + typesize ty <= b.(bound_outgoing)
   | FI_saved_int x => 0 <= x < b.(bound_int_callee_save)
   | FI_saved_float x => 0 <= x < b.(bound_float_callee_save)
   end.
@@ -134,7 +144,7 @@ Definition index_diff (idx1 idx2: frame_index) : Prop :=
   | FI_link, FI_link => False
   | FI_retaddr, FI_retaddr => False
   | FI_local x1 ty1, FI_local x2 ty2 =>
-      x1 <> x2 \/ ty1 <> ty2
+      x1 + typesize ty1 <= x2 \/ x2 + typesize ty2 <= x1
   | FI_arg x1 ty1, FI_arg x2 ty2 =>
       x1 + typesize ty1 <= x2 \/ x2 + typesize ty2 <= x1
   | FI_saved_int x1, FI_saved_int x2 => x1 <> x2
@@ -150,8 +160,7 @@ Proof.
 Qed.
 
 Ltac AddPosProps :=
-  generalize (bound_int_local_pos b); intro;
-  generalize (bound_float_local_pos b); intro;
+  generalize (bound_local_pos b); intro;
   generalize (bound_int_callee_save_pos b); intro;
   generalize (bound_float_callee_save_pos b); intro;
   generalize (bound_outgoing_pos b); intro;
@@ -166,6 +175,12 @@ Qed.
 
 Opaque function_bounds.
 
+Ltac InvIndexValid :=
+  match goal with
+  | [ H: ?ty <> Tlong /\ _ |- _ ] =>
+       destruct H; generalize (typesize_pos ty) (typesize_typesize ty); intros
+  end.
+
 Lemma offset_of_index_disj:
   forall idx1 idx2,
   index_valid idx1 -> index_valid idx2 ->
@@ -177,12 +192,11 @@ Proof.
   generalize (frame_env_separated b). intuition. fold fe in H.
   AddPosProps.
   destruct idx1; destruct idx2;
-  try (destruct t); try (destruct t0);
-  unfold offset_of_index, type_of_index, AST.typesize;
-  simpl in V1; simpl in V2; simpl in DIFF;
-  try omega.
-  assert (z <> z0). intuition auto. omega.
-  assert (z <> z0). intuition auto. omega.
+  simpl in V1; simpl in V2; repeat InvIndexValid; simpl in DIFF;
+  unfold offset_of_index, type_of_index;
+  change (AST.typesize Tint) with 4;
+  change (AST.typesize Tfloat) with 8;
+  omega.
 Qed.
 
 Lemma offset_of_index_disj_stack_data_1:
@@ -194,9 +208,11 @@ Proof.
   intros idx V.
   generalize (frame_env_separated b). intuition. fold fe in H.
   AddPosProps.
-  destruct idx; try (destruct t);
-  unfold offset_of_index, type_of_index, AST.typesize;
-  simpl in V;
+  destruct idx;
+  simpl in V; repeat InvIndexValid;
+  unfold offset_of_index, type_of_index;
+  change (AST.typesize Tint) with 4;
+  change (AST.typesize Tfloat) with 8;
   omega.
 Qed.
 
@@ -250,18 +266,22 @@ Qed.
 
 Lemma index_local_valid:
   forall ofs ty,
-  slot_within_bounds f b (Local ofs ty) ->
+  slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true ->
   index_valid (FI_local ofs ty).
 Proof.
-  unfold slot_within_bounds, index_valid. auto.
+  unfold slot_within_bounds, slot_valid, index_valid; intros. 
+  InvBooleans. 
+  split. destruct ty; congruence. auto.
 Qed.
 
 Lemma index_arg_valid:
   forall ofs ty,
-  slot_within_bounds f b (Outgoing ofs ty) ->
+  slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true ->
   index_valid (FI_arg ofs ty).
 Proof.
-  unfold slot_within_bounds, index_valid. auto.
+  unfold slot_within_bounds, slot_valid, index_valid; intros. 
+  InvBooleans. 
+  split. destruct ty; congruence. auto.
 Qed.
 
 Lemma index_saved_int_valid:
@@ -300,9 +320,10 @@ Proof.
   intros idx V.
   generalize (frame_env_separated b). intros [A B]. fold fe in A. fold fe in B.
   AddPosProps.
-  destruct idx; try (destruct t);
-  unfold offset_of_index, type_of_index, AST.typesize;
-  simpl in V;
+  destruct idx; simpl in V; repeat InvIndexValid;
+  unfold offset_of_index, type_of_index;
+  change (AST.typesize Tint) with 4;
+  change (AST.typesize Tfloat) with 8;
   omega.
 Qed.
 
@@ -459,7 +480,9 @@ Proof.
   apply offset_of_index_perm; auto.
   replace (align_chunk (chunk_of_type (type_of_index idx))) with 4.
   apply offset_of_index_aligned; auto.
-  destruct (type_of_index idx); auto.
+  assert (type_of_index idx <> Tlong).
+    destruct idx; simpl in *; tauto || congruence.
+  destruct (type_of_index idx); reflexivity || congruence. 
   exists m'; auto.
 Qed.
 
@@ -539,7 +562,10 @@ Proof.
   apply Mem.range_perm_implies with Freeable; auto with mem.
   apply offset_of_index_perm; auto. 
   replace (align_chunk (chunk_of_type (type_of_index idx))) with 4. 
-  apply offset_of_index_aligned. destruct (type_of_index idx); auto.
+  apply offset_of_index_aligned.
+  assert (type_of_index idx <> Tlong).
+    destruct idx; simpl in *; tauto || congruence.
+  destruct (type_of_index idx); reflexivity || congruence. 
   intros [v C]. 
   exists v; split; auto. constructor; auto. 
 Qed.
@@ -570,19 +596,19 @@ Record agree_frame (j: meminj) (ls ls0: locset)
         at the corresponding offsets. *)
     agree_locals:
       forall ofs ty, 
-      slot_within_bounds f b (Local ofs ty) ->
-      index_contains_inj j m' sp' (FI_local ofs ty) (ls (S (Local ofs ty)));
+      slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true ->
+      index_contains_inj j m' sp' (FI_local ofs ty) (ls (S Local ofs ty));
     agree_outgoing:
       forall ofs ty, 
-      slot_within_bounds f b (Outgoing ofs ty) ->
-      index_contains_inj j m' sp' (FI_arg ofs ty) (ls (S (Outgoing ofs ty)));
+      slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true ->
+      index_contains_inj j m' sp' (FI_arg ofs ty) (ls (S Outgoing ofs ty));
 
     (** Incoming stack slots have the same value as the
         corresponding Outgoing stack slots in the caller *)
     agree_incoming:
        forall ofs ty, 
-       In (S (Incoming ofs ty)) (loc_parameters f.(Linear.fn_sig)) ->
-       ls (S (Incoming ofs ty)) = ls0 (S (Outgoing ofs ty));
+       In (S Incoming ofs ty) (loc_parameters f.(Linear.fn_sig)) ->
+       ls (S Incoming ofs ty) = ls0 (S Outgoing ofs ty);
 
     (** The back link and return address slots of the Mach frame contain
         the [parent] and [retaddr] values, respectively. *)
@@ -640,8 +666,8 @@ Hint Resolve agree_unused_reg agree_locals agree_outgoing agree_incoming
 Definition agree_callee_save (ls ls0: locset) : Prop :=
   forall l,
   match l with
-  | R r => In r int_callee_save_regs \/ In r float_callee_save_regs
-  | S s => True
+  | R r => ~In r destroyed_at_call
+  | S _ _ _ => True
   end ->
   ls l = ls0 l.
 
@@ -680,6 +706,18 @@ Proof.
   rewrite Locmap.gso; auto. red. auto.
 Qed.
 
+Lemma agree_regs_set_regs:
+  forall j rl vl vl' ls rs,
+  agree_regs j ls rs ->
+  val_list_inject j vl vl' ->
+  agree_regs j (Locmap.setlist (map R rl) vl ls) (set_regs rl vl' rs).
+Proof.
+  induction rl; simpl; intros. 
+  auto.
+  inv H0. auto.
+  apply IHrl; auto. apply agree_regs_set_reg; auto. 
+Qed.
+
 Lemma agree_regs_exten:
   forall j ls rs ls' rs',
   agree_regs j ls rs ->
@@ -692,52 +730,24 @@ Proof.
   rewrite A; rewrite B; auto.
 Qed.
 
-Lemma agree_regs_undef_list:
+Lemma agree_regs_undef_regs:
   forall j rl ls rs,
   agree_regs j ls rs ->
-  agree_regs j (Locmap.undef (List.map R rl) ls) (undef_regs rl rs).
+  agree_regs j (LTL.undef_regs rl ls) (Mach.undef_regs rl rs).
 Proof.
   induction rl; simpl; intros.
-  auto. 
-  apply IHrl. apply agree_regs_set_reg; auto. 
-Qed.
-
-Lemma agree_regs_undef_temps:
-  forall j ls rs,
-  agree_regs j ls rs ->
-  agree_regs j (LTL.undef_temps ls) (undef_temps rs).
-Proof.
-  unfold LTL.undef_temps, undef_temps, temporaries.
-  intros; apply agree_regs_undef_list; auto.
-Qed.
-
-Lemma agree_regs_undef_setstack:
-  forall j ls rs,
-  agree_regs j ls rs ->
-  agree_regs j (Linear.undef_setstack ls) (undef_setstack rs).
-Proof.
-  intros. unfold Linear.undef_setstack, undef_setstack, destroyed_at_move.
-  apply agree_regs_undef_list; auto.
-Qed.
-
-Lemma agree_regs_undef_op:
-  forall op j ls rs,
-  agree_regs j ls rs ->
-  agree_regs j (Linear.undef_op op ls) (undef_op (transl_op fe op) rs).
-Proof.
-  intros. generalize (agree_regs_undef_temps _ _ _ H); intro A.
-Opaque destroyed_at_move_regs.
-  destruct op; auto; simpl; apply agree_regs_undef_setstack; auto.
+  auto.
+  apply agree_regs_set_reg; auto. 
 Qed.
 
 (** Preservation under assignment of stack slot *)
 
 Lemma agree_regs_set_slot:
-  forall j ls rs ss v,
+  forall j ls rs sl ofs ty v,
   agree_regs j ls rs ->
-  agree_regs j (Locmap.set (S ss) v ls) rs.
+  agree_regs j (Locmap.set (S sl ofs ty) v ls) rs.
 Proof.
-  intros; red; intros. rewrite Locmap.gso; auto. red. destruct ss; auto.
+  intros; red; intros. rewrite Locmap.gso; auto. red. auto.
 Qed.
 
 (** Preservation by increasing memory injections *)
@@ -754,16 +764,10 @@ Qed.
 Lemma agree_regs_call_regs:
   forall j ls rs,
   agree_regs j ls rs ->
-  agree_regs j (call_regs ls) (undef_temps rs).
+  agree_regs j (call_regs ls) rs.
 Proof.
-  intros. 
-  assert (agree_regs j (LTL.undef_temps ls) (undef_temps rs)).
-    apply agree_regs_undef_temps; auto.
-  unfold call_regs; intros; red; intros.
-  destruct (in_dec Loc.eq (R r) temporaries).
-  auto.
-  generalize (H0 r). unfold LTL.undef_temps. rewrite Locmap.guo. auto.
-  apply Loc.reg_notin; auto.
+  intros.
+  unfold call_regs; intros; red; intros; auto.
 Qed.
 
 (** ** Properties of [agree_frame] *)
@@ -782,62 +786,49 @@ Proof.
   apply wt_setloc; auto.
 Qed.
 
-Remark temporary_within_bounds:
-  forall r, In (R r) temporaries -> mreg_within_bounds b r.
+Lemma agree_frame_set_regs:
+  forall j ls0 m sp m' sp' parent ra rl vl ls,
+  agree_frame j ls ls0 m sp m' sp' parent ra ->
+  (forall r, In r rl -> mreg_within_bounds b r) ->
+  Val.has_type_list vl (map Loc.type (map R rl)) ->
+  agree_frame j (Locmap.setlist (map R rl) vl ls) ls0 m sp m' sp' parent ra.
 Proof.
-  intros; red. destruct (mreg_type r). 
-  destruct (zlt (index_int_callee_save r) 0). 
-  generalize (bound_int_callee_save_pos b). omega.
-  exploit int_callee_save_not_destroyed. 
-  left. eauto with coqlib. apply index_int_callee_save_pos2; auto.
-  contradiction.
-  destruct (zlt (index_float_callee_save r) 0). 
-  generalize (bound_float_callee_save_pos b). omega.
-  exploit float_callee_save_not_destroyed. 
-  left. eauto with coqlib. apply index_float_callee_save_pos2; auto.
-  contradiction.
+  induction rl; destruct vl; simpl; intros; intuition.
+  apply IHrl; auto. 
+  eapply agree_frame_set_reg; eauto. 
 Qed.
 
-Lemma agree_frame_undef_locs:
+Lemma agree_frame_undef_regs:
   forall j ls0 m sp m' sp' parent ra regs ls,
   agree_frame j ls ls0 m sp m' sp' parent ra ->
-  incl (List.map R regs) temporaries ->
-  agree_frame j (Locmap.undef (List.map R regs) ls) ls0 m sp m' sp' parent ra.
+  (forall r, In r regs -> mreg_within_bounds b r) ->
+  agree_frame j (LTL.undef_regs regs ls) ls0 m sp m' sp' parent ra.
 Proof.
   induction regs; simpl; intros.
   auto.
-  apply IHregs; eauto with coqlib.
-  apply agree_frame_set_reg; auto. 
-  apply temporary_within_bounds; eauto with coqlib.
-  red; auto.
-Qed.
-
-Lemma agree_frame_undef_temps:
-  forall j ls ls0 m sp m' sp' parent ra,
-  agree_frame j ls ls0 m sp m' sp' parent ra ->
-  agree_frame j (LTL.undef_temps ls) ls0 m sp m' sp' parent ra.
-Proof.
-  intros. unfold temporaries. apply agree_frame_undef_locs; auto. apply incl_refl. 
+  apply agree_frame_set_reg; auto. red; auto.
 Qed.
 
-Lemma agree_frame_undef_setstack:
-  forall j ls ls0 m sp m' sp' parent ra,
-  agree_frame j ls ls0 m sp m' sp' parent ra ->
-  agree_frame j (Linear.undef_setstack ls) ls0 m sp m' sp' parent ra.
+Lemma caller_save_reg_within_bounds:
+  forall r,
+  In r destroyed_at_call -> mreg_within_bounds b r.
 Proof.
-  intros. unfold Linear.undef_setstack, destroyed_at_move. 
-  apply agree_frame_undef_locs; auto.
-  red; simpl; tauto.
+  intros. red.
+  destruct (zlt (index_int_callee_save r) 0).
+  destruct (zlt (index_float_callee_save r) 0).
+  generalize (bound_int_callee_save_pos b) (bound_float_callee_save_pos b); omega.
+  exfalso. eapply float_callee_save_not_destroyed; eauto. eapply index_float_callee_save_pos2; eauto.
+  exfalso. eapply int_callee_save_not_destroyed; eauto. eapply index_int_callee_save_pos2; eauto.
 Qed.
 
-Lemma agree_frame_undef_op:
-  forall j ls ls0 m sp m' sp' parent ra op,
+Lemma agree_frame_undef_locs:
+  forall j ls0 m sp m' sp' parent ra regs ls,
   agree_frame j ls ls0 m sp m' sp' parent ra ->
-  agree_frame j (Linear.undef_op op ls) ls0 m sp m' sp' parent ra.
+  incl regs destroyed_at_call ->
+  agree_frame j (LTL.undef_regs regs ls) ls0 m sp m' sp' parent ra.
 Proof.
-  intros. 
-  exploit agree_frame_undef_temps; eauto.
-  destruct op; simpl; auto; intros; apply agree_frame_undef_setstack; auto. 
+  intros. eapply agree_frame_undef_regs; eauto. 
+  intros. apply caller_save_reg_within_bounds. auto. 
 Qed.
 
 (** Preservation by assignment to local slot *)
@@ -845,31 +836,35 @@ Qed.
 Lemma agree_frame_set_local:
   forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
-  slot_within_bounds f b (Local ofs ty) ->
+  slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true ->
   val_inject j v v' ->
   Val.has_type v ty ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_local ofs ty)) v' = Some m'' ->
-  agree_frame j (Locmap.set (S (Local ofs ty)) v ls) ls0 m sp m'' sp' parent retaddr.
+  agree_frame j (Locmap.set (S Local ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H3.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H4.
   constructor; auto; intros.
 (* local *)
-  unfold Locmap.set. simpl. destruct (Loc.eq (S (Local ofs ty)) (S (Local ofs0 ty0))).
-  inv e. eapply gss_index_contains_inj; eauto. 
-  eapply gso_index_contains_inj. eauto. simpl; auto. eauto with stacking.
-  simpl. destruct (zeq ofs ofs0); auto. destruct (typ_eq ty ty0); auto. congruence.
+  unfold Locmap.set.
+  destruct (Loc.eq (S Local ofs ty) (S Local ofs0 ty0)).
+  inv e. eapply gss_index_contains_inj; eauto with stacking.
+  destruct (Loc.diff_dec (S Local ofs ty) (S Local ofs0 ty0)).
+  eapply gso_index_contains_inj. eauto. eauto with stacking. eauto. 
+  simpl. simpl in d. intuition.
+  apply index_contains_inj_undef. auto with stacking.
+  red; intros. eapply Mem.perm_store_1; eauto.
 (* outgoing *)
   rewrite Locmap.gso. eapply gso_index_contains_inj; eauto with stacking.
-  simpl; auto. red; auto.
+  red; auto. red; left; congruence. 
 (* parent *)
-  eapply gso_index_contains; eauto. red; auto.
+  eapply gso_index_contains; eauto with stacking. red; auto.
 (* retaddr *)
-  eapply gso_index_contains; eauto. red; auto.
+  eapply gso_index_contains; eauto with stacking. red; auto.
 (* int callee save *)
-  eapply gso_index_contains_inj; eauto. simpl; auto. 
+  eapply gso_index_contains_inj; eauto with stacking. simpl; auto. 
 (* float callee save *)
-  eapply gso_index_contains_inj; eauto. simpl; auto.
+  eapply gso_index_contains_inj; eauto with stacking. simpl; auto.
 (* valid *)
   eauto with mem.
 (* perm *)
@@ -883,25 +878,26 @@ Qed.
 Lemma agree_frame_set_outgoing:
   forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
-  slot_within_bounds f b (Outgoing ofs ty) ->
+  slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true ->
   val_inject j v v' ->
   Val.has_type v ty ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_arg ofs ty)) v' = Some m'' ->
-  agree_frame j (Locmap.set (S (Outgoing ofs ty)) v ls) ls0 m sp m'' sp' parent retaddr.
+  agree_frame j (Locmap.set (S Outgoing ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H3.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H4.
   constructor; auto; intros.
 (* local *)
-  rewrite Locmap.gso. eapply gso_index_contains_inj; eauto. simpl; auto. red; auto.
+  rewrite Locmap.gso. eapply gso_index_contains_inj; eauto with stacking. red; auto.
+  red; left; congruence.
 (* outgoing *)
-  unfold Locmap.set. simpl. destruct (Loc.eq (S (Outgoing ofs ty)) (S (Outgoing ofs0 ty0))).
-  inv e. eapply gss_index_contains_inj; eauto.
-  case_eq (Loc.overlap_aux ty ofs ofs0 || Loc.overlap_aux ty0 ofs0 ofs); intros.
-  apply index_contains_inj_undef. auto.
+  unfold Locmap.set. destruct (Loc.eq (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
+  inv e. eapply gss_index_contains_inj; eauto with stacking.
+  destruct (Loc.diff_dec (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
+  eapply gso_index_contains_inj; eauto with stacking.
+  red. red in d. intuition. 
+  apply index_contains_inj_undef. auto with stacking.
   red; intros. eapply Mem.perm_store_1; eauto.
-  eapply gso_index_contains_inj; eauto.
-  red. eapply Loc.overlap_aux_false_1; eauto.
 (* parent *)
   eapply gso_index_contains; eauto with stacking. red; auto.
 (* retaddr *)
@@ -1038,18 +1034,6 @@ Qed.
 
 (** Preservation at return points (when [ls] is changed but not [ls0]). *)
 
-Remark mreg_not_within_bounds_callee_save:
-  forall r,
-  ~mreg_within_bounds b r -> In r int_callee_save_regs \/ In r float_callee_save_regs.
-Proof.
-  intro r; unfold mreg_within_bounds.
-  destruct (mreg_type r); intro.
-  left. apply index_int_callee_save_pos2. 
-  generalize (bound_int_callee_save_pos b). omega.
-  right. apply index_float_callee_save_pos2. 
-  generalize (bound_float_callee_save_pos b). omega.
-Qed.
-
 Lemma agree_frame_return:
   forall j ls ls0 m sp m' sp' parent retaddr ls',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
@@ -1058,7 +1042,7 @@ Lemma agree_frame_return:
   agree_frame j ls' ls0 m sp m' sp' parent retaddr.
 Proof.
   intros. red in H0. inv H; constructor; auto; intros.
-  rewrite H0; auto. apply mreg_not_within_bounds_callee_save; auto. 
+  rewrite H0; auto. red; intros; elim H. apply caller_save_reg_within_bounds; auto. 
   rewrite H0; auto.
   rewrite H0; auto.
   rewrite H0; auto.
@@ -1073,13 +1057,12 @@ Lemma agree_frame_tailcall:
   agree_frame j ls ls0' m sp m' sp' parent retaddr.
 Proof.
   intros. red in H0. inv H; constructor; auto; intros.
-  rewrite <- H0; auto. apply mreg_not_within_bounds_callee_save; auto. 
-  rewrite <- H0; auto.
-  rewrite <- H0; auto.
+  rewrite <- H0; auto. red; intros; elim H. apply caller_save_reg_within_bounds; auto. 
   rewrite <- H0; auto.
+  rewrite <- H0. auto. red; intros. eapply int_callee_save_not_destroyed; eauto. 
+  rewrite <- H0. auto. red; intros. eapply float_callee_save_not_destroyed; eauto. 
 Qed.
 
-
 (** Properties of [agree_callee_save]. *)
 
 Lemma agree_callee_save_return_regs:
@@ -1088,21 +1071,107 @@ Lemma agree_callee_save_return_regs:
 Proof.
   intros; red; intros.
   unfold return_regs. destruct l; auto.
-  generalize (int_callee_save_not_destroyed m); intro.
-  generalize (float_callee_save_not_destroyed m); intro.
-  destruct (In_dec Loc.eq (R m) temporaries). tauto.
-  destruct (In_dec Loc.eq (R m) destroyed_at_call). tauto.
-  auto.
+  rewrite pred_dec_false; auto. 
 Qed.
 
 Lemma agree_callee_save_set_result:
-  forall ls1 ls2 v sg,
+  forall sg vl ls1 ls2,
   agree_callee_save ls1 ls2 ->
-  agree_callee_save (Locmap.set (R (loc_result sg)) v ls1) ls2.
+  agree_callee_save (Locmap.setlist (map R (loc_result sg)) vl ls1) ls2.
+Proof.
+  intros sg. generalize (loc_result_caller_save sg).
+  generalize (loc_result sg).
+Opaque destroyed_at_call.
+  induction l; simpl; intros.
+  auto.
+  destruct vl; auto. 
+  apply IHl; auto.
+  red; intros. rewrite Locmap.gso. apply H0; auto. 
+  destruct l0; simpl; auto. 
+Qed.
+
+(** Properties of destroyed registers. *)
+
+Lemma check_mreg_list_incl:
+  forall l1 l2,
+  forallb (fun r => In_dec mreg_eq r l2) l1 = true ->
+  incl l1 l2.
 Proof.
-  intros; red; intros. rewrite <- H; auto. 
-  apply Locmap.gso. destruct l; simpl; auto.
-  red; intro. subst m. elim (loc_result_not_callee_save _ H0).
+  intros; red; intros. 
+  rewrite forallb_forall in H. eapply proj_sumbool_true. eauto. 
+Qed.
+
+Remark destroyed_by_op_caller_save:
+  forall op, incl (destroyed_by_op op) destroyed_at_call.
+Proof.
+  destruct op; apply check_mreg_list_incl; compute; auto.
+Qed.
+
+Remark destroyed_by_load_caller_save:
+  forall chunk addr, incl (destroyed_by_load chunk addr) destroyed_at_call.
+Proof.
+  intros. destruct chunk; apply check_mreg_list_incl; compute; auto.
+Qed.
+
+Remark destroyed_by_store_caller_save:
+  forall chunk addr, incl (destroyed_by_store chunk addr) destroyed_at_call.
+Proof.
+  intros. destruct chunk; apply check_mreg_list_incl; compute; auto.
+Qed.
+
+Remark destroyed_by_cond_caller_save:
+  forall cond, incl (destroyed_by_cond cond) destroyed_at_call.
+Proof.
+  destruct cond; apply check_mreg_list_incl; compute; auto.
+Qed.
+
+Remark destroyed_by_jumptable_caller_save:
+  incl destroyed_by_jumptable destroyed_at_call.
+Proof.
+  apply check_mreg_list_incl; compute; auto.
+Qed.
+
+Remark destroyed_at_function_entry_caller_save:
+  incl destroyed_at_function_entry destroyed_at_call.
+Proof.
+  apply check_mreg_list_incl; compute; auto.
+Qed.
+
+Remark destroyed_by_move_at_function_entry:
+  incl (destroyed_by_op Omove) destroyed_at_function_entry.
+Proof.
+  apply check_mreg_list_incl; compute; auto.
+Qed.
+
+Remark temp_for_parent_frame_caller_save:
+  In temp_for_parent_frame destroyed_at_call.
+Proof.
+  Transparent temp_for_parent_frame.
+  Transparent destroyed_at_call.
+  unfold temp_for_parent_frame; simpl; tauto.
+Qed.
+
+Hint Resolve destroyed_by_op_caller_save destroyed_by_load_caller_save
+    destroyed_by_store_caller_save
+    destroyed_by_cond_caller_save destroyed_by_jumptable_caller_save
+    destroyed_at_function_entry_caller_save: stacking.
+
+Remark transl_destroyed_by_op:
+  forall op e, destroyed_by_op (transl_op e op) = destroyed_by_op op.
+Proof.
+  intros; destruct op; reflexivity.
+Qed.
+
+Remark transl_destroyed_by_load:
+  forall chunk addr e, destroyed_by_load chunk (transl_addr e addr) = destroyed_by_load chunk addr.
+Proof.
+  intros; destruct chunk; reflexivity.
+Qed.
+
+Remark transl_destroyed_by_store:
+  forall chunk addr e, destroyed_by_store chunk (transl_addr e addr) = destroyed_by_store chunk addr.
+Proof.
+  intros; destruct chunk; reflexivity.
 Qed.
 
 (** * Correctness of saving and restoring of callee-save registers *)
@@ -1157,7 +1226,7 @@ Hypothesis csregs_typ:
   forall r, In r csregs -> mreg_type r = ty.
 
 Hypothesis ls_temp_undef:
-  forall r, In r destroyed_at_move_regs -> ls (R r) = Vundef.
+  forall r, In r (destroyed_by_op Omove) -> ls (R r) = Vundef.
 
 Hypothesis wt_ls: wt_locset ls.
 
@@ -1200,18 +1269,18 @@ Proof.
   (* a store takes place *)
   exploit store_index_succeeds. apply (mkindex_valid a); auto with coqlib. 
   eauto. instantiate (1 := rs a). intros [m1 ST].
-  exploit (IHl k (undef_setstack rs) m1).  auto with coqlib. auto. 
+  exploit (IHl k (undef_regs (destroyed_by_op Omove) rs) m1).  auto with coqlib. auto. 
   red; eauto with mem.
   apply agree_regs_exten with ls rs. auto.
-  intros. destruct (In_dec mreg_eq r destroyed_at_move_regs). 
+  intros. destruct (In_dec mreg_eq r (destroyed_by_op Omove)).
   left. apply ls_temp_undef; auto. 
-  right; split. auto. unfold undef_setstack, undef_move. apply undef_regs_other; auto.
+  right; split. auto. apply undef_regs_other; auto.
   intros [rs' [m' [A [B [C [D [E F]]]]]]].
   exists rs'; exists m'. 
   split. eapply star_left; eauto. econstructor. 
   rewrite <- (mkindex_typ (number a)). 
   apply store_stack_succeeds; auto with coqlib.
-  traceEq.
+  auto. traceEq.
   split; intros.
   simpl in H3. destruct (mreg_eq a r). subst r.
   assert (index_contains_inj j m1 sp (mkindex (number a)) (ls (R a))).
@@ -1240,9 +1309,33 @@ Qed.
 
 End SAVE_CALLEE_SAVE.
 
+Remark LTL_undef_regs_same:
+  forall r rl ls, In r rl -> LTL.undef_regs rl ls (R r) = Vundef.
+Proof.
+  induction rl; simpl; intros. contradiction. 
+  unfold Locmap.set. destruct (Loc.eq (R a) (R r)). auto. 
+  destruct (Loc.diff_dec (R a) (R r)); auto. 
+  apply IHrl. intuition congruence.
+Qed.
+
+Remark LTL_undef_regs_others:
+  forall r rl ls, ~In r rl -> LTL.undef_regs rl ls (R r) = ls (R r).
+Proof.
+  induction rl; simpl; intros. auto.
+  rewrite Locmap.gso. apply IHrl. intuition. red. intuition. 
+Qed.
+
+Remark LTL_undef_regs_slot:
+  forall sl ofs ty rl ls, LTL.undef_regs rl ls (S sl ofs ty) = ls (S sl ofs ty).
+Proof.
+  induction rl; simpl; intros. auto.
+  rewrite Locmap.gso. apply IHrl. red; auto. 
+Qed.
+
 Lemma save_callee_save_correct:
-  forall j ls rs sp cs fb k m,
-  agree_regs j (call_regs ls) rs -> wt_locset (call_regs ls) ->
+  forall j ls0 rs sp cs fb k m,
+  let ls := LTL.undef_regs destroyed_at_function_entry ls0 in
+  agree_regs j ls rs -> wt_locset ls ->
   frame_perm_freeable m sp ->
   exists rs', exists m',
     star step tge 
@@ -1250,10 +1343,10 @@ Lemma save_callee_save_correct:
     E0 (State cs fb (Vptr sp Int.zero) k rs' m')
   /\ (forall r,
        In r int_callee_save_regs -> index_int_callee_save r < b.(bound_int_callee_save) ->
-       index_contains_inj j m' sp (FI_saved_int (index_int_callee_save r)) (call_regs ls (R r)))
+       index_contains_inj j m' sp (FI_saved_int (index_int_callee_save r)) (ls (R r)))
   /\ (forall r,
        In r float_callee_save_regs -> index_float_callee_save r < b.(bound_float_callee_save) ->
-       index_contains_inj j m' sp (FI_saved_float (index_float_callee_save r)) (call_regs ls (R r)))
+       index_contains_inj j m' sp (FI_saved_float (index_float_callee_save r)) (ls (R r)))
   /\ (forall idx v,
        index_valid idx ->
        match idx with FI_saved_int _ => False | FI_saved_float _ => False | _ => True end ->
@@ -1261,18 +1354,16 @@ Lemma save_callee_save_correct:
        index_contains m' sp idx v)
   /\ stores_in_frame sp m m'
   /\ frame_perm_freeable m' sp
-  /\ agree_regs j (call_regs ls) rs'.
+  /\ agree_regs j ls rs'.
 Proof.
   intros.
-  assert (ls_temp_undef: forall r, In r destroyed_at_move_regs -> call_regs ls (R r) = Vundef).
-    intros; unfold call_regs. apply pred_dec_true.
-Transparent destroyed_at_move_regs.
-    simpl in *; intuition congruence.
+  assert (UNDEF: forall r, In r (destroyed_by_op Omove) -> ls (R r) = Vundef).
+    intros. unfold ls. apply LTL_undef_regs_same. apply destroyed_by_move_at_function_entry; auto.
   exploit (save_callee_save_regs_correct 
              fe_num_int_callee_save
              index_int_callee_save
              FI_saved_int Tint
-             j cs fb sp int_callee_save_regs (call_regs ls)).
+             j cs fb sp int_callee_save_regs ls).
   intros. apply index_int_callee_save_inj; auto. 
   intros. simpl. split. apply Zge_le. apply index_int_callee_save_pos; auto. assumption.
   auto.
@@ -1290,7 +1381,7 @@ Transparent destroyed_at_move_regs.
              fe_num_float_callee_save
              index_float_callee_save
              FI_saved_float Tfloat
-             j cs fb sp float_callee_save_regs (call_regs ls)).
+             j cs fb sp float_callee_save_regs ls).
   intros. apply index_float_callee_save_inj; auto. 
   intros. simpl. split. apply Zge_le. apply index_float_callee_save_pos; auto. assumption.
   simpl; auto.
@@ -1366,10 +1457,12 @@ Qed.
   saving of the used callee-save registers). *)
 
 Lemma function_prologue_correct:
-  forall j ls ls0 rs m1 m1' m2 sp parent ra cs fb k,
+  forall j ls ls0 ls1 rs rs1 m1 m1' m2 sp parent ra cs fb k,
   agree_regs j ls rs ->
   agree_callee_save ls ls0 ->
   wt_locset ls ->
+  ls1 = LTL.undef_regs destroyed_at_function_entry (LTL.call_regs ls) ->
+  rs1 = undef_regs destroyed_at_function_entry rs ->
   Mem.inject j m1 m1' ->
   Mem.alloc m1 0 f.(Linear.fn_stacksize) = (m2, sp) ->
   Val.has_type parent Tint -> Val.has_type ra Tint ->
@@ -1378,16 +1471,16 @@ Lemma function_prologue_correct:
   /\ store_stack m2' (Vptr sp' Int.zero) Tint tf.(fn_link_ofs) parent = Some m3'
   /\ store_stack m3' (Vptr sp' Int.zero) Tint tf.(fn_retaddr_ofs) ra = Some m4'
   /\ star step tge 
-         (State cs fb (Vptr sp' Int.zero) (save_callee_save fe k) (undef_temps rs) m4')
+         (State cs fb (Vptr sp' Int.zero) (save_callee_save fe k) rs1 m4')
       E0 (State cs fb (Vptr sp' Int.zero) k rs' m5')
-  /\ agree_regs j' (call_regs ls) rs'
-  /\ agree_frame j' (call_regs ls) ls0 m2 sp m5' sp' parent ra
+  /\ agree_regs j' ls1 rs'
+  /\ agree_frame j' ls1 ls0 m2 sp m5' sp' parent ra
   /\ inject_incr j j'
   /\ inject_separated j j' m1 m1'
   /\ Mem.inject j' m2 m5'
   /\ stores_in_frame sp' m2' m5'.
 Proof.
-  intros until k; intros AGREGS AGCS WTREGS INJ1 ALLOC TYPAR TYRA.
+  intros until k; intros AGREGS AGCS WTREGS LS1 RS1 INJ1 ALLOC TYPAR TYRA.
   rewrite unfold_transf_function.
   unfold fn_stacksize, fn_link_ofs, fn_retaddr_ofs.
   (* Allocation step *)
@@ -1424,9 +1517,12 @@ Proof.
   assert (PERM4: frame_perm_freeable m4' sp').
     red; intros. eauto with mem. 
   exploit save_callee_save_correct. 
+    instantiate (1 := rs1). instantiate (1 := call_regs ls). instantiate (1 := j').
+    subst rs1. apply agree_regs_undef_regs. 
     apply agree_regs_call_regs. eapply agree_regs_inject_incr; eauto.
-    apply wt_call_regs. auto.
+    apply wt_undef_regs. apply wt_call_regs. auto.
     eexact PERM4.
+  rewrite <- LS1.
   intros [rs' [m5' [STEPS [ICS [FCS [OTHERS [STORES [PERM5 AGREGS']]]]]]]].
   (* stores in frames *)
   assert (SIF: stores_in_frame sp' m2' m5').
@@ -1460,15 +1556,20 @@ Proof.
   (* agree frame *)
   split. constructor; intros.
     (* unused regs *)
-    unfold call_regs. destruct (in_dec Loc.eq (R r) temporaries).
-    elim H. apply temporary_within_bounds; auto.
-    apply AGCS. apply mreg_not_within_bounds_callee_save; auto.
+    assert (~In r destroyed_at_call). 
+      red; intros; elim H; apply caller_save_reg_within_bounds; auto.
+    rewrite LS1. rewrite LTL_undef_regs_others. unfold call_regs. 
+    apply AGCS; auto. red; intros; elim H0. 
+    apply destroyed_at_function_entry_caller_save; auto.
     (* locals *)
-    simpl. apply index_contains_inj_undef; auto.
+    rewrite LS1. rewrite LTL_undef_regs_slot. unfold call_regs. 
+    apply index_contains_inj_undef; auto with stacking.
     (* outgoing *)
-    simpl. apply index_contains_inj_undef; auto.
+    rewrite LS1. rewrite LTL_undef_regs_slot. unfold call_regs. 
+    apply index_contains_inj_undef; auto with stacking.
     (* incoming *)
-    unfold call_regs. apply AGCS. auto.
+    rewrite LS1. rewrite LTL_undef_regs_slot. unfold call_regs.
+    apply AGCS; auto. 
     (* parent *)
     apply OTHERS; auto. red; auto.
     eapply gso_index_contains; eauto. red; auto.
@@ -1478,17 +1579,17 @@ Proof.
     apply OTHERS; auto. red; auto.
     eapply gss_index_contains; eauto. red; auto.
     (* int callee save *)
-    rewrite <- AGCS. replace (ls (R r)) with (call_regs ls (R r)).
-    apply ICS; auto.
-    unfold call_regs. apply pred_dec_false.
-    red; intros; exploit int_callee_save_not_destroyed; eauto. 
-    auto.
+    assert (~In r destroyed_at_call). 
+      red; intros. eapply int_callee_save_not_destroyed; eauto.
+    exploit ICS; eauto. rewrite LS1. rewrite LTL_undef_regs_others. unfold call_regs.
+    rewrite AGCS; auto. 
+    red; intros; elim H1. apply destroyed_at_function_entry_caller_save; auto.
     (* float callee save *)
-    rewrite <- AGCS. replace (ls (R r)) with (call_regs ls (R r)).
-    apply FCS; auto.
-    unfold call_regs. apply pred_dec_false.
-    red; intros; exploit float_callee_save_not_destroyed; eauto. 
-    auto.
+    assert (~In r destroyed_at_call). 
+      red; intros. eapply float_callee_save_not_destroyed; eauto.
+    exploit FCS; eauto. rewrite LS1. rewrite LTL_undef_regs_others. unfold call_regs.
+    rewrite AGCS; auto. 
+    red; intros; elim H1. apply destroyed_at_function_entry_caller_save; auto.
     (* inj *)
     auto.
     (* inj_unique *)
@@ -1502,7 +1603,7 @@ Proof.
     (* perms *)
     auto.
     (* wt *)
-    apply wt_call_regs; auto.
+    rewrite LS1. apply wt_undef_regs. apply wt_call_regs; auto.
   (* incr *)
   split. auto.
   (* separated *)
@@ -1625,7 +1726,12 @@ Proof.
              Tint
              int_callee_save_regs
              j cs fb sp' ls0 m'); auto.
-  intros. unfold mreg_within_bounds. rewrite (int_callee_save_type r H1). tauto.
+  intros. unfold mreg_within_bounds. split; intros.
+  split; auto. destruct (zlt (index_float_callee_save r) 0).
+  generalize (bound_float_callee_save_pos b). omega. 
+  eelim int_float_callee_save_disjoint. eauto. 
+  eapply index_float_callee_save_pos2. eauto. auto.
+  destruct H2; auto. 
   eapply agree_saved_int; eauto. 
   apply incl_refl.
   apply int_callee_save_norepet.
@@ -1638,7 +1744,12 @@ Proof.
              Tfloat
              float_callee_save_regs
              j cs fb sp' ls0 m'); auto.
-  intros. unfold mreg_within_bounds. rewrite (float_callee_save_type r H1). tauto.
+  intros. unfold mreg_within_bounds. split; intros.
+  split; auto. destruct (zlt (index_int_callee_save r) 0).
+  generalize (bound_int_callee_save_pos b). omega. 
+  eelim int_float_callee_save_disjoint. 
+  eapply index_int_callee_save_pos2. eauto. eauto. auto.
+  destruct H2; auto. 
   eapply agree_saved_float; eauto. 
   apply incl_refl.
   apply float_callee_save_norepet.
@@ -1672,7 +1783,6 @@ Lemma function_epilogue_correct:
     E0 (State cs fb (Vptr sp' Int.zero) k rs1 m')
   /\ agree_regs j (return_regs ls0 ls) rs1
   /\ agree_callee_save (return_regs ls0 ls) ls0
-  /\ rs1 IT1 = rs IT1
   /\ Mem.inject j m1 m1'.
 Proof.
   intros.
@@ -1707,16 +1817,12 @@ Proof.
     eapply index_contains_load_stack with (idx := FI_retaddr); eauto with stacking.
   split. auto.
   split. eexact A.
-  split. red;intros. unfold return_regs. 
+  split. red; intros. unfold return_regs.
     generalize (register_classification r) (int_callee_save_not_destroyed r) (float_callee_save_not_destroyed r); intros.
-    destruct (in_dec Loc.eq (R r) temporaries).
+    destruct (in_dec mreg_eq r destroyed_at_call). 
     rewrite C; auto. 
-    destruct (in_dec Loc.eq (R r) destroyed_at_call).
-    rewrite C; auto.
-    intuition.
+    apply B. intuition. 
   split. apply agree_callee_save_return_regs.
-  split. apply C. apply int_callee_save_not_destroyed. left; simpl; auto. 
-  apply float_callee_save_not_destroyed. left; simpl; auto.
   auto.
 Qed.
 
@@ -1741,15 +1847,15 @@ Inductive match_stacks (j: meminj) (m m': mem):
       match_stacks j m m' nil nil sg bound bound'
   | match_stacks_cons: forall f sp ls c cs fb sp' ra c' cs' sg bound bound' trf
         (TAIL: is_tail c (Linear.fn_code f))
-        (WTF: wt_function f)
+        (WTF: wt_function f = true)
         (FINDF: Genv.find_funct_ptr tge fb = Some (Internal trf))
         (TRF: transf_function f = OK trf)
         (TRC: transl_code (make_env (function_bounds f)) c = c')
         (TY_RA: Val.has_type ra Tint)
         (FRM: agree_frame f j ls (parent_locset cs) m sp m' sp' (parent_sp cs') (parent_ra cs'))
         (ARGS: forall ofs ty,
-             In (S (Outgoing ofs ty)) (loc_arguments sg) ->
-             slot_within_bounds f (function_bounds f) (Outgoing ofs ty))
+           In (S Outgoing ofs ty) (loc_arguments sg) ->
+           slot_within_bounds (function_bounds f) Outgoing ofs ty)
         (STK: match_stacks j m m' cs cs' (Linear.fn_sig f) sp sp')
         (BELOW: sp < bound)
         (BELOW': sp' < bound'),
@@ -1903,8 +2009,9 @@ Lemma match_stacks_change_sig:
   tailcall_possible sg1 ->
   match_stacks j m m' cs cs' sg1 bound bound'.
 Proof.
-  induction 1; intros. econstructor; eauto. econstructor; eauto. 
-  intros. elim (H0 _ H1). 
+  induction 1; intros.
+  econstructor; eauto. 
+  econstructor; eauto. intros. elim (H0 _ H1).
 Qed.
 
 (** [match_stacks] implies [match_globalenvs], which implies [meminj_preserves_globals]. *)
@@ -2182,18 +2289,6 @@ Proof.
   rewrite symbols_preserved. auto.
 Qed.
 
-Hypothesis wt_prog: wt_program prog.
-
-Lemma find_function_well_typed:
-  forall ros ls f,
-  Linear.find_function ge ros ls = Some f -> wt_fundef f.
-Proof.
-  intros until f; destruct ros; simpl; unfold ge.
-  intro. eapply Genv.find_funct_prop; eauto.
-  destruct (Genv.find_symbol (Genv.globalenv prog) i); try congruence.
-  intro. eapply Genv.find_funct_ptr_prop; eauto. 
-Qed.
-
 (** Preservation of the arguments to an external call. *)
 
 Section EXTERNAL_ARGUMENTS.
@@ -2218,15 +2313,21 @@ Proof.
   intros.
   assert (loc_argument_acceptable l). apply loc_arguments_acceptable with sg; auto.
   destruct l; red in H0.
-  exists (rs m0); split. constructor. auto. 
-  destruct s; try contradiction.
-  inv MS. 
+  exists (rs r); split. constructor. auto. 
+  destruct sl; try contradiction.
+  inv MS.
   elim (H4 _ H).
-  unfold parent_sp. 
+  unfold parent_sp.
+  assert (slot_valid f Outgoing pos ty = true).
+    exploit loc_arguments_acceptable; eauto. intros [A B]. 
+    unfold slot_valid. unfold proj_sumbool. rewrite zle_true. 
+    destruct ty; reflexivity || congruence. omega.
+  assert (slot_within_bounds (function_bounds f) Outgoing pos ty).
+    eauto.
   exploit agree_outgoing; eauto. intros [v [A B]].
   exists v; split.
   constructor. 
-  eapply index_contains_load_stack with (idx := FI_arg z t); eauto. 
+  eapply index_contains_load_stack with (idx := FI_arg pos ty); eauto. 
   red in AGCS. rewrite AGCS; auto.
 Qed.
 
@@ -2273,37 +2374,34 @@ Hypothesis AGF: agree_frame f j ls ls0 m sp m' sp' parent retaddr.
 
 Lemma transl_annot_param_correct:
   forall l,
-  loc_acceptable l -> 
-  match l with S s => slot_within_bounds f b s | _ => True end ->
+  loc_valid f l = true ->
+  match l with S sl ofs ty => slot_within_bounds b sl ofs ty | _ => True end ->
   exists v, annot_arg rs m' (Vptr sp' Int.zero) (transl_annot_param fe l) v
          /\ val_inject j (ls l) v.
 Proof.
-  intros. destruct l; red in H. 
+  intros. destruct l; simpl in H.
 (* reg *)
-  exists (rs m0); split. constructor. auto.
+  exists (rs r); split. constructor. auto.
 (* stack *) 
-  destruct s; try contradiction.
+  destruct sl; try discriminate. 
   exploit agree_locals; eauto. intros [v [A B]]. inv A. 
   exists v; split. constructor. rewrite Zplus_0_l. auto. auto.
 Qed.
 
 Lemma transl_annot_params_correct:
   forall ll,
-  locs_acceptable ll ->
-  (forall s, In (S s) ll -> slot_within_bounds f b s) ->
+  forallb (loc_valid f) ll = true ->
+  (forall sl ofs ty, In (S sl ofs ty) ll -> slot_within_bounds b sl ofs ty) ->
   exists vl,
      annot_arguments rs m' (Vptr sp' Int.zero) (map (transl_annot_param fe) ll) vl
   /\ val_list_inject j (map ls ll) vl.
 Proof.
-  induction ll; intros. 
+  induction ll; simpl; intros. 
   exists (@nil val); split; constructor.
-  exploit (transl_annot_param_correct a). 
-    apply H; auto with coqlib.
-    destruct a; auto with coqlib.
+  InvBooleans. 
+  exploit (transl_annot_param_correct a). auto. destruct a; auto. 
   intros [v1 [A B]].
-  exploit IHll.
-    red; intros; apply H; auto with coqlib.
-    intros; apply H0; auto with coqlib.
+  exploit IHll. auto. auto. 
   intros [vl [C D]].
   exists (v1 :: vl); split; constructor; auto.
 Qed.
@@ -2339,7 +2437,7 @@ Inductive match_states: Linear.state -> Mach.state -> Prop :=
         (STACKS: match_stacks j m m' cs cs' f.(Linear.fn_sig) sp sp')
         (TRANSL: transf_function f = OK tf)
         (FIND: Genv.find_funct_ptr tge fb = Some (Internal tf))
-        (WTF: wt_function f)
+        (WTF: wt_function f = true)
         (AGREGS: agree_regs j ls rs)
         (AGFRAME: agree_frame f j ls (parent_locset cs) m sp m' sp' (parent_sp cs') (parent_ra cs'))
         (TAIL: is_tail c (Linear.fn_code f)),
@@ -2351,7 +2449,6 @@ Inductive match_states: Linear.state -> Mach.state -> Prop :=
         (STACKS: match_stacks j m m' cs cs' (Linear.funsig f) (Mem.nextblock m) (Mem.nextblock m'))
         (TRANSL: transf_fundef f = OK tf)
         (FIND: Genv.find_funct_ptr tge fb = Some tf)
-        (WTF: wt_fundef f)
         (WTLS: wt_locset ls)
         (AGREGS: agree_regs j ls rs)
         (AGLOCS: agree_callee_save ls (parent_locset cs)),
@@ -2372,16 +2469,21 @@ Theorem transf_step_correct:
   forall s1' (MS: match_states s1 s1'),
   exists s2', plus step tge s1' t s2' /\ match_states s2 s2'.
 Proof.
+  assert (USEWTF: forall f i c,
+          wt_function f = true -> is_tail (i :: c) (Linear.fn_code f) ->
+          wt_instr f i = true).
+    intros. unfold wt_function, wt_code in H. rewrite forallb_forall in H.
+    apply H. eapply is_tail_in; eauto. 
   induction 1; intros;
   try inv MS;
   try rewrite transl_code_eq;
-  try (generalize (WTF _ (is_tail_in TAIL)); intro WTI);
-  try (generalize (function_is_within_bounds f WTF _ (is_tail_in TAIL));
+  try (generalize (USEWTF _ _ _ WTF TAIL); intro WTI; simpl in WTI; InvBooleans);
+  try (generalize (function_is_within_bounds f _ (is_tail_in TAIL));
        intro BOUND; simpl in BOUND);
   unfold transl_instr.
 
   (* Lgetstack *)
-  inv WTI. destruct BOUND. unfold undef_getstack; destruct sl.
+  destruct BOUND. unfold destroyed_by_getstack; destruct sl.
   (* Lgetstack, local *)
   exploit agree_locals; eauto. intros [v [A B]].
   econstructor; split.
@@ -2389,26 +2491,33 @@ Proof.
   eapply index_contains_load_stack; eauto.
   econstructor; eauto with coqlib.
   apply agree_regs_set_reg; auto.
-  apply agree_frame_set_reg; auto. simpl; rewrite <- H1. eapply agree_wt_ls; eauto.
+  apply agree_frame_set_reg; auto. simpl. rewrite <- H. eapply agree_wt_ls; eauto.
   (* Lgetstack, incoming *)
-  red in H2. exploit incoming_slot_in_parameters; eauto. intros IN_ARGS.
-  inv STACKS. elim (H6 _ IN_ARGS). 
+  unfold slot_valid in H0. InvBooleans.
+  exploit incoming_slot_in_parameters; eauto. intros IN_ARGS.
+  inversion STACKS; clear STACKS.
+  elim (H7 _ IN_ARGS).
+  subst bound bound' s cs'.
   exploit agree_outgoing. eexact FRM. eapply ARGS; eauto.
+  exploit loc_arguments_acceptable; eauto. intros [A B].
+  unfold slot_valid, proj_sumbool. rewrite zle_true. 
+  destruct ty; reflexivity || congruence. omega.
   intros [v [A B]].
   econstructor; split.
   apply plus_one. eapply exec_Mgetparam; eauto. 
   rewrite (unfold_transf_function _ _ TRANSL). unfold fn_link_ofs. 
   eapply index_contains_load_stack with (idx := FI_link). eapply TRANSL. eapply agree_link; eauto.
   simpl parent_sp.
-  change (offset_of_index (make_env (function_bounds f)) (FI_arg z t))
-    with (offset_of_index (make_env (function_bounds f0)) (FI_arg z t)).
-  eapply index_contains_load_stack with (idx := FI_arg z t). eauto. eauto.
+  change (offset_of_index (make_env (function_bounds f)) (FI_arg ofs ty))
+    with (offset_of_index (make_env (function_bounds f0)) (FI_arg ofs ty)).
+  eapply index_contains_load_stack with (idx := FI_arg ofs ty). eauto. eauto.
   exploit agree_incoming; eauto. intros EQ; simpl in EQ.
   econstructor; eauto with coqlib. econstructor; eauto.
   apply agree_regs_set_reg. apply agree_regs_set_reg. auto. auto. congruence. 
   eapply agree_frame_set_reg; eauto. eapply agree_frame_set_reg; eauto. 
-  apply temporary_within_bounds. simpl; auto. 
-  simpl; auto. simpl; rewrite <- H1. eapply agree_wt_ls; eauto.
+  apply caller_save_reg_within_bounds. 
+  apply temp_for_parent_frame_caller_save.
+  subst ty. simpl. eapply agree_wt_ls; eauto.
   (* Lgetstack, outgoing *)
   exploit agree_outgoing; eauto. intros [v [A B]].
   econstructor; split.
@@ -2416,14 +2525,13 @@ Proof.
   eapply index_contains_load_stack; eauto.
   econstructor; eauto with coqlib.
   apply agree_regs_set_reg; auto.
-  apply agree_frame_set_reg; auto. simpl; rewrite <- H1; eapply agree_wt_ls; eauto.
+  apply agree_frame_set_reg; auto. simpl; rewrite <- H; eapply agree_wt_ls; eauto.
 
   (* Lsetstack *)
-  inv WTI.
   set (idx := match sl with
-              | Local ofs ty => FI_local ofs ty
-              | Incoming ofs ty => FI_link (*dummy*)
-              | Outgoing ofs ty => FI_arg ofs ty
+              | Local => FI_local ofs ty
+              | Incoming => FI_link (*dummy*)
+              | Outgoing => FI_arg ofs ty
               end).
   assert (index_valid f idx).
     unfold idx; destruct sl.
@@ -2431,13 +2539,13 @@ Proof.
     red; auto.
     apply index_arg_valid; auto.
   exploit store_index_succeeds; eauto. eapply agree_perm; eauto.
-  instantiate (1 := rs0 r). intros [m1' STORE].
+  instantiate (1 := rs0 src). intros [m1' STORE].
   econstructor; split.
-  apply plus_one. destruct sl; simpl in H3.
-    econstructor. eapply store_stack_succeeds with (idx := idx); eauto. 
-    contradiction.
-    econstructor. eapply store_stack_succeeds with (idx := idx); eauto. 
-  econstructor; eauto with coqlib.
+  apply plus_one. destruct sl; simpl in H0.
+    econstructor. eapply store_stack_succeeds with (idx := idx); eauto. eauto.
+    discriminate.
+    econstructor. eapply store_stack_succeeds with (idx := idx); eauto. auto. 
+  econstructor. 
   eapply Mem.store_outside_inject; eauto. 
     intros. exploit agree_inj_unique; eauto. intros [EQ1 EQ2]; subst b' delta.
     rewrite size_type_chunk in H5.
@@ -2446,20 +2554,30 @@ Proof.
     omega.
   apply match_stacks_change_mach_mem with m'; auto.
   eauto with mem. eauto with mem. intros. rewrite <- H4; eapply Mem.load_store_other; eauto. left; unfold block; omega.
-  apply agree_regs_set_slot. apply agree_regs_undef_setstack; auto. 
+  eauto. eauto. auto. 
+  apply agree_regs_set_slot. apply agree_regs_undef_regs; auto. 
   destruct sl.
-    eapply agree_frame_set_local. eapply agree_frame_undef_setstack; eauto. auto. auto.
-    simpl in H1; rewrite H1; eapply agree_wt_ls; eauto. auto.
-    simpl in H3; contradiction.
-    eapply agree_frame_set_outgoing. apply agree_frame_undef_setstack; eauto. auto. auto.
-    simpl in H1; rewrite H1; eapply agree_wt_ls; eauto. auto.
+    eapply agree_frame_set_local. eapply agree_frame_undef_locs; eauto.
+    apply destroyed_by_op_caller_save. auto. auto. apply AGREGS. 
+    rewrite H; eapply agree_wt_ls; eauto.
+    assumption. 
+    simpl in H0; discriminate.
+    eapply agree_frame_set_outgoing. eapply agree_frame_undef_locs; eauto.
+    apply destroyed_by_op_caller_save. auto. auto. apply AGREGS. 
+    rewrite H; eapply agree_wt_ls; eauto.
+    assumption. 
+  eauto with coqlib.
 
   (* Lop *)
   assert (Val.has_type v (mreg_type res)).
-    inv WTI. simpl in H. inv H. rewrite <- H1. eapply agree_wt_ls; eauto. 
+    destruct (is_move_operation op args) as [arg|] eqn:?.
+    exploit is_move_operation_correct; eauto. intros [EQ1 EQ2]; subst op args. 
+    InvBooleans. simpl in H. inv H. rewrite <- H0. eapply agree_wt_ls; eauto. 
     replace (mreg_type res) with (snd (type_of_operation op)).
-    eapply type_of_operation_sound; eauto. 
-    rewrite <- H4; auto.
+    eapply type_of_operation_sound; eauto.
+    red; intros. subst op. simpl in Heqo.
+    destruct args; simpl in H. discriminate. destruct args. discriminate. simpl in H. discriminate.
+    destruct (type_of_operation op) as [targs tres]. InvBooleans. auto. 
   assert (exists v',
           eval_operation ge (Vptr sp' Int.zero) (transl_op (make_env (function_bounds f)) op) rs0##args m' = Some v'
        /\ val_inject j v v').
@@ -2468,12 +2586,14 @@ Proof.
   eapply agree_inj; eauto. eapply agree_reglist; eauto.
   destruct H1 as [v' [A B]].
   econstructor; split. 
-  apply plus_one. constructor. 
+  apply plus_one. econstructor. 
   instantiate (1 := v'). rewrite <- A. apply eval_operation_preserved. 
-  exact symbols_preserved.
+  exact symbols_preserved. eauto.
   econstructor; eauto with coqlib.
-  apply agree_regs_set_reg; auto. apply agree_regs_undef_op; auto. 
-  apply agree_frame_set_reg; auto. apply agree_frame_undef_op; auto.
+  apply agree_regs_set_reg; auto.
+  rewrite transl_destroyed_by_op.  apply agree_regs_undef_regs; auto. 
+  apply agree_frame_set_reg; auto. apply agree_frame_undef_locs; auto.
+  apply destroyed_by_op_caller_save.
 
   (* Lload *)
   assert (exists a',
@@ -2482,17 +2602,17 @@ Proof.
   eapply eval_addressing_inject; eauto. 
   eapply match_stacks_preserves_globals; eauto.
   eapply agree_inj; eauto. eapply agree_reglist; eauto.
-  destruct H1 as [a' [A B]].
+  destruct H2 as [a' [A B]].
   exploit Mem.loadv_inject; eauto. intros [v' [C D]].
   econstructor; split. 
   apply plus_one. econstructor. 
   instantiate (1 := a'). rewrite <- A. apply eval_addressing_preserved. exact symbols_preserved.
-  eexact C.
+  eexact C. eauto.
   econstructor; eauto with coqlib.
-  apply agree_regs_set_reg; auto. apply agree_regs_undef_temps; auto. 
-  apply agree_frame_set_reg; auto. apply agree_frame_undef_temps; auto.
-  simpl. inv WTI. rewrite H6. 
-  inv B; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto. 
+  apply agree_frame_set_reg. apply agree_frame_undef_locs; auto.
+  apply destroyed_by_load_caller_save. auto. 
+  simpl. rewrite H1. destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
 
   (* Lstore *)
   assert (exists a',
@@ -2506,12 +2626,14 @@ Proof.
   econstructor; split. 
   apply plus_one. econstructor. 
   instantiate (1 := a'). rewrite <- A. apply eval_addressing_preserved. exact symbols_preserved.
-  eexact C.
+  eexact C. eauto.
   econstructor; eauto with coqlib.
   eapply match_stacks_parallel_stores. eexact MINJ. eexact B. eauto. eauto. auto. 
-  apply agree_regs_undef_temps; auto. 
-  apply agree_frame_undef_temps; auto.
+  rewrite transl_destroyed_by_store. 
+  apply agree_regs_undef_regs; auto. 
+  apply agree_frame_undef_locs; auto.
   eapply agree_frame_parallel_stores; eauto.
+  apply destroyed_by_store_caller_save. 
 
   (* Lcall *)
   exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]].
@@ -2524,84 +2646,80 @@ Proof.
   econstructor; eauto.
   econstructor; eauto with coqlib.
   simpl; auto.
-  intros; red. split.
-    generalize (loc_arguments_acceptable _ _ H0). simpl. omega.
+  intros; red.
     apply Zle_trans with (size_arguments (Linear.funsig f')); auto.
     apply loc_arguments_bounded; auto.
   eapply agree_valid_linear; eauto.
   eapply agree_valid_mach; eauto.
-  eapply find_function_well_typed; eauto.
   eapply agree_wt_ls; eauto. 
   simpl; red; auto.
 
   (* Ltailcall *)
-  exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]].
   exploit function_epilogue_correct; eauto.
-  intros [rs1 [m1' [P [Q [R [S [T [U [V W]]]]]]]]].
+  intros [rs1 [m1' [P [Q [R [S [T [U V]]]]]]]].
+  exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]].
   econstructor; split.
-  eapply plus_right. eexact S. econstructor; eauto.
-  replace (find_function_ptr tge ros rs1)
-     with (find_function_ptr tge ros rs0). eauto.
-  destruct ros; simpl; auto. inv WTI. rewrite V; auto. 
-  traceEq.
+  eapply plus_right. eexact S. econstructor; eauto. traceEq.
   econstructor; eauto.
-  inv WTI. apply match_stacks_change_sig with (Linear.fn_sig f); auto.
+  apply match_stacks_change_sig with (Linear.fn_sig f); auto.
   apply match_stacks_change_bounds with stk sp'.
   apply match_stacks_change_linear_mem with m. 
   apply match_stacks_change_mach_mem with m'0.
   auto. 
   eauto with mem. intros. eapply Mem.perm_free_1; eauto. left; unfold block; omega. 
-  intros. rewrite <- H2. eapply Mem.load_free; eauto. left; unfold block; omega.
+  intros. rewrite <- H3. eapply Mem.load_free; eauto. left; unfold block; omega.
   eauto with mem. intros. eapply Mem.perm_free_3; eauto. 
   apply Zlt_le_weak. change (Mem.valid_block m' stk). eapply Mem.valid_block_free_1; eauto. eapply agree_valid_linear; eauto. 
   apply Zlt_le_weak. change (Mem.valid_block m1' sp'). eapply Mem.valid_block_free_1; eauto. eapply agree_valid_mach; eauto. 
-  eapply find_function_well_typed; eauto.
+  apply zero_size_arguments_tailcall_possible; auto. 
   apply wt_return_regs; auto. eapply match_stacks_wt_locset; eauto. eapply agree_wt_ls; eauto.
 
   (* Lbuiltin *)
-  exploit external_call_mem_inject; eauto. 
+  exploit external_call_mem_inject'; eauto. 
     eapply match_stacks_preserves_globals; eauto.
     eapply agree_reglist; eauto. 
   intros [j' [res' [m1' [A [B [C [D [E [F G]]]]]]]]].
   econstructor; split.
   apply plus_one. econstructor; eauto. 
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto with coqlib.
+  inversion H; inversion A; subst.
   eapply match_stack_change_extcall; eauto.
   apply Zlt_le_weak. change (Mem.valid_block m sp0). eapply agree_valid_linear; eauto.
   apply Zlt_le_weak. change (Mem.valid_block m'0 sp'). eapply agree_valid_mach; eauto.
-  apply agree_regs_set_reg; auto. apply agree_regs_undef_temps; auto. eapply agree_regs_inject_incr; eauto.
-  apply agree_frame_set_reg; auto. apply agree_frame_undef_temps; auto.
+  apply agree_regs_set_regs; auto. apply agree_regs_undef_regs; auto. eapply agree_regs_inject_incr; eauto.
+  apply agree_frame_set_regs; auto. apply agree_frame_undef_regs; auto.
   eapply agree_frame_inject_incr; eauto. 
   apply agree_frame_extcall_invariant with m m'0; auto.
-  eapply external_call_valid_block; eauto.
-  intros. eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto.
-  eapply external_call_valid_block; eauto.
+  eapply external_call_valid_block'; eauto.
+  intros. inv H; eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto.
+  eapply external_call_valid_block'; eauto.
   eapply agree_valid_mach; eauto.
-  inv WTI. simpl; rewrite H4. eapply external_call_well_typed; eauto.
+  simpl. rewrite list_map_compose.
+  change (fun x => Loc.type (R x)) with mreg_type.
+  rewrite H0. eapply external_call_well_typed'; eauto.
 
   (* Lannot *)
-  inv WTI.
   exploit transl_annot_params_correct; eauto.
   intros [vargs' [P Q]]. 
-  exploit external_call_mem_inject; eauto. 
+  exploit external_call_mem_inject'; eauto. 
     eapply match_stacks_preserves_globals; eauto.
   intros [j' [res' [m1' [A [B [C [D [E [F G]]]]]]]]].
   econstructor; split.
   apply plus_one. econstructor; eauto. 
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto with coqlib.
-  eapply match_stack_change_extcall; eauto.
+  inv H; inv A. eapply match_stack_change_extcall; eauto.
   apply Zlt_le_weak. change (Mem.valid_block m sp0). eapply agree_valid_linear; eauto.
   apply Zlt_le_weak. change (Mem.valid_block m'0 sp'). eapply agree_valid_mach; eauto.
   eapply agree_regs_inject_incr; eauto.
   eapply agree_frame_inject_incr; eauto. 
   apply agree_frame_extcall_invariant with m m'0; auto.
-  eapply external_call_valid_block; eauto.
-  intros. eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto.
-  eapply external_call_valid_block; eauto.
+  eapply external_call_valid_block'; eauto.
+  intros. inv H; eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto.
+  eapply external_call_valid_block'; eauto.
   eapply agree_valid_mach; eauto.
 
   (* Llabel *)
@@ -2620,19 +2738,20 @@ Proof.
   econstructor; split.
   apply plus_one. eapply exec_Mcond_true; eauto.
   eapply eval_condition_inject; eauto. eapply agree_reglist; eauto.
-  eapply transl_find_label; eauto. 
-  econstructor; eauto with coqlib.
-  apply agree_regs_undef_temps; auto.
-  apply agree_frame_undef_temps; auto.
+  eapply transl_find_label; eauto.
+  econstructor. eauto. eauto. eauto. eauto. eauto. 
+  apply agree_regs_undef_regs; auto. 
+  apply agree_frame_undef_locs; auto. apply destroyed_by_cond_caller_save. 
   eapply find_label_tail; eauto.
 
   (* Lcond, false *)
   econstructor; split.
   apply plus_one. eapply exec_Mcond_false; eauto.
   eapply eval_condition_inject; eauto. eapply agree_reglist; eauto.
-  econstructor; eauto with coqlib.
-  apply agree_regs_undef_temps; auto.
-  apply agree_frame_undef_temps; auto.
+  econstructor. eauto. eauto. eauto. eauto. eauto. 
+  apply agree_regs_undef_regs; auto. 
+  apply agree_frame_undef_locs; auto. apply destroyed_by_cond_caller_save. 
+  eauto with coqlib.
 
   (* Ljumptable *)
   assert (rs0 arg = Vint n).
@@ -2640,14 +2759,14 @@ Proof.
   econstructor; split.
   apply plus_one; eapply exec_Mjumptable; eauto. 
   apply transl_find_label; eauto.
-  econstructor; eauto.
-  apply agree_regs_undef_temps; auto.
-  apply agree_frame_undef_temps; auto.
+  econstructor. eauto. eauto. eauto. eauto. eauto.
+  apply agree_regs_undef_regs; auto.
+  apply agree_frame_undef_locs; auto. apply destroyed_by_jumptable_caller_save.
   eapply find_label_tail; eauto.
 
   (* Lreturn *)
   exploit function_epilogue_correct; eauto.
-  intros [rs1 [m1' [P [Q [R [S [T [U [V W]]]]]]]]].
+  intros [rs1 [m1' [P [Q [R [S [T [U V]]]]]]]].
   econstructor; split.
   eapply plus_right. eexact S. econstructor; eauto.
   traceEq.
@@ -2667,7 +2786,6 @@ Proof.
   revert TRANSL. unfold transf_fundef, transf_partial_fundef.
   caseEq (transf_function f); simpl; try congruence.
   intros tfn TRANSL EQ. inversion EQ; clear EQ; subst tf.
-  inversion WTF as [|f' WTFN]. subst f'.
   exploit function_prologue_correct; eauto.
   eapply match_stacks_type_sp; eauto. 
   eapply match_stacks_type_retaddr; eauto.
@@ -2689,30 +2807,28 @@ Proof.
   intros. eapply stores_in_frame_perm; eauto with mem.
   intros. rewrite <- H1. transitivity (Mem.load chunk m2' b ofs). eapply stores_in_frame_contents; eauto.
   eapply Mem.load_alloc_unchanged; eauto. red. congruence.
+  eapply transf_function_well_typed; eauto.
   auto with coqlib.
 
   (* external function *)
   simpl in TRANSL. inversion TRANSL; subst tf.
-  inversion WTF. subst ef0.
   exploit transl_external_arguments; eauto. intros [vl [ARGS VINJ]].
-  exploit external_call_mem_inject; eauto. 
+  exploit external_call_mem_inject'; eauto. 
   eapply match_stacks_preserves_globals; eauto.
   intros [j' [res' [m1' [A [B [C [D [E [F G]]]]]]]]].
   econstructor; split.
   apply plus_one. eapply exec_function_external; eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto.
   apply match_stacks_change_bounds with (Mem.nextblock m) (Mem.nextblock m'0).
-  eapply match_stack_change_extcall; eauto. omega. omega.
-  exploit external_call_valid_block. eexact H. 
+  inv H0; inv A. eapply match_stack_change_extcall; eauto. omega. omega.
+  exploit external_call_valid_block'. eexact H0. 
     instantiate (1 := (Mem.nextblock m - 1)). red; omega. unfold Mem.valid_block; omega.
-  exploit external_call_valid_block. eexact A. 
+  exploit external_call_valid_block'. eexact A. 
     instantiate (1 := (Mem.nextblock m'0 - 1)). red; omega. unfold Mem.valid_block; omega.
-  apply wt_setloc; auto. simpl. rewrite loc_result_type.
-  change (Val.has_type res (proj_sig_res (ef_sig ef))). 
-  eapply external_call_well_typed; eauto. 
-  apply agree_regs_set_reg; auto. apply agree_regs_inject_incr with j; auto. 
+  inv H0. apply wt_setlist_result. eapply external_call_well_typed; eauto. auto.
+  apply agree_regs_set_regs; auto. apply agree_regs_inject_incr with j; auto. 
   apply agree_callee_save_set_result; auto. 
 
   (* return *)
@@ -2745,8 +2861,6 @@ Proof.
     intros. change (Mem.valid_block m0 b0). eapply Genv.find_funct_ptr_not_fresh; eauto.
     intros. change (Mem.valid_block m0 b0). eapply Genv.find_var_info_not_fresh; eauto.
   rewrite H3. red; intros. contradiction.
-  eapply Genv.find_funct_ptr_prop. eexact wt_prog. 
-  fold ge0; eauto.
   apply wt_init.
   unfold Locmap.init. red; intros; auto.
   unfold parent_locset. red; auto.
@@ -2757,9 +2871,8 @@ Lemma transf_final_states:
   match_states st1 st2 -> Linear.final_state st1 r -> Mach.final_state st2 r.
 Proof.
   intros. inv H0. inv H. inv STACKS.
-  constructor.
-  set (rres := loc_result {| sig_args := nil; sig_res := Some Tint |}) in *.
-  generalize (AGREGS rres). rewrite H1. intros IJ; inv IJ. auto.
+  generalize (AGREGS r0). rewrite H2. intros A; inv A. 
+  econstructor; eauto. 
 Qed.
 
 Theorem transf_program_correct:
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index 18414a8..bdc8117 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -64,9 +64,9 @@ Require Import LTL.
 
 Module U := UnionFind.UF(PTree).
 
-Definition record_goto (uf: U.t) (pc: node) (i: instruction) : U.t :=
-  match i with
-  | Lnop s => U.union uf pc s
+Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t :=
+  match b with
+  | Lbranch s :: _ => U.union uf pc s
   | _ => uf
   end.
 
@@ -77,37 +77,23 @@ Definition record_gotos (f: LTL.function) : U.t :=
   successor [s] of every instruction by the canonical representative
   of its equivalence class in the union-find data structure. *)
 
-Definition tunnel_instr (uf: U.t) (b: instruction) : instruction :=
-  match b with
-  | Lnop s =>
-      Lnop (U.repr uf s)
-  | Lop op args res s =>
-      Lop op args res (U.repr uf s)
-  | Lload chunk addr args dst s =>
-      Lload chunk addr args dst (U.repr uf s)
-  | Lstore chunk addr args src s =>
-      Lstore chunk addr args src (U.repr uf s)
-  | Lcall sig ros args res s =>
-      Lcall sig ros args res (U.repr uf s)
-  | Ltailcall sig ros args =>
-      Ltailcall sig ros args
-  | Lbuiltin ef args res s =>
-      Lbuiltin ef args res (U.repr uf s)
-  | Lcond cond args s1 s2 =>
-      Lcond cond args (U.repr uf s1) (U.repr uf s2)
-  | Ljumptable arg tbl =>
-      Ljumptable arg (List.map (U.repr uf) tbl)
-  | Lreturn or =>
-      Lreturn or
+Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
+  match i with
+  | Lbranch s => Lbranch (U.repr uf s)
+  | Lcond cond args s1 s2 => Lcond cond args (U.repr uf s1) (U.repr uf s2)
+  | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
+  | _ => i
   end.
 
+Definition tunnel_block (uf: U.t) (b: bblock) : bblock :=
+  List.map (tunnel_instr uf) b.
+
 Definition tunnel_function (f: LTL.function) : LTL.function :=
   let uf := record_gotos f in
   mkfunction
     (fn_sig f)
-    (fn_params f)
     (fn_stacksize f)
-    (PTree.map (fun pc b => tunnel_instr uf b) (fn_code f))
+    (PTree.map1 (tunnel_block uf) (fn_code f))
     (U.repr uf (fn_entrypoint f)).
 
 Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index d589260..d02cb2e 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -36,16 +36,16 @@ Definition measure_edge (u: U.t) (pc s: node) (f: node -> nat) : node -> nat :=
            else if peq (U.repr u x) pc then (f x + f s + 1)%nat
            else f x.
 
-Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (i: instruction) : U.t * (node -> nat) :=
-  match i with
-  | Lnop s => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f)
+Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (b: bblock) : U.t * (node -> nat) :=
+  match b with
+  | Lbranch s :: b' => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f)
   | _ => uf
   end.
 
 Definition branch_map_correct (c: code) (uf: U.t * (node -> nat)): Prop :=
   forall pc,
   match c!pc with
-  | Some(Lnop s) =>
+  | Some(Lbranch s :: b) =>
       U.repr (fst uf) pc = pc \/ (U.repr (fst uf) pc = U.repr (fst uf) s /\ snd uf s < snd uf pc)%nat
   | _ =>
       U.repr (fst uf) pc = pc
@@ -65,21 +65,21 @@ Proof.
   red; intros; simpl. rewrite PTree.gempty. apply U.repr_empty.
 
 (* inductive case *)
-  intros m uf pc i; intros. destruct uf as [u f]. 
+  intros m uf pc bb; intros. destruct uf as [u f]. 
   assert (PC: U.repr u pc = pc). 
     generalize (H1 pc). rewrite H. auto.
-  assert (record_goto' (u, f) pc i = (u, f)
-          \/ exists s, i = Lnop s /\ record_goto' (u, f) pc i = (U.union u pc s, measure_edge u pc s f)).
-    unfold record_goto'; simpl. destruct i; auto. right. exists n; auto.
-  destruct H2 as [B | [s [EQ B]]].
+  assert (record_goto' (u, f) pc bb = (u, f)
+          \/ exists s, exists bb', bb = Lbranch s :: bb' /\ record_goto' (u, f) pc bb = (U.union u pc s, measure_edge u pc s f)).
+    unfold record_goto'; simpl. destruct bb; auto. destruct i; auto. right. exists s; exists bb; auto.
+  destruct H2 as [B | [s [bb' [EQ B]]]].
 
 (* u and f are unchanged *)
   rewrite B.
   red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. 
-  destruct i; auto.
+  destruct bb; auto. destruct i; auto.
   apply H1. 
 
-(* i is Lnop s, u becomes union u pc s, f becomes measure_edge u pc s f *)
+(* b is Lbranch s, u becomes union u pc s, f becomes measure_edge u pc s f *)
   rewrite B.
   red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. rewrite EQ.
 
@@ -91,11 +91,11 @@ Proof.
 (* An old instruction *)
   assert (U.repr u pc' = pc' -> U.repr (U.union u pc s) pc' = pc').
     intro. rewrite <- H2 at 2. apply U.repr_union_1. congruence. 
-  generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct i0; auto.
+  generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct b; auto. destruct i; auto.
   intros [P | [P Q]]. left; auto. right.
   split. apply U.sameclass_union_2. auto.
   unfold measure_edge. destruct (peq (U.repr u s) pc). auto.
-  rewrite P. destruct (peq (U.repr u n0) pc). omega. auto. 
+  rewrite P. destruct (peq (U.repr u s0) pc). omega. auto. 
 Qed.
 
 Definition record_gotos' (f: function) :=
@@ -110,7 +110,7 @@ Proof.
   induction l; intros; simpl.
   auto.
   unfold record_goto' at 2. unfold record_goto at 2. 
-  destruct (snd a); apply IHl.
+  destruct (snd a). apply IHl. destruct i; apply IHl.
 Qed.
 
 Definition branch_target (f: function) (pc: node) : node :=
@@ -122,7 +122,7 @@ Definition count_gotos (f: function) (pc: node) : nat :=
 Theorem record_gotos_correct:
   forall f pc,
   match f.(fn_code)!pc with
-  | Some(Lnop s) =>
+  | Some(Lbranch s :: b) =>
        branch_target f pc = pc \/
        (branch_target f pc = branch_target f s /\ count_gotos f s < count_gotos f pc)%nat
   | _ => branch_target f pc = pc
@@ -204,15 +204,18 @@ Qed.
   the values of locations and the memory states are identical in the
   original and transformed codes. *)
 
+Definition tunneled_block (f: function) (b: bblock) :=
+  tunnel_block (record_gotos f) b.
+
 Definition tunneled_code (f: function) :=
-  PTree.map (fun pc b => tunnel_instr (record_gotos f) b) (fn_code f).
+  PTree.map1 (tunneled_block f) (fn_code f).
 
 Inductive match_stackframes: stackframe -> stackframe -> Prop :=
   | match_stackframes_intro:
-      forall res f sp ls0 pc,
+      forall f sp ls0 bb,
       match_stackframes
-         (Stackframe res f sp ls0 pc)
-         (Stackframe res (tunnel_function f) sp ls0 (branch_target f pc)).
+         (Stackframe f sp ls0 bb)
+         (Stackframe (tunnel_function f) sp ls0 (tunneled_block f bb)).
 
 Inductive match_states: state -> state -> Prop :=
   | match_states_intro:
@@ -220,6 +223,16 @@ Inductive match_states: state -> state -> Prop :=
       list_forall2 match_stackframes s ts ->
       match_states (State s f sp pc ls m)
                    (State ts (tunnel_function f) sp (branch_target f pc) ls m)
+  | match_states_block:
+      forall s f sp bb ls m ts,
+      list_forall2 match_stackframes s ts ->
+      match_states (Block s f sp bb ls m)
+                   (Block ts (tunnel_function f) sp (tunneled_block f bb) ls m)
+  | match_states_interm:
+      forall s f sp pc bb ls m ts,
+      list_forall2 match_stackframes s ts ->
+      match_states (Block s f sp (Lbranch pc :: bb) ls m)
+                   (State ts (tunnel_function f) sp (branch_target f pc) ls m)
   | match_states_call:
       forall s f ls m ts,
       list_forall2 match_stackframes s ts ->
@@ -238,17 +251,19 @@ Inductive match_states: state -> state -> Prop :=
 
 Definition measure (st: state) : nat :=
   match st with
-  | State s f sp pc ls m => count_gotos f pc
+  | State s f sp pc ls m => (count_gotos f pc * 2)%nat
+  | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat
+  | Block s f sp bb ls m => 0%nat
   | Callstate s f ls m => 0%nat
   | Returnstate s ls m => 0%nat
   end.
 
-Lemma tunnel_function_lookup:
-  forall f pc i,
-  f.(fn_code)!pc = Some i ->
-  (tunnel_function f).(fn_code)!pc = Some (tunnel_instr (record_gotos f) i).
+Lemma match_parent_locset:
+  forall s ts,
+  list_forall2 match_stackframes s ts ->
+  parent_locset ts = parent_locset s.
 Proof.
-  intros. simpl. rewrite PTree.gmap. rewrite H. auto.
+  induction 1; simpl. auto. inv H; auto.
 Qed.
 
 Lemma tunnel_step_correct:
@@ -258,98 +273,113 @@ Lemma tunnel_step_correct:
   \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
 Proof.
   induction 1; intros; try inv MS.
-  (* Lnop *)
-  generalize (record_gotos_correct f pc); rewrite H. 
-  intros [A | [B C]].
-  left; econstructor; split.
-  eapply exec_Lnop. rewrite A. 
-  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.  
-  econstructor; eauto. 
-  right. split. simpl. auto. split. auto. 
-  rewrite B. econstructor; eauto. 
+
+  (* entering a block *)
+  assert (DEFAULT: branch_target f pc = pc ->
+    (exists st2' : state,
+     step tge (State ts (tunnel_function f) sp (branch_target f pc) rs m) E0 st2'
+     /\ match_states (Block s f sp bb rs m) st2')).
+  intros. rewrite H0. econstructor; split. 
+  econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto. 
+  econstructor; eauto.
+
+  generalize (record_gotos_correct f pc). rewrite H. 
+  destruct bb; auto. destruct i; auto. 
+  intros [A | [B C]]. auto. 
+  right. split. simpl. omega. 
+  split. auto.
+  rewrite B. econstructor; eauto.
+
   (* Lop *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A.
-  left; econstructor; split.
+  left; simpl; econstructor; split.
   eapply exec_Lop with (v := v); eauto.
-  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.  
-  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
   econstructor; eauto.
   (* Lload *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A.
-  left; econstructor; split.
+  left; simpl; econstructor; split.
   eapply exec_Lload with (a := a). 
-  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.  
-  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto.
+  rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. eauto.
+  econstructor; eauto.
+  (* Lgetstack *)
+  left; simpl; econstructor; split.
+  econstructor; eauto.
+  econstructor; eauto.
+  (* Lsetstack *)
+  left; simpl; econstructor; split.
+  econstructor; eauto.
   econstructor; eauto.
   (* Lstore *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A.
-  left; econstructor; split.
+  left; simpl; econstructor; split.
   eapply exec_Lstore with (a := a).
-  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.  
-  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto.
+  rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. eauto.
   econstructor; eauto.
   (* Lcall *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A.
-  left; econstructor; split. 
-  eapply exec_Lcall with (f' := tunnel_fundef f'); eauto.
-  rewrite A. rewrite (tunnel_function_lookup _ _ _ H); simpl.
-  rewrite sig_preserved. auto.
+  left; simpl; econstructor; split. 
+  eapply exec_Lcall with (fd := tunnel_fundef fd); eauto.
   apply find_function_translated; auto.
+  rewrite sig_preserved. auto.
   econstructor; eauto.
   constructor; auto. 
   constructor; auto.
   (* Ltailcall *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A.
-  left; econstructor; split. 
-  eapply exec_Ltailcall with (f' := tunnel_fundef f'); eauto.
-  rewrite A. rewrite (tunnel_function_lookup _ _ _ H); simpl.
-  rewrite sig_preserved. auto.
+  left; simpl; econstructor; split. 
+  eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto.
+  erewrite match_parent_locset; eauto. 
   apply find_function_translated; auto.
+  apply sig_preserved.
+  erewrite <- match_parent_locset; eauto.
   econstructor; eauto.
   (* Lbuiltin *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A.
-  left; econstructor; split.
+  left; simpl; econstructor; split.
   eapply exec_Lbuiltin; eauto. 
-  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto.
-  (* cond *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A.
-  left; econstructor; split.
+  (* Lannot *)
+  left; simpl; econstructor; split.
+  eapply exec_Lannot; eauto. 
+  eapply external_call_symbols_preserved'; eauto.
+  exact symbols_preserved. exact varinfo_preserved.
+  econstructor; eauto.
+
+  (* Lbranch (preserved) *)
+  left; simpl; econstructor; split.
+  eapply exec_Lbranch; eauto. 
+  fold (branch_target f pc). econstructor; eauto.
+  (* Lbranch (eliminated) *)
+  right; split. simpl. omega. split. auto. constructor; auto. 
+
+  (* Lcond *)
+  left; simpl; econstructor; split.
   eapply exec_Lcond; eauto.
-  rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto.
   destruct b; econstructor; eauto.
-  (* jumptable *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A.
-  left; econstructor; split.
+  (* Ljumptable *)
+  left; simpl; econstructor; split.
   eapply exec_Ljumptable. 
-  rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto.
-  eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H1. reflexivity. 
+  eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto.
   econstructor; eauto. 
-  (* return *)
-  generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A.
-  left; econstructor; split.
+  (* Lreturn *)
+  left; simpl; econstructor; split.
   eapply exec_Lreturn; eauto.
-  rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto.
-  simpl. constructor; auto.
+  erewrite <- match_parent_locset; eauto.
+  constructor; auto.
   (* internal function *)
-  simpl. left; econstructor; split.
+  left; simpl; econstructor; split.
   eapply exec_function_internal; eauto.
   simpl. econstructor; eauto. 
   (* external function *)
-  simpl. left; econstructor; split.
+  left; simpl; econstructor; split.
   eapply exec_function_external; eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   simpl. econstructor; eauto. 
   (* return *)
   inv H3. inv H1.
   left; econstructor; split.
   eapply exec_return; eauto.
-  constructor. auto.
+  constructor; auto.
 Qed.
 
 Lemma transf_initial_states:
@@ -357,7 +387,7 @@ Lemma transf_initial_states:
   exists st2, initial_state tprog st2 /\ match_states st1 st2.
 Proof.
   intros. inversion H. 
-  exists (Callstate nil (tunnel_fundef f) nil m0); split.
+  exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split.
   econstructor; eauto.
   apply Genv.init_mem_transf; auto.
   change (prog_main tprog) with (prog_main prog).
@@ -371,7 +401,7 @@ Lemma transf_final_states:
   forall st1 st2 r, 
   match_states st1 st2 -> final_state st1 r -> final_state st2 r.
 Proof.
-  intros. inv H0. inv H. inv H4. constructor.  
+  intros. inv H0. inv H. inv H6. econstructor; eauto.  
 Qed.
 
 Theorem transf_program_correct:
diff --git a/backend/Tunnelingtyping.v b/backend/Tunnelingtyping.v
deleted file mode 100644
index dfc36b6..0000000
--- a/backend/Tunnelingtyping.v
+++ /dev/null
@@ -1,103 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Type preservation for the Tunneling pass *)
-
-Require Import Coqlib.
-Require Import Maps.
-Require Import AST.
-Require Import LTL.
-Require Import LTLtyping.
-Require Import Tunneling.
-Require Import Tunnelingproof.
-
-(** Tunneling preserves typing. *)
-
-Lemma branch_target_valid_1:
-  forall f pc, wt_function f ->
-  valid_successor f pc ->
-  valid_successor f (branch_target f pc).
-Proof.
-  intros. 
-  assert (forall n p,
-          (count_gotos f p < n)%nat ->
-          valid_successor f p ->
-          valid_successor f (branch_target f p)).
-  induction n; intros.
-  omegaContradiction.
-  elim H2; intros i EQ.
-  generalize (record_gotos_correct f p). rewrite EQ.
-  destruct i; try congruence.
-  intros [A | [B C]]. congruence. 
-  generalize (wt_instrs _ H _ _ EQ); intro WTI; inv WTI.
-  rewrite B. apply IHn. omega. auto. 
-
-  apply H1 with (Datatypes.S (count_gotos f pc)); auto.
-Qed.
-
-Lemma tunnel_valid_successor:
-  forall f pc,
-  valid_successor f pc -> valid_successor (tunnel_function f) pc.
-Proof.
-  intros. destruct H as [i AT].
-  unfold valid_successor; simpl. rewrite PTree.gmap. rewrite AT. 
-  simpl. exists (tunnel_instr (record_gotos f) i); auto.
-Qed.
-
-Lemma branch_target_valid:
-  forall f pc,
-  wt_function f ->
-  valid_successor f pc ->
-  valid_successor (tunnel_function f) (branch_target f pc).
-Proof.
-  intros. apply tunnel_valid_successor. apply branch_target_valid_1; auto.
-Qed.
-  
-Lemma wt_tunnel_instr:
-  forall f i,
-  wt_function f ->
-  wt_instr f i -> wt_instr (tunnel_function f) (tunnel_instr (record_gotos f) i).
-Proof.
-  intros; inv H0; simpl; econstructor; eauto;
-  try (eapply branch_target_valid; eauto).
-  intros. exploit list_in_map_inv; eauto. intros [x [A B]]. subst lbl.
-  eapply branch_target_valid; eauto.
-  rewrite list_length_z_map. auto.
-Qed.
-
-Lemma wt_tunnel_function:
-  forall f, wt_function f -> wt_function (tunnel_function f).
-Proof.
-  intros. inversion H. constructor; simpl; auto.
-  intros until instr. rewrite PTree.gmap. unfold option_map. 
-  caseEq (fn_code f)!pc. intros b0 AT EQ. inv EQ. 
-  apply wt_tunnel_instr; eauto.
-  congruence.
-  eapply branch_target_valid; eauto.
-Qed.
-
-Lemma wt_tunnel_fundef:
-  forall f, wt_fundef f -> wt_fundef (tunnel_fundef f).
-Proof.
-  intros. inversion H; simpl. constructor; auto. 
-  constructor. apply wt_tunnel_function; auto.
-Qed.
-
-Lemma program_typing_preserved:
-  forall (p: LTL.program),
-  wt_program p -> wt_program (tunnel_program p).
-Proof.
-  intros; red; intros.
-  generalize (transform_program_function tunnel_fundef p i f H0).
-  intros [f0 [IN TRANSF]].
-  subst f. apply wt_tunnel_fundef. eauto.
-Qed.
diff --git a/backend/XTL.ml b/backend/XTL.ml
new file mode 100644
index 0000000..93cab36
--- /dev/null
+++ b/backend/XTL.ml
@@ -0,0 +1,213 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** The XTL intermediate language for register allocation *)
+
+open Datatypes
+open Camlcoq
+open Maps
+open AST
+open Registers
+open Op
+open Locations
+
+type var = V of reg * typ | L of loc
+
+type node = P.t
+
+type instruction =
+  | Xmove of var * var
+  | Xreload of var * var
+  | Xspill of var * var
+  | Xparmove of var list * var list * var * var
+  | Xop of operation * var list * var
+  | Xload of memory_chunk * addressing * var list * var
+  | Xstore of memory_chunk * addressing * var list * var
+  | Xcall of signature * (var, ident) sum * var list * var list
+  | Xtailcall of signature * (var, ident) sum * var list
+  | Xbuiltin of external_function * var list * var list
+  | Xbranch of node
+  | Xcond of condition * var list * node * node
+  | Xjumptable of var * node list
+  | Xreturn of var list
+
+type block = instruction list
+  (* terminated by one of Xbranch, Xcond, Xjumptable, Xtailcall or Xreturn *)
+
+type code = block PTree.t
+  (* mapping node -> block *)
+
+type xfunction = {
+  fn_sig: signature;
+  fn_stacksize: Z.t;
+  fn_code: code;
+  fn_entrypoint: node
+}
+
+(* Type of a variable *)
+
+let typeof = function V(_, ty) -> ty | L l -> Loc.coq_type l
+
+(* Constructors for type [var] *)
+
+let vloc l = L l
+let vlocs ll = List.map vloc ll
+let vmreg mr = L(R mr)
+let vmregs mrl = List.map vmreg mrl
+
+(* Sets of variables *)
+
+module VSet = Set.Make(struct type t = var let compare = compare end)
+
+(*** Generation of fresh registers and fresh register variables *)
+
+let next_temp = ref P.one
+
+let twin_table : (int32, P.t) Hashtbl.t = Hashtbl.create 27
+
+let reset_temps () =
+  next_temp := P.one; Hashtbl.clear twin_table
+
+let new_reg() =
+  let r = !next_temp in next_temp := P.succ !next_temp; r
+
+let new_temp ty = V (new_reg(), ty)
+
+let twin_reg r =
+  let r = P.to_int32 r in
+  try
+    Hashtbl.find twin_table r
+  with Not_found ->
+    let t = new_reg() in Hashtbl.add twin_table r t; t
+
+(*** Successors (for dataflow analysis) *)
+
+let rec successors_block = function
+  | Xbranch s :: _ -> [s]
+  | Xtailcall(sg, vos, args) :: _ -> []
+  | Xcond(cond, args, s1, s2) :: _ -> [s1; s2]
+  | Xjumptable(arg, tbl) :: _ -> tbl
+  | Xreturn  _:: _ -> []
+  | instr :: blk -> successors_block blk
+  | [] -> assert false
+
+let successors fn =
+  PTree.map1 successors_block fn.fn_code
+
+(**** Type checking for XTL *)
+
+exception Type_error
+
+exception Type_error_at of node
+
+let set_var_type v ty =
+  if typeof v <> ty then raise Type_error
+
+let rec set_vars_type vl tyl =
+  match vl, tyl with
+  | [], [] -> ()
+  | v1 :: vl, ty1 :: tyl -> set_var_type v1 ty1; set_vars_type vl tyl
+  | _, _ -> raise Type_error
+
+let unify_var_type v1 v2 =
+  if typeof v1 <> typeof v2 then raise Type_error
+
+let type_instr = function
+  | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) ->
+      unify_var_type src dst
+  | Xparmove(srcs, dsts, itmp, ftmp) ->
+      List.iter2 unify_var_type srcs dsts;
+      set_var_type itmp Tint;
+      set_var_type ftmp Tfloat
+  | Xop(op, args, res) ->
+      let (targs, tres) = type_of_operation op in
+      set_vars_type args targs;
+      set_var_type res tres
+  | Xload(chunk, addr, args, dst) ->
+      set_vars_type args (type_of_addressing addr);
+      set_var_type dst (type_of_chunk chunk)
+  | Xstore(chunk, addr, args, src) ->
+      set_vars_type args (type_of_addressing addr);
+      set_var_type src (type_of_chunk chunk)
+  | Xcall(sg, Coq_inl v, args, res) ->
+      set_var_type v Tint
+  | Xcall(sg, Coq_inr id, args, res) ->
+      ()
+  | Xtailcall(sg, Coq_inl v, args) ->
+      set_var_type v Tint
+  | Xtailcall(sg, Coq_inr id, args) ->
+      ()
+  | Xbuiltin(ef, args, res) ->
+      let sg = ef_sig ef in
+      set_vars_type args sg.sig_args;
+      set_vars_type res (Events.proj_sig_res' sg)
+  | Xbranch s ->
+      ()
+  | Xcond(cond, args, s1, s2) ->
+      set_vars_type args (type_of_condition cond)
+  | Xjumptable(arg, tbl) ->
+      set_var_type arg Tint
+  | Xreturn args ->
+      ()
+
+let type_block blk =
+  List.iter type_instr blk
+
+let type_function f =
+  PTree.fold
+    (fun () pc blk ->
+       try 
+         type_block blk
+       with Type_error ->
+         raise (Type_error_at pc))
+    f.fn_code ()
+
+(*** A generic framework for transforming extended basic blocks *)
+
+(* Determine instructions that start an extended basic block.
+   These are instructions that have >= 2 predecessors. *)
+
+let basic_blocks_map f = (* return mapping pc -> number of predecessors *)
+  let add_successor map s =
+    PMap.set s (1 + PMap.get s map) map in
+  let add_successors_block map pc blk =
+    List.fold_left add_successor map (successors_block blk) in
+  PTree.fold add_successors_block f.fn_code
+    (PMap.set f.fn_entrypoint 2 (PMap.init 0))
+
+let transform_basic_blocks
+       (transf: node -> block -> 'state -> block * 'state)
+       (top: 'state)
+       f =
+  let bbmap = basic_blocks_map f in
+  let rec transform_block st newcode pc bb =
+    assert (PTree.get pc newcode = None);
+    let (bb', st') = transf pc bb st in
+    (* Record new code after transformation *)
+    let newcode' = PTree.set pc bb' newcode in
+    (* Propagate outgoing state to all successors *)
+    List.fold_left (transform_successor st') newcode' (successors_block bb)
+  and transform_successor st newcode pc =
+    if PMap.get pc bbmap <> 1 then newcode else begin
+      match PTree.get pc f.fn_code with
+      | None -> newcode
+      | Some bb -> transform_block st newcode pc bb
+    end in
+  (* Iterate over all extended basic block heads *)
+  let newcode =
+    PTree.fold
+      (fun newcode pc bb ->
+        if PMap.get pc bbmap >= 2
+        then transform_block top newcode pc bb
+        else newcode)
+      f.fn_code PTree.empty
+  in {f with fn_code = newcode}
diff --git a/backend/XTL.mli b/backend/XTL.mli
new file mode 100644
index 0000000..f2c2715
--- /dev/null
+++ b/backend/XTL.mli
@@ -0,0 +1,100 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** The XTL intermediate language for register allocation *)
+
+open Datatypes
+open Camlcoq
+open Maps
+open AST
+open Registers
+open Machregs
+open Locations
+open Op
+
+type var = V of reg * typ | L of loc
+
+type node = P.t
+
+type instruction =
+  | Xmove of var * var
+  | Xreload of var * var
+  | Xspill of var * var
+  | Xparmove of var list * var list * var * var
+  | Xop of operation * var list * var
+  | Xload of memory_chunk * addressing * var list * var
+  | Xstore of memory_chunk * addressing * var list * var
+  | Xcall of signature * (var, ident) sum * var list * var list
+  | Xtailcall of signature * (var, ident) sum * var list
+  | Xbuiltin of external_function * var list * var list
+  | Xbranch of node
+  | Xcond of condition * var list * node * node
+  | Xjumptable of var * node list
+  | Xreturn of var list
+
+type block = instruction list
+  (* terminated by one of Xbranch, Xcond, Xjumptable, Xtailcall or Xreturn *)
+
+type code = block PTree.t
+  (* mapping node -> block *)
+
+type xfunction = {
+  fn_sig: signature;
+  fn_stacksize: Z.t;
+  fn_code: code;
+  fn_entrypoint: node
+}
+
+(* Type of a variable *)
+
+val typeof: var -> typ
+
+(* Constructors for type [var] *)
+
+val vloc: loc -> var
+val vlocs: loc list -> var list
+val vmreg: mreg -> var
+val vmregs: mreg list -> var list
+
+(* Sets of variables *)
+
+module VSet: Set.S with type elt = var
+
+(* Generation of fresh registers and fresh register variables *)
+
+val reset_temps: unit -> unit
+val new_reg: unit -> reg
+val new_temp: typ -> var
+val twin_reg: reg -> reg
+
+(* Type checking *)
+
+val type_function: xfunction -> unit
+exception Type_error_at of node
+
+(* Successors for dataflow analysis *)
+
+val successors_block: block -> node list
+val successors: xfunction -> node list PTree.t
+
+(* A generic framework for transforming extended basic blocks *)
+
+val transform_basic_blocks:
+  (node -> block -> 'state -> block * 'state) ->
+  'state ->
+  xfunction -> xfunction
+(* First arg is the transfer function
+      (node, block, state before) -> (transformed block, state after).
+   Second arg is "top" state, to be used as initial state for
+   extended basic block heads. *)
+
+
diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml
index 3bf459f..0ffbd66 100644
--- a/cfrontend/C2C.ml
+++ b/cfrontend/C2C.ml
@@ -160,10 +160,12 @@ let register_stub_function name tres targs =
   let rec letters_of_type = function
     | Tnil -> []
     | Tcons(Tfloat _, tl) -> "f" :: letters_of_type tl
+    | Tcons(Tlong _, tl) -> "l" :: letters_of_type tl
     | Tcons(_, tl) -> "i" :: letters_of_type tl in
   let rec types_of_types = function
     | Tnil -> Tnil
     | Tcons(Tfloat _, tl) -> Tcons(Tfloat(F64, noattr), types_of_types tl)
+    | Tcons(Tlong _, tl) -> Tcons(Tlong(Signed, noattr), types_of_types tl)
     | Tcons(_, tl) -> Tcons(Tpointer(Tvoid, noattr), types_of_types tl) in
   let stub_name =
     name ^ "$" ^ String.concat "" (letters_of_type targs) in
@@ -211,6 +213,7 @@ let mergeTypAttr ty a2 =
   | Tvoid -> ty
   | Tint(sz, sg, a1) -> Tint(sz, sg, mergeAttr a1 a2)
   | Tfloat(sz, a1) -> Tfloat(sz, mergeAttr a1 a2)
+  | Tlong(sg, a1) -> Tlong(sg, mergeAttr a1 a2)
   | Tpointer(ty', a1) -> Tpointer(ty', mergeAttr a1 a2)
   | Tarray(ty', sz, a1) -> Tarray(ty', sz, mergeAttr a1 a2)
   | Tfunction(targs, tres) -> ty
@@ -233,8 +236,7 @@ let convertIkind = function
   | C.ILong -> (Signed, I32)
   | C.IULong -> (Unsigned, I32)
   (* Special-cased in convertTyp below *)
-  | C.ILongLong -> unsupported "'long long' type"; (Signed, I32)
-  | C.IULongLong -> unsupported "'unsigned long long' type"; (Unsigned, I32)
+  | C.ILongLong | C.IULongLong -> assert false
 
 let convertFkind = function
   | C.FFloat -> F32
@@ -243,14 +245,6 @@ let convertFkind = function
       if not !Clflags.option_flongdouble then unsupported "'long double' type"; 
       F64
 
-let int64_struct a =
-  let ty = Tint(I32,Unsigned,noattr) in
-  Tstruct(intern_string "struct __int64",
-    (if Memdataaux.big_endian
-     then Fcons(intern_string "hi", ty, Fcons(intern_string "lo", ty, Fnil))
-     else Fcons(intern_string "lo", ty, Fcons(intern_string "hi", ty, Fnil))),
-    a)
-
 (** A cache for structs and unions already converted *)
 
 let compositeCache : (C.ident, coq_type) Hashtbl.t = Hashtbl.create 77
@@ -260,8 +254,10 @@ let convertTyp env t =
   let rec convertTyp seen t =
     match Cutil.unroll env t with
     | C.TVoid a -> Tvoid
-    | C.TInt((C.ILongLong|C.IULongLong), a) when !Clflags.option_flonglong ->
-        int64_struct (convertAttr a)
+    | C.TInt(C.ILongLong, a) ->
+        Tlong(Signed, convertAttr a)
+    | C.TInt(C.IULongLong, a) ->
+        Tlong(Unsigned, convertAttr a)
     | C.TInt(ik, a) ->
         let (sg, sz) = convertIkind ik in Tint(sz, sg, convertAttr a)
     | C.TFloat(fk, a) ->
@@ -360,14 +356,8 @@ let string_of_type ty =
   Format.pp_print_flush fb ();
   Buffer.contents b
 
-let first_class_value env ty =
-  match Cutil.unroll env ty with
-  | C.TInt((C.ILongLong|C.IULongLong), _) -> false
-  | _ -> true
-
 let supported_return_type env ty =
   match Cutil.unroll env ty with
-  | C.TInt((C.ILongLong|C.IULongLong), _) -> false
   | C.TStruct _  | C.TUnion _ -> false
   | _ -> true
 
@@ -419,10 +409,6 @@ let convertFloat f kind =
 
 let ezero = Eval(Vint(coqint_of_camlint 0l), type_int32s)
 
-let check_assignop msg env e =
-  if not (first_class_value env e.etyp) then
-    unsupported (msg ^ " on a l-value of type " ^ string_of_type e.etyp)
-
 let rec convertExpr env e =
   let ty = convertTyp env e.etyp in
   match e.edesc with
@@ -430,13 +416,11 @@ let rec convertExpr env e =
   | C.EUnop((C.Oderef|C.Odot _|C.Oarrow _), _)
   | C.EBinop(C.Oindex, _, _, _) ->
       let l = convertLvalue env e in
-      if not (first_class_value env e.etyp) then
-        unsupported ("r-value of type " ^ string_of_type e.etyp);
       Evalof(l, ty)
 
+  | C.EConst(C.CInt(i, (ILongLong|IULongLong), _)) ->
+      Eval(Vlong(coqint_of_camlint64 i), ty)
   | C.EConst(C.CInt(i, k, _)) ->
-      if k = C.ILongLong || k = C.IULongLong then
-        unsupported "'long long' integer literal";
       Eval(Vint(convertInt i), ty)
   | C.EConst(C.CFloat(f, k)) ->
       if k = C.FLongDouble && not !Clflags.option_flongdouble then
@@ -465,21 +449,17 @@ let rec convertExpr env e =
   | C.EUnop(C.Oaddrof, e1) ->
       Eaddrof(convertLvalue env e1, ty)
   | C.EUnop(C.Opreincr, e1) ->
-      check_assignop "pre-increment" env e1;
       coq_Epreincr Incr (convertLvalue env e1) ty
   | C.EUnop(C.Opredecr, e1) ->
-      check_assignop "pre-decrement" env e1;
       coq_Epreincr Decr (convertLvalue env e1) ty
   | C.EUnop(C.Opostincr, e1) ->
-      check_assignop "post-increment" env e1;
       Epostincr(Incr, convertLvalue env e1, ty)
   | C.EUnop(C.Opostdecr, e1) ->
-      check_assignop "post-decrement" env e1;
       Epostincr(Decr, convertLvalue env e1, ty)
 
   | C.EBinop((C.Oadd|C.Osub|C.Omul|C.Odiv|C.Omod|C.Oand|C.Oor|C.Oxor|
               C.Oshl|C.Oshr|C.Oeq|C.One|C.Olt|C.Ogt|C.Ole|C.Oge) as op,
-             e1, e2, _) ->
+             e1, e2, tyres) ->
       let op' =
         match op with
         | C.Oadd -> Oadd
@@ -503,7 +483,6 @@ let rec convertExpr env e =
   | C.EBinop(C.Oassign, e1, e2, _) ->
       let e1' = convertLvalue env e1 in
       let e2' = convertExpr env e2 in
-      check_assignop "assignment" env e1;
       Eassign(e1', e2', ty)
   | C.EBinop((C.Oadd_assign|C.Osub_assign|C.Omul_assign|C.Odiv_assign|
               C.Omod_assign|C.Oand_assign|C.Oor_assign|C.Oxor_assign|
@@ -525,7 +504,6 @@ let rec convertExpr env e =
         | _ -> assert false in
       let e1' = convertLvalue env e1 in
       let e2' = convertExpr env e2 in
-      check_assignop "assignment-operation" env e1;
       Eassignop(op', e1', e2', tyres, ty)
   | C.EBinop(C.Ocomma, e1, e2, _) ->
       Ecomma(convertExpr env e1, convertExpr env e2, ty)
@@ -537,8 +515,6 @@ let rec convertExpr env e =
   | C.EConditional(e1, e2, e3) ->
       Econdition(convertExpr env e1, convertExpr env e2, convertExpr env e3, ty)
   | C.ECast(ty1, e1) ->
-      if not (first_class_value env ty1) then
-        unsupported ("cast to type " ^ string_of_type ty1);
       Ecast(convertExpr env e1, convertTyp env ty1)
 
   | C.ECall({edesc = C.EVar {name = "__builtin_annot"}}, args) ->
diff --git a/cfrontend/CPragmas.ml b/cfrontend/CPragmas.ml
index 7bf80bb..5ae147e 100644
--- a/cfrontend/CPragmas.ml
+++ b/cfrontend/CPragmas.ml
@@ -49,8 +49,7 @@ let process_reserve_register_pragma name =
       C2C.error "unknown register in `reserve_register' pragma"
   | Some r ->
       if Machregsaux.can_reserve_register r then
-        Coloringaux.reserved_registers :=
-          r :: !Coloringaux.reserved_registers
+        IRC.reserved_registers := r :: !IRC.reserved_registers
       else
         C2C.error "cannot reserve this register (not a callee-save)"
 
diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v
index ebc27ad..a19a0e2 100644
--- a/cfrontend/Cexec.v
+++ b/cfrontend/Cexec.v
@@ -104,6 +104,7 @@ Definition eventval_of_val (v: val) (t: typ) : option eventval :=
   match v, t with
   | Vint i, AST.Tint => Some (EVint i)
   | Vfloat f, AST.Tfloat => Some (EVfloat f)
+  | Vlong n, AST.Tlong => Some (EVlong n)
   | Vptr b ofs, AST.Tint => do id <- Genv.invert_symbol ge b; Some (EVptr_global id ofs)
   | _, _ => None
   end.
@@ -122,6 +123,7 @@ Definition val_of_eventval (ev: eventval) (t: typ) : option val :=
   match ev, t with
   | EVint i, AST.Tint => Some (Vint i)
   | EVfloat f, AST.Tfloat => Some (Vfloat f)
+  | EVlong n, AST.Tlong => Some (Vlong n)
   | EVptr_global id ofs, AST.Tint => do b <- Genv.find_symbol ge id; Some (Vptr b ofs)
   | _, _ => None
   end.
@@ -132,6 +134,7 @@ Proof.
   intros. destruct v; destruct t; simpl in H; inv H.
   constructor.
   constructor.
+  constructor.
   destruct (Genv.invert_symbol ge b) as [id|] eqn:?; inv H1. 
   constructor. apply Genv.invert_find_symbol; auto.
 Qed.
@@ -166,6 +169,7 @@ Proof.
   intros. destruct ev; destruct t; simpl in H; inv H.
   constructor.
   constructor.
+  constructor.
   destruct (Genv.find_symbol ge i) as [b|] eqn:?; inv H1.
   constructor. auto.
 Qed.
diff --git a/cfrontend/Clight.v b/cfrontend/Clight.v
index e9ec7cc..3bd06df 100644
--- a/cfrontend/Clight.v
+++ b/cfrontend/Clight.v
@@ -48,6 +48,7 @@ Require Import Cop.
 Inductive expr : Type :=
   | Econst_int: int -> type -> expr       (**r integer literal *)
   | Econst_float: float -> type -> expr   (**r float literal *)
+  | Econst_long: int64 -> type -> expr    (**r long integer literal *)
   | Evar: ident -> type -> expr           (**r variable *)
   | Etempvar: ident -> type -> expr       (**r temporary variable *)
   | Ederef: expr -> type -> expr          (**r pointer dereference (unary [*]) *)
@@ -68,6 +69,7 @@ Definition typeof (e: expr) : type :=
   match e with
   | Econst_int _ ty => ty
   | Econst_float _ ty => ty
+  | Econst_long _ ty => ty
   | Evar _ ty => ty
   | Etempvar _ ty => ty
   | Ederef _ ty => ty
@@ -347,6 +349,8 @@ Inductive eval_expr: expr -> val -> Prop :=
       eval_expr (Econst_int i ty) (Vint i)
   | eval_Econst_float:   forall f ty,
       eval_expr (Econst_float f ty) (Vfloat f)
+  | eval_Econst_long:   forall i ty,
+      eval_expr (Econst_long i ty) (Vlong i)
   | eval_Etempvar:  forall id ty v,
       le!id = Some v ->
       eval_expr (Etempvar id ty) v
diff --git a/cfrontend/Cminorgen.v b/cfrontend/Cminorgen.v
index af85971..419ffff 100644
--- a/cfrontend/Cminorgen.v
+++ b/cfrontend/Cminorgen.v
@@ -171,6 +171,7 @@ Definition of_chunk (chunk: memory_chunk) :=
   | Mint16signed => Int16s
   | Mint16unsigned => Int16u
   | Mint32 => Any
+  | Mint64 => Any
   | Mfloat32 => Float32
   | Mfloat64 => Any
   | Mfloat64al32 => Any
@@ -239,6 +240,8 @@ Definition transl_constant (cst: Csharpminor.constant): (constant * approx) :=
       (Ointconst n, Approx.of_int n)
   | Csharpminor.Ofloatconst n =>
       (Ofloatconst n, Approx.of_float n)
+  | Csharpminor.Olongconst n =>
+      (Olongconst n, Any)
   end.
 
 (** Translation of expressions.  Return both a Cminor expression and
diff --git a/cfrontend/Cminorgenproof.v b/cfrontend/Cminorgenproof.v
index a61808a..9d3d255 100644
--- a/cfrontend/Cminorgenproof.v
+++ b/cfrontend/Cminorgenproof.v
@@ -1495,6 +1495,8 @@ Ltac TrivialExists :=
       exists (Vint x); split; [eauto with evalexpr | constructor]
   | [ |- exists y, _ /\ val_inject _ (Vfloat ?x) _ ] =>
       exists (Vfloat x); split; [eauto with evalexpr | constructor]
+  | [ |- exists y, _ /\ val_inject _ (Vlong ?x) _ ] =>
+      exists (Vlong x); split; [eauto with evalexpr | constructor]
   | _ => idtac
   end.
 
@@ -1522,6 +1524,15 @@ Proof.
   inv H0; simpl in H; inv H. simpl. destruct (Float.intuoffloat f0); simpl in *; inv H1. TrivialExists.
   inv H0; simpl in H; inv H. simpl. TrivialExists.
   inv H0; simpl in H; inv H. simpl. TrivialExists.
+  inv H; inv H0; simpl; TrivialExists.
+  inv H; inv H0; simpl; TrivialExists.
+  inv H; inv H0; simpl; TrivialExists.
+  inv H; inv H0; simpl; TrivialExists.
+  inv H; inv H0; simpl; TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float.longoffloat f0); simpl in *; inv H1. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float.longuoffloat f0); simpl in *; inv H1. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. TrivialExists.
 Qed.
 
 (** Compatibility of [eval_binop] with respect to [val_inject]. *)
@@ -1566,46 +1577,43 @@ Proof.
   inv H; inv H0; inv H1; TrivialExists.
   inv H; inv H0; inv H1; TrivialExists.
   inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H0; try discriminate; inv H1; try discriminate. simpl in *. 
+    destruct (Int64.eq i0 Int64.zero
+      || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H; TrivialExists.
+  inv H0; try discriminate; inv H1; try discriminate. simpl in *. 
+    destruct (Int64.eq i0 Int64.zero); inv H. TrivialExists.
+  inv H0; try discriminate; inv H1; try discriminate. simpl in *. 
+    destruct (Int64.eq i0 Int64.zero
+      || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H; TrivialExists.
+  inv H0; try discriminate; inv H1; try discriminate. simpl in *. 
+    destruct (Int64.eq i0 Int64.zero); inv H. TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists. simpl. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  inv H; inv H0; inv H1; TrivialExists. simpl. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  inv H; inv H0; inv H1; TrivialExists. simpl. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool.
 (* cmpu *)
-  inv H; inv H0; inv H1; TrivialExists.
-    apply val_inject_val_of_optbool.
-    apply val_inject_val_of_optbool.
-    apply val_inject_val_of_optbool.
-Opaque Int.add.
-    unfold Val.cmpu. simpl.
-    destruct (zeq b1 b0); subst.
-    (* same blocks *)
-    rewrite H0 in H. inv H. rewrite zeq_true.
-    fold (Mem.weak_valid_pointer m b0 (Int.unsigned ofs1)).
-    fold (Mem.weak_valid_pointer m b0 (Int.unsigned ofs0)).
-    fold (Mem.weak_valid_pointer tm b2 (Int.unsigned (Int.add ofs1 (Int.repr delta)))).
-    fold (Mem.weak_valid_pointer tm b2 (Int.unsigned (Int.add ofs0 (Int.repr delta)))).
-    destruct (Mem.weak_valid_pointer m b0 (Int.unsigned ofs1)) eqn:?; auto.
-    destruct (Mem.weak_valid_pointer m b0 (Int.unsigned ofs0)) eqn:?; auto.
-    rewrite (Mem.weak_valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb) by eauto.
-    rewrite (Mem.weak_valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb0) by eauto.
-    rewrite Int.translate_cmpu
-      by eauto using Mem.weak_valid_pointer_inject_no_overflow.
-    apply val_inject_val_of_optbool.
-    (* different source blocks *)
-    destruct (Mem.valid_pointer m b1 (Int.unsigned ofs1)) eqn:?; auto.
-    destruct (Mem.valid_pointer m b0 (Int.unsigned ofs0)) eqn:?; auto.
-    destruct (zeq b2 b3).
-    fold (Mem.weak_valid_pointer tm b2 (Int.unsigned (Int.add ofs1 (Int.repr delta)))).
-    fold (Mem.weak_valid_pointer tm b3 (Int.unsigned (Int.add ofs0 (Int.repr delta0)))).
-    rewrite Mem.valid_pointer_implies
-      by (eapply (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb); eauto).
-    rewrite Mem.valid_pointer_implies
-      by (eapply (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb0); eauto).
-    exploit Mem.different_pointers_inject; eauto. intros [A|A]; [congruence |].
-    destruct c; simpl; auto.
-    rewrite Int.eq_false. constructor. congruence.
-    rewrite Int.eq_false. constructor. congruence.
-    rewrite (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb) by eauto.
-    rewrite (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb0) by eauto.
-    apply val_inject_val_of_optbool.
-  (* cmpf *)
+  inv H. econstructor; split; eauto. 
+  unfold Val.cmpu. 
+  destruct (Val.cmpu_bool (Mem.valid_pointer m) c v1 v2) as [b|] eqn:E.
+  replace (Val.cmpu_bool (Mem.valid_pointer tm) c tv1 tv2) with (Some b).
+  destruct b; simpl; constructor.
+  symmetry. eapply val_cmpu_bool_inject; eauto.
+  intros; eapply Mem.valid_pointer_inject_val; eauto.
+  intros; eapply Mem.weak_valid_pointer_inject_val; eauto.
+  intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
+  intros; eapply Mem.different_pointers_inject; eauto.
+  simpl; auto.
+(* cmpf *)
+  inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool.
+(* cmpl *)
+  inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool.
+(* cmplu *)
   inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool.
 Qed.
 
@@ -1843,6 +1851,8 @@ Proof.
   repeat rewrite Int.zero_ext_and; auto. omega. omega.
   (* int32 *)
   exists va; auto.
+  (* int64 *)
+  exists va; auto.
   (* float32 *)
   exploit eval_uncast_float32; eauto. intros [v' [A B]].
   exists v'; split; auto.
@@ -2036,6 +2046,7 @@ Proof.
   destruct cst; simpl; intros; inv H. 
   exists (Vint i); intuition. apply approx_of_int_sound.
   exists (Vfloat f0); intuition. apply approx_of_float_sound.
+  exists (Vlong i); intuition. 
 Qed.
 
 Lemma transl_expr_correct:
diff --git a/cfrontend/Cop.v b/cfrontend/Cop.v
index af7aaa8..c6e07b7 100644
--- a/cfrontend/Cop.v
+++ b/cfrontend/Cop.v
@@ -76,23 +76,35 @@ Inductive classify_cast_cases : Type :=
   | cast_case_f2f (sz2:floatsize)                  (**r float -> float *)
   | cast_case_i2f (si1:signedness) (sz2:floatsize) (**r int -> float *)
   | cast_case_f2i (sz2:intsize) (si2:signedness)   (**r float -> int *)
+  | cast_case_l2l                       (**r long -> long *)
+  | cast_case_i2l (si1: signedness)     (**r int -> long *)
+  | cast_case_l2i (sz2: intsize) (si2: signedness) (**r long -> int *)
+  | cast_case_l2f (si1: signedness) (sz2: floatsize) (**r long -> float *)
+  | cast_case_f2l (si2: signedness)     (**r float -> long *)
   | cast_case_f2bool                    (**r float -> bool *)
+  | cast_case_l2bool                    (**r long -> bool *)
   | cast_case_p2bool                    (**r pointer -> bool *)
   | cast_case_struct (id1: ident) (fld1: fieldlist) (id2: ident) (fld2: fieldlist) (**r struct -> struct *)
   | cast_case_union (id1: ident) (fld1: fieldlist) (id2: ident) (fld2: fieldlist) (**r union -> union *)
   | cast_case_void                                 (**r any -> void *)
   | cast_case_default.
 
-Function classify_cast (tfrom tto: type) : classify_cast_cases :=
+Definition classify_cast (tfrom tto: type) : classify_cast_cases :=
   match tto, tfrom with
-  | Tint I32 si2 _, (Tint _ _ _ | Tpointer _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_neutral
+  | Tint I32 si2 _, (Tint _ _ _ | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_neutral
   | Tint IBool _ _, Tfloat _ _ => cast_case_f2bool
-  | Tint IBool _ _, (Tpointer _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_p2bool
+  | Tint IBool _ _, (Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_p2bool
   | Tint sz2 si2 _, Tint sz1 si1 _ => cast_case_i2i sz2 si2
   | Tint sz2 si2 _, Tfloat sz1 _ => cast_case_f2i sz2 si2
   | Tfloat sz2 _, Tfloat sz1 _ => cast_case_f2f sz2
   | Tfloat sz2 _, Tint sz1 si1 _ => cast_case_i2f si1 sz2
-  | Tpointer _ _, (Tint _ _ _ | Tpointer _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_neutral
+  | (Tpointer _ _ | Tcomp_ptr _ _), (Tint _ _ _ | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_neutral
+  | Tlong _ _, Tlong _ _ => cast_case_l2l
+  | Tlong _ _, Tint sz1 si1 _ => cast_case_i2l si1
+  | Tint IBool _ _, Tlong _ _ => cast_case_l2bool
+  | Tint sz2 si2 _, Tlong _ _ => cast_case_l2i sz2 si2
+  | Tlong si2 _, Tfloat sz1 _ => cast_case_f2l si2
+  | Tfloat sz2 _, Tlong si1 _ => cast_case_l2f si1 sz2
   | Tstruct id2 fld2 _, Tstruct id1 fld1 _ => cast_case_struct id1 fld1 id2 fld2
   | Tunion id2 fld2 _, Tunion id1 fld1 _ => cast_case_union id1 fld1 id2 fld2
   | Tvoid, _ => cast_case_void
@@ -131,7 +143,25 @@ Definition cast_float_float (sz: floatsize) (f: float) : float :=
   | F64 => f
   end.
 
-Function sem_cast (v: val) (t1 t2: type) : option val :=
+Definition cast_int_long (si: signedness) (i: int) : int64 :=
+  match si with
+  | Signed => Int64.repr (Int.signed i)
+  | Unsigned => Int64.repr (Int.unsigned i)
+  end.
+
+Definition cast_long_float (si : signedness) (i: int64) : float :=
+  match si with
+  | Signed => Float.floatoflong i
+  | Unsigned => Float.floatoflongu i
+  end.
+
+Definition cast_float_long (si : signedness) (f: float) : option int64 :=
+  match si with
+  | Signed => Float.longoffloat f
+  | Unsigned => Float.longuoffloat f
+  end.
+
+Definition sem_cast (v: val) (t1 t2: type) : option val :=
   match classify_cast t1 t2 with
   | cast_case_neutral =>
       match v with
@@ -174,10 +204,53 @@ Function sem_cast (v: val) (t1 t2: type) : option val :=
       | Vptr _ _ => Some (Vint Int.one)
       | _ => None
       end
+  | cast_case_l2l =>
+      match v with
+      | Vlong n => Some (Vlong n)
+      | _ => None
+      end
+  | cast_case_i2l si =>
+      match v with
+      | Vint n => Some(Vlong (cast_int_long si n))
+      | _ => None
+      end
+  | cast_case_l2i sz si =>
+      match v with
+      | Vlong n => Some(Vint (cast_int_int sz si (Int.repr (Int64.unsigned n))))
+      | _ => None
+      end
+  | cast_case_l2bool =>
+      match v with
+      | Vlong n =>
+          Some(Vint(if Int64.eq n Int64.zero then Int.zero else Int.one))
+      | _ => None
+      end
+  | cast_case_l2f si1 sz2 =>
+      match v with
+      | Vlong i => Some (Vfloat (cast_float_float sz2 (cast_long_float si1 i)))
+      | _ => None
+      end
+  | cast_case_f2l si2 =>
+      match v with
+      | Vfloat f =>
+          match cast_float_long si2 f with
+          | Some i => Some (Vlong i)
+          | None => None
+          end
+      | _ => None
+      end
   | cast_case_struct id1 fld1 id2 fld2 =>
-      if ident_eq id1 id2 && fieldlist_eq fld1 fld2 then Some v else None
+      match v with
+      | Vptr b ofs =>
+          if ident_eq id1 id2 && fieldlist_eq fld1 fld2 then Some v else None
+      | _ => None
+      end
   | cast_case_union id1 fld1 id2 fld2 =>
-      if ident_eq id1 id2 && fieldlist_eq fld1 fld2 then Some v else None
+      match v with
+      | Vptr b ofs =>
+          if ident_eq id1 id2 && fieldlist_eq fld1 fld2 then Some v else None
+      | _ => None
+      end
   | cast_case_void =>
       Some v
   | cast_case_default =>
@@ -192,13 +265,15 @@ Inductive classify_bool_cases : Type :=
   | bool_case_i                           (**r integer *)
   | bool_case_f                           (**r float *)
   | bool_case_p                           (**r pointer *)
+  | bool_case_l                           (**r long *)
   | bool_default.
 
 Definition classify_bool (ty: type) : classify_bool_cases :=
   match typeconv ty with
   | Tint _ _ _ => bool_case_i
-  | Tpointer _ _ => bool_case_p
+  | Tpointer _ _ | Tcomp_ptr _ _ => bool_case_p
   | Tfloat _ _ => bool_case_f
+  | Tlong _ _ => bool_case_l
   | _ => bool_default
   end.
 
@@ -207,7 +282,7 @@ Definition classify_bool (ty: type) : classify_bool_cases :=
   considered as true.  The integer zero (which also represents
   the null pointer) and the float 0.0 are false. *)
 
-Function bool_val (v: val) (t: type) : option bool :=
+Definition bool_val (v: val) (t: type) : option bool :=
   match classify_bool t with
   | bool_case_i =>
       match v with
@@ -225,6 +300,11 @@ Function bool_val (v: val) (t: type) : option bool :=
       | Vptr b ofs => Some true
       | _ => None
       end
+  | bool_case_l =>
+      match v with
+      | Vlong n => Some (negb (Int64.eq n Int64.zero))
+      | _ => None
+      end
   | bool_default => None
   end.
 
@@ -239,25 +319,29 @@ Proof.
   assert (A: classify_bool t =
     match t with
     | Tint _ _ _ => bool_case_i
-    | Tpointer _ _ | Tarray _ _ _ | Tfunction _ _ => bool_case_p
+    | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _ => bool_case_p
     | Tfloat _ _ => bool_case_f
+    | Tlong _ _ => bool_case_l
     | _ => bool_default
     end).
-  unfold classify_bool; destruct t; simpl; auto. destruct i; auto. destruct s; auto.
-
+  {
+    unfold classify_bool; destruct t; simpl; auto. destruct i; auto.
+  }
   unfold bool_val. rewrite A. unfold sem_cast. destruct t; simpl; auto; destruct v; auto.
-  destruct (Int.eq i0 Int.zero); auto. 
+  destruct (Int.eq i0 Int.zero); auto.
+  destruct (Int64.eq i Int64.zero); auto.
   destruct (Float.cmp Ceq f0 Float.zero); auto.
   destruct (Int.eq i Int.zero); auto. 
   destruct (Int.eq i Int.zero); auto. 
   destruct (Int.eq i Int.zero); auto. 
+  destruct (Int.eq i0 Int.zero); auto. 
 Qed.
 
 (** ** Unary operators *)
 
 (** *** Boolean negation *)
 
-Function sem_notbool (v: val) (ty: type) : option val :=
+Definition sem_notbool (v: val) (ty: type) : option val :=
   match classify_bool ty with
   | bool_case_i =>
       match v with
@@ -275,6 +359,11 @@ Function sem_notbool (v: val) (ty: type) : option val :=
       | Vptr _ _ => Some Vfalse
       | _ => None
       end
+  | bool_case_l =>
+      match v with
+      | Vlong n => Some (Val.of_bool (Int64.eq n Int64.zero))
+      | _ => None
+      end
   | bool_default => None
   end.
 
@@ -294,6 +383,7 @@ Qed.
 Inductive classify_neg_cases : Type :=
   | neg_case_i(s: signedness)              (**r int *)
   | neg_case_f                             (**r float *)
+  | neg_case_l(s: signedness)              (**r long *)
   | neg_default.
 
 Definition classify_neg (ty: type) : classify_neg_cases :=
@@ -301,10 +391,11 @@ Definition classify_neg (ty: type) : classify_neg_cases :=
   | Tint I32 Unsigned _ => neg_case_i Unsigned
   | Tint _ _ _ => neg_case_i Signed
   | Tfloat _ _ => neg_case_f
+  | Tlong si _ => neg_case_l si
   | _ => neg_default
   end.
 
-Function sem_neg (v: val) (ty: type) : option val :=
+Definition sem_neg (v: val) (ty: type) : option val :=
   match classify_neg ty with
   | neg_case_i sg =>
       match v with
@@ -316,6 +407,11 @@ Function sem_neg (v: val) (ty: type) : option val :=
       | Vfloat f => Some (Vfloat (Float.neg f))
       | _ => None
       end
+  | neg_case_l sg =>
+      match v with
+      | Vlong n => Some (Vlong (Int64.neg n))
+      | _ => None
+      end
   | neg_default => None
   end.
 
@@ -323,20 +419,27 @@ Function sem_neg (v: val) (ty: type) : option val :=
 
 Inductive classify_notint_cases : Type :=
   | notint_case_i(s: signedness)              (**r int *)
+  | notint_case_l(s: signedness)              (**r long *)
   | notint_default.
 
 Definition classify_notint (ty: type) : classify_notint_cases :=
   match ty with
   | Tint I32 Unsigned _ => notint_case_i Unsigned
   | Tint _ _ _ => notint_case_i Signed
+  | Tlong si _ => notint_case_l si
   | _ => notint_default
   end.
 
-Function sem_notint (v: val) (ty: type): option val :=
+Definition sem_notint (v: val) (ty: type): option val :=
   match classify_notint ty with
   | notint_case_i sg =>
       match v with
-      | Vint n => Some (Vint (Int.xor n Int.mone))
+      | Vint n => Some (Vint (Int.not n))
+      | _ => None
+      end
+  | notint_case_l sg =>
+      match v with
+      | Vlong n => Some (Vlong (Int64.not n))
       | _ => None
       end
   | notint_default => None
@@ -351,59 +454,119 @@ Function sem_notint (v: val) (ty: type): option val :=
   (e.g. division, modulus, comparisons), the unsigned operation is selected
   if at least one of the arguments is of type "unsigned int32", otherwise
   the signed operation is performed.
+- Likewise if both arguments are of long integer type.
 - If both arguments are of float type, a float operation is performed.
   We choose to perform all float arithmetic in double precision,
   even if both arguments are single-precision floats.
-- If one argument has integer type and the other has float type,
+- If one argument has integer/long type and the other has float type,
   we convert the integer argument to float, then perform the float operation.
+- If one argument has long integer type and the other integer type,
+  we convert the integer argument to long, then perform the long operation
 *)
 
-(** *** Addition *)
-
-Inductive classify_add_cases : Type :=
-  | add_case_ii(s: signedness)         (**r int, int *)
-  | add_case_ff                        (**r float, float *)
-  | add_case_if(s: signedness)         (**r int, float *)
-  | add_case_fi(s: signedness)         (**r float, int *)
-  | add_case_pi(ty: type)(a: attr)     (**r pointer, int *)
-  | add_case_ip(ty: type)(a: attr)     (**r int, pointer *)
-  | add_default.
-
-Definition classify_add (ty1: type) (ty2: type) :=
+Inductive binarith_cases: Type :=
+  | bin_case_ii(s: signedness)         (**r int, int *)
+  | bin_case_ff                        (**r float, float *)
+  | bin_case_if(s: signedness)         (**r int, float *)
+  | bin_case_fi(s: signedness)         (**r float, int *)
+  | bin_case_ll(s: signedness)         (**r long, long *)
+  | bin_case_il(s1 s2: signedness)     (**r int, long *)
+  | bin_case_li(s1 s2: signedness)     (**r long, int *)
+  | bin_case_lf(s: signedness)         (**r long, float *)
+  | bin_case_fl(s: signedness)         (**r float, long *)
+  | bin_default.
+
+Definition classify_binarith (ty1: type) (ty2: type) : binarith_cases :=
   match typeconv ty1, typeconv ty2 with
-  | Tint I32 Unsigned _, Tint _ _ _ => add_case_ii Unsigned
-  | Tint _ _ _, Tint I32 Unsigned _ => add_case_ii Unsigned
-  | Tint _ _ _, Tint _ _ _ => add_case_ii Signed
-  | Tfloat _ _, Tfloat _ _ => add_case_ff
-  | Tint _ sg _, Tfloat _ _ => add_case_if sg
-  | Tfloat _ _, Tint _ sg _ => add_case_fi sg
-  | Tpointer ty a, Tint _ _ _ => add_case_pi ty a
-  | Tint _ _ _, Tpointer ty a => add_case_ip ty a
-  | _, _ => add_default
+  | Tint I32 Unsigned _, Tint _ _ _ => bin_case_ii Unsigned
+  | Tint _ _ _, Tint I32 Unsigned _ => bin_case_ii Unsigned
+  | Tint _ _ _, Tint _ _ _ => bin_case_ii Signed
+  | Tfloat _ _, Tfloat _ _ => bin_case_ff
+  | Tint _ sg _, Tfloat _ _ => bin_case_if sg
+  | Tfloat _ _, Tint _ sg _ => bin_case_fi sg
+  | Tlong Signed _, Tlong Signed _ => bin_case_ll Signed
+  | Tlong _ _, Tlong _ _ => bin_case_ll Unsigned
+  | Tint _ s1 _, Tlong s2 _ => bin_case_il s1 s2
+  | Tlong s1 _, Tint _ s2 _ => bin_case_li s1 s2
+  | Tlong sg _, Tfloat _ _ => bin_case_lf sg
+  | Tfloat _ _, Tlong sg _ => bin_case_fl sg
+  | _, _ => bin_default
   end.
 
-Function sem_add (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
-  match classify_add t1 t2 with 
-  | add_case_ii sg =>                   (**r integer addition *)
+Definition sem_binarith
+    (sem_int: signedness -> int -> int -> option val)
+    (sem_long: signedness -> int64 -> int64 -> option val)
+    (sem_float: float -> float -> option val)
+    (v1: val) (t1: type) (v2: val) (t2: type) : option val :=
+  match classify_binarith t1 t2 with
+  | bin_case_ii sg =>
+      match v1, v2 with
+      | Vint n1, Vint n2 => sem_int sg n1 n2
+      | _,  _ => None
+      end
+  | bin_case_ff =>
+      match v1, v2 with
+      | Vfloat n1, Vfloat n2 => sem_float n1 n2
+      | _,  _ => None
+      end
+  | bin_case_if sg =>
+      match v1, v2 with
+      | Vint n1, Vfloat n2 => sem_float (cast_int_float sg n1) n2
+      | _, _ => None
+      end
+  | bin_case_fi sg =>
+      match v1, v2 with
+      | Vfloat n1, Vint n2 => sem_float n1 (cast_int_float sg n2)
+      | _, _ => None
+      end
+  | bin_case_ll sg =>
+      match v1, v2 with
+      | Vlong n1, Vlong n2 => sem_long sg n1 n2
+      | _,  _ => None
+      end
+  | bin_case_il sg1 sg2 =>
       match v1, v2 with
-      | Vint n1, Vint n2 => Some (Vint (Int.add n1 n2))
+      | Vint n1, Vlong n2 => sem_long sg2 (cast_int_long sg1 n1) n2
       | _,  _ => None
       end
-  | add_case_ff =>                      (**r float addition *)
+  | bin_case_li sg1 sg2 =>
       match v1, v2 with
-      | Vfloat n1, Vfloat n2 => Some (Vfloat (Float.add n1 n2))
+      | Vlong n1, Vint n2 => sem_long sg1 n1 (cast_int_long sg2 n2)
       | _,  _ => None
       end
-  | add_case_if sg =>                   (**r int plus float *)
+  | bin_case_lf sg =>
       match v1, v2 with
-      | Vint n1, Vfloat n2 => Some (Vfloat (Float.add (cast_int_float sg n1) n2))
+      | Vlong n1, Vfloat n2 => sem_float (cast_long_float sg n1) n2
       | _, _ => None
       end
-  | add_case_fi sg =>                   (**r float plus int *)
+  | bin_case_fl sg =>
       match v1, v2 with
-      | Vfloat n1, Vint n2 => Some (Vfloat (Float.add n1 (cast_int_float sg n2)))
+      | Vfloat n1, Vlong n2 => sem_float n1 (cast_long_float sg n2)
       | _, _ => None
       end
+  | bin_default => None
+  end.
+
+(** *** Addition *)
+
+Inductive classify_add_cases : Type :=
+  | add_case_pi(ty: type)(a: attr)     (**r pointer, int *)
+  | add_case_ip(ty: type)(a: attr)     (**r int, pointer *)
+  | add_case_pl(ty: type)(a: attr)     (**r pointer, long *)
+  | add_case_lp(ty: type)(a: attr)     (**r long, pointer *)
+  | add_default.                       (**r numerical type, numerical type *)
+
+Definition classify_add (ty1: type) (ty2: type) :=
+  match typeconv ty1, typeconv ty2 with
+  | Tpointer ty a, Tint _ _ _ => add_case_pi ty a
+  | Tint _ _ _, Tpointer ty a => add_case_ip ty a
+  | Tpointer ty a, Tlong _ _ => add_case_pl ty a
+  | Tlong _ _, Tpointer ty a => add_case_lp ty a
+  | _, _ => add_default
+  end.
+
+Definition sem_add (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  match classify_add t1 t2 with 
   | add_case_pi ty _ =>                 (**r pointer plus integer *)
       match v1,v2 with
       | Vptr b1 ofs1, Vint n2 => 
@@ -416,59 +579,57 @@ Function sem_add (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
         Some (Vptr b2 (Int.add ofs2 (Int.mul (Int.repr (sizeof ty)) n1)))
       | _,  _ => None
       end   
-  | add_default => None
-end.
+  | add_case_pl ty _ =>                 (**r pointer plus long *)
+      match v1,v2 with
+      | Vptr b1 ofs1, Vlong n2 => 
+        let n2 := Int.repr (Int64.unsigned n2) in
+        Some (Vptr b1 (Int.add ofs1 (Int.mul (Int.repr (sizeof ty)) n2)))
+      | _,  _ => None
+      end   
+  | add_case_lp ty _ =>                 (**r long plus pointer *)
+      match v1,v2 with
+      | Vlong n1, Vptr b2 ofs2 => 
+        let n1 := Int.repr (Int64.unsigned n1) in
+        Some (Vptr b2 (Int.add ofs2 (Int.mul (Int.repr (sizeof ty)) n1)))
+      | _,  _ => None
+      end   
+  | add_default =>
+      sem_binarith
+        (fun sg n1 n2 => Some(Vint(Int.add n1 n2)))
+        (fun sg n1 n2 => Some(Vlong(Int64.add n1 n2)))
+        (fun n1 n2 => Some(Vfloat(Float.add n1 n2)))
+        v1 t1 v2 t2
+  end.
 
 (** *** Subtraction *)
 
 Inductive classify_sub_cases : Type :=
-  | sub_case_ii(s: signedness)          (**r int , int *)
-  | sub_case_ff                         (**r float , float *)
-  | sub_case_if(s: signedness)          (**r int, float *)
-  | sub_case_fi(s: signedness)          (**r float, int *)
-  | sub_case_pi(ty: type)               (**r pointer, int *)
+  | sub_case_pi(ty: type)(a: attr)      (**r pointer, int *)
   | sub_case_pp(ty: type)               (**r pointer, pointer *)
-  | sub_default.
+  | sub_case_pl(ty: type)(a: attr)      (**r pointer, long *)
+  | sub_default.                        (**r numerical type, numerical type *)
 
 Definition classify_sub (ty1: type) (ty2: type) :=
   match typeconv ty1, typeconv ty2 with
-  | Tint I32 Unsigned _, Tint _ _ _ => sub_case_ii Unsigned
-  | Tint _ _ _, Tint I32 Unsigned _ => sub_case_ii Unsigned
-  | Tint _ _ _, Tint _ _ _ => sub_case_ii Signed
-  | Tfloat _ _ , Tfloat _ _ => sub_case_ff
-  | Tint _ sg _, Tfloat _ _ => sub_case_if sg
-  | Tfloat _ _, Tint _ sg _ => sub_case_fi sg
-  | Tpointer ty _, Tint _ _ _ => sub_case_pi ty
+  | Tpointer ty a, Tint _ _ _ => sub_case_pi ty a
   | Tpointer ty _ , Tpointer _ _ => sub_case_pp ty
-  | _ ,_ => sub_default
+  | Tpointer ty a, Tlong _ _ => sub_case_pl ty a
+  | _, _ => sub_default
   end.
 
-Function sem_sub (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+Definition sem_sub (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
   match classify_sub t1 t2 with
-  | sub_case_ii sg =>            (**r integer subtraction *)
-      match v1,v2 with
-      | Vint n1, Vint n2 => Some (Vint (Int.sub n1 n2))
-      | _,  _ => None
-      end 
-  | sub_case_ff =>               (**r float subtraction *)
+  | sub_case_pi ty attr =>            (**r pointer minus integer *)
       match v1,v2 with
-      | Vfloat f1, Vfloat f2 => Some (Vfloat(Float.sub f1 f2))
+      | Vptr b1 ofs1, Vint n2 => 
+          Some (Vptr b1 (Int.sub ofs1 (Int.mul (Int.repr (sizeof ty)) n2)))
       | _,  _ => None
       end
-  | sub_case_if sg =>            (**r int minus float *)
-      match v1, v2 with
-      | Vint n1, Vfloat n2 => Some (Vfloat (Float.sub (cast_int_float sg n1) n2))
-      | _, _ => None
-      end
-  | sub_case_fi sg =>            (**r float minus int *)
-      match v1, v2 with
-      | Vfloat n1, Vint n2 => Some (Vfloat (Float.sub n1 (cast_int_float sg n2)))
-      | _, _ => None
-      end
-  | sub_case_pi ty =>            (**r pointer minus integer *)
+  | sub_case_pl ty attr =>            (**r pointer minus long *)
       match v1,v2 with
-      | Vptr b1 ofs1, Vint n2 => 
-            Some (Vptr b1 (Int.sub ofs1 (Int.mul (Int.repr (sizeof ty)) n2)))
+      | Vptr b1 ofs1, Vlong n2 => 
+          let n2 := Int.repr (Int64.unsigned n2) in
+          Some (Vptr b1 (Int.sub ofs1 (Int.mul (Int.repr (sizeof ty)) n2)))
       | _,  _ => None
       end
   | sub_case_pp ty =>          (**r pointer minus pointer *)
@@ -480,255 +641,187 @@ Function sem_sub (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
           else None
       | _, _ => None
       end
-  | sub_default => None
+  | sub_default =>
+      sem_binarith
+        (fun sg n1 n2 => Some(Vint(Int.sub n1 n2)))
+        (fun sg n1 n2 => Some(Vlong(Int64.sub n1 n2)))
+        (fun n1 n2 => Some(Vfloat(Float.sub n1 n2)))
+        v1 t1 v2 t2
   end.
  
-(** *** Multiplication *)
-
-Inductive classify_mul_cases : Type:=
-  | mul_case_ii(s: signedness) (**r int , int *)
-  | mul_case_ff                (**r float , float *)
-  | mul_case_if(s: signedness) (**r int, float *)
-  | mul_case_fi(s: signedness) (**r float, int *)
-  | mul_default.
-
-Definition classify_mul (ty1: type) (ty2: type) :=
-  match typeconv ty1, typeconv ty2 with
-  | Tint I32 Unsigned _, Tint _ _ _ => mul_case_ii Unsigned
-  | Tint _ _ _, Tint I32 Unsigned _ => mul_case_ii Unsigned
-  | Tint _ _ _, Tint _ _ _ => mul_case_ii Signed
-  | Tfloat _ _ , Tfloat _ _ => mul_case_ff
-  | Tint _ sg _, Tfloat _ _ => mul_case_if sg
-  | Tfloat _ _, Tint _ sg _ => mul_case_fi sg
-  | _,_  => mul_default
-end.
-
-Function sem_mul (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
- match classify_mul t1 t2 with
-  | mul_case_ii sg =>
-      match v1,v2 with
-      | Vint n1, Vint n2 => Some (Vint (Int.mul n1 n2))
-      | _,  _ => None
-      end
-  | mul_case_ff =>
-      match v1,v2 with
-      | Vfloat f1, Vfloat f2 => Some (Vfloat (Float.mul f1 f2))
-      | _,  _ => None
-      end
-  | mul_case_if sg =>
-      match v1, v2 with
-      | Vint n1, Vfloat n2 => Some (Vfloat (Float.mul (cast_int_float sg n1) n2))
-      | _, _ => None
-      end
-  | mul_case_fi sg =>
-      match v1, v2 with
-      | Vfloat n1, Vint n2 => Some (Vfloat (Float.mul n1 (cast_int_float sg n2)))
-      | _, _ => None
-      end
-  | mul_default =>
-      None
-end.
-
-(** *** Division *)
-
-Inductive classify_div_cases : Type:=
-  | div_case_ii(s: signedness) (**r int , int *)
-  | div_case_ff                (**r float , float *)
-  | div_case_if(s: signedness) (**r int, float *)
-  | div_case_fi(s: signedness) (**r float, int *)
-  | div_default.
-
-Definition classify_div (ty1: type) (ty2: type) :=
-  match typeconv ty1, typeconv ty2 with
-  | Tint I32 Unsigned _, Tint _ _ _ => div_case_ii Unsigned
-  | Tint _ _ _, Tint I32 Unsigned _ => div_case_ii Unsigned
-  | Tint _ _ _, Tint _ _ _ => div_case_ii Signed
-  | Tfloat _ _ , Tfloat _ _ => div_case_ff
-  | Tint _ sg _, Tfloat _ _ => div_case_if sg
-  | Tfloat _ _, Tint _ sg _ => div_case_fi sg
-  | _,_  => div_default
-end.
-
-Function sem_div (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
-   match classify_div t1 t2 with
-  | div_case_ii Unsigned =>
-      match v1,v2 with
-      | Vint n1, Vint n2 =>
-          if Int.eq n2 Int.zero then None else Some (Vint (Int.divu n1 n2))
-      | _,_ => None
-      end
-  | div_case_ii Signed =>
-      match v1,v2 with
-       | Vint n1, Vint n2 =>
+(** *** Multiplication, division, modulus *)
+
+Definition sem_mul (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  sem_binarith
+    (fun sg n1 n2 => Some(Vint(Int.mul n1 n2)))
+    (fun sg n1 n2 => Some(Vlong(Int64.mul n1 n2)))
+    (fun n1 n2 => Some(Vfloat(Float.mul n1 n2)))
+    v1 t1 v2 t2.
+
+Definition sem_div (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  sem_binarith
+    (fun sg n1 n2 =>
+      match sg with
+      | Signed =>
           if Int.eq n2 Int.zero
           || Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone
-          then None else Some (Vint(Int.divs n1 n2))
-      | _,_ => None
-      end
-  | div_case_ff =>
-      match v1,v2 with
-      | Vfloat f1, Vfloat f2 => Some (Vfloat(Float.div f1 f2))
-      | _,  _ => None
-      end 
-  | div_case_if sg =>
-      match v1, v2 with
-      | Vint n1, Vfloat n2 => Some (Vfloat (Float.div (cast_int_float sg n1) n2))
-      | _, _ => None
-      end
-  | div_case_fi sg =>
-      match v1, v2 with
-      | Vfloat n1, Vint n2 => Some (Vfloat (Float.div n1 (cast_int_float sg n2)))
-      | _, _ => None
-      end
-  | div_default =>
-      None
-end.
-
-(** *** Integer-only binary operations: modulus, bitwise "and", "or", and "xor" *)
-
-Inductive classify_binint_cases : Type:=
-  | binint_case_ii(s: signedness) (**r int , int *)
-  | binint_default.
-
-Definition classify_binint (ty1: type) (ty2: type) :=
-  match typeconv ty1, typeconv ty2 with
-  | Tint I32 Unsigned _, Tint _ _ _ => binint_case_ii Unsigned
-  | Tint _ _ _, Tint I32 Unsigned _ => binint_case_ii Unsigned
-  | Tint _ _ _, Tint _ _ _ => binint_case_ii Signed
-  | _,_  => binint_default
-end.
-
-Function sem_mod (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
-  match classify_binint t1 t2 with
-  | binint_case_ii Unsigned =>
-      match v1, v2 with
-      | Vint n1, Vint n2 =>
-          if Int.eq n2 Int.zero then None else Some (Vint (Int.modu n1 n2))
-      | _, _ => None
-      end
-  | binint_case_ii Signed =>
-      match v1,v2 with
-      | Vint n1, Vint n2 =>
+          then None else Some(Vint(Int.divs n1 n2))
+      | Unsigned =>
+          if Int.eq n2 Int.zero
+          then None else Some(Vint(Int.divu n1 n2))
+      end)
+    (fun sg n1 n2 =>
+      match sg with
+      | Signed =>
+          if Int64.eq n2 Int64.zero
+          || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone
+          then None else Some(Vlong(Int64.divs n1 n2))
+      | Unsigned =>
+          if Int64.eq n2 Int64.zero
+          then None else Some(Vlong(Int64.divu n1 n2))
+      end)
+    (fun n1 n2 => Some(Vfloat(Float.div n1 n2)))
+    v1 t1 v2 t2.
+
+Definition sem_mod (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  sem_binarith
+    (fun sg n1 n2 =>
+      match sg with
+      | Signed =>
           if Int.eq n2 Int.zero
           || Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone
-          then None else Some (Vint (Int.mods n1 n2))
-      | _, _ => None
-      end
-  | binint_default =>
-      None
-  end.
-
-Function sem_and (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
-  match classify_binint t1 t2 with
-  | binint_case_ii sg =>
-      match v1, v2 with
-      | Vint n1, Vint n2 => Some (Vint(Int.and n1 n2))
-      | _, _ => None
-      end
-  | binint_default => None
-  end.
-
-Function sem_or (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
-  match classify_binint t1 t2 with
-  | binint_case_ii sg =>
-      match v1, v2 with
-      | Vint n1, Vint n2 => Some (Vint(Int.or n1 n2))
-      | _, _ => None
-      end
-  | binint_default => None
-  end.
-
-Function sem_xor (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
-  match classify_binint t1 t2 with
-  | binint_case_ii sg =>
-      match v1, v2 with
-      | Vint n1, Vint n2 => Some (Vint(Int.xor n1 n2))
-      | _, _ => None
-      end
-  | binint_default => None
-  end.
+          then None else Some(Vint(Int.mods n1 n2))
+      | Unsigned =>
+          if Int.eq n2 Int.zero
+          then None else Some(Vint(Int.modu n1 n2))
+      end)
+    (fun sg n1 n2 =>
+      match sg with
+      | Signed =>
+          if Int64.eq n2 Int64.zero
+          || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone
+          then None else Some(Vlong(Int64.mods n1 n2))
+      | Unsigned =>
+          if Int64.eq n2 Int64.zero
+          then None else Some(Vlong(Int64.modu n1 n2))
+      end)
+    (fun n1 n2 => None)
+    v1 t1 v2 t2.
+
+Definition sem_and (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  sem_binarith
+    (fun sg n1 n2 => Some(Vint(Int.and n1 n2)))
+    (fun sg n1 n2 => Some(Vlong(Int64.and n1 n2)))
+    (fun n1 n2 => None)
+    v1 t1 v2 t2.
+
+Definition sem_or (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  sem_binarith
+    (fun sg n1 n2 => Some(Vint(Int.or n1 n2)))
+    (fun sg n1 n2 => Some(Vlong(Int64.or n1 n2)))
+    (fun n1 n2 => None)
+    v1 t1 v2 t2.
+
+Definition sem_xor (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  sem_binarith
+    (fun sg n1 n2 => Some(Vint(Int.xor n1 n2)))
+    (fun sg n1 n2 => Some(Vlong(Int64.xor n1 n2)))
+    (fun n1 n2 => None)
+    v1 t1 v2 t2.
 
 (** *** Shifts *)
 
+(** Shifts do not perform the usual binary conversions.  Instead,
+  each argument is converted independently, and the signedness
+  of the result is always that of the first argument. *)
+
 Inductive classify_shift_cases : Type:=
-  | shift_case_ii(s: signedness) (**r int , int *)
+  | shift_case_ii(s: signedness)         (**r int , int *)
+  | shift_case_ll(s: signedness)         (**r long, long *)
+  | shift_case_il(s: signedness)         (**r int, long *)
+  | shift_case_li(s: signedness)         (**r long, int *)
   | shift_default.
 
 Definition classify_shift (ty1: type) (ty2: type) :=
   match typeconv ty1, typeconv ty2 with
   | Tint I32 Unsigned _, Tint _ _ _ => shift_case_ii Unsigned
   | Tint _ _ _, Tint _ _ _ => shift_case_ii Signed
+  | Tint I32 Unsigned _, Tlong _ _ => shift_case_il Unsigned
+  | Tint _ _ _, Tlong _ _ => shift_case_il Signed
+  | Tlong s _, Tint _ _ _ => shift_case_li s
+  | Tlong s _, Tlong _ _ => shift_case_ll s
   | _,_  => shift_default
-end.
+  end.
 
-Function sem_shl (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+Definition sem_shift
+    (sem_int: signedness -> int -> int -> int)
+    (sem_long: signedness -> int64 -> int64 -> int64)
+    (v1: val) (t1: type) (v2: val) (t2: type) : option val :=
   match classify_shift t1 t2 with
   | shift_case_ii sg =>
       match v1, v2 with
-      | Vint n1, Vint n2 =>
-         if Int.ltu n2 Int.iwordsize then Some (Vint(Int.shl n1 n2)) else None
+      | Vint n1, Vint n2 => 
+          if Int.ltu n2 Int.iwordsize
+          then Some(Vint(sem_int sg n1 n2)) else None
+      | _, _ => None
+      end
+  | shift_case_il sg =>
+      match v1, v2 with
+      | Vint n1, Vlong n2 => 
+          if Int64.ltu n2 (Int64.repr 32)
+          then Some(Vint(sem_int sg n1 (Int64.loword n2))) else None
+      | _, _ => None
+      end
+  | shift_case_li sg =>
+      match v1, v2 with
+      | Vlong n1, Vint n2 => 
+          if Int.ltu n2 Int64.iwordsize'
+          then Some(Vlong(sem_long sg n1 (Int64.repr (Int.unsigned n2)))) else None
+      | _, _ => None
+      end
+  | shift_case_ll sg =>
+      match v1, v2 with
+      | Vlong n1, Vlong n2 => 
+          if Int64.ltu n2 Int64.iwordsize
+          then Some(Vlong(sem_long sg n1 n2)) else None
       | _, _ => None
       end
   | shift_default => None
   end.
 
-Function sem_shr (v1: val) (t1: type) (v2: val) (t2: type): option val :=
-  match classify_shift t1 t2 with 
-  | shift_case_ii Unsigned =>
-      match v1,v2 with 
-      | Vint n1, Vint n2 =>
-          if Int.ltu n2 Int.iwordsize then Some (Vint (Int.shru n1 n2)) else None
-      | _,_ => None
-      end
-   | shift_case_ii Signed =>
-      match v1,v2 with
-      | Vint n1,  Vint n2 =>
-          if Int.ltu n2 Int.iwordsize then Some (Vint (Int.shr n1 n2)) else None
-      | _,  _ => None
-      end
-   | shift_default =>
-      None
-   end.
+Definition sem_shl (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  sem_shift
+    (fun sg n1 n2 => Int.shl n1 n2)
+    (fun sg n1 n2 => Int64.shl n1 n2)
+    v1 t1 v2 t2.
+
+Definition sem_shr (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
+  sem_shift
+    (fun sg n1 n2 => match sg with Signed => Int.shr n1 n2 | Unsigned => Int.shru n1 n2 end)
+    (fun sg n1 n2 => match sg with Signed => Int64.shr n1 n2 | Unsigned => Int64.shru n1 n2 end)
+    v1 t1 v2 t2.
 
 (** *** Comparisons *)
 
-Inductive classify_cmp_cases : Type:=
-  | cmp_case_ii(s: signedness) (**r int, int *)
-  | cmp_case_pp                (**r pointer, pointer *)
-  | cmp_case_ff                (**r float , float *)
-  | cmp_case_if(s: signedness) (**r int, float *)
-  | cmp_case_fi(s: signedness) (**r float, int *)
-  | cmp_default.
+Inductive classify_cmp_cases : Type :=
+  | cmp_case_pp                       (**r pointer, pointer *)
+  | cmp_default.                      (**r numerical, numerical *)
 
 Definition classify_cmp (ty1: type) (ty2: type) :=
   match typeconv ty1, typeconv ty2 with 
-  | Tint I32 Unsigned _ , Tint _ _ _ => cmp_case_ii Unsigned
-  | Tint _ _ _ , Tint I32 Unsigned _ => cmp_case_ii Unsigned
-  | Tint _ _ _ , Tint _ _ _ => cmp_case_ii Signed
-  | Tfloat _ _ , Tfloat _ _ => cmp_case_ff
-  | Tint _ sg _, Tfloat _ _ => cmp_case_if sg
-  | Tfloat _ _, Tint _ sg _ => cmp_case_fi sg
   | Tpointer _ _ , Tpointer _ _ => cmp_case_pp
   | Tpointer _ _ , Tint _ _ _ => cmp_case_pp
   | Tint _ _ _, Tpointer _ _ => cmp_case_pp
-  | _ , _ => cmp_default
+  | _, _ => cmp_default
   end.
 
-Function sem_cmp (c:comparison)
+Definition sem_cmp (c:comparison)
                   (v1: val) (t1: type) (v2: val) (t2: type)
                   (m: mem): option val :=
   match classify_cmp t1 t2 with
-  | cmp_case_ii Signed =>
-      match v1,v2 with
-      | Vint n1, Vint n2 => Some (Val.of_bool (Int.cmp c n1 n2))
-      | _,  _ => None
-      end
-  | cmp_case_ii Unsigned =>
-      match v1,v2 with
-      | Vint n1, Vint n2 => Some (Val.of_bool (Int.cmpu c n1 n2))
-      | _,  _ => None
-      end
   | cmp_case_pp =>
+     option_map Val.of_bool (Val.cmpu_bool (Mem.valid_pointer m) c v1 v2)
+(*
       match v1,v2 with
       | Vint n1, Vint n2 => Some (Val.of_bool (Int.cmpu c n1 n2))
       | Vptr b1 ofs1,  Vptr b2 ofs2  =>
@@ -752,22 +845,16 @@ Function sem_cmp (c:comparison)
           else None
       | _,  _ => None
       end
-  | cmp_case_ff =>
-      match v1,v2 with
-      | Vfloat f1, Vfloat f2 => Some (Val.of_bool (Float.cmp c f1 f2))  
-      | _,  _ => None
-      end
-  | cmp_case_if sg =>
-      match v1, v2 with
-      | Vint n1, Vfloat n2 => Some (Val.of_bool (Float.cmp c (cast_int_float sg n1) n2))
-      | _, _ => None
-      end
-  | cmp_case_fi sg =>
-      match v1, v2 with
-      | Vfloat n1, Vint n2 => Some (Val.of_bool (Float.cmp c n1 (cast_int_float sg n2)))
-      | _, _ => None
-      end
-  | cmp_default => None
+*)
+  | cmp_default =>
+      sem_binarith
+        (fun sg n1 n2 =>
+            Some(Val.of_bool(match sg with Signed => Int.cmp c n1 n2 | Unsigned => Int.cmpu c n1 n2 end)))
+        (fun sg n1 n2 =>
+            Some(Val.of_bool(match sg with Signed => Int64.cmp c n1 n2 | Unsigned => Int64.cmpu c n1 n2 end)))
+        (fun n1 n2 =>
+            Some(Val.of_bool(Float.cmp c n1 n2)))
+        v1 t1 v2 t2
   end.
 
 (** ** Function applications *)
@@ -821,3 +908,249 @@ Definition sem_incrdecr (id: incr_or_decr) (v: val) (ty: type) :=
   | Incr => sem_add v ty (Vint Int.one) type_int32s
   | Decr => sem_sub v ty (Vint Int.one) type_int32s
   end.
+
+(** * Compatibility with extensions and injections *)
+
+Section GENERIC_INJECTION.
+
+Variable f: meminj.
+Variables m m': mem.
+
+Hypothesis valid_pointer_inj:
+  forall b1 ofs b2 delta,
+  f b1 = Some(b2, delta) ->
+  Mem.valid_pointer m b1 (Int.unsigned ofs) = true ->
+  Mem.valid_pointer m' b2 (Int.unsigned (Int.add ofs (Int.repr delta))) = true.
+
+Hypothesis weak_valid_pointer_inj:
+  forall b1 ofs b2 delta,
+  f b1 = Some(b2, delta) ->
+  Mem.weak_valid_pointer m b1 (Int.unsigned ofs) = true ->
+  Mem.weak_valid_pointer m' b2 (Int.unsigned (Int.add ofs (Int.repr delta))) = true.
+
+Hypothesis weak_valid_pointer_no_overflow:
+  forall b1 ofs b2 delta,
+  f b1 = Some(b2, delta) ->
+  Mem.weak_valid_pointer m b1 (Int.unsigned ofs) = true ->
+  0 <= Int.unsigned ofs + Int.unsigned (Int.repr delta) <= Int.max_unsigned.
+
+Hypothesis valid_different_pointers_inj:
+  forall b1 ofs1 b2 ofs2 b1' delta1 b2' delta2,
+  b1 <> b2 ->
+  Mem.valid_pointer m b1 (Int.unsigned ofs1) = true ->
+  Mem.valid_pointer m b2 (Int.unsigned ofs2) = true ->
+  f b1 = Some (b1', delta1) ->
+  f b2 = Some (b2', delta2) ->
+  b1' <> b2' \/
+  Int.unsigned (Int.add ofs1 (Int.repr delta1)) <> Int.unsigned (Int.add ofs2 (Int.repr delta2)).
+
+Remark val_inject_vtrue: forall f, val_inject f Vtrue Vtrue.
+Proof. unfold Vtrue; auto. Qed.
+
+Remark val_inject_vfalse: forall f, val_inject f Vfalse Vfalse.
+Proof. unfold Vfalse; auto. Qed.
+
+Remark val_inject_of_bool: forall f b, val_inject f (Val.of_bool b) (Val.of_bool b).
+Proof. intros. unfold Val.of_bool. destruct b; [apply val_inject_vtrue|apply val_inject_vfalse]. 
+Qed.
+
+Hint Resolve val_inject_vtrue val_inject_vfalse val_inject_of_bool.
+
+Ltac TrivialInject :=
+  match goal with
+  | |- exists v', Some ?v = Some v' /\ _ => exists v; split; auto
+  | _ => idtac
+  end.
+
+Lemma sem_unary_operation_inject:
+  forall op v1 ty v tv1,
+  sem_unary_operation op v1 ty = Some v ->
+  val_inject f v1 tv1 ->
+  exists tv, sem_unary_operation op tv1 ty = Some tv /\ val_inject f v tv.
+Proof.
+  unfold sem_unary_operation; intros. destruct op.
+  (* notbool *)
+  unfold sem_notbool in *; destruct (classify_bool ty); inv H0; inv H; TrivialInject.
+  (* notint *)
+  unfold sem_notint in *; destruct (classify_notint ty); inv H0; inv H; TrivialInject.
+  (* neg *)
+  unfold sem_neg in *; destruct (classify_neg ty); inv H0; inv H; TrivialInject.
+Qed.
+
+Definition optval_self_injects (ov: option val) : Prop :=
+  match ov with
+  | Some (Vptr b ofs) => False
+  | _ => True
+  end.
+
+Remark sem_binarith_inject:
+  forall sem_int sem_long sem_float v1 t1 v2 t2 v v1' v2',
+  sem_binarith sem_int sem_long sem_float v1 t1 v2 t2 = Some v ->
+  val_inject f v1 v1' -> val_inject f v2 v2' ->
+  (forall sg n1 n2, optval_self_injects (sem_int sg n1 n2)) ->
+  (forall sg n1 n2, optval_self_injects (sem_long sg n1 n2)) ->
+  (forall n1 n2, optval_self_injects (sem_float n1 n2)) ->
+  exists v', sem_binarith sem_int sem_long sem_float v1' t1 v2' t2 = Some v' /\ val_inject f v v'.
+Proof.
+  intros. 
+  assert (SELF: forall ov v, ov = Some v -> optval_self_injects ov -> val_inject f v v).
+  {
+    intros. subst ov; simpl in H6. destruct v0; contradiction || constructor.
+  }
+  exists v.
+  unfold sem_binarith in *; destruct (classify_binarith t1 t2); inv H0; inv H1; discriminate || eauto.
+Qed.
+
+Remark sem_shift_inject:
+  forall sem_int sem_long v1 t1 v2 t2 v v1' v2',
+  sem_shift sem_int sem_long v1 t1 v2 t2 = Some v ->
+  val_inject f v1 v1' -> val_inject f v2 v2' ->
+  exists v', sem_shift sem_int sem_long v1' t1 v2' t2 = Some v' /\ val_inject f v v'.
+Proof.
+  intros. exists v.
+  unfold sem_shift in *; destruct (classify_shift t1 t2); inv H0; inv H1; try discriminate.
+  destruct (Int.ltu i0 Int.iwordsize); inv H; auto.
+  destruct (Int64.ltu i0 Int64.iwordsize); inv H; auto.
+  destruct (Int64.ltu i0 (Int64.repr 32)); inv H; auto.
+  destruct (Int.ltu i0 Int64.iwordsize'); inv H; auto.
+Qed.
+
+Remark sem_cmp_inj:
+  forall cmp v1 tv1 ty1 v2 tv2 ty2 v,
+  sem_cmp cmp v1 ty1 v2 ty2 m = Some v ->
+  val_inject f v1 tv1 ->
+  val_inject f v2 tv2 ->
+  exists tv, sem_cmp cmp tv1 ty1 tv2 ty2 m' = Some tv /\ val_inject f v tv.
+Proof.
+  intros.
+  unfold sem_cmp in *; destruct (classify_cmp ty1 ty2).
+- (* pointer - pointer *)
+  destruct (Val.cmpu_bool (Mem.valid_pointer m) cmp v1 v2) as [b|] eqn:E; simpl in H; inv H.
+  replace (Val.cmpu_bool (Mem.valid_pointer m') cmp tv1 tv2) with (Some b).
+  simpl. TrivialInject. 
+  symmetry. eapply val_cmpu_bool_inject; eauto. 
+- (* numerical - numerical *)
+  assert (SELF: forall b, optval_self_injects (Some (Val.of_bool b))).
+  {
+    destruct b; exact I.
+  }
+  eapply sem_binarith_inject; eauto.
+Qed.
+
+Lemma sem_binary_operation_inj:
+  forall op v1 ty1 v2 ty2 v tv1 tv2,
+  sem_binary_operation op v1 ty1 v2 ty2 m = Some v ->
+  val_inject f v1 tv1 -> val_inject f v2 tv2 ->
+  exists tv, sem_binary_operation op tv1 ty1 tv2 ty2 m' = Some tv /\ val_inject f v tv.
+Proof.
+  unfold sem_binary_operation; intros; destruct op.
+- (* add *)
+  unfold sem_add in *; destruct (classify_add ty1 ty2).
+  + inv H0; inv H1; inv H. TrivialInject. 
+    econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut.
+  + inv H0; inv H1; inv H. TrivialInject. 
+    econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut.
+  + inv H0; inv H1; inv H. TrivialInject. 
+    econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut.
+  + inv H0; inv H1; inv H. TrivialInject. 
+    econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut.
+  + eapply sem_binarith_inject; eauto; intros; exact I.
+- (* sub *)
+  unfold sem_sub in *; destruct (classify_sub ty1 ty2).
+  + inv H0; inv H1; inv H. TrivialInject.
+    econstructor. eauto. rewrite Int.sub_add_l. auto.
+  + inv H0; inv H1; inv H. TrivialInject.
+    destruct (zeq b1 b0); try discriminate. subst b1. 
+    rewrite H0 in H2; inv H2. rewrite zeq_true. 
+    destruct (Int.eq (Int.repr (sizeof ty)) Int.zero); inv H3.
+    rewrite Int.sub_shifted. TrivialInject.
+  + inv H0; inv H1; inv H. TrivialInject.
+    econstructor. eauto. rewrite Int.sub_add_l. auto.
+  + eapply sem_binarith_inject; eauto; intros; exact I.
+- (* mul *)
+  eapply sem_binarith_inject; eauto; intros; exact I.
+- (* div *)
+  eapply sem_binarith_inject; eauto; intros.
+  destruct sg.
+  destruct (Int.eq n2 Int.zero
+            || Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone); exact I.
+  destruct (Int.eq n2 Int.zero); exact I.
+  destruct sg.
+  destruct (Int64.eq n2 Int64.zero
+            || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); exact I.
+  destruct (Int64.eq n2 Int64.zero); exact I.
+  exact I.
+- (* mod *)
+  eapply sem_binarith_inject; eauto; intros.
+  destruct sg.
+  destruct (Int.eq n2 Int.zero
+            || Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone); exact I.
+  destruct (Int.eq n2 Int.zero); exact I.
+  destruct sg.
+  destruct (Int64.eq n2 Int64.zero
+            || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); exact I.
+  destruct (Int64.eq n2 Int64.zero); exact I.
+  exact I.
+- (* and *)
+  eapply sem_binarith_inject; eauto; intros; exact I.
+- (* or *)
+  eapply sem_binarith_inject; eauto; intros; exact I.
+- (* xor *)
+  eapply sem_binarith_inject; eauto; intros; exact I.
+- (* shl *)
+  eapply sem_shift_inject; eauto.
+- (* shr *)
+  eapply sem_shift_inject; eauto.
+  (* comparisons *)
+- eapply sem_cmp_inj; eauto.
+- eapply sem_cmp_inj; eauto.
+- eapply sem_cmp_inj; eauto.
+- eapply sem_cmp_inj; eauto.
+- eapply sem_cmp_inj; eauto.
+- eapply sem_cmp_inj; eauto.
+Qed.
+
+Lemma sem_cast_inject:
+  forall v1 ty1 ty v tv1,
+  sem_cast v1 ty1 ty = Some v ->
+  val_inject f v1 tv1 ->
+  exists tv, sem_cast tv1 ty1 ty = Some tv /\ val_inject f v tv.
+Proof.
+  unfold sem_cast; intros; destruct (classify_cast ty1 ty);
+  inv H0; inv H; TrivialInject.
+- econstructor; eauto. 
+- destruct (cast_float_int si2 f0); inv H1; TrivialInject.
+- destruct (cast_float_long si2 f0); inv H1; TrivialInject.
+- destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; TrivialInject. econstructor; eauto.
+- destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; TrivialInject. econstructor; eauto.
+- econstructor; eauto.
+Qed.
+
+Lemma bool_val_inject:
+  forall v ty b tv,
+  bool_val v ty = Some b ->
+  val_inject f v tv ->
+  bool_val tv ty = Some b.
+Proof.
+  unfold bool_val; intros. 
+  destruct (classify_bool ty); inv H0; congruence.
+Qed.
+
+End GENERIC_INJECTION.
+
+Lemma sem_binary_operation_inject:
+  forall f m m' op v1 ty1 v2 ty2 v tv1 tv2,
+  sem_binary_operation op v1 ty1 v2 ty2 m = Some v ->
+  val_inject f v1 tv1 -> val_inject f v2 tv2 ->
+  Mem.inject f m m' ->
+  exists tv, sem_binary_operation op tv1 ty1 tv2 ty2 m' = Some tv /\ val_inject f v tv.
+Proof.
+  intros. eapply sem_binary_operation_inj; eauto. 
+  intros; eapply Mem.valid_pointer_inject_val; eauto.
+  intros; eapply Mem.weak_valid_pointer_inject_val; eauto.
+  intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
+  intros; eapply Mem.different_pointers_inject; eauto.
+Qed.
+
+
+
diff --git a/cfrontend/Csharpminor.v b/cfrontend/Csharpminor.v
index d0bd9f4..718daa1 100644
--- a/cfrontend/Csharpminor.v
+++ b/cfrontend/Csharpminor.v
@@ -33,7 +33,8 @@ Require Import Smallstep.
 
 Inductive constant : Type :=
   | Ointconst: int -> constant          (**r integer constant *)
-  | Ofloatconst: float -> constant.     (**r floating-point constant *)
+  | Ofloatconst: float -> constant      (**r floating-point constant *)
+  | Olongconst: int64 -> constant.      (**r long integer constant *)
 
 Definition unary_operation : Type := Cminor.unary_operation.
 Definition binary_operation : Type := Cminor.binary_operation.
@@ -238,6 +239,7 @@ Definition eval_constant (cst: constant) : option val :=
   match cst with
   | Ointconst n => Some (Vint n)
   | Ofloatconst n => Some (Vfloat n)
+  | Olongconst n => Some (Vlong n)
   end.
 
 Definition eval_unop := Cminor.eval_unop.
diff --git a/cfrontend/Cshmgen.v b/cfrontend/Cshmgen.v
index 51f89da..2a27852 100644
--- a/cfrontend/Cshmgen.v
+++ b/cfrontend/Cshmgen.v
@@ -44,7 +44,7 @@ Open Local Scope error_monad_scope.
 -  The C types of the arguments of the operation.  These types
      are used to insert the necessary numeric conversions and to
      resolve operation overloading.
-   Most of these functions return an [option expr], with [None] 
+   Most of these functions return a [res expr], with [Error]
    denoting a case where the operation is not defined at the given types.
 *)
 
@@ -52,6 +52,8 @@ Definition make_intconst (n: int) :=  Econst (Ointconst n).
 
 Definition make_floatconst (f: float) :=  Econst (Ofloatconst f).
 
+Definition make_longconst (f: int64) :=  Econst (Olongconst f).
+
 Definition make_floatofint (e: expr) (sg: signedness) :=
   match sg with
   | Signed => Eunop Ofloatofint e
@@ -64,6 +66,24 @@ Definition make_intoffloat (e: expr) (sg: signedness) :=
   | Unsigned => Eunop Ointuoffloat e
   end.
 
+Definition make_longofint (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Olongofint e
+  | Unsigned => Eunop Olongofintu e
+  end.
+
+Definition make_floatoflong (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Ofloatoflong e
+  | Unsigned => Eunop Ofloatoflongu e
+  end.
+
+Definition make_longoffloat (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Olongoffloat e
+  | Unsigned => Eunop Olonguoffloat e
+  end.
+
 (** [make_boolean e ty] returns a Csharpminor expression that evaluates
    to the boolean value of [e].  *)
 
@@ -80,6 +100,7 @@ Definition make_boolean (e: expr) (ty: type) :=
   | bool_case_i => make_cmp_ne_zero e
   | bool_case_f => Ebinop (Ocmpf Cne) e (make_floatconst Float.zero)
   | bool_case_p => Ebinop (Ocmpu Cne) e (make_intconst Int.zero)
+  | bool_case_l => Ebinop (Ocmpl Cne) e (make_longconst Int64.zero)
   | bool_default => e   (**r should not happen *)
   end.
 
@@ -88,6 +109,7 @@ Definition make_notbool (e: expr) (ty: type) :=
   | bool_case_i => OK (Ebinop (Ocmp Ceq) e (make_intconst Int.zero))
   | bool_case_f => OK (Ebinop (Ocmpf Ceq) e (make_floatconst Float.zero))
   | bool_case_p => OK (Ebinop (Ocmpu Ceq) e (make_intconst Int.zero))
+  | bool_case_l => OK (Ebinop (Ocmpl Ceq) e (make_longconst Int64.zero))
   | _ => Error (msg "Cshmgen.make_notbool")
   end.
 
@@ -95,96 +117,144 @@ Definition make_neg (e: expr) (ty: type) :=
   match classify_neg ty with
   | neg_case_i _ => OK (Eunop Onegint e)
   | neg_case_f => OK (Eunop Onegf e)
+  | neg_case_l _ => OK (Eunop Onegl e)
   | _ => Error (msg "Cshmgen.make_neg")
   end.
 
 Definition make_notint (e: expr) (ty: type) :=
-  Eunop Onotint e.
+  match classify_notint ty with
+  | notint_case_i _ => OK (Eunop Onotint e)
+  | notint_case_l _ => OK (Eunop Onotl e)
+  | _ => Error (msg "Cshmgen.make_notint")
+  end.
+
+Definition make_binarith (iop iopu fop lop lopu: binary_operation)
+                         (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
+  match classify_binarith ty1 ty2 with
+  | bin_case_ii Signed => OK (Ebinop iop e1 e2)
+  | bin_case_ii Unsigned => OK (Ebinop iopu e1 e2)
+  | bin_case_ff => OK (Ebinop fop e1 e2)
+  | bin_case_if sg => OK (Ebinop fop (make_floatofint e1 sg) e2)
+  | bin_case_fi sg => OK (Ebinop fop e1 (make_floatofint e2 sg))
+  | bin_case_ll Signed => OK (Ebinop lop e1 e2)
+  | bin_case_ll Unsigned => OK (Ebinop lopu e1 e2)
+  | bin_case_il sg1 Signed => OK (Ebinop lop (make_longofint e1 sg1) e2)
+  | bin_case_il sg1 Unsigned => OK (Ebinop lopu (make_longofint e1 sg1) e2)
+  | bin_case_li Signed sg2 => OK (Ebinop lop e1 (make_longofint e2 sg2))
+  | bin_case_li Unsigned sg2 => OK (Ebinop lopu e1 (make_longofint e2 sg2))
+  | bin_case_lf sg => OK (Ebinop fop (make_floatoflong e1 sg) e2)
+  | bin_case_fl sg => OK (Ebinop fop e1 (make_floatoflong e2 sg))
+  | bin_default => Error (msg "Cshmgen.make_binarith")
+  end.
 
 Definition make_add (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
   match classify_add ty1 ty2 with
-  | add_case_ii _ => OK (Ebinop Oadd e1 e2)
-  | add_case_ff => OK (Ebinop Oaddf e1 e2)
-  | add_case_if sg => OK (Ebinop Oaddf (make_floatofint e1 sg) e2)
-  | add_case_fi sg => OK (Ebinop Oaddf e1 (make_floatofint e2 sg))
   | add_case_pi ty _ =>
       let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
       OK (Ebinop Oadd e1 (Ebinop Omul n e2))
   | add_case_ip ty _ =>
       let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
       OK (Ebinop Oadd e2 (Ebinop Omul n e1))
-  | add_default => Error (msg "Cshmgen.make_add")
+  | add_case_pl ty _ =>
+      let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
+      OK (Ebinop Oadd e1 (Ebinop Omul n (Eunop Ointoflong e2)))
+  | add_case_lp ty _ =>
+      let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
+      OK (Ebinop Oadd e2 (Ebinop Omul n (Eunop Ointoflong e1)))
+  | add_default =>
+      make_binarith Oadd Oadd Oaddf Oaddl Oaddl e1 ty1 e2 ty2
   end.
 
 Definition make_sub (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
   match classify_sub ty1 ty2 with
-  | sub_case_ii _ => OK (Ebinop Osub e1 e2)
-  | sub_case_ff => OK (Ebinop Osubf e1 e2)
-  | sub_case_if sg => OK (Ebinop Osubf (make_floatofint e1 sg) e2)
-  | sub_case_fi sg => OK (Ebinop Osubf e1 (make_floatofint e2 sg))
-  | sub_case_pi ty =>
+  | sub_case_pi ty _ =>
       let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
       OK (Ebinop Osub e1 (Ebinop Omul n e2))
   | sub_case_pp ty =>
       let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
       OK (Ebinop Odivu (Ebinop Osub e1 e2) n)
-  | sub_default => Error (msg "Cshmgen.make_sub")
+  | sub_case_pl ty _ =>
+      let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
+      OK (Ebinop Osub e1 (Ebinop Omul n (Eunop Ointoflong e2)))
+  | sub_default =>
+      make_binarith Osub Osub Osubf Osubl Osubl e1 ty1 e2 ty2
   end.
 
 Definition make_mul (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  match classify_mul ty1 ty2 with
-  | mul_case_ii _ => OK (Ebinop Omul e1 e2)
-  | mul_case_ff => OK (Ebinop Omulf e1 e2)
-  | mul_case_if sg => OK (Ebinop Omulf (make_floatofint e1 sg) e2)
-  | mul_case_fi sg => OK (Ebinop Omulf e1 (make_floatofint e2 sg))
-  | mul_default => Error (msg "Cshmgen.make_mul")
-  end.
+  make_binarith Omul Omul Omulf Omull Omull e1 ty1 e2 ty2.
 
 Definition make_div (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  match classify_div ty1 ty2 with
-  | div_case_ii Unsigned => OK (Ebinop Odivu e1 e2)
-  | div_case_ii Signed => OK (Ebinop Odiv e1 e2)
-  | div_case_ff => OK (Ebinop Odivf e1 e2)
-  | div_case_if sg => OK (Ebinop Odivf (make_floatofint e1 sg) e2)
-  | div_case_fi sg => OK (Ebinop Odivf e1 (make_floatofint e2 sg))
-  | div_default => Error (msg "Cshmgen.make_div")
+  make_binarith Odiv Odivu Odivf Odivl Odivlu e1 ty1 e2 ty2.
+
+Definition make_binarith_int (iop iopu lop lopu: binary_operation)
+                             (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
+  match classify_binarith ty1 ty2 with
+  | bin_case_ii Signed => OK (Ebinop iop e1 e2)
+  | bin_case_ii Unsigned => OK (Ebinop iopu e1 e2)
+  | bin_case_ll Signed => OK (Ebinop lop e1 e2)
+  | bin_case_ll Unsigned => OK (Ebinop lopu e1 e2)
+  | bin_case_il sg1 Signed => OK (Ebinop lop (make_longofint e1 sg1) e2)
+  | bin_case_il sg1 Unsigned => OK (Ebinop lopu (make_longofint e1 sg1) e2)
+  | bin_case_li Signed sg2 => OK (Ebinop lop e1 (make_longofint e2 sg2))
+  | bin_case_li Unsigned sg2 => OK (Ebinop lopu e1 (make_longofint e2 sg2))
+  | _ => Error (msg "Cshmgen.make_binarith_int")
   end.
 
 Definition make_mod (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  match classify_binint ty1 ty2 with
-  | binint_case_ii Unsigned => OK (Ebinop Omodu e1 e2)
-  | binint_case_ii Signed => OK (Ebinop Omod e1 e2)
-  | mod_default => Error (msg "Cshmgen.make_mod")
-  end.
+  make_binarith_int Omod Omodu Omodl Omodlu e1 ty1 e2 ty2.
 
 Definition make_and (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  OK(Ebinop Oand e1 e2).
+  make_binarith_int Oand Oand Oandl Oandl e1 ty1 e2 ty2.
 
 Definition make_or (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  OK(Ebinop Oor e1 e2).
+  make_binarith_int Oor Oor Oorl Oorl e1 ty1 e2 ty2.
 
 Definition make_xor (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  OK(Ebinop Oxor e1 e2).
+  make_binarith_int Oxor Oxor Oxorl Oxorl e1 ty1 e2 ty2.
+
+(*
+Definition make_shift (iop iopu lop lopu: binary_operation)
+                      (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
+  match classify_shift ty1 ty2 with
+  | shift_case_ii Signed => OK (Ebinop iop e1 e2)
+  | shift_case_ii Unsigned => OK (Ebinop iopu e1 e2)
+  | shift_case_li Signed => OK (Ebinop lop e1 e2)
+  | shift_case_li Unsigned => OK (Ebinop lopu e1 e2)
+  | shift_case_il Signed => OK (Ebinop iop e1 (Eunop Ointoflong e2))
+  | shift_case_il Unsigned => OK (Ebinop iopu e1 (Eunop Ointoflong e2))
+  | shift_case_ll Signed => OK (Ebinop lop e1 (Eunop Ointoflong e2))
+  | shift_case_ll Unsigned => OK (Ebinop lopu e1 (Eunop Ointoflong e2))
+  | shift_default => Error (msg "Cshmgen.make_shift")
+  end.
+*)
 
 Definition make_shl (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  OK(Ebinop Oshl e1 e2).
+  match classify_shift ty1 ty2 with
+  | shift_case_ii _ => OK (Ebinop Oshl e1 e2)
+  | shift_case_li _ => OK (Ebinop Oshll e1 e2)
+  | shift_case_il _ => OK (Ebinop Oshl e1 (Eunop Ointoflong e2))
+  | shift_case_ll _ => OK (Ebinop Oshll e1 (Eunop Ointoflong e2))
+  | shift_default => Error (msg "Cshmgen.make_shl")
+  end.
 
 Definition make_shr (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
   match classify_shift ty1 ty2 with
-  | shift_case_ii Unsigned => OK (Ebinop Oshru e1 e2)
   | shift_case_ii Signed => OK (Ebinop Oshr e1 e2)
-  | shr_default => Error (msg "Cshmgen.make_shr")
+  | shift_case_ii Unsigned => OK (Ebinop Oshru e1 e2)
+  | shift_case_li Signed => OK (Ebinop Oshrl e1 e2)
+  | shift_case_li Unsigned => OK (Ebinop Oshrlu e1 e2)
+  | shift_case_il Signed => OK (Ebinop Oshr e1 (Eunop Ointoflong e2))
+  | shift_case_il Unsigned => OK (Ebinop Oshru e1 (Eunop Ointoflong e2))
+  | shift_case_ll Signed => OK (Ebinop Oshrl e1 (Eunop Ointoflong e2))
+  | shift_case_ll Unsigned => OK (Ebinop Oshrlu e1 (Eunop Ointoflong e2))
+  | shift_default => Error (msg "Cshmgen.make_shr")
   end.
 
 Definition make_cmp (c: comparison) (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
   match classify_cmp ty1 ty2 with
-  | cmp_case_ii Signed => OK (Ebinop (Ocmp c) e1 e2)
-  | cmp_case_ii Unsigned => OK (Ebinop (Ocmpu c) e1 e2)
   | cmp_case_pp => OK (Ebinop (Ocmpu c) e1 e2)
-  | cmp_case_ff => OK (Ebinop (Ocmpf c) e1 e2)
-  | cmp_case_if sg => OK (Ebinop (Ocmpf c) (make_floatofint e1 sg) e2)
-  | cmp_case_fi sg => OK (Ebinop (Ocmpf c) e1 (make_floatofint e2 sg))
-  | cmp_default => Error (msg "Cshmgen.make_cmp")
+  | cmp_default =>
+      make_binarith (Ocmp c) (Ocmpu c) (Ocmpf c) (Ocmpl c) (Ocmplu c) e1 ty1 e2 ty2
   end.
 
 (** [make_cast from to e] applies to [e] the numeric conversions needed
@@ -208,17 +278,23 @@ Definition make_cast_float (e: expr) (sz: floatsize) :=
 
 Definition make_cast (from to: type) (e: expr) :=
   match classify_cast from to with
-  | cast_case_neutral => e
-  | cast_case_i2i sz2 si2 => make_cast_int e sz2 si2
-  | cast_case_f2f sz2 => make_cast_float e sz2
-  | cast_case_i2f si1 sz2 => make_cast_float (make_floatofint e si1) sz2
-  | cast_case_f2i sz2 si2 => make_cast_int (make_intoffloat e si2) sz2 si2
-  | cast_case_f2bool => Ebinop (Ocmpf Cne) e (make_floatconst Float.zero)
-  | cast_case_p2bool => Ebinop (Ocmpu Cne) e (make_intconst Int.zero)
-  | cast_case_struct id1 fld1 id2 fld2 => e
-  | cast_case_union id1 fld1 id2 fld2 => e
-  | cast_case_void => e
-  | cast_case_default => e
+  | cast_case_neutral => OK e
+  | cast_case_i2i sz2 si2 => OK (make_cast_int e sz2 si2)
+  | cast_case_f2f sz2 => OK (make_cast_float e sz2)
+  | cast_case_i2f si1 sz2 => OK (make_cast_float (make_floatofint e si1) sz2)
+  | cast_case_f2i sz2 si2 => OK (make_cast_int (make_intoffloat e si2) sz2 si2)
+  | cast_case_l2l => OK e
+  | cast_case_i2l si1 => OK (make_longofint e si1)
+  | cast_case_l2i sz2 si2 => OK (make_cast_int (Eunop Ointoflong e) sz2 si2)
+  | cast_case_l2f si1 sz2 => OK (make_cast_float (make_floatoflong e si1) sz2)
+  | cast_case_f2l si2 => OK (make_longoffloat e si2)
+  | cast_case_f2bool => OK (Ebinop (Ocmpf Cne) e (make_floatconst Float.zero))
+  | cast_case_l2bool => OK (Ebinop (Ocmpl Cne) e (make_longconst Int64.zero))
+  | cast_case_p2bool => OK (Ebinop (Ocmpu Cne) e (make_intconst Int.zero))
+  | cast_case_struct id1 fld1 id2 fld2 => OK e
+  | cast_case_union id1 fld1 id2 fld2 => OK e
+  | cast_case_void => OK e
+  | cast_case_default => Error (msg "Cshmgen.make_cast")
   end.
 
 (** [make_load addr ty_res] loads a value of type [ty_res] from
@@ -259,7 +335,7 @@ Definition make_store (addr: expr) (ty: type) (rhs: expr) :=
 Definition transl_unop (op: Cop.unary_operation) (a: expr) (ta: type) : res expr :=
   match op with
   | Cop.Onotbool => make_notbool a ta
-  | Cop.Onotint => OK(make_notint a ta)
+  | Cop.Onotint => make_notint a ta
   | Cop.Oneg => make_neg a ta
   end.
 
@@ -297,6 +373,8 @@ Fixpoint transl_expr (a: Clight.expr) {struct a} : res expr :=
       OK(make_intconst n)
   | Clight.Econst_float n _ =>
       OK(make_floatconst n)
+  | Clight.Econst_long n _ =>
+      OK(make_longconst n)
   | Clight.Evar id ty =>
       make_load (Eaddrof id) ty
   | Clight.Etempvar id ty =>
@@ -315,7 +393,7 @@ Fixpoint transl_expr (a: Clight.expr) {struct a} : res expr :=
       transl_binop op tb (typeof b) tc (typeof c)
   | Clight.Ecast b ty =>
       do tb <- transl_expr b;
-      OK (make_cast (typeof b) ty tb)
+      make_cast (typeof b) ty tb
   | Clight.Efield b i ty => 
       match typeof b with
       | Tstruct _ fld _ =>
@@ -369,8 +447,9 @@ Fixpoint transl_arglist (al: list Clight.expr) (tyl: typelist)
   | nil, Tnil => OK nil
   | a1 :: a2, Tcons ty1 ty2 =>
       do ta1 <- transl_expr a1;
-      do ta2 <- transl_arglist a2 ty2;
-      OK (make_cast (typeof a1) ty1 ta1 :: ta2)
+      do ta1' <- make_cast (typeof a1) ty1 ta1;
+      do ta2 <- transl_arglist a2 ty2;      
+      OK (ta1' :: ta2)
   | _, _ =>
       Error(msg "Cshmgen.transl_arglist: arity mismatch")
   end.
@@ -408,7 +487,8 @@ Fixpoint transl_statement (tyret: type) (nbrk ncnt: nat)
   | Clight.Sassign b c =>
       do tb <- transl_lvalue b;
       do tc <- transl_expr c;
-      make_store tb (typeof b) (make_cast (typeof c) (typeof b) tc)
+      do tc' <- make_cast (typeof c) (typeof b) tc;
+      make_store tb (typeof b) tc'
   | Clight.Sset x b =>
       do tb <- transl_expr b;
       OK(Sset x tb)
@@ -442,7 +522,8 @@ Fixpoint transl_statement (tyret: type) (nbrk ncnt: nat)
       OK (Sexit ncnt)
   | Clight.Sreturn (Some e) =>
       do te <- transl_expr e;
-      OK (Sreturn (Some (make_cast (typeof e) tyret te)))
+      do te' <- make_cast (typeof e) tyret te;
+      OK (Sreturn (Some te'))
   | Clight.Sreturn None =>
       OK (Sreturn None)
   | Clight.Sswitch a sl =>
@@ -486,9 +567,6 @@ Definition transl_function (f: Clight.function) : res function :=
        (map fst (Clight.fn_temps f))
        tbody).
 
-Definition list_typ_eq: forall (l1 l2: list typ), {l1=l2} + {l1<>l2}
-                     := list_eq_dec typ_eq.
-
 Definition transl_fundef (f: Clight.fundef) : res fundef :=
   match f with
   | Clight.Internal g => 
diff --git a/cfrontend/Cshmgenproof.v b/cfrontend/Cshmgenproof.v
index ad5ada6..7c24d0d 100644
--- a/cfrontend/Cshmgenproof.v
+++ b/cfrontend/Cshmgenproof.v
@@ -67,157 +67,10 @@ Proof.
   rewrite H0; reflexivity.
 Qed.
 
-(*
-Lemma var_kind_by_value:
-  forall ty chunk,
-  access_mode ty = By_value chunk ->
-  var_kind_of_type ty = OK(Vscalar chunk).
-Proof.
-  intros ty chunk; destruct ty; simpl; try congruence.
-  destruct i; try congruence; destruct s; congruence.
-  destruct f; congruence.
-Qed.
-
-Lemma var_kind_by_reference:
-  forall ty vk,
-  access_mode ty = By_reference \/ access_mode ty = By_copy ->
-  var_kind_of_type ty = OK vk ->
-  vk = Varray (Ctypes.sizeof ty) (Ctypes.alignof ty).
-Proof.
-  intros ty vk; destruct ty; simpl; try intuition congruence.
-  destruct i; try congruence; destruct s; intuition congruence.
-  destruct f; intuition congruence.
-Qed.
-
-Lemma sizeof_var_kind_of_type:
-  forall ty vk,
-  var_kind_of_type ty = OK vk ->
-  Csharpminor.sizeof vk = Ctypes.sizeof ty.
-Proof.
-  intros ty vk.
-  assert (sizeof (Varray (Ctypes.sizeof ty) (Ctypes.alignof ty)) = Ctypes.sizeof ty).
-    simpl. rewrite Zmax_spec. apply zlt_false. 
-    generalize (Ctypes.sizeof_pos ty). omega.
-  destruct ty; try (destruct i; try destruct s); try (destruct f); 
-  simpl; intro EQ; inversion EQ; subst vk; auto.
-Qed.
-*)
-(****
-Remark cast_int_int_normalized:
-  forall sz si a chunk n,
-  access_mode (Tint sz si a) = By_value chunk ->
-  val_normalized (Vint (cast_int_int sz si n)) chunk.
-Proof.
-  unfold access_mode, cast_int_int, val_normalized; intros. destruct sz.
-  destruct si; inv H; simpl.
-  rewrite Int.sign_ext_idem; auto. compute; auto.
-  rewrite Int.zero_ext_idem; auto. compute; auto.
-  destruct si; inv H; simpl.
-  rewrite Int.sign_ext_idem; auto. compute; auto.
-  rewrite Int.zero_ext_idem; auto. compute; auto.
-  inv H. auto.
-  inv H. destruct (Int.eq n Int.zero); auto.
-Qed.
-
-Remark cast_float_float_normalized:
-  forall sz a chunk n,
-  access_mode (Tfloat sz a) = By_value chunk ->
-  val_normalized (Vfloat (cast_float_float sz n)) chunk.
-Proof.
-  unfold access_mode, cast_float_float, val_normalized; intros. 
-  destruct sz; inv H; simpl.
-  rewrite Float.singleoffloat_idem. auto.
-  auto.
-Qed.
-
-Remark cast_neutral_normalized:
-  forall ty1 ty2 chunk,
-  classify_cast ty1 ty2 = cast_case_neutral ->
-  access_mode ty2 = By_value chunk ->
-  chunk = Mint32.
-Proof.
-  intros. functional inversion H; subst; simpl in H0; congruence.
-Qed.
-
-Remark cast_to_bool_normalized:
-  forall ty1 ty2 chunk,
-  classify_cast ty1 ty2 = cast_case_f2bool \/
-  classify_cast ty1 ty2 = cast_case_p2bool ->
-  access_mode ty2 = By_value chunk ->
-  chunk = Mint8unsigned.
-Proof.
-  intros. destruct ty2; simpl in *; try discriminate. 
-  destruct i; destruct ty1; intuition congruence. 
-  destruct ty1; intuition discriminate.
-  destruct ty1; intuition discriminate.
-Qed.
-
-Lemma cast_result_normalized:
-  forall chunk v1 ty1 ty2 v2,
-  sem_cast v1 ty1 ty2 = Some v2 ->
-  access_mode ty2 = By_value chunk ->
-  val_normalized v2 chunk.
-Proof.
-  intros. functional inversion H; subst.
-  rewrite (cast_neutral_normalized ty1 ty2 chunk); auto. red; auto.
-  rewrite (cast_neutral_normalized ty1 ty2 chunk); auto. red; auto.
-  functional inversion H2; subst. eapply cast_int_int_normalized; eauto.
-  functional inversion H2; subst. eapply cast_float_float_normalized; eauto.
-  functional inversion H2; subst. eapply cast_float_float_normalized; eauto.
-  functional inversion H2; subst. eapply cast_int_int_normalized; eauto.
-  rewrite (cast_to_bool_normalized ty1 ty2 chunk); auto. red; auto.
-  rewrite (cast_to_bool_normalized ty1 ty2 chunk); auto. red; auto.
-  rewrite (cast_to_bool_normalized ty1 ty2 chunk); auto. destruct (Int.eq i Int.zero); red; auto.
-  rewrite (cast_to_bool_normalized ty1 ty2 chunk); auto. red; auto.
-  functional inversion H2; subst. simpl in H0. congruence.
-  functional inversion H2; subst. simpl in H0. congruence.
-  functional inversion H5; subst. simpl in H0. congruence.
-Qed.
-
-Definition val_casted (v: val) (ty: type) : Prop :=
-  exists v0, exists ty0, sem_cast v0 ty0 ty = Some v.
-
-Lemma val_casted_normalized:
-  forall v ty chunk,
-  val_casted v ty -> access_mode ty = By_value chunk -> val_normalized v chunk.
-Proof.
-  intros. destruct H as [v0 [ty0 CAST]]. eapply cast_result_normalized; eauto.
-Qed.
-
-Fixpoint val_casted_list (vl: list val) (tyl: typelist) {struct vl}: Prop :=
-  match vl, tyl with
-  | nil, Tnil => True
-  | v1 :: vl', Tcons ty1 tyl' => val_casted v1 ty1 /\ val_casted_list vl' tyl'
-  | _, _ => False
-  end.
-
-Lemma eval_exprlist_casted:
-  forall ge e le m al tyl vl,
-  Clight.eval_exprlist ge e le m al tyl vl ->
-  val_casted_list vl tyl.
-Proof.
-  induction 1; simpl.
-  auto.
-  split. exists v1; exists (typeof a); auto. eauto.
-Qed.
-
-*******)
-
 (** * Properties of the translation functions *)
 
 (** Transformation of expressions and statements. *)
 
-(*
-Lemma is_variable_correct:
-  forall a id,
-  is_variable a = Some id ->
-  a = Clight.Evar id (typeof a).
-Proof.
-  intros until id. unfold is_variable; destruct a; intros; try discriminate.
-  simpl. congruence.
-Qed.
-*)
-
 Lemma transl_expr_lvalue:
   forall ge e le m a loc ofs ta,
   Clight.eval_lvalue ge e le m a loc ofs ->
@@ -283,6 +136,13 @@ Proof.
   intros. unfold make_floatconst. econstructor. reflexivity. 
 Qed.
 
+Lemma make_longconst_correct:
+  forall n e le m,
+  eval_expr ge e le m (make_longconst n) (Vlong n).
+Proof.
+  intros. unfold make_floatconst. econstructor. reflexivity. 
+Qed.
+
 Lemma make_floatofint_correct:
   forall a n sg e le m,
   eval_expr ge e le m a (Vint n) ->
@@ -302,8 +162,38 @@ Proof.
   destruct sg; econstructor; eauto; simpl; rewrite H0; auto.
 Qed.
 
-Hint Resolve make_intconst_correct make_floatconst_correct
+Lemma make_longofint_correct:
+  forall a n sg e le m,
+  eval_expr ge e le m a (Vint n) ->
+  eval_expr ge e le m (make_longofint a sg) (Vlong(cast_int_long sg n)).
+Proof.
+  intros. unfold make_longofint, cast_int_long. 
+  destruct sg; econstructor; eauto. 
+Qed.
+
+Lemma make_floatoflong_correct:
+  forall a n sg e le m,
+  eval_expr ge e le m a (Vlong n) ->
+  eval_expr ge e le m (make_floatoflong a sg) (Vfloat(cast_long_float sg n)).
+Proof.
+  intros. unfold make_floatoflong, cast_int_long. 
+  destruct sg; econstructor; eauto. 
+Qed.
+
+Lemma make_longoffloat_correct:
+  forall e le m a sg f i,
+  eval_expr ge e le m a (Vfloat f) ->
+  cast_float_long sg f = Some i ->
+  eval_expr ge e le m (make_longoffloat a sg) (Vlong i).
+Proof.
+  unfold cast_float_long, make_longoffloat; intros.
+  destruct sg; econstructor; eauto; simpl; rewrite H0; auto.
+Qed.
+
+Hint Resolve make_intconst_correct make_floatconst_correct make_longconst_correct
              make_floatofint_correct make_intoffloat_correct
+             make_longofint_correct
+             make_floatoflong_correct make_longoffloat_correct
              eval_Eunop eval_Ebinop: cshm.
 Hint Extern 2 (@eq trace _ _) => traceEq: cshm.
 
@@ -357,38 +247,34 @@ Qed.
 Hint Resolve make_cast_int_correct make_cast_float_correct: cshm.
 
 Lemma make_cast_correct:
-  forall e le m a v ty1 ty2 v',
+  forall e le m a b v ty1 ty2 v',
+  make_cast ty1 ty2 a = OK b ->
   eval_expr ge e le m a v ->
   sem_cast v ty1 ty2 = Some v' ->
-  eval_expr ge e le m (make_cast ty1 ty2 a) v'.
+  eval_expr ge e le m b v'.
 Proof.
-  intros. unfold make_cast. functional inversion H0; subst.
-  (* neutral *)
-  rewrite H2; auto.
-  rewrite H2; auto.
-  (* int -> int *)
-  rewrite H2. auto with cshm. 
-  (* float -> float *)
-  rewrite H2. auto with cshm.
-  (* int -> float *)
-  rewrite H2. auto with cshm. 
+  intros. unfold make_cast, sem_cast in *;
+  destruct (classify_cast ty1 ty2); inv H; destruct v; inv H1; eauto with cshm.
   (* float -> int *)
-  rewrite H2. eauto with cshm.
+  destruct (cast_float_int si2 f) as [i|] eqn:E; inv H2. eauto with cshm.
+  (* float -> long *)
+  destruct (cast_float_long si2 f) as [i|] eqn:E; inv H2. eauto with cshm.
   (* float -> bool *)
-  rewrite H2. econstructor; eauto with cshm.
-  simpl. unfold Val.cmpf, Val.cmpf_bool. rewrite Float.cmp_ne_eq. rewrite H7; auto.
-  rewrite H2. econstructor; eauto with cshm.
-  simpl. unfold Val.cmpf, Val.cmpf_bool. rewrite Float.cmp_ne_eq. rewrite H7; auto.
-  (* pointer -> bool *)
-  rewrite H2. econstructor; eauto with cshm.
-  simpl. unfold Val.cmpu, Val.cmpu_bool, Int.cmpu. destruct (Int.eq i Int.zero); reflexivity.
-  rewrite H2. econstructor; eauto with cshm.
-  (* struct -> struct *)
-  rewrite H2. auto.
-  (* union -> union *)
-  rewrite H2. auto.
-  (* any -> void *)
-  rewrite H5. auto.
+  econstructor; eauto with cshm.
+  simpl. unfold Val.cmpf, Val.cmpf_bool. rewrite Float.cmp_ne_eq.
+  destruct (Float.cmp Ceq f Float.zero); auto.
+  (* long -> bool *)
+  econstructor; eauto with cshm.
+  simpl. unfold Val.cmpl, Val.cmpl_bool, Int64.cmp.
+  destruct (Int64.eq i Int64.zero); auto.
+  (* int -> bool *)
+  econstructor; eauto with cshm.
+  simpl. unfold Val.cmpu, Val.cmpu_bool, Int.cmpu.
+  destruct (Int.eq i Int.zero); auto.
+  (* struct *)
+  destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; auto.
+  (* union *)
+  destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; auto.
 Qed.
 
 Lemma make_boolean_correct:
@@ -413,6 +299,10 @@ Proof.
   unfold Val.cmpu, Val.cmpu_bool. simpl.
   destruct (Int.eq i Int.zero); simpl; constructor.
   exists Vtrue; split. econstructor; eauto with cshm. constructor.
+(* long *)
+  econstructor; split. econstructor; eauto with cshm. simpl. eauto. 
+  unfold Val.cmpl, Val.cmpl_bool. simpl. 
+  destruct (Int64.eq i Int64.zero); simpl; constructor. 
 Qed.
 
 Lemma make_neg_correct:
@@ -422,10 +312,8 @@ Lemma make_neg_correct:
   eval_expr ge e le m a va ->
   eval_expr ge e le m c v.
 Proof.
-  intros until m; intro SEM. unfold make_neg. 
-  functional inversion SEM; intros.
-  rewrite H0 in H4. inv H4. eapply eval_Eunop; eauto with cshm.
-  rewrite H0 in H4. inv H4. eauto with cshm.
+  unfold sem_neg, make_neg; intros until m; intros SEM MAKE EV1;
+  destruct (classify_neg tya); inv MAKE; destruct va; inv SEM; eauto with cshm.
 Qed.
 
 Lemma make_notbool_correct:
@@ -435,20 +323,19 @@ Lemma make_notbool_correct:
   eval_expr ge e le m a va ->
   eval_expr ge e le m c v.
 Proof.
-  intros until m; intro SEM. unfold make_notbool. 
-  functional inversion SEM; intros; rewrite H0 in H4; inversion H4; simpl;
-  eauto with cshm.
+  unfold sem_notbool, make_notbool; intros until m; intros SEM MAKE EV1;
+  destruct (classify_bool tya); inv MAKE; destruct va; inv SEM; eauto with cshm.
 Qed.
 
 Lemma make_notint_correct:
   forall a tya c va v e le m,
   sem_notint va tya = Some v ->
-  make_notint a tya = c ->  
+  make_notint a tya = OK c ->  
   eval_expr ge e le m a va ->
   eval_expr ge e le m c v.
 Proof.
-  intros until m; intro SEM. unfold make_notint. 
-  functional inversion SEM; intros. subst. eauto with cshm. 
+  unfold sem_notint, make_notint; intros until m; intros SEM MAKE EV1;
+  destruct (classify_notint tya); inv MAKE; destruct va; inv SEM; eauto with cshm.
 Qed.
 
 Definition binary_constructor_correct
@@ -461,99 +348,206 @@ Definition binary_constructor_correct
   eval_expr ge e le m b vb ->
   eval_expr ge e le m c v.
 
+Section MAKE_BIN.
+
+Variable sem_int: signedness -> int -> int -> option val.
+Variable sem_long: signedness -> int64 -> int64 -> option val.
+Variable sem_float: float -> float -> option val.
+Variables iop iopu fop lop lopu: binary_operation.
+
+Hypothesis iop_ok:
+  forall x y m, eval_binop iop (Vint x) (Vint y) m = sem_int Signed x y.
+Hypothesis iopu_ok:
+  forall x y m, eval_binop iopu (Vint x) (Vint y) m = sem_int Unsigned x y.
+Hypothesis lop_ok:
+  forall x y m, eval_binop lop (Vlong x) (Vlong y) m = sem_long Signed x y.
+Hypothesis lopu_ok:
+  forall x y m, eval_binop lopu (Vlong x) (Vlong y) m = sem_long Unsigned x y.
+Hypothesis fop_ok:
+  forall x y m, eval_binop fop (Vfloat x) (Vfloat y) m = sem_float x y.
+
+Lemma make_binarith_correct:
+  binary_constructor_correct
+    (make_binarith iop iopu fop lop lopu)
+    (sem_binarith sem_int sem_long sem_float).
+Proof.
+  red; unfold make_binarith, sem_binarith;
+  intros until m; intros SEM MAKE EV1 EV2;
+  destruct (classify_binarith tya tyb); inv MAKE;
+  destruct va; try discriminate; destruct vb; try discriminate.
+- destruct s; inv H0; econstructor; eauto with cshm. 
+  rewrite iop_ok; auto. rewrite iopu_ok; auto.
+- erewrite <- fop_ok in SEM; eauto with cshm.
+- erewrite <- fop_ok in SEM; eauto with cshm.
+- erewrite <- fop_ok in SEM; eauto with cshm.
+- destruct s; inv H0; econstructor; eauto with cshm. 
+  rewrite lop_ok; auto. rewrite lopu_ok; auto.
+- destruct s2; inv H0; econstructor; eauto with cshm. 
+  rewrite lop_ok; auto. rewrite lopu_ok; auto.
+- destruct s1; inv H0; econstructor; eauto with cshm. 
+  rewrite lop_ok; auto. rewrite lopu_ok; auto.
+- erewrite <- fop_ok in SEM; eauto with cshm.
+- erewrite <- fop_ok in SEM; eauto with cshm.
+Qed.
+
+Lemma make_binarith_int_correct:
+  binary_constructor_correct
+    (make_binarith_int iop iopu lop lopu)
+    (sem_binarith sem_int sem_long (fun x y => None)).
+Proof.
+  red; unfold make_binarith_int, sem_binarith;
+  intros until m; intros SEM MAKE EV1 EV2;
+  destruct (classify_binarith tya tyb); inv MAKE;
+  destruct va; try discriminate; destruct vb; try discriminate.
+- destruct s; inv H0; econstructor; eauto with cshm. 
+  rewrite iop_ok; auto. rewrite iopu_ok; auto.
+- destruct s; inv H0; econstructor; eauto with cshm. 
+  rewrite lop_ok; auto. rewrite lopu_ok; auto.
+- destruct s2; inv H0; econstructor; eauto with cshm. 
+  rewrite lop_ok; auto. rewrite lopu_ok; auto.
+- destruct s1; inv H0; econstructor; eauto with cshm. 
+  rewrite lop_ok; auto. rewrite lopu_ok; auto.
+Qed.
+
+End MAKE_BIN.
+
+Hint Extern 2 (@eq (option val) _ _) => (simpl; reflexivity) : cshm.
+
 Lemma make_add_correct: binary_constructor_correct make_add sem_add.
 Proof.
-  red; intros until m. intro SEM. unfold make_add. 
-  functional inversion SEM; rewrite H0; intros;
-  inversion H7; eauto with cshm. 
-  eapply eval_Ebinop. eauto. 
-  eapply eval_Ebinop. eauto with cshm. eauto.
-  simpl. reflexivity. reflexivity. 
-  eapply eval_Ebinop. eauto. 
-  eapply eval_Ebinop. eauto with cshm. eauto. 
-  simpl. reflexivity. simpl. reflexivity.
+  red; unfold make_add, sem_add;
+  intros until m; intros SEM MAKE EV1 EV2;
+  destruct (classify_add tya tyb); inv MAKE.
+- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm.
+- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm.
+- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm.
+- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm.
+- eapply make_binarith_correct; eauto; intros; auto.
 Qed.
 
 Lemma make_sub_correct: binary_constructor_correct make_sub sem_sub.
 Proof.
-  red; intros until m. intro SEM. unfold make_sub. 
-  functional inversion SEM; rewrite H0; intros;
-  inversion H7; eauto with cshm. 
-  eapply eval_Ebinop. eauto. 
-  eapply eval_Ebinop. eauto with cshm. eauto.
-  simpl. reflexivity. reflexivity. 
-  inversion H9. eapply eval_Ebinop. 
-  eapply eval_Ebinop; eauto. 
-  simpl. unfold eq_block; rewrite H3. reflexivity.
-  eauto with cshm. simpl. rewrite H8. reflexivity.
+  red; unfold make_sub, sem_sub;
+  intros until m; intros SEM MAKE EV1 EV2;
+  destruct (classify_sub tya tyb); inv MAKE.
+- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm.
+- destruct va; try discriminate; destruct vb; inv SEM.
+  destruct (zeq b0 b1); try discriminate. destruct (Int.eq (Int.repr (sizeof ty)) Int.zero) eqn:E; inv H0.
+  econstructor; eauto with cshm. rewrite zeq_true. simpl. rewrite E; auto. 
+- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm.
+- eapply make_binarith_correct; eauto; intros; auto.
 Qed.
 
 Lemma make_mul_correct: binary_constructor_correct make_mul sem_mul.
 Proof.
-  red; intros until m. intro SEM. unfold make_mul. 
-  functional inversion SEM; rewrite H0; intros;
-  inversion H7; eauto with cshm. 
+  apply make_binarith_correct; intros; auto.
 Qed.
 
 Lemma make_div_correct: binary_constructor_correct make_div sem_div.
 Proof.
-  red; intros until m. intro SEM. unfold make_div. 
-  functional inversion SEM; rewrite H0; intros.
-  inversion H8. eapply eval_Ebinop; eauto with cshm. 
-  simpl. rewrite H7; auto.
-  inversion H8. eapply eval_Ebinop; eauto with cshm. 
-  simpl. rewrite H7; auto.
-  inversion H7; eauto with cshm. 
-  inversion H7; eauto with cshm. 
-  inversion H7; eauto with cshm. 
+  apply make_binarith_correct; intros; auto.
 Qed.
 
 Lemma make_mod_correct: binary_constructor_correct make_mod sem_mod.
-  red; intros until m. intro SEM. unfold make_mod. 
-  functional inversion SEM; rewrite H0; intros.
-  inversion H8. eapply eval_Ebinop; eauto with cshm. 
-  simpl. rewrite H7; auto.
-  inversion H8. eapply eval_Ebinop; eauto with cshm. 
-  simpl. rewrite H7; auto.
+Proof.
+  apply make_binarith_int_correct; intros; auto.
 Qed.
 
 Lemma make_and_correct: binary_constructor_correct make_and sem_and.
 Proof.
-  red; intros until m. intro SEM. unfold make_and. 
-  functional inversion SEM. intros. inversion H7. 
-  eauto with cshm. 
+  apply make_binarith_int_correct; intros; auto.
 Qed.
 
 Lemma make_or_correct: binary_constructor_correct make_or sem_or.
 Proof.
-  red; intros until m. intro SEM. unfold make_or. 
-  functional inversion SEM. intros. inversion H7. 
-  eauto with cshm. 
+  apply make_binarith_int_correct; intros; auto.
 Qed.
 
 Lemma make_xor_correct: binary_constructor_correct make_xor sem_xor.
 Proof.
-  red; intros until m. intro SEM. unfold make_xor. 
-  functional inversion SEM. intros. inversion H7. 
-  eauto with cshm. 
+  apply make_binarith_int_correct; intros; auto.
+Qed.
+
+Ltac comput val :=
+  let x := fresh in set val as x in *; vm_compute in x; subst x.
+
+Remark small_shift_amount_1:
+  forall i,
+  Int64.ltu i Int64.iwordsize = true ->
+  Int.ltu (Int64.loword i) Int64.iwordsize' = true
+  /\ Int64.unsigned i = Int.unsigned (Int64.loword i).
+Proof.
+  intros. apply Int64.ltu_inv in H. comput (Int64.unsigned Int64.iwordsize). 
+  assert (Int64.unsigned i = Int.unsigned (Int64.loword i)).
+  {
+    unfold Int64.loword. rewrite Int.unsigned_repr; auto. 
+    comput Int.max_unsigned; omega.
+  }
+  split; auto. unfold Int.ltu. apply zlt_true. rewrite <- H0. tauto.
+Qed.
+
+Remark small_shift_amount_2:
+  forall i,
+  Int64.ltu i (Int64.repr 32) = true ->
+  Int.ltu (Int64.loword i) Int.iwordsize = true.
+Proof.
+  intros. apply Int64.ltu_inv in H. comput (Int64.unsigned (Int64.repr 32)).
+  assert (Int64.unsigned i = Int.unsigned (Int64.loword i)).
+  {
+    unfold Int64.loword. rewrite Int.unsigned_repr; auto. 
+    comput Int.max_unsigned; omega.
+  }
+  unfold Int.ltu. apply zlt_true. rewrite <- H0. tauto.
+Qed.
+
+Lemma small_shift_amount_3:
+  forall i,
+  Int.ltu i Int64.iwordsize' = true ->
+  Int64.unsigned (Int64.repr (Int.unsigned i)) = Int.unsigned i.
+Proof.
+  intros. apply Int.ltu_inv in H. comput (Int.unsigned Int64.iwordsize'). 
+  apply Int64.unsigned_repr. comput Int64.max_unsigned; omega.
 Qed.
 
 Lemma make_shl_correct: binary_constructor_correct make_shl sem_shl.
 Proof.
-  red; intros until m. intro SEM. unfold make_shl. 
-  functional inversion SEM. intros. inversion H8. 
-  eapply eval_Ebinop; eauto with cshm. 
-  simpl. rewrite H7. auto.
+  red; unfold make_shl, sem_shl, sem_shift;
+  intros until m; intros SEM MAKE EV1 EV2;
+  destruct (classify_shift tya tyb); inv MAKE;
+  destruct va; try discriminate; destruct vb; try discriminate.
+- destruct (Int.ltu i0 Int.iwordsize) eqn:E; inv SEM.
+  econstructor; eauto. simpl; rewrite E; auto.
+- destruct (Int64.ltu i0 Int64.iwordsize) eqn:E; inv SEM.
+  exploit small_shift_amount_1; eauto. intros [A B].
+  econstructor; eauto with cshm. simpl. rewrite A. 
+  f_equal; f_equal. unfold Int64.shl', Int64.shl. rewrite B; auto.
+- destruct (Int64.ltu i0 (Int64.repr 32)) eqn:E; inv SEM.
+  econstructor; eauto with cshm. simpl. rewrite small_shift_amount_2; auto. 
+- destruct (Int.ltu i0 Int64.iwordsize') eqn:E; inv SEM. 
+  econstructor; eauto with cshm. simpl. rewrite E. 
+  unfold Int64.shl', Int64.shl. rewrite small_shift_amount_3; auto.
 Qed.
 
 Lemma make_shr_correct: binary_constructor_correct make_shr sem_shr.
 Proof.
-  red; intros until m. intro SEM. unfold make_shr. 
-  functional inversion SEM; intros; rewrite H0 in H8; inversion H8.
-  eapply eval_Ebinop; eauto with cshm.
-  simpl; rewrite H7; auto.
-  eapply eval_Ebinop; eauto with cshm.
-  simpl; rewrite H7; auto.
+  red; unfold make_shr, sem_shr, sem_shift;
+  intros until m; intros SEM MAKE EV1 EV2;
+  destruct (classify_shift tya tyb); inv MAKE;
+  destruct va; try discriminate; destruct vb; try discriminate.
+- destruct (Int.ltu i0 Int.iwordsize) eqn:E; inv SEM.
+  destruct s; inv H0; econstructor; eauto; simpl; rewrite E; auto.
+- destruct (Int64.ltu i0 Int64.iwordsize) eqn:E; inv SEM.
+  exploit small_shift_amount_1; eauto. intros [A B].
+  destruct s; inv H0; econstructor; eauto with cshm; simpl; rewrite A;
+  f_equal; f_equal.
+  unfold Int64.shr', Int64.shr; rewrite B; auto.
+  unfold Int64.shru', Int64.shru; rewrite B; auto.
+- destruct (Int64.ltu i0 (Int64.repr 32)) eqn:E; inv SEM.
+  destruct s; inv H0; econstructor; eauto with cshm; simpl; rewrite small_shift_amount_2; auto. 
+- destruct (Int.ltu i0 Int64.iwordsize') eqn:E; inv SEM.
+  destruct s; inv H0; econstructor; eauto with cshm; simpl; rewrite E. 
+  unfold Int64.shr', Int64.shr; rewrite small_shift_amount_3; auto.
+  unfold Int64.shru', Int64.shru; rewrite small_shift_amount_3; auto.
 Qed.
 
 Lemma make_cmp_correct:
@@ -564,35 +558,12 @@ Lemma make_cmp_correct:
   eval_expr ge e le m b vb ->
   eval_expr ge e le m c v.
 Proof.
-  intros until m. intro SEM. unfold make_cmp.
-  functional inversion SEM; rewrite H0; intros.
-  (** ii Signed *)
-  inversion H8; eauto with cshm.
-  (* ii Unsigned *) 
-  inversion H8. eauto with cshm.
-  (* pp int int *)
-  inversion H8. eauto with cshm.
-  (* pp ptr ptr *)
-  inversion H10. eapply eval_Ebinop; eauto with cshm.
-  simpl. unfold Val.cmpu. simpl. unfold Mem.weak_valid_pointer in *. 
-  rewrite H3. rewrite H9. auto.
-  inversion H10. eapply eval_Ebinop; eauto with cshm.
-  simpl. unfold Val.cmpu. simpl. rewrite H3. rewrite H9.
-  destruct cmp; simpl in *; inv H; auto.
-  (* pp ptr int *)
-  inversion H9. eapply eval_Ebinop; eauto with cshm.
-  simpl. unfold Val.cmpu. simpl. rewrite H8.
-  destruct cmp; simpl in *; inv H; auto.
-  (* pp int ptr *)
-  inversion H9. eapply eval_Ebinop; eauto with cshm.
-  simpl. unfold Val.cmpu. simpl. rewrite H8.
-  destruct cmp; simpl in *; inv H; auto.
-  (* ff *)
-  inversion H8. eauto with cshm.
-  (* if *)
-  inversion H8. eauto with cshm.
-  (* fi *)
-  inversion H8. eauto with cshm.
+  unfold sem_cmp, make_cmp; intros until m; intros SEM MAKE EV1 EV2;
+  destruct (classify_cmp tya tyb).
+- inv MAKE. destruct (Val.cmpu_bool (Mem.valid_pointer m) cmp va vb) as [bv|] eqn:E;
+  simpl in SEM; inv SEM.
+  econstructor; eauto. simpl. unfold Val.cmpu. rewrite E. auto.
+- eapply make_binarith_correct; eauto; intros; auto.
 Qed.
 
 Lemma transl_unop_correct:
@@ -604,7 +575,7 @@ Lemma transl_unop_correct:
 Proof.
   intros. destruct op; simpl in *.
   eapply make_notbool_correct; eauto. 
-  eapply make_notint_correct with (tya := tya); eauto. congruence.
+  eapply make_notint_correct; eauto. 
   eapply make_neg_correct; eauto.
 Qed.
 
@@ -903,6 +874,8 @@ Proof.
   apply make_intconst_correct.
 (* const float *)
   apply make_floatconst_correct.
+(* const long *)
+  apply make_longconst_correct.
 (* temp var *)
   constructor; auto.
 (* addrof *)
@@ -957,8 +930,7 @@ Proof.
   induction 1; intros.
   monadInv H. constructor.
   monadInv H2. constructor. 
-  eapply make_cast_correct. eapply transl_expr_correct; eauto. auto. 
-  eauto.
+  eapply make_cast_correct; eauto. eapply transl_expr_correct; eauto. auto. 
 Qed.
 
 End EXPR.
@@ -1103,8 +1075,8 @@ Proof.
 (* skip *)
   auto.
 (* assign *)
-  unfold make_store, make_memcpy in EQ2.
-  destruct (access_mode (typeof e)); inv EQ2; auto.
+  unfold make_store, make_memcpy in EQ3.
+  destruct (access_mode (typeof e)); inv EQ3; auto.
 (* set *)
   auto.
 (* call *)
@@ -1196,7 +1168,7 @@ Proof.
   monadInv TR.
   assert (SAME: ts' = ts /\ tk' = tk).
     inversion MTR. auto. 
-    subst ts. unfold make_store, make_memcpy in EQ2. destruct (access_mode (typeof a1)); congruence.
+    subst ts. unfold make_store, make_memcpy in EQ3. destruct (access_mode (typeof a1)); congruence.
   destruct SAME; subst ts' tk'.
   econstructor; split.
   apply plus_one. eapply make_store_correct; eauto.
@@ -1327,7 +1299,7 @@ Proof.
   monadInv TR. inv MTR. 
   econstructor; split.
   apply plus_one. constructor.
-  eapply make_cast_correct. eapply transl_expr_correct; eauto. eauto.
+  eapply make_cast_correct; eauto. eapply transl_expr_correct; eauto.
   eapply match_env_free_blocks; eauto.
   econstructor; eauto.
   eapply match_cont_call_cont. eauto. 
diff --git a/cfrontend/Ctypes.v b/cfrontend/Ctypes.v
index c05f21a..bf483a0 100644
--- a/cfrontend/Ctypes.v
+++ b/cfrontend/Ctypes.v
@@ -26,7 +26,7 @@ Require Import Errors.
   union).  Numeric types (integers and floats) fully specify the
   bit size of the type.  An integer type is a pair of a signed/unsigned
   flag and a bit size: 8, 16, or 32 bits, or the special [IBool] size
-  standing for the C99 [_Bool] type. *)
+  standing for the C99 [_Bool] type.  64-bit integers are treated separately. *)
 
 Inductive signedness : Type :=
   | Signed: signedness
@@ -92,6 +92,7 @@ Definition noattr := {| attr_volatile := false |}.
 Inductive type : Type :=
   | Tvoid: type                                    (**r the [void] type *)
   | Tint: intsize -> signedness -> attr -> type    (**r integer types *)
+  | Tlong: signedness -> attr -> type              (**r 64-bit integer types *)
   | Tfloat: floatsize -> attr -> type              (**r floating-point types *)
   | Tpointer: type -> attr -> type                 (**r pointer types ([*ty]) *)
   | Tarray: type -> Z -> attr -> type              (**r array types ([ty[len]]) *)
@@ -131,6 +132,7 @@ Definition attr_of_type (ty: type) :=
   match ty with
   | Tvoid => noattr
   | Tint sz si a => a
+  | Tlong si a => a
   | Tfloat sz a => a
   | Tpointer elt a => a
   | Tarray elt sz a => a
@@ -148,8 +150,7 @@ Definition type_bool := Tint IBool Signed noattr.
 
 Definition typeconv (ty: type) : type :=
   match ty with
-  | Tint I32 Unsigned _ => ty
-  | Tint _ _ a          => Tint I32 Signed a
+  | Tint (I8 | I16 | IBool) _ a => Tint I32 Signed a
   | Tarray t sz a       => Tpointer t a
   | Tfunction _ _       => Tpointer ty noattr
   | _                   => ty
@@ -166,6 +167,7 @@ Fixpoint alignof (t: type) : Z :=
   | Tint I16 _ _ => 2
   | Tint I32 _ _ => 4
   | Tint IBool _ _ => 1
+  | Tlong _ _ => 8
   | Tfloat F32 _ => 4
   | Tfloat F64 _ => 8
   | Tpointer _ _ => 4
@@ -218,6 +220,7 @@ Fixpoint sizeof (t: type) : Z :=
   | Tint I16 _ _ => 2
   | Tint I32 _ _ => 4
   | Tint IBool _ _ => 1
+  | Tlong _ _ => 8
   | Tfloat F32 _ => 4
   | Tfloat F64 _ => 8
   | Tpointer _ _ => 4
@@ -425,6 +428,7 @@ Definition access_mode (ty: type) : mode :=
   | Tint I16 Unsigned _ => By_value Mint16unsigned
   | Tint I32 _ _ => By_value Mint32
   | Tint IBool _ _ => By_value Mint8unsigned
+  | Tlong _ _ => By_value Mint64
   | Tfloat F32 _ => By_value Mfloat32
   | Tfloat F64 _ => By_value Mfloat64
   | Tvoid => By_nothing
@@ -458,6 +462,7 @@ Fixpoint unroll_composite (ty: type) : type :=
   match ty with
   | Tvoid => ty
   | Tint _ _ _ => ty
+  | Tlong _ _ => ty
   | Tfloat _ _ => ty
   | Tpointer t1 a => Tpointer (unroll_composite t1) a
   | Tarray t1 sz a => Tarray (unroll_composite t1) sz a
@@ -526,6 +531,7 @@ Fixpoint type_of_params (params: list (ident * type)) : typelist :=
 Definition typ_of_type (t: type) : AST.typ :=
   match t with
   | Tfloat _ _ => AST.Tfloat
+  | Tlong _ _ => AST.Tlong
   | _ => AST.Tint
   end.
 
@@ -533,6 +539,7 @@ Definition opttyp_of_type (t: type) : option AST.typ :=
   match t with
   | Tvoid => None
   | Tfloat _ _ => Some AST.Tfloat
+  | Tlong _ _ => Some AST.Tlong
   | _ => Some AST.Tint
   end.
 
diff --git a/cfrontend/Initializers.v b/cfrontend/Initializers.v
index 7711ade..657f607 100644
--- a/cfrontend/Initializers.v
+++ b/cfrontend/Initializers.v
@@ -54,7 +54,7 @@ Fixpoint constval (a: expr) : res val :=
   match a with
   | Eval v ty =>
       match v with
-      | Vint _ | Vfloat _ => OK v
+      | Vint _ | Vfloat _ | Vlong _ => OK v
       | Vptr _ _ | Vundef => Error(msg "illegal constant")
       end
   | Evalof l ty =>
@@ -152,6 +152,7 @@ Definition transl_init_single (ty: type) (a: expr) : res init_data :=
   | Vint n, Tint I32 sg _ => OK(Init_int32 n)
   | Vint n, Tpointer _ _ => OK(Init_int32 n)
   | Vint n, Tcomp_ptr _ _ => OK(Init_int32 n)
+  | Vlong n, Tlong _ _ => OK(Init_int64 n)
   | Vfloat f, Tfloat F32 _ => OK(Init_float32 f)
   | Vfloat f, Tfloat F64 _ => OK(Init_float64 f)
   | Vptr (Zpos id) ofs, Tint I32 sg _ => OK(Init_addrof id ofs)
diff --git a/cfrontend/Initializersproof.v b/cfrontend/Initializersproof.v
index 75b73ff..c2ca135 100644
--- a/cfrontend/Initializersproof.v
+++ b/cfrontend/Initializersproof.v
@@ -333,45 +333,19 @@ Qed.
 
 (** * Soundness of the compile-time evaluator *)
 
-(** [match_val v v'] holds if the compile-time value [v]
-  (with symbolic pointers) matches the run-time value [v']
-  (with concrete pointers). *)
-
-Inductive match_val: val -> val -> Prop :=
-  | match_vint: forall n,
-      match_val (Vint n) (Vint n)
-  | match_vfloat: forall f,
-      match_val (Vfloat f) (Vfloat f)
-  | match_vptr: forall id b ofs,
-      Genv.find_symbol ge id = Some b ->
-      match_val (Vptr (Zpos id) ofs) (Vptr b ofs)
-  | match_vundef:
-      match_val Vundef Vundef.
-
-Lemma match_val_of_bool:
-  forall b, match_val (Val.of_bool b) (Val.of_bool b).
-Proof.
-  destruct b; constructor.
-Qed.
-
-Hint Constructors match_val: mval.
-Hint Resolve match_val_of_bool: mval.
-
-(** The [match_val] relation commutes with the evaluation functions
-  from [Csem]. *)
-
-Lemma sem_unary_match:
-  forall op ty v1 v v1' v',
-  sem_unary_operation op v1 ty = Some v ->
-  sem_unary_operation op v1' ty = Some v' ->
-  match_val v1 v1' ->
-  match_val v v'.
-Proof.
-  intros. destruct op; simpl in *.
-  unfold sem_notbool in *. destruct (classify_bool ty); inv H1; inv H; inv H0; auto with mval. constructor.
-  unfold sem_notint in *. destruct (classify_notint ty); inv H1; inv H; inv H0; auto with mval.
-  unfold sem_neg in *. destruct (classify_neg ty); inv H1; inv H; inv H0; auto with mval.
-Qed.
+(** A global environment [ge] induces a memory injection mapping
+  our symbolic pointers [Vptr (Zpos id) ofs] to run-time pointers
+  [Vptr b ofs] where [Genv.find_symbol ge id = Some b]. *)
+
+Definition inj (b: block) :=
+  match b with
+  | Zpos id =>
+      match Genv.find_symbol ge id with
+      | Some b' => Some (b', 0)
+      | None => None
+      end
+  | _ => None
+  end.
 
 Lemma mem_empty_not_valid_pointer:
   forall b ofs, Mem.valid_pointer Mem.empty b ofs = false.
@@ -387,104 +361,18 @@ Proof.
   now rewrite !mem_empty_not_valid_pointer.
 Qed.
 
-Lemma sem_cmp_match:
-  forall c v1 ty1 v2 ty2 m v v1' v2' v',
-  sem_cmp c v1 ty1 v2 ty2 Mem.empty = Some v ->
-  sem_cmp c v1' ty1 v2' ty2 m = Some v' ->
-  match_val v1 v1' -> match_val v2 v2' ->
-  match_val v v'.
-Proof.
-Opaque zeq.
-  intros. unfold sem_cmp in *.
-  destruct (classify_cmp ty1 ty2); try (destruct s); inv H1; inv H2; inv H; inv H0; auto with mval.
-- destruct (Int.eq n Int.zero); try discriminate. 
-  unfold Val.cmp_different_blocks in *. destruct c; inv H3; inv H2; constructor.
-- destruct (Int.eq n Int.zero); try discriminate. 
-  unfold Val.cmp_different_blocks in *. destruct c; inv H2; inv H1; constructor.
-- destruct (zeq (Z.pos id) (Z.pos id0)); discriminate.
-Qed.
-
-Lemma sem_binary_match:
-  forall op v1 ty1 v2 ty2 m v v1' v2' v',
-  sem_binary_operation op v1 ty1 v2 ty2 Mem.empty = Some v ->
-  sem_binary_operation op v1' ty1 v2' ty2 m = Some v' ->
-  match_val v1 v1' -> match_val v2 v2' ->
-  match_val v v'.
-Proof.
-  intros. unfold sem_binary_operation in *; destruct op.
-(* add *)
-  unfold sem_add in *. destruct (classify_add ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval.
-(* sub *)
-  unfold sem_sub in *. destruct (classify_sub ty1 ty2); inv H1; inv H2; try (inv H; inv H0; auto with mval; fail).
-  destruct (zeq (Zpos id) (Zpos id0)); try discriminate.
-  assert (b0 = b) by congruence. subst b0. rewrite zeq_true in H0. 
-  destruct (Int.eq (Int.repr (sizeof ty)) Int.zero); inv H; inv H0; auto with mval.
-(* mul *)
-  unfold sem_mul in *. destruct (classify_mul ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval.
-(* div *)
-  unfold sem_div in H0. functional inversion H; rewrite H4 in H0; inv H1; inv H2; inv H0.
-  inv H12. rewrite H11 in H2. inv H2. constructor.
-  inv H12. rewrite H11 in H2. inv H2. constructor.
-  inv H11. constructor.
-  inv H11. constructor.
-  inv H11. constructor.
-(* mod *)
-  unfold sem_mod in H0. functional inversion H; rewrite H4 in H0; inv H1; inv H2; inv H0.
-  inv H12. rewrite H11 in H2. inv H2. constructor.
-  inv H12. rewrite H11 in H2. inv H2. constructor.
-(* and *)
-  unfold sem_and in *. destruct (classify_binint ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval.
-(* or *)
-  unfold sem_or in *. destruct (classify_binint ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval.
-(* xor *)
-  unfold sem_xor in *. destruct (classify_binint ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval.
-(* shl *)
-  unfold sem_shl in *. destruct (classify_shift ty1 ty2); inv H1; inv H2; try discriminate.
-  destruct (Int.ltu n0 Int.iwordsize); inv H0; inv H; constructor.
-(* shr *)
-  unfold sem_shr in *. destruct (classify_shift ty1 ty2); try discriminate;
-  destruct s; inv H1; inv H2; try discriminate.
-  destruct (Int.ltu n0 Int.iwordsize); inv H0; inv H; constructor.
-  destruct (Int.ltu n0 Int.iwordsize); inv H0; inv H; constructor.
-(* comparisons *)
-  eapply sem_cmp_match; eauto.
-  eapply sem_cmp_match; eauto.
-  eapply sem_cmp_match; eauto.
-  eapply sem_cmp_match; eauto.
-  eapply sem_cmp_match; eauto.
-  eapply sem_cmp_match; eauto.
-Qed.
-
 Lemma sem_cast_match:
-  forall v1 ty1 ty2 v2, sem_cast v1 ty1 ty2 = Some v2 ->
-  forall v1' v2',  match_val v1' v1 -> do_cast v1' ty1 ty2 = OK v2' ->
-  match_val v2' v2.
+  forall v1 ty1 ty2 v2 v1' v2',
+  sem_cast v1 ty1 ty2 = Some v2 ->
+  do_cast v1' ty1 ty2 = OK v2' ->
+  val_inject inj v1' v1 ->
+  val_inject inj v2' v2.
 Proof.
-  intros. unfold do_cast in H1. destruct (sem_cast v1' ty1 ty2) as [v2''|] eqn:?; inv H1.
-  unfold sem_cast in H; functional inversion Heqo; subst. 
-  rewrite H2 in H. inv H0. inv H. constructor.
-  rewrite H2 in H. inv H0. inv H. constructor; auto.
-  rewrite H2 in H. inv H0. inv H. constructor.
-  rewrite H2 in H. inv H0. inv H. constructor.
-  rewrite H2 in H. inv H0. inv H. constructor.
-  rewrite H2 in H. inv H0. destruct (cast_float_int si2 f); inv H. inv H7. constructor.
-  rewrite H2 in H. inv H0. inv H. rewrite H7. constructor.
-  rewrite H2 in H. inv H0. inv H. rewrite H7. constructor.
-  rewrite H2 in H. inv H0. inv H. constructor.
-  rewrite H2 in H. inv H0. inv H. constructor.
-  rewrite H2 in H. destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H. auto.
-  rewrite H2 in H. destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H. auto.
-  rewrite H5 in H. inv H. auto.
+  intros. unfold do_cast in H0. destruct (sem_cast v1' ty1 ty2) as [v2''|] eqn:E; inv H0.
+  exploit sem_cast_inject. eexact E. eauto. 
+  intros [v' [A B]]. congruence.
 Qed.
 
-Lemma bool_val_match:
-  forall v v' ty,
-  match_val v v' ->
-  bool_val v ty = bool_val v' ty.
-Proof.
-  intros. inv H; auto.
-Qed.
-  
 (** Soundness of [constval] with respect to the big-step semantics *)
 
 Lemma constval_rvalue:
@@ -492,13 +380,13 @@ Lemma constval_rvalue:
   eval_simple_rvalue empty_env m a v ->
   forall v',
   constval a = OK v' ->
-  match_val v' v
+  val_inject inj v' v
 with constval_lvalue:
   forall m a b ofs,
   eval_simple_lvalue empty_env m a b ofs ->
   forall v',
   constval a = OK v' ->
-  match_val v' (Vptr b ofs).
+  val_inject inj v' (Vptr b ofs).
 Proof.
   (* rvalue *)
   induction 1; intros vres CV; simpl in CV; try (monadInv CV).
@@ -509,11 +397,18 @@ Proof.
   (* addrof *)
   eauto.
   (* unop *)
-  revert EQ0. caseEq (sem_unary_operation op x (typeof r1)); intros; inv EQ0.
-  eapply sem_unary_match; eauto. 
+  destruct (sem_unary_operation op x (typeof r1)) as [v1'|] eqn:E; inv EQ0.
+  exploit sem_unary_operation_inject. eexact E. eauto. 
+  intros [v' [A B]]. congruence.
   (* binop *)
-  revert EQ2. caseEq (sem_binary_operation op x (typeof r1) x0 (typeof r2) Mem.empty); intros; inv EQ2.
-  eapply sem_binary_match; eauto.
+  destruct (sem_binary_operation op x (typeof r1) x0 (typeof r2) Mem.empty) as [v1'|] eqn:E; inv EQ2.
+  exploit (sem_binary_operation_inj inj Mem.empty m).
+  intros. rewrite mem_empty_not_valid_pointer in H3; discriminate.
+  intros. rewrite mem_empty_not_weak_valid_pointer in H3; discriminate.
+  intros. rewrite mem_empty_not_weak_valid_pointer in H3; discriminate.
+  intros. rewrite mem_empty_not_valid_pointer in H3; discriminate.
+  eauto. eauto. eauto. 
+  intros [v' [A B]]. congruence.
   (* cast *)
   eapply sem_cast_match; eauto. 
   (* sizeof *)
@@ -521,18 +416,26 @@ Proof.
   (* alignof *)
   constructor.
   (* seqand *)
-  rewrite (bool_val_match x v1 (typeof r1)) in EQ2; auto. rewrite H0 in EQ2. 
-  monadInv EQ2. eapply sem_cast_match; eauto. eapply sem_cast_match; eauto.
-  rewrite (bool_val_match x v1 (typeof r1)) in EQ2; auto. rewrite H0 in EQ2. 
-  monadInv EQ2. constructor.
+  destruct (bool_val x (typeof r1)) as [b|] eqn:E; inv EQ2. 
+  exploit bool_val_inject. eexact E. eauto. intros E'.
+  assert (b = true) by congruence. subst b. monadInv H5. 
+  eapply sem_cast_match; eauto. eapply sem_cast_match; eauto.
+  destruct (bool_val x (typeof r1)) as [b|] eqn:E; inv EQ2. 
+  exploit bool_val_inject. eexact E. eauto. intros E'.
+  assert (b = false) by congruence. subst b. inv H2. auto.
   (* seqor *)
-  rewrite (bool_val_match x v1 (typeof r1)) in EQ2; auto. rewrite H0 in EQ2. 
-  monadInv EQ2. eapply sem_cast_match; eauto. eapply sem_cast_match; eauto.
-  rewrite (bool_val_match x v1 (typeof r1)) in EQ2; auto. rewrite H0 in EQ2. 
-  monadInv EQ2. constructor.
+  destruct (bool_val x (typeof r1)) as [b|] eqn:E; inv EQ2. 
+  exploit bool_val_inject. eexact E. eauto. intros E'.
+  assert (b = false) by congruence. subst b. monadInv H5. 
+  eapply sem_cast_match; eauto. eapply sem_cast_match; eauto.
+  destruct (bool_val x (typeof r1)) as [b|] eqn:E; inv EQ2. 
+  exploit bool_val_inject. eexact E. eauto. intros E'.
+  assert (b = true) by congruence. subst b. inv H2. auto.
   (* conditional *)
-  rewrite (bool_val_match x v1 (typeof r1)) in EQ3; auto.
-  rewrite H0 in EQ3. destruct b; eapply sem_cast_match; eauto.
+  destruct (bool_val x (typeof r1)) as [b'|] eqn:E; inv EQ3.
+  exploit bool_val_inject. eexact E. eauto. intros E'.
+  assert (b' = b) by congruence. subst b'. 
+  destruct b; eapply sem_cast_match; eauto.
   (* comma *)
   auto.
   (* paren *)
@@ -543,12 +446,14 @@ Proof.
   (* var local *)
   unfold empty_env in H. rewrite PTree.gempty in H. congruence.
   (* var_global *)
-  constructor; auto.
+  econstructor. unfold inj. rewrite H0. eauto. auto. 
   (* deref *)
   eauto.
   (* field struct *)
   rewrite H0 in CV. monadInv CV. exploit constval_rvalue; eauto. intro MV. inv MV.
-  simpl. replace x with delta by congruence. constructor. auto.
+  simpl. replace x with delta by congruence. econstructor; eauto. 
+  rewrite ! Int.add_assoc. f_equal. apply Int.add_commut. 
+  simpl. auto.
   (* field union *)
   rewrite H0 in CV. eauto.
 Qed.
@@ -569,7 +474,7 @@ Theorem constval_steps:
   forall f r m v v' ty m',
   star step ge (ExprState f r Kstop empty_env m) E0 (ExprState f (Eval v' ty) Kstop empty_env m') ->
   constval r = OK v ->
-  m' = m /\ ty = typeof r /\ match_val v v'.
+  m' = m /\ ty = typeof r /\ val_inject inj v v'.
 Proof.
   intros. exploit eval_simple_steps; eauto. eapply constval_simple; eauto. 
   intros [A [B C]]. intuition. eapply constval_rvalue; eauto.
@@ -595,19 +500,23 @@ Proof.
   inv D. 
   (* int *)
   destruct ty; try discriminate. 
-  destruct i; inv EQ2.
+  destruct i0; inv EQ2.
   destruct s; simpl in H2; inv H2. rewrite <- Mem.store_signed_unsigned_8; auto. auto.
   destruct s; simpl in H2; inv H2. rewrite <- Mem.store_signed_unsigned_16; auto. auto.
   simpl in H2; inv H2. assumption.
   inv EQ2. simpl in H2; inv H2. assumption.
+  (* long *)
+  destruct ty; inv EQ2. simpl in H2; inv H2. assumption.
   (* float *)
   destruct ty; try discriminate. 
   destruct f1; inv EQ2; simpl in H2; inv H2; assumption.
   (* pointer *)
-  assert (data = Init_addrof id ofs0 /\ chunk = Mint32).
+  unfold inj in H. destruct b1; try discriminate.
+  assert (data = Init_addrof p ofs1 /\ chunk = Mint32).
     destruct ty; inv EQ2; inv H2.
     destruct i; inv H5. intuition congruence. auto.
-  destruct H4; subst. rewrite H. assumption.
+  destruct H4; subst. destruct (Genv.find_symbol ge p); inv H. 
+  rewrite Int.add_zero in H3. auto.
   (* undef *)
   discriminate.
 Qed.
@@ -624,12 +533,15 @@ Proof.
   destruct ty; try discriminate. 
   destruct i0; inv EQ2; reflexivity.
   inv EQ2; reflexivity.
+  inv EQ2; reflexivity.
+  destruct ty; inv EQ2; reflexivity. 
   destruct ty; try discriminate.
   destruct f0; inv EQ2; reflexivity.
   destruct b; try discriminate.
   destruct ty; try discriminate.
   destruct i0; inv EQ2; reflexivity.
   inv EQ2; reflexivity.
+  inv EQ2; reflexivity.
 Qed.
 
 Notation idlsize := Genv.init_data_list_size.
diff --git a/cfrontend/PrintClight.ml b/cfrontend/PrintClight.ml
index d61afa3..a8b2b98 100644
--- a/cfrontend/PrintClight.ml
+++ b/cfrontend/PrintClight.ml
@@ -43,6 +43,7 @@ let rec precedence = function
   | Efield _ -> (16, LtoR)
   | Econst_int _ -> (16, NA)
   | Econst_float _ -> (16, NA)
+  | Econst_long _ -> (16, NA)
   | Eunop _ -> (15, RtoL)
   | Eaddrof _ -> (15, RtoL)
   | Ecast _ -> (14, RtoL)
@@ -79,6 +80,8 @@ let rec expr p (prec, e) =
       fprintf p "%ld" (camlint_of_coqint n)
   | Econst_float(f, _) ->
       fprintf p "%F" (camlfloat_of_coqfloat f)
+  | Econst_long(n, _) ->
+      fprintf p "%LdLL" (camlint64_of_coqint n)
   | Eunop(op, a1, _) ->
       fprintf p "%s%a" (name_unop op) expr (prec', a1)
   | Eaddrof(a1, _) ->
@@ -258,6 +261,7 @@ let rec collect_expr e =
   match e with
   | Econst_int _ -> ()
   | Econst_float _ -> ()
+  | Econst_long _ -> ()
   | Evar _ -> ()
   | Etempvar _ -> ()
   | Ederef(r, _) -> collect_expr r
diff --git a/cfrontend/PrintCsyntax.ml b/cfrontend/PrintCsyntax.ml
index b1ee234..f91dca6 100644
--- a/cfrontend/PrintCsyntax.ml
+++ b/cfrontend/PrintCsyntax.ml
@@ -64,6 +64,11 @@ let name_floattype sz =
   | F32 -> "float"
   | F64 -> "double"
 
+let name_longtype sg =
+  match sg with
+  | Signed -> "long long"
+  | Unsigned -> "unsigned long long"
+
 (* Collecting the names and fields of structs and unions *)
 
 module StructUnion = Map.Make(String)
@@ -91,6 +96,8 @@ let rec name_cdecl id ty =
       name_inttype sz sg ^ attributes a ^ name_optid id
   | Tfloat(sz, a) ->
       name_floattype sz ^ attributes a ^ name_optid id
+  | Tlong(sg, a) ->
+      name_longtype sg ^ attributes a ^ name_optid id
   | Tpointer(t, a) ->
       let id' =
         match t with
@@ -174,6 +181,7 @@ let print_value p v =
   match v with
   | Vint n -> fprintf p "%ld" (camlint_of_coqint n)
   | Vfloat f -> fprintf p "%F" (camlfloat_of_coqfloat f)
+  | Vlong n -> fprintf p "%Ld" (camlint64_of_coqint n)
   | Vptr(b, ofs) -> fprintf p "<ptr%a>" !print_pointer_hook (b, ofs)
   | Vundef -> fprintf p "<undef>"
 
@@ -397,6 +405,7 @@ let print_init p = function
   | Init_int8 n -> fprintf p "%ld,@ " (camlint_of_coqint n)
   | Init_int16 n -> fprintf p "%ld,@ " (camlint_of_coqint n)
   | Init_int32 n -> fprintf p "%ld,@ " (camlint_of_coqint n)
+  | Init_int64 n -> fprintf p "%Ld,@ " (camlint64_of_coqint n)
   | Init_float32 n -> fprintf p "%F,@ " (camlfloat_of_coqfloat n)
   | Init_float64 n -> fprintf p "%F,@ " (camlfloat_of_coqfloat n)
   | Init_space n -> fprintf p "/* skip %ld, */@ " (camlint_of_coqint n)
@@ -444,6 +453,7 @@ let rec collect_type = function
   | Tvoid -> ()
   | Tint _ -> ()
   | Tfloat _ -> ()
+  | Tlong _ -> ()
   | Tpointer(t, _) -> collect_type t
   | Tarray(t, _, _) -> collect_type t
   | Tfunction(args, res) -> collect_type_list args; collect_type res
diff --git a/cfrontend/SimplExpr.v b/cfrontend/SimplExpr.v
index 2dbd97e..98dad93 100644
--- a/cfrontend/SimplExpr.v
+++ b/cfrontend/SimplExpr.v
@@ -108,6 +108,7 @@ Function eval_simpl_expr (a: expr) : option val :=
   match a with
   | Econst_int n _ => Some(Vint n)
   | Econst_float n _ => Some(Vfloat n)
+  | Econst_long n _ => Some(Vlong n)
   | Ecast b ty => 
       match eval_simpl_expr b with
       | None => None
@@ -222,6 +223,8 @@ Fixpoint transl_expr (dst: destination) (a: Csyntax.expr) : mon (list statement
       ret (finish dst nil (Econst_int n ty))
   | Csyntax.Eval (Vfloat n) ty =>
       ret (finish dst nil (Econst_float n ty))
+  | Csyntax.Eval (Vlong n) ty =>
+      ret (finish dst nil (Econst_long n ty))
   | Csyntax.Eval _ ty =>
       error (msg "SimplExpr.transl_expr: Eval")
   | Csyntax.Esizeof ty' ty =>
diff --git a/cfrontend/SimplExprspec.v b/cfrontend/SimplExprspec.v
index b31738b..1e7a84c 100644
--- a/cfrontend/SimplExprspec.v
+++ b/cfrontend/SimplExprspec.v
@@ -768,6 +768,10 @@ Opaque makeif.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
+  intros. destruct dst; simpl in *; inv H2. 
+    constructor. auto. intros; constructor.
+    constructor.
+    constructor. auto. intros; constructor.
 (* var *)
   monadInv H; econstructor; split; auto with gensym. UseFinish. constructor.
 (* field *)
diff --git a/cfrontend/SimplLocals.v b/cfrontend/SimplLocals.v
index 18eeea2..edcd5fe 100644
--- a/cfrontend/SimplLocals.v
+++ b/cfrontend/SimplLocals.v
@@ -46,6 +46,7 @@ Definition make_cast (a: expr) (tto: type) : expr :=
   | cast_case_neutral => a
   | cast_case_i2i I32 _ => a
   | cast_case_f2f F64 => a
+  | cast_case_l2l => a
   | cast_case_struct _ _ _ _ => a
   | cast_case_union _ _ _ _ => a
   | cast_case_void => a
@@ -58,6 +59,7 @@ Fixpoint simpl_expr (cenv: compilenv) (a: expr) : expr :=
   match a with
   | Econst_int _ _ => a
   | Econst_float _ _ => a
+  | Econst_long _ _ => a
   | Evar id ty => if VSet.mem id cenv then Etempvar id ty else Evar id ty
   | Etempvar id ty => Etempvar id ty
   | Ederef a1 ty => Ederef (simpl_expr cenv a1) ty
@@ -156,6 +158,7 @@ Fixpoint addr_taken_expr (a: expr): VSet.t :=
   match a with
   | Econst_int _ _ => VSet.empty
   | Econst_float _ _ => VSet.empty
+  | Econst_long _ _ => VSet.empty
   | Evar id ty => VSet.empty
   | Etempvar id ty => VSet.empty
   | Ederef a1 ty => addr_taken_expr a1
@@ -224,8 +227,7 @@ Definition add_lifted (cenv: compilenv) (vars1 vars2: list (ident * type)) :=
 
 Definition transf_function (f: function) : res function :=
   let cenv := cenv_for f in
-  do x <- assertion (list_disjoint_dec ident_eq (var_names f.(fn_params))
-                                                (var_names f.(fn_temps)));
+  assertion (list_disjoint_dec ident_eq (var_names f.(fn_params)) (var_names f.(fn_temps)));
   do body' <- simpl_stmt cenv f.(fn_body);
   OK {| fn_return := f.(fn_return);
         fn_params := f.(fn_params);
diff --git a/cfrontend/SimplLocalsproof.v b/cfrontend/SimplLocalsproof.v
index e3aa4e2..1dcf630 100644
--- a/cfrontend/SimplLocalsproof.v
+++ b/cfrontend/SimplLocalsproof.v
@@ -202,16 +202,22 @@ Inductive val_casted: val -> type -> Prop :=
   | val_casted_float: forall sz attr n,
       cast_float_float sz n = n ->
       val_casted (Vfloat n) (Tfloat sz attr)
+  | val_casted_long: forall si attr n,
+      val_casted (Vlong n) (Tlong si attr)
   | val_casted_ptr_ptr: forall b ofs ty attr,
       val_casted (Vptr b ofs) (Tpointer ty attr)
   | val_casted_int_ptr: forall n ty attr,
       val_casted (Vint n) (Tpointer ty attr)
   | val_casted_ptr_int: forall b ofs si attr,
       val_casted (Vptr b ofs) (Tint I32 si attr)
-  | val_casted_struct: forall id fld attr v,
-      val_casted v (Tstruct id fld attr)
-  | val_casted_union: forall id fld attr v,
-      val_casted v (Tunion id fld attr)
+  | val_casted_ptr_cptr: forall b ofs id attr,
+      val_casted (Vptr b ofs) (Tcomp_ptr id attr)
+  | val_casted_int_cptr: forall n id attr,
+      val_casted (Vint n) (Tcomp_ptr id attr)
+  | val_casted_struct: forall id fld attr b ofs,
+      val_casted (Vptr b ofs) (Tstruct id fld attr)
+  | val_casted_union: forall id fld attr b ofs,
+      val_casted (Vptr b ofs) (Tunion id fld attr)
   | val_casted_void: forall v,
       val_casted v Tvoid.
 
@@ -240,12 +246,15 @@ Proof.
   constructor.
 (* int *)
   destruct i; destruct ty; simpl in H; try discriminate; destruct v; inv H.
-  constructor. apply (cast_int_int_idem I8 s). 
+  constructor. apply (cast_int_int_idem I8 s).
+  constructor. apply (cast_int_int_idem I8 s).
   destruct (cast_float_int s f0); inv H1.   constructor. apply (cast_int_int_idem I8 s). 
-  constructor. apply (cast_int_int_idem I16 s). 
+  constructor. apply (cast_int_int_idem I16 s).
+  constructor. apply (cast_int_int_idem I16 s).
   destruct (cast_float_int s f0); inv H1.   constructor. apply (cast_int_int_idem I16 s). 
   constructor. auto.
-  constructor. 
+  constructor.
+  constructor. auto. 
   destruct (cast_float_int s f0); inv H1. constructor. auto.
   constructor. auto.
   constructor.
@@ -253,7 +262,10 @@ Proof.
   constructor.
   constructor; auto.
   constructor; auto.
+  constructor; auto.
+  constructor; auto.
   constructor. simpl. destruct (Int.eq i0 Int.zero); auto.
+  constructor. simpl. destruct (Int64.eq i Int64.zero); auto.
   constructor. simpl. destruct (Float.cmp Ceq f0 Float.zero); auto.
   constructor. simpl. destruct (Int.eq i Int.zero); auto.
   constructor; auto.
@@ -261,20 +273,31 @@ Proof.
   constructor; auto.
   constructor. simpl. destruct (Int.eq i Int.zero); auto.
   constructor; auto.
+  constructor. simpl. destruct (Int.eq i0 Int.zero); auto.
+  constructor; auto.
+(* long *)
+  destruct ty; try discriminate.
+  destruct v; inv H. constructor.
+  destruct v; inv H. constructor.
+  destruct v; try discriminate. destruct (cast_float_long s f0); inv H. constructor.
 (* float *)
   destruct ty; simpl in H; try discriminate; destruct v; inv H.
   constructor. apply cast_float_float_idem.
   constructor. apply cast_float_float_idem.
+  constructor. apply cast_float_float_idem.
 (* pointer *)
   destruct ty; simpl in H; try discriminate; destruct v; inv H; try constructor.
 (* impossible cases *)
   discriminate.
   discriminate.
-(* structs,unions *)
-  constructor.
-  constructor.
-(* impossible cases *)
-  discriminate.
+(* structs *)
+  destruct ty; try discriminate; destruct v; try discriminate.
+  destruct (ident_eq i0 i && fieldlist_eq f0 f); inv H; constructor.
+(* unions *)
+  destruct ty; try discriminate; destruct v; try discriminate.
+  destruct (ident_eq i0 i && fieldlist_eq f0 f); inv H; constructor.
+(* comp_ptr *)
+  destruct ty; simpl in H; try discriminate; destruct v; inv H; constructor.
 Qed.
 
 Lemma val_casted_load_result:
@@ -293,6 +316,9 @@ Proof.
   inv H0; auto.
   inv H0; auto.
   inv H0; auto.
+  inv H0; auto.
+  discriminate.
+  discriminate.
   discriminate.
   discriminate.
   discriminate.
@@ -301,24 +327,18 @@ Qed.
 Lemma cast_val_casted:
   forall v ty, val_casted v ty -> sem_cast v ty ty = Some v.
 Proof.
-  intros. inversion H; clear H; subst v ty; unfold sem_cast; simpl.
+  intros. inversion H; clear H; subst v ty; unfold sem_cast; simpl; auto.
   destruct sz; congruence.
   congruence.
-  auto.
-  auto.
-  auto.
   unfold proj_sumbool; repeat rewrite dec_eq_true; auto.
   unfold proj_sumbool; repeat rewrite dec_eq_true; auto.
-  auto.
 Qed.
 
 Lemma val_casted_inject:
   forall f v v' ty,
   val_inject f v v' -> val_casted v ty -> val_casted v' ty.
 Proof.
-  intros. inv H.
-  auto.
-  auto.
+  intros. inv H; auto.
   inv H0; constructor.
   inv H0; constructor.
 Qed.
@@ -356,7 +376,10 @@ Proof.
   destruct v1; inv H0; auto.
   destruct sz2; auto. destruct v1; inv H0; auto.
   destruct sz2; auto. destruct v1; inv H0; auto.
+  destruct v1; inv H0; auto.
+  destruct v1; try discriminate.
   destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H0; auto.
+  destruct v1; try discriminate.
   destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H0; auto.
   inv H0; auto.
 Qed.
@@ -1324,176 +1347,6 @@ Proof.
   exists (Vptr loc' ofs'); split; auto. eapply deref_loc_copy; eauto.
 Qed.
 
-Remark val_inject_vtrue: forall f, val_inject f Vtrue Vtrue.
-Proof. unfold Vtrue; auto. Qed.
-
-Remark val_inject_vfalse: forall f, val_inject f Vfalse Vfalse.
-Proof. unfold Vfalse; auto. Qed.
-
-Remark val_inject_of_bool: forall f b, val_inject f (Val.of_bool b) (Val.of_bool b).
-Proof. intros. unfold Val.of_bool. destruct b; [apply val_inject_vtrue|apply val_inject_vfalse]. 
-Qed.
-
-Hint Resolve val_inject_vtrue val_inject_vfalse val_inject_of_bool.
-
-Ltac TrivialInject :=
-  match goal with
-  | |- exists v', Some ?v = Some v' /\ _ => exists v; split; auto
-(*
-  | |- exists v', _ /\ val_inject _ (Vint ?n) _ => exists (Vint n); split; auto
-  | |- exists v', _ /\ val_inject _ (Vfloat ?n) _ => exists (Vfloat n); split; auto
-  | |- exists v', _ /\ val_inject _ Vtrue _ => exists Vtrue; split; auto
-  | |- exists v', _ /\ val_inject _ Vfalse _ => exists Vfalse; split; auto
-  | |- exists v', _ /\ val_inject _ (Val.of_bool ?b) _ => exists (Val.of_bool b); split; auto
-*)
-  | _ => idtac
-  end.
-
-Lemma sem_unary_operation_inject:
-  forall op v1 ty v tv1,
-  sem_unary_operation op v1 ty = Some v ->
-  val_inject f v1 tv1 ->
-  exists tv, sem_unary_operation op tv1 ty = Some tv /\ val_inject f v tv.
-Proof.
-  unfold sem_unary_operation; intros. destruct op.
-  (* notbool *)
-  unfold sem_notbool in *; destruct (classify_bool ty); inv H0; inv H; TrivialInject.
-  (* notint *)
-  unfold sem_notint in *; destruct (classify_notint ty); inv H0; inv H; TrivialInject.
-  (* neg *)
-  unfold sem_neg in *; destruct (classify_neg ty); inv H0; inv H; TrivialInject.
-Qed.
-
-Lemma sem_cmp_inject:
-  forall cmp v1 tv1 ty1 v2 tv2 ty2 v,
-  sem_cmp cmp v1 ty1 v2 ty2 m = Some v ->
-  val_inject f v1 tv1 ->
-  val_inject f v2 tv2 ->
-  exists tv, sem_cmp cmp tv1 ty1 tv2 ty2 tm = Some tv /\ val_inject f v tv.
-Proof.
-  unfold sem_cmp; intros. 
-  assert (MM: option_map Val.of_bool (Val.cmp_different_blocks cmp) = Some v ->
-      exists tv, option_map Val.of_bool (Val.cmp_different_blocks cmp) = Some tv /\ val_inject f v tv).
-    intros. exists v; split; auto. 
-    destruct cmp; simpl in H2; inv H2; auto.
-
-  destruct (classify_cmp ty1 ty2); try destruct s; inv H0; try discriminate; inv H1; inv H; TrivialInject.
-  destruct (Int.eq i Int.zero); try discriminate; auto.
-  destruct (Int.eq i Int.zero); try discriminate; auto.
-
-  destruct (zeq b1 b0); subst.
-  rewrite H0 in H2. inv H2. rewrite zeq_true.
-  destruct (Mem.weak_valid_pointer m b0 (Int.unsigned ofs1)) eqn:?; try discriminate.
-  destruct (Mem.weak_valid_pointer m b0 (Int.unsigned ofs0)) eqn:?; try discriminate.
-  simpl H3.
-  rewrite (Mem.weak_valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb) by eauto.
-  rewrite (Mem.weak_valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb0) by eauto.
-  simpl. replace  (Int.cmpu cmp (Int.add ofs1 (Int.repr delta))
-                         (Int.add ofs0 (Int.repr delta)))
-     with  (Int.cmpu cmp ofs1 ofs0).
-  inv H3; TrivialInject.
-  symmetry. apply Int.translate_cmpu. 
-  eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
-  eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
-  destruct (Mem.valid_pointer m b1 (Int.unsigned ofs1)) eqn:?; try discriminate.
-  destruct (Mem.valid_pointer m b0 (Int.unsigned ofs0)) eqn:?; try discriminate.
-  destruct (zeq b2 b3); subst.
-  rewrite Mem.valid_pointer_implies
-    by (eapply (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb); eauto).
-  rewrite Mem.valid_pointer_implies
-    by (eapply (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb0); eauto).
-  simpl.
-  exploit Mem.different_pointers_inject; eauto. intros [[]|A]. easy.
-  destruct cmp; simpl in H3; inv H3.
-  simpl. unfold Int.eq. rewrite zeq_false; auto.
-  simpl. unfold Int.eq. rewrite zeq_false; auto.
-  rewrite (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb) by eauto.
-  rewrite (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb0) by eauto.
-  simpl in H3 |- *. auto.
-Qed.
-
-Lemma sem_binary_operation_inject:
-  forall op v1 ty1 v2 ty2 v tv1 tv2,
-  sem_binary_operation op v1 ty1 v2 ty2 m = Some v ->
-  val_inject f v1 tv1 -> val_inject f v2 tv2 ->
-  exists tv, sem_binary_operation op tv1 ty1 tv2 ty2 tm = Some tv /\ val_inject f v tv.
-Proof.
-  unfold sem_binary_operation; intros. destruct op.
-(* add *)
-  unfold sem_add in *; destruct (classify_add ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject.
-  econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut.
-  econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut.
-(* sub *)
-  unfold sem_sub in *; destruct (classify_sub ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject.
-  econstructor. eauto. rewrite Int.sub_add_l. auto. 
-  destruct (zeq b1 b0); try discriminate. subst b1. rewrite H0 in H2; inv H2.
-  rewrite zeq_true. destruct (Int.eq (Int.repr (sizeof ty)) Int.zero); inv H3.
-  rewrite Int.sub_shifted. TrivialInject. 
-(* mul *)
-  unfold sem_mul in *; destruct (classify_mul ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject.
-(* div *)
-  unfold sem_div in *; destruct (classify_div ty1 ty2); try destruct s; inv H0; try discriminate; inv H1; inv H; TrivialInject.
-  destruct ( Int.eq i0 Int.zero
-           || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H1; TrivialInject.
-  destruct (Int.eq i0 Int.zero); inv H1; TrivialInject.
-(* mod *)
-  unfold sem_mod in *; destruct (classify_binint ty1 ty2); try destruct s; inv H0; try discriminate; inv H1; inv H; TrivialInject.
-  destruct ( Int.eq i0 Int.zero
-           || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H1; TrivialInject.
-  destruct (Int.eq i0 Int.zero); inv H1; TrivialInject.
-(* and *)
-  unfold sem_and in *; destruct (classify_binint ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject.
-(* or *)
-  unfold sem_or in *; destruct (classify_binint ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject.
-(* xor *)
-  unfold sem_xor in *; destruct (classify_binint ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject.
-(* shl *)
-  unfold sem_shl in *; destruct (classify_shift ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject.
-  destruct (Int.ltu i0 Int.iwordsize); inv H1; TrivialInject.
-(* shr *)
-  unfold sem_shr in *; destruct (classify_shift ty1 ty2); try destruct s; inv H0; try discriminate; inv H1; inv H; TrivialInject.
-  destruct (Int.ltu i0 Int.iwordsize); inv H1; TrivialInject.
-  destruct (Int.ltu i0 Int.iwordsize); inv H1; TrivialInject.
-(* comparisons *)
-  eapply sem_cmp_inject; eauto.
-  eapply sem_cmp_inject; eauto.
-  eapply sem_cmp_inject; eauto.
-  eapply sem_cmp_inject; eauto.
-  eapply sem_cmp_inject; eauto.
-  eapply sem_cmp_inject; eauto.
-Qed.
-
-Lemma sem_cast_inject:
-  forall v1 ty1 ty v tv1,
-  sem_cast v1 ty1 ty = Some v ->
-  val_inject f v1 tv1 ->
-  exists tv, sem_cast tv1 ty1 ty = Some tv /\ val_inject f v tv.
-Proof.
-  unfold sem_cast; intros.
-  destruct (classify_cast ty1 ty); try discriminate.
-  inv H0; inv H; TrivialInject. econstructor; eauto.
-  inv H0; inv H; TrivialInject. 
-  inv H0; inv H; TrivialInject. 
-  inv H0; inv H; TrivialInject. 
-  inv H0; try discriminate. destruct (cast_float_int si2 f0); inv H. TrivialInject.
-  inv H0; inv H. TrivialInject.
-  inv H0; inv H. TrivialInject.
-  TrivialInject.
-  destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H. TrivialInject.
-  destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H. TrivialInject.
-  inv H; TrivialInject.
-Qed.
-
-Lemma bool_val_inject:
-  forall v ty b tv,
-  bool_val v ty = Some b ->
-  val_inject f v tv ->
-  bool_val tv ty = Some b.
-Proof.
-  unfold bool_val; intros. 
-  destruct (classify_bool ty); inv H0; congruence.
-Qed.
-
 Lemma eval_simpl_expr:
   forall a v,
   eval_expr ge e le m a v ->
@@ -1512,6 +1365,7 @@ Proof.
 (* const *)
   exists (Vint i); split; auto. constructor.
   exists (Vfloat f0); split; auto. constructor.
+  exists (Vlong i); split; auto. constructor.
 (* tempvar *)
   exploit me_temps; eauto. intros [[tv [A B]] C]. 
   exists tv; split; auto. constructor; auto.
@@ -2184,7 +2038,7 @@ Proof.
 
 (* goto *)
   generalize TRF; intros TRF'. monadInv TRF'. 
-  exploit (simpl_find_label j (cenv_for f) m lo tlo lbl (fn_body f) (call_cont k) x0 (call_cont tk)).
+  exploit (simpl_find_label j (cenv_for f) m lo tlo lbl (fn_body f) (call_cont k) x (call_cont tk)).
     eauto. eapply match_cont_call_cont. eauto.  
     apply compat_cenv_for.
   rewrite H. intros [ts' [tk' [A [B [C D]]]]]. 
@@ -2197,12 +2051,10 @@ Proof.
   generalize EQ; intro EQ'; monadInv EQ'.
   assert (list_norepet (var_names (fn_params f ++ fn_vars f))).
     unfold var_names. rewrite map_app. auto.
-  assert (list_disjoint (var_names (fn_params f)) (var_names (fn_temps f))).
-    monadInv EQ0. auto. 
   exploit match_envs_alloc_variables; eauto.
     instantiate (1 := cenv_for_gen (addr_taken_stmt f.(fn_body)) (fn_params f ++ fn_vars f)).
-    intros. eapply cenv_for_gen_by_value; eauto. rewrite VSF.mem_iff. eexact H5.
-    intros. eapply cenv_for_gen_domain. rewrite VSF.mem_iff. eexact H4.
+    intros. eapply cenv_for_gen_by_value; eauto. rewrite VSF.mem_iff. eexact H4.
+    intros. eapply cenv_for_gen_domain. rewrite VSF.mem_iff. eexact H3.
   intros [j' [te [tm0 [A [B [C [D [E F]]]]]]]].
   exploit store_params_correct.
     eauto. 
@@ -2212,7 +2064,7 @@ Proof.
     eexact B. eexact C.
     intros. apply (create_undef_temps_lifted id f). auto.
     intros. destruct (create_undef_temps (fn_temps f))!id as [v|] eqn:?; auto.
-    exploit create_undef_temps_inv; eauto. intros [P Q]. elim (H3 id id); auto.
+    exploit create_undef_temps_inv; eauto. intros [P Q]. elim (l id id); auto.
   intros [tel [tm1 [P [Q [R [S T]]]]]].
   change (cenv_for_gen (addr_taken_stmt (fn_body f)) (fn_params f ++ fn_vars f))
     with (cenv_for f) in *.
diff --git a/checklink/Asm_printers.ml b/checklink/Asm_printers.ml
index aeb4b3f..00b6e2e 100644
--- a/checklink/Asm_printers.ml
+++ b/checklink/Asm_printers.ml
@@ -221,6 +221,7 @@ let string_of_init_data = function
 | Init_int8(i)    -> "Init_int8("    ^ string_of_int (z_int_lax i)  ^ ")"
 | Init_int16(i)   -> "Init_int16("   ^ string_of_int (z_int_lax i)  ^ ")"
 | Init_int32(i)   -> "Init_int32("   ^ string_of_int32i (z_int32 i) ^ ")"
+| Init_int64(i)   -> "Init_int64("   ^ string_of_int64i (z_int64 i) ^ ")"
 | Init_float32(f) -> "Init_float32(" ^ string_of_ffloat f           ^ ")"
 | Init_float64(f) -> "Init_float64(" ^ string_of_ffloat f           ^ ")"
 | Init_space(z)   -> "Init_space("   ^ string_of_int (z_int z)      ^ ")"
diff --git a/checklink/Check.ml b/checklink/Check.ml
index 5821953..73a7310 100644
--- a/checklink/Check.ml
+++ b/checklink/Check.ml
@@ -2696,6 +2696,14 @@ let compare_data (l: init_data list) (bs: bitstring) (sfw: s_framework)
               else ERR("Wrong float64")
           | { _ } -> error
         )
+        | Init_int64(i) -> (
+          bitmatch bs with
+          | { j : 64 : int; bs : -1 : bitstring } ->
+              if z_int64 i = j
+              then compare_data_aux l bs (s + 8) sfw
+              else ERR("Wrong int64")
+          | { _ } -> error
+        )
         | Init_space(z) -> (
           let space_size = z_int z in
           bitmatch bs with
diff --git a/checklink/Library.ml b/checklink/Library.ml
index acea80a..69a0d6e 100644
--- a/checklink/Library.ml
+++ b/checklink/Library.ml
@@ -110,6 +110,8 @@ let z_int z = Safe32.to_int (z_int32 z)
 
 let z_int_lax z = Safe32.to_int (z_int32_lax z)
 
+let z_int64 = Camlcoq.Z.to_int64
+
 (* Some more printers *)
 
 let string_of_ffloat f = string_of_float (camlfloat_of_coqfloat f)
@@ -144,6 +146,7 @@ let string_of_int32 = Printf.sprintf "0x%08lx"
 let string_of_int64 = Printf.sprintf "0x%08Lx"
 (* To print counts/indices *)
 let string_of_int32i = Int32.to_string
+let string_of_int64i = Int64.to_string
 
 let string_of_positive p = string_of_int32i (positive_int32 p)
 
diff --git a/common/AST.v b/common/AST.v
index 8595b95..1f7f7d3 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -17,6 +17,7 @@
   the abstract syntax trees of many of the intermediate languages. *)
 
 Require Import Coqlib.
+Require String.
 Require Import Errors.
 Require Import Integers.
 Require Import Floats.
@@ -32,16 +33,19 @@ Definition ident := positive.
 
 Definition ident_eq := peq.
 
-(** The intermediate languages are weakly typed, using only two types:
-  [Tint] for integers and pointers, and [Tfloat] for floating-point
-  numbers. *)
+Parameter ident_of_string : String.string -> ident.
+
+(** The intermediate languages are weakly typed, using only three
+  types: [Tint] for 32-bit integers and pointers, [Tfloat] for 64-bit
+  floating-point numbers, [Tlong] for 64-bit integers. *)
 
 Inductive typ : Type :=
-  | Tint : typ
-  | Tfloat : typ.
+  | Tint
+  | Tfloat
+  | Tlong.
 
 Definition typesize (ty: typ) : Z :=
-  match ty with Tint => 4 | Tfloat => 8 end.
+  match ty with Tint => 4 | Tfloat => 8 | Tlong => 8 end.
 
 Lemma typesize_pos: forall ty, typesize ty > 0.
 Proof. destruct ty; simpl; omega. Qed.
@@ -54,6 +58,9 @@ Lemma opt_typ_eq: forall (t1 t2: option typ), {t1=t2} + {t1<>t2}.
 Proof. decide equality. apply typ_eq. Defined.
 Global Opaque opt_typ_eq.
 
+Definition list_typ_eq: forall (l1 l2: list typ), {l1=l2} + {l1<>l2}
+                     := list_eq_dec typ_eq.
+
 (** Additionally, function definitions and function calls are annotated
   by function signatures indicating the number and types of arguments,
   as well as the type of the returned value if any.  These signatures
@@ -76,14 +83,15 @@ Definition proj_sig_res (s: signature) : typ :=
   chunk of memory being accessed. *)
 
 Inductive memory_chunk : Type :=
-  | Mint8signed : memory_chunk     (**r 8-bit signed integer *)
-  | Mint8unsigned : memory_chunk   (**r 8-bit unsigned integer *)
-  | Mint16signed : memory_chunk    (**r 16-bit signed integer *)
-  | Mint16unsigned : memory_chunk  (**r 16-bit unsigned integer *)
-  | Mint32 : memory_chunk          (**r 32-bit integer, or pointer *)
-  | Mfloat32 : memory_chunk        (**r 32-bit single-precision float *)
-  | Mfloat64 : memory_chunk        (**r 64-bit double-precision float *)
-  | Mfloat64al32 : memory_chunk.   (**r 64-bit double-precision float, 4-aligned *)
+  | Mint8signed     (**r 8-bit signed integer *)
+  | Mint8unsigned   (**r 8-bit unsigned integer *)
+  | Mint16signed    (**r 16-bit signed integer *)
+  | Mint16unsigned  (**r 16-bit unsigned integer *)
+  | Mint32          (**r 32-bit integer, or pointer *)
+  | Mint64          (**r 64-bit integer *)
+  | Mfloat32        (**r 32-bit single-precision float *)
+  | Mfloat64        (**r 64-bit double-precision float *)
+  | Mfloat64al32.   (**r 64-bit double-precision float, 4-aligned *)
 
 (** The type (integer/pointer or float) of a chunk. *)
 
@@ -94,6 +102,7 @@ Definition type_of_chunk (c: memory_chunk) : typ :=
   | Mint16signed => Tint
   | Mint16unsigned => Tint
   | Mint32 => Tint
+  | Mint64 => Tlong
   | Mfloat32 => Tfloat
   | Mfloat64 => Tfloat
   | Mfloat64al32 => Tfloat
@@ -105,6 +114,7 @@ Inductive init_data: Type :=
   | Init_int8: int -> init_data
   | Init_int16: int -> init_data
   | Init_int32: int -> init_data
+  | Init_int64: int64 -> init_data
   | Init_float32: float -> init_data
   | Init_float64: float -> init_data
   | Init_space: Z -> init_data
@@ -549,4 +559,3 @@ Definition transf_partial_fundef (fd: fundef A): res (fundef B) :=
   end.
 
 End TRANSF_PARTIAL_FUNDEF.
-
diff --git a/common/Errors.v b/common/Errors.v
index 6b863a0..78e1199 100644
--- a/common/Errors.v
+++ b/common/Errors.v
@@ -96,30 +96,11 @@ Qed.
 
 (** Assertions *)
 
-Definition assertion (b: bool) : res unit :=
-  if b then OK tt else Error(msg "Assertion failed").
+Definition assertion_failed {A: Type} : res A := Error(msg "Assertion failed").
 
-Remark assertion_inversion:
-  forall b x, assertion b = OK x -> b = true.
-Proof.
-  unfold assertion; intros. destruct b; inv H; auto.
-Qed.
-
-Remark assertion_inversion_1:
-  forall (P Q: Prop) (a: {P}+{Q}) x,
-  assertion (proj_sumbool a) = OK x -> P.
-Proof.
-  intros. exploit assertion_inversion; eauto. 
-  unfold proj_sumbool. destruct a. auto. congruence.
-Qed.
-
-Remark assertion_inversion_2:
-  forall (P Q: Prop) (a: {P}+{Q}) x,
-  assertion (negb(proj_sumbool a)) = OK x -> Q.
-Proof.
-  intros. exploit assertion_inversion; eauto. 
-  unfold proj_sumbool. destruct a; simpl. congruence. auto.
-Qed.
+Notation "'assertion' A ; B" := (if A then B else assertion_failed)
+  (at level 200, A at level 100, B at level 200)
+  : error_monad_scope.
 
 (** This is the familiar monadic map iterator. *)
 
@@ -180,26 +161,19 @@ Ltac monadInv1 H :=
       destruct (bind2_inversion F G H) as [x1 [x2 [EQ1 EQ2]]];
       clear H;
       try (monadInv1 EQ2)))))
-  | (assertion (negb (proj_sumbool ?a)) = OK ?X) =>
-      let A := fresh "A" in (generalize (assertion_inversion_2 _ H); intro A);
-      clear H
-  | (assertion (proj_sumbool ?a) = OK ?X) =>
-      let A := fresh "A" in (generalize (assertion_inversion_1 _ H); intro A);
-      clear H
-  | (assertion ?b = OK ?X) =>
-      let A := fresh "A" in (generalize (assertion_inversion _ H); intro A);
-      clear H
+  | (match ?X with left _ => _ | right _ => assertion_failed end = OK _) =>
+      destruct X; [try (monadInv1 H) | discriminate]
+  | (match (negb ?X) with true => _ | false => assertion_failed end = OK _) =>
+      destruct X as [] eqn:?; [discriminate | try (monadInv1 H)]
+  | (match ?X with true => _ | false => assertion_failed end = OK _) =>
+      destruct X as [] eqn:?; [try (monadInv1 H) | discriminate]
   | (mmap ?F ?L = OK ?M) =>
       generalize (mmap_inversion F L H); intro
   end.
 
 Ltac monadInv H :=
+  monadInv1 H ||
   match type of H with
-  | (OK _ = OK _) => monadInv1 H
-  | (Error _ = OK _) => monadInv1 H
-  | (bind ?F ?G = OK ?X) => monadInv1 H
-  | (bind2 ?F ?G = OK ?X) => monadInv1 H
-  | (assertion _ = OK _) => monadInv1 H
   | (?F _ _ _ _ _ _ _ _ = OK _) => 
       ((progress simpl in H) || unfold F in H); monadInv1 H
   | (?F _ _ _ _ _ _ _ = OK _) => 
diff --git a/common/Events.v b/common/Events.v
index e310bfe..f342799 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -55,6 +55,7 @@ Require Import Globalenvs.
 
 Inductive eventval: Type :=
   | EVint: int -> eventval
+  | EVlong: int64 -> eventval
   | EVfloat: float -> eventval
   | EVptr_global: ident -> int -> eventval.
 
@@ -267,6 +268,8 @@ Variable ge: Genv.t F V.
 Inductive eventval_match: eventval -> typ -> val -> Prop :=
   | ev_match_int: forall i,
       eventval_match (EVint i) Tint (Vint i)
+  | ev_match_long: forall i,
+      eventval_match (EVlong i) Tlong (Vlong i)
   | ev_match_float: forall f,
       eventval_match (EVfloat f) Tfloat (Vfloat f)
   | ev_match_ptr: forall id b ofs,
@@ -327,7 +330,7 @@ Lemma eventval_match_inject:
   forall ev ty v1 v2,
   eventval_match ev ty v1 -> val_inject f v1 v2 -> eventval_match ev ty v2.
 Proof.
-  intros. inv H; inv H0. constructor. constructor.
+  intros. inv H; inv H0; try constructor.
   destruct glob_pres as [A [B C]].
   exploit A; eauto. intro EQ; rewrite H3 in EQ; inv EQ.
   rewrite Int.add_zero. econstructor; eauto. 
@@ -337,7 +340,7 @@ Lemma eventval_match_inject_2:
   forall ev ty v,
   eventval_match ev ty v -> val_inject f v v.
 Proof.
-  induction 1. constructor. constructor.
+  induction 1; auto.
   destruct glob_pres as [A [B C]].
   exploit A; eauto. intro EQ.
   econstructor; eauto. rewrite Int.add_zero; auto.
@@ -379,6 +382,7 @@ Qed.
 Definition eventval_valid (ev: eventval) : Prop :=
   match ev with
   | EVint _ => True
+  | EVlong _ => True
   | EVfloat _ => True
   | EVptr_global id ofs => exists b, Genv.find_symbol ge id = Some b
   end.
@@ -386,6 +390,7 @@ Definition eventval_valid (ev: eventval) : Prop :=
 Definition eventval_type (ev: eventval) : typ :=
   match ev with
   | EVint _ => Tint
+  | EVlong _ => Tlong
   | EVfloat _ => Tfloat
   | EVptr_global id ofs => Tint
   end.
@@ -396,6 +401,7 @@ Lemma eventval_valid_match:
 Proof.
   intros. subst ty. destruct ev; simpl in *.
   exists (Vint i); constructor.
+  exists (Vlong i); constructor.
   exists (Vfloat f0); constructor.
   destruct H as [b A]. exists (Vptr b i0); constructor; auto.
 Qed.
@@ -1693,3 +1699,190 @@ Proof.
   intros. exploit external_call_determ. eexact H. eexact H0. intuition.
 Qed.
 
+(** Late in the back-end, calling conventions for external calls change:
+  arguments and results of type [Tlong] are passed as two integers.
+  We now wrap [external_call] to adapt to this convention. *)
+
+Fixpoint decode_longs (tyl: list typ) (vl: list val) : list val :=
+  match tyl with
+  | nil => nil
+  | Tlong :: tys =>
+      match vl with
+      | v1 :: v2 :: vs => Val.longofwords v1 v2 :: decode_longs tys vs
+      | _ => nil
+      end
+  | ty :: tys =>
+      match vl with
+      | v1 :: vs => v1 :: decode_longs tys vs
+      | _ => nil
+      end
+  end.
+
+Definition encode_long (oty: option typ) (v: val) : list val :=
+  match oty with
+  | Some Tlong => Val.hiword v :: Val.loword v :: nil
+  | _ => v :: nil
+  end.
+
+Definition proj_sig_res' (s: signature) : list typ :=
+  match s.(sig_res) with
+  | Some Tlong => Tint :: Tint :: nil
+  | Some ty => ty :: nil
+  | None => Tint :: nil
+  end.
+
+Inductive external_call'
+      (ef: external_function) (F V: Type) (ge: Genv.t F V)
+      (vargs: list val) (m1: mem) (t: trace) (vres: list val) (m2: mem) : Prop :=
+  external_call'_intro: forall v,
+    external_call ef ge (decode_longs (sig_args (ef_sig ef)) vargs) m1 t v m2 ->
+    vres = encode_long (sig_res (ef_sig ef)) v ->
+    external_call' ef ge vargs m1 t vres m2.
+
+Lemma decode_longs_lessdef:
+  forall tyl vl1 vl2, Val.lessdef_list vl1 vl2 -> Val.lessdef_list (decode_longs tyl vl1) (decode_longs tyl vl2).
+Proof.
+  induction tyl; simpl; intros. 
+  auto.
+  destruct a; inv H; auto. inv H1; auto. constructor; auto. apply Val.longofwords_lessdef; auto. 
+Qed.
+
+Lemma decode_longs_inject:
+  forall f tyl vl1 vl2, val_list_inject f vl1 vl2 -> val_list_inject f (decode_longs tyl vl1) (decode_longs tyl vl2).
+Proof.
+  induction tyl; simpl; intros. 
+  auto.
+  destruct a; inv H; auto. inv H1; auto. constructor; auto. apply val_longofwords_inject; auto. 
+Qed.
+
+Lemma encode_long_lessdef:
+  forall oty v1 v2, Val.lessdef v1 v2 -> Val.lessdef_list (encode_long oty v1) (encode_long oty v2).
+Proof.
+  intros. destruct oty as [[]|]; simpl; auto. 
+  constructor. apply Val.hiword_lessdef; auto. constructor. apply Val.loword_lessdef; auto. auto.
+Qed.
+
+Lemma encode_long_inject:
+  forall f oty v1 v2, val_inject f v1 v2 -> val_list_inject f (encode_long oty v1) (encode_long oty v2).
+Proof.
+  intros. destruct oty as [[]|]; simpl; auto. 
+  constructor. apply val_hiword_inject; auto. constructor. apply val_loword_inject; auto. auto.
+Qed.
+
+Lemma encode_long_has_type:
+  forall v sg,
+  Val.has_type v (proj_sig_res sg) ->
+  Val.has_type_list (encode_long (sig_res sg) v) (proj_sig_res' sg).
+Proof.
+  unfold proj_sig_res, proj_sig_res', encode_long; intros.
+  destruct (sig_res sg) as [[] | ]; simpl; auto.
+  destruct v; simpl; auto.
+Qed.
+
+Lemma external_call_well_typed':
+  forall ef (F V : Type) (ge : Genv.t F V) vargs m1 t vres m2,
+  external_call' ef ge vargs m1 t vres m2 ->
+  Val.has_type_list vres (proj_sig_res' (ef_sig ef)).
+Proof.
+  intros. inv H. apply encode_long_has_type. 
+  eapply external_call_well_typed; eauto. 
+Qed.
+
+Lemma external_call_symbols_preserved':
+  forall ef F1 F2 V (ge1: Genv.t F1 V) (ge2: Genv.t F2 V) vargs m1 t vres m2,
+  external_call' ef ge1 vargs m1 t vres m2 ->
+  (forall id, Genv.find_symbol ge2 id = Genv.find_symbol ge1 id) ->
+  (forall b, Genv.find_var_info ge2 b = Genv.find_var_info ge1 b) ->
+  external_call' ef ge2 vargs m1 t vres m2.
+Proof.
+  intros. inv H. exists v; auto. eapply external_call_symbols_preserved; eauto.
+Qed.
+
+Lemma external_call_valid_block':
+  forall ef (F V : Type) (ge : Genv.t F V) vargs m1 t vres m2 b,
+  external_call' ef ge vargs m1 t vres m2 ->
+  Mem.valid_block m1 b -> Mem.valid_block m2 b.
+Proof.
+  intros. inv H. eapply external_call_valid_block; eauto.
+Qed.
+
+Lemma external_call_mem_extends':
+  forall ef (F V : Type) (ge : Genv.t F V) vargs m1 t vres m2 m1' vargs',
+  external_call' ef ge vargs m1 t vres m2 ->
+  Mem.extends m1 m1' ->
+  Val.lessdef_list vargs vargs' ->
+  exists vres' m2',
+     external_call' ef ge vargs' m1' t vres' m2'
+  /\ Val.lessdef_list vres vres'
+  /\ Mem.extends m2 m2'
+  /\ mem_unchanged_on (loc_out_of_bounds m1) m1' m2'.
+Proof.
+  intros. inv H. 
+  exploit external_call_mem_extends; eauto.
+  eapply decode_longs_lessdef; eauto. 
+  intros (v' & m2' & A & B & C & D).
+  exists (encode_long (sig_res (ef_sig ef)) v'); exists m2'; intuition.
+  econstructor; eauto.
+  eapply encode_long_lessdef; eauto.
+Qed.
+
+Lemma external_call_mem_inject':
+  forall ef F V (ge: Genv.t F V) vargs m1 t vres m2 f m1' vargs',
+  meminj_preserves_globals ge f ->
+  external_call' ef ge vargs m1 t vres m2 ->
+  Mem.inject f m1 m1' ->
+  val_list_inject f vargs vargs' ->
+  exists f' vres' m2',
+     external_call' ef ge vargs' m1' t vres' m2'
+  /\ val_list_inject f' vres vres'
+  /\ Mem.inject f' m2 m2'
+  /\ mem_unchanged_on (loc_unmapped f) m1 m2
+  /\ mem_unchanged_on (loc_out_of_reach f m1) m1' m2'
+  /\ inject_incr f f'
+  /\ inject_separated f f' m1 m1'.
+Proof.
+  intros. inv H0. 
+  exploit external_call_mem_inject; eauto.
+  eapply decode_longs_inject; eauto.
+  intros (f' & v' & m2' & A & B & C & D & E & P & Q).
+  exists f'; exists (encode_long (sig_res (ef_sig ef)) v'); exists m2'; intuition.
+  econstructor; eauto.
+  apply encode_long_inject; auto.
+Qed.
+
+Lemma external_call_determ':
+  forall ef (F V : Type) (ge : Genv.t F V) vargs m t1 vres1 m1 t2 vres2 m2,
+  external_call' ef ge vargs m t1 vres1 m1 ->
+  external_call' ef ge vargs m t2 vres2 m2 ->
+  match_traces ge t1 t2 /\ (t1 = t2 -> vres1 = vres2 /\ m1 = m2).
+Proof.
+  intros. inv H; inv H0. exploit external_call_determ. eexact H1. eexact H. 
+  intros [A B]. split. auto. intros. destruct B as [C D]; auto. subst. auto. 
+Qed.
+
+Lemma external_call_match_traces':
+  forall ef (F V : Type) (ge : Genv.t F V) vargs m t1 vres1 m1 t2 vres2 m2,
+  external_call' ef ge vargs m t1 vres1 m1 ->
+  external_call' ef ge vargs m t2 vres2 m2 ->
+  match_traces ge t1 t2.
+Proof.
+  intros. inv H; inv H0. eapply external_call_match_traces; eauto.
+Qed.
+
+Lemma external_call_deterministic':
+  forall ef (F V : Type) (ge : Genv.t F V) vargs m t vres1 m1 vres2 m2,
+  external_call' ef ge vargs m t vres1 m1 ->
+  external_call' ef ge vargs m t vres2 m2 ->
+  vres1 = vres2 /\ m1 = m2.
+Proof.
+  intros. inv H; inv H0. 
+  exploit external_call_deterministic. eexact H1. eexact H. intros [A B].
+  split; congruence.
+Qed.
+
+
+
+
+
+
+
diff --git a/common/Globalenvs.v b/common/Globalenvs.v
index 3d9f499..a082819 100644
--- a/common/Globalenvs.v
+++ b/common/Globalenvs.v
@@ -491,6 +491,7 @@ Definition init_data_size (i: init_data) : Z :=
   | Init_int8 _ => 1
   | Init_int16 _ => 2
   | Init_int32 _ => 4
+  | Init_int64 _ => 8
   | Init_float32 _ => 4
   | Init_float64 _ => 8
   | Init_addrof _ _ => 4
@@ -508,6 +509,7 @@ Definition store_init_data (m: mem) (b: block) (p: Z) (id: init_data) : option m
   | Init_int8 n => Mem.store Mint8unsigned m b p (Vint n)
   | Init_int16 n => Mem.store Mint16unsigned m b p (Vint n)
   | Init_int32 n => Mem.store Mint32 m b p (Vint n)
+  | Init_int64 n => Mem.store Mint64 m b p (Vlong n)
   | Init_float32 n => Mem.store Mfloat32 m b p (Vfloat n)
   | Init_float64 n => Mem.store Mfloat64 m b p (Vfloat n)
   | Init_addrof symb ofs =>
@@ -761,6 +763,9 @@ Fixpoint load_store_init_data (m: mem) (b: block) (p: Z) (il: list init_data) {s
   | Init_int32 n :: il' =>
       Mem.load Mint32 m b p = Some(Vint n)
       /\ load_store_init_data m b (p + 4) il'
+  | Init_int64 n :: il' =>
+      Mem.load Mint64 m b p = Some(Vlong n)
+      /\ load_store_init_data m b (p + 8) il'
   | Init_float32 n :: il' =>
       Mem.load Mfloat32 m b p = Some(Vfloat(Float.singleoffloat n))
       /\ load_store_init_data m b (p + 4) il'
@@ -795,6 +800,7 @@ Proof.
   eapply (A Mint8unsigned (Vint i)); eauto.
   eapply (A Mint16unsigned (Vint i)); eauto.
   eapply (A Mint32 (Vint i)); eauto.
+  eapply (A Mint64 (Vlong i)); eauto.
   eapply (A Mfloat32 (Vfloat f)); eauto.
   eapply (A Mfloat64 (Vfloat f)); eauto.
   destruct (find_symbol ge i); try congruence. exists b0; split; auto. 
diff --git a/common/Memdata.v b/common/Memdata.v
index 3de5f39..c62ba99 100644
--- a/common/Memdata.v
+++ b/common/Memdata.v
@@ -35,6 +35,7 @@ Definition size_chunk (chunk: memory_chunk) : Z :=
   | Mint16signed => 2
   | Mint16unsigned => 2
   | Mint32 => 4
+  | Mint64 => 8
   | Mfloat32 => 4
   | Mfloat64 => 8
   | Mfloat64al32 => 8
@@ -82,6 +83,7 @@ Definition align_chunk (chunk: memory_chunk) : Z :=
   | Mint16signed => 2
   | Mint16unsigned => 2
   | Mint32 => 4
+  | Mint64 => 8
   | Mfloat32 => 4
   | Mfloat64 => 8
   | Mfloat64al32 => 4
@@ -96,7 +98,7 @@ Qed.
 Lemma align_size_chunk_divides:
   forall chunk, (align_chunk chunk | size_chunk chunk).
 Proof.
-  intros. destruct chunk; simpl; try apply Zdivide_refl. exists 2; auto. 
+  intros. destruct chunk; simpl; try apply Zdivide_refl; exists 2; auto. 
 Qed.
 
 Lemma align_le_divides:
@@ -340,6 +342,7 @@ Definition encode_val (chunk: memory_chunk) (v: val) : list memval :=
   | Vint n, (Mint16signed | Mint16unsigned) => inj_bytes (encode_int 2%nat (Int.unsigned n))
   | Vint n, Mint32 => inj_bytes (encode_int 4%nat (Int.unsigned n))
   | Vptr b ofs, Mint32 => inj_pointer 4%nat b ofs
+  | Vlong n, Mint64 => inj_bytes (encode_int 8%nat (Int64.unsigned n))
   | Vfloat n, Mfloat32 => inj_bytes (encode_int 4%nat (Int.unsigned (Float.bits_of_single n)))
   | Vfloat n, (Mfloat64 | Mfloat64al32) => inj_bytes (encode_int 8%nat (Int64.unsigned (Float.bits_of_double n)))
   | _, _ => list_repeat (size_chunk_nat chunk) Undef
@@ -354,6 +357,7 @@ Definition decode_val (chunk: memory_chunk) (vl: list memval) : val :=
       | Mint16signed => Vint(Int.sign_ext 16 (Int.repr (decode_int bl)))
       | Mint16unsigned => Vint(Int.zero_ext 16 (Int.repr (decode_int bl)))
       | Mint32 => Vint(Int.repr(decode_int bl))
+      | Mint64 => Vlong(Int64.repr(decode_int bl))
       | Mfloat32 => Vfloat(Float.single_of_bits (Int.repr (decode_int bl)))
       | Mfloat64 | Mfloat64al32 => Vfloat(Float.double_of_bits (Int64.repr (decode_int bl)))
       end
@@ -394,14 +398,19 @@ Definition decode_encode_val (v1: val) (chunk1 chunk2: memory_chunk) (v2: val) :
   | Vint n, Mint16unsigned, Mint16unsigned => v2 = Vint(Int.zero_ext 16 n)
   | Vint n, Mint32, Mint32 => v2 = Vint n
   | Vint n, Mint32, Mfloat32 => v2 = Vfloat(Float.single_of_bits n)
-  | Vint n, (Mfloat32 | Mfloat64 | Mfloat64al32), _ => v2 = Vundef
+  | Vint n, (Mint64 | Mfloat32 | Mfloat64 | Mfloat64al32), _ => v2 = Vundef
   | Vint n, _, _ => True (**r nothing meaningful to say about v2 *)
   | Vptr b ofs, Mint32, Mint32 => v2 = Vptr b ofs
   | Vptr b ofs, _, _ => v2 = Vundef
+  | Vlong n, Mint64, Mint64 => v2 = Vlong n
+  | Vlong n, Mint64, (Mfloat64 | Mfloat64al32) => v2 = Vfloat(Float.double_of_bits n)
+  | Vlong n, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mfloat32|Mfloat64|Mfloat64al32), _ => v2 = Vundef
+  | Vlong n, _, _ => True (**r nothing meaningful to say about v2 *)
   | Vfloat f, Mfloat32, Mfloat32 => v2 = Vfloat(Float.singleoffloat f)
   | Vfloat f, Mfloat32, Mint32 => v2 = Vint(Float.bits_of_single f)
   | Vfloat f, (Mfloat64 | Mfloat64al32), (Mfloat64 | Mfloat64al32) => v2 = Vfloat f
-  | Vfloat f, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32), _ => v2 = Vundef
+  | Vfloat f, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mint64), _ => v2 = Vundef
+  | Vfloat f, (Mfloat64 | Mfloat64al32), Mint64 => v2 = Vlong(Float.bits_of_double f)
   | Vfloat f, _, _ => True   (* nothing interesting to say about v2 *)
   end.
 
@@ -419,7 +428,6 @@ Opaque inj_pointer.
   intros.
   destruct v; destruct chunk1; simpl; try (apply decode_val_undef);
   destruct chunk2; unfold decode_val; auto; try (rewrite proj_inj_bytes).
-  (* int-int *)
   rewrite decode_encode_int_1. decEq. apply Int.sign_ext_zero_ext. omega.
   rewrite decode_encode_int_1. decEq. apply Int.zero_ext_idem. omega.
   rewrite decode_encode_int_1. decEq. apply Int.sign_ext_zero_ext. omega.
@@ -430,10 +438,15 @@ Opaque inj_pointer.
   rewrite decode_encode_int_2. decEq. apply Int.zero_ext_idem. omega.
   rewrite decode_encode_int_4. auto.
   rewrite decode_encode_int_4. auto.
+  rewrite decode_encode_int_8. auto.
+  rewrite decode_encode_int_8. auto.
+  rewrite decode_encode_int_8. auto.
   rewrite decode_encode_int_4. auto.
-  rewrite decode_encode_int_4. decEq. apply Float.single_of_bits_of_single. 
+  rewrite decode_encode_int_4. decEq. apply Float.single_of_bits_of_single.
+  rewrite decode_encode_int_8. auto.
   rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
   rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
+  rewrite decode_encode_int_8. auto.
   rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
   rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
   change (proj_bytes (inj_pointer 4 b i)) with (@None (list byte)). simpl. 
@@ -815,6 +828,7 @@ Proof.
   intros. inv H; simpl.
   destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self.
   destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self.
+  destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self.
   destruct chunk; try (apply repeat_Undef_inject_self). 
   repeat econstructor; eauto. 
   replace (size_chunk_nat chunk) with (length (encode_val chunk v2)).
@@ -845,3 +859,143 @@ Proof.
   constructor.
 Qed. 
 
+(** * Breaking 64-bit memory accesses into two 32-bit accesses *)
+
+Lemma int_of_bytes_append:
+  forall l2 l1, 
+  int_of_bytes (l1 ++ l2) = int_of_bytes l1 + int_of_bytes l2 * two_p (Z_of_nat (length l1) * 8).
+Proof.
+  induction l1; simpl int_of_bytes; intros.
+  simpl. ring.
+  simpl length. rewrite inj_S. 
+  replace (Z.succ (Z.of_nat (length l1)) * 8) with (Z_of_nat (length l1) * 8 + 8) by omega.
+  rewrite two_p_is_exp. change (two_p 8) with 256. rewrite IHl1. ring.
+  omega. omega.
+Qed.
+
+Lemma int_of_bytes_range:
+  forall l, 0 <= int_of_bytes l < two_p (Z_of_nat (length l) * 8).
+Proof.
+  induction l; intros. 
+  simpl. omega.
+  simpl length. rewrite inj_S. 
+  replace (Z.succ (Z.of_nat (length l)) * 8) with (Z.of_nat (length l) * 8 + 8) by omega.
+  rewrite two_p_is_exp. change (two_p 8) with 256. 
+  simpl int_of_bytes. generalize (Byte.unsigned_range a). 
+  change Byte.modulus with 256. omega. 
+  omega. omega. 
+Qed.
+
+Lemma length_proj_bytes:
+  forall l b, proj_bytes l = Some b -> length b = length l.
+Proof.
+  induction l; simpl; intros. 
+  inv H; auto.
+  destruct a; try discriminate. 
+  destruct (proj_bytes l) eqn:E; inv H. 
+  simpl. f_equal. auto.
+Qed.
+
+Lemma proj_bytes_append:
+  forall l2 l1,
+  proj_bytes (l1 ++ l2) =
+  match proj_bytes l1, proj_bytes l2 with
+  | Some b1, Some b2 => Some (b1 ++ b2)
+  | _, _ => None
+  end.
+Proof.
+  induction l1; simpl.
+  destruct (proj_bytes l2); auto.
+  destruct a; auto. rewrite IHl1. 
+  destruct (proj_bytes l1); auto. destruct (proj_bytes l2); auto. 
+Qed.
+
+Lemma decode_val_int64:
+  forall l1 l2,
+  length l1 = 4%nat -> length l2 = 4%nat ->
+  decode_val Mint64 (l1 ++ l2) =
+  Val.longofwords (decode_val Mint32 (if big_endian then l1 else l2))
+                  (decode_val Mint32 (if big_endian then l2 else l1)).
+Proof.
+  intros. unfold decode_val.
+  assert (PP: forall vl, match proj_pointer vl with Vundef => True | Vptr _ _ => True | _ => False end).
+    intros. unfold proj_pointer. destruct vl; auto. destruct m; auto. 
+    destruct (check_pointer 4 b i (Pointer b i n :: vl)); auto. 
+  assert (PP1: forall vl v2, Val.longofwords (proj_pointer vl) v2 = Vundef).
+    intros. generalize (PP vl). destruct (proj_pointer vl); reflexivity || contradiction.
+  assert (PP2: forall v1 vl, Val.longofwords v1 (proj_pointer vl) = Vundef).
+    intros. destruct v1; simpl; auto. 
+    generalize (PP vl). destruct (proj_pointer vl); reflexivity || contradiction.
+  rewrite proj_bytes_append. 
+  destruct (proj_bytes l1) as [b1|] eqn:B1; destruct (proj_bytes l2) as [b2|] eqn:B2.
+- exploit length_proj_bytes. eexact B1. rewrite H; intro L1.
+  exploit length_proj_bytes. eexact B2. rewrite H0; intro L2.
+  assert (UR: forall l, length l = 4%nat -> Int.unsigned (Int.repr (int_of_bytes l)) = int_of_bytes l).
+    intros. apply Int.unsigned_repr. 
+    generalize (int_of_bytes_range l). rewrite H1. 
+    change (two_p (Z.of_nat 4 * 8)) with (Int.max_unsigned + 1). 
+    omega.
+  unfold decode_int, rev_if_be. destruct big_endian; rewrite B1; rewrite B2.
+  + rewrite <- (rev_length b1) in L1. 
+    rewrite <- (rev_length b2) in L2.
+    rewrite rev_app_distr.
+    set (b1' := rev b1) in *; set (b2' := rev b2) in *.
+    unfold Val.longofwords. f_equal. rewrite Int64.ofwords_add. f_equal.
+    rewrite !UR by auto. rewrite int_of_bytes_append. 
+    rewrite L2. change (Z.of_nat 4 * 8) with 32. ring.
+  + unfold Val.longofwords. f_equal. rewrite Int64.ofwords_add. f_equal.
+    rewrite !UR by auto. rewrite int_of_bytes_append. 
+    rewrite L1. change (Z.of_nat 4 * 8) with 32. ring.
+- destruct big_endian; rewrite B1; rewrite B2; auto.
+- destruct big_endian; rewrite B1; rewrite B2; auto.
+- destruct big_endian; rewrite B1; rewrite B2; auto.
+Qed.
+
+Lemma bytes_of_int_append:
+  forall n2 x2 n1 x1,
+  0 <= x1 < two_p (Z_of_nat n1 * 8) ->
+  bytes_of_int (n1 + n2) (x1 + x2 * two_p (Z_of_nat n1 * 8)) =
+  bytes_of_int n1 x1 ++ bytes_of_int n2 x2.
+Proof.
+  induction n1; intros.
+- simpl in *. f_equal. omega. 
+- assert (E: two_p (Z.of_nat (S n1) * 8) = two_p (Z.of_nat n1 * 8) * 256).
+  {
+    rewrite inj_S. change 256 with (two_p 8). rewrite <- two_p_is_exp. 
+    f_equal. omega. omega. omega. 
+  }
+  rewrite E in *. simpl. f_equal.
+  apply Byte.eqm_samerepr. exists (x2 * two_p (Z.of_nat n1 * 8)). 
+  change Byte.modulus with 256. ring.
+  rewrite Zmult_assoc. rewrite Z_div_plus. apply IHn1.
+  apply Zdiv_interval_1. omega. apply two_p_gt_ZERO; omega. omega. 
+  assumption. omega.
+Qed.
+
+Lemma bytes_of_int64:
+  forall i,
+  bytes_of_int 8 (Int64.unsigned i) =
+  bytes_of_int 4 (Int.unsigned (Int64.loword i)) ++ bytes_of_int 4 (Int.unsigned (Int64.hiword i)).
+Proof.
+  intros. transitivity (bytes_of_int (4 + 4) (Int64.unsigned (Int64.ofwords (Int64.hiword i) (Int64.loword i)))).
+  f_equal. f_equal. rewrite Int64.ofwords_recompose. auto.
+  rewrite Int64.ofwords_add'.
+  change 32 with (Z_of_nat 4 * 8). 
+  rewrite Zplus_comm. apply bytes_of_int_append. apply Int.unsigned_range. 
+Qed.
+
+Lemma encode_val_int64:
+  forall v,
+  encode_val Mint64 v =
+     encode_val Mint32 (if big_endian then Val.hiword v else Val.loword v)
+  ++ encode_val Mint32 (if big_endian then Val.loword v else Val.hiword v).
+Proof.
+  intros. destruct v; destruct big_endian eqn:BI; try reflexivity;
+  unfold Val.loword, Val.hiword, encode_val.
+  unfold inj_bytes. rewrite <- map_app. f_equal. 
+  unfold encode_int, rev_if_be. rewrite BI. rewrite <- rev_app_distr. f_equal.
+  apply bytes_of_int64. 
+  unfold inj_bytes. rewrite <- map_app. f_equal. 
+  unfold encode_int, rev_if_be. rewrite BI.
+  apply bytes_of_int64.
+Qed.
diff --git a/common/Memory.v b/common/Memory.v
index 12a0f45..54d50f7 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -862,6 +862,60 @@ Proof.
   rewrite inj_S. omega.
 Qed.
 
+Theorem load_int64_split:
+  forall m b ofs v,
+  load Mint64 m b ofs = Some v ->
+  exists v1 v2,
+     load Mint32 m b ofs = Some (if big_endian then v1 else v2)
+  /\ load Mint32 m b (ofs + 4) = Some (if big_endian then v2 else v1)
+  /\ v = Val.longofwords v1 v2.
+Proof.
+  intros. 
+  exploit load_valid_access; eauto. intros [A B]. simpl in *.
+  exploit load_loadbytes. eexact H. simpl. intros [bytes [LB EQ]].
+  change 8 with (4 + 4) in LB. 
+  exploit loadbytes_split. eexact LB. omega. omega. 
+  intros (bytes1 & bytes2 & LB1 & LB2 & APP).
+  change 4 with (size_chunk Mint32) in LB1.
+  exploit loadbytes_load. eexact LB1.
+  simpl. apply Zdivides_trans with 8; auto. exists 2; auto.
+  intros L1.
+  change 4 with (size_chunk Mint32) in LB2.
+  exploit loadbytes_load. eexact LB2.
+  simpl. apply Zdivide_plus_r. apply Zdivides_trans with 8; auto. exists 2; auto. exists 1; auto.
+  intros L2.
+  exists (decode_val Mint32 (if big_endian then bytes1 else bytes2));
+  exists (decode_val Mint32 (if big_endian then bytes2 else bytes1)).
+  split. destruct big_endian; auto.
+  split. destruct big_endian; auto.
+  rewrite EQ. rewrite APP. apply decode_val_int64.
+  erewrite loadbytes_length; eauto. reflexivity. 
+  erewrite loadbytes_length; eauto. reflexivity. 
+Qed.
+
+Theorem loadv_int64_split:
+  forall m a v,
+  loadv Mint64 m a = Some v ->
+  exists v1 v2,
+     loadv Mint32 m a = Some (if big_endian then v1 else v2)
+  /\ loadv  Mint32 m (Val.add a (Vint (Int.repr 4))) = Some (if big_endian then v2 else v1)
+  /\ v = Val.longofwords v1 v2.
+Proof.
+  intros. destruct a; simpl in H; try discriminate.
+  exploit load_int64_split; eauto. intros (v1 & v2 & L1 & L2 & EQ).
+  assert (NV: Int.unsigned (Int.add i (Int.repr 4)) = Int.unsigned i + 4).
+    rewrite Int.add_unsigned. apply Int.unsigned_repr.
+    exploit load_valid_access. eexact H. intros [P Q]. simpl in Q.
+    exploit (Zdivide_interval (Int.unsigned i) Int.modulus 8).
+    omega. apply Int.unsigned_range. auto. exists (two_p (32-3)); reflexivity. 
+    unfold Int.max_unsigned. omega.
+  exists v1; exists v2.
+Opaque Int.repr.
+  split. auto.
+  split. simpl. rewrite NV. auto. 
+  auto.
+Qed.
+
 (** ** Properties related to [store] *)
 
 Theorem valid_access_store:
@@ -1581,6 +1635,44 @@ Proof.
   exists m1; split; auto. 
 Qed.
 
+Theorem store_int64_split:
+  forall m b ofs v m',
+  store Mint64 m b ofs v = Some m' ->
+  exists m1,
+     store Mint32 m b ofs (if big_endian then Val.hiword v else Val.loword v) = Some m1
+  /\ store Mint32 m1 b (ofs + 4) (if big_endian then Val.loword v else Val.hiword v) = Some m'.
+Proof.
+  intros. 
+  exploit store_valid_access_3; eauto. intros [A B]. simpl in *.
+  exploit store_storebytes. eexact H. intros SB.
+  rewrite encode_val_int64 in SB. 
+  exploit storebytes_split. eexact SB. intros [m1 [SB1 SB2]]. 
+  rewrite encode_val_length in SB2. simpl in SB2. 
+  exists m1; split. 
+  apply storebytes_store. exact SB1.
+  simpl. apply Zdivides_trans with 8; auto. exists 2; auto.
+  apply storebytes_store. exact SB2. 
+  simpl. apply Zdivide_plus_r. apply Zdivides_trans with 8; auto. exists 2; auto. exists 1; auto.
+Qed.
+
+Theorem storev_int64_split:
+  forall m a v m',
+  storev Mint64 m a v = Some m' ->
+  exists m1,
+     storev Mint32 m a (if big_endian then Val.hiword v else Val.loword v) = Some m1
+  /\ storev Mint32 m1 (Val.add a (Vint (Int.repr 4))) (if big_endian then Val.loword v else Val.hiword v) = Some m'.
+Proof.
+  intros. destruct a; simpl in H; try discriminate.
+  exploit store_int64_split; eauto. intros [m1 [A B]].
+  exists m1; split.
+  exact A.
+  unfold storev, Val.add. rewrite Int.add_unsigned. rewrite Int.unsigned_repr. exact B.
+  exploit store_valid_access_3. eexact H. intros [P Q]. simpl in Q.
+  exploit (Zdivide_interval (Int.unsigned i) Int.modulus 8).
+    omega. apply Int.unsigned_range. auto. exists (two_p (32-3)); reflexivity. 
+  change (Int.unsigned (Int.repr 4)) with 4. unfold Int.max_unsigned. omega. 
+Qed.
+
 (** ** Properties related to [alloc]. *)
 
 Section ALLOC.
diff --git a/common/PrintAST.ml b/common/PrintAST.ml
index 7f2ed3f..c18b09d 100644
--- a/common/PrintAST.ml
+++ b/common/PrintAST.ml
@@ -16,7 +16,7 @@ open Printf
 open Camlcoq
 open AST
 
-let name_of_type = function Tint -> "int" | Tfloat -> "float"
+let name_of_type = function Tint -> "int" | Tfloat -> "float" | Tlong -> "long"
 
 let name_of_chunk = function
   | Mint8signed -> "int8signed"
@@ -24,6 +24,7 @@ let name_of_chunk = function
   | Mint16signed -> "int16signed"
   | Mint16unsigned -> "int16unsigned"
   | Mint32 -> "int32"
+  | Mint64 -> "int64"
   | Mfloat32 -> "float32"
   | Mfloat64 -> "float64"
   | Mfloat64al32 -> "float64al32"
diff --git a/common/Values.v b/common/Values.v
index f629628..f917e0b 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -36,6 +36,7 @@ Definition eq_block := zeq.
 Inductive val: Type :=
   | Vundef: val
   | Vint: int -> val
+  | Vlong: int64 -> val
   | Vfloat: float -> val
   | Vptr: block -> int -> val.
 
@@ -58,6 +59,7 @@ Definition has_type (v: val) (t: typ) : Prop :=
   match v, t with
   | Vundef, _ => True
   | Vint _, Tint => True
+  | Vlong _, Tlong => True
   | Vfloat _, Tfloat => True
   | Vptr _ _, Tint => True
   | _, _ => False
@@ -70,6 +72,12 @@ Fixpoint has_type_list (vl: list val) (tl: list typ) {struct vl} : Prop :=
   | _, _ => False
   end.
 
+Definition has_opttype (v: val) (ot: option typ) : Prop :=
+  match ot with
+  | None => v = Vundef
+  | Some t => has_type v t
+  end.
+
 (** Truth values.  Pointers and non-zero integers are treated as [True].
   The integer 0 (also used to represent the null pointer) is [False].
   [Vundef] and floats are neither true nor false. *)
@@ -127,12 +135,6 @@ Definition floatofintu (v: val) : option val :=
   | _ => None
   end.
 
-Definition floatofwords (v1 v2: val) : val :=
-  match v1, v2 with
-  | Vint n1, Vint n2 => Vfloat (Float.from_words n1 n2)
-  | _, _ => Vundef
-  end.
-
 Definition negint (v: val) : val :=
   match v with
   | Vint n => Vint (Int.neg n)
@@ -344,6 +346,183 @@ Definition divf (v1 v2: val): val :=
   | _, _ => Vundef
   end.
 
+Definition floatofwords (v1 v2: val) : val :=
+  match v1, v2 with
+  | Vint n1, Vint n2 => Vfloat (Float.from_words n1 n2)
+  | _, _ => Vundef
+  end.
+
+(** Operations on 64-bit integers *)
+
+Definition longofwords (v1 v2: val) : val :=
+  match v1, v2 with
+  | Vint n1, Vint n2 => Vlong (Int64.ofwords n1 n2)
+  | _, _ => Vundef
+  end.
+
+Definition loword (v: val) : val :=
+  match v with
+  | Vlong n  => Vint (Int64.loword n)
+  | _ => Vundef
+  end.
+
+Definition hiword (v: val) : val :=
+  match v with
+  | Vlong n  => Vint (Int64.hiword n)
+  | _ => Vundef
+  end.
+
+Definition negl (v: val) : val :=
+  match v with
+  | Vlong n => Vlong (Int64.neg n)
+  | _ => Vundef
+  end.
+
+Definition notl (v: val) : val :=
+  match v with
+  | Vlong n => Vlong (Int64.not n)
+  | _ => Vundef
+  end.
+
+Definition longofint (v: val) : val :=
+  match v with
+  | Vint n => Vlong (Int64.repr (Int.signed n))
+  | _ => Vundef
+  end.
+
+Definition longofintu (v: val) : val :=
+  match v with
+  | Vint n => Vlong (Int64.repr (Int.unsigned n))
+  | _ => Vundef
+  end.
+
+Definition longoffloat (v: val) : option val :=
+  match v with
+  | Vfloat f => option_map Vlong (Float.longoffloat f)
+  | _ => None
+  end.
+
+Definition longuoffloat (v: val) : option val :=
+  match v with
+  | Vfloat f => option_map Vlong (Float.longuoffloat f)
+  | _ => None
+  end.
+
+Definition floatoflong (v: val) : option val :=
+  match v with
+  | Vlong n => Some (Vfloat (Float.floatoflong n))
+  | _ => None
+  end.
+
+Definition floatoflongu (v: val) : option val :=
+  match v with
+  | Vlong n => Some (Vfloat (Float.floatoflongu n))
+  | _ => None
+  end.
+
+Definition addl (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 => Vlong(Int64.add n1 n2)
+  | _, _ => Vundef
+  end.
+
+Definition subl (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 => Vlong(Int64.sub n1 n2)
+  | _, _ => Vundef
+  end.
+
+Definition mull (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 => Vlong(Int64.mul n1 n2)
+  | _, _ => Vundef
+  end.
+
+Definition mull' (v1 v2: val): val :=
+  match v1, v2 with
+  | Vint n1, Vint n2 => Vlong(Int64.mul' n1 n2)
+  | _, _ => Vundef
+  end.
+
+Definition divls (v1 v2: val): option val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 =>
+      if Int64.eq n2 Int64.zero
+      || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone
+      then None
+      else Some(Vlong(Int64.divs n1 n2))
+  | _, _ => None
+  end.
+
+Definition modls (v1 v2: val): option val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 =>
+      if Int64.eq n2 Int64.zero
+      || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone
+      then None
+      else Some(Vlong(Int64.mods n1 n2))
+  | _, _ => None
+  end.
+
+Definition divlu (v1 v2: val): option val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 =>
+      if Int64.eq n2 Int64.zero then None else Some(Vlong(Int64.divu n1 n2))
+  | _, _ => None
+  end.
+
+Definition modlu (v1 v2: val): option val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 =>
+      if Int64.eq n2 Int64.zero then None else Some(Vlong(Int64.modu n1 n2))
+  | _, _ => None
+  end.
+
+Definition andl (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 => Vlong(Int64.and n1 n2)
+  | _, _ => Vundef
+  end.
+
+Definition orl (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 => Vlong(Int64.or n1 n2)
+  | _, _ => Vundef
+  end.
+
+Definition xorl (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 => Vlong(Int64.xor n1 n2)
+  | _, _ => Vundef
+  end.
+
+Definition shll (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vint n2 =>
+     if Int.ltu n2 Int64.iwordsize'
+     then Vlong(Int64.shl' n1 n2)
+     else Vundef
+  | _, _ => Vundef
+  end.
+
+Definition shrl (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vint n2 =>
+     if Int.ltu n2 Int64.iwordsize'
+     then Vlong(Int64.shr' n1 n2)
+     else Vundef
+  | _, _ => Vundef
+  end.
+
+Definition shrlu (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vint n2 =>
+     if Int.ltu n2 Int64.iwordsize'
+     then Vlong(Int64.shru' n1 n2)
+     else Vundef
+  | _, _ => Vundef
+  end.
+
 (** Comparisons *)
 
 Section COMPARISONS.
@@ -392,6 +571,18 @@ Definition cmpf_bool (c: comparison) (v1 v2: val): option bool :=
   | _, _ => None
   end.
 
+Definition cmpl_bool (c: comparison) (v1 v2: val): option bool :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 => Some (Int64.cmp c n1 n2)
+  | _, _ => None
+  end.
+
+Definition cmplu_bool (c: comparison) (v1 v2: val): option bool :=
+  match v1, v2 with
+  | Vlong n1, Vlong n2 => Some (Int64.cmpu c n1 n2)
+  | _, _ => None
+  end.
+
 Definition of_optbool (ob: option bool): val :=
   match ob with Some true => Vtrue | Some false => Vfalse | None => Vundef end.
 
@@ -404,6 +595,12 @@ Definition cmpu (c: comparison) (v1 v2: val): val :=
 Definition cmpf (c: comparison) (v1 v2: val): val :=
   of_optbool (cmpf_bool c v1 v2).
 
+Definition cmpl (c: comparison) (v1 v2: val): val :=
+  of_optbool (cmpl_bool c v1 v2).
+
+Definition cmplu (c: comparison) (v1 v2: val): val :=
+  of_optbool (cmplu_bool c v1 v2).
+
 End COMPARISONS.
 
 (** [load_result] is used in the memory model (library [Mem])
@@ -423,6 +620,7 @@ Definition load_result (chunk: memory_chunk) (v: val) :=
   | Mint16unsigned, Vint n => Vint (Int.zero_ext 16 n)
   | Mint32, Vint n => Vint n
   | Mint32, Vptr b ofs => Vptr b ofs
+  | Mint64, Vlong n => Vlong n
   | Mfloat32, Vfloat f => Vfloat(Float.singleoffloat f)
   | (Mfloat64 | Mfloat64al32), Vfloat f => Vfloat f
   | _, _ => Vundef
@@ -981,6 +1179,12 @@ Inductive lessdef: val -> val -> Prop :=
   | lessdef_refl: forall v, lessdef v v
   | lessdef_undef: forall v, lessdef Vundef v.
 
+Lemma lessdef_same:
+  forall v1 v2, v1 = v2 -> lessdef v1 v2.
+Proof.
+  intros. subst v2. constructor.
+Qed.
+
 Lemma lessdef_trans:
   forall v1 v2 v3, lessdef v1 v2 -> lessdef v2 v3 -> lessdef v1 v3.
 Proof.
@@ -1071,6 +1275,25 @@ Proof.
   intros. destruct ob; simpl; auto. rewrite (H b); auto. 
 Qed.
 
+Lemma longofwords_lessdef:
+  forall v1 v2 v1' v2',
+  lessdef v1 v1' -> lessdef v2 v2' -> lessdef (longofwords v1 v2) (longofwords v1' v2').
+Proof.
+  intros. unfold longofwords. inv H; auto. inv H0; auto. destruct v1'; auto.
+Qed.
+
+Lemma loword_lessdef:
+  forall v v', lessdef v v' -> lessdef (loword v) (loword v').
+Proof.
+  intros. inv H; auto. 
+Qed.
+
+Lemma hiword_lessdef:
+  forall v v', lessdef v v' -> lessdef (hiword v) (hiword v').
+Proof.
+  intros. inv H; auto. 
+Qed.
+
 End Val.
 
 (** * Values and memory injections *)
@@ -1093,6 +1316,8 @@ Definition meminj : Type := block -> option (block * Z).
 Inductive val_inject (mi: meminj): val -> val -> Prop :=
   | val_inject_int:
       forall i, val_inject mi (Vint i) (Vint i)
+  | val_inject_long:
+      forall i, val_inject mi (Vlong i) (Vlong i)
   | val_inject_float:
       forall f, val_inject mi (Vfloat f) (Vfloat f)
   | val_inject_ptr:
@@ -1103,8 +1328,7 @@ Inductive val_inject (mi: meminj): val -> val -> Prop :=
   | val_inject_undef: forall v,
       val_inject mi Vundef v.
 
-Hint Resolve val_inject_int val_inject_float val_inject_ptr 
-             val_inject_undef.
+Hint Constructors val_inject.
 
 Inductive val_list_inject (mi: meminj): list val -> list val-> Prop:= 
   | val_nil_inject :
@@ -1225,6 +1449,25 @@ Proof.
   now erewrite !valid_ptr_inj by eauto.
 Qed.
 
+Lemma val_longofwords_inject:
+  forall v1 v2 v1' v2',
+  val_inject f v1 v1' -> val_inject f v2 v2' -> val_inject f (Val.longofwords v1 v2) (Val.longofwords v1' v2').
+Proof.
+  intros. unfold Val.longofwords. inv H; auto. inv H0; auto.
+Qed.
+
+Lemma val_loword_inject:
+  forall v v', val_inject f v v' -> val_inject f (Val.loword v) (Val.loword v').
+Proof.
+  intros. unfold Val.loword; inv H; auto. 
+Qed.
+
+Lemma val_hiword_inject:
+  forall v v', val_inject f v v' -> val_inject f (Val.hiword v) (Val.hiword v').
+Proof.
+  intros. unfold Val.hiword; inv H; auto. 
+Qed.
+
 End VAL_INJ_OPS.
 
 (** Monotone evolution of a memory injection. *)
@@ -1288,9 +1531,8 @@ Lemma val_inject_id:
   val_inject inject_id v1 v2 <-> Val.lessdef v1 v2.
 Proof.
   intros; split; intros.
-  inv H. constructor. constructor.
+  inv H; auto. 
   unfold inject_id in H0. inv H0. rewrite Int.add_zero. constructor.
-  constructor.
   inv H. destruct v2; econstructor. unfold inject_id; reflexivity. rewrite Int.add_zero; auto.
   constructor.
 Qed.
diff --git a/configure b/configure
index 9c3d6a7..5a1b4ba 100755
--- a/configure
+++ b/configure
@@ -61,7 +61,7 @@ done
 # Per-target configuration
 
 cchecklink=false
-has_runtime_lib=false
+has_runtime_lib=true
 
 case "$target" in
   powerpc-linux|ppc-linux|powerpc-eabi|ppc-eabi)
@@ -218,8 +218,8 @@ CLINKER=gcc
 # Math library.  Set to empty under MacOS X
 LIBMATH=-lm
 
-# Obsolete; do not change
-HAS_RUNTIME_LIB=false
+# Do not change
+HAS_RUNTIME_LIB=true
 EOF
 fi
 
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index a433c59..619be1e 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -37,6 +37,7 @@ let option_dinlining = ref false
 let option_dconstprop = ref false
 let option_dcse = ref false
 let option_dalloc = ref false
+let option_dalloctrace = ref false
 let option_dmach = ref false
 let option_dasm = ref false
 let option_sdump = ref false
diff --git a/driver/Compiler.v b/driver/Compiler.v
index ea277ac..5d9e1a7 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -28,7 +28,6 @@ Require Cminor.
 Require CminorSel.
 Require RTL.
 Require LTL.
-Require LTLin.
 Require Linear.
 Require Mach.
 Require Asm.
@@ -49,16 +48,9 @@ Require Allocation.
 Require Tunneling.
 Require Linearize.
 Require CleanupLabels.
-Require Reload.
-Require RRE.
 Require Stacking.
 Require Asmgen.
-(** Type systems. *)
-Require RTLtyping.
-Require LTLtyping.
-Require LTLintyping.
-Require Lineartyping.
-(** Proofs of semantic preservation and typing preservation. *)
+(** Proofs of semantic preservation. *)
 Require SimplExprproof.
 Require SimplLocalsproof.
 Require Cshmgenproof.
@@ -71,17 +63,9 @@ Require Renumberproof.
 Require Constpropproof.
 Require CSEproof.
 Require Allocproof.
-Require Alloctyping.
 Require Tunnelingproof.
-Require Tunnelingtyping.
 Require Linearizeproof.
-Require Linearizetyping.
 Require CleanupLabelsproof.
-Require CleanupLabelstyping.
-Require Reloadproof.
-Require Reloadtyping.
-Require RREproof.
-Require RREtyping.
 Require Stackingproof.
 Require Asmgenproof.
 
@@ -93,7 +77,7 @@ Parameter print_RTL_tailcall: RTL.program -> unit.
 Parameter print_RTL_inline: RTL.program -> unit.
 Parameter print_RTL_constprop: RTL.program -> unit.
 Parameter print_RTL_cse: RTL.program -> unit.
-Parameter print_LTLin: LTLin.program -> unit.
+Parameter print_LTL: LTL.program -> unit.
 Parameter print_Mach: Mach.program -> unit.
 
 Open Local Scope string_scope.
@@ -137,12 +121,10 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
   @@@ CSE.transf_program
    @@ print print_RTL_cse
   @@@ Allocation.transf_program
+   @@ print print_LTL
    @@ Tunneling.tunnel_program
   @@@ Linearize.transf_program
    @@ CleanupLabels.transf_program
-   @@ print print_LTLin
-   @@ Reload.transf_program
-   @@ RRE.transf_program
   @@@ Stacking.transf_program
    @@ print print_Mach
   @@@ Asmgen.transf_program.
@@ -150,7 +132,7 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
 Definition transf_cminor_program (p: Cminor.program) : res Asm.program :=
    OK p
    @@ print print_Cminor
-   @@ Selection.sel_program
+  @@@ Selection.sel_program
   @@@ RTLgen.transl_program
   @@@ transf_rtl_program.
 
@@ -223,20 +205,7 @@ Proof.
   set (p5 := Tunneling.tunnel_program p4) in *.
   destruct (Linearize.transf_program p5) as [p6|] eqn:?; simpl in H; try discriminate.
   set (p7 := CleanupLabels.transf_program p6) in *.
-  set (p8 := Reload.transf_program p7) in *.
-  set (p9 := RRE.transf_program p8) in *.
-  destruct (Stacking.transf_program p9) as [p10|] eqn:?; simpl in H; try discriminate.
-
-  assert(TY1: LTLtyping.wt_program p5).
-    eapply Tunnelingtyping.program_typing_preserved. 
-    eapply Alloctyping.program_typing_preserved; eauto.
-  assert(TY2: LTLintyping.wt_program p7).
-    eapply CleanupLabelstyping.program_typing_preserved.
-    eapply Linearizetyping.program_typing_preserved; eauto.
-  assert(TY3: Lineartyping.wt_program p9).
-    eapply RREtyping.program_typing_preserved.
-    eapply Reloadtyping.program_typing_preserved; eauto.
-
+  destruct (Stacking.transf_program p7) as [p8|] eqn:?; simpl in H; try discriminate.
   eapply compose_forward_simulation. apply Tailcallproof.transf_program_correct. 
   eapply compose_forward_simulation. apply Inliningproof.transf_program_correct. eassumption.
   eapply compose_forward_simulation. apply Renumberproof.transf_program_correct. 
@@ -245,12 +214,10 @@ Proof.
   eapply compose_forward_simulation. apply CSEproof.transf_program_correct. eassumption.
   eapply compose_forward_simulation. apply Allocproof.transf_program_correct. eassumption.
   eapply compose_forward_simulation. apply Tunnelingproof.transf_program_correct.
-  eapply compose_forward_simulation. apply Linearizeproof.transf_program_correct. eassumption. eauto. 
+  eapply compose_forward_simulation. apply Linearizeproof.transf_program_correct. eassumption. 
   eapply compose_forward_simulation. apply CleanupLabelsproof.transf_program_correct. 
-  eapply compose_forward_simulation. apply Reloadproof.transf_program_correct. eauto.
-  eapply compose_forward_simulation. apply RREproof.transf_program_correct. eauto.
   eapply compose_forward_simulation. apply Stackingproof.transf_program_correct.
-    eexact Asmgenproof.return_address_exists. eassumption. eauto. 
+    eexact Asmgenproof.return_address_exists. eassumption.
   apply Asmgenproof.transf_program_correct; eauto.
   split. auto. 
   apply forward_to_backward_simulation. auto. 
@@ -269,9 +236,9 @@ Proof.
   unfold transf_cminor_program in H.
   repeat rewrite compose_print_identity in H.
   simpl in H. 
-  set (p1 := Selection.sel_program p) in *.
+  destruct (Selection.sel_program p) as [p1|] eqn:?; simpl in H; try discriminate.
   destruct (RTLgen.transl_program p1) as [p2|] eqn:?; simpl in H; try discriminate.
-  eapply compose_forward_simulation. apply Selectionproof.transf_program_correct.
+  eapply compose_forward_simulation. apply Selectionproof.transf_program_correct. eauto.
   eapply compose_forward_simulation. apply RTLgenproof.transf_program_correct. eassumption.
   exact (fst (transf_rtl_program_correct _ _ H)).
 
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 3a5981e..eb0e004 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -89,6 +89,7 @@ let parse_c_file sourcename ifile =
   (* Simplification options *)
   let simplifs =
     "b" (* blocks: mandatory *)
+(*  ^ (if !option_flonglong then "l" else "") *)
   ^ (if !option_fstruct_return then "s" else "")
   ^ (if !option_fbitfields then "f" else "")
   ^ (if !option_fpacked_structs then "p" else "")
@@ -148,7 +149,8 @@ let compile_c_ast sourcename csyntax ofile =
   set_dest PrintRTL.destination_inlining option_dinlining ".inlining.rtl";
   set_dest PrintRTL.destination_constprop option_dconstprop ".constprop.rtl";
   set_dest PrintRTL.destination_cse option_dcse ".cse.rtl";
-  set_dest PrintLTLin.destination option_dalloc ".alloc.ltl";
+  set_dest Regalloc.destination_alloctrace option_dalloctrace ".alloctrace";
+  set_dest PrintLTL.destination option_dalloc ".alloc.ltl";
   set_dest PrintMach.destination option_dmach ".mach";
   (* Convert to Asm *)
   let asm =
@@ -451,6 +453,7 @@ let cmdline_actions =
   "-dconstprop$", Set option_dconstprop;
   "-dcse$", Set option_dcse;
   "-dalloc$", Set option_dalloc;
+  "-dalloctrace$", Set option_dalloctrace;
   "-dmach$", Set option_dmach;
   "-dasm$", Set option_dasm;
   "-sdump$", Set option_sdump;
diff --git a/driver/Interp.ml b/driver/Interp.ml
index b7971ed..4f50514 100644
--- a/driver/Interp.ml
+++ b/driver/Interp.ml
@@ -49,6 +49,7 @@ let print_id_ofs p (id, ofs) =
 let print_eventval p = function
   | EVint n -> fprintf p "%ld" (camlint_of_coqint n)
   | EVfloat f -> fprintf p "%F" (camlfloat_of_coqfloat f)
+  | EVlong n -> fprintf p "%Ld" (camlint64_of_coqint n)
   | EVptr_global(id, ofs) -> fprintf p "&%a" print_id_ofs (id, ofs)
 
 let print_eventval_list p = function
diff --git a/exportclight/Clightdefs.v b/exportclight/Clightdefs.v
index 4c3c942..1cb93d5 100644
--- a/exportclight/Clightdefs.v
+++ b/exportclight/Clightdefs.v
@@ -32,6 +32,8 @@ Definition tushort := Tint I16 Unsigned noattr.
 Definition tint := Tint I32 Signed noattr.
 Definition tuint := Tint I32 Unsigned noattr.
 Definition tbool := Tint IBool Unsigned noattr.
+Definition tlong := Tlong Signed noattr.
+Definition tulong := Tlong Unsigned noattr.
 Definition tfloat := Tfloat F32 noattr.
 Definition tdouble := Tfloat F64 noattr.
 Definition tptr (t: type) := Tpointer t noattr.
@@ -43,6 +45,7 @@ Definition tvolatile (ty: type) :=
   match ty with
   | Tvoid => Tvoid
   | Tint sz si a => Tint sz si volatile_attr
+  | Tlong si a => Tlong si volatile_attr
   | Tfloat sz a => Tfloat sz volatile_attr
   | Tpointer elt a => Tpointer elt volatile_attr
   | Tarray elt sz a => Tarray elt sz volatile_attr
diff --git a/exportclight/ExportClight.ml b/exportclight/ExportClight.ml
index ef00512..04f170d 100644
--- a/exportclight/ExportClight.ml
+++ b/exportclight/ExportClight.ml
@@ -92,6 +92,12 @@ let coqfloat p n =
   then fprintf p "(Float.double_of_bits (Int64.repr %Ld))" n
   else fprintf p "(Float.double_of_bits (Int64.repr (%Ld)))" n
 
+let coqint64 p n =
+  let n = camlint64_of_coqint n in
+  if n >= 0L
+  then fprintf p "(Int64.repr %Ld)" n
+  else fprintf p "(Int64.repr (%Ld))" n
+
 (* Types *)
 
 let use_struct_names = ref true
@@ -114,6 +120,11 @@ and rtyp p = function
         | I32, Signed -> "tint"
         | I32, Unsigned -> "tuint"
         | IBool, _ -> "tbool")
+  | Tlong(sg, _) ->
+      fprintf p "%s" (
+        match sg with
+        | Signed -> "tlong"
+        | Unsigned -> "tulong")
   | Tfloat(sz, _) ->
       fprintf p "%s" (
         match sz with
@@ -151,7 +162,8 @@ and fieldlist p = function
 (* External functions *)
 
 let asttype p t =
-  fprintf p "%s" (match t with AST.Tint -> "AST.Tint" | AST.Tfloat -> "AST.Tfloat")
+  fprintf p "%s"
+     (match t with AST.Tint -> "AST.Tint" | AST.Tfloat -> "AST.Tfloat" | AST.Tlong -> "AST.Tlong")
 
 let name_of_chunk = function
   | Mint8signed -> "Mint8signed"
@@ -159,6 +171,7 @@ let name_of_chunk = function
   | Mint16signed -> "Mint16signed"
   | Mint16unsigned -> "Mint16unsigned"
   | Mint32 -> "Mint32"
+  | Mint64 -> "Mint64"
   | Mfloat32 -> "Mfloat32"
   | Mfloat64 -> "Mfloat64"
   | Mfloat64al32 -> "Mfloat64al32"
@@ -240,6 +253,8 @@ let rec expr p = function
       fprintf p "(Econst_int %a %a)" coqint n typ t
   | Econst_float(n, t) ->
       fprintf p "(Econst_float %a %a)" coqfloat n typ t
+  | Econst_long(n, t) ->
+      fprintf p "(Econst_long %a %a)" coqint64 n typ t
   | Eunop(op, a1, t) ->
       fprintf p "@[<hov 2>(Eunop %s@ %a@ %a)@]"
          (name_unop op) expr a1 typ t
@@ -316,6 +331,7 @@ let init_data p = function
   | Init_int8 n -> fprintf p "Init_int8 %a" coqint n
   | Init_int16 n -> fprintf p "Init_int16 %a" coqint n
   | Init_int32 n -> fprintf p "Init_int32 %a" coqint n
+  | Init_int64 n -> fprintf p "Init_int64 %a" coqint64 n
   | Init_float32 n -> fprintf p "Init_float32 %a" coqfloat n
   | Init_float64 n -> fprintf p "Init_float64 %a" coqfloat n
   | Init_space n -> fprintf p "Init_space %ld" (Z.to_int32 n)
@@ -359,6 +375,7 @@ let register_struct_union ty =
 let rec collect_type = function
   | Tvoid -> ()
   | Tint _ -> ()
+  | Tlong _ -> ()
   | Tfloat _ -> ()
   | Tpointer(t, _) -> collect_type t
   | Tarray(t, _, _) -> collect_type t
@@ -382,6 +399,7 @@ let rec collect_expr e =
   match e with
   | Econst_int _ -> ()
   | Econst_float _ -> ()
+  | Econst_long _ -> ()
   | Evar _ -> ()
   | Etempvar _ -> ()
   | Ederef(r, _) -> collect_expr r
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 1397416..804ccef 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -11,13 +11,14 @@
 (* *********************************************************************)
 
 Require Wfsimpl.
+Require AST.
 Require Iteration.
 Require Floats.
+Require SelectLong.
 Require RTLgen.
 Require Inlining.
 Require ConstpropOp.
 Require Constprop.
-Require Coloring.
 Require Allocation.
 Require Compiler.
 
@@ -28,6 +29,10 @@ Require Import ExtrOcamlString.
 (* Wfsimpl *)
 Extraction Inline Wfsimpl.Fix Wfsimpl.Fixm.
 
+(* AST *)
+Extract Constant AST.ident_of_string =>
+  "fun s -> Camlcoq.intern_string (Camlcoq.camlstring_of_coqstring s)".
+
 (* Memdata *)
 Extract Constant Memdata.big_endian => "Memdataaux.big_endian".
 
@@ -44,6 +49,15 @@ Extract Constant Iteration.GenIter.iterate =>
      match f a with Coq_inl b -> Some b | Coq_inr a' -> iter f a'
    in iter".
 
+(* Selection *)
+
+Extract Constant SelectLong.get_helper =>
+  "fun ge s sg ->
+     Errors.OK (Camlcoq.intern_string (Camlcoq.camlstring_of_coqstring s))".
+Extract Constant SelectLong.get_builtin =>
+  "fun s sg ->
+     Errors.OK (Camlcoq.intern_string (Camlcoq.camlstring_of_coqstring s))".
+
 (* RTLgen *)
 Extract Constant RTLgen.compile_switch => "RTLgenaux.compile_switch".
 Extract Constant RTLgen.more_likely => "RTLgenaux.more_likely".
@@ -59,8 +73,15 @@ Extract Constant ConstpropOp.propagate_float_constants =>
 Extract Constant Constprop.generate_float_constants =>
   "fun _ -> !Clflags.option_ffloatconstprop >= 2".
 
-(* Coloring *)
-Extract Constant Coloring.graph_coloring => "Coloringaux.graph_coloring".
+(* Allocation *)
+Extract Constant Allocation.eq_operation => "(=)".
+Extract Constant Allocation.eq_addressing => "(=)".
+Extract Constant Allocation.eq_opt_addressing => "(=)".
+Extract Constant Allocation.eq_condition => "(=)".
+Extract Constant Allocation.eq_chunk => "(=)".
+Extract Constant Allocation.eq_external_function => "(=)".
+Extract Constant Allocation.eq_signature => "(=)".
+Extract Constant Allocation.regalloc => "Regalloc.regalloc".
 
 (* Linearize *)
 Extract Constant Linearize.enumerate_aux => "Linearizeaux.enumerate_aux".
@@ -77,7 +98,7 @@ Extract Constant Compiler.print_RTL_tailcall => "PrintRTL.print_tailcall".
 Extract Constant Compiler.print_RTL_inline => "PrintRTL.print_inlining".
 Extract Constant Compiler.print_RTL_constprop => "PrintRTL.print_constprop".
 Extract Constant Compiler.print_RTL_cse => "PrintRTL.print_cse".
-Extract Constant Compiler.print_LTLin => "PrintLTLin.print_if".
+Extract Constant Compiler.print_LTL => "PrintLTL.print_if".
 Extract Constant Compiler.print_Mach => "PrintMach.print_if".
 Extract Constant Compiler.print => "fun (f: 'a -> unit) (x: 'a) -> f x; x".
 (*Extraction Inline Compiler.apply_total Compiler.apply_partial.*)
@@ -106,5 +127,10 @@ Separate Extraction
    Compiler.transf_c_program Compiler.transf_cminor_program
    Cexec.do_initial_state Cexec.do_step Cexec.at_final_state
    Initializers.transl_init Initializers.constval
-   Csyntax.Eindex Csyntax.Epreincr.
+   Csyntax.Eindex Csyntax.Epreincr
+   Conventions1.dummy_int_reg Conventions1.dummy_float_reg
+   RTL.instr_defs RTL.instr_uses
+   Machregs.mregs_for_operation Machregs.mregs_for_builtin
+   Machregs.two_address_op.
+
 
diff --git a/ia32/Asm.v b/ia32/Asm.v
index df901db..2757061 100644
--- a/ia32/Asm.v
+++ b/ia32/Asm.v
@@ -163,6 +163,7 @@ Inductive instruction: Type :=
   | Pshr_ri (rd: ireg) (n: int)
   | Psar_rcl (rd: ireg)
   | Psar_ri (rd: ireg) (n: int)
+  | Pshld_ri (rd: ireg) (r1: ireg) (n: int)
   | Pror_ri (rd: ireg) (n: int)
   | Pcmp_rr (r1 r2: ireg) 
   | Pcmp_ri (r1: ireg) (n: int)
@@ -193,7 +194,7 @@ Inductive instruction: Type :=
   | Plabel(l: label)
   | Pallocframe(sz: Z)(ofs_ra ofs_link: int)
   | Pfreeframe(sz: Z)(ofs_ra ofs_link: int)
-  | Pbuiltin(ef: external_function)(args: list preg)(res: preg)
+  | Pbuiltin(ef: external_function)(args: list preg)(res: list preg)
   | Pannot(ef: external_function)(args: list annot_param)
 
 with annot_param : Type :=
@@ -232,6 +233,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset :=
   | r :: l' => undef_regs l' (rs#r <- Vundef)
   end.
 
+(** Assigning multiple registers *)
+
+Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset :=
+  match rl, vl with
+  | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1)
+  | _, _ => rs
+  end.
+
 Section RELSEM.
 
 (** Looking up instructions in a code sequence by position. *)
@@ -433,9 +442,10 @@ Definition exec_load (chunk: memory_chunk) (m: mem)
   end.
 
 Definition exec_store (chunk: memory_chunk) (m: mem)
-                      (a: addrmode) (rs: regset) (r1: preg) :=
+                      (a: addrmode) (rs: regset) (r1: preg)
+                      (destroyed: list preg) :=
   match Mem.storev chunk m (eval_addrmode a rs) (rs r1) with
-  | Some m' => Next (nextinstr_nf (if preg_eq r1 ST0 then rs#ST0 <- Vundef else rs)) m'
+  | Some m' => Next (nextinstr_nf (undef_regs destroyed rs)) m'
   | None => Stuck
   end.
 
@@ -470,7 +480,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pmov_rm rd a =>
       exec_load Mint32 m a rs rd
   | Pmov_mr a r1 =>
-      exec_store Mint32 m a rs r1
+      exec_store Mint32 m a rs r1 nil
   | Pmovd_fr rd r1 =>
       Next (nextinstr (rs#rd <- (rs r1))) m
   | Pmovd_rf rd r1 =>
@@ -482,7 +492,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pmovsd_fm rd a =>
       exec_load Mfloat64al32 m a rs rd
   | Pmovsd_mf a r1 =>
-      exec_store Mfloat64al32 m a rs r1
+      exec_store Mfloat64al32 m a rs r1 nil
   | Pfld_f r1 =>
       Next (nextinstr (rs#ST0 <- (rs r1))) m
   | Pfld_m a =>
@@ -490,14 +500,14 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pfstp_f rd =>
       Next (nextinstr (rs#rd <- (rs ST0) #ST0 <- Vundef)) m
   | Pfstp_m a =>
-      exec_store Mfloat64al32 m a rs ST0
+      exec_store Mfloat64al32 m a rs ST0 (ST0 :: nil)
   | Pxchg_rr r1 r2 =>
       Next (nextinstr (rs#r1 <- (rs r2) #r2 <- (rs r1))) m
   (** Moves with conversion *)
   | Pmovb_mr a r1 =>
-      exec_store Mint8unsigned m a rs r1
+      exec_store Mint8unsigned m a rs r1 nil
   | Pmovw_mr a r1 =>
-      exec_store Mint16unsigned m a rs r1
+      exec_store Mint16unsigned m a rs r1 nil
   | Pmovzb_rr rd r1 =>
       Next (nextinstr (rs#rd <- (Val.zero_ext 8 rs#r1))) m
   | Pmovzb_rm rd a =>
@@ -519,7 +529,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pcvtsd2ss_ff rd r1 =>
       Next (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m
   | Pcvtsd2ss_mf a r1 =>
-      exec_store Mfloat32 m a rs r1
+      exec_store Mfloat32 m a rs r1 (FR XMM7 :: nil)
   | Pcvttsd2si_rf rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
   | Pcvtsi2sd_fr rd r1 =>
@@ -573,6 +583,10 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       Next (nextinstr_nf (rs#rd <- (Val.shr rs#rd rs#ECX))) m
   | Psar_ri rd n =>
       Next (nextinstr_nf (rs#rd <- (Val.shr rs#rd (Vint n)))) m
+  | Pshld_ri rd r1 n =>
+      Next (nextinstr_nf
+              (rs#rd <- (Val.or (Val.shl rs#rd (Vint n))
+                                (Val.shru rs#r1 (Vint (Int.sub Int.iwordsize n)))))) m
   | Pror_ri rd n =>
       Next (nextinstr_nf (rs#rd <- (Val.ror rs#rd (Vint n)))) m
   | Pcmp_rr r1 r2 =>
@@ -590,7 +604,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       | None => Next (nextinstr (rs#rd <- Vundef)) m
       end
   | Psetcc c rd =>
-      Next (nextinstr (rs#ECX <- Vundef #rd <- (Val.of_optbool (eval_testcond c rs)))) m
+      Next (nextinstr (rs#rd <- (Val.of_optbool (eval_testcond c rs)))) m
   (** Arithmetic operations over floats *)
   | Paddd_ff rd r1 =>
       Next (nextinstr (rs#rd <- (Val.addf rs#rd rs#r1))) m
@@ -632,7 +646,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       | Vint n => 
           match list_nth_z tbl (Int.unsigned n) with
           | None => Stuck
-          | Some lbl => goto_label c lbl (rs #ECX <- Vundef #EDX <- Vundef) m
+          | Some lbl => goto_label c lbl rs m
           end
       | _ => Stuck
       end
@@ -686,6 +700,8 @@ Definition preg_of (r: mreg) : preg :=
   match r with
   | AX => IR EAX
   | BX => IR EBX
+  | CX => IR ECX
+  | DX => IR EDX
   | SI => IR ESI
   | DI => IR EDI
   | BP => IR EBP
@@ -695,10 +711,8 @@ Definition preg_of (r: mreg) : preg :=
   | X3 => FR XMM3
   | X4 => FR XMM4
   | X5 => FR XMM5
-  | IT1 => IR EDX
-  | IT2 => IR ECX
-  | FT1 => FR XMM6
-  | FT2 => FR XMM7
+  | X6 => FR XMM6
+  | X7 => FR XMM7
   | FP0 => ST0
   end.
 
@@ -706,24 +720,24 @@ Definition preg_of (r: mreg) : preg :=
     We exploit the calling conventions from module [Conventions], except that
     we use machine registers instead of locations. *)
 
+Definition chunk_of_type (ty: typ) :=
+  match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end.
+
 Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
   | extcall_arg_reg: forall r,
       extcall_arg rs m (R r) (rs (preg_of r))
-  | extcall_arg_int_stack: forall ofs bofs v,
+  | extcall_arg_stack: forall ofs ty bofs v,
       bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mint32 m (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v ->
-      extcall_arg rs m (S (Outgoing ofs Tint)) v
-  | extcall_arg_float_stack: forall ofs bofs v,
-      bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mfloat64al32 m (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v ->
-      extcall_arg rs m (S (Outgoing ofs Tfloat)) v.
+      Mem.loadv (chunk_of_type ty) m
+                (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v ->
+      extcall_arg rs m (S Outgoing ofs ty) v.
 
 Definition extcall_arguments
     (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
   list_forall2 (extcall_arg rs m) (loc_arguments sg) args.
 
-Definition loc_external_result (sg: signature) : preg :=
-  preg_of (loc_result sg).
+Definition loc_external_result (sg: signature) : list preg :=
+  map preg_of (loc_result sg).
 
 (** Extract the values of the arguments of an annotation. *)
 
@@ -753,33 +767,31 @@ Inductive step: state -> trace -> state -> Prop :=
       exec_instr c i rs m = Next rs' m' ->
       step (State rs m) E0 (State rs' m')
   | exec_step_builtin:
-      forall b ofs c ef args res rs m t v m',
+      forall b ofs c ef args res rs m t vl rs' m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal c) ->
       find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) ->
-      external_call ef ge (map rs args) m t v m' ->
-      step (State rs m) t
-           (State (nextinstr_nf(rs #EDX <- Vundef #ECX <- Vundef
-                                #XMM6 <- Vundef #XMM7 <- Vundef
-                                #ST0 <- Vundef
-                                #res <- v)) m')
+      external_call' ef ge (map rs args) m t vl m' ->
+      rs' = nextinstr_nf 
+             (set_regs res vl
+               (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+      step (State rs m) t (State rs' m')
   | exec_step_annot:
       forall b ofs c ef args rs m vargs t v m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal c) ->
       find_instr (Int.unsigned ofs) c = Some (Pannot ef args) ->
       annot_arguments rs m args vargs ->
-      external_call ef ge vargs m t v m' ->
+      external_call' ef ge vargs m t v m' ->
       step (State rs m) t
            (State (nextinstr rs) m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
       rs PC = Vptr b Int.zero ->
       Genv.find_funct_ptr ge b = Some (External ef) ->
-      external_call ef ge args m t res m' ->
       extcall_arguments rs m (ef_sig ef) args ->
-      rs' = (rs#(loc_external_result (ef_sig ef)) <- res
-               #PC <- (rs RA)) ->
+      external_call' ef ge args m t res m' ->
+      rs' = (set_regs (loc_external_result (ef_sig ef)) res rs) #PC <- (rs RA) ->
       step (State rs m) t (State rs' m').
 
 End RELSEM.
@@ -847,21 +859,21 @@ Ltac Equalities :=
   discriminate.
   discriminate.
   inv H11. 
-  exploit external_call_determ. eexact H4. eexact H11. intros [A B].
+  exploit external_call_determ'. eexact H4. eexact H9. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
   inv H12.
   assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0.
-  exploit external_call_determ. eexact H5. eexact H13. intros [A B].
+  exploit external_call_determ'. eexact H5. eexact H13. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
   assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
-  exploit external_call_determ. eexact H3. eexact H8. intros [A B].
+  exploit external_call_determ'. eexact H4. eexact H9. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
 (* trace length *)
   red; intros; inv H; simpl.
   omega.
-  eapply external_call_trace_length; eauto.
-  eapply external_call_trace_length; eauto.
-  eapply external_call_trace_length; eauto.
+  inv H3. eapply external_call_trace_length; eauto.
+  inv H4. eapply external_call_trace_length; eauto.
+  inv H3. eapply external_call_trace_length; eauto.
 (* initial states *)
   inv H; inv H0. f_equal. congruence.
 (* final no step *)
@@ -882,17 +894,3 @@ Definition data_preg (r: preg) : bool :=
   | RA => false
   end.
 
-Definition nontemp_preg (r: preg) : bool :=
-  match r with
-  | PC => false
-  | IR ECX => false
-  | IR EDX => false
-  | IR _ => true
-  | FR XMM6 => false
-  | FR XMM7 => false
-  | FR _ => true
-  | ST0 => false
-  | CR _ => false
-  | RA => false
-  end.
-
diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v
index 6e3ccf8..78f7d6e 100644
--- a/ia32/Asmgen.v
+++ b/ia32/Asmgen.v
@@ -17,6 +17,7 @@ Require Import Errors.
 Require Import AST.
 Require Import Integers.
 Require Import Floats.
+Require Import Memdata.
 Require Import Op.
 Require Import Locations.
 Require Import Mach.
@@ -54,59 +55,12 @@ Definition mk_mov (rd rs: preg) (k: code) : res code :=
   | _, _ => Error(msg "Asmgen.mk_mov")
   end.
 
-Definition mk_shift (shift_instr: ireg -> instruction)
-                    (r1 r2: ireg) (k: code) : res code :=
-  if ireg_eq r2 ECX then
-    OK (shift_instr r1 :: k)
-  else if ireg_eq r1 ECX then
-    OK (Pxchg_rr r2 ECX :: shift_instr r2 :: Pxchg_rr r2 ECX :: k)
-  else
-    OK (Pmov_rr ECX r2 :: shift_instr r1 :: k).
-
-Definition mk_mov2 (src1 dst1 src2 dst2: ireg) (k: code) : code :=
-  if ireg_eq src1 dst1 then
-    Pmov_rr dst2 src2 :: k
-  else if ireg_eq src2 dst2 then
-    Pmov_rr dst1 src1 :: k
-  else if ireg_eq src2 dst1 then
-    if ireg_eq src1 dst2 then
-      Pxchg_rr src1 src2 :: k
-    else
-      Pmov_rr dst2 src2 :: Pmov_rr dst1 src1 :: k
-  else
-    Pmov_rr dst1 src1 :: Pmov_rr dst2 src2 :: k.
-
-Definition mk_div (div_instr: ireg -> instruction)
-                  (r1 r2: ireg) (k: code) : res code :=
-  if ireg_eq r1 EAX then
-    if ireg_eq r2 EDX then
-      OK (Pmov_rr ECX EDX :: div_instr ECX :: k)
-    else
-      OK (div_instr r2 :: k)
-  else
-      OK (Pmovd_fr XMM7 EAX ::
-          mk_mov2 r1 EAX r2 ECX 
-            (div_instr ECX :: Pmov_rr r1 EAX :: Pmovd_rf EAX XMM7 :: k)).
-
-Definition mk_mod (div_instr: ireg -> instruction)
-                  (r1 r2: ireg) (k: code) : res code :=
-  if ireg_eq r1 EAX then
-    if ireg_eq r2 EDX then
-      OK (Pmov_rr ECX EDX :: div_instr ECX :: Pmov_rr EAX EDX :: k)
-    else
-      OK (div_instr r2 :: Pmov_rr EAX EDX :: k)
-  else
-      OK (Pmovd_fr XMM7 EAX ::
-          mk_mov2 r1 EAX r2 ECX 
-            (div_instr ECX :: Pmov_rr r1 EDX :: Pmovd_rf EAX XMM7 :: k)).
-
-Definition mk_shrximm (r: ireg) (n: int) (k: code) : res code :=
-  let tmp := if ireg_eq r ECX then EDX else ECX in
+Definition mk_shrximm (n: int) (k: code) : res code :=
   let p := Int.sub (Int.shl Int.one n) Int.one in
-  OK (Ptest_rr r r ::
-      Plea tmp (Addrmode (Some r) None (inl _ p)) ::
-      Pcmov Cond_l r tmp ::
-      Psar_ri r n :: k).
+  OK (Ptest_rr EAX EAX ::
+      Plea ECX (Addrmode (Some EAX) None (inl _ p)) ::
+      Pcmov Cond_l EAX ECX ::
+      Psar_ri EAX n :: k).
 
 Definition low_ireg (r: ireg) : bool :=
   match r with
@@ -118,7 +72,7 @@ Definition mk_intconv (mk: ireg -> ireg -> instruction) (rd rs: ireg) (k: code)
   if low_ireg rs then
     OK (mk rd rs :: k)
   else
-    OK (Pmov_rr EDX rs :: mk rd EDX :: k).
+    OK (Pmov_rr EAX rs :: mk rd EAX :: k).
 
 Definition addressing_mentions (addr: addrmode) (r: ireg) : bool :=
   match addr with Addrmode base displ const =>
@@ -130,11 +84,11 @@ Definition mk_smallstore (sto: addrmode -> ireg ->instruction)
                          (addr: addrmode) (rs: ireg) (k: code) :=
   if low_ireg rs then
     OK (sto addr rs :: k)
-  else if addressing_mentions addr ECX then
-    OK (Plea ECX addr :: Pmov_rr EDX rs ::
-        sto (Addrmode (Some ECX) None (inl _ Int.zero)) EDX :: k)
+  else if addressing_mentions addr EAX then
+    OK (Plea ECX addr :: Pmov_rr EAX rs ::
+        sto (Addrmode (Some ECX) None (inl _ Int.zero)) EAX :: k)
   else
-    OK (Pmov_rr ECX rs :: sto addr ECX :: k).
+    OK (Pmov_rr EAX rs :: sto addr EAX :: k).
 
 (** Accessing slots in the stack frame.  *)
 
@@ -149,6 +103,8 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
       | ST0  => OK (Pfld_m (Addrmode (Some base) None (inl _ ofs)) :: k)
       | _ => Error (msg "Asmgen.loadind")
       end
+  | Tlong =>
+      Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
@@ -162,6 +118,8 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
       | ST0  => OK (Pfstp_m (Addrmode (Some base) None (inl _ ofs)) :: k)
       | _ => Error (msg "Asmgen.loadind")
       end
+  | Tlong =>
+      Error (msg "Asmgen.storeind")
   end.
 
 (** Translation of addressing modes *)
@@ -284,19 +242,25 @@ Definition testcond_for_condition (cond: condition) : extcond :=
 
 (** Acting upon extended conditions. *)
 
-Definition mk_setcc (cond: extcond) (rd: ireg) (k: code) :=
+Definition mk_setcc_base (cond: extcond) (rd: ireg) (k: code) :=
   match cond with
-  | Cond_base c => Psetcc c rd :: k
+  | Cond_base c =>
+      Psetcc c rd :: k
   | Cond_and c1 c2 =>
-      if ireg_eq rd EDX
-      then Psetcc c1 EDX :: Psetcc c2 ECX :: Pand_rr EDX ECX :: k
-      else Psetcc c1 EDX :: Psetcc c2 rd  :: Pand_rr rd EDX :: k
+      if ireg_eq rd EAX
+      then Psetcc c1 EAX :: Psetcc c2 ECX :: Pand_rr EAX ECX :: k
+      else Psetcc c1 EAX :: Psetcc c2 rd  :: Pand_rr rd EAX :: k
   | Cond_or c1 c2 => 
-      if ireg_eq rd EDX
-      then Psetcc c1 EDX :: Psetcc c2 ECX :: Por_rr EDX ECX :: k
-      else Psetcc c1 EDX :: Psetcc c2 rd  :: Por_rr rd EDX :: k
+      if ireg_eq rd EAX
+      then Psetcc c1 EAX :: Psetcc c2 ECX :: Por_rr EAX ECX :: k
+      else Psetcc c1 EAX :: Psetcc c2 rd  :: Por_rr rd EAX :: k
   end.
 
+Definition mk_setcc (cond: extcond) (rd: ireg) (k: code) :=
+  if low_ireg rd
+  then mk_setcc_base cond rd k
+  else mk_setcc_base cond EAX (Pmov_rr rd EAX :: k).
+
 Definition mk_jcc (cond: extcond) (lbl: label) (k: code) :=
   match cond with
   | Cond_base c => Pjcc c lbl :: k
@@ -330,91 +294,106 @@ Definition transl_op
   | Ocast16unsigned, a1 :: nil =>
       do r1 <- ireg_of a1; do r <- ireg_of res; mk_intconv Pmovzw_rr r r1 k
   | Oneg, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pneg r :: k)
   | Osub, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Psub_rr r r2 :: k)
   | Omul, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pimul_rr r r2 :: k)
   | Omulimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pimul_ri r n :: k)
   | Odiv, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_div Pidiv r r2 k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq a2 CX);
+      assertion (mreg_eq res AX);
+      OK(Pidiv ECX :: k)
   | Odivu, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_div Pdiv r r2 k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq a2 CX);
+      assertion (mreg_eq res AX);
+      OK(Pdiv ECX :: k)
   | Omod, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_mod Pidiv r r2 k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq a2 CX);
+      assertion (mreg_eq res DX);
+      OK(Pidiv ECX :: k)
   | Omodu, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_mod Pdiv r r2 k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq a2 CX);
+      assertion (mreg_eq res DX);
+      OK(Pdiv ECX :: k)
   | Oand, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pand_rr r r2 :: k)
   | Oandimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pand_ri r n :: k)
   | Oor, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Por_rr r r2 :: k)
   | Oorimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Por_ri r n :: k)
   | Oxor, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pxor_rr r r2 :: k)
   | Oxorimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pxor_ri r n :: k)
   | Oshl, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Psal_rcl r r2 k
+      assertion (mreg_eq a1 res);
+      assertion (mreg_eq a2 CX);
+      do r <- ireg_of res; OK (Psal_rcl r :: k)
   | Oshlimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Psal_ri r n :: k)
   | Oshr, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Psar_rcl r r2 k
+      assertion (mreg_eq a1 res);
+      assertion (mreg_eq a2 CX);
+      do r <- ireg_of res; OK (Psar_rcl r :: k)
   | Oshrimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Psar_ri r n :: k)
   | Oshru, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Pshr_rcl r r2 k
+      assertion (mreg_eq a1 res);
+      assertion (mreg_eq a2 CX);
+      do r <- ireg_of res; OK (Pshr_rcl r :: k)
   | Oshruimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pshr_ri r n :: k)
   | Oshrximm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; mk_shrximm r n k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq res AX);
+      mk_shrximm n k
   | Ororimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pror_ri r n :: k)
+  | Oshldimm n, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pshld_ri r r2 n :: k)
   | Olea addr, _ =>
       do am <- transl_addressing addr args; do r <- ireg_of res;
       OK (Plea r am :: k)
   | Onegf, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; OK (Pnegd r :: k)
   | Oabsf, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; OK (Pabsd r :: k)
   | Oaddf, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Paddd_ff r r2 :: k)
   | Osubf, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Psubd_ff r r2 :: k)
   | Omulf, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Pmuld_ff r r2 :: k)
   | Odivf, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivd_ff r r2 :: k)
   | Osingleoffloat, a1 :: nil =>
       do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtsd2ss_ff r r1 :: k)
@@ -450,6 +429,8 @@ Definition transl_load (chunk: memory_chunk)
       do r <- freg_of dest; OK(Pcvtss2sd_fm r am :: k)
   | Mfloat64 | Mfloat64al32 =>
       do r <- freg_of dest; OK(Pmovsd_fm r am :: k)
+  | Mint64 =>
+      Error (msg "Asmgen.transl_load")
   end.
 
 Definition transl_store (chunk: memory_chunk)
@@ -467,6 +448,8 @@ Definition transl_store (chunk: memory_chunk)
       do r <- freg_of src; OK(Pcvtsd2ss_mf am r :: k)
   | Mfloat64 | Mfloat64al32 =>
       do r <- freg_of src; OK(Pmovsd_mf am r :: k)
+  | Mint64 =>
+      Error (msg "Asmgen.transl_store")
   end.
 
 (** Translation of arguments to annotations *)
@@ -491,7 +474,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
         loadind EDX ofs ty dst k
       else
         (do k1 <- loadind EDX ofs ty dst k;
-         loadind ESP f.(fn_link_ofs) Tint IT1 k1)
+         loadind ESP f.(fn_link_ofs) Tint DX k1)
   | Mop op args res =>
       transl_op op args res k
   | Mload chunk addr args dst =>
@@ -521,7 +504,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
       OK (Pfreeframe f.(fn_stacksize) f.(fn_retaddr_ofs) f.(fn_link_ofs) :: 
           Pret :: k)
   | Mbuiltin ef args res =>
-      OK (Pbuiltin ef (List.map preg_of args) (preg_of res) :: k)
+      OK (Pbuiltin ef (List.map preg_of args) (List.map preg_of res) :: k)
   | Mannot ef args =>
       OK (Pannot ef (map transl_annot_param args) :: k)
   end.
@@ -531,7 +514,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
 Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
   match i with
   | Msetstack src ofs ty => before
-  | Mgetparam ofs ty dst => negb (mreg_eq dst IT1)
+  | Mgetparam ofs ty dst => negb (mreg_eq dst DX)
   | _ => false
   end.
 
diff --git a/ia32/Asmgenproof.v b/ia32/Asmgenproof.v
index e43392a..ca0fd18 100644
--- a/ia32/Asmgenproof.v
+++ b/ia32/Asmgenproof.v
@@ -138,49 +138,8 @@ Proof.
 Qed.
 Hint Resolve mk_mov_label: labels.
 
-Remark mk_shift_label:
-  forall f r1 r2 k c, mk_shift f r1 r2 k = OK c -> 
-  (forall r, nolabel (f r)) ->
-  tail_nolabel k c.
-Proof.
-  unfold mk_shift; intros. TailNoLabel.
-Qed.
-Hint Resolve mk_shift_label: labels.
-
-Remark mk_mov2_label:
-  forall r1 r2 r3 r4 k,
-  tail_nolabel k (mk_mov2 r1 r2 r3 r4 k).
-Proof.
-  intros; unfold mk_mov2. 
-  destruct (ireg_eq r1 r2); TailNoLabel.
-  destruct (ireg_eq r3 r4); TailNoLabel.
-  destruct (ireg_eq r3 r2); TailNoLabel.
-  destruct (ireg_eq r1 r4); TailNoLabel.
-Qed.
-Hint Resolve mk_mov2_label: labels.
-
-Remark mk_div_label:
-  forall f r1 r2 k c, mk_div f r1 r2 k = OK c -> 
-  (forall r, nolabel (f r)) ->
-  tail_nolabel k c.
-Proof.
-  unfold mk_div; intros. TailNoLabel. 
-  eapply tail_nolabel_trans; TailNoLabel.
-Qed.
-Hint Resolve mk_div_label: labels.
-
-Remark mk_mod_label:
-  forall f r1 r2 k c, mk_mod f r1 r2 k = OK c -> 
-  (forall r, nolabel (f r)) ->
-  tail_nolabel k c.
-Proof.
-  unfold mk_mod; intros. TailNoLabel. 
-  eapply tail_nolabel_trans; TailNoLabel.
-Qed.
-Hint Resolve mk_mod_label: labels.
-
 Remark mk_shrximm_label:
-  forall r n k c, mk_shrximm r n k = OK c -> tail_nolabel k c.
+  forall n k c, mk_shrximm n k = OK c -> tail_nolabel k c.
 Proof.
   intros. monadInv H; TailNoLabel.
 Qed.
@@ -212,6 +171,7 @@ Proof.
   unfold loadind; intros. destruct ty.
   TailNoLabel.
   destruct (preg_of dst); TailNoLabel.
+  discriminate.
 Qed.
 
 Remark storeind_label:
@@ -222,13 +182,23 @@ Proof.
   unfold storeind; intros. destruct ty.
   TailNoLabel.
   destruct (preg_of src); TailNoLabel.
+  discriminate.
+Qed.
+
+Remark mk_setcc_base_label:
+  forall xc rd k,
+  tail_nolabel k (mk_setcc_base xc rd k).
+Proof.
+  intros. destruct xc; simpl; destruct (ireg_eq rd EAX); TailNoLabel.
 Qed.
 
 Remark mk_setcc_label:
   forall xc rd k,
   tail_nolabel k (mk_setcc xc rd k).
 Proof.
-  intros. destruct xc; simpl; destruct (ireg_eq rd EDX); TailNoLabel.
+  intros. unfold mk_setcc. destruct (low_ireg rd).
+  apply mk_setcc_base_label.
+  eapply tail_nolabel_trans. apply mk_setcc_base_label. TailNoLabel.
 Qed.
 
 Remark mk_jcc_label:
@@ -534,7 +504,7 @@ Proof.
   rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
   exploit storeind_correct; eauto. intros [rs' [P Q]].
   exists rs'; split. eauto.
-  split. unfold undef_setstack. eapply agree_undef_move; eauto.
+  split. eapply agree_undef_regs; eauto. 
   simpl; intros. rewrite Q; auto with asmgen.
 
 - (* Mgetparam *)
@@ -547,9 +517,9 @@ Proof.
   intros [v' [C D]].
 Opaque loadind.
   left; eapply exec_straight_steps; eauto; intros. 
-  assert (DIFF: negb (mreg_eq dst IT1) = true -> IR EDX <> preg_of dst).
-    intros. change (IR EDX) with (preg_of IT1). red; intros. 
-    unfold proj_sumbool in H1. destruct (mreg_eq dst IT1); try discriminate.
+  assert (DIFF: negb (mreg_eq dst DX) = true -> IR EDX <> preg_of dst).
+    intros. change (IR EDX) with (preg_of DX). red; intros. 
+    unfold proj_sumbool in H1. destruct (mreg_eq dst DX); try discriminate.
     elim n. eapply preg_of_injective; eauto.
   destruct ep; simpl in TR.
 (* EDX contains parent *)
@@ -577,10 +547,7 @@ Opaque loadind.
   exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. 
   assert (S: Val.lessdef v (rs2 (preg_of res))) by (eapply Val.lessdef_trans; eauto).
   exists rs2; split. eauto.
-  split.
-  unfold undef_op.
-  destruct op; try (eapply agree_set_undef_mreg; eauto).
-  eapply agree_set_undef_move_mreg; eauto. 
+  split. eapply agree_set_undef_mreg; eauto.
   simpl; congruence.
 
 - (* Mload *)
@@ -606,7 +573,7 @@ Opaque loadind.
   intros. simpl in TR. 
   exploit transl_store_correct; eauto. intros [rs2 [P Q]]. 
   exists rs2; split. eauto.
-  split. eapply agree_exten_temps; eauto. 
+  split. eapply agree_undef_regs; eauto.  
   simpl; congruence.
 
 - (* Mcall *)
@@ -700,22 +667,26 @@ Opaque loadind.
   inv AT. monadInv H3. 
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H2); intro NOOV.
-  exploit external_call_mem_extends; eauto. eapply preg_vals; eauto.
+  exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto.
   intros [vres' [m2' [A [B [C D]]]]].
   left. econstructor; split. apply plus_one. 
   eapply exec_step_builtin. eauto. eauto.
   eapply find_instr_tail; eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
+  eauto.
   econstructor; eauto.
   instantiate (2 := tf); instantiate (1 := x).
   unfold nextinstr_nf, nextinstr. rewrite Pregmap.gss.
-  simpl undef_regs. repeat rewrite Pregmap.gso; auto with asmgen. 
+  rewrite undef_regs_other. rewrite set_pregs_other_2. rewrite undef_regs_other_2.
   rewrite <- H0. simpl. econstructor; eauto.
   eapply code_tail_next_int; eauto.
-  apply agree_nextinstr_nf. eapply agree_set_undef_mreg; eauto. 
-  rewrite Pregmap.gss. auto. 
-  intros. Simplifs. 
+  rewrite preg_notin_charact. intros. auto with asmgen.
+  rewrite preg_notin_charact. intros. auto with asmgen.
+  auto with asmgen.
+  simpl; intros. intuition congruence.
+  apply agree_nextinstr_nf. eapply agree_set_mregs; auto.
+  eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto. 
   congruence.
 
 - (* Mannot *)
@@ -723,12 +694,12 @@ Opaque loadind.
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H3); intro NOOV.
   exploit annot_arguments_match; eauto. intros [vargs' [P Q]]. 
-  exploit external_call_mem_extends; eauto.
+  exploit external_call_mem_extends'; eauto.
   intros [vres' [m2' [A [B [C D]]]]].
   left. econstructor; split. apply plus_one. 
   eapply exec_step_annot. eauto. eauto.
   eapply find_instr_tail; eauto. eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   eapply match_states_intro with (ep := false); eauto with coqlib.
   unfold nextinstr. rewrite Pregmap.gss. 
@@ -762,7 +733,7 @@ Opaque loadind.
 (* simple jcc *)
   exists (Pjcc c1 lbl); exists k; exists rs'.
   split. eexact A.
-  split. eapply agree_exten_temps; eauto. 
+  split. eapply agree_exten; eauto. 
   simpl. rewrite B. auto.
 (* jcc; jcc *)
   destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
@@ -771,13 +742,13 @@ Opaque loadind.
   (* first jcc jumps *)
   exists (Pjcc c1 lbl); exists (Pjcc c2 lbl :: k); exists rs'.
   split. eexact A.
-  split. eapply agree_exten_temps; eauto. 
+  split. eapply agree_exten; eauto. 
   simpl. rewrite TC1. auto.
   (* second jcc jumps *)
   exists (Pjcc c2 lbl); exists k; exists (nextinstr rs').
   split. eapply exec_straight_trans. eexact A. 
   eapply exec_straight_one. simpl. rewrite TC1. auto. auto.
-  split. eapply agree_exten_temps; eauto.
+  split. eapply agree_exten; eauto.
   intros; Simplifs.
   simpl. rewrite eval_testcond_nextinstr. rewrite TC2.
   destruct b2; auto || discriminate.
@@ -787,7 +758,7 @@ Opaque loadind.
   destruct (andb_prop _ _ H3). subst. 
   exists (Pjcc2 c1 c2 lbl); exists k; exists rs'.
   split. eexact A.
-  split. eapply agree_exten_temps; eauto. 
+  split. eapply agree_exten; eauto. 
   simpl. rewrite TC1; rewrite TC2; auto.
 
 - (* Mcond false *)
@@ -801,7 +772,7 @@ Opaque loadind.
   econstructor; split.
   eapply exec_straight_trans. eexact A. 
   apply exec_straight_one. simpl. rewrite B. eauto. auto. 
-  split. apply agree_nextinstr. eapply agree_exten_temps; eauto.
+  split. apply agree_nextinstr. eapply agree_exten; eauto.
   simpl; congruence.
 (* jcc ; jcc *)
   destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
@@ -811,7 +782,7 @@ Opaque loadind.
   eapply exec_straight_trans. eexact A. 
   eapply exec_straight_two. simpl. rewrite TC1. eauto. auto. 
   simpl. rewrite eval_testcond_nextinstr. rewrite TC2. eauto. auto. auto.
-  split. apply agree_nextinstr. apply agree_nextinstr. eapply agree_exten_temps; eauto.
+  split. apply agree_nextinstr. apply agree_nextinstr. eapply agree_exten; eauto.
   simpl; congruence.
 (* jcc2 *)
   destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
@@ -822,7 +793,7 @@ Opaque loadind.
   rewrite TC1; rewrite TC2. 
   destruct b1. simpl in *. subst b2. auto. auto.
   auto.
-  split. apply agree_nextinstr. eapply agree_exten_temps; eauto.
+  split. apply agree_nextinstr. eapply agree_exten; eauto.
   rewrite H1; congruence.
 
 - (* Mjumptable *)
@@ -830,8 +801,7 @@ Opaque loadind.
   inv AT. monadInv H6. 
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H5); intro NOOV.
-  exploit find_label_goto_label. eauto. eauto. instantiate (2 := rs0#ECX <- Vundef #EDX <- Vundef). 
-  repeat (rewrite Pregmap.gso by auto with asmgen). eauto. eauto. 
+  exploit find_label_goto_label; eauto. 
   intros [tc' [rs' [A [B C]]]].
   exploit ireg_val; eauto. rewrite H. intros LD; inv LD.
   left; econstructor; split.
@@ -839,7 +809,8 @@ Opaque loadind.
   eapply find_instr_tail; eauto. 
   simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eauto.
   econstructor; eauto. 
-  eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simplifs. 
+Transparent destroyed_by_jumptable. 
+  simpl. eapply agree_exten; eauto. intros. rewrite C; auto with asmgen.
   congruence.
 
 - (* Mreturn *)
@@ -890,8 +861,9 @@ Opaque loadind.
   subst x. unfold fn_code. eapply code_tail_next_int. rewrite list_length_z_cons. omega. 
   constructor. 
   apply agree_nextinstr. eapply agree_change_sp; eauto.
-  apply agree_exten_temps with rs0; eauto.
-  intros; Simplifs.
+Transparent destroyed_at_function_entry.
+  apply agree_undef_regs with rs0; eauto.
+  simpl; intros. apply Pregmap.gso; auto with asmgen. tauto. 
   congruence. 
   intros. Simplifs. eapply agree_sp; eauto.
 
@@ -900,17 +872,15 @@ Opaque loadind.
   intros [tf [A B]]. simpl in B. inv B.
   exploit extcall_arguments_match; eauto. 
   intros [args' [C D]].
-  exploit external_call_mem_extends; eauto.
+  exploit external_call_mem_extends'; eauto.
   intros [res' [m2' [P [Q [R S]]]]].
   left; econstructor; split.
   apply plus_one. eapply exec_step_external; eauto. 
-  eapply external_call_symbols_preserved; eauto. 
+  eapply external_call_symbols_preserved'; eauto. 
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto.
   unfold loc_external_result.
-  eapply agree_set_mreg; eauto. 
-  rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto. 
-  intros; Simplifs.
+  apply agree_set_other; auto. apply agree_set_mregs; auto.
 
 - (* return *)
   inv STACKS. simpl in *.
@@ -942,10 +912,9 @@ Lemma transf_final_states:
   forall st1 st2 r, 
   match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r.
 Proof.
-  intros. inv H0. inv H. inv STACKS. constructor.
-  auto. 
-  compute in H1. 
-  generalize (preg_val _ _ _ AX AG). rewrite H1. intros LD; inv LD. auto.
+  intros. inv H0. inv H. constructor. auto. 
+  compute in H1. inv H1.  
+  generalize (preg_val _ _ _ AX AG). rewrite H2. intros LD; inv LD. auto.
 Qed.
 
 Theorem transf_program_correct:
diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v
index e3e62cc..303337e 100644
--- a/ia32/Asmgenproof1.v
+++ b/ia32/Asmgenproof1.v
@@ -37,10 +37,11 @@ Lemma agree_nextinstr_nf:
   agree ms sp rs -> agree ms sp (nextinstr_nf rs).
 Proof.
   intros. unfold nextinstr_nf. apply agree_nextinstr. 
-  apply agree_undef_regs. auto.
+  apply agree_undef_nondata_regs. auto.
   intro. simpl. ElimOrEq; auto. 
 Qed.
 
+(*
 Lemma agree_undef_move:
   forall ms sp rs rs',
   agree ms sp rs ->
@@ -71,6 +72,7 @@ Proof.
   congruence. auto. 
   intros. rewrite Pregmap.gso; auto. 
 Qed.
+*)
 
 (** Useful properties of the PC register. *)
 
@@ -95,13 +97,6 @@ Proof.
   intros. apply nextinstr_nf_inv. destruct r; auto || discriminate.
 Qed.
 
-Lemma nextinstr_nf_inv2:
-  forall r rs, 
-  nontemp_preg r = true -> (nextinstr_nf rs)#r = rs#r.
-Proof.
-  intros. apply nextinstr_nf_inv1; auto with asmgen.
-Qed.
-
 Lemma nextinstr_nf_set_preg:
   forall rs m v,
   (nextinstr_nf (rs#(preg_of m) <- v))#PC = Val.add rs#PC Vone.
@@ -166,180 +161,7 @@ Proof.
   split. Simplifs. intros; Simplifs.
 Qed.
 
-(** Smart constructor for shifts *)
-
-Lemma mk_shift_correct:
-  forall sinstr ssem r1 r2 k c rs1 m,
-  mk_shift sinstr r1 r2 k = OK c ->
-  (forall r c rs m,
-   exec_instr ge c (sinstr r) rs m = Next (nextinstr_nf (rs#r <- (ssem rs#r rs#ECX))) m) ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#r1 = ssem rs1#r1 rs1#r2
-  /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r.
-Proof.
-  unfold mk_shift; intros. 
-  destruct (ireg_eq r2 ECX).
-(* fast case *)
-  monadInv H.
-  econstructor. split. apply exec_straight_one. apply H0. auto. 
-  split. Simplifs. intros; Simplifs.
-(* xchg case *)
-  destruct (ireg_eq r1 ECX); monadInv H. 
-  econstructor. split. eapply exec_straight_three.
-  simpl; eauto. 
-  apply H0.
-  simpl; eauto.
-  auto. auto. auto. 
-  split. Simplifs. f_equal. Simplifs.
-  intros; Simplifs. destruct (preg_eq r r2). subst r. Simplifs. Simplifs.
-(* general case *)
-  econstructor. split. eapply exec_straight_two. simpl; eauto. apply H0. 
-  auto. auto. 
-  split. Simplifs. f_equal. Simplifs. intros. Simplifs. 
-Qed.
-
-(** Parallel move 2 *)
-
-Lemma mk_mov2_correct:
-  forall src1 dst1 src2 dst2 k rs m,
-  dst1 <> dst2 ->
-  exists rs',
-     exec_straight ge fn (mk_mov2 src1 dst1 src2 dst2 k) rs m k rs' m
-  /\ rs'#dst1 = rs#src1
-  /\ rs'#dst2 = rs#src2
-  /\ forall r, r <> PC -> r <> dst1 -> r <> dst2 -> rs'#r = rs#r.
-Proof.
-  intros. unfold mk_mov2. 
-(* single moves *)
-  destruct (ireg_eq src1 dst1). subst.
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  intuition Simplifs.
-  destruct (ireg_eq src2 dst2). subst.
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  intuition Simplifs.
-(* xchg *)
-  destruct (ireg_eq src2 dst1). destruct (ireg_eq src1 dst2).
-  subst. econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  intuition Simplifs.
-(* move 2; move 1 *)
-  subst. econstructor; split. eapply exec_straight_two.
-  simpl; eauto. simpl; eauto. auto. auto.
-  intuition Simplifs.
-(* move 1; move 2*)
-  subst. econstructor; split. eapply exec_straight_two.
-  simpl; eauto. simpl; eauto. auto. auto.
-  intuition Simplifs.
-Qed.
-
-(** Smart constructor for division *)
-
-Lemma mk_div_correct:
-  forall mkinstr dsem msem r1 r2 k c (rs1: regset) m vq vr,
-  mk_div mkinstr r1 r2 k = OK c ->
-  (forall r c rs m,
-   exec_instr ge c (mkinstr r) rs m =
-      let vn := rs#EAX in let vd := (rs#EDX <- Vundef)#r in
-      match dsem vn vd, msem vn vd with
-      | Some vq, Some vr => Next (nextinstr_nf (rs#EAX <- vq #EDX <- vr)) m
-      | _, _ => Stuck
-      end) ->
-  dsem rs1#r1 rs1#r2 = Some vq ->
-  msem rs1#r1 rs1#r2 = Some vr ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#r1 = vq
-  /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r.
-Proof.
-  unfold mk_div; intros. 
-  destruct (ireg_eq r1 EAX). destruct (ireg_eq r2 EDX); monadInv H.
-(* r1=EAX r2=EDX *)
-  econstructor. split. eapply exec_straight_two. simpl; eauto. 
-  rewrite H0.
-  change (nextinstr rs1 # ECX <- (rs1 EDX) EAX) with (rs1#EAX). 
-  change ((nextinstr rs1 # ECX <- (rs1 EDX)) # EDX <- Vundef ECX) with (rs1#EDX).
-  rewrite H1. rewrite H2. eauto. auto. auto.
-  intuition Simplifs.
-(* r1=EAX r2<>EDX *)
-  econstructor. split. eapply exec_straight_one. rewrite H0. 
-  replace (rs1 # EDX <- Vundef r2) with (rs1 r2). rewrite H1; rewrite H2. eauto. 
-  symmetry. Simplifs. auto. 
-  intuition Simplifs.
-(* r1 <> EAX *)
-  monadInv H.
-  set (rs2 := nextinstr (rs1#XMM7 <- (rs1#EAX))).
-  exploit (mk_mov2_correct r1 EAX r2 ECX). congruence. instantiate (1 := rs2). 
-  intros [rs3 [A [B [C D]]]].
-  econstructor; split.
-  apply exec_straight_step with rs2 m; auto.
-  eapply exec_straight_trans. eexact A. 
-  eapply exec_straight_three.
-  rewrite H0. replace (rs3 EAX) with (rs1 r1). replace (rs3 # EDX <- Vundef ECX) with (rs1 r2).
-  rewrite H1; rewrite H2. eauto.  
-  simpl; eauto. simpl; eauto.
-  auto. auto. auto. 
-  split. Simplifs. 
-  intros. destruct (preg_eq r EAX). subst.
-  Simplifs. rewrite D; auto with asmgen. 
-  Simplifs. rewrite D; auto with asmgen. unfold rs2; Simplifs.
-Qed.
-
-(** Smart constructor for modulus *)
-
-Lemma mk_mod_correct:
- forall mkinstr dsem msem r1 r2 k c (rs1: regset) m vq vr,
-  mk_mod mkinstr r1 r2 k = OK c ->
-  (forall r c rs m,
-   exec_instr ge c (mkinstr r) rs m =
-      let vn := rs#EAX in let vd := (rs#EDX <- Vundef)#r in
-      match dsem vn vd, msem vn vd with
-      | Some vq, Some vr => Next (nextinstr_nf (rs#EAX <- vq #EDX <- vr)) m
-      | _, _ => Stuck
-      end) ->
-  dsem rs1#r1 rs1#r2 = Some vq ->
-  msem rs1#r1 rs1#r2 = Some vr ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#r1 = vr
-  /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r.
-Proof.
-  unfold mk_mod; intros. 
-  destruct (ireg_eq r1 EAX). destruct (ireg_eq r2 EDX); monadInv H.
-(* r1=EAX r2=EDX *)
-  econstructor. split. eapply exec_straight_three.
-  simpl; eauto.
-  rewrite H0.
-  change (nextinstr rs1 # ECX <- (rs1 EDX) EAX) with (rs1#EAX). 
-  change ((nextinstr rs1 # ECX <- (rs1 EDX)) # EDX <- Vundef ECX) with (rs1#EDX).
-  rewrite H1. rewrite H2. eauto.
-  simpl; eauto.
-  auto. auto. auto. 
-  intuition Simplifs.
-(* r1=EAX r2<>EDX *)
-  econstructor. split. eapply exec_straight_two. rewrite H0. 
-  replace (rs1 # EDX <- Vundef r2) with (rs1 r2). rewrite H1; rewrite H2. eauto. 
-  symmetry. Simplifs.
-  simpl; eauto. 
-  auto. auto.
-  intuition Simplifs.
-(* r1 <> EAX *)
-  monadInv H.
-  set (rs2 := nextinstr (rs1#XMM7 <- (rs1#EAX))).
-  exploit (mk_mov2_correct r1 EAX r2 ECX). congruence. instantiate (1 := rs2). 
-  intros [rs3 [A [B [C D]]]].
-  econstructor; split.
-  apply exec_straight_step with rs2 m; auto.
-  eapply exec_straight_trans. eexact A. 
-  eapply exec_straight_three.
-  rewrite H0. replace (rs3 EAX) with (rs1 r1). replace (rs3 # EDX <- Vundef ECX) with (rs1 r2).
-  rewrite H1; rewrite H2. eauto.  
-  simpl; eauto. simpl; eauto.
-  auto. auto. auto. 
-  split. Simplifs. 
-  intros. destruct (preg_eq r EAX). subst.
-  Simplifs. rewrite D; auto with asmgen.
-  Simplifs. rewrite D; auto with asmgen. unfold rs2; Simplifs.
-Qed.
+(** Properties of division *)
 
 Remark divs_mods_exist:
   forall v1 v2,
@@ -368,46 +190,42 @@ Qed.
 (** Smart constructor for [shrx] *)
 
 Lemma mk_shrximm_correct:
-  forall r1 n k c (rs1: regset) v m,
-  mk_shrximm r1 n k = OK c ->
-  Val.shrx (rs1#r1) (Vint n) = Some v ->
+  forall n k c (rs1: regset) v m,
+  mk_shrximm n k = OK c ->
+  Val.shrx (rs1#EAX) (Vint n) = Some v ->
   exists rs2,
      exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#r1 = v
-  /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r.
+  /\ rs2#EAX = v
+  /\ forall r, data_preg r = true -> r <> EAX -> r <> ECX -> rs2#r = rs1#r.
 Proof.
   unfold mk_shrximm; intros. inv H.
   exploit Val.shrx_shr; eauto. intros [x [y [A [B C]]]].
   inversion B; clear B; subst y; subst v; clear H0.
-  set (tmp := if ireg_eq r1 ECX then EDX else ECX).
-  assert (TMP1: tmp <> r1). unfold tmp; destruct (ireg_eq r1 ECX); congruence. 
-  assert (TMP2: nontemp_preg tmp = false). unfold tmp; destruct (ireg_eq r1 ECX); auto. 
   set (tnm1 := Int.sub (Int.shl Int.one n) Int.one).
   set (x' := Int.add x tnm1).
   set (rs2 := nextinstr (compare_ints (Vint x) (Vint Int.zero) rs1 m)).
-  set (rs3 := nextinstr (rs2#tmp <- (Vint x'))).
-  set (rs4 := nextinstr (if Int.lt x Int.zero then rs3#r1 <- (Vint x') else rs3)).
-  set (rs5 := nextinstr_nf (rs4#r1 <- (Val.shr rs4#r1 (Vint n)))).
-  assert (rs3#r1 = Vint x). unfold rs3. Simplifs. 
-  assert (rs3#tmp = Vint x'). unfold rs3. Simplifs. 
+  set (rs3 := nextinstr (rs2#ECX <- (Vint x'))).
+  set (rs4 := nextinstr (if Int.lt x Int.zero then rs3#EAX <- (Vint x') else rs3)).
+  set (rs5 := nextinstr_nf (rs4#EAX <- (Val.shr rs4#EAX (Vint n)))).
+  assert (rs3#EAX = Vint x). unfold rs3. Simplifs.
+  assert (rs3#ECX = Vint x'). unfold rs3. Simplifs.
   exists rs5. split. 
   apply exec_straight_step with rs2 m. simpl. rewrite A. simpl. rewrite Int.and_idem. auto. auto.
   apply exec_straight_step with rs3 m. simpl. 
-  change (rs2 r1) with (rs1 r1). rewrite A. simpl. 
+  change (rs2 EAX) with (rs1 EAX). rewrite A. simpl. 
   rewrite (Int.add_commut Int.zero tnm1). rewrite Int.add_zero. auto. auto.
   apply exec_straight_step with rs4 m. simpl. 
   change (rs3 SOF) with (rs2 SOF). unfold rs2. rewrite nextinstr_inv; auto with asmgen. 
   unfold compare_ints. rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss.
-  unfold Val.cmp. simpl. unfold rs4. destruct (Int.lt x Int.zero); simpl; auto. rewrite H0; auto.
+  unfold Val.cmp. simpl. unfold rs4. destruct (Int.lt x Int.zero); simpl; auto.
   unfold rs4. destruct (Int.lt x Int.zero); simpl; auto.
   apply exec_straight_one. auto. auto.
-  split. unfold rs5. Simplifs. unfold rs4. Simplifs.
-  f_equal. destruct (Int.lt x Int.zero).
-  Simplifs. rewrite A. auto. Simplifs. congruence. 
-  intros. unfold rs5; Simplifs. unfold rs4; Simplifs. 
-  transitivity (rs3#r).
-  destruct (Int.lt x Int.zero). Simplifs. auto. 
-  unfold rs3; Simplifs. unfold rs2; Simplifs. unfold compare_ints; Simplifs.
+  split. unfold rs5. Simplifs. unfold rs4. rewrite nextinstr_inv; auto with asmgen.
+  destruct (Int.lt x Int.zero). rewrite Pregmap.gss. rewrite A; auto. rewrite A; rewrite H; auto.
+  intros. unfold rs5. Simplifs. unfold rs4. Simplifs. 
+  transitivity (rs3#r). destruct (Int.lt x Int.zero). Simplifs. auto. 
+  unfold rs3. Simplifs. unfold rs2. Simplifs.  
+  unfold compare_ints. Simplifs.  
 Qed.
 
 (** Smart constructor for integer conversions *)
@@ -420,14 +238,14 @@ Lemma mk_intconv_correct:
   exists rs2,
      exec_straight ge fn c rs1 m k rs2 m
   /\ rs2#rd = sem rs1#rs
-  /\ forall r, nontemp_preg r = true -> r <> rd -> rs2#r = rs1#r.
+  /\ forall r, data_preg r = true -> r <> rd -> r <> EAX -> rs2#r = rs1#r.
 Proof.
   unfold mk_intconv; intros. destruct (low_ireg rs); monadInv H.
   econstructor. split. apply exec_straight_one. rewrite H0. eauto. auto. 
-  intuition Simplifs.
+  split. Simplifs. intros. Simplifs.
   econstructor. split. eapply exec_straight_two. 
-  simpl. eauto. apply H0. auto. auto.
-  intuition Simplifs.
+  simpl. eauto. apply H0. auto. auto. 
+  split. Simplifs. intros. Simplifs.  
 Qed.
 
 (** Smart constructor for small stores *)
@@ -449,10 +267,10 @@ Lemma mk_smallstore_correct:
   mk_smallstore sto addr r k = OK c ->
   Mem.storev chunk m1 (eval_addrmode ge addr rs1) (rs1 r) = Some m2 ->
   (forall c r addr rs m,
-   exec_instr ge c (sto addr r) rs m = exec_store ge chunk m addr rs r) ->
+   exec_instr ge c (sto addr r) rs m = exec_store ge chunk m addr rs r nil) ->
   exists rs2,
      exec_straight ge fn c rs1 m1 k rs2 m2
-  /\ forall r, nontemp_preg r = true -> rs2#r = rs1#r.
+  /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> rs2#r = rs1#r.
 Proof.
   unfold mk_smallstore; intros. 
   remember (low_ireg r) as low. destruct low.
@@ -461,17 +279,17 @@ Proof.
   unfold exec_store. rewrite H0. eauto. auto.
   intros; Simplifs.
 (* high reg *)
-   remember (addressing_mentions addr ECX) as mentions. destruct mentions; monadInv H.
-(* ECX is mentioned. *)
+   remember (addressing_mentions addr EAX) as mentions. destruct mentions; monadInv H.
+(* EAX is mentioned. *)
   assert (r <> ECX). red; intros; subst r; discriminate. 
   set (rs2 := nextinstr (rs1#ECX <- (eval_addrmode ge addr rs1))).
-  set (rs3 := nextinstr (rs2#EDX <- (rs1 r))).
+  set (rs3 := nextinstr (rs2#EAX <- (rs1 r))).
   econstructor; split.
   apply exec_straight_three with rs2 m1 rs3 m1. 
   simpl. auto. 
   simpl. replace (rs2 r) with (rs1 r). auto. symmetry. unfold rs2; Simplifs. 
   rewrite H1. unfold exec_store. simpl. rewrite Int.add_zero. 
-  change (rs3 EDX) with (rs1 r).
+  change (rs3 EAX) with (rs1 r).
   change (rs3 ECX) with (eval_addrmode ge addr rs1).
   replace (Val.add (eval_addrmode ge addr rs1) (Vint Int.zero))
      with (eval_addrmode ge addr rs1).
@@ -479,18 +297,18 @@ Proof.
   destruct (eval_addrmode ge addr rs1); simpl in H0; try discriminate.
   simpl. rewrite Int.add_zero; auto. 
   auto. auto. auto. 
-  intros. Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs. 
-(* ECX is not mentioned *)
-  set (rs2 := nextinstr (rs1#ECX <- (rs1 r))).
+  intros. destruct H3. Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs. 
+(* EAX is not mentioned *)
+  set (rs2 := nextinstr (rs1#EAX <- (rs1 r))).
   econstructor; split.
   apply exec_straight_two with rs2 m1.
   simpl. auto.
   rewrite H1. unfold exec_store. 
-  rewrite (addressing_mentions_correct addr ECX rs2 rs1); auto.
-  change (rs2 ECX) with (rs1 r). rewrite H0. eauto. 
+  rewrite (addressing_mentions_correct addr EAX rs2 rs1); auto.
+  change (rs2 EAX) with (rs1 r). rewrite H0. eauto. 
   intros. unfold rs2; Simplifs.
   auto. auto.
-  intros. rewrite dec_eq_false. Simplifs. unfold rs2; Simplifs. congruence.
+  intros. destruct H2. simpl. Simplifs. unfold rs2; Simplifs.
 Qed.
 
 (** Accessing slots in the stack frame *)
@@ -521,6 +339,8 @@ Proof.
   unfold exec_load. rewrite H1; rewrite H0; auto. 
   unfold exec_load. rewrite H1; rewrite H0; auto.
   intuition Simplifs.
+  (* long *)
+  inv H.
 Qed.
 
 Lemma storeind_correct:
@@ -549,7 +369,9 @@ Proof.
   intros. apply nextinstr_nf_inv1; auto.
   econstructor; split. apply exec_straight_one.
   simpl. unfold exec_store. rewrite H1; rewrite H0. eauto. auto.
-  intros. Simplifs. rewrite dec_eq_true. Simplifs. 
+  intros. simpl. Simplifs.
+  (* long *)
+  inv H.
 Qed.
 
 (** Translation of addressing modes *)
@@ -608,7 +430,7 @@ Lemma compare_ints_spec:
      rs'#ZF = Val.cmpu (Mem.valid_pointer m) Ceq v1 v2
   /\ rs'#CF = Val.cmpu (Mem.valid_pointer m) Clt v1 v2
   /\ rs'#SOF = Val.cmp Clt v1 v2
-  /\ (forall r, nontemp_preg r = true -> rs'#r = rs#r).
+  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
 Proof.
   intros. unfold rs'; unfold compare_ints.
   split. auto. 
@@ -737,7 +559,7 @@ Lemma compare_floats_spec:
      rs'#ZF = Val.of_bool (negb (Float.cmp Cne n1 n2))
   /\ rs'#CF = Val.of_bool (negb (Float.cmp Cge n1 n2))
   /\ rs'#PF = Val.of_bool (negb (Float.cmp Ceq n1 n2 || Float.cmp Clt n1 n2 || Float.cmp Cgt n1 n2))
-  /\ (forall r, nontemp_preg r = true -> rs'#r = rs#r).
+  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
 Proof.
   intros. unfold rs'; unfold compare_floats.
   split. auto. 
@@ -890,7 +712,7 @@ Lemma transl_cond_correct:
      | None => True
      | Some b => eval_extcond (testcond_for_condition cond) rs' = Some b
      end
-  /\ forall r, nontemp_preg r = true -> rs'#r = rs r.
+  /\ forall r, data_preg r = true -> rs'#r = rs r.
 Proof.
   unfold transl_cond; intros. 
   destruct cond; repeat (destruct args; try discriminate); monadInv H.
@@ -968,19 +790,19 @@ Proof.
   intros. unfold eval_testcond. repeat rewrite Pregmap.gso; auto with asmgen.
 Qed.
 
-Lemma mk_setcc_correct:
+Lemma mk_setcc_base_correct:
   forall cond rd k rs1 m,
   exists rs2,
-  exec_straight ge fn (mk_setcc cond rd k) rs1 m k rs2 m
+  exec_straight ge fn (mk_setcc_base cond rd k) rs1 m k rs2 m
   /\ rs2#rd = Val.of_optbool(eval_extcond cond rs1)
-  /\ forall r, nontemp_preg r = true -> r <> rd -> rs2#r = rs1#r.
+  /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> r <> rd -> rs2#r = rs1#r.
 Proof.
   intros. destruct cond; simpl in *.
-(* base *)
+- (* base *)
   econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  intuition Simplifs.
-(* or *)
+  apply exec_straight_one. simpl; eauto. auto. 
+  split. Simplifs. intros; Simplifs. 
+- (* or *)
   assert (Val.of_optbool
     match eval_testcond c1 rs1 with
     | Some b1 =>
@@ -996,7 +818,7 @@ Proof.
   destruct b; auto.
   auto.
   rewrite H; clear H.
-  destruct (ireg_eq rd EDX).
+  destruct (ireg_eq rd EAX).
   subst rd. econstructor; split.
   eapply exec_straight_three.
   simpl; eauto.
@@ -1010,9 +832,9 @@ Proof.
   simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto. 
   simpl. eauto. 
   auto. auto. auto.
-  split. Simplifs. rewrite Val.or_commut. f_equal; Simplifs. 
-  intros. Simplifs.
-(* and *)
+  split. Simplifs. rewrite Val.or_commut. decEq; Simplifs.
+  intros. destruct H0; Simplifs. 
+- (* and *)
   assert (Val.of_optbool
     match eval_testcond c1 rs1 with
     | Some b1 =>
@@ -1023,12 +845,14 @@ Proof.
     | None => None
     end =
     Val.and (Val.of_optbool (eval_testcond c1 rs1)) (Val.of_optbool (eval_testcond c2 rs1))).
-  destruct (eval_testcond c1 rs1). destruct (eval_testcond c2 rs1). 
-  destruct b; destruct b0; auto.
-  destruct b; auto.
-  auto.
+  {
+    destruct (eval_testcond c1 rs1). destruct (eval_testcond c2 rs1). 
+    destruct b; destruct b0; auto.
+    destruct b; auto.
+    auto.
+  }
   rewrite H; clear H.
-  destruct (ireg_eq rd EDX).
+  destruct (ireg_eq rd EAX).
   subst rd. econstructor; split.
   eapply exec_straight_three.
   simpl; eauto.
@@ -1042,10 +866,25 @@ Proof.
   simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto. 
   simpl. eauto. 
   auto. auto. auto.
-  split. Simplifs. rewrite Val.and_commut. f_equal; Simplifs. 
-  intros. Simplifs.
+  split. Simplifs. rewrite Val.and_commut. decEq; Simplifs.
+  intros. destruct H0; Simplifs. 
 Qed.
 
+Lemma mk_setcc_correct:
+  forall cond rd k rs1 m,
+  exists rs2,
+  exec_straight ge fn (mk_setcc cond rd k) rs1 m k rs2 m
+  /\ rs2#rd = Val.of_optbool(eval_extcond cond rs1)
+  /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> r <> rd -> rs2#r = rs1#r.
+Proof.
+  intros. unfold mk_setcc. destruct (low_ireg rd).
+- apply mk_setcc_base_correct.
+- exploit mk_setcc_base_correct. intros [rs2 [A [B C]]].
+  econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. 
+    simpl. eauto. simpl. auto.
+  intuition Simplifs. 
+Qed. 
+
 (** Translation of arithmetic operations. *)
 
 Ltac ArgsInv :=
@@ -1053,7 +892,8 @@ Ltac ArgsInv :=
   | [ H: Error _ = OK _ |- _ ] => discriminate
   | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args; ArgsInv
   | [ H: bind _ _ = OK _ |- _ ] => monadInv H; ArgsInv
-  | [ H: assertion _ = OK _ |- _ ] => monadInv H; subst; ArgsInv
+  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
   | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *; clear H; ArgsInv
   | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *; clear H; ArgsInv
   | _ => idtac
@@ -1071,25 +911,22 @@ Lemma transl_op_correct:
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ Val.lessdef v rs'#(preg_of res)
-  /\ forall r, 
-     match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end ->
-     r <> preg_of res -> rs' r = rs r.
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
 Proof.
+Transparent destroyed_by_op.
   intros until v; intros TR EV.
   assert (SAME:
   (exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ rs'#(preg_of res) = v
-  /\ forall r, 
-     match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end ->
-     r <> preg_of res -> rs' r = rs r) ->
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r) ->
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ Val.lessdef v rs'#(preg_of res)
-  /\ forall r, 
-     match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end ->
-     r <> preg_of res -> rs' r = rs r).
-  intros [rs' [A [B C]]]. subst v. exists rs'; auto. 
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r).
+  {
+    intros [rs' [A [B C]]]. subst v. exists rs'; auto.
+  } 
 
   destruct op; simpl in TR; ArgsInv; simpl in EV; try (inv EV); try (apply SAME; TranslOp; fail).
 (* move *)
@@ -1109,32 +946,34 @@ Proof.
   apply SAME. eapply mk_intconv_correct; eauto.
 (* div *)
   apply SAME.
-  specialize (divs_mods_exist (rs x0) (rs x1)). rewrite H0. 
-  destruct (Val.mods (rs x0) (rs x1)) as [vr|] eqn:?; intros; try contradiction.
-  eapply mk_div_correct with (dsem := Val.divs) (msem := Val.mods); eauto.
+  specialize (divs_mods_exist (rs EAX) (rs ECX)). rewrite H0. 
+  destruct (Val.mods (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
+  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
+  auto. auto.
+  simpl in H3. destruct H3; Simplifs.
 (* divu *)
   apply SAME.
-  specialize (divu_modu_exist (rs x0) (rs x1)). rewrite H0. 
-  destruct (Val.modu (rs x0) (rs x1)) as [vr|] eqn:?; intros; try contradiction.
-  eapply mk_div_correct with (dsem := Val.divu) (msem := Val.modu); eauto.
+  specialize (divu_modu_exist (rs EAX) (rs ECX)). rewrite H0. 
+  destruct (Val.modu (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
+  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
+  auto. auto.
+  simpl in H3. destruct H3; Simplifs.
 (* mod *)
   apply SAME.
-  specialize (divs_mods_exist (rs x0) (rs x1)). rewrite H0. 
-  destruct (Val.divs (rs x0) (rs x1)) as [vq|] eqn:?; intros; try contradiction.
-  eapply mk_mod_correct with (dsem := Val.divs) (msem := Val.mods); eauto.
+  specialize (divs_mods_exist (rs EAX) (rs ECX)). rewrite H0. 
+  destruct (Val.divs (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
+  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
+  auto. auto.
+  simpl in H3. destruct H3; Simplifs.
 (* modu *)
   apply SAME.
-  specialize (divu_modu_exist (rs x0) (rs x1)). rewrite H0. 
-  destruct (Val.divu (rs x0) (rs x1)) as [vq|] eqn:?; intros; try contradiction.
-  eapply mk_mod_correct with (dsem := Val.divu) (msem := Val.modu); eauto.
-(* shl *)
-  apply SAME. eapply mk_shift_correct; eauto. 
-(* shr *)
-  apply SAME. eapply mk_shift_correct; eauto. 
+  specialize (divu_modu_exist (rs EAX) (rs ECX)). rewrite H0. 
+  destruct (Val.divu (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
+  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
+  auto. auto.
+  simpl in H3. destruct H3; Simplifs.
 (* shrximm *)
   apply SAME. eapply mk_shrximm_correct; eauto.
-(* shru *)
-  apply SAME. eapply mk_shift_correct; eauto.
 (* lea *)
   exploit transl_addressing_mode_correct; eauto. intros EA.
   TranslOp. rewrite nextinstr_inv; auto with asmgen. rewrite Pregmap.gss; auto.
@@ -1163,7 +1002,7 @@ Lemma transl_load_correct:
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ rs'#(preg_of dest) = v
-  /\ forall r, nontemp_preg r = true -> r <> preg_of dest -> rs'#r = rs#r.
+  /\ forall r, data_preg r = true -> r <> preg_of dest -> rs'#r = rs#r.
 Proof.
   unfold transl_load; intros. monadInv H. 
   exploit transl_addressing_mode_correct; eauto. intro EA.
@@ -1191,7 +1030,7 @@ Lemma transl_store_correct:
   Mem.storev chunk m a (rs (preg_of src)) = Some m' ->
   exists rs',
      exec_straight ge fn c rs m k rs' m'
-  /\ forall r, nontemp_preg r = true -> rs'#r = rs#r.
+  /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_store chunk addr) -> rs'#r = rs#r.
 Proof.
   unfold transl_store; intros. monadInv H. 
   exploit transl_addressing_mode_correct; eauto. intro EA.
@@ -1223,7 +1062,7 @@ Proof.
 (* float32 *)
   econstructor; split.
   apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
-  intros. Simplifs.
+  intros. Transparent destroyed_by_store. simpl in H2. simpl. Simplifs.
 (* float64 *)
   econstructor; split.
   apply exec_straight_one. simpl. unfold exec_store. erewrite Mem.storev_float64al32; eauto. auto.
diff --git a/ia32/ConstpropOp.vp b/ia32/ConstpropOp.vp
index e6ba98a..fea0afd 100644
--- a/ia32/ConstpropOp.vp
+++ b/ia32/ConstpropOp.vp
@@ -31,6 +31,7 @@ Inductive approx : Type :=
                              no compile-time information is available. *)
   | I: int -> approx     (** A known integer value. *)
   | F: float -> approx   (** A known floating-point value. *)
+  | L: int64 -> approx   (** A know 64-bit integer value. *)
   | G: ident -> int -> approx
                          (** The value is the address of the given global
                              symbol plus the given integer offset. *)
@@ -130,6 +131,11 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) :=
   | Oshru, I n1 :: I n2 :: nil => if Int.ltu n2 Int.iwordsize then I(Int.shru n1 n2) else Unknown
   | Oshruimm n, I n1 :: nil => if Int.ltu n Int.iwordsize then I(Int.shru n1 n) else Unknown
   | Ororimm n, I n1 :: nil => if Int.ltu n Int.iwordsize then I(Int.ror n1 n) else Unknown
+  | Oshldimm n, I n1 :: I n2 :: nil =>
+      let n' := Int.sub Int.iwordsize n in
+      if Int.ltu n Int.iwordsize && Int.ltu n' Int.iwordsize
+      then I(Int.or (Int.shl n1 n) (Int.shru n2 n')) 
+      else Unknown
   | Olea mode, vl => eval_static_addressing mode vl
   | Onegf, F n1 :: nil => F(Float.neg n1)
   | Oabsf, F n1 :: nil => F(Float.abs n1)
@@ -140,6 +146,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) :=
   | Osingleoffloat, F n1 :: nil => F(Float.singleoffloat n1)
   | Ointoffloat, F n1 :: nil => eval_static_intoffloat n1
   | Ofloatofint, I n1 :: nil => if propagate_float_constants tt then F(Float.floatofint n1) else Unknown
+  | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2)
+  | Olowlong, L n :: nil => I(Int64.loword n)
+  | Ohighlong, L n :: nil => I(Int64.hiword n)
   | Ocmp c, vl => eval_static_condition_val c vl
   | _, _ => Unknown
   end.
diff --git a/ia32/ConstpropOpproof.v b/ia32/ConstpropOpproof.v
index d792d8a..b6c3cdc 100644
--- a/ia32/ConstpropOpproof.v
+++ b/ia32/ConstpropOpproof.v
@@ -44,9 +44,10 @@ Definition val_match_approx (a: approx) (v: val) : Prop :=
   | Unknown => True
   | I p => v = Vint p
   | F p => v = Vfloat p
+  | L p => v = Vlong p
   | G symb ofs => v = symbol_address ge symb ofs
   | S ofs => v = Val.add sp (Vint ofs)
-  | _ => False
+  | Novalue => False
   end.
 
 Inductive val_list_match_approx: list approx -> list val -> Prop :=
@@ -64,6 +65,8 @@ Ltac SimplVMA :=
       simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (F _) ?v) |- _ =>
       simpl in H; (try subst v); SimplVMA
+  | H: (val_match_approx (L _) ?v) |- _ =>
+      simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (G _ _) ?v) |- _ =>
       simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (S _) ?v) |- _ =>
@@ -156,6 +159,8 @@ Proof.
   destruct (Int.ltu n2 Int.iwordsize); simpl; auto.
   destruct (Int.ltu n Int.iwordsize); simpl; auto.
   destruct (Int.ltu n Int.iwordsize); simpl; auto.
+  destruct (Int.ltu n Int.iwordsize);
+  destruct (Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize); simpl; auto.
   eapply eval_static_addressing_correct; eauto.
   unfold eval_static_intoffloat.
   destruct (Float.intoffloat n1) eqn:?; simpl in H0; inv H0.
diff --git a/ia32/Machregs.v b/ia32/Machregs.v
index df96393..3b84aa5 100644
--- a/ia32/Machregs.v
+++ b/ia32/Machregs.v
@@ -10,34 +10,32 @@
 (*                                                                     *)
 (* *********************************************************************)
 
+Require Import String.
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
+Require Import Integers.
+Require Import Op.
 
 (** ** Machine registers *)
 
 (** The following type defines the machine registers that can be referenced
   as locations.  These include:
-- Integer registers that can be allocated to RTL pseudo-registers ([Rxx]).
-- Floating-point registers that can be allocated to RTL pseudo-registers 
-  ([Fxx]).
-- Two integer registers, not allocatable, reserved as temporaries for
-  spilling and reloading ([IT1, IT2]).
-- Two float registers, not allocatable, reserved as temporaries for
-  spilling and reloading ([FT1, FT2]).
+- Integer registers that can be allocated to RTL pseudo-registers.
+- Floating-point registers that can be allocated to RTL pseudo-registers.
+- The special [FP0] register denoting the top of the X87 float stack.
 
   The type [mreg] does not include special-purpose or reserved
   machine registers such as the stack pointer and the condition codes. *)
 
 Inductive mreg: Type :=
   (** Allocatable integer regs *)
-  | AX: mreg | BX: mreg | SI: mreg | DI: mreg | BP: mreg
+  | AX: mreg | BX: mreg | CX: mreg | DX: mreg | SI: mreg | DI: mreg | BP: mreg
   (** Allocatable float regs *)
-  | X0: mreg | X1: mreg | X2: mreg | X3: mreg | X4: mreg | X5: mreg
-  (** Integer temporaries *)
-  | IT1: mreg (* DX *) | IT2: mreg (* CX *)
-  (** Float temporaries *)
-  | FT1: mreg (* X6 *) | FT2: mreg (* X7 *) | FP0: mreg (* top of FP stack *).
+  | X0: mreg | X1: mreg | X2: mreg | X3: mreg 
+  | X4: mreg | X5: mreg | X6: mreg | X7: mreg
+  (** Special float reg *)
+  | FP0: mreg (**r top of x87 FP stack *).
 
 Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
 Proof. decide equality. Defined.
@@ -45,28 +43,24 @@ Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
-  | AX => Tint | BX => Tint | SI => Tint | DI => Tint | BP => Tint
-  (** Allocatable float regs *)
-  | X0 => Tfloat | X1 => Tfloat | X2 => Tfloat
-  | X3 => Tfloat | X4 => Tfloat | X5 => Tfloat
-  (** Integer temporaries *)
-  | IT1 => Tint | IT2 => Tint
-  (** Float temporaries *)
-  | FT1 => Tfloat | FT2 => Tfloat | FP0 => Tfloat
+  | AX => Tint | BX => Tint | CX => Tint | DX => Tint
+  | SI => Tint | DI => Tint | BP => Tint
+  | X0 => Tfloat | X1 => Tfloat | X2 => Tfloat | X3 => Tfloat
+  | X4 => Tfloat | X5 => Tfloat | X6 => Tfloat | X7 => Tfloat
+  | FP0 => Tfloat
   end.
 
-Open Scope positive_scope.
+Local Open Scope positive_scope.
 
 Module IndexedMreg <: INDEXED_TYPE.
   Definition t := mreg.
   Definition eq := mreg_eq.
   Definition index (r: mreg): positive :=
     match r with
-    | AX => 1 | BX => 2 | SI => 3 | DI => 4 | BP => 5
-    | X0 => 6 | X1 => 7 | X2 => 8
-    | X3 => 9 | X4 => 10 | X5 => 11
-    | IT1 => 12 | IT2 => 13
-    | FT1 => 14 | FT2 => 15 | FP0 => 16
+    | AX => 1 | BX => 2 | CX => 3 | DX => 4 | SI => 5 | DI => 6 | BP => 7
+    | X0 => 8 | X1 => 9 | X2 => 10 | X3 => 11
+    | X4 => 12 | X5 => 13 | X6 => 14 | X7 => 15
+    | FP0 => 16
     end.
   Lemma index_inj:
     forall r1 r2, index r1 = index r2 -> r1 = r2.
@@ -75,3 +69,147 @@ Module IndexedMreg <: INDEXED_TYPE.
   Qed.
 End IndexedMreg.
 
+(** ** Destroyed registers, preferred registers *)
+
+Definition destroyed_by_op (op: operation): list mreg :=
+  match op with
+  | Omove => FP0 :: nil
+  | Ocast8signed | Ocast8unsigned | Ocast16signed | Ocast16unsigned => AX :: nil
+  | Odiv => AX :: DX :: nil
+  | Odivu => AX :: DX :: nil
+  | Omod => AX :: DX :: nil
+  | Omodu => AX :: DX :: nil
+  | Oshrximm _ => CX :: nil
+  | Ocmp _ => AX :: CX :: nil
+  | _ => nil
+  end.
+
+Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
+  nil.
+
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
+  match chunk with
+  | Mint8signed | Mint8unsigned => AX :: CX :: nil
+  | Mint16signed | Mint16unsigned | Mint32 | Mint64 => nil
+  | Mfloat32 => X7 :: nil
+  | Mfloat64 | Mfloat64al32 => FP0 :: nil
+  end.
+
+Definition destroyed_by_cond (cond: condition): list mreg :=
+  nil.
+
+Definition destroyed_by_jumptable: list mreg :=
+  nil.
+
+Definition destroyed_by_builtin (ef: external_function): list mreg :=
+  match ef with
+  | EF_memcpy sz al =>
+      if zle sz 32 then CX :: X7 :: nil else CX :: SI :: DI :: nil
+  | EF_vload _ => nil
+  | EF_vload_global _ _ _ => nil
+  | EF_vstore _ => AX :: CX :: X7 :: nil
+  | EF_vstore_global _ _ _ => AX :: X7 :: nil
+  | _ => AX :: CX :: X7 :: FP0 :: nil
+  end.
+
+Definition destroyed_at_function_entry: list mreg :=
+  DX :: FP0 :: nil.   (* must include destroyed_by_op Omove *)
+
+Definition temp_for_parent_frame: mreg :=
+  DX.
+
+Definition mregs_for_operation (op: operation): list (option mreg) * option mreg :=
+  match op with
+  | Odiv => (Some AX :: Some CX :: nil, Some AX)
+  | Odivu => (Some AX :: Some CX :: nil, Some AX)
+  | Omod => (Some AX :: Some CX :: nil, Some DX)
+  | Omodu => (Some AX :: Some CX :: nil, Some DX)
+  | Oshl => (None :: Some CX :: nil, None)
+  | Oshr => (None :: Some CX :: nil, None)
+  | Oshru => (None :: Some CX :: nil, None)
+  | Oshrximm _ => (Some AX :: nil, Some AX)
+  | _ => (nil, None)
+  end.
+
+Local Open Scope string_scope.
+
+Definition builtin_negl := ident_of_string "__builtin_negl".
+Definition builtin_addl := ident_of_string "__builtin_addl".
+Definition builtin_subl := ident_of_string "__builtin_subl".
+Definition builtin_mull := ident_of_string "__builtin_mull".
+
+Definition mregs_for_builtin (ef: external_function): list (option mreg) * list (option mreg) :=
+  match ef with
+  | EF_memcpy sz al =>
+     if zle sz 32 then (Some AX :: Some DX :: nil, nil) else (Some DI :: Some SI :: nil, nil)
+  | EF_builtin id sg =>
+     if ident_eq id builtin_negl then
+       (Some DX :: Some AX :: nil, Some DX :: Some AX :: nil)
+     else if ident_eq id builtin_addl || ident_eq id builtin_subl then
+       (Some DX :: Some AX :: Some CX :: Some BX :: nil, Some DX :: Some AX :: nil)
+     else if ident_eq id builtin_mull then
+       (Some AX :: Some DX :: nil, Some DX :: Some AX :: nil)
+     else
+       (nil, nil)
+  | _ => (nil, nil)
+  end.
+
+Global Opaque
+    destroyed_by_op destroyed_by_load destroyed_by_store
+    destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
+    destroyed_at_function_entry temp_for_parent_frame
+    mregs_for_operation mregs_for_builtin.
+
+(** Two-address operations.  Return [true] if the first argument and
+  the result must be in the same location *and* are unconstrained
+  by [mregs_for_operation]. *)
+
+Definition two_address_op (op: operation) : bool :=
+  match op with
+  | Omove => false
+  | Ointconst _ => false
+  | Ofloatconst _ => false
+  | Oindirectsymbol _ => false
+  | Ocast8signed => false
+  | Ocast8unsigned => false
+  | Ocast16signed => false
+  | Ocast16unsigned => false
+  | Oneg => true
+  | Osub => true
+  | Omul => true
+  | Omulimm _ => true
+  | Odiv => false
+  | Odivu => false
+  | Omod => false
+  | Omodu => false
+  | Oand => true
+  | Oandimm _ => true
+  | Oor => true
+  | Oorimm _ => true
+  | Oxor => true
+  | Oxorimm _ => true
+  | Oshl => true
+  | Oshlimm _ => true
+  | Oshr => true
+  | Oshrimm _ => true
+  | Oshrximm _ => false
+  | Oshru => true
+  | Oshruimm _ => true
+  | Ororimm _ => true
+  | Oshldimm _ => true
+  | Olea addr => false
+  | Onegf => true
+  | Oabsf => true
+  | Oaddf => true
+  | Osubf => true
+  | Omulf => true
+  | Odivf => true
+  | Osingleoffloat => false
+  | Ointoffloat => false
+  | Ofloatofint => false
+  | Omakelong => false
+  | Olowlong => false
+  | Ohighlong => false
+  | Ocmp c => false
+  end.
+
diff --git a/ia32/Machregsaux.ml b/ia32/Machregsaux.ml
index 7d6df90..8403746 100644
--- a/ia32/Machregsaux.ml
+++ b/ia32/Machregsaux.ml
@@ -15,11 +15,11 @@
 open Machregs
 
 let register_names = [
-  ("AX", AX); ("BX", BX); ("SI", SI); ("DI", DI); ("BP", BP);
+  ("AX", AX); ("BX", BX); ("CX", CX); ("DX", DX);
+  ("SI", SI); ("DI", DI); ("BP", BP);
   ("XMM0", X0); ("XMM1", X1); ("XMM2", X2); ("XMM3", X3);
-  ("XMM4", X4); ("XMM5", X5);
-  ("DX", IT1); ("CX", IT2);
-  ("XMM6", FT1); ("XMM7", FT2); ("ST0", FP0)
+  ("XMM4", X4); ("XMM5", X5); ("XMM6", X6); ("XMM7", X7);
+  ("ST0", FP0)
 ]
 
 let name_of_register r =
diff --git a/ia32/Op.v b/ia32/Op.v
index a23e8ac..3dc1f77 100644
--- a/ia32/Op.v
+++ b/ia32/Op.v
@@ -96,6 +96,7 @@ Inductive operation : Type :=
   | Oshru: operation                    (**r [rd = r1 >> r2] (unsigned) *)
   | Oshruimm: int -> operation          (**r [rd = r1 >> n] (unsigned) *)
   | Ororimm: int -> operation           (**r rotate right immediate *)
+  | Oshldimm: int -> operation          (**r [rd = r1 << n | r2 >> (32-n)] *)
   | Olea: addressing -> operation       (**r effective address *)
 (*c Floating-point arithmetic: *)
   | Onegf: operation                    (**r [rd = - r1] *)
@@ -108,6 +109,10 @@ Inductive operation : Type :=
 (*c Conversions between int and float: *)
   | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
   | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] *)
+(*c Manipulating 64-bit integers: *)
+  | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
+  | Olowlong: operation                 (**r [rd = low-word(r1)] *)
+  | Ohighlong: operation                (**r [rd = high-word(r1)] *)
 (*c Boolean tests: *)
   | Ocmp: condition -> operation.       (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
 
@@ -130,6 +135,7 @@ Definition eq_operation (x y: operation): {x=y} + {x<>y}.
 Proof.
   generalize Int.eq_dec; intro.
   generalize Float.eq_dec; intro.
+  generalize Int64.eq_dec; intro.
   assert (forall (x y: ident), {x=y}+{x<>y}). exact peq.
   assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality.
   assert (forall (x y: condition), {x=y}+{x<>y}). decide equality.
@@ -222,6 +228,8 @@ Definition eval_operation
   | Oshru, v1::v2::nil => Some (Val.shru v1 v2)
   | Oshruimm n, v1::nil => Some (Val.shru v1 (Vint n))
   | Ororimm n, v1::nil => Some (Val.ror v1 (Vint n))
+  | Oshldimm n, v1::v2::nil => Some (Val.or (Val.shl v1 (Vint n))
+                                            (Val.shru v2 (Vint (Int.sub Int.iwordsize n))))
   | Olea addr, _ => eval_addressing genv sp addr vl
   | Onegf, v1::nil => Some(Val.negf v1)
   | Oabsf, v1::nil => Some(Val.absf v1)
@@ -232,6 +240,9 @@ Definition eval_operation
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
   | Ofloatofint, v1::nil => Val.floatofint v1
+  | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
+  | Olowlong, v1::nil => Some(Val.loword v1)
+  | Ohighlong, v1::nil => Some(Val.hiword v1)
   | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m))
   | _, _ => None
   end.
@@ -306,6 +317,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Oshru => (Tint :: Tint :: nil, Tint)
   | Oshruimm _ => (Tint :: nil, Tint)
   | Ororimm _ => (Tint :: nil, Tint)
+  | Oshldimm _ => (Tint :: Tint :: nil, Tint)
   | Olea addr => (type_of_addressing addr, Tint)
   | Onegf => (Tfloat :: nil, Tfloat)
   | Oabsf => (Tfloat :: nil, Tfloat)
@@ -316,6 +328,9 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osingleoffloat => (Tfloat :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
+  | Omakelong => (Tint :: Tint :: nil, Tlong)
+  | Olowlong => (Tlong :: nil, Tint)
+  | Ohighlong => (Tlong :: nil, Tint)
   | Ocmp c => (type_of_condition c, Tint)
   end.
 
@@ -386,6 +401,8 @@ Proof with (try exact I).
   destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)...
   destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)...
+  destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)...
+  destruct v1; simpl... destruct (Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize)...
   eapply type_of_addressing_sound; eauto.
   destruct v0...
   destruct v0...
@@ -396,6 +413,9 @@ Proof with (try exact I).
   destruct v0...
   destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
   destruct v0; simpl in H0; inv H0...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
   destruct (eval_condition c vl m); simpl... destruct b... 
 Qed.
 
@@ -511,79 +531,41 @@ Proof.
   apply eval_shift_stack_addressing. 
 Qed.
 
-(** Transformation of addressing modes with two operands or more
-  into an equivalent arithmetic operation.  This is used in the [Reload]
-  pass when a store instruction cannot be reloaded directly because
-  it runs out of temporary registers. *)
+(** Offset an addressing mode [addr] by a quantity [delta], so that
+  it designates the pointer [delta] bytes past the pointer designated
+  by [addr].  May be undefined, in which case [None] is returned. *)
 
-Definition op_for_binary_addressing (addr: addressing) : operation := Olea addr.
+Definition offset_addressing (addr: addressing) (delta: int) : option addressing :=
+  match addr with
+  | Aindexed n => Some(Aindexed (Int.add n delta))
+  | Aindexed2 n => Some(Aindexed2 (Int.add n delta))
+  | Ascaled sc n => Some(Ascaled sc (Int.add n delta))
+  | Aindexed2scaled sc n => Some(Aindexed2scaled sc (Int.add n delta))
+  | Aglobal s n => Some(Aglobal s (Int.add n delta))
+  | Abased s n => Some(Abased s (Int.add n delta))
+  | Abasedscaled sc s n => Some(Abasedscaled sc s (Int.add n delta))
+  | Ainstack n => Some(Ainstack (Int.add n delta))
+  end.
 
-Lemma eval_op_for_binary_addressing:
-  forall (F V: Type) (ge: Genv.t F V) sp addr args v m,
-  (length args >= 2)%nat ->
+Lemma eval_offset_addressing:
+  forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v,
+  offset_addressing addr delta = Some addr' ->
   eval_addressing ge sp addr args = Some v ->
-  eval_operation ge sp (op_for_binary_addressing addr) args m = Some v.
-Proof.
-  intros. simpl. auto.
-Qed.
-
-Lemma type_op_for_binary_addressing:
-  forall addr,
-  (length (type_of_addressing addr) >= 2)%nat ->
-  type_of_operation (op_for_binary_addressing addr) = (type_of_addressing addr, Tint).
+  eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)).
 Proof.
-  intros. simpl. auto.
+  intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; inv H.
+  rewrite Val.add_assoc; auto.
+  rewrite !Val.add_assoc; auto.
+  rewrite !Val.add_assoc; auto.
+  rewrite !Val.add_assoc; auto.
+  unfold symbol_address. destruct (Genv.find_symbol ge i); auto. 
+  unfold symbol_address. destruct (Genv.find_symbol ge i); auto.
+  rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. 
+  unfold symbol_address. destruct (Genv.find_symbol ge i0); auto.
+  rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. 
+  rewrite Val.add_assoc. auto. 
 Qed.
 
-
-(** Two-address operations.  Return [true] if the first argument and
-  the result must be in the same location. *)
-
-Definition two_address_op (op: operation) : bool :=
-  match op with
-  | Omove => false
-  | Ointconst _ => false
-  | Ofloatconst _ => false
-  | Oindirectsymbol _ => false
-  | Ocast8signed => false
-  | Ocast8unsigned => false
-  | Ocast16signed => false
-  | Ocast16unsigned => false
-  | Oneg => true
-  | Osub => true
-  | Omul => true
-  | Omulimm _ => true
-  | Odiv => true
-  | Odivu => true
-  | Omod => true
-  | Omodu => true
-  | Oand => true
-  | Oandimm _ => true
-  | Oor => true
-  | Oorimm _ => true
-  | Oxor => true
-  | Oxorimm _ => true
-  | Oshl => true
-  | Oshlimm _ => true
-  | Oshr => true
-  | Oshrimm _ => true
-  | Oshrximm _ => true
-  | Oshru => true
-  | Oshruimm _ => true
-  | Ororimm _ => true
-  | Olea addr => false
-  | Onegf => true
-  | Oabsf => true
-  | Oaddf => true
-  | Osubf => true
-  | Omulf => true
-  | Odivf => true
-  | Osingleoffloat => false
-  | Ointoffloat => false
-  | Ofloatofint => false
-  | Ocmp c => false
-  end.
-
 (** Operations that are so cheap to recompute that CSE should not factor them out. *)
 
 Definition is_trivial_op (op: operation) : bool :=
@@ -774,6 +756,8 @@ Proof.
   eauto 3 using val_cmpu_bool_inject, Mem.valid_pointer_implies.
   inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; try discriminate; inv H5; auto.
+  inv H3; try discriminate; inv H5; auto.
 Qed.
 
 Ltac TrivialExists :=
@@ -842,6 +826,8 @@ Proof.
   inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto.
   inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto.
+  inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto.
+  inv H2; simpl; auto. destruct (Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize); auto.
   eapply eval_addressing_inj; eauto. 
   inv H4; simpl; auto.
   inv H4; simpl; auto.
@@ -853,6 +839,9 @@ Proof.
   inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
   exists (Vint i); auto.
   inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
   subst v1. destruct (eval_condition c vl1 m1) eqn:?.
   exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
   destruct b; simpl; constructor.
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 4768606..c2ea98f 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -61,7 +61,7 @@ let raw_symbol oc s =
   | ELF -> fprintf oc "%s" s
   | MacOS | Cygwin -> fprintf oc "_%s" s
 
-let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$"
+let re_variadic_stub = Str.regexp "\\(.*\\)\\$[ifl]*$"
 
 let symbol oc symb =
   let s = extern_atom symb in
@@ -239,9 +239,9 @@ let print_annot_val oc txt args res =
   fprintf oc "%s annotation: " comment;
   PrintAnnot.print_annot_val preg oc txt args;
   match args, res with
-  | IR src :: _, IR dst ->
+  | [IR src], [IR dst] ->
       if dst <> src then fprintf oc "	movl	%a, %a\n" ireg src ireg dst
-  | FR src :: _, FR dst ->
+  | [FR src], [FR dst] ->
       if dst <> src then fprintf oc "	movsd	%a, %a\n" freg src freg dst
   | _, _ -> assert false
 
@@ -251,98 +251,76 @@ let print_annot_val oc txt args res =
    memory accesses regardless of alignment. *)
 
 let print_builtin_memcpy_small oc sz al src dst =
-  let tmp =
-    if src <> ECX && dst <> ECX then ECX
-    else if src <> EDX && dst <> EDX then EDX
-    else EAX in
-  let need_tmp =
-    sz mod 4 <> 0 || not !Clflags.option_fsse in
+  assert (src = EDX && dst = EAX);
   let rec copy ofs sz =
     if sz >= 8 && !Clflags.option_fsse then begin
-      fprintf oc "	movq	%d(%a), %a\n" ofs ireg src freg XMM6;
-      fprintf oc "	movq	%a, %d(%a)\n" freg XMM6 ofs ireg dst;
+      fprintf oc "	movq	%d(%a), %a\n" ofs ireg src freg XMM7;
+      fprintf oc "	movq	%a, %d(%a)\n" freg XMM7 ofs ireg dst;
       copy (ofs + 8) (sz - 8)
     end else if sz >= 4 then begin
-      if !Clflags.option_fsse then begin
-        fprintf oc "	movd	%d(%a), %a\n" ofs ireg src freg XMM6;
-        fprintf oc "	movd	%a, %d(%a)\n" freg XMM6 ofs ireg dst
-      end else begin
-        fprintf oc "	movl	%d(%a), %a\n" ofs ireg src ireg tmp;
-        fprintf oc "	movl	%a, %d(%a)\n" ireg tmp ofs ireg dst
-      end;
+      fprintf oc "	movl	%d(%a), %a\n" ofs ireg src ireg ECX;
+      fprintf oc "	movl	%a, %d(%a)\n" ireg ECX ofs ireg dst;
       copy (ofs + 4) (sz - 4)
     end else if sz >= 2 then begin
-      fprintf oc "	movw	%d(%a), %a\n" ofs ireg src ireg16 tmp;
-      fprintf oc "	movw	%a, %d(%a)\n" ireg16 tmp ofs ireg dst;
+      fprintf oc "	movw	%d(%a), %a\n" ofs ireg src ireg16 ECX;
+      fprintf oc "	movw	%a, %d(%a)\n" ireg16 ECX ofs ireg dst;
       copy (ofs + 2) (sz - 2)
     end else if sz >= 1 then begin
-      fprintf oc "	movb	%d(%a), %a\n" ofs ireg src ireg8 tmp;
-      fprintf oc "	movb	%a, %d(%a)\n" ireg8 tmp ofs ireg dst;
+      fprintf oc "	movb	%d(%a), %a\n" ofs ireg src ireg8 ECX;
+      fprintf oc "	movb	%a, %d(%a)\n" ireg8 ECX ofs ireg dst;
       copy (ofs + 1) (sz - 1)
     end in
-  if need_tmp && tmp = EAX then
-    fprintf oc "	pushl	%a\n" ireg EAX;
-  copy 0 sz;
-  if need_tmp && tmp = EAX then
-    fprintf oc "	popl	%a\n" ireg EAX
-
-let print_mov2 oc src1 dst1 src2 dst2 =
-  if src1 = dst1 then
-    if src2 = dst2
-    then ()
-    else fprintf oc "	movl	%a, %a\n" ireg src2 ireg dst2
-  else if src2 = dst2 then
-    fprintf oc "	movl	%a, %a\n" ireg src1 ireg dst1
-  else if src2 = dst1 then
-    if src1 = dst2 then
-      fprintf oc "	xchgl	%a, %a\n" ireg src1 ireg src2
-    else begin
-      fprintf oc "	movl	%a, %a\n" ireg src2 ireg dst2;
-      fprintf oc "	movl	%a, %a\n" ireg src1 ireg dst1
-    end
-  else begin
-    fprintf oc "	movl	%a, %a\n" ireg src1 ireg dst1;
-    fprintf oc "	movl	%a, %a\n" ireg src2 ireg dst2
-  end
+  copy 0 sz
 
 let print_builtin_memcpy_big oc sz al src dst =
-  fprintf oc "	pushl	%a\n" ireg ESI;
-  fprintf oc "	pushl	%a\n" ireg EDI;
-  print_mov2 oc src ESI dst EDI;
+  assert (src = ESI && dst = EDI);
   fprintf oc "	movl	$%d, %a\n" (sz / 4) ireg ECX;
   fprintf oc "	rep	movsl\n";
   if sz mod 4 >= 2 then fprintf oc "	movsw\n";
-  if sz mod 2 >= 1 then fprintf oc "	movsb\n";
-  fprintf oc "	popl	%a\n" ireg EDI;
-  fprintf oc "	popl	%a\n" ireg ESI
+  if sz mod 2 >= 1 then fprintf oc "	movsb\n"
 
 let print_builtin_memcpy oc sz al args =
   let (dst, src) =
     match args with [IR d; IR s] -> (d, s) | _ -> assert false in
   fprintf oc "%s begin builtin __builtin_memcpy_aligned, size = %d, alignment = %d\n"
           comment sz al;
-  if sz <= 64
+  if sz <= 32
   then print_builtin_memcpy_small oc sz al src dst
   else print_builtin_memcpy_big oc sz al src dst;
   fprintf oc "%s end builtin __builtin_memcpy_aligned\n" comment
 
 (* Handling of volatile reads and writes *)
 
+let offset_addressing (Addrmode(base, ofs, cst)) delta =
+  Addrmode(base, ofs,
+           match cst with
+           | Coq_inl n -> Coq_inl(Integers.Int.add n delta)
+           | Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta))
+
 let print_builtin_vload_common oc chunk addr res =
   match chunk, res with
-  | Mint8unsigned, IR res ->
+  | Mint8unsigned, [IR res] ->
       fprintf oc "	movzbl	%a, %a\n" addressing addr ireg res
-  | Mint8signed, IR res ->
+  | Mint8signed, [IR res] ->
       fprintf oc "	movsbl	%a, %a\n" addressing addr ireg res
-  | Mint16unsigned, IR res ->
+  | Mint16unsigned, [IR res] ->
       fprintf oc "	movzwl	%a, %a\n" addressing addr ireg res
-  | Mint16signed, IR res ->
+  | Mint16signed, [IR res] ->
       fprintf oc "	movswl	%a, %a\n" addressing addr ireg res
-  | Mint32, IR res ->
+  | Mint32, [IR res] ->
       fprintf oc "	movl	%a, %a\n" addressing addr ireg res
-  | Mfloat32, FR res ->
+  | Mint64, [IR res1; IR res2] ->
+      let addr' = offset_addressing addr (coqint_of_camlint 4l) in
+      if not (Asmgen.addressing_mentions addr res2) then begin
+        fprintf oc "	movl	%a, %a\n" addressing addr ireg res2;
+        fprintf oc "	movl	%a, %a\n" addressing addr' ireg res1
+      end else begin
+        fprintf oc "	movl	%a, %a\n" addressing addr' ireg res1;
+        fprintf oc "	movl	%a, %a\n" addressing addr ireg res2
+      end
+  | Mfloat32, [FR res] ->
       fprintf oc "	cvtss2sd %a, %a\n" addressing addr freg res
-  | (Mfloat64 | Mfloat64al32), FR res ->
+  | (Mfloat64 | Mfloat64al32), [FR res] ->
       fprintf oc "	movsd	%a, %a\n" addressing addr freg res
   | _ ->
       assert false
@@ -366,21 +344,25 @@ let print_builtin_vload_global oc chunk id ofs args res =
 
 let print_builtin_vstore_common oc chunk addr src tmp =
   match chunk, src with
-  | (Mint8signed | Mint8unsigned), IR src ->
+  | (Mint8signed | Mint8unsigned), [IR src] ->
       if Asmgen.low_ireg src then
         fprintf oc "	movb	%a, %a\n" ireg8 src addressing addr
       else begin
         fprintf oc "	movl	%a, %a\n" ireg src ireg tmp;
         fprintf oc "	movb	%a, %a\n" ireg8 tmp addressing addr
       end
-  | (Mint16signed | Mint16unsigned), IR src ->
+  | (Mint16signed | Mint16unsigned), [IR src] ->
       fprintf oc "	movw	%a, %a\n" ireg16 src addressing addr
-  | Mint32, IR src ->
+  | Mint32, [IR src] ->
       fprintf oc "	movl	%a, %a\n" ireg src addressing addr
-  | Mfloat32, FR src ->
+  | Mint64, [IR src1; IR src2] ->
+      let addr' = offset_addressing addr (coqint_of_camlint 4l) in
+      fprintf oc "	movl	%a, %a\n" ireg src2 addressing addr;
+      fprintf oc "	movl	%a, %a\n" ireg src1 addressing addr'
+  | Mfloat32, [FR src] ->
       fprintf oc "	cvtsd2ss %a, %%xmm7\n" freg src;
       fprintf oc "	movss	%%xmm7, %a\n" addressing addr
-  | (Mfloat64 | Mfloat64al32), FR src ->
+  | (Mfloat64 | Mfloat64al32), [FR src] ->
       fprintf oc "	movsd	%a, %a\n" freg src addressing addr
   | _ ->
       assert false
@@ -388,10 +370,10 @@ let print_builtin_vstore_common oc chunk addr src tmp =
 let print_builtin_vstore oc chunk args =
   fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
   begin match args with
-  | [IR addr; src] ->
+  | IR addr :: src ->
       print_builtin_vstore_common oc chunk
                  (Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src
-                 (if addr = ECX then EDX else ECX)
+                 (if addr = EAX then ECX else EAX)
   | _ ->
       assert false
   end;
@@ -399,13 +381,8 @@ let print_builtin_vstore oc chunk args =
 
 let print_builtin_vstore_global oc chunk id ofs args =
   fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
-  begin match args with
-  | [src] ->
-      print_builtin_vstore_common oc chunk
-                       (Addrmode(None, None, Coq_inr(id,ofs))) src EDX
-  | _ ->
-      assert false
-  end;
+  print_builtin_vstore_common oc chunk
+                 (Addrmode(None, None, Coq_inr(id,ofs))) args EAX;
   fprintf oc "%s end builtin __builtin_volatile_write\n" comment
 
 (* Handling of compiler-inlined builtins *)
@@ -414,13 +391,13 @@ let print_builtin_inline oc name args res =
   fprintf oc "%s begin builtin %s\n" comment name;
   begin match name, args, res with
   (* Memory accesses *)
-  | "__builtin_read16_reversed", [IR a1], IR res ->
+  | "__builtin_read16_reversed", [IR a1], [IR res] ->
       let tmp = if Asmgen.low_ireg res then res else ECX in
       fprintf oc "	movzwl	0(%a), %a\n" ireg a1 ireg tmp;
       fprintf oc "	xchg	%a, %a\n" ireg8 tmp high_ireg8 tmp;
       if tmp <> res then
         fprintf oc "	movl	%a, %a\n" ireg tmp ireg res
-  | "__builtin_read32_reversed", [IR a1], IR res ->
+  | "__builtin_read32_reversed", [IR a1], [IR res] ->
       fprintf oc "	movl	0(%a), %a\n" ireg a1 ireg res;
       fprintf oc "	bswap	%a\n" ireg res
   | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
@@ -436,19 +413,19 @@ let print_builtin_inline oc name args res =
       fprintf oc "	bswap	%a\n" ireg tmp;
       fprintf oc "	movl	%a, 0(%a)\n" ireg tmp ireg a1
   (* Integer arithmetic *)
-  | "__builtin_bswap", [IR a1], IR res ->
+  | "__builtin_bswap", [IR a1], [IR res] ->
       if a1 <> res then
         fprintf oc "	movl	%a, %a\n" ireg a1 ireg res;
       fprintf oc "	bswap	%a\n" ireg res
   (* Float arithmetic *)
-  | "__builtin_fabs", [FR a1], FR res ->
+  | "__builtin_fabs", [FR a1], [FR res] ->
       need_masks := true;
       if a1 <> res then
         fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
       fprintf oc "	andpd	%a, %a\n" raw_symbol "__absd_mask" freg res
-  | "__builtin_fsqrt", [FR a1], FR res ->
+  | "__builtin_fsqrt", [FR a1], [FR res] ->
       fprintf oc "	sqrtsd	%a, %a\n" freg a1 freg res
-  | "__builtin_fmax", [FR a1; FR a2], FR res ->
+  | "__builtin_fmax", [FR a1; FR a2], [FR res] ->
       if res = a1 then
         fprintf oc "	maxsd	%a, %a\n" freg a2 freg res
       else if res = a2 then
@@ -457,7 +434,7 @@ let print_builtin_inline oc name args res =
         fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
         fprintf oc "	maxsd	%a, %a\n" freg a2 freg res
       end
-  | "__builtin_fmin", [FR a1; FR a2], FR res ->
+  | "__builtin_fmin", [FR a1; FR a2], [FR res] ->
       if res = a1 then
         fprintf oc "	minsd	%a, %a\n" freg a2 freg res
       else if res = a2 then
@@ -466,6 +443,23 @@ let print_builtin_inline oc name args res =
         fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
         fprintf oc "	minsd	%a, %a\n" freg a2 freg res
       end
+  (* 64-bit integer arithmetic *)
+  | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
+      assert (ah = EDX && al = EAX && rh = EDX && rl = EAX);
+      fprintf oc "	negl	%a\n" ireg EAX;
+      fprintf oc "	adcl	$0, %a\n" ireg EDX;
+      fprintf oc "	negl	%a\n" ireg EDX
+  | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX);
+      fprintf oc "	addl	%a, %a\n" ireg EBX ireg EAX;
+      fprintf oc "	adcl	%a, %a\n" ireg ECX ireg EDX
+  | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX);
+      fprintf oc "	subl	%a, %a\n" ireg EBX ireg EAX;
+      fprintf oc "	sbbl	%a, %a\n" ireg ECX ireg EDX
+  | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
+      assert (a = EAX && b = EDX && rh = EDX && rl = EAX);
+      fprintf oc "	mull    %a\n" ireg EDX
   (* Catch-all *)  
   | _ ->
       invalid_arg ("unrecognized builtin " ^ name)
@@ -604,6 +598,8 @@ let print_instruction oc = function
       fprintf oc "	sarl	%%cl, %a\n" ireg rd
   | Psar_ri(rd, n) ->
       fprintf oc "	sarl	$%a, %a\n" coqint n ireg rd
+  | Pshld_ri(rd, r1, n) ->
+      fprintf oc "	shldl	$%a, %a, %a\n" coqint n ireg r1 ireg rd
   | Pror_ri(rd, n) ->
       fprintf oc "	rorl	$%a, %a\n" coqint n ireg rd
   | Pcmp_rr(r1, r2) ->
@@ -617,8 +613,8 @@ let print_instruction oc = function
   | Pcmov(c, rd, r1) ->
       fprintf oc "	cmov%s	%a, %a\n" (name_of_condition c) ireg r1 ireg rd
   | Psetcc(c, rd) ->
-      fprintf oc "	set%s	%%cl\n" (name_of_condition c);
-      fprintf oc "	movzbl	%%cl, %a\n" ireg rd
+      fprintf oc "	set%s	%a\n" (name_of_condition c) ireg8 rd;
+      fprintf oc "	movzbl	%a, %a\n" ireg8 rd ireg rd
   (** Arithmetic operations over floats *)
   | Paddd_ff(rd, r1) ->
       fprintf oc "	addsd	%a, %a\n" freg r1 freg rd
@@ -757,6 +753,8 @@ let print_init oc = function
       fprintf oc "	.short	%ld\n" (camlint_of_coqint n)
   | Init_int32 n ->
       fprintf oc "	.long	%ld\n" (camlint_of_coqint n)
+  | Init_int64 n ->
+      fprintf oc "	.quad	%Ld\n" (camlint64_of_coqint n)
   | Init_float32 n ->
       fprintf oc "	.long	%ld %s %.18g\n"
 	(camlint_of_coqint (Floats.Float.bits_of_single n))
diff --git a/ia32/PrintOp.ml b/ia32/PrintOp.ml
index 7ddf42f..1f417c2 100644
--- a/ia32/PrintOp.ml
+++ b/ia32/PrintOp.ml
@@ -90,6 +90,7 @@ let print_operation reg pp = function
   | Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2
   | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n)
   | Ororimm n, [r1] -> fprintf pp "%a ror %ld" reg r1 (camlint_of_coqint n)
+  | Oshldimm n, [r1;r2] -> fprintf pp "(%a, %a) << %ld" reg r1 reg r2 (camlint_of_coqint n)
   | Olea addr, args -> print_addressing reg pp (addr, args)
   | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1
   | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1
@@ -100,6 +101,9 @@ let print_operation reg pp = function
   | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
   | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
   | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1
+  | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
+  | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
+  | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
   | Ocmp c, args -> print_condition reg pp (c, args)
   | _ -> fprintf pp "<bad operator>"
 
diff --git a/ia32/SelectOp.vp b/ia32/SelectOp.vp
index 0c30386..7f79a4f 100644
--- a/ia32/SelectOp.vp
+++ b/ia32/SelectOp.vp
@@ -264,14 +264,16 @@ Nondetfunction or (e1: expr) (e2: expr) :=
   | Eop (Ointconst n1) Enil, t2 => orimm n1 t2
   | t1, Eop (Ointconst n2) Enil => orimm n2 t1
   | Eop (Oshlimm n1) (t1:::Enil), Eop (Oshruimm n2) (t2:::Enil) =>
-      if Int.eq (Int.add n1 n2) Int.iwordsize
-      && same_expr_pure t1 t2
-      then Eop (Ororimm n2) (t1:::Enil)
+      if Int.eq (Int.add n1 n2) Int.iwordsize then
+        if same_expr_pure t1 t2
+        then Eop (Ororimm n2) (t1:::Enil)
+        else Eop (Oshldimm n1) (t1:::t2:::Enil)
       else Eop Oor (e1:::e2:::Enil)
   | Eop (Oshruimm n2) (t2:::Enil), Eop (Oshlimm n1) (t1:::Enil) =>
-      if Int.eq (Int.add n1 n2) Int.iwordsize
-      && same_expr_pure t1 t2
-      then Eop (Ororimm n2) (t1:::Enil)
+      if Int.eq (Int.add n1 n2) Int.iwordsize then
+        if same_expr_pure t1 t2
+        then Eop (Ororimm n2) (t1:::Enil)
+        else Eop (Oshldimm n1) (t1:::t2:::Enil)
       else Eop Oor (e1:::e2:::Enil)
   | _, _ =>
       Eop Oor (e1:::e2:::Enil)
diff --git a/ia32/SelectOpproof.v b/ia32/SelectOpproof.v
index 18deca6..1569ad6 100644
--- a/ia32/SelectOpproof.v
+++ b/ia32/SelectOpproof.v
@@ -26,13 +26,6 @@ Require Import SelectOp.
 
 Open Local Scope cminorsel_scope.
 
-Section CMCONSTR.
-
-Variable ge: genv.
-Variable sp: val.
-Variable e: env.
-Variable m: mem.
-
 (** * Useful lemmas and tactics *)
 
 (** The following are trivial lemmas and custom tactics that help
@@ -78,9 +71,15 @@ Ltac TrivialExists :=
   | [ |- exists v, _ /\ Val.lessdef ?a v ] => exists a; split; [EvalOp | auto]
   end.
 
-
 (** * Correctness of the smart constructors *)
 
+Section CMCONSTR.
+
+Variable ge: genv.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
 (** We now show that the code generated by "smart constructor" functions
   such as [SelectOp.notint] behaves as expected.  Continuing the
   [notint] example, we show that if the expression [e]
@@ -410,6 +409,12 @@ Proof.
   discriminate.
 Qed.
 
+Remark int_add_sub_eq:
+  forall x y z, Int.add x y = z -> Int.sub z x = y.
+Proof.
+  intros. subst z. rewrite Int.sub_add_l. rewrite Int.sub_idem. apply Int.add_zero_l.
+Qed.
+
 Lemma eval_or: binary_constructor_sound or Val.or.
 Proof.
   red; intros until y; unfold or; case (or_match a b); intros.
@@ -417,26 +422,29 @@ Proof.
   InvEval. rewrite Val.or_commut. apply eval_orimm; auto. 
   InvEval. apply eval_orimm; auto.
 (* shlimm - shruimm *)
-  destruct (Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2) eqn:?.
-  destruct (andb_prop _ _ Heqb0). 
-  generalize (Int.eq_spec (Int.add n1 n2) Int.iwordsize); rewrite H1; intros EQ.
+  predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize.
+  destruct (same_expr_pure t1 t2) eqn:?.
   InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst. 
   exists (Val.ror v0 (Vint n2)); split. EvalOp.
   destruct v0; simpl; auto. 
   destruct (Int.ltu n1 Int.iwordsize) eqn:?; auto.
   destruct (Int.ltu n2 Int.iwordsize) eqn:?; auto.
   simpl. rewrite <- Int.or_ror; auto.
+  InvEval. exists (Val.or x y); split. EvalOp. 
+  simpl. erewrite int_add_sub_eq; eauto. rewrite H0; rewrite H; auto. auto.
   TrivialExists. 
 (* shruimm - shlimm *) 
-  destruct (Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2) eqn:?.
-  destruct (andb_prop _ _ Heqb0). 
-  generalize (Int.eq_spec (Int.add n1 n2) Int.iwordsize); rewrite H1; intros EQ.
+  predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize.
+  destruct (same_expr_pure t1 t2) eqn:?.
   InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst. 
   exists (Val.ror v1 (Vint n2)); split. EvalOp.
   destruct v1; simpl; auto. 
   destruct (Int.ltu n2 Int.iwordsize) eqn:?; auto.
   destruct (Int.ltu n1 Int.iwordsize) eqn:?; auto.
   simpl. rewrite Int.or_commut. rewrite <- Int.or_ror; auto.
+  InvEval. exists (Val.or y x); split. EvalOp. 
+  simpl. erewrite int_add_sub_eq; eauto. rewrite H0; rewrite H; auto.
+  rewrite Val.or_commut; auto.
   TrivialExists. 
 (* default *)
   TrivialExists.
diff --git a/ia32/standard/Conventions1.v b/ia32/standard/Conventions1.v
index 49f5da9..cae20f6 100644
--- a/ia32/standard/Conventions1.v
+++ b/ia32/standard/Conventions1.v
@@ -21,46 +21,26 @@ Require Import Locations.
 
 (** Machine registers (type [mreg] in module [Locations]) are divided in
   the following groups:
-- Temporaries used for spilling, reloading, and parallel move operations.
-- Allocatable registers, that can be assigned to RTL pseudo-registers.
-  These are further divided into:
--- Callee-save registers, whose value is preserved across a function call.
--- Caller-save registers that can be modified during a function call.
+- Callee-save registers, whose value is preserved across a function call.
+- Caller-save registers that can be modified during a function call.
 
   We follow the x86-32 application binary interface (ABI) in our choice
   of callee- and caller-save registers.
 *)
 
-Definition int_caller_save_regs := AX :: nil.
+Definition int_caller_save_regs := AX :: CX :: DX :: nil.
 
-Definition float_caller_save_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: nil.
+Definition float_caller_save_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
 
 Definition int_callee_save_regs := BX :: SI :: DI :: BP :: nil.
 
 Definition float_callee_save_regs : list mreg := nil.
 
-Definition destroyed_at_call_regs :=
-  int_caller_save_regs ++ float_caller_save_regs.
+Definition destroyed_at_call :=
+  FP0 :: int_caller_save_regs ++ float_caller_save_regs.
 
-Definition destroyed_at_call := List.map R destroyed_at_call_regs.
-
-Definition int_temporaries := IT1 :: IT2 :: nil.
-
-Definition float_temporaries := FT1 :: FT2 :: nil.
-
-(** [FP0] is not used for reloading, hence it is not in [float_temporaries],
-  however it is not allocatable, hence it is in [temporaries]. *)
-  
-Definition temporary_regs := IT1 :: IT2 :: FT1 :: FT2 :: FP0 :: nil.
-
-Definition temporaries := List.map R temporary_regs.
-
-Definition destroyed_at_move_regs := FP0 :: nil.
-
-Definition destroyed_at_move := List.map R destroyed_at_move_regs.
-
-Definition dummy_int_reg := AX.     (**r Used in [Coloring]. *)
-Definition dummy_float_reg := X0.   (**r Used in [Coloring]. *)
+Definition dummy_int_reg := AX.     (**r Used in [Regalloc]. *)
+Definition dummy_float_reg := X0.   (**r Used in [Regalloc]. *)
 
 (** The [index_int_callee_save] and [index_float_callee_save] associate
   a unique positive integer to callee-save registers.  This integer is
@@ -147,7 +127,7 @@ Proof.
 Qed.
 
 (** The following lemmas show that
-    (temporaries, destroyed at call, integer callee-save, float callee-save)
+    (destroyed at call, integer callee-save, float callee-save)
     is a partition of the set of machine registers. *)
 
 Lemma int_float_callee_save_disjoint:
@@ -158,34 +138,26 @@ Qed.
 
 Lemma register_classification:
   forall r, 
-  (In (R r) temporaries \/ In (R r) destroyed_at_call) \/
-  (In r int_callee_save_regs \/ In r float_callee_save_regs).
+  In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs.
 Proof.
   destruct r; 
-  try (left; left; simpl; OrEq);
-  try (left; right; simpl; OrEq);
+  try (left; simpl; OrEq);
   try (right; left; simpl; OrEq);
   try (right; right; simpl; OrEq).
 Qed.
 
 Lemma int_callee_save_not_destroyed:
   forall r, 
-    In (R r) temporaries \/ In (R r) destroyed_at_call ->
-    ~(In r int_callee_save_regs).
+    In r destroyed_at_call -> In r int_callee_save_regs -> False.
 Proof.
-  intros; red; intros. elim H.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
+  intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
 Qed.
 
 Lemma float_callee_save_not_destroyed:
   forall r, 
-    In (R r) temporaries \/ In (R r) destroyed_at_call ->
-    ~(In r float_callee_save_regs).
+    In r destroyed_at_call -> In r float_callee_save_regs -> False.
 Proof.
-  intros; red; intros. elim H.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
+  intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
 Qed.
 
 Lemma int_callee_save_type:
@@ -244,13 +216,15 @@ Qed.
   registers [AX] or [FP0], depending on the type of the returned value.
   We treat a function without result as a function with one integer result. *)
 
-Definition loc_result (s: signature) : mreg :=
+Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
-  | None => AX
-  | Some Tint => AX
-  | Some Tfloat => FP0
+  | None => AX :: nil
+  | Some Tint => AX :: nil
+  | Some Tfloat => FP0 :: nil
+  | Some Tlong => DX :: AX :: nil
   end.
 
+(*
 (** The result location has the type stated in the signature. *)
 
 Lemma loc_result_type:
@@ -263,17 +237,18 @@ Proof.
   destruct t; reflexivity.
   reflexivity.
 Qed.
+*)
 
-(** The result location is a caller-save register or a temporary *)
+(** The result locations are caller-save registers *)
 
 Lemma loc_result_caller_save:
-  forall (s: signature), 
-  In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries.
+  forall (s: signature) (r: mreg),
+  In r (loc_result s) -> In r destroyed_at_call.
 Proof.
-  intros; unfold loc_result.
-  destruct (sig_res s).
-  destruct t. left; simpl; OrEq. right; simpl; OrEq.
-  left; simpl; OrEq.
+  intros.
+  assert (r = AX \/ r = DX \/ r = FP0).
+    unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition.
+  destruct H0 as [A | [A | A]]; subst r; simpl; OrEq.
 Qed.
 
 (** ** Location of function arguments *)
@@ -284,8 +259,9 @@ Fixpoint loc_arguments_rec
     (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys => S (Outgoing ofs Tint) :: loc_arguments_rec tys (ofs + 1)
-  | Tfloat :: tys => S (Outgoing ofs Tfloat) :: loc_arguments_rec tys (ofs + 2)
+  | Tint :: tys => S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 1)
+  | Tfloat :: tys => S Outgoing ofs Tfloat :: loc_arguments_rec tys (ofs + 2)
+  | Tlong :: tys => S Outgoing (ofs + 1) Tint :: S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 2)
   end.
 
 (** [loc_arguments s] returns the list of locations where to store arguments
@@ -301,27 +277,19 @@ Fixpoint size_arguments_rec
     (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | Tint :: tys => size_arguments_rec tys (ofs + 1)
-  | Tfloat :: tys => size_arguments_rec tys (ofs + 2)
+  | ty :: tys => size_arguments_rec tys (ofs + typesize ty)
   end.
 
 Definition size_arguments (s: signature) : Z :=
   size_arguments_rec s.(sig_args) 0.
 
-(** A tail-call is possible for a signature if the corresponding
-    arguments are all passed in registers. *)
-
-Definition tailcall_possible (s: signature) : Prop :=
-  forall l, In l (loc_arguments s) ->
-  match l with R _ => True | S _ => False end.
-
-(** Argument locations are either non-temporary registers or [Outgoing] 
+(** Argument locations are either caller-save registers or [Outgoing] 
   stack slots at nonnegative offsets. *)
 
 Definition loc_argument_acceptable (l: loc) : Prop :=
   match l with
-  | R r => ~(In l temporaries)
-  | S (Outgoing ofs ty) => ofs >= 0
+  | R r => In r destroyed_at_call
+  | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong
   | _ => False
   end.
 
@@ -329,62 +297,36 @@ Remark loc_arguments_rec_charact:
   forall tyl ofs l,
   In l (loc_arguments_rec tyl ofs) ->
   match l with
-  | S (Outgoing ofs' ty) => ofs' >= ofs
+  | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong
   | _ => False
   end.
 Proof.
   induction tyl; simpl loc_arguments_rec; intros.
-  elim H.
-  destruct a; simpl in H; destruct H.
-  subst l. omega.
-  generalize (IHtyl _ _ H). destruct l; auto. destruct s; auto. omega. 
-  subst l. omega.
-  generalize (IHtyl _ _ H). destruct l; auto. destruct s; auto. omega. 
+  destruct H.
+  destruct a.
+- destruct H. subst l. split. omega. congruence.
+  exploit IHtyl; eauto.
+  destruct l; auto. destruct sl; auto. intuition omega.
+- destruct H. subst l. split. omega. congruence.
+  exploit IHtyl; eauto.
+  destruct l; auto. destruct sl; auto. intuition omega.
+- destruct H. subst l; split; [omega|congruence].
+  destruct H. subst l; split; [omega|congruence].
+  exploit IHtyl; eauto.
+  destruct l; auto. destruct sl; auto. intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
-  forall (s: signature) (r: loc),
-  In r (loc_arguments s) -> loc_argument_acceptable r.
+  forall (s: signature) (l: loc),
+  In l (loc_arguments s) -> loc_argument_acceptable l.
 Proof.
   unfold loc_arguments; intros.
-  generalize (loc_arguments_rec_charact  _ _ _ H).
-  destruct r; tauto.
-Qed.
-Hint Resolve loc_arguments_acceptable: locs.
-
-(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *)
-
-Remark loc_arguments_rec_notin_local:
-  forall tyl ofs ofs0 ty0,
-  Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl ofs).
-Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a; simpl; auto.
-Qed.
-
-Remark loc_arguments_rec_notin_outgoing:
-  forall tyl ofs ofs0 ty0,
-  ofs0 + typesize ty0 <= ofs ->
-  Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl ofs).
-Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a.
-  split. simpl. omega. apply IHtyl. omega. 
-  split. simpl. omega. apply IHtyl. omega. 
+  exploit loc_arguments_rec_charact; eauto. 
+  unfold loc_argument_acceptable.
+  destruct l; tauto.
 Qed.
 
-Lemma loc_arguments_norepet:
-  forall (s: signature), Loc.norepet (loc_arguments s).
-Proof.
-  intros. unfold loc_arguments. generalize (sig_args s) 0.
-  induction l; simpl; intros.
-  constructor.
-  destruct a; constructor.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-Qed.
+Hint Resolve loc_arguments_acceptable: locs.
 
 (** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
 
@@ -393,9 +335,8 @@ Remark size_arguments_rec_above:
 Proof.
   induction tyl; simpl; intros.
   omega.
-  destruct a.
-  apply Zle_trans with (ofs0 + 1); auto; omega.
-  apply Zle_trans with (ofs0 + 2); auto; omega.
+  apply Zle_trans with (ofs0 + typesize a); auto. 
+  generalize (typesize_pos a); omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -407,56 +348,20 @@ Qed.
 
 Lemma loc_arguments_bounded:
   forall (s: signature) (ofs: Z) (ty: typ),
-  In (S (Outgoing ofs ty)) (loc_arguments s) ->
+  In (S Outgoing ofs ty) (loc_arguments s) ->
   ofs + typesize ty <= size_arguments s.
 Proof.
   intros until ty. unfold loc_arguments, size_arguments. generalize (sig_args s) 0.
   induction l; simpl; intros.
-  elim H.
-  destruct a; simpl in H; destruct H.
-  inv H. apply size_arguments_rec_above.
-  auto.
-  inv H. apply size_arguments_rec_above.
+  destruct H.
+  destruct a.
+  destruct H. inv H. apply size_arguments_rec_above. auto.
+  destruct H. inv H. apply size_arguments_rec_above. auto.
+  destruct H. inv H.
+  simpl typesize. replace (z + 1 + 1) with (z + 2) by omega. apply size_arguments_rec_above.
+  destruct H. inv H. 
+  simpl typesize. apply Zle_trans with (ofs + 2). omega. apply size_arguments_rec_above. 
   auto.
 Qed.
 
-(** Temporary registers do not overlap with argument locations. *)
-
-Lemma loc_arguments_not_temporaries:
-  forall sig, Loc.disjoint (loc_arguments sig) temporaries.
-Proof.
-  intros; red; intros x1 x2 H.
-  generalize (loc_arguments_rec_charact _ _ _ H).
-  destruct x1. tauto. destruct s; intuition.
-  revert H1. simpl; ElimOrEq; auto.
-Qed.
-Hint Resolve loc_arguments_not_temporaries: locs.
-
-(** Argument registers are caller-save. *)
 
-Lemma arguments_caller_save:
-  forall sig r,
-  In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call.
-Proof.
-  unfold loc_arguments; intros.
-  elim (loc_arguments_rec_charact _ _ _ H); simpl.
-Qed.
-
-(** Argument locations agree in number with the function signature. *)
-
-Lemma loc_arguments_length:
-  forall sig,
-  List.length (loc_arguments sig) = List.length sig.(sig_args).
-Proof.
-  intros. unfold loc_arguments. generalize (sig_args sig) 0.
-  induction l; simpl; intros. auto. destruct a; simpl; decEq; auto.
-Qed.
-
-(** Argument locations agree in types with the function signature. *)
-
-Lemma loc_arguments_type:
-  forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args).
-Proof.
-  intros. unfold loc_arguments. generalize (sig_args sig) 0. 
-  induction l; simpl; intros. auto. destruct a; simpl; decEq; auto.
-Qed.
diff --git a/ia32/standard/Stacklayout.v b/ia32/standard/Stacklayout.v
index be854fd..f9d1daf 100644
--- a/ia32/standard/Stacklayout.v
+++ b/ia32/standard/Stacklayout.v
@@ -19,10 +19,9 @@ Require Import Bounds.
   from bottom (lowest offsets) to top:
 - Space for outgoing arguments to function calls.
 - Back link to parent frame
-- Local stack slots of integer type.
 - Saved values of integer callee-save registers used by the function.
-- Local stack slots of float type.
 - Saved values of float callee-save registers used by the function.
+- Local stack slots.
 - Space for the stack-allocated data declared in Cminor
 - Return address.
 
@@ -36,10 +35,9 @@ Record frame_env : Type := mk_frame_env {
   fe_size: Z;
   fe_ofs_link: Z;
   fe_ofs_retaddr: Z;
-  fe_ofs_int_local: Z;
+  fe_ofs_local: Z;
   fe_ofs_int_callee_save: Z;
   fe_num_int_callee_save: Z;
-  fe_ofs_float_local: Z;
   fe_ofs_float_callee_save: Z;
   fe_num_float_callee_save: Z;
   fe_stack_data: Z
@@ -50,17 +48,16 @@ Record frame_env : Type := mk_frame_env {
 
 Definition make_env (b: bounds) :=
   let olink := 4 * b.(bound_outgoing) in  (* back link *)
-  let oil := olink + 4 in              (* integer locals *)
-  let oics := oil + 4 * b.(bound_int_local) in (* integer callee-saves *)
-  let oendi := oics + 4 * b.(bound_int_callee_save) in
-  let ofl := align oendi 8 in             (* float locals *)
-  let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *)
-  let ostkdata := ofcs + 8 * b.(bound_float_callee_save) in (* stack data *)
+  let oics := olink + 4 in                (* integer callee-saves *)
+  let ofcs := align (oics + 4 * b.(bound_int_callee_save)) 8 in (* float callee-saves *)
+  let ol := ofcs + 8 * b.(bound_float_callee_save) in (* locals *)
+  let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *)
   let oretaddr := align (ostkdata + b.(bound_stack_data)) 4 in (* return address *)
   let sz := oretaddr + 4 in (* total size *)
   mk_frame_env sz olink oretaddr
-                  oil oics b.(bound_int_callee_save)
-                  ofl ofcs b.(bound_float_callee_save)
+                  ol
+                  oics b.(bound_int_callee_save)
+                  ofcs b.(bound_float_callee_save)
                   ostkdata.
 
 (** Separation property *)
@@ -70,25 +67,23 @@ Remark frame_env_separated:
   let fe := make_env b in
   0 <= fe_ofs_arg
   /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_link)
-  /\ fe.(fe_ofs_link) + 4 <= fe.(fe_ofs_int_local)
-  /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_int_callee_save)
-  /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local)
-  /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save)
-  /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_stack_data)
+  /\ fe.(fe_ofs_link) + 4 <= fe.(fe_ofs_int_callee_save)
+  /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save)
+  /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_ofs_local)
+  /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_stack_data)
   /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_ofs_retaddr)
   /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_size).
 Proof.
   intros.
   generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)).
+  generalize (align_le (fe.(fe_ofs_local) + 4 * b.(bound_local)) 8 (refl_equal _)).
   generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 4 (refl_equal _)).
   unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
-    fe_ofs_int_local, fe_ofs_int_callee_save,
-    fe_num_int_callee_save,
-    fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+    fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save,
+    fe_ofs_float_callee_save, fe_num_float_callee_save,
     fe_stack_data, fe_ofs_arg.
   intros.
-  generalize (bound_int_local_pos b); intro;
-  generalize (bound_float_local_pos b); intro;
+  generalize (bound_local_pos b); intro;
   generalize (bound_int_callee_save_pos b); intro;
   generalize (bound_float_callee_save_pos b); intro;
   generalize (bound_outgoing_pos b); intro;
@@ -102,38 +97,34 @@ Remark frame_env_aligned:
   forall b,
   let fe := make_env b in
   (4 | fe.(fe_ofs_link))
-  /\ (4 | fe.(fe_ofs_int_local))
   /\ (4 | fe.(fe_ofs_int_callee_save))
-  /\ (8 | fe.(fe_ofs_float_local))
   /\ (8 | fe.(fe_ofs_float_callee_save))
-  /\ (4 | fe.(fe_ofs_retaddr))
+  /\ (8 | fe.(fe_ofs_local))
   /\ (8 | fe.(fe_stack_data))
+  /\ (4 | fe.(fe_ofs_retaddr))
   /\ (4 | fe.(fe_size)).
 Proof.
   intros.
   unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
-    fe_ofs_int_local, fe_ofs_int_callee_save,
+    fe_ofs_local, fe_ofs_int_callee_save,
     fe_num_int_callee_save,
-    fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+    fe_ofs_float_callee_save, fe_num_float_callee_save,
     fe_stack_data.
   set (x1 := 4 * bound_outgoing b).
   assert (4 | x1). unfold x1; exists (bound_outgoing b); ring.
   set (x2 := x1 + 4).
   assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists 1; auto.
-  set (x3 := x2 + 4 * bound_int_local b).
-  assert (4 | x3). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring.
-  set (x4 := x3 + 4 * bound_int_callee_save b).
-  set (x5 := align x4 8).
-  assert (8 | x5). unfold x5. apply align_divides. omega.
-  set (x6 := x5 + 8 * bound_float_local b).
-  assert (8 | x6). unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring.
-  set (x7 := x6 + 8 * bound_float_callee_save b).
-  assert (8 | x7).
-    unfold x7. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
-  set (x8 := align (x7 + bound_stack_data b) 4).
-  assert (4 | x8). apply align_divides. omega.
-  set (x9 := x8 + 4).
-  assert (4 | x9). unfold x8; apply Zdivide_plus_r; auto. exists 1; auto.
+  set (x3 := x2 + 4 * bound_int_callee_save b).
+  set (x4 := align x3 8).
+  assert (8 | x4). unfold x4. apply align_divides. omega.
+  set (x5 := x4 + 8 * bound_float_callee_save b).
+  assert (8 | x5). unfold x5; apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
+  set (x6 := align (x5 + 4 * bound_local b) 8).
+  assert (8 | x6). unfold x6; apply align_divides; omega.
+  set (x7 := align (x6 + bound_stack_data b) 4).
+  assert (4 | x7). unfold x7; apply align_divides; omega.
+  set (x8 := x7 + 4).
+  assert (4 | x8). unfold x8; apply Zdivide_plus_r; auto. exists 1; auto.
   tauto.
 Qed.
 
diff --git a/lib/Camlcoq.ml b/lib/Camlcoq.ml
index d99e20f..e057771 100644
--- a/lib/Camlcoq.ml
+++ b/lib/Camlcoq.ml
@@ -252,6 +252,11 @@ let camlstring_of_coqstring (s: char list) =
   | c :: s -> r.[pos] <- c; fill (pos + 1) s
   in fill 0 s
 
+let coqstring_of_camlstring s =
+  let rec cstring accu pos =
+    if pos < 0 then accu else cstring (s.[pos] :: accu) (pos - 1)
+  in cstring [] (String.length s - 1)
+
 (* Floats *)
 
 let coqfloat_of_camlfloat f =
diff --git a/lib/Coqlib.v b/lib/Coqlib.v
index b936b9b..ce5d94e 100644
--- a/lib/Coqlib.v
+++ b/lib/Coqlib.v
@@ -559,6 +559,16 @@ Proof.
 Defined.
 Global Opaque Zdivide_dec.
 
+Lemma Zdivide_interval:
+  forall a b c,
+  0 < c -> 0 <= a < b -> (c | a) -> (c | b) -> 0 <= a <= b - c.
+Proof.
+  intros. destruct H1 as [x EQ1]. destruct H2 as [y EQ2]. subst. destruct H0.
+  split. omega. exploit Zmult_lt_reg_r; eauto. intros. 
+  replace (y * c - c) with ((y - 1) * c) by ring.
+  apply Zmult_le_compat_r; omega.
+Qed.
+
 (** Conversion from [Z] to [nat]. *)
 
 Definition nat_of_Z: Z -> nat := Z.to_nat.
@@ -1250,6 +1260,17 @@ Proof.
   intros. unfold proj_sumbool. destruct a. auto. contradiction. 
 Qed.
 
+Ltac InvBooleans :=
+  match goal with
+  | [ H: _ && _ = true |- _ ] =>
+      destruct (andb_prop _ _ H); clear H; InvBooleans
+  | [ H: _ || _ = false |- _ ] =>
+      destruct (orb_false_elim _ _ H); clear H; InvBooleans
+  | [ H: proj_sumbool ?x = true |- _ ] =>
+      generalize (proj_sumbool_true _ H); clear H; intro; InvBooleans
+  | _ => idtac
+  end.
+
 Section DECIDABLE_EQUALITY.
 
 Variable A: Type.
diff --git a/lib/FSetAVLplus.v b/lib/FSetAVLplus.v
new file mode 100644
index 0000000..5f5cc51
--- /dev/null
+++ b/lib/FSetAVLplus.v
@@ -0,0 +1,513 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** An extension of [FSetAVL] (finite sets as balanced binary search trees)
+  with extra interval-based operations, more efficient than standard
+  operations. *)
+
+Require Import FSetInterface.
+Require FSetAVL.
+Require Import Coqlib.
+
+Module Make(X: OrderedType).
+
+Include FSetAVL.Make(X).
+
+Module Raw := MSet.Raw.
+
+Section MEM_BETWEEN.
+
+(** [mem_between above below s] is [true] iff there exists an element of [s]
+  that belongs to the interval described by the predicates [above] and [below].
+  Using the monotonicity of [above] and [below], the implementation of
+  [mem_between] avoids traversing the subtrees of [s] that
+  lie entirely outside the interval of interest. *)
+
+Variable above_low_bound: elt -> bool.
+Variable below_high_bound: elt -> bool.
+
+Fixpoint raw_mem_between (m: Raw.tree) : bool :=
+  match m with
+  | Raw.Leaf => false
+  | Raw.Node _ l x r =>
+      if above_low_bound x
+      then if below_high_bound x
+           then true
+           else raw_mem_between l
+      else raw_mem_between r
+  end.
+
+Definition mem_between (m: t) : bool := raw_mem_between m.(MSet.this).
+
+Hypothesis above_monotone:
+  forall x1 x2, X.eq x1 x2 \/ X.lt x1 x2 -> above_low_bound x1 = true -> above_low_bound x2 = true.
+Hypothesis below_monotone:
+  forall x1 x2, X.eq x1 x2 \/ X.lt x2 x1 -> below_high_bound x1 = true -> below_high_bound x2 = true.
+
+Lemma raw_mem_between_1:
+  forall m,
+  raw_mem_between m = true ->
+  exists x, Raw.In x m /\ above_low_bound x = true /\ below_high_bound x = true.
+Proof.
+  induction m; simpl; intros.
+- discriminate.
+- destruct (above_low_bound t1) eqn: LB; [destruct (below_high_bound t1) eqn: HB | idtac].
+  + (* in interval *)
+    exists t1; split; auto.  apply Raw.IsRoot. auto. 
+  + (* above interval *)
+    exploit IHm1; auto. intros [x' [A B]]. exists x'; split; auto. apply Raw.InLeft; auto.
+  + (* below interval *)
+    exploit IHm2; auto. intros [x' [A B]]. exists x'; split; auto. apply Raw.InRight; auto.
+Qed.
+
+Lemma raw_mem_between_2:
+  forall x m,
+  Raw.bst m ->
+  Raw.In x m -> above_low_bound x = true -> below_high_bound x = true ->
+  raw_mem_between m = true.
+Proof.
+  induction 1; simpl; intros.
+- inv H.
+- rewrite Raw.In_node_iff in H1. 
+  destruct (above_low_bound x0) eqn: LB; [destruct (below_high_bound x0) eqn: HB | idtac].
+  + (* in interval *)
+    auto.
+  + (* above interval *)
+    assert (X.eq x x0 \/ X.lt x0 x -> False).
+    { intros. exploit below_monotone; eauto. congruence. }
+    intuition.
+  + assert (X.eq x x0 \/ X.lt x x0 -> False).
+    { intros. exploit above_monotone; eauto. congruence. }
+    intuition.
+Qed.
+
+Theorem mem_between_1:
+  forall s,
+  mem_between s = true ->
+  exists x, In x s /\ above_low_bound x = true /\ below_high_bound x = true.
+Proof.
+  intros. apply raw_mem_between_1. auto. 
+Qed.
+
+Theorem mem_between_2:
+  forall x s,
+  In x s -> above_low_bound x = true -> below_high_bound x = true ->
+  mem_between s = true.
+Proof.
+  unfold mem_between; intros. apply raw_mem_between_2 with x; auto. apply MSet.is_ok.
+Qed.
+
+End MEM_BETWEEN.
+
+Section ELEMENTS_BETWEEN.
+
+(** [elements_between above below s] returns the set of elements of [s]
+  that belong to the interval [above,below]. *)
+
+Variable above_low_bound: elt -> bool.
+Variable below_high_bound: elt -> bool.
+
+Fixpoint raw_elements_between (m: Raw.tree) : Raw.tree :=
+  match m with
+  | Raw.Leaf => Raw.Leaf
+  | Raw.Node _ l x r =>
+      if above_low_bound x then
+        if below_high_bound x then
+          Raw.join (raw_elements_between l) x (raw_elements_between r)
+        else
+          raw_elements_between l
+      else
+        raw_elements_between r
+  end.
+
+Remark In_raw_elements_between_1:
+  forall x m,
+  Raw.In x (raw_elements_between m) -> Raw.In x m.
+Proof.
+  induction m; simpl; intros.
+- inv H.
+- rewrite Raw.In_node_iff. 
+  destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H.
+  + rewrite Raw.join_spec in H. intuition. 
+  + left; apply IHm1; auto.
+  + right; right; apply IHm2; auto.
+Qed.
+
+Lemma raw_elements_between_ok:
+  forall m, Raw.bst m -> Raw.bst (raw_elements_between m).
+Proof.
+  induction 1; simpl.
+- constructor.
+- destruct (above_low_bound x) eqn:LB; [destruct (below_high_bound x) eqn: RB | idtac]; simpl.
+  + apply Raw.join_ok; auto.
+    red; intros. apply l0. apply In_raw_elements_between_1; auto.
+    red; intros. apply g. apply In_raw_elements_between_1; auto.
+  + auto.
+  + auto.
+Qed.
+
+Definition elements_between (s: t) : t :=
+  @MSet.Mkt (raw_elements_between s.(MSet.this)) (raw_elements_between_ok s.(MSet.this) s.(MSet.is_ok)).
+
+Hypothesis above_monotone:
+  forall x1 x2, X.eq x1 x2 \/ X.lt x1 x2 -> above_low_bound x1 = true -> above_low_bound x2 = true.
+Hypothesis below_monotone:
+  forall x1 x2, X.eq x1 x2 \/ X.lt x2 x1 -> below_high_bound x1 = true -> below_high_bound x2 = true.
+
+Remark In_raw_elements_between_2:
+  forall x m,
+  Raw.In x (raw_elements_between m) -> above_low_bound x = true /\ below_high_bound x = true.
+Proof.
+  induction m; simpl; intros.
+- inv H.
+- destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H.
+  + rewrite Raw.join_spec in H. intuition.
+    apply above_monotone with t1; auto. 
+    apply below_monotone with t1; auto.
+  + auto.
+  + auto.
+Qed.
+
+Remark In_raw_elements_between_3:
+  forall x m,
+  Raw.bst m ->
+  Raw.In x m -> above_low_bound x = true -> below_high_bound x = true ->
+  Raw.In x (raw_elements_between m).
+Proof.
+  induction 1; simpl; intros.
+- auto.
+- rewrite Raw.In_node_iff in H1.
+  destruct (above_low_bound x0) eqn:LB; [destruct (below_high_bound x0) eqn: RB | idtac].
+  + rewrite Raw.join_spec. intuition. 
+  + assert (X.eq x x0 \/ X.lt x0 x -> False).
+    { intros. exploit below_monotone; eauto. congruence. }
+    intuition. elim H7. apply g. auto.
+  + assert (X.eq x x0 \/ X.lt x x0 -> False).
+    { intros. exploit above_monotone; eauto. congruence. }
+    intuition. elim H7. apply l0. auto.
+Qed.
+
+Theorem elements_between_iff:
+  forall x s,
+  In x (elements_between s) <-> In x s /\ above_low_bound x = true /\ below_high_bound x = true.
+Proof.
+  intros. unfold elements_between, In; simpl. split.
+  intros. split. apply In_raw_elements_between_1; auto. eapply In_raw_elements_between_2; eauto. 
+  intros [A [B C]]. apply In_raw_elements_between_3; auto. apply MSet.is_ok.
+Qed.
+
+End ELEMENTS_BETWEEN.
+
+Section FOR_ALL_BETWEEN.
+
+(** [for_all_between pred above below s] is [true] iff all elements of [s]
+  in a given variation interval satisfy predicate [pred].
+  The variation interval is given by two predicates [above] and [below]
+  which must both hold if the element is part of the interval.
+  Using the monotonicity of [above] and [below], the implementation of
+  [for_all_between] avoids traversing the subtrees of [s] that
+  lie entirely outside the interval of interest. *)
+
+Variable pred: elt -> bool.
+Variable above_low_bound: elt -> bool.
+Variable below_high_bound: elt -> bool.
+
+Fixpoint raw_for_all_between (m: Raw.tree) : bool :=
+  match m with
+  | Raw.Leaf => true
+  | Raw.Node _ l x r =>
+      if above_low_bound x
+      then if below_high_bound x
+           then raw_for_all_between l && pred x && raw_for_all_between r
+           else raw_for_all_between l
+      else raw_for_all_between r
+  end.
+
+Definition for_all_between (m: t) : bool := raw_for_all_between m.(MSet.this).
+
+Hypothesis pred_compat:
+  forall x1 x2, X.eq x1 x2 -> pred x1 = pred x2.
+Hypothesis above_monotone:
+  forall x1 x2, X.eq x1 x2 \/ X.lt x1 x2 -> above_low_bound x1 = true -> above_low_bound x2 = true.
+Hypothesis below_monotone:
+  forall x1 x2, X.eq x1 x2 \/ X.lt x2 x1 -> below_high_bound x1 = true -> below_high_bound x2 = true.
+
+Lemma raw_for_all_between_1:
+  forall x m,
+  Raw.bst m ->
+  raw_for_all_between m = true ->
+  Raw.In x m ->
+  above_low_bound x = true ->
+  below_high_bound x = true ->
+  pred x = true.
+Proof.
+  induction 1; simpl; intros.
+- inv H0. 
+- destruct (above_low_bound x0) eqn: LB; [destruct (below_high_bound x0) eqn: HB | idtac].
+  + (* in interval *)
+    destruct (andb_prop _ _ H1) as [P C]. destruct (andb_prop _ _ P) as [A B]. clear H1 P.
+    inv H2.
+    * erewrite pred_compat; eauto. 
+    * apply IHbst1; auto.
+    * apply IHbst2; auto.
+  + (* above interval *)
+    inv H2.
+    * assert (below_high_bound x0 = true) by (apply below_monotone with x; auto). 
+      congruence.
+    * apply IHbst1; auto.
+    * assert (below_high_bound x0 = true) by (apply below_monotone with x; auto).
+      congruence.
+  + (* below interval *)
+    inv H2.
+    * assert (above_low_bound x0 = true) by (apply above_monotone with x; auto). 
+      congruence.
+    * assert (above_low_bound x0 = true) by (apply above_monotone with x; auto).
+      congruence.
+    * apply IHbst2; auto.
+Qed.
+
+Lemma raw_for_all_between_2:
+  forall m,
+  Raw.bst m ->
+  (forall x, Raw.In x m -> above_low_bound x = true -> below_high_bound x = true -> pred x = true) ->
+  raw_for_all_between m = true.
+Proof.
+  induction 1; intros; simpl.
+- auto.
+- destruct (above_low_bound x) eqn: LB; [destruct (below_high_bound x) eqn: HB | idtac].
+  + (* in interval *)
+    rewrite IHbst1. rewrite (H1 x). rewrite IHbst2. auto.
+    intros. apply H1; auto. apply Raw.InRight; auto.
+    apply Raw.IsRoot. reflexivity. auto. auto. 
+    intros. apply H1; auto. apply Raw.InLeft; auto.
+  + (* above interval *)
+    apply IHbst1. intros. apply H1; auto. apply Raw.InLeft; auto.
+  + (* below interval *)
+    apply IHbst2. intros. apply H1; auto. apply Raw.InRight; auto.
+Qed.
+
+Theorem for_all_between_iff:
+  forall s,
+  for_all_between s = true <-> (forall x, In x s -> above_low_bound x = true -> below_high_bound x = true -> pred x = true).
+Proof.
+  unfold for_all_between; intros; split; intros.
+- eapply raw_for_all_between_1; eauto. apply MSet.is_ok. 
+- apply raw_for_all_between_2; auto. apply MSet.is_ok.
+Qed.
+
+End FOR_ALL_BETWEEN.
+
+Section PARTITION_BETWEEN.
+
+Variable above_low_bound: elt -> bool.
+Variable below_high_bound: elt -> bool.
+
+Fixpoint raw_partition_between (m: Raw.tree) : Raw.tree * Raw.tree :=
+  match m with
+  | Raw.Leaf => (Raw.Leaf, Raw.Leaf)
+  | Raw.Node _ l x r =>
+      if above_low_bound x then
+        if below_high_bound x then
+          (let (l1, l2) := raw_partition_between l in
+           let (r1, r2) := raw_partition_between r in
+           (Raw.join l1 x r1, Raw.concat l2 r2))
+        else
+          (let (l1, l2) := raw_partition_between l in
+           (l1, Raw.join l2 x r))
+      else
+        (let (r1, r2) := raw_partition_between r in
+         (r1, Raw.join l x r2))
+  end.
+
+Remark In_raw_partition_between_1:
+  forall x m,
+  Raw.In x (fst (raw_partition_between m)) -> Raw.In x m.
+Proof.
+  induction m; simpl; intros.
+- inv H.
+- destruct (raw_partition_between m1) as [l1 l2] eqn:LEQ; simpl in *. 
+  destruct (raw_partition_between m2) as [r1 r2] eqn:REQ; simpl in *.
+  destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H.
+  + rewrite Raw.join_spec in H. rewrite Raw.In_node_iff. intuition. 
+  + rewrite Raw.In_node_iff. intuition.
+  + rewrite Raw.In_node_iff. intuition.
+Qed.
+
+Remark In_raw_partition_between_2:
+  forall x m,
+  Raw.In x (snd (raw_partition_between m)) -> Raw.In x m.
+Proof.
+  induction m; simpl; intros.
+- inv H.
+- destruct (raw_partition_between m1) as [l1 l2] eqn:LEQ; simpl in *. 
+  destruct (raw_partition_between m2) as [r1 r2] eqn:REQ; simpl in *.
+  destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H.
+  + rewrite Raw.concat_spec in H. rewrite Raw.In_node_iff. intuition. 
+  + rewrite Raw.join_spec in H. rewrite Raw.In_node_iff. intuition.
+  + rewrite Raw.join_spec in H. rewrite Raw.In_node_iff. intuition.
+Qed.
+
+Lemma raw_partition_between_ok:
+  forall m, Raw.bst m -> Raw.bst (fst (raw_partition_between m)) /\ Raw.bst (snd (raw_partition_between m)).
+Proof.
+  induction 1; simpl.
+- split; constructor.
+- destruct IHbst1 as [L1 L2]. destruct IHbst2 as [R1 R2]. 
+  destruct (raw_partition_between l) as [l1 l2] eqn:LEQ; simpl in *. 
+  destruct (raw_partition_between r) as [r1 r2] eqn:REQ; simpl in *.
+  destruct (above_low_bound x) eqn:LB; [destruct (below_high_bound x) eqn: RB | idtac]; simpl.
+  + split.
+    apply Raw.join_ok; auto.
+    red; intros. apply l0. apply In_raw_partition_between_1. rewrite LEQ; auto. 
+    red; intros. apply g. apply In_raw_partition_between_1. rewrite REQ; auto.
+    apply Raw.concat_ok; auto.
+    intros. transitivity x. 
+    apply l0. apply In_raw_partition_between_2. rewrite LEQ; auto. 
+    apply g. apply In_raw_partition_between_2. rewrite REQ; auto.
+  + split.
+    auto.
+    apply Raw.join_ok; auto.
+    red; intros. apply l0. apply In_raw_partition_between_2. rewrite LEQ; auto. 
+  + split.
+    auto.
+    apply Raw.join_ok; auto.
+    red; intros. apply g. apply In_raw_partition_between_2. rewrite REQ; auto.
+Qed.
+
+Hypothesis above_monotone:
+  forall x1 x2, X.eq x1 x2 \/ X.lt x1 x2 -> above_low_bound x1 = true -> above_low_bound x2 = true.
+Hypothesis below_monotone:
+  forall x1 x2, X.eq x1 x2 \/ X.lt x2 x1 -> below_high_bound x1 = true -> below_high_bound x2 = true.
+
+Remark In_raw_partition_between_3:
+  forall x m,
+  Raw.In x (fst (raw_partition_between m)) -> above_low_bound x = true /\ below_high_bound x = true.
+Proof.
+  induction m; simpl; intros.
+- inv H.
+- destruct (raw_partition_between m1) as [l1 l2] eqn:LEQ; simpl in *. 
+  destruct (raw_partition_between m2) as [r1 r2] eqn:REQ; simpl in *.
+  destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H.
+  + rewrite Raw.join_spec in H. intuition.
+    apply above_monotone with t1; auto. 
+    apply below_monotone with t1; auto.
+  + auto.
+  + auto.
+Qed.
+
+Remark In_raw_partition_between_4:
+  forall x m,
+  Raw.bst m ->
+  Raw.In x (snd (raw_partition_between m)) -> above_low_bound x = false \/ below_high_bound x = false.
+Proof.
+  induction 1; simpl; intros.
+- inv H.
+- destruct (raw_partition_between l) as [l1 l2] eqn:LEQ; simpl in *. 
+  destruct (raw_partition_between r) as [r1 r2] eqn:REQ; simpl in *.
+  destruct (above_low_bound x0) eqn:LB; [destruct (below_high_bound x0) eqn: RB | idtac]; simpl in H.
+  + simpl in H1. rewrite Raw.concat_spec in H1. intuition. 
+  + assert (forall y, X.eq y x0 \/ X.lt x0 y -> below_high_bound y = false).
+    { intros. destruct (below_high_bound y) eqn:E; auto. 
+      assert (below_high_bound x0 = true) by (apply below_monotone with y; auto).
+      congruence. }
+    simpl in H1. rewrite Raw.join_spec in H1. intuition.
+  + assert (forall y, X.eq y x0 \/ X.lt y x0 -> above_low_bound y = false).
+    { intros. destruct (above_low_bound y) eqn:E; auto. 
+      assert (above_low_bound x0 = true) by (apply above_monotone with y; auto).
+      congruence. }
+    simpl in H1. rewrite Raw.join_spec in H1. intuition.
+Qed.
+
+Remark In_raw_partition_between_5:
+  forall x m,
+  Raw.bst m ->
+  Raw.In x m -> above_low_bound x = true -> below_high_bound x = true ->
+  Raw.In x (fst (raw_partition_between m)).
+Proof.
+  induction 1; simpl; intros.
+- inv H.
+- destruct (raw_partition_between l) as [l1 l2] eqn:LEQ; simpl in *. 
+  destruct (raw_partition_between r) as [r1 r2] eqn:REQ; simpl in *.
+  destruct (above_low_bound x0) eqn:LB; [destruct (below_high_bound x0) eqn: RB | idtac]; simpl in H.
+  + simpl. rewrite Raw.join_spec. inv H1. 
+    auto.
+    right; left; apply IHbst1; auto. 
+    right; right; apply IHbst2; auto.
+  + simpl. inv H1. 
+    assert (below_high_bound x0 = true) by (apply below_monotone with x; auto).
+    congruence.
+    auto. 
+    assert (below_high_bound x0 = true) by (apply below_monotone with x; auto). 
+    congruence.
+  + simpl. inv H1. 
+    assert (above_low_bound x0 = true) by (apply above_monotone with x; auto).
+    congruence.
+    assert (above_low_bound x0 = true) by (apply above_monotone with x; auto). 
+    congruence.
+    eauto.
+Qed.
+
+Remark In_raw_partition_between_6:
+  forall x m,
+  Raw.bst m ->
+  Raw.In x m -> above_low_bound x = false \/ below_high_bound x = false ->
+  Raw.In x (snd (raw_partition_between m)).
+Proof.
+  induction 1; simpl; intros.
+- inv H.
+- destruct (raw_partition_between l) as [l1 l2] eqn:LEQ; simpl in *. 
+  destruct (raw_partition_between r) as [r1 r2] eqn:REQ; simpl in *.
+  destruct (above_low_bound x0) eqn:LB; [destruct (below_high_bound x0) eqn: RB | idtac]; simpl in H.
+  + simpl. rewrite Raw.concat_spec. inv H1.
+    assert (below_high_bound x = true) by (apply below_monotone with x0; auto; left; symmetry; auto).
+    assert (above_low_bound x = true) by (apply above_monotone with x0; auto; left; symmetry; auto).
+    destruct H2; congruence.
+    left; apply IHbst1; auto.
+    right; apply IHbst2; auto.
+  + simpl. rewrite Raw.join_spec. inv H1. 
+    auto.
+    right; left; apply IHbst1; auto. 
+    auto.
+  + simpl. rewrite Raw.join_spec. inv H1. 
+    auto.
+    auto.
+    right; right; apply IHbst2; auto.
+Qed.
+
+Definition partition_between (s: t) : t * t :=
+  let p := raw_partition_between s.(MSet.this) in
+  (@MSet.Mkt (fst p) (proj1 (raw_partition_between_ok s.(MSet.this) s.(MSet.is_ok))),
+   @MSet.Mkt (snd p) (proj2 (raw_partition_between_ok s.(MSet.this) s.(MSet.is_ok)))).
+
+Theorem partition_between_iff_1:
+  forall x s,
+  In x (fst (partition_between s)) <->
+  In x s /\ above_low_bound x = true /\ below_high_bound x = true.
+Proof.
+  intros. unfold partition_between, In; simpl. split. 
+  intros. split. apply In_raw_partition_between_1; auto. eapply In_raw_partition_between_3; eauto.
+  intros [A [B C]]. apply In_raw_partition_between_5; auto. apply MSet.is_ok.
+Qed.
+
+Theorem partition_between_iff_2:
+  forall x s,
+  In x (snd (partition_between s)) <->
+  In x s /\ (above_low_bound x = false \/ below_high_bound x = false).
+Proof.
+  intros. unfold partition_between, In; simpl. split. 
+  intros. split. apply In_raw_partition_between_2; auto. eapply In_raw_partition_between_4; eauto. apply MSet.is_ok.
+  intros [A B]. apply In_raw_partition_between_6; auto. apply MSet.is_ok.
+Qed.
+
+End PARTITION_BETWEEN.
+
+End Make.
diff --git a/lib/Floats.v b/lib/Floats.v
index eb86027..02ff25c 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -121,6 +121,26 @@ Definition intuoffloat (f:float): option int := (**r conversion to unsigned 32-b
     | None => None
   end.
 
+Definition longoffloat (f:float): option int64 := (**r conversion to signed 64-bit int *)
+  match Zoffloat f with
+    | Some n =>
+      if Zle_bool Int64.min_signed n && Zle_bool n Int64.max_signed then
+        Some (Int64.repr n)
+      else
+        None
+    | None => None
+  end.
+
+Definition longuoffloat (f:float): option int64 := (**r conversion to unsigned 64-bit int *)
+  match Zoffloat f with
+    | Some n =>
+      if Zle_bool 0 n && Zle_bool n Int64.max_unsigned then
+        Some (Int64.repr n)
+      else
+        None
+    | None => None
+  end.
+
 (* Functions used to parse floats *)
 Program Definition build_from_parsed
   (prec:Z) (emax:Z) (prec_gt_0 :Prec_gt_0 prec) (Hmax:prec < emax)
@@ -154,6 +174,11 @@ Definition floatofint (n:int): float := (**r conversion from signed 32-bit int *
   binary_normalize64 (Int.signed n) 0 false.
 Definition floatofintu (n:int): float:= (**r conversion from unsigned 32-bit int *)
   binary_normalize64 (Int.unsigned n) 0 false.
+Definition floatoflong (n:int64): float := (**r conversion from signed 64-bit int *)
+  binary_normalize64 (Int64.signed n) 0 false.
+Definition floatoflongu (n:int64): float:= (**r conversion from unsigned 64-bit int *)
+  binary_normalize64 (Int64.unsigned n) 0 false.
+
 Definition add: float -> float -> float := b64_plus mode_NE. (**r addition *)
 Definition sub: float -> float -> float := b64_minus mode_NE. (**r subtraction *)
 Definition mul: float -> float -> float := b64_mult mode_NE. (**r multiplication *)
@@ -217,24 +242,7 @@ Definition double_of_bits (b: int64): float := b64_of_bits (Int64.unsigned b).
 Definition bits_of_single (f: float) : int := Int.repr (bits_of_b32 (binary32offloat f)).
 Definition single_of_bits (b: int): float := floatofbinary32 (b32_of_bits (Int.unsigned b)).
 
-Definition from_words (hi lo: int) : float :=
-  double_of_bits
-    (Int64.or (Int64.shl (Int64.repr (Int.unsigned hi)) (Int64.repr 32))
-              (Int64.repr (Int.unsigned lo))).
-
-Lemma from_words_eq:
-  forall lo hi,
-  from_words hi lo =
-  double_of_bits (Int64.repr (Int.unsigned hi * two_p 32 + Int.unsigned lo)).
-Proof.
-  intros. unfold from_words. decEq.
-  rewrite Int64.shifted_or_is_add.
-  apply Int64.eqm_samerepr. auto with ints.
-  change Int64.zwordsize with 64. omega.
-  generalize (Int.unsigned_range lo). intros [A B]. 
-  rewrite Int64.unsigned_repr. assumption. 
-  assert (Int.modulus < Int64.max_unsigned). compute; auto. omega.
-Qed.
+Definition from_words (hi lo: int) : float := double_of_bits (Int64.ofwords hi lo).
 
 (** Below are the only properties of floating-point arithmetic that we
   rely on in the compiler proof. *)
@@ -747,35 +755,11 @@ Definition ox4330_0000 := Int.repr 1127219200.        (**r [0x4330_0000] *)
 
 Lemma split_bits_or:
   forall x,
-  split_bits 52 11
-  (Int64.unsigned
-    (Int64.or
-      (Int64.shl (Int64.repr (Int.unsigned ox4330_0000)) (Int64.repr 32))
-      (Int64.repr (Int.unsigned x)))) = (false, Int.unsigned x, 1075).
+  split_bits 52 11 (Int64.unsigned (Int64.ofwords ox4330_0000 x)) = (false, Int.unsigned x, 1075).
 Proof.
   intros.
   transitivity (split_bits 52 11 (join_bits 52 11 false (Int.unsigned x) 1075)).
-  - f_equal.
-    assert (Int64.unsigned (Int64.repr (Int.unsigned x)) = Int.unsigned x).
-      apply Int64.unsigned_repr. 
-      generalize (Int.unsigned_range x).
-      compute_this (Int.modulus).
-      compute_this (Int64.max_unsigned). 
-      omega.
-    rewrite Int64.shifted_or_is_add.
-    unfold join_bits.
-    rewrite H. 
-    apply Int64.unsigned_repr.
-    generalize (Int.unsigned_range x). 
-    compute_this ((0 + 1075) * 2 ^ 52).
-    compute_this (Int.modulus).
-    compute_this (Int64.max_unsigned). 
-    omega.
-    compute_this Int64.zwordsize. omega.
-    rewrite H. 
-    generalize (Int.unsigned_range x). 
-    change (two_p 32) with Int.modulus. 
-    omega.
+  - f_equal. rewrite Int64.ofwords_add'. reflexivity. 
   - apply split_join_bits. 
     compute; auto.
     generalize (Int.unsigned_range x). 
diff --git a/lib/Integers.v b/lib/Integers.v
index af9decd..84b82bf 100644
--- a/lib/Integers.v
+++ b/lib/Integers.v
@@ -16,8 +16,8 @@
 (** Formalizations of machine integers modulo $2^N$ #2<sup>N</sup>#. *)
 
 Require Import Eqdep_dec.
+Require Import Zquot.
 Require Import Zwf.
-Require Recdef.
 Require Import Coqlib.
 
 (** * Comparisons *)
@@ -1061,36 +1061,7 @@ Proof.
   apply eqm_mult; apply eqm_sym; apply eqm_signed_unsigned.
 Qed.
 
-(** ** Properties of division and Lemma Ztestbit_same_equal:
-  forall x y,
-  (forall i, 0 <= i -> Z.testbit x i = Z.testbit y i) ->
-  x = y.
-Proof.
-  assert (forall x, 0 <= x ->
-          forall y,
-           (forall i, 0 <= i -> Z.testbit x i = Z.testbit y i) ->
-           x = y).
-  {
-    intros x0 POS0; pattern x0; apply Zdiv2_pos_ind; auto.
-    - intros. symmetry. apply Ztestbit_is_zero. 
-      intros. rewrite <- H; auto. apply Z.testbit_0_l. 
-    - intros. rewrite (Zdecomp x); rewrite (Zdecomp y). f_equal.
-      + exploit (H0 0). omega. rewrite !(Ztestbit_eq 0).
-        rewrite !zeq_true. auto. omega. omega.
-      + intros. apply H; intros.
-        exploit (H0 (Zsucc i)). omega. rewrite !(Ztestbit_eq (Z.succ i)). 
-        rewrite !zeq_false. rewrite !Z.pred_succ. auto. 
-        omega. omega. omega. omega.
-  }
-  intros. destruct (zle 0 x).
-  - apply H; auto.
-  - assert (-x-1 = -y-1). 
-    { apply H. omega. intros. rewrite !Z_one_complement; auto. 
-      rewrite H0; auto. 
-    }
-    omega.
-Qed.
-modulus *)
+(** ** Properties of division and modulus *)
 
 Lemma modu_divu_Euclid:
   forall x y, y <> zero -> x = add (mul (divu x y) y) (modu x y).
@@ -1155,8 +1126,6 @@ Proof.
   apply one_not_zero.
 Qed.
 
-Require Import Zquot.
-
 Theorem divs_mone:
   forall x, divs x mone = neg x.
 Proof.
@@ -3707,7 +3676,434 @@ Module Wordsize_64.
   Proof. unfold wordsize; congruence. Qed.
 End Wordsize_64.
 
-Module Int64 := Make(Wordsize_64).
+Module Int64.
+
+Include Make(Wordsize_64).
+
+(** Shifts with amount given as a 32-bit integer *)
+
+Definition iwordsize': Int.int := Int.repr zwordsize.
+
+Definition shl' (x: int) (y: Int.int): int :=
+  repr (Z.shiftl (unsigned x) (Int.unsigned y)).
+Definition shru' (x: int) (y: Int.int): int :=
+  repr (Z.shiftr (unsigned x) (Int.unsigned y)).
+Definition shr' (x: int) (y: Int.int): int :=
+  repr (Z.shiftr (signed x) (Int.unsigned y)).
+
+Lemma bits_shl':
+  forall x y i,
+  0 <= i < zwordsize ->
+  testbit (shl' x y) i =
+  if zlt i (Int.unsigned y) then false else testbit x (i - Int.unsigned y).
+Proof.
+  intros. unfold shl'. rewrite testbit_repr; auto. 
+  destruct (zlt i (Int.unsigned y)). 
+  apply Z.shiftl_spec_low. auto.
+  apply Z.shiftl_spec_high. omega. omega.
+Qed.
+
+Lemma bits_shru':
+  forall x y i,
+  0 <= i < zwordsize ->
+  testbit (shru' x y) i =
+  if zlt (i + Int.unsigned y) zwordsize then testbit x (i + Int.unsigned y) else false.
+Proof.
+  intros. unfold shru'. rewrite testbit_repr; auto. 
+  rewrite Z.shiftr_spec. fold (testbit x (i + Int.unsigned y)).
+  destruct (zlt (i + Int.unsigned y) zwordsize).
+  auto.
+  apply bits_above; auto. 
+  omega.
+Qed.
+
+Lemma bits_shr':
+  forall x y i,
+  0 <= i < zwordsize ->
+  testbit (shr' x y) i =
+  testbit x (if zlt (i + Int.unsigned y) zwordsize then i + Int.unsigned y else zwordsize - 1).
+Proof.
+  intros. unfold shr'. rewrite testbit_repr; auto. 
+  rewrite Z.shiftr_spec. apply bits_signed. 
+  generalize (Int.unsigned_range y); omega.
+  omega.
+Qed.
+
+(** Decomposing 64-bit ints as pairs of 32-bit ints *)
+
+Definition loword (n: int) : Int.int := Int.repr (unsigned n).
+
+Definition hiword (n: int) : Int.int := Int.repr (unsigned (shru n (repr Int.zwordsize))).
+
+Definition ofwords (hi lo: Int.int) : int :=
+  or (shl (repr (Int.unsigned hi)) (repr Int.zwordsize)) (repr (Int.unsigned lo)).
+
+Lemma bits_loword:
+  forall n i, 0 <= i < Int.zwordsize -> Int.testbit (loword n) i = testbit n i.
+Proof.
+  intros. unfold loword. rewrite Int.testbit_repr; auto. 
+Qed.
+
+Lemma bits_hiword:
+  forall n i, 0 <= i < Int.zwordsize -> Int.testbit (hiword n) i = testbit n (i + Int.zwordsize).
+Proof.
+  intros. unfold hiword. rewrite Int.testbit_repr; auto.
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  fold (testbit (shru n (repr Int.zwordsize)) i). rewrite bits_shru.
+  change (unsigned (repr Int.zwordsize)) with Int.zwordsize.
+  apply zlt_true. omega. omega. 
+Qed.
+
+Lemma bits_ofwords:
+  forall hi lo i, 0 <= i < zwordsize ->
+  testbit (ofwords hi lo) i =
+  if zlt i Int.zwordsize then Int.testbit lo i else Int.testbit hi (i - Int.zwordsize).
+Proof.
+  intros. unfold ofwords. rewrite bits_or; auto. rewrite bits_shl; auto. 
+  change (unsigned (repr Int.zwordsize)) with Int.zwordsize. 
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  destruct (zlt i Int.zwordsize).
+  rewrite testbit_repr; auto. 
+  rewrite !testbit_repr; auto.
+  fold (Int.testbit lo i). rewrite Int.bits_above. apply orb_false_r. auto. 
+  omega.
+Qed.
+
+Lemma lo_ofwords:
+  forall hi lo, loword (ofwords hi lo) = lo.
+Proof.
+  intros. apply Int.same_bits_eq; intros. 
+  rewrite bits_loword; auto. rewrite bits_ofwords. apply zlt_true. omega. 
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity. omega.
+Qed.
+
+Lemma hi_ofwords:
+  forall hi lo, hiword (ofwords hi lo) = hi.
+Proof.
+  intros. apply Int.same_bits_eq; intros. 
+  rewrite bits_hiword; auto. rewrite bits_ofwords.
+  rewrite zlt_false. f_equal. omega. omega. 
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity. omega.
+Qed.
+
+Lemma ofwords_recompose:
+  forall n, ofwords (hiword n) (loword n) = n.
+Proof.
+  intros. apply same_bits_eq; intros. rewrite bits_ofwords; auto. 
+  destruct (zlt i Int.zwordsize). 
+  apply bits_loword. omega. 
+  rewrite bits_hiword. f_equal. omega. 
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity. omega.
+Qed.
+
+Lemma ofwords_add:
+  forall lo hi, ofwords hi lo = repr (Int.unsigned hi * two_p 32 + Int.unsigned lo).
+Proof.
+  intros. unfold ofwords. rewrite shifted_or_is_add. 
+  apply eqm_samerepr. apply eqm_add. apply eqm_mult. 
+  apply eqm_sym; apply eqm_unsigned_repr.
+  apply eqm_refl. 
+  apply eqm_sym; apply eqm_unsigned_repr.
+  change Int.zwordsize with 32; change zwordsize with 64; omega.
+  rewrite unsigned_repr. generalize (Int.unsigned_range lo). intros [A B]. exact B. 
+  assert (Int.max_unsigned < max_unsigned) by (compute; auto).
+  generalize (Int.unsigned_range_2 lo); omega.
+Qed.
+
+Lemma ofwords_add':
+  forall lo hi, unsigned (ofwords hi lo) = Int.unsigned hi * two_p 32 + Int.unsigned lo.
+Proof.
+  intros. rewrite ofwords_add. apply unsigned_repr.
+  generalize (Int.unsigned_range hi) (Int.unsigned_range lo). 
+  change (two_p 32) with Int.modulus.
+  change Int.modulus with 4294967296.
+  change max_unsigned with 18446744073709551615.
+  omega.
+Qed.
+
+(** Expressing 64-bit operations in terms of 32-bit operations *)
+
+Lemma decompose_bitwise_binop:
+  forall f f64 f32 xh xl yh yl,
+  (forall x y i, 0 <= i < zwordsize -> testbit (f64 x y) i = f (testbit x i) (testbit y i)) ->
+  (forall x y i, 0 <= i < Int.zwordsize -> Int.testbit (f32 x y) i = f (Int.testbit x i) (Int.testbit y i)) ->
+  f64 (ofwords xh xl) (ofwords yh yl) = ofwords (f32 xh yh) (f32 xl yl).
+Proof.
+  intros. apply Int64.same_bits_eq; intros.
+  rewrite H by auto. rewrite ! bits_ofwords by auto.
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  destruct (zlt i Int.zwordsize); rewrite H0 by omega; auto.
+Qed.
+
+Lemma decompose_and:
+  forall xh xl yh yl,
+  and (ofwords xh xl) (ofwords yh yl) = ofwords (Int.and xh yh) (Int.and xl yl).
+Proof.
+  intros. apply decompose_bitwise_binop with andb.
+  apply bits_and. apply Int.bits_and.
+Qed.
+
+Lemma decompose_or:
+  forall xh xl yh yl,
+  or (ofwords xh xl) (ofwords yh yl) = ofwords (Int.or xh yh) (Int.or xl yl).
+Proof.
+  intros. apply decompose_bitwise_binop with orb.
+  apply bits_or. apply Int.bits_or.
+Qed.
+
+Lemma decompose_xor:
+  forall xh xl yh yl,
+  xor (ofwords xh xl) (ofwords yh yl) = ofwords (Int.xor xh yh) (Int.xor xl yl).
+Proof.
+  intros. apply decompose_bitwise_binop with xorb.
+  apply bits_xor. apply Int.bits_xor.
+Qed.
+
+Lemma decompose_not:
+  forall xh xl,
+  not (ofwords xh xl) = ofwords (Int.not xh) (Int.not xl).
+Proof.
+  intros. unfold not, Int.not. rewrite <- decompose_xor. f_equal. 
+  apply (Int64.eq_spec mone (ofwords Int.mone Int.mone)).
+Qed.
+
+Lemma decompose_shl_1:
+  forall xh xl y,
+  0 <= Int.unsigned y < Int.zwordsize ->
+  shl' (ofwords xh xl) y =
+  ofwords (Int.or (Int.shl xh y) (Int.shru xl (Int.sub Int.iwordsize y)))
+          (Int.shl xl y).
+Proof.
+  intros.
+  assert (Int.unsigned (Int.sub Int.iwordsize y) = Int.zwordsize - Int.unsigned y).
+  { unfold Int.sub. rewrite Int.unsigned_repr. auto. 
+    rewrite Int.unsigned_repr_wordsize. generalize Int.wordsize_max_unsigned; omega. }
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  apply Int64.same_bits_eq; intros.
+  rewrite bits_shl' by auto. symmetry. rewrite bits_ofwords by auto.
+  destruct (zlt i Int.zwordsize). rewrite Int.bits_shl by omega. 
+  destruct (zlt i (Int.unsigned y)). auto. 
+  rewrite bits_ofwords by omega. rewrite zlt_true by omega. auto.
+  rewrite zlt_false by omega. rewrite bits_ofwords by omega. 
+  rewrite Int.bits_or by omega. rewrite Int.bits_shl by omega. 
+  rewrite Int.bits_shru by omega. rewrite H0.
+  destruct (zlt (i - Int.unsigned y) (Int.zwordsize)).
+  rewrite zlt_true by omega. rewrite zlt_true by omega.
+  rewrite orb_false_l. f_equal. omega. 
+  rewrite zlt_false by omega. rewrite zlt_false by omega. 
+  rewrite orb_false_r. f_equal. omega.
+Qed.
+
+Lemma decompose_shl_2:
+  forall xh xl y,
+  Int.zwordsize <= Int.unsigned y < zwordsize ->
+  shl' (ofwords xh xl) y =
+  ofwords (Int.shl xl (Int.sub y Int.iwordsize)) Int.zero.
+Proof.
+  intros.
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  assert (Int.unsigned (Int.sub y Int.iwordsize) = Int.unsigned y - Int.zwordsize).
+  { unfold Int.sub. rewrite Int.unsigned_repr. auto. 
+    rewrite Int.unsigned_repr_wordsize. generalize (Int.unsigned_range_2 y). omega. }
+  apply Int64.same_bits_eq; intros.
+  rewrite bits_shl' by auto. symmetry. rewrite bits_ofwords by auto.
+  destruct (zlt i Int.zwordsize). rewrite zlt_true by omega. apply Int.bits_zero.
+  rewrite Int.bits_shl by omega. 
+  destruct (zlt i (Int.unsigned y)).
+  rewrite zlt_true by omega. auto. 
+  rewrite zlt_false by omega. 
+  rewrite bits_ofwords by omega. rewrite zlt_true by omega. f_equal. omega. 
+Qed.
+
+Lemma decompose_shru_1:
+  forall xh xl y,
+  0 <= Int.unsigned y < Int.zwordsize ->
+  shru' (ofwords xh xl) y =
+  ofwords (Int.shru xh y)
+          (Int.or (Int.shru xl y) (Int.shl xh (Int.sub Int.iwordsize y))).
+Proof.
+  intros.
+  assert (Int.unsigned (Int.sub Int.iwordsize y) = Int.zwordsize - Int.unsigned y).
+  { unfold Int.sub. rewrite Int.unsigned_repr. auto. 
+    rewrite Int.unsigned_repr_wordsize. generalize Int.wordsize_max_unsigned; omega. }
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  apply Int64.same_bits_eq; intros.
+  rewrite bits_shru' by auto. symmetry. rewrite bits_ofwords by auto.
+  destruct (zlt i Int.zwordsize).
+  rewrite zlt_true by omega. 
+  rewrite bits_ofwords by omega.
+  rewrite Int.bits_or by omega. rewrite Int.bits_shl by omega. 
+  rewrite Int.bits_shru by omega. rewrite H0.
+  destruct (zlt (i + Int.unsigned y) (Int.zwordsize)).
+  rewrite zlt_true by omega.
+  rewrite orb_false_r. auto.
+  rewrite zlt_false by omega. 
+  rewrite orb_false_l. f_equal. omega.
+  rewrite Int.bits_shru by omega. 
+  destruct (zlt (i + Int.unsigned y) zwordsize).
+  rewrite bits_ofwords by omega. 
+  rewrite zlt_true by omega. rewrite zlt_false by omega. f_equal. omega. 
+  rewrite zlt_false by omega. auto.
+Qed.
+
+Lemma decompose_shru_2:
+  forall xh xl y,
+  Int.zwordsize <= Int.unsigned y < zwordsize ->
+  shru' (ofwords xh xl) y =
+  ofwords Int.zero (Int.shru xh (Int.sub y Int.iwordsize)).
+Proof.
+  intros.
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  assert (Int.unsigned (Int.sub y Int.iwordsize) = Int.unsigned y - Int.zwordsize).
+  { unfold Int.sub. rewrite Int.unsigned_repr. auto. 
+    rewrite Int.unsigned_repr_wordsize. generalize (Int.unsigned_range_2 y). omega. }
+  apply Int64.same_bits_eq; intros.
+  rewrite bits_shru' by auto. symmetry. rewrite bits_ofwords by auto.
+  destruct (zlt i Int.zwordsize).
+  rewrite Int.bits_shru by omega. rewrite H1.
+  destruct (zlt (i + Int.unsigned y) zwordsize).
+  rewrite zlt_true by omega. rewrite bits_ofwords by omega. 
+  rewrite zlt_false by omega. f_equal; omega. 
+  rewrite zlt_false by omega. auto.
+  rewrite zlt_false by omega. apply Int.bits_zero.
+Qed.
+
+Lemma decompose_shr_1:
+  forall xh xl y,
+  0 <= Int.unsigned y < Int.zwordsize ->
+  shr' (ofwords xh xl) y =
+  ofwords (Int.shr xh y)
+          (Int.or (Int.shru xl y) (Int.shl xh (Int.sub Int.iwordsize y))).
+Proof.
+  intros.
+  assert (Int.unsigned (Int.sub Int.iwordsize y) = Int.zwordsize - Int.unsigned y).
+  { unfold Int.sub. rewrite Int.unsigned_repr. auto. 
+    rewrite Int.unsigned_repr_wordsize. generalize Int.wordsize_max_unsigned; omega. }
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  apply Int64.same_bits_eq; intros.
+  rewrite bits_shr' by auto. symmetry. rewrite bits_ofwords by auto.
+  destruct (zlt i Int.zwordsize).
+  rewrite zlt_true by omega. 
+  rewrite bits_ofwords by omega.
+  rewrite Int.bits_or by omega. rewrite Int.bits_shl by omega. 
+  rewrite Int.bits_shru by omega. rewrite H0.
+  destruct (zlt (i + Int.unsigned y) (Int.zwordsize)).
+  rewrite zlt_true by omega.
+  rewrite orb_false_r. auto.
+  rewrite zlt_false by omega. 
+  rewrite orb_false_l. f_equal. omega.
+  rewrite Int.bits_shr by omega. 
+  destruct (zlt (i + Int.unsigned y) zwordsize).
+  rewrite bits_ofwords by omega. 
+  rewrite zlt_true by omega. rewrite zlt_false by omega. f_equal. omega. 
+  rewrite zlt_false by omega. rewrite bits_ofwords by omega. 
+  rewrite zlt_false by omega. f_equal.
+Qed.
+
+Lemma decompose_shr_2:
+  forall xh xl y,
+  Int.zwordsize <= Int.unsigned y < zwordsize ->
+  shr' (ofwords xh xl) y =
+  ofwords (Int.shr xh (Int.sub Int.iwordsize Int.one))
+          (Int.shr xh (Int.sub y Int.iwordsize)).
+Proof.
+  intros.
+  assert (zwordsize = 2 * Int.zwordsize) by reflexivity.
+  assert (Int.unsigned (Int.sub y Int.iwordsize) = Int.unsigned y - Int.zwordsize).
+  { unfold Int.sub. rewrite Int.unsigned_repr. auto. 
+    rewrite Int.unsigned_repr_wordsize. generalize (Int.unsigned_range_2 y). omega. }
+  apply Int64.same_bits_eq; intros.
+  rewrite bits_shr' by auto. symmetry. rewrite bits_ofwords by auto.
+  destruct (zlt i Int.zwordsize).
+  rewrite Int.bits_shr by omega. rewrite H1.
+  destruct (zlt (i + Int.unsigned y) zwordsize).
+  rewrite zlt_true by omega. rewrite bits_ofwords by omega. 
+  rewrite zlt_false by omega. f_equal; omega. 
+  rewrite zlt_false by omega. rewrite bits_ofwords by omega. 
+  rewrite zlt_false by omega. auto.
+  rewrite Int.bits_shr by omega. 
+  change (Int.unsigned (Int.sub Int.iwordsize Int.one)) with (Int.zwordsize - 1).
+  destruct (zlt (i + Int.unsigned y) zwordsize);
+  rewrite bits_ofwords by omega.
+  symmetry. rewrite zlt_false by omega. f_equal. 
+  destruct (zlt (i - Int.zwordsize + (Int.zwordsize - 1)) Int.zwordsize); omega.
+  symmetry. rewrite zlt_false by omega. f_equal. 
+  destruct (zlt (i - Int.zwordsize + (Int.zwordsize - 1)) Int.zwordsize); omega.
+Qed.
+
+Remark eqm_mul_2p32:
+  forall x y, Int.eqm x y -> eqm (x * two_p 32) (y * two_p 32).
+Proof.
+  intros. destruct H as [k EQ]. exists k. rewrite EQ.
+  change Int.modulus with (two_p 32). 
+  change modulus with (two_p 32 * two_p 32).
+  ring.
+Qed.
+
+Lemma decompose_add:
+  forall xh xl yh yl,
+  add (ofwords xh xl) (ofwords yh yl) =
+  ofwords (Int.add (Int.add xh yh) (Int.add_carry xl yl Int.zero))
+          (Int.add xl yl).
+Proof.
+  intros. symmetry. rewrite ofwords_add. rewrite add_unsigned.
+  apply eqm_samerepr. 
+  rewrite ! ofwords_add'. rewrite (Int.unsigned_add_carry xl yl). 
+  set (cc := Int.add_carry xl yl Int.zero).
+  set (Xl := Int.unsigned xl); set (Xh := Int.unsigned xh);
+  set (Yl := Int.unsigned yl); set (Yh := Int.unsigned yh).
+  change Int.modulus with (two_p 32).
+  replace (Xh * two_p 32 + Xl + (Yh * two_p 32 + Yl))
+     with ((Xh + Yh) * two_p 32 + (Xl + Yl)) by ring.
+  replace (Int.unsigned (Int.add (Int.add xh yh) cc) * two_p 32 +
+              (Xl + Yl - Int.unsigned cc * two_p 32))
+     with ((Int.unsigned (Int.add (Int.add xh yh) cc) - Int.unsigned cc) * two_p 32
+           + (Xl + Yl)) by ring.
+  apply eqm_add. 2: apply eqm_refl. apply eqm_mul_2p32.
+  replace (Xh + Yh) with ((Xh + Yh + Int.unsigned cc) - Int.unsigned cc) by ring.
+  apply Int.eqm_sub. 2: apply Int.eqm_refl.
+  apply Int.eqm_unsigned_repr_l. apply Int.eqm_add. 2: apply Int.eqm_refl. 
+  apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl. 
+Qed.
+
+Definition mul' (x y: Int.int) : int := repr (Int.unsigned x * Int.unsigned y).
+
+Lemma decompose_mul:
+  forall xh xl yh yl,
+  mul (ofwords xh xl) (ofwords yh yl) =
+  ofwords (Int.add (Int.add (hiword (mul' xl yl)) (Int.mul xl yh)) (Int.mul xh yl))
+          (loword (mul' xl yl)).
+Proof.
+  intros. 
+  set (pl := loword (mul' xl yl)); set (ph := hiword (mul' xl yl)).
+  assert (EQ0: unsigned (mul' xl yl) = Int.unsigned ph * two_p 32 + Int.unsigned pl).
+  { rewrite <- (ofwords_recompose (mul' xl yl)). apply ofwords_add'. }
+  symmetry. rewrite ofwords_add. unfold mul. rewrite !ofwords_add'.
+  set (XL := Int.unsigned xl); set (XH := Int.unsigned xh);
+  set (YL := Int.unsigned yl); set (YH := Int.unsigned yh).
+  set (PH := Int.unsigned ph) in *. set (PL := Int.unsigned pl) in *.
+  transitivity (repr (((PH + XL * YH) + XH * YL) * two_p 32 + PL)).
+  apply eqm_samerepr. apply eqm_add. 2: apply eqm_refl. 
+  apply eqm_mul_2p32.
+  rewrite Int.add_unsigned. apply Int.eqm_unsigned_repr_l. apply Int.eqm_add.
+  rewrite Int.add_unsigned. apply Int.eqm_unsigned_repr_l. apply Int.eqm_add.
+  apply Int.eqm_refl.
+  unfold Int.mul. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl.
+  unfold Int.mul. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl.
+  transitivity (repr (unsigned (mul' xl yl) + (XL * YH + XH * YL) * two_p 32)).
+  rewrite EQ0. f_equal. ring.
+  transitivity (repr ((XL * YL + (XL * YH + XH * YL) * two_p 32))).
+  apply eqm_samerepr. apply eqm_add. 2: apply eqm_refl. 
+  unfold mul'. apply eqm_unsigned_repr_l. apply eqm_refl.
+  transitivity (repr (0 + (XL * YL + (XL * YH + XH * YL) * two_p 32))).
+  rewrite Zplus_0_l; auto. 
+  transitivity (repr (XH * YH * (two_p 32 * two_p 32) + (XL * YL + (XL * YH + XH * YL) * two_p 32))).
+  apply eqm_samerepr. apply eqm_add. 2: apply eqm_refl. 
+  change (two_p 32 * two_p 32) with modulus. exists (- XH * YH). ring. 
+  f_equal. ring. 
+Qed.
+
+End Int64.
 
 Notation int64 := Int64.int.
 
diff --git a/lib/Lattice.v b/lib/Lattice.v
index c4b55e9..3fc0d4c 100644
--- a/lib/Lattice.v
+++ b/lib/Lattice.v
@@ -747,3 +747,103 @@ Lemma ge_lub_right: forall x y, ge (lub x y) y.
 Proof. destruct x; destruct y; compute; tauto. Qed.
 
 End LBoolean.
+
+(** * Option semi-lattice *)
+
+(** This lattice adds a top element (represented by [None]) to a given
+  semi-lattice (whose elements are injected via [Some]). *)
+
+Module LOption(L: SEMILATTICE) <: SEMILATTICE_WITH_TOP.
+
+Definition t: Type := option L.t.
+
+Definition eq (x y: t) : Prop :=
+  match x, y with
+  | None, None => True
+  | Some x1, Some y1 => L.eq x1 y1
+  | _, _ => False
+  end.
+
+Lemma eq_refl: forall x, eq x x.
+Proof.
+  unfold eq; intros; destruct x. apply L.eq_refl. auto.
+Qed.
+
+Lemma eq_sym: forall x y, eq x y -> eq y x.
+Proof.
+  unfold eq; intros; destruct x; destruct y; auto. apply L.eq_sym; auto.
+Qed.
+
+Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+Proof.
+  unfold eq; intros; destruct x; destruct y; destruct z; auto.
+  eapply L.eq_trans; eauto.
+  contradiction.
+Qed.
+
+Definition beq (x y: t) : bool :=
+  match x, y with
+  | None, None => true
+  | Some x1, Some y1 => L.beq x1 y1
+  | _, _ => false
+  end.
+
+Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+Proof.
+  unfold beq, eq; intros; destruct x; destruct y.
+  apply L.beq_correct; auto.
+  discriminate. discriminate. auto.
+Qed.
+
+Definition ge (x y: t) : Prop :=
+  match x, y with
+  | None, _ => True
+  | _, None => False
+  | Some x1, Some y1 => L.ge x1 y1
+  end.
+
+Lemma ge_refl: forall x y, eq x y -> ge x y.
+Proof.
+  unfold eq, ge; intros; destruct x; destruct y.
+  apply L.ge_refl; auto.
+  auto. elim H. auto.
+Qed.
+
+Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+Proof.
+  unfold ge; intros; destruct x; destruct y; destruct z; auto.
+  eapply L.ge_trans; eauto. contradiction.
+Qed.
+
+Definition bot : t := Some L.bot.
+
+Lemma ge_bot: forall x, ge x bot.
+Proof.
+  unfold ge, bot; intros. destruct x; auto. apply L.ge_bot.
+Qed.
+
+Definition lub (x y: t) : t :=
+  match x, y with
+  | None, _ => None
+  | _, None => None
+  | Some x1, Some y1 => Some (L.lub x1 y1)
+  end.
+
+Lemma ge_lub_left: forall x y, ge (lub x y) x.
+Proof.
+  unfold ge, lub; intros; destruct x; destruct y; auto. apply L.ge_lub_left.
+Qed.
+
+Lemma ge_lub_right: forall x y, ge (lub x y) y.
+Proof.
+  unfold ge, lub; intros; destruct x; destruct y; auto. apply L.ge_lub_right.
+Qed.
+
+Definition top : t := None.
+
+Lemma ge_top: forall x, ge top x.
+Proof.
+  unfold ge, top; intros. auto.
+Qed.
+
+End LOption.
diff --git a/lib/Maps.v b/lib/Maps.v
index f667ea4..ddb0c33 100644
--- a/lib/Maps.v
+++ b/lib/Maps.v
@@ -377,23 +377,18 @@ Module PTree <: TREE.
       end.
 
     Lemma bempty_correct:
-      forall m, bempty m = true -> forall x, get x m = None.
+      forall m, bempty m = true <-> (forall x, get x m = None).
     Proof.
-      induction m; simpl; intros. 
-      change (@Leaf A) with (empty A). apply gempty.
-      destruct o. congruence. destruct (andb_prop _ _ H). 
+      induction m; simpl.
+      split; intros. apply gleaf. auto.
+      destruct o; split; intros.
+      congruence.
+      generalize (H xH); simpl; congruence.
+      destruct (andb_prop _ _ H). rewrite IHm1 in H0. rewrite IHm2 in H1.
       destruct x; simpl; auto.
-    Qed.
-
-    Lemma bempty_complete:
-      forall m, (forall x, get x m = None) -> bempty m = true.
-    Proof.
-      induction m; simpl; intros.
-      auto.
-      destruct o. generalize (H xH); simpl; congruence.
-      rewrite IHm1. rewrite IHm2. auto. 
-      intros; apply (H (xI x)).
-      intros; apply (H (xO x)).
+      apply andb_true_intro; split. 
+      apply IHm1. intros; apply (H (xO x)).
+      apply IHm2. intros; apply (H (xI x)).
     Qed.
 
     Lemma beq_correct:
@@ -406,38 +401,23 @@ Module PTree <: TREE.
        | _, _ => False
        end).
     Proof.
-      intros; split.
-    - (* beq = true -> exteq *)
-      revert m1 m2. induction m1; destruct m2; simpl.
-      intros; red; intros. change (@Leaf A) with (empty A). 
-      repeat rewrite gempty. auto.
-      destruct o; intro. congruence. 
-      red; intros. change (@Leaf A) with (empty A). rewrite gempty.
-      rewrite bempty_correct. auto. assumption. 
-      destruct o; intro. congruence. 
-      red; intros. change (@Leaf A) with (empty A). rewrite gempty.
-      rewrite bempty_correct. auto. assumption. 
-      destruct o; destruct o0; simpl; intro; try congruence.
-      destruct (andb_prop _ _ H). destruct (andb_prop _ _ H0). 
-      destruct x; simpl.
-      apply IHm1_2; auto. apply IHm1_1; auto. auto. 
-      destruct (andb_prop _ _ H).
-      red; intros. destruct x; simpl.
-      apply IHm1_2; auto. apply IHm1_1; auto.
-      auto.
-    - (* exteq -> beq = true *)
-      revert m1 m2. induction m1; destruct m2; simpl; intros. 
-      auto.
-      change (bempty (Node m2_1 o m2_2) = true).
-      apply bempty_complete. intros. generalize (H x). rewrite gleaf.
-      destruct (get x (Node m2_1 o m2_2)); tauto. 
-      change (bempty (Node m1_1 o m1_2) = true).
-      apply bempty_complete. intros. generalize (H x). rewrite gleaf.
-      destruct (get x (Node m1_1 o m1_2)); tauto.
-      apply andb_true_intro. split. apply andb_true_intro. split. 
-      generalize (H xH); simpl. destruct o; destruct o0; auto.
-      apply IHm1_1. intros. apply (H (xO x)). 
-      apply IHm1_2. intros. apply (H (xI x)). 
+      induction m1; intros.
+    - simpl. rewrite bempty_correct. split; intros.
+      rewrite gleaf. rewrite H. auto.
+      generalize (H x). rewrite gleaf. destruct (get x m2); tauto. 
+    - destruct m2.
+      + unfold beq. rewrite bempty_correct. split; intros.
+        rewrite H. rewrite gleaf. auto.
+        generalize (H x). rewrite gleaf. destruct (get x (Node m1_1 o m1_2)); tauto.
+      + simpl. split; intros.
+        * destruct (andb_prop _ _ H). destruct (andb_prop _ _ H0).
+          rewrite IHm1_1 in H3. rewrite IHm1_2 in H1. 
+          destruct x; simpl. apply H1. apply H3.
+          destruct o; destruct o0; auto || congruence.
+        * apply andb_true_intro. split. apply andb_true_intro. split.
+          generalize (H xH); simpl. destruct o; destruct o0; tauto. 
+          apply IHm1_1. intros; apply (H (xO x)).
+          apply IHm1_2. intros; apply (H (xI x)).
     Qed.
 
   End BOOLEAN_EQUALITY.
@@ -559,7 +539,7 @@ Module PTree <: TREE.
 
   Section COMBINE.
 
-  Variable A B C: Type.
+  Variables A B C: Type.
   Variable f: option A -> option B -> option C.
   Hypothesis f_none_none: f None None = None.
 
@@ -646,45 +626,53 @@ Module PTree <: TREE.
      auto. 
   Qed.
 
-    Fixpoint xelements (A : Type) (m : t A) (i : positive) {struct m}
-             : list (positive * A) :=
+    Fixpoint xelements (A : Type) (m : t A) (i : positive) 
+                       (k: list (positive * A)) {struct m}
+                       : list (positive * A) :=
       match m with
-      | Leaf => nil
+      | Leaf => k
       | Node l None r =>
-          (xelements l (append i (xO xH))) ++ (xelements r (append i (xI xH)))
+          xelements l (append i (xO xH)) (xelements r (append i (xI xH)) k)
       | Node l (Some x) r =>
-          (xelements l (append i (xO xH)))
-          ++ ((i, x) :: xelements r (append i (xI xH)))
+          xelements l (append i (xO xH))
+            ((i, x) :: xelements r (append i (xI xH)) k)
       end.
 
-  (* Note: function [xelements] above is inefficient.  We should apply
-     deforestation to it, but that makes the proofs even harder. *)
 
-  Definition elements A (m : t A) := xelements m xH.
+  Definition elements (A: Type) (m : t A) := xelements m xH nil.
+
+    Lemma xelements_incl:
+      forall (A: Type) (m: t A) (i : positive) k x,
+      In x k -> In x (xelements m i k).
+    Proof.
+      induction m; intros; simpl.
+      auto.
+      destruct o.
+      apply IHm1. simpl; right; auto.
+      auto.
+    Qed.
 
     Lemma xelements_correct:
-      forall (A: Type) (m: t A) (i j : positive) (v: A),
-      get i m = Some v -> In (append j i, v) (xelements m j).
+      forall (A: Type) (m: t A) (i j : positive) (v: A) k,
+      get i m = Some v -> In (append j i, v) (xelements m j k).
     Proof.
       induction m; intros.
        rewrite (gleaf A i) in H; congruence.
        destruct o; destruct i; simpl; simpl in H.
-        rewrite append_assoc_1; apply in_or_app; right; apply in_cons;
-          apply IHm2; auto.
-        rewrite append_assoc_0; apply in_or_app; left; apply IHm1; auto.
-        rewrite append_neutral_r; apply in_or_app; injection H;
-          intro EQ; rewrite EQ; right; apply in_eq.
-        rewrite append_assoc_1; apply in_or_app; right; apply IHm2; auto.
-        rewrite append_assoc_0; apply in_or_app; left; apply IHm1; auto.
-        congruence.
+        rewrite append_assoc_1. apply xelements_incl. right. auto.
+        rewrite append_assoc_0. auto. 
+        inv H. apply xelements_incl. left. rewrite append_neutral_r; auto.
+        rewrite append_assoc_1. apply xelements_incl. auto. 
+        rewrite append_assoc_0. auto. 
+        inv H.
     Qed.
 
   Theorem elements_correct:
     forall (A: Type) (m: t A) (i: positive) (v: A),
     get i m = Some v -> In (i, v) (elements m).
   Proof.
-    intros A m i v H.
-    exact (xelements_correct m i xH H).
+    intros A m i v H. 
+    exact (xelements_correct m i xH nil H).
   Qed.
 
     Fixpoint xget (A : Type) (i j : positive) (m : t A) {struct j} : option A :=
@@ -695,6 +683,13 @@ Module PTree <: TREE.
       | _, _ => None
       end.
 
+    Lemma xget_diag :
+      forall (A : Type) (i : positive) (m1 m2 : t A) (o : option A),
+      xget i i (Node m1 o m2) = o.
+    Proof.
+      induction i; intros; simpl; auto.
+    Qed.
+
     Lemma xget_left :
       forall (A : Type) (j i : positive) (m1 m2 : t A) (o : option A) (v : A),
       xget i (append j (xO xH)) m1 = Some v -> xget i j (Node m1 o m2) = Some v.
@@ -703,122 +698,26 @@ Module PTree <: TREE.
       destruct i; congruence.
     Qed.
 
-    Lemma xelements_ii :
-      forall (A: Type) (m: t A) (i j : positive) (v: A),
-      In (xI i, v) (xelements m (xI j)) -> In (i, v) (xelements m j).
-    Proof.
-      induction m.
-       simpl; auto.
-       intros; destruct o; simpl; simpl in H; destruct (in_app_or _ _ _ H);
-         apply in_or_app.
-        left; apply IHm1; auto.
-        right; destruct (in_inv H0).
-         injection H1; intros EQ1 EQ2; rewrite EQ1; rewrite EQ2; apply in_eq.
-         apply in_cons; apply IHm2; auto.
-        left; apply IHm1; auto.
-        right; apply IHm2; auto.
-    Qed.
-
-    Lemma xelements_io :
-      forall (A: Type) (m: t A) (i j : positive) (v: A),
-      ~In (xI i, v) (xelements m (xO j)).
-    Proof.
-      induction m.
-       simpl; auto.
-       intros; destruct o; simpl; intro H; destruct (in_app_or _ _ _ H).
-        apply (IHm1 _ _ _ H0).
-        destruct (in_inv H0).
-         congruence.
-         apply (IHm2 _ _ _ H1).
-        apply (IHm1 _ _ _ H0).
-        apply (IHm2 _ _ _ H0).
-    Qed.
-
-    Lemma xelements_oo :
-      forall (A: Type) (m: t A) (i j : positive) (v: A),
-      In (xO i, v) (xelements m (xO j)) -> In (i, v) (xelements m j).
-    Proof.
-      induction m.
-       simpl; auto.
-       intros; destruct o; simpl; simpl in H; destruct (in_app_or _ _ _ H);
-         apply in_or_app.
-        left; apply IHm1; auto.
-        right; destruct (in_inv H0).
-         injection H1; intros EQ1 EQ2; rewrite EQ1; rewrite EQ2; apply in_eq.
-         apply in_cons; apply IHm2; auto.
-        left; apply IHm1; auto.
-        right; apply IHm2; auto.
-    Qed.
-
-    Lemma xelements_oi :
-      forall (A: Type) (m: t A) (i j : positive) (v: A),
-      ~In (xO i, v) (xelements m (xI j)).
-    Proof.
-      induction m.
-       simpl; auto.
-       intros; destruct o; simpl; intro H; destruct (in_app_or _ _ _ H).
-        apply (IHm1 _ _ _ H0).
-        destruct (in_inv H0).
-         congruence.
-         apply (IHm2 _ _ _ H1).
-        apply (IHm1 _ _ _ H0).
-        apply (IHm2 _ _ _ H0).
-    Qed.
-
-    Lemma xelements_ih :
-      forall (A: Type) (m1 m2: t A) (o: option A) (i : positive) (v: A),
-      In (xI i, v) (xelements (Node m1 o m2) xH) -> In (i, v) (xelements m2 xH).
-    Proof.
-      destruct o; simpl; intros; destruct (in_app_or _ _ _ H).
-        absurd (In (xI i, v) (xelements m1 2)); auto; apply xelements_io; auto.
-        destruct (in_inv H0).
-         congruence.
-         apply xelements_ii; auto.
-        absurd (In (xI i, v) (xelements m1 2)); auto; apply xelements_io; auto.
-        apply xelements_ii; auto.
-    Qed.
-
-    Lemma xelements_oh :
-      forall (A: Type) (m1 m2: t A) (o: option A) (i : positive) (v: A),
-      In (xO i, v) (xelements (Node m1 o m2) xH) -> In (i, v) (xelements m1 xH).
-    Proof.
-      destruct o; simpl; intros; destruct (in_app_or _ _ _ H).
-        apply xelements_oo; auto.
-        destruct (in_inv H0).
-         congruence.
-         absurd (In (xO i, v) (xelements m2 3)); auto; apply xelements_oi; auto.
-        apply xelements_oo; auto.
-        absurd (In (xO i, v) (xelements m2 3)); auto; apply xelements_oi; auto.
-    Qed.
-
-    Lemma xelements_hi :
-      forall (A: Type) (m: t A) (i : positive) (v: A),
-      ~In (xH, v) (xelements m (xI i)).
+    Lemma xget_right :
+      forall (A : Type) (j i : positive) (m1 m2 : t A) (o : option A) (v : A),
+      xget i (append j (xI xH)) m2 = Some v -> xget i j (Node m1 o m2) = Some v.
     Proof.
-      induction m; intros.
-       simpl; auto.
-       destruct o; simpl; intro H; destruct (in_app_or _ _ _ H).
-        generalize H0; apply IHm1; auto.
-        destruct (in_inv H0).
-         congruence.
-         generalize H1; apply IHm2; auto.
-        generalize H0; apply IHm1; auto.
-        generalize H0; apply IHm2; auto.
+      induction j; intros; destruct i; simpl; simpl in H; auto; try congruence.
+      destruct i; congruence.
     Qed.
 
-    Lemma xelements_ho :
-      forall (A: Type) (m: t A) (i : positive) (v: A),
-      ~In (xH, v) (xelements m (xO i)).
+    Lemma xelements_complete:
+      forall (A: Type) (m: t A) (i j : positive) (v: A) k,
+      In (i, v) (xelements m j k) -> xget i j m = Some v \/ In (i, v) k.
     Proof.
-      induction m; intros.
-       simpl; auto.
-       destruct o; simpl; intro H; destruct (in_app_or _ _ _ H).
-        generalize H0; apply IHm1; auto.
-        destruct (in_inv H0).
-         congruence.
-         generalize H1; apply IHm2; auto.
-        generalize H0; apply IHm1; auto.
-        generalize H0; apply IHm2; auto.
+      induction m; simpl; intros.
+      auto.
+      destruct o. 
+      edestruct IHm1; eauto. left; apply xget_left; auto.
+      destruct H0. inv H0. left; apply xget_diag. 
+      edestruct IHm2; eauto. left; apply xget_right; auto.
+      edestruct IHm1; eauto. left; apply xget_left; auto.
+      edestruct IHm2; eauto. left; apply xget_right; auto.
     Qed.
 
     Lemma get_xget_h :
@@ -827,89 +726,50 @@ Module PTree <: TREE.
       destruct i; simpl; auto.
     Qed.
 
-    Lemma xelements_complete:
-      forall (A: Type) (i j : positive) (m: t A) (v: A),
-      In (i, v) (xelements m j) -> xget i j m = Some v.
-    Proof.
-      induction i; simpl; intros; destruct j; simpl.
-       apply IHi; apply xelements_ii; auto.
-       absurd (In (xI i, v) (xelements m (xO j))); auto; apply xelements_io.
-       destruct m.
-        simpl in H; tauto.
-        rewrite get_xget_h. apply IHi. apply (xelements_ih _ _ _ _ _ H).
-       absurd (In (xO i, v) (xelements m (xI j))); auto; apply xelements_oi.
-       apply IHi; apply xelements_oo; auto.
-       destruct m.
-        simpl in H; tauto.
-        rewrite get_xget_h. apply IHi. apply (xelements_oh _ _ _ _ _ H).
-       absurd (In (xH, v) (xelements m (xI j))); auto; apply xelements_hi.
-       absurd (In (xH, v) (xelements m (xO j))); auto; apply xelements_ho.
-       destruct m.
-        simpl in H; tauto.
-        destruct o; simpl in H; destruct (in_app_or _ _ _ H).
-         absurd (In (xH, v) (xelements m1 (xO xH))); auto; apply xelements_ho.
-         destruct (in_inv H0).
-          congruence.
-          absurd (In (xH, v) (xelements m2 (xI xH))); auto; apply xelements_hi.
-         absurd (In (xH, v) (xelements m1 (xO xH))); auto; apply xelements_ho.
-         absurd (In (xH, v) (xelements m2 (xI xH))); auto; apply xelements_hi.
-    Qed.
-
   Theorem elements_complete:
     forall (A: Type) (m: t A) (i: positive) (v: A),
     In (i, v) (elements m) -> get i m = Some v.
   Proof.
-    intros A m i v H.
-    unfold elements in H.
-    rewrite get_xget_h.
-    exact (xelements_complete i xH m v H).
+    intros A m i v H. unfold elements in H.
+    edestruct xelements_complete; eauto. 
+    rewrite get_xget_h. auto. 
+    contradiction.
   Qed.
 
   Lemma in_xelements:
-    forall (A: Type) (m: t A) (i k: positive) (v: A),
-    In (k, v) (xelements m i) ->
-    exists j, k = append i j.
+    forall (A: Type) (m: t A) (i k: positive) (v: A) l,
+    In (k, v) (xelements m i l) ->
+    (exists j, k = append i j) \/ In (k, v) l.
   Proof.
     induction m; simpl; intros.
-    tauto.
-    assert (k = i \/ In (k, v) (xelements m1 (append i 2))
-                  \/ In (k, v) (xelements m2 (append i 3))).
-      destruct o.
-      elim (in_app_or _ _ _ H); simpl; intuition.
-      replace k with i. tauto. congruence.
-      elim (in_app_or _ _ _ H); simpl; intuition.
-    elim H0; intro.
-    exists xH. rewrite append_neutral_r. auto.
-    elim H1; intro.
-    elim (IHm1 _ _ _ H2). intros k1 EQ. rewrite EQ.
-    rewrite <- append_assoc_0. exists (xO k1); auto.
-    elim (IHm2 _ _ _ H2). intros k1 EQ. rewrite EQ.
-    rewrite <- append_assoc_1. exists (xI k1); auto.
+    auto.
+    destruct o.
+    edestruct IHm1 as [[j EQ] | IN]; eauto.
+    rewrite <- append_assoc_0 in EQ. left; econstructor; eauto. 
+    destruct IN.
+    inv H0. left; exists xH; symmetry; apply append_neutral_r.
+    edestruct IHm2 as [[j EQ] | IN]; eauto.
+    rewrite <- append_assoc_1 in EQ. left; econstructor; eauto. 
+    edestruct IHm1 as [[j EQ] | IN]; eauto.
+    rewrite <- append_assoc_0 in EQ. left; econstructor; eauto. 
+    edestruct IHm2 as [[j EQ] | IN']; eauto.
+    rewrite <- append_assoc_1 in EQ. left; econstructor; eauto.
   Qed.
 
-  Definition xkeys (A: Type) (m: t A) (i: positive) :=
-    List.map (@fst positive A) (xelements m i).
+  Definition xkeys (A: Type) (m: t A) (i: positive) (l: list (positive * A)) :=
+    List.map (@fst positive A) (xelements m i l).
 
   Lemma in_xkeys:
-    forall (A: Type) (m: t A) (i k: positive),
-    In k (xkeys m i) ->
-    exists j, k = append i j.
+    forall (A: Type) (m: t A) (i k: positive) l,
+    In k (xkeys m i l) ->
+    (exists j, k = append i j) \/ In k (List.map fst l).
   Proof.
-    unfold xkeys; intros. 
-    elim (list_in_map_inv _ _ _ H). intros [k1 v1] [EQ IN].
-    simpl in EQ; subst k1. apply in_xelements with A m v1. auto.
-  Qed.
-
-  Remark list_append_cons_norepet:
-    forall (A: Type) (l1 l2: list A) (x: A),
-    list_norepet l1 -> list_norepet l2 -> list_disjoint l1 l2 ->
-    ~In x l1 -> ~In x l2 ->
-    list_norepet (l1 ++ x :: l2).
-  Proof.
-    intros. apply list_norepet_append_commut. simpl; constructor.
-    red; intros. elim (in_app_or _ _ _ H4); intro; tauto.
-    apply list_norepet_append; auto. 
-    apply list_disjoint_sym; auto.
+    unfold xkeys; intros.
+    exploit list_in_map_inv; eauto. intros [[k1 v1] [EQ IN]].
+    simpl in EQ; subst k1.
+    exploit in_xelements; eauto. intros [EX | IN'].
+    auto.
+    right. change k with (fst (k, v1)). apply List.in_map; auto.
   Qed.
 
   Lemma append_injective:
@@ -922,44 +782,52 @@ Module PTree <: TREE.
   Qed.
 
   Lemma xelements_keys_norepet:
-    forall (A: Type) (m: t A) (i: positive),
-    list_norepet (xkeys m i).
+    forall (A: Type) (m: t A) (i: positive) l,
+    (forall k v, get k m = Some v -> ~In (append i k) (List.map fst l)) ->
+    list_norepet (List.map fst l) ->
+    list_norepet (xkeys m i l).
   Proof.
-    induction m; unfold xkeys; simpl; fold xkeys; intros.
-    constructor.
-    assert (list_disjoint (xkeys m1 (append i 2)) (xkeys m2 (append i 3))).
-      red; intros; red; intro. subst y. 
-      elim (in_xkeys _ _ _ H); intros j1 EQ1.
-      elim (in_xkeys _ _ _ H0); intros j2 EQ2.
-      rewrite EQ1 in EQ2. 
-      rewrite <- append_assoc_0 in EQ2. 
-      rewrite <- append_assoc_1 in EQ2. 
-      generalize (append_injective _ _ _ EQ2). congruence.
-    assert (forall (m: t A) j,
-            j = 2%positive \/ j = 3%positive ->
-            ~In i (xkeys m (append i j))).
-      intros; red; intros. 
-      elim (in_xkeys _ _ _ H1); intros k EQ.
-      assert (EQ1: append i xH = append (append i j) k).
-        rewrite append_neutral_r. auto.
-      elim H0; intro; subst j;
-      try (rewrite <- append_assoc_0 in EQ1);
-      try (rewrite <- append_assoc_1 in EQ1);
-      generalize (append_injective _ _ _ EQ1); congruence.
-    destruct o; rewrite list_append_map; simpl;
-    change (List.map (@fst positive A) (xelements m1 (append i 2)))
-      with (xkeys m1 (append i 2));
-    change (List.map (@fst positive A) (xelements m2 (append i 3)))
-      with (xkeys m2 (append i 3)).
-    apply list_append_cons_norepet; auto. 
-    apply list_norepet_append; auto.
+    unfold xkeys; induction m; simpl; intros.
+    auto.
+    destruct o.
+    apply IHm1. 
+    intros; red; intros IN. rewrite <- append_assoc_0 in IN. simpl in IN; destruct IN.
+    exploit (append_injective i k~0 xH). rewrite append_neutral_r. auto.
+    congruence.
+    exploit in_xkeys; eauto. intros [[j EQ] | IN]. 
+    rewrite <- append_assoc_1 in EQ. exploit append_injective; eauto. congruence.
+    elim (H (xO k) v); auto.
+    simpl. constructor. 
+    red; intros IN. exploit in_xkeys; eauto. intros [[j EQ] | IN'].
+    rewrite <- append_assoc_1 in EQ. 
+    exploit (append_injective i j~1 xH). rewrite append_neutral_r. auto. congruence.
+    elim (H xH a). auto. rewrite append_neutral_r. auto.
+    apply IHm2; auto. intros. rewrite <- append_assoc_1. eapply H; eauto. 
+    apply IHm1. 
+    intros; red; intros IN. rewrite <- append_assoc_0 in IN.
+    exploit in_xkeys; eauto. intros [[j EQ] | IN']. 
+    rewrite <- append_assoc_1 in EQ. exploit append_injective; eauto. congruence.
+    elim (H (xO k) v); auto.
+    apply IHm2; auto. intros. rewrite <- append_assoc_1. eapply H; eauto. 
   Qed.
 
   Theorem elements_keys_norepet:
     forall (A: Type) (m: t A), 
     list_norepet (List.map (@fst elt A) (elements m)).
   Proof.
-    intros. change (list_norepet (xkeys m 1)). apply xelements_keys_norepet.
+    intros. change (list_norepet (xkeys m 1 nil)). apply xelements_keys_norepet.
+    intros; red; intros. elim H0. constructor.
+  Qed.
+
+  Remark xelements_empty:
+    forall (A: Type) (m: t A) i l, (forall i, get i m = None) -> xelements m i l = l.
+  Proof.
+    induction m; simpl; intros. 
+    auto.
+    destruct o. generalize (H xH); simpl; congruence. 
+    rewrite IHm1. apply IHm2. 
+    intros. apply (H (xI i0)).
+    intros. apply (H (xO i0)).
   Qed.
 
   Theorem elements_canonical_order:
@@ -971,48 +839,48 @@ Module PTree <: TREE.
       (elements m) (elements n).
   Proof.
     intros until R.
-    assert (forall m n j,
+    assert (forall m n j l1 l2,
     (forall i x, get i m = Some x -> exists y, get i n = Some y /\ R x y) ->
     (forall i y, get i n = Some y -> exists x, get i m = Some x /\ R x y) ->
     list_forall2
       (fun i_x i_y => fst i_x = fst i_y /\ R (snd i_x) (snd i_y))
-      (xelements m j) (xelements n j)).
-    induction m; induction n; intros; simpl.
-    constructor.
-    destruct o. exploit (H0 xH). simpl. reflexivity. simpl. intros [x [P Q]]. congruence.
-    change (@nil (positive*A)) with ((@nil (positive * A))++nil).
-    apply list_forall2_app.
-    apply IHn1.
-    intros. rewrite gleaf in H1. congruence.
-    intros. exploit (H0 (xO i)). simpl; eauto. rewrite gleaf. intros [x [P Q]]. congruence.
-    apply IHn2.
-    intros. rewrite gleaf in H1. congruence.
-    intros. exploit (H0 (xI i)). simpl; eauto. rewrite gleaf. intros [x [P Q]]. congruence.
-    destruct o. exploit (H xH). simpl. reflexivity. simpl. intros [x [P Q]]. congruence.
-    change (@nil (positive*B)) with (xelements (@Leaf B) (append j 2) ++ (xelements (@Leaf B) (append j 3))).
-    apply list_forall2_app.
-    apply IHm1.
-    intros. exploit (H (xO i)). simpl; eauto. rewrite gleaf. intros [y [P Q]]. congruence.
-    intros. rewrite gleaf in H1. congruence.
-    apply IHm2.
-    intros. exploit (H (xI i)). simpl; eauto. rewrite gleaf. intros [y [P Q]]. congruence.
-    intros. rewrite gleaf in H1. congruence.
-    exploit (IHm1 n1 (append j 2)). 
-    intros. exploit (H (xO i)). simpl; eauto. simpl. auto.
-    intros. exploit (H0 (xO i)). simpl; eauto. simpl; auto.
-    intro REC1.
-    exploit (IHm2 n2 (append j 3)). 
-    intros. exploit (H (xI i)). simpl; eauto. simpl. auto.
-    intros. exploit (H0 (xI i)). simpl; eauto. simpl; auto.
-    intro REC2.
-    destruct o; destruct o0.
-    apply list_forall2_app; auto. constructor; auto. 
-    simpl; split; auto. exploit (H xH). simpl; eauto. simpl. intros [y [P Q]]. congruence.
-    exploit (H xH). simpl; eauto. simpl. intros [y [P Q]]; congruence.
-    exploit (H0 xH). simpl; eauto. simpl. intros [x [P Q]]; congruence.
-    apply list_forall2_app; auto.
-
-    unfold elements; auto.
+      l1 l2 ->
+    list_forall2
+      (fun i_x i_y => fst i_x = fst i_y /\ R (snd i_x) (snd i_y))
+      (xelements m j l1) (xelements n j l2)).
+  {
+    induction m; simpl; intros.
+    rewrite xelements_empty. auto. 
+    intros. destruct (get i n) eqn:E; auto. exploit H0; eauto. 
+    intros [x [P Q]]. rewrite gleaf in P; congruence.
+    destruct o. 
+    destruct n. exploit (H xH a); auto. simpl. intros [y [P Q]]; congruence.
+    exploit (H xH a); auto. intros [y [P Q]]. simpl in P. subst o. 
+    simpl. apply IHm1. 
+    intros i x. exact (H (xO i) x). 
+    intros i x. exact (H0 (xO i) x).
+    constructor. simpl; auto. 
+    apply IHm2. 
+    intros i x. exact (H (xI i) x). 
+    intros i x. exact (H0 (xI i) x).
+    auto.
+    destruct n. simpl. 
+    rewrite ! xelements_empty. auto.
+    intros. destruct (get i m2) eqn:E; auto. exploit (H (xI i)); eauto.
+    rewrite gleaf. intros [y [P Q]]; congruence.
+    intros. destruct (get i m1) eqn:E; auto. exploit (H (xO i)); eauto.
+    rewrite gleaf. intros [y [P Q]]; congruence.
+    destruct o. 
+    exploit (H0 xH); simpl; eauto. intros [y [P Q]]; congruence.
+    simpl. apply IHm1. 
+    intros i x. exact (H (xO i) x). 
+    intros i x. exact (H0 (xO i) x).
+    apply IHm2. 
+    intros i x. exact (H (xI i) x). 
+    intros i x. exact (H0 (xI i) x).
+    auto.
+  }
+    intros. apply H. auto. auto. constructor.
   Qed.
 
   Theorem elements_extensional:
@@ -1045,19 +913,15 @@ Module PTree <: TREE.
     xfold f xH m v.
 
   Lemma xfold_xelements:
-    forall (A B: Type) (f: B -> positive -> A -> B) m i v,
-    xfold f i m v =
-    List.fold_left (fun a p => f a (fst p) (snd p))
-                   (xelements m i)
-                   v.
+    forall (A B: Type) (f: B -> positive -> A -> B) m i v l,
+    List.fold_left (fun a p => f a (fst p) (snd p)) l (xfold f i m v) =
+    List.fold_left (fun a p => f a (fst p) (snd p)) (xelements m i l) v.
   Proof.
     induction m; intros.
     simpl. auto.
-    simpl. destruct o.
-    rewrite fold_left_app. simpl. 
-    rewrite IHm1. apply IHm2. 
-    rewrite fold_left_app. simpl.
-    rewrite IHm1. apply IHm2.
+    destruct o; simpl.
+    rewrite <- IHm1. simpl. rewrite <- IHm2. auto.
+    rewrite <- IHm1. rewrite <- IHm2. auto. 
   Qed.
 
   Theorem fold_spec:
@@ -1065,7 +929,7 @@ Module PTree <: TREE.
     fold f m v =
     List.fold_left (fun a p => f a (fst p) (snd p)) (elements m) v.
   Proof.
-    intros. unfold fold, elements. apply xfold_xelements. 
+    intros. unfold fold, elements. rewrite <- xfold_xelements. auto. 
   Qed.
 
 End PTree.
diff --git a/lib/Ordered.v b/lib/Ordered.v
index f52a7ef..026671a 100644
--- a/lib/Ordered.v
+++ b/lib/Ordered.v
@@ -48,6 +48,36 @@ Definition eq_dec : forall x y, { eq x y } + { ~ eq x y } := peq.
 
 End OrderedPositive.
 
+(** The ordered type of integers *)
+
+Module OrderedZ <: OrderedType.
+
+Definition t := Z.
+Definition eq (x y: t) := x = y.
+Definition lt := Zlt.
+
+Lemma eq_refl : forall x : t, eq x x.
+Proof (@refl_equal t). 
+Lemma eq_sym : forall x y : t, eq x y -> eq y x.
+Proof (@sym_equal t).
+Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z.
+Proof (@trans_equal t).
+Lemma lt_trans : forall x y z : t, lt x y -> lt y z -> lt x z.
+Proof Zlt_trans.
+Lemma lt_not_eq : forall x y : t, lt x y -> ~ eq x y.
+Proof. unfold lt, eq, t; intros. omega. Qed. 
+Lemma compare : forall x y : t, Compare lt eq x y.
+Proof.
+  intros. destruct (Z.compare x y) as [] eqn:E.
+  apply EQ. red. apply Z.compare_eq_iff. assumption.
+  apply LT. assumption.
+  apply GT. apply Z.compare_gt_iff. assumption.
+Defined.
+
+Definition eq_dec : forall x y, { eq x y } + { ~ eq x y } := zeq.
+
+End OrderedZ.
+
 (** The ordered type of machine integers *)
 
 Module OrderedInt <: OrderedType.
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index 27e801a..115d846 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -23,7 +23,7 @@ Require Import Events.
 Require Import Globalenvs.
 Require Import Smallstep.
 Require Import Locations.
-Require Stacklayout.
+Require Import Stacklayout.
 Require Import Conventions.
 
 (** * Abstract syntax *)
@@ -222,7 +222,7 @@ Inductive instruction : Type :=
   | Pxori: ireg -> ireg -> constant -> instruction            (**r bitwise xor with immediate *)
   | Pxoris: ireg -> ireg -> constant -> instruction           (**r bitwise xor with immediate high *)
   | Plabel: label -> instruction                              (**r define a code label *)
-  | Pbuiltin: external_function -> list preg -> preg -> instruction (**r built-in function *)
+  | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *)
   | Pannot: external_function -> list annot_param -> instruction (**r annotation statement *)
 
 with annot_param : Type :=
@@ -324,6 +324,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset :=
   | r :: l' => undef_regs l' (rs#r <- Vundef)
   end.
 
+(** Assigning multiple registers *)
+
+Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset :=
+  match rl, vl with
+  | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1)
+  | _, _ => rs
+  end.
+
 Section RELSEM.
 
 (** Looking up instructions in a code sequence by position. *)
@@ -777,45 +785,45 @@ Definition preg_of (r: mreg) : preg :=
   match r with
   | R3 => GPR3  | R4 => GPR4  | R5 => GPR5  | R6 => GPR6
   | R7 => GPR7  | R8 => GPR8  | R9 => GPR9  | R10 => GPR10
+  | R11 => GPR11 | R12 => GPR12
   | R14 => GPR14 | R15 => GPR15 | R16 => GPR16
   | R17 => GPR17 | R18 => GPR18 | R19 => GPR19 | R20 => GPR20
   | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24
   | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28
   | R29 => GPR29 | R30 => GPR30 | R31 => GPR31
-  | IT1 => GPR11 | IT2 => GPR12
+  | F0 => FPR0
   | F1 => FPR1  | F2 => FPR2  | F3 => FPR3  | F4 => FPR4
   | F5 => FPR5  | F6 => FPR6  | F7 => FPR7  | F8 => FPR8
-  | F9 => FPR9  | F10 => FPR10 | F11 => FPR11
-  | F14 => FPR14 | F15 => FPR15
+  | F9 => FPR9  | F10 => FPR10 | F11 => FPR11 | F12 => FPR12
+  | F13 => FPR13 | F14 => FPR14 | F15 => FPR15
   | F16 => FPR16 | F17 => FPR17 | F18 => FPR18 | F19 => FPR19
   | F20 => FPR20 | F21 => FPR21 | F22 => FPR22 | F23 => FPR23
   | F24 => FPR24 | F25 => FPR25 | F26 => FPR26 | F27 => FPR27
   | F28 => FPR28 | F29 => FPR29 | F30 => FPR30 | F31 => FPR31
-  | FT1 => FPR0 | FT2 => FPR12 | FT3 => FPR13
   end.
 
 (** Extract the values of the arguments of an external call.
     We exploit the calling conventions from module [Conventions], except that
     we use PPC registers instead of locations. *)
 
+Definition chunk_of_type (ty: typ) :=
+  match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end.
+
 Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
   | extcall_arg_reg: forall r,
       extcall_arg rs m (R r) (rs (preg_of r))
-  | extcall_arg_int_stack: forall ofs bofs v,
-      bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mint32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v ->
-      extcall_arg rs m (S (Outgoing ofs Tint)) v
-  | extcall_arg_float_stack: forall ofs bofs v,
+  | extcall_arg_stack: forall ofs ty bofs v,
       bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mfloat64al32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v ->
-      extcall_arg rs m (S (Outgoing ofs Tfloat)) v.
+      Mem.loadv (chunk_of_type ty) m
+                (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v ->
+      extcall_arg rs m (S Outgoing ofs ty) v.
 
 Definition extcall_arguments
     (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
   list_forall2 (extcall_arg rs m) (loc_arguments sg) args.
 
-Definition loc_external_result (sg: signature) : preg :=
-  preg_of (loc_result sg).
+Definition loc_external_result (sg: signature) : list preg :=
+  map preg_of (loc_result sg).
 
 (** Extract the values of the arguments of an annotation. *)
 
@@ -845,33 +853,31 @@ Inductive step: state -> trace -> state -> Prop :=
       exec_instr c i rs m = Next rs' m' ->
       step (State rs m) E0 (State rs' m')
   | exec_step_builtin:
-      forall b ofs c ef args res rs m t v m',
+      forall b ofs c ef args res rs m t vl rs' m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal c) ->
       find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) ->
-      external_call ef ge (map rs args) m t v m' ->
-      step (State rs m) t
-           (State (nextinstr(rs #GPR11 <- Vundef #GPR12 <- Vundef
-                                #FPR12 <- Vundef #FPR13 <- Vundef
-                                #FPR0 <- Vundef #CTR <- Vundef
-                                #res <- v)) m')
+      external_call' ef ge (map rs args) m t vl m' ->
+      rs' = nextinstr
+              (set_regs res vl
+                (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+      step (State rs m) t (State rs' m')
   | exec_step_annot:
       forall b ofs c ef args rs m vargs t v m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal c) ->
       find_instr (Int.unsigned ofs) c = Some (Pannot ef args) ->
       annot_arguments rs m args vargs ->
-      external_call ef ge vargs m t v m' ->
+      external_call' ef ge vargs m t v m' ->
       step (State rs m) t
            (State (nextinstr rs) m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
       rs PC = Vptr b Int.zero ->
       Genv.find_funct_ptr ge b = Some (External ef) ->
-      external_call ef ge args m t res m' ->
+      external_call' ef ge args m t res m' ->
       extcall_arguments rs m (ef_sig ef) args ->
-      rs' = (rs#(loc_external_result (ef_sig ef)) <- res
-               #PC <- (rs LR)) ->
+      rs' = (set_regs (loc_external_result (ef_sig ef)) res rs) #PC <- (rs RA) ->
       step (State rs m) t (State rs' m').
 
 End RELSEM.
@@ -939,21 +945,21 @@ Ltac Equalities :=
   discriminate.
   discriminate.
   inv H11. 
-  exploit external_call_determ. eexact H4. eexact H11. intros [A B].
+  exploit external_call_determ'. eexact H4. eexact H9. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
   inv H12.
   assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0.
-  exploit external_call_determ. eexact H5. eexact H13. intros [A B].
+  exploit external_call_determ'. eexact H5. eexact H13. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
   assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
-  exploit external_call_determ. eexact H3. eexact H8. intros [A B].
+  exploit external_call_determ'. eexact H3. eexact H8. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
 (* trace length *)
   red; intros. inv H; simpl.
   omega.
-  eapply external_call_trace_length; eauto.
-  eapply external_call_trace_length; eauto.
-  eapply external_call_trace_length; eauto.
+  inv H3; eapply external_call_trace_length; eauto.
+  inv H4; eapply external_call_trace_length; eauto.
+  inv H2; eapply external_call_trace_length; eauto.
 (* initial states *)
   inv H; inv H0. f_equal. congruence.
 (* final no step *)
@@ -973,13 +979,4 @@ Definition data_preg (r: preg) : bool :=
   | _ => true
   end.
 
-Definition nontemp_preg (r: preg) : bool :=
-  match r with
-  | IR GPR0 => false | IR GPR11 => false | IR GPR12 => false
-  | FR FPR0 => false | FR FPR12 => false | FR FPR13 => false
-  | PC => false      | LR => false       | CTR => false
-  | CR0_0 => false   | CR0_1 => false    | CR0_2 => false    | CR0_3 => false
-  | CARRY => false
-  | _ => true
-  end.
 
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 39b84e0..6a1d07e 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -136,6 +136,8 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
             Plfd r (Cint ofs) base :: k
           else
             loadimm GPR0 ofs (Plfdx r base GPR0 :: k))
+  | Tlong =>
+      Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
@@ -152,6 +154,8 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
             Pstfd r (Cint ofs) base :: k
           else
             loadimm GPR0 ofs (Pstfdx r base GPR0 :: k))
+  | Tlong =>
+      Error (msg "Asmgen.storeind")
   end.
 
 (** Constructor for a floating-point comparison.  The PowerPC has
@@ -336,8 +340,8 @@ Definition transl_op
       OK (if symbol_is_small_data s ofs then
            Paddi r GPR0 (Csymbol_sda s ofs) :: k
          else
-           Paddis GPR12 GPR0 (Csymbol_high s ofs) ::
-           Paddi r GPR12 (Csymbol_low s ofs) :: k)
+           Paddis r GPR0 (Csymbol_high s ofs) ::
+           Paddi r r (Csymbol_low s ofs) :: k)
   | Oaddrstack n, nil =>
       do r <- ireg_of res; OK (addimm r GPR1 n k)
   | Ocast8signed, a1 :: nil =>
@@ -428,7 +432,7 @@ Definition transl_op
       do r1 <- ireg_of a1; do r <- ireg_of res;
       OK (rolm r r1 amount mask k)
   | Oroli amount mask, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r2 <- ireg_of a2; do r <- ireg_of res;
       OK (Prlwimi r r2 amount mask :: k)
   | Onegf, a1 :: nil =>
@@ -467,7 +471,7 @@ Definition transl_op
 (** Translation of memory accesses: loads, and stores. *)
 
 Definition int_temp_for (r: mreg) :=
-  if mreg_eq r IT2 then GPR11 else GPR12.
+  if mreg_eq r R12 then GPR11 else GPR12.
 
 Definition transl_memory_access
      (mk1: constant -> ireg -> instruction)
@@ -529,6 +533,8 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
   | Mfloat64 | Mfloat64al32 =>
       do r <- freg_of dst;
       transl_memory_access (Plfd r) (Plfdx r) addr args GPR12 k
+  | Mint64 =>
+      Error (msg "Asmgen.transl_load")
   end.
 
 Definition transl_store (chunk: memory_chunk) (addr: addressing)
@@ -550,6 +556,8 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing)
   | Mfloat64 | Mfloat64al32 =>
       do r <- freg_of src;
       transl_memory_access (Pstfd r) (Pstfdx r) addr args temp k
+  | Mint64 =>
+      Error (msg "Asmgen.transl_store")
   end.
 
 (** Translation of arguments to annotations *)
@@ -574,7 +582,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
         loadind GPR11 ofs ty dst k
       else
         (do k1 <- loadind GPR11 ofs ty dst k;
-         loadind GPR1 f.(fn_link_ofs) Tint IT1 k1)
+         loadind GPR1 f.(fn_link_ofs) Tint R11 k1)
   | Mop op args res =>
       transl_op op args res k
   | Mload chunk addr args dst =>
@@ -598,7 +606,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
           Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: 
           Pbs symb :: k)
   | Mbuiltin ef args res =>
-      OK (Pbuiltin ef (map preg_of args) (preg_of res) :: k)
+      OK (Pbuiltin ef (map preg_of args) (map preg_of res) :: k)
   | Mannot ef args =>
       OK (Pannot ef (map transl_annot_param args) :: k)
   | Mlabel lbl =>
@@ -624,8 +632,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
 Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
   match i with
   | Msetstack src ofs ty => before
-  | Mgetparam ofs ty dst => negb (mreg_eq dst IT1)
-  | Mop Omove args res => before && negb (mreg_eq res IT1)
+  | Mgetparam ofs ty dst => negb (mreg_eq dst R11)
+  | Mop Omove args res => before && negb (mreg_eq res R11)
   | _ => false
   end.
 
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index 07e66cf..37c8808 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -487,9 +487,9 @@ Definition measure (s: Mach.state) : nat :=
   | Mach.Returnstate _ _ _ => 1%nat
   end.
 
-Remark preg_of_not_GPR11: forall r, negb (mreg_eq r IT1) = true -> IR GPR11 <> preg_of r.
+Remark preg_of_not_GPR11: forall r, negb (mreg_eq r R11) = true -> IR GPR11 <> preg_of r.
 Proof.
-  intros. change (IR GPR11) with (preg_of IT1). red; intros. 
+  intros. change (IR GPR11) with (preg_of R11). red; intros. 
   exploit preg_of_injective; eauto. intros; subst r; discriminate.
 Qed.
 
@@ -526,7 +526,8 @@ Proof.
   rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
   exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
   exists rs'; split. eauto.
-  split. change (undef_setstack rs) with rs. apply agree_exten with rs0; auto with asmgen.
+  split. change (Mach.undef_regs (destroyed_by_op Omove) rs) with rs. 
+  apply agree_exten with rs0; auto with asmgen.
   simpl; intros. rewrite Q; auto with asmgen.
 
 - (* Mgetparam *)
@@ -568,9 +569,11 @@ Opaque loadind.
   intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. 
   left; eapply exec_straight_steps; eauto; intros. simpl in TR.
   exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
-  exists rs2; split. eauto. split. auto. 
-  simpl. destruct op; try congruence. destruct ep; simpl; try congruence. intros. 
+  exists rs2; split. eauto. split. auto.
+  destruct op; simpl; try discriminate. intros.
+  destruct (andb_prop _ _ H1); clear H1.
   rewrite R; auto. apply preg_of_not_GPR11; auto.
+  change (destroyed_by_op Omove) with (@nil mreg). simpl; auto.
 
 - (* Mload *)
   assert (eval_addressing tge sp addr rs##args = Some a). 
@@ -595,7 +598,7 @@ Opaque loadind.
   left; eapply exec_straight_steps; eauto.
   intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]]. 
   exists rs2; split. eauto.
-  split. eapply agree_exten_temps; eauto. intros; auto with asmgen. 
+  split. eapply agree_undef_regs; eauto with asmgen.
   simpl; congruence.
 
 - (* Mcall *)
@@ -741,19 +744,21 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   inv AT. monadInv H3. 
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H2); intro NOOV.
-  exploit external_call_mem_extends; eauto. eapply preg_vals; eauto.
+  exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto.
   intros [vres' [m2' [A [B [C D]]]]].
   left. econstructor; split. apply plus_one. 
   eapply exec_step_builtin. eauto. eauto.
   eapply find_instr_tail; eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
+  eauto.
   econstructor; eauto.
-  Simpl. rewrite <- H0. simpl. econstructor; eauto.
+  Simpl. rewrite set_pregs_other_2. rewrite undef_regs_other_2. rewrite <- H0. simpl. econstructor; eauto.
   eapply code_tail_next_int; eauto.
-  apply agree_nextinstr. eapply agree_set_undef_mreg; eauto. 
-  rewrite Pregmap.gss. auto. 
-  intros. Simpl. 
+  apply preg_notin_charact; auto with asmgen.
+  apply preg_notin_charact; auto with asmgen.
+  apply agree_nextinstr. eapply agree_set_mregs; auto.
+  eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto. 
   congruence.
 
 - (* Mannot *)
@@ -761,12 +766,12 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H3); intro NOOV.
   exploit annot_arguments_match; eauto. intros [vargs' [P Q]]. 
-  exploit external_call_mem_extends; eauto.
+  exploit external_call_mem_extends'; eauto.
   intros [vres' [m2' [A [B [C D]]]]].
   left. econstructor; split. apply plus_one. 
   eapply exec_step_annot. eauto. eauto.
   eapply find_instr_tail; eauto. eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   eapply match_states_intro with (ep := false); eauto with coqlib.
   unfold nextinstr. rewrite Pregmap.gss. 
@@ -798,11 +803,11 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   destruct (snd (crbit_for_cond cond)).
   (* Pbt, taken *)
   econstructor; econstructor; econstructor; split. eexact A. 
-  split. eapply agree_exten_temps; eauto with asmgen.
+  split. eapply agree_exten; eauto with asmgen.
   simpl. rewrite B. reflexivity. 
   (* Pbf, taken *)
   econstructor; econstructor; econstructor; split. eexact A. 
-  split. eapply agree_exten_temps; eauto with asmgen.
+  split. eapply agree_exten; eauto with asmgen.
   simpl. rewrite B. reflexivity. 
 
 - (* Mcond false *)
@@ -815,7 +820,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   destruct (snd (crbit_for_cond cond)).
   apply exec_straight_one. simpl. rewrite B. reflexivity. auto.
   apply exec_straight_one. simpl. rewrite B. reflexivity. auto.
-  split. eapply agree_exten_temps; eauto with asmgen.
+  split. eapply agree_exten; eauto with asmgen.
   intros; Simpl.
   simpl. congruence.
 
@@ -835,7 +840,9 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   eapply find_instr_tail; eauto. 
   simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
   econstructor; eauto. 
-  eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simpl. 
+  eapply agree_undef_regs; eauto.
+Local Transparent destroyed_by_jumptable. 
+  simpl. intros. rewrite C; auto with asmgen. Simpl. 
   congruence.
 
 - (* Mreturn *)
@@ -896,6 +903,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   intros [m1' [C D]].
   exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto. 
   intros [m2' [F G]].
+  simpl Mach.chunk_of_type in F. 
   exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto. 
   intros [m3' [P Q]].
   (* Execution of function prologue *)
@@ -910,7 +918,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   rewrite <- H5 at 2. 
   apply exec_straight_three with rs2 m2' rs3 m2'.
   unfold exec_instr. rewrite C. fold sp.
-  rewrite <- (sp_val _ _ _ AG). unfold chunk_of_type in F. rewrite F. auto. 
+  rewrite <- (sp_val _ _ _ AG). rewrite F. auto. 
   simpl. auto.
   simpl. unfold store1. rewrite gpr_or_zero_not_zero. 
   change (rs3 GPR1) with sp. change (rs3 GPR0) with (rs0 LR). simpl. 
@@ -928,8 +936,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   unfold rs4, rs3, rs2.
   apply agree_nextinstr. apply agree_set_other; auto. apply agree_set_other; auto. 
   apply agree_nextinstr. apply agree_set_other; auto.
-  eapply agree_change_sp; eauto. apply agree_exten_temps with rs0; eauto.
-  unfold sp; congruence.
+  eapply agree_change_sp; eauto. unfold sp; congruence.
   congruence.
 
 - (* external function *)
@@ -937,17 +944,15 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
   intros [tf [A B]]. simpl in B. inv B.
   exploit extcall_arguments_match; eauto. 
   intros [args' [C D]].
-  exploit external_call_mem_extends; eauto.
+  exploit external_call_mem_extends'; eauto.
   intros [res' [m2' [P [Q [R S]]]]].
   left; econstructor; split.
   apply plus_one. eapply exec_step_external; eauto. 
-  eapply external_call_symbols_preserved; eauto. 
+  eapply external_call_symbols_preserved'; eauto. 
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto.
   unfold loc_external_result.
-  eapply agree_set_mreg; eauto. 
-  rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto. 
-  intros; Simpl.
+  apply agree_set_other; auto. apply agree_set_mregs; auto.
 
 - (* return *)
   inv STACKS. simpl in *.
@@ -980,10 +985,9 @@ Lemma transf_final_states:
   forall st1 st2 r, 
   match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r.
 Proof.
-  intros. inv H0. inv H. inv STACKS. constructor.
-  auto. 
-  compute in H1. 
-  generalize (preg_val _ _ _ R3 AG). rewrite H1. intros LD; inv LD. auto.
+  intros. inv H0. inv H. constructor. auto. 
+  compute in H1. inv H1.  
+  generalize (preg_val _ _ _ R3 AG). rewrite H2. intros LD; inv LD. auto.
 Qed.
 
 Theorem transf_program_correct:
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index 0e6d3e1..cd961c9 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -546,7 +546,8 @@ Ltac ArgsInv :=
   | [ H: Error _ = OK _ |- _ ] => discriminate
   | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
   | [ H: bind _ _ = OK _ |- _ ] => monadInv H
-  | [ H: assertion _ = OK _ |- _ ] => monadInv H
+  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
   end);
   subst;
   repeat (match goal with
@@ -810,9 +811,7 @@ Lemma transl_op_correct_aux:
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ rs'#(preg_of res) = v
-  /\ forall r,
-     match op with Omove => data_preg r = true | _ => nontemp_preg r = true end ->
-     r <> preg_of res -> rs'#r = rs#r.
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
 Proof.
 Opaque Int.eq.
   intros. unfold transl_op in H; destruct op; ArgsInv; simpl in H0; try (inv H0); try TranslOpSimpl.
@@ -839,7 +838,7 @@ Opaque Val.add.
   rewrite gpr_or_zero_not_zero; eauto with asmgen. Simpl.  
   rewrite (Val.add_commut Vzero). rewrite high_half_zero. 
   rewrite Val.add_commut. rewrite low_high_half. auto.
-  intros; Simpl.
+  intros; Simpl. 
   (* Oaddrstack *)
   destruct (addimm_correct x GPR1 i k rs m) as [rs' [EX [RES OTH]]]; eauto with asmgen.
   exists rs'; auto with asmgen.
@@ -879,12 +878,11 @@ Opaque Val.add.
   intros. rewrite D; auto with asmgen. unfold rs1; Simpl.
   (* Oandimm *)
   destruct (andimm_correct x0 x i k rs m) as [rs' [A [B C]]]; eauto with asmgen.
-  exists rs'; auto with asmgen.
   (* Oorimm *)
-  destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]]. 
+  destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]].
   exists rs'; auto with asmgen.
   (* Oxorimm *)
-  destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]]. 
+  destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]].
   exists rs'; auto with asmgen.
   (* Onor *)
   replace (Val.notint (rs x))
@@ -898,7 +896,6 @@ Opaque Val.add.
   intros; Simpl.
   (* Orolm *)
   destruct (rolm_correct x0 x i i0 k rs m) as [rs' [A [B C]]]; eauto with asmgen.
-  exists rs'; auto with asmgen.
   (* Ointoffloat *)
   replace v with (Val.maketotal (Val.intoffloat (rs x))).
   TranslOpSimpl.
@@ -916,19 +913,17 @@ Lemma transl_op_correct:
   Mem.extends m m' ->
   exists rs',
      exec_straight ge fn c rs m' k rs' m'
-  /\ agree (Regmap.set res v (undef_op op ms)) sp rs'
-  /\ forall r, op = Omove -> data_preg r = true -> r <> preg_of res -> rs'#r = rs#r.
+  /\ agree (Regmap.set res v (Mach.undef_regs (destroyed_by_op op) ms)) sp rs'
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
 Proof.
   intros. 
   exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eauto. 
   intros [v' [A B]].  rewrite (sp_val _ _ _ H0) in A. 
   exploit transl_op_correct_aux; eauto. intros [rs' [P [Q R]]].
   rewrite <- Q in B.
-  exists rs'; split. eexact P. 
-  split. unfold undef_op. destruct op;
-  (apply agree_set_undef_mreg with rs || apply agree_set_mreg with rs);
+  exists rs'; split. eexact P.
+  split. apply agree_set_undef_mreg with rs; auto.
   auto.
-  intros. subst op. auto.
 Qed.
 
 (** Translation of memory accesses *)
@@ -1100,17 +1095,18 @@ Lemma transl_store_correct:
   Mem.storev chunk m a (rs (preg_of src)) = Some m' ->
   exists rs',
      exec_straight ge fn c rs m k rs' m'
-  /\ forall r, r <> PC -> r <> GPR11 -> r <> GPR12 -> r <> FPR13 -> rs' r = rs r.
+  /\ forall r, r <> PC -> preg_notin r (destroyed_by_store chunk addr) -> rs' r = rs r.
 Proof.
+Local Transparent destroyed_by_store.
   intros.
   assert (TEMP0: int_temp_for src = GPR11 \/ int_temp_for src = GPR12).
-    unfold int_temp_for. destruct (mreg_eq src IT2); auto.
+    unfold int_temp_for. destruct (mreg_eq src R12); auto.
   assert (TEMP1: int_temp_for src <> GPR0).
     destruct TEMP0; congruence.
   assert (TEMP2: IR (int_temp_for src) <> preg_of src).
-    unfold int_temp_for. destruct (mreg_eq src IT2). 
+    unfold int_temp_for. destruct (mreg_eq src R12). 
     subst src; simpl; congruence.
-    change (IR GPR12) with (preg_of IT2). red; intros; elim n. 
+    change (IR GPR12) with (preg_of R12). red; intros; elim n. 
     eapply preg_of_injective; eauto.
   assert (BASE: forall mk1 mk2 chunk',
   transl_memory_access mk1 mk2 addr args (int_temp_for src) k = OK c ->
@@ -1123,15 +1119,15 @@ Proof.
     store2 chunk' (preg_of src) r1 r2 rs1 m) ->
   exists rs',
      exec_straight ge fn c rs m k rs' m'
-  /\ forall r, r <> PC -> r <> GPR11 -> r <> GPR12 -> r <> FPR13 -> rs' r = rs r).
+  /\ forall r, r <> PC -> r <> GPR11 /\ r <> GPR12 -> rs' r = rs r).
   {
   intros. eapply transl_memory_access_correct; eauto.
   intros. econstructor; split. apply exec_straight_one. 
   rewrite H4. unfold store1. rewrite H6. rewrite H7; auto with asmgen. rewrite H3. eauto. auto. 
-  intros; Simpl. apply H7; auto. destruct TEMP0; congruence. 
+  intros; Simpl. apply H7; auto. destruct TEMP0; destruct H9; congruence. 
   intros. econstructor; split. apply exec_straight_one. 
   rewrite H5. unfold store2. rewrite H6. rewrite H7; auto with asmgen. rewrite H3. eauto. auto. 
-  intros; Simpl. apply H7; auto. destruct TEMP0; congruence. 
+  intros; Simpl. apply H7; auto. destruct TEMP0; destruct H9; congruence. 
   }
   destruct chunk; monadInv H.
 - (* Mint8signed *)
@@ -1153,10 +1149,11 @@ Proof.
   eapply transl_memory_access_correct. eauto. eauto. eauto. 
   intros. econstructor; split. apply exec_straight_one. 
   simpl. unfold store1. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto. 
-  intros. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
+Local Transparent destroyed_by_store.
+  simpl; intros. destruct H4 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
   intros. econstructor; split. apply exec_straight_one. 
   simpl. unfold store2. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto. 
-  intros. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
+  simpl; intros. destruct H4 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
 - (* Mfloat64 *)
   apply Mem.storev_float64al32 in H1. eapply BASE; eauto; erewrite freg_of_eq by eauto; auto.
 - (* Mfloat64al32 *)
diff --git a/powerpc/ConstpropOp.vp b/powerpc/ConstpropOp.vp
index 9131a46..e7e4095 100644
--- a/powerpc/ConstpropOp.vp
+++ b/powerpc/ConstpropOp.vp
@@ -31,6 +31,7 @@ Inductive approx : Type :=
                              no compile-time information is available. *)
   | I: int -> approx     (** A known integer value. *)
   | F: float -> approx   (** A known floating-point value. *)
+  | L: int64 -> approx   (** A know 64-bit integer value. *)
   | G: ident -> int -> approx
                          (** The value is the address of the given global
                              symbol plus the given integer offset. *)
@@ -125,6 +126,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) :=
   | Osingleoffloat, F n1 :: nil => F(Float.singleoffloat n1)
   | Ointoffloat, F n1 :: nil => eval_static_intoffloat n1
   | Ofloatofwords, I n1 :: I n2 :: nil => if propagate_float_constants tt then F(Float.from_words n1 n2) else Unknown
+  | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2)
+  | Olowlong, L n :: nil => I(Int64.loword n)
+  | Ohighlong, L n :: nil => I(Int64.hiword n)
   | Ocmp c, vl => eval_static_condition_val c vl
   | _, _ => Unknown
   end.
diff --git a/powerpc/ConstpropOpproof.v b/powerpc/ConstpropOpproof.v
index 84e1e5b..9d833bf 100644
--- a/powerpc/ConstpropOpproof.v
+++ b/powerpc/ConstpropOpproof.v
@@ -45,6 +45,7 @@ Definition val_match_approx (a: approx) (v: val) : Prop :=
   | Unknown => True
   | I p => v = Vint p
   | F p => v = Vfloat p
+  | L p => v = Vlong p
   | G symb ofs => v = symbol_address ge symb ofs
   | S ofs => v = Val.add sp (Vint ofs)
   | _ => False
@@ -65,6 +66,8 @@ Ltac SimplVMA :=
       simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (F _) ?v) |- _ =>
       simpl in H; (try subst v); SimplVMA
+  | H: (val_match_approx (L _) ?v) |- _ =>
+      simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (G _ _) ?v) |- _ =>
       simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (S _) ?v) |- _ =>
diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v
index 632a55d..ce66e6a 100644
--- a/powerpc/Machregs.v
+++ b/powerpc/Machregs.v
@@ -13,6 +13,7 @@
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
+Require Import Op.
 
 (** ** Machine registers *)
 
@@ -21,59 +22,55 @@ Require Import AST.
 - Integer registers that can be allocated to RTL pseudo-registers ([Rxx]).
 - Floating-point registers that can be allocated to RTL pseudo-registers 
   ([Fxx]).
-- Two integer registers, not allocatable, reserved as temporaries for
-  spilling and reloading ([IT1, IT2]).
-- Two float registers, not allocatable, reserved as temporaries for
-  spilling and reloading ([FT1, FT2]).
 
   The type [mreg] does not include special-purpose or reserved
-  machine registers such as the stack pointer and the condition codes. *)
+  machine registers such as the stack pointer (GPR1), the small data area
+  pointers (GPR2, GPR13), and the condition codes. *)
 
 Inductive mreg: Type :=
   (** Allocatable integer regs *)
   | R3: mreg  | R4: mreg  | R5: mreg  | R6: mreg
   | R7: mreg  | R8: mreg  | R9: mreg  | R10: mreg
+  | R11: mreg | R12: mreg
   | R14: mreg | R15: mreg | R16: mreg
   | R17: mreg | R18: mreg | R19: mreg | R20: mreg
   | R21: mreg | R22: mreg | R23: mreg | R24: mreg
   | R25: mreg | R26: mreg | R27: mreg | R28: mreg
   | R29: mreg | R30: mreg | R31: mreg
   (** Allocatable float regs *)
+  | F0: mreg
   | F1: mreg  | F2: mreg  | F3: mreg  | F4: mreg
   | F5: mreg  | F6: mreg  | F7: mreg  | F8: mreg
-  | F9: mreg  | F10: mreg | F11: mreg
-  | F14: mreg | F15: mreg
+  | F9: mreg  | F10: mreg | F11: mreg | F12: mreg
+  | F13: mreg | F14: mreg | F15: mreg
   | F16: mreg | F17: mreg | F18: mreg | F19: mreg
   | F20: mreg | F21: mreg | F22: mreg | F23: mreg
   | F24: mreg | F25: mreg | F26: mreg | F27: mreg
-  | F28: mreg | F29: mreg | F30: mreg | F31: mreg
-  (** Integer temporaries *)
-  | IT1: mreg (* R11 *) | IT2: mreg (* R12 *)
-  (** Float temporaries *)
-  | FT1: mreg (* F0 *) | FT2: mreg (* F12 *) | FT3: mreg (* F13 *).
+  | F28: mreg | F29: mreg | F30: mreg | F31: mreg.
 
 Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
 Proof. decide equality. Defined.
+Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
   | R3 => Tint  | R4 => Tint  | R5 => Tint  | R6 => Tint
   | R7 => Tint  | R8 => Tint  | R9 => Tint  | R10 => Tint
+  | R11 => Tint | R12 => Tint
   | R14 => Tint | R15 => Tint | R16 => Tint
   | R17 => Tint | R18 => Tint | R19 => Tint | R20 => Tint
   | R21 => Tint | R22 => Tint | R23 => Tint | R24 => Tint
   | R25 => Tint | R26 => Tint | R27 => Tint | R28 => Tint
   | R29 => Tint | R30 => Tint | R31 => Tint
+  | F0 => Tfloat
   | F1 => Tfloat  | F2 => Tfloat  | F3 => Tfloat  | F4 => Tfloat
   | F5 => Tfloat  | F6 => Tfloat  | F7 => Tfloat  | F8 => Tfloat
-  | F9 => Tfloat  | F10 => Tfloat | F11 => Tfloat
-  | F14 => Tfloat | F15 => Tfloat
+  | F9 => Tfloat  | F10 => Tfloat | F11 => Tfloat | F12 => Tfloat
+  | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat
   | F16 => Tfloat | F17 => Tfloat | F18 => Tfloat | F19 => Tfloat
   | F20 => Tfloat | F21 => Tfloat | F22 => Tfloat | F23 => Tfloat
   | F24 => Tfloat | F25 => Tfloat | F26 => Tfloat | F27 => Tfloat
   | F28 => Tfloat | F29 => Tfloat | F30 => Tfloat | F31 => Tfloat
-  | IT1 => Tint | IT2 => Tint
-  | FT1 => Tfloat | FT2 => Tfloat | FT3 => Tfloat
   end.
 
 Open Scope positive_scope.
@@ -85,21 +82,21 @@ Module IndexedMreg <: INDEXED_TYPE.
     match r with
     | R3 => 1  | R4 => 2  | R5 => 3  | R6 => 4
     | R7 => 5  | R8 => 6  | R9 => 7  | R10 => 8
-    | R14 => 9 | R15 => 10 | R16 => 11
-    | R17 => 12 | R18 => 13 | R19 => 14 | R20 => 15
-    | R21 => 16 | R22 => 17 | R23 => 18 | R24 => 19
-    | R25 => 20 | R26 => 21 | R27 => 22 | R28 => 23
-    | R29 => 24 | R30 => 25 | R31 => 26
-    | F1 => 28  | F2 => 29  | F3 => 30  | F4 => 31
-    | F5 => 32  | F6 => 33  | F7 => 34  | F8 => 35
-    | F9 => 36  | F10 => 37 | F11 => 38
-    | F14 => 39 | F15 => 40
-    | F16 => 41 | F17 => 42 | F18 => 43 | F19 => 44
-    | F20 => 45 | F21 => 46 | F22 => 47 | F23 => 48
-    | F24 => 49 | F25 => 50 | F26 => 51 | F27 => 52
-    | F28 => 53 | F29 => 54 | F30 => 55 | F31 => 56
-    | IT1 => 57 | IT2 => 58
-    | FT1 => 59 | FT2 => 60 | FT3 => 61
+    | R11 => 9 | R12 => 10
+    | R14 => 11 | R15 => 12 | R16 => 13
+    | R17 => 14 | R18 => 15 | R19 => 16 | R20 => 17
+    | R21 => 18 | R22 => 19 | R23 => 20 | R24 => 21
+    | R25 => 22 | R26 => 23 | R27 => 24 | R28 => 25
+    | R29 => 26 | R30 => 27 | R31 => 28
+    | F0 => 29
+    | F1 => 30  | F2 => 31  | F3 => 32  | F4 => 33
+    | F5 => 34  | F6 => 35  | F7 => 36  | F8 => 37
+    | F9 => 38  | F10 => 39 | F11 => 40 | F12 => 41
+    | F13 => 42 | F14 => 43 | F15 => 44
+    | F16 => 45 | F17 => 46 | F18 => 47 | F19 => 48
+    | F20 => 49 | F21 => 50 | F22 => 51 | F23 => 52
+    | F24 => 53 | F25 => 54 | F26 => 55 | F27 => 56
+    | F28 => 57 | F29 => 58 | F30 => 59 | F31 => 60
     end.
   Lemma index_inj:
     forall r1 r2, index r1 = index r2 -> r1 = r2.
@@ -108,3 +105,68 @@ Module IndexedMreg <: INDEXED_TYPE.
   Qed.
 End IndexedMreg.
 
+(** ** Destroyed registers, preferred registers *)
+
+Definition destroyed_by_op (op: operation): list mreg :=
+  match op with
+  | Ofloatconst _ => R12 :: nil
+  | Ointoffloat => F13 :: nil
+  | _ => nil
+  end.
+
+Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
+  R12 :: nil.
+
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
+  match chunk with
+  | Mfloat32 => R11 :: R12 :: F13 :: nil
+  | _ => R11 :: R12 :: nil
+  end.
+
+Definition destroyed_by_cond (cond: condition): list mreg :=
+  nil.
+
+Definition destroyed_by_jumptable: list mreg :=
+  R12 :: nil.
+
+Definition destroyed_by_builtin (ef: external_function): list mreg :=
+  match ef with
+  | EF_builtin _ _ => F13 :: nil
+  | EF_vload _ => nil
+  | EF_vstore Mfloat32 => F13 :: nil
+  | EF_vstore _ => nil
+  | EF_vload_global _ _ _ => R11 :: nil
+  | EF_vstore_global Mint64 _ _ => R10 :: R11 :: R12 :: nil
+  | EF_vstore_global Mfloat32 _ _ => R11 :: R12 :: F13 :: nil
+  | EF_vstore_global _ _ _ => R11 :: R12 :: nil
+  | EF_memcpy _ _ => R11 :: R12 :: F13 :: nil
+  | _ => nil
+  end.
+
+Definition destroyed_at_function_entry: list mreg :=
+  nil.
+
+Definition temp_for_parent_frame: mreg :=
+  R11.
+
+Definition mregs_for_operation (op: operation): list (option mreg) * option mreg :=
+  (nil, None).
+
+Definition mregs_for_builtin (ef: external_function): list (option mreg) * list (option mreg) :=
+  (nil, nil).
+
+Global Opaque
+    destroyed_by_op destroyed_by_load destroyed_by_store
+    destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
+    destroyed_at_function_entry temp_for_parent_frame
+    mregs_for_operation mregs_for_builtin.
+
+(** Two-address operations.  Return [true] if the first argument and
+  the result must be in the same location *and* are unconstrained
+  by [mregs_for_operation]. There is only one: rotate-mask-insert. *)
+
+Definition two_address_op (op: operation) : bool :=
+  match op with
+  | Oroli _ _ => true
+  | _ => false
+  end.
diff --git a/powerpc/Machregsaux.ml b/powerpc/Machregsaux.ml
index 713e14d..b8d7c97 100644
--- a/powerpc/Machregsaux.ml
+++ b/powerpc/Machregsaux.ml
@@ -17,21 +17,20 @@ open Machregs
 let register_names = [
   ("R3", R3);  ("R4", R4);  ("R5", R5);  ("R6", R6);
   ("R7", R7);  ("R8", R8);  ("R9", R9);  ("R10", R10);
+  ("R11", R11); ("R12", R12);
   ("R14", R14); ("R15", R15); ("R16", R16);
   ("R17", R17); ("R18", R18); ("R19", R19); ("R20", R20);
   ("R21", R21); ("R22", R22); ("R23", R23); ("R24", R24);
   ("R25", R25); ("R26", R26); ("R27", R27); ("R28", R28);
   ("R29", R29); ("R30", R30); ("R31", R31);
-  ("F1", F1);  ("F2", F2);  ("F3", F3);  ("F4", F4);
+  ("F0", F0); ("F1", F1);  ("F2", F2);  ("F3", F3);  ("F4", F4);
   ("F5", F5);  ("F6", F6);  ("F7", F7);  ("F8", F8);
-  ("F9", F9);  ("F10", F10); ("F11", F11); 
-  ("F14", F14); ("F15", F15);
+  ("F9", F9);  ("F10", F10); ("F11", F11); ("F12", F12);
+  ("F13", F13); ("F14", F14); ("F15", F15);
   ("F16", F16); ("F17", F17); ("F18", F18); ("F19", F19);
   ("F20", F20); ("F21", F21); ("F22", F22); ("F23", F23);
   ("F24", F24); ("F25", F25); ("F26", F26); ("F27", F27);
-  ("F28", F28); ("F29", F29); ("F30", F30); ("F31", F31);
-  ("R11", IT1); ("R12", IT2);
-  ("F0", FT1); ("F12", FT2); ("F13", FT3)
+  ("F28", F28); ("F29", F29); ("F30", F30); ("F31", F31)
 ]
 
 let name_of_register r =
diff --git a/powerpc/Op.v b/powerpc/Op.v
index 110796b..e584726 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -97,6 +97,10 @@ Inductive operation : Type :=
 (*c Conversions between int and float: *)
   | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
   | Ofloatofwords: operation            (**r [rd = float_of_words(r1,r2)] *)
+(*c Manipulating 64-bit integers: *)
+  | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
+  | Olowlong: operation                 (**r [rd = low-word(r1)] *)
+  | Ohighlong: operation                (**r [rd = high-word(r1)] *)
 (*c Boolean tests: *)
   | Ocmp: condition -> operation.       (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
 
@@ -205,6 +209,9 @@ Definition eval_operation
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
   | Ofloatofwords, v1::v2::nil => Some(Val.floatofwords v1 v2)
+  | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
+  | Olowlong, v1::nil => Some(Val.loword v1)
+  | Ohighlong, v1::nil => Some(Val.hiword v1)
   | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m))
   | _, _ => None
   end.
@@ -225,7 +232,7 @@ Ltac FuncInv :=
   match goal with
   | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
       destruct x; simpl in H; try discriminate; FuncInv
-  | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
+  | H: (match ?v with Vundef => _ | Vint _ => _ | Vlong _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
       destruct v; simpl in H; try discriminate; FuncInv
   | H: (Some _ = Some _) |- _ =>
       injection H; intros; clear H; FuncInv
@@ -292,6 +299,9 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osingleoffloat => (Tfloat :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ofloatofwords => (Tint :: Tint :: nil, Tfloat)
+  | Omakelong => (Tint :: Tint :: nil, Tlong)
+  | Olowlong => (Tlong :: nil, Tint)
+  | Ohighlong => (Tlong :: nil, Tint)
   | Ocmp c => (type_of_condition c, Tint)
   end.
 
@@ -366,6 +376,9 @@ Proof with (try exact I).
   destruct v0...
   destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
   destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
   destruct (eval_condition c vl m); simpl... destruct b... 
 Qed.
 
@@ -481,6 +494,33 @@ Proof.
   rewrite Val.add_assoc. simpl. auto.
 Qed.
 
+(** Offset an addressing mode [addr] by a quantity [delta], so that
+  it designates the pointer [delta] bytes past the pointer designated
+  by [addr].  May be undefined, in which case [None] is returned. *)
+
+Definition offset_addressing (addr: addressing) (delta: int) : option addressing :=
+  match addr with
+  | Aindexed n => Some(Aindexed (Int.add n delta))
+  | Aindexed2 => None
+  | Aglobal s n => Some(Aglobal s (Int.add n delta))
+  | Abased s n => Some(Abased s (Int.add n delta))
+  | Ainstack n => Some(Ainstack (Int.add n delta))
+  end.
+
+Lemma eval_offset_addressing:
+  forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v,
+  offset_addressing addr delta = Some addr' ->
+  eval_addressing ge sp addr args = Some v ->
+  eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)).
+Proof.
+  intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst.
+  rewrite Val.add_assoc; auto.
+  unfold symbol_address. destruct (Genv.find_symbol ge i); auto. 
+  unfold symbol_address. destruct (Genv.find_symbol ge i); auto.
+  rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. 
+  rewrite Val.add_assoc. auto. 
+Qed.
+
 (** Transformation of addressing modes with two operands or more
   into an equivalent arithmetic operation.  This is used in the [Reload]
   pass when a store instruction cannot be reloaded directly because
@@ -510,14 +550,6 @@ Proof.
   intros. destruct addr; simpl in H; reflexivity || omegaContradiction.
 Qed.
 
-(** Two-address operations.  There is only one: rotate-mask-insert. *)
-
-Definition two_address_op (op: operation) : bool :=
-  match op with
-  | Oroli _ _ => true
-  | _ => false
-  end.
-
 (** Operations that are so cheap to recompute that CSE should not factor them out. *)
 
 Definition is_trivial_op (op: operation) : bool :=
@@ -767,7 +799,10 @@ Proof.
   inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
   exists (Vint i); auto.
   inv H4; inv H2; simpl; auto.
-  subst v1. destruct (eval_condition c vl1 m1) eqn:?.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+  subst. destruct (eval_condition c vl1 m1) eqn:?.
   exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
   destruct b; simpl; constructor.
   simpl; constructor.
diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml
index 2d69be1..319e12c 100644
--- a/powerpc/PrintAsm.ml
+++ b/powerpc/PrintAsm.ml
@@ -223,8 +223,8 @@ let rolm_mask n =
    - annotation statements: take their arguments in registers or stack
      locations; generate no code;
    - inlined by the compiler: take their arguments in arbitrary
-     registers; preserve all registers except the temporaries
-     (GPR0, GPR11, GPR12,  FPR0, FPR12, FPR13);
+     registers; preserve all registers except the reserved temporaries
+     (GPR0, GPR11, GPR12, FPR13);
    - inlined while printing asm code; take their arguments in
      locations dictated by the calling conventions; preserve
      callee-save regs only. *)
@@ -239,11 +239,12 @@ let print_annot_val oc txt args res =
   fprintf oc "%s annotation: " comment;
   PrintAnnot.print_annot_val preg oc txt args;
   match args, res with
-  | IR src :: _, IR dst ->
+  | [IR src], [IR dst] ->
       if dst <> src then fprintf oc "	mr	%a, %a\n" ireg dst ireg src 
-  | FR src :: _, FR dst ->
+  | [FR src], [FR dst] ->
       if dst <> src then fprintf oc "	fmr	%a, %a\n" freg dst freg src 
-  | _, _ -> assert false
+  | _, _ ->
+      assert false
 
 (* Handling of memcpy *)
 
@@ -256,8 +257,8 @@ let print_annot_val oc txt args res =
 let print_builtin_memcpy_small oc sz al src dst =
   let rec copy ofs sz =
     if sz >= 8 && al >= 4 then begin
-      fprintf oc "	lfd	%a, %d(%a)\n" freg FPR0 ofs ireg src;
-      fprintf oc "	stfd	%a, %d(%a)\n" freg FPR0 ofs ireg dst;
+      fprintf oc "	lfd	%a, %d(%a)\n" freg FPR13 ofs ireg src;
+      fprintf oc "	stfd	%a, %d(%a)\n" freg FPR13 ofs ireg dst;
       copy (ofs + 8) (sz - 8)
     end else if sz >= 4 then begin
       fprintf oc "	lwz	%a, %d(%a)\n" ireg GPR0 ofs ireg src;
@@ -326,14 +327,23 @@ let print_builtin_vload_common oc chunk base offset res =
       fprintf oc "	lfs	%a, %a(%a)\n" freg res constant offset ireg base
   | (Mfloat64 | Mfloat64al32), FR res ->
       fprintf oc "	lfd	%a, %a(%a)\n" freg res constant offset ireg base
+  (* Mint64 is special-cased below *)
   | _ ->
       assert false
 
 let print_builtin_vload oc chunk args res =
   fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
-  begin match args with
-  | [IR addr] ->
+  begin match args, res with
+  | [IR addr], [res] when chunk <> Mint64 ->
       print_builtin_vload_common oc chunk addr (Cint Integers.Int.zero) res
+  | [IR addr], [IR res1; IR res2] when chunk = Mint64 ->
+      if addr <> res1 then begin
+        fprintf oc "	lwz	%a, 0(%a)\n" ireg res1 ireg addr;
+        fprintf oc "	lwz	%a, 4(%a)\n" ireg res2 ireg addr
+      end else begin
+        fprintf oc "	lwz	%a, 4(%a)\n" ireg res2 ireg addr;
+        fprintf oc "	lwz	%a, 0(%a)\n" ireg res1 ireg addr
+      end
   | _ ->
       assert false
   end;
@@ -341,9 +351,24 @@ let print_builtin_vload oc chunk args res =
 
 let print_builtin_vload_global oc chunk id ofs args res =
   fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
-  fprintf oc "	addis	%a, %a, %a\n"
-             ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
-  print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res;
+  begin match res with
+  | [res] when chunk <> Mint64 ->
+      fprintf oc "	addis	%a, %a, %a\n"
+                 ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+      print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res
+  | [IR res1; IR res2] when chunk = Mint64 ->
+      fprintf oc "	addis	%a, %a, %a\n"
+                 ireg res1 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+      fprintf oc "	lwz	%a, %a(%a)\n"
+                 ireg res1 constant (Csymbol_low(id, ofs)) ireg res1;
+      let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in
+      fprintf oc "	addis	%a, %a, %a\n"
+                 ireg res2 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+      fprintf oc "	lwz	%a, %a(%a)\n"
+                 ireg res2 constant (Csymbol_low(id, ofs)) ireg res2
+  | _ ->
+      assert false
+  end;
   fprintf oc "%s end builtin __builtin_volatile_read\n" comment
 
 let print_builtin_vstore_common oc chunk base offset src =
@@ -359,14 +384,18 @@ let print_builtin_vstore_common oc chunk base offset src =
       fprintf oc "	stfs	%a, %a(%a)\n" freg FPR13 constant offset ireg base
   | (Mfloat64 | Mfloat64al32), FR src ->
       fprintf oc "	stfd	%a, %a(%a)\n" freg src constant offset ireg base
+  (* Mint64 is special-cased below *)
   | _ ->
       assert false
 
 let print_builtin_vstore oc chunk args =
   fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
   begin match args with
-  | [IR addr; src] ->
+  | [IR addr; src] when chunk <> Mint64 ->
       print_builtin_vstore_common oc chunk addr (Cint Integers.Int.zero) src
+  | [IR addr; IR src1; IR src2] when chunk = Mint64 ->
+      fprintf oc "	stw	%a, 0(%a)\n" ireg src1 ireg addr;
+      fprintf oc "	stw	%a, 4(%a)\n" ireg src2 ireg addr
   | _ ->
       assert false
   end;
@@ -375,11 +404,24 @@ let print_builtin_vstore oc chunk args =
 let print_builtin_vstore_global oc chunk id ofs args =
   fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
   begin match args with
-  | [src] ->
+  | [src] when chunk <> Mint64 ->
       let tmp = if src = IR GPR11 then GPR12 else GPR11 in
       fprintf oc "	addis	%a, %a, %a\n"
                  ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
       print_builtin_vstore_common oc chunk tmp (Csymbol_low(id, ofs)) src
+  | [IR src1; IR src2] when chunk = Mint64 ->
+      let tmp =
+        if not (List.mem GPR12 [src1; src2]) then GPR12 else
+        if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in
+      fprintf oc "	addis	%a, %a, %a\n"
+                 ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+      fprintf oc "	stw	%a, %a(%a)\n"
+                 ireg src1 constant (Csymbol_low(id, ofs)) ireg tmp;
+      let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in
+      fprintf oc "	addis	%a, %a, %a\n"
+                 ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+      fprintf oc "	stw	%a, %a(%a)\n"
+                 ireg src2 constant (Csymbol_low(id, ofs)) ireg tmp
   | _ ->
       assert false
   end;
@@ -389,47 +431,84 @@ let print_builtin_vstore_global oc chunk id ofs args =
 
 let print_builtin_inline oc name args res =
   fprintf oc "%s begin builtin %s\n" comment name;
-  (* Can use as temporaries: GPR0, GPR11, GPR12,  FPR0, FPR12, FPR13 *)
+  (* Can use as temporaries: GPR0, FPR13 *)
   begin match name, args, res with
   (* Integer arithmetic *)
-  | "__builtin_mulhw", [IR a1; IR a2], IR res ->
+  | "__builtin_mulhw", [IR a1; IR a2], [IR res] ->
       fprintf oc "	mulhw	%a, %a, %a\n" ireg res ireg a1 ireg a2
-  | "__builtin_mulhwu", [IR a1; IR a2], IR res ->
+  | "__builtin_mulhwu", [IR a1; IR a2], [IR res] ->
       fprintf oc "	mulhwu	%a, %a, %a\n" ireg res ireg a1 ireg a2
-  | "__builtin_cntlz", [IR a1], IR res ->
+  | "__builtin_cntlz", [IR a1], [IR res] ->
       fprintf oc "	cntlzw	%a, %a\n" ireg res ireg a1
-  | "__builtin_bswap", [IR a1], IR res ->
+  | "__builtin_bswap", [IR a1], [IR res] ->
       fprintf oc "	stwu	%a, -8(%a)\n" ireg a1 ireg GPR1;
       fprintf oc "	lwbrx	%a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg GPR1;
       fprintf oc "	addi	%a, %a, 8\n" ireg GPR1 ireg GPR1
   (* Float arithmetic *)
-  | "__builtin_fmadd", [FR a1; FR a2; FR a3], FR res ->
+  | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] ->
       fprintf oc "	fmadd	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fmsub", [FR a1; FR a2; FR a3], FR res ->
+  | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] ->
       fprintf oc "	fmsub	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fnmadd", [FR a1; FR a2; FR a3], FR res ->
+  | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] ->
       fprintf oc "	fnmadd	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fnmsub", [FR a1; FR a2; FR a3], FR res ->
+  | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] ->
       fprintf oc "	fnmsub	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fabs", [FR a1], FR res ->
+  | "__builtin_fabs", [FR a1], [FR res] ->
       fprintf oc "	fabs	%a, %a\n" freg res freg a1
-  | "__builtin_fsqrt", [FR a1], FR res ->
+  | "__builtin_fsqrt", [FR a1], [FR res] ->
       fprintf oc "	fsqrt	%a, %a\n" freg res freg a1
-  | "__builtin_frsqrte", [FR a1], FR res ->
+  | "__builtin_frsqrte", [FR a1], [FR res] ->
       fprintf oc "	frsqrte	%a, %a\n" freg res freg a1
-  | "__builtin_fres", [FR a1], FR res ->
+  | "__builtin_fres", [FR a1], [FR res] ->
       fprintf oc "	fres	%a, %a\n" freg res freg a1
-  | "__builtin_fsel", [FR a1; FR a2; FR a3], FR res ->
+  | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] ->
       fprintf oc "	fsel	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fcti", [FR a1], IR res ->
+  | "__builtin_fcti", [FR a1], [IR res] ->
       fprintf oc "	fctiw	%a, %a\n" freg FPR13 freg a1;
       fprintf oc "	stfdu	%a, -8(%a)\n" freg FPR13 ireg GPR1;
       fprintf oc "	lwz	%a, 4(%a)\n" ireg res ireg GPR1;
       fprintf oc "	addi	%a, %a, 8\n" ireg GPR1 ireg GPR1
+  (* 64-bit integer arithmetic *)
+  | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
+      if rl = ah then begin
+        fprintf oc "	subfic	%a, %a, 0\n" ireg GPR0 ireg al;
+        fprintf oc "	subfze	%a, %a\n" ireg rh ireg ah;
+        fprintf oc "	mr	%a, %a\n" ireg rl ireg GPR0
+      end else begin
+        fprintf oc "	subfic	%a, %a, 0\n" ireg rl ireg al;
+        fprintf oc "	subfze	%a, %a\n" ireg rh ireg ah
+      end
+  | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      if rl = ah || rl = bh then begin
+        fprintf oc "	addc	%a, %a, %a\n" ireg GPR0 ireg al ireg bl;
+        fprintf oc "	adde	%a, %a, %a\n" ireg rh ireg ah ireg bh;
+        fprintf oc "	mr	%a, %a\n" ireg rl ireg GPR0
+      end else begin
+        fprintf oc "	addc	%a, %a, %a\n" ireg rl ireg al ireg bl;
+        fprintf oc "	adde	%a, %a, %a\n" ireg rh ireg ah ireg bh
+      end
+  | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      if rl = ah || rl = bh then begin
+        fprintf oc "	subfc	%a, %a, %a\n" ireg GPR0 ireg bl ireg al;
+        fprintf oc "	subfe	%a, %a, %a\n" ireg rh ireg bh ireg ah;
+        fprintf oc "	mr	%a, %a\n" ireg rl ireg GPR0
+      end else begin
+        fprintf oc "	subfc	%a, %a, %a\n" ireg rl ireg bl ireg al;
+        fprintf oc "	subfe	%a, %a, %a\n" ireg rh ireg bh ireg ah
+      end
+  | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
+      if rl = a || rl = b then begin
+        fprintf oc "	mullw	%a, %a, %a\n" ireg GPR0 ireg a ireg b;
+        fprintf oc "	mulhwu	%a, %a, %a\n" ireg rh ireg a ireg b;
+        fprintf oc "	mr	%a, %a\n" ireg rl ireg GPR0
+      end else begin
+        fprintf oc "	mullw	%a, %a, %a\n" ireg rl ireg a ireg b;
+        fprintf oc "	mulhwu	%a, %a, %a\n" ireg rh ireg a ireg b
+      end
   (* Memory accesses *)
-  | "__builtin_read16_reversed", [IR a1], IR res ->
+  | "__builtin_read16_reversed", [IR a1], [IR res] ->
       fprintf oc "	lhbrx	%a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1
-  | "__builtin_read32_reversed", [IR a1], IR res ->
+  | "__builtin_read32_reversed", [IR a1], [IR res] ->
       fprintf oc "	lwbrx	%a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1
   | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
       fprintf oc "	sthbrx	%a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1
@@ -484,9 +563,9 @@ let print_instruction oc tbl pc fallthrough = function
       if adj >= -0x8000l then
         fprintf oc "	stwu	%a, %ld(%a)\n" ireg GPR1 adj ireg GPR1
       else begin
-        fprintf oc "	addis	%a, 0, %ld\n" ireg GPR12 (Int32.shift_right_logical adj 16);
-        fprintf oc "	ori	%a, %a, %ld\n" ireg GPR12 ireg GPR12 (Int32.logand adj 0xFFFFl);
-        fprintf oc "	stwux	%a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR12
+        fprintf oc "	addis	%a, 0, %ld\n" ireg GPR0 (Int32.shift_right_logical adj 16);
+        fprintf oc "	ori	%a, %a, %ld\n" ireg GPR0 ireg GPR0 (Int32.logand adj 0xFFFFl);
+        fprintf oc "	stwux	%a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR0
       end
   | Pand_(r1, r2, r3) ->
       fprintf oc "	and.	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
@@ -860,7 +939,7 @@ let print_function oc name code =
 
 (* Generation of stub functions *)
 
-let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$"
+let re_variadic_stub = Str.regexp "\\(.*\\)\\$[ifl]*$"
 
 (* Stubs for EABI *)
 
@@ -908,6 +987,11 @@ let print_init oc = function
       fprintf oc "	.short	%ld\n" (camlint_of_coqint n)
   | Init_int32 n ->
       fprintf oc "	.long	%ld\n" (camlint_of_coqint n)
+  | Init_int64 n ->
+      let b = camlint64_of_coqint n in
+      fprintf oc "	.long	0x%Lx, 0x%Lx\n"
+                 (Int64.shift_right_logical b 32)
+                 (Int64.logand b 0xFFFFFFFFL)
   | Init_float32 n ->
       fprintf oc "	.long	0x%lx %s %.18g\n"
                  (camlint_of_coqint (Floats.Float.bits_of_single n))
diff --git a/powerpc/PrintOp.ml b/powerpc/PrintOp.ml
index a2f9ff8..cac8cd2 100644
--- a/powerpc/PrintOp.ml
+++ b/powerpc/PrintOp.ml
@@ -96,6 +96,9 @@ let print_operation reg pp = function
   | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
   | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
   | Ofloatofwords, [r1;r2] -> fprintf pp "floatofwords(%a,%a)" reg r1 reg r2
+  | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
+  | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
+  | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
   | Ocmp c, args -> print_condition reg pp (c, args)
   | _ -> fprintf pp "<bad operator>"
 
diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp
index 905a448..a011847 100644
--- a/powerpc/SelectOp.vp
+++ b/powerpc/SelectOp.vp
@@ -200,6 +200,8 @@ Nondetfunction mul (e1: expr) (e2: expr) :=
 (** ** Bitwise and, or, xor *)
 
 Nondetfunction andimm (n1: int) (e2: expr) := 
+  if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else
+  if Int.eq n1 Int.mone then e2 else
   match e2 with
   | Eop (Ointconst n2) Enil =>
       Eop (Ointconst (Int.and n1 n2)) Enil
@@ -240,6 +242,8 @@ Definition same_expr_pure (e1 e2: expr) :=
   end.
 
 Nondetfunction orimm (n1: int) (e2: expr) :=
+  if Int.eq n1 Int.zero then e2 else
+  if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil else
   match e2 with
   | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil
   | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil)
@@ -268,6 +272,7 @@ Nondetfunction or (e1: expr) (e2: expr) :=
   end.
 
 Nondetfunction xorimm (n1: int) (e2: expr) :=
+  if Int.eq n1 Int.zero then e2 else
   match e2 with
   | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.xor n1 n2)) Enil
   | Eop (Oxorimm n2) (t2:::Enil) => Eop (Oxorimm (Int.xor n1 n2)) (t2:::Enil)
@@ -444,12 +449,18 @@ Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
 
 (** ** Recognition of addressing modes for load and store operations *)
 
+Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
+  match chunk with Mint64 => false | _ => true end.
+
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
   match e with
   | Eop (Oaddrsymbol s n) Enil => (Aglobal s n, Enil)
   | Eop (Oaddrstack n) Enil => (Ainstack n, Enil)
   | Eop Oadd (Eop (Oaddrsymbol s n) Enil ::: e2 ::: Enil) => (Abased s n, e2:::Enil)
   | Eop (Oaddimm n) (e1:::Enil) => (Aindexed n, e1:::Enil)
-  | Eop Oadd (e1:::e2:::Enil) => (Aindexed2, e1:::e2:::Enil)
+  | Eop Oadd (e1:::e2:::Enil) =>
+     if can_use_Aindexed2 chunk
+     then (Aindexed2, e1:::e2:::Enil)
+     else (Aindexed Int.zero, Eop Oadd (e1:::e2:::Enil) ::: Enil)
   | _ => (Aindexed Int.zero, e:::Enil)
   end.
diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v
index 7be8858..f751314 100644
--- a/powerpc/SelectOpproof.v
+++ b/powerpc/SelectOpproof.v
@@ -27,13 +27,6 @@ Require Import SelectOp.
 
 Open Local Scope cminorsel_scope.
 
-Section CMCONSTR.
-
-Variable ge: genv.
-Variable sp: val.
-Variable e: env.
-Variable m: mem.
-
 (** * Useful lemmas and tactics *)
 
 (** The following are trivial lemmas and custom tactics that help
@@ -81,6 +74,13 @@ Ltac TrivialExists :=
 
 (** * Correctness of the smart constructors *)
 
+Section CMCONSTR.
+
+Variable ge: genv.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
 (** We now show that the code generated by "smart constructor" functions
   such as [SelectOp.notint] behaves as expected.  Continuing the
   [notint] example, we show that if the expression [e]
@@ -163,7 +163,7 @@ Proof.
   rewrite Int.add_commut. auto.
   unfold symbol_address. destruct (Genv.find_symbol ge s); simpl; auto. rewrite Int.add_commut; auto.
   rewrite Val.add_assoc. rewrite Int.add_commut. auto.
-  subst x. rewrite Val.add_assoc. rewrite Int.add_commut. auto.
+  subst. rewrite Val.add_assoc. rewrite Int.add_commut. auto.
 Qed. 
 
 Theorem eval_add: binary_constructor_sound add Val.add.
@@ -327,7 +327,15 @@ Qed.
 Theorem eval_andimm:
   forall n, unary_constructor_sound (andimm n) (fun x => Val.and x (Vint n)).
 Proof.
-  intros; red; intros until x. unfold andimm. case (andimm_match a); intros.
+  intros; red; intros until x. unfold andimm. 
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  intros. subst. exists (Vint Int.zero); split. EvalOp. 
+  destruct x; simpl; auto. rewrite Int.and_zero; auto.
+  predSpec Int.eq Int.eq_spec n Int.mone.
+  intros. subst. exists x; split. auto.
+  destruct x; simpl; auto. rewrite Int.and_mone; auto.
+  clear H H0.
+  case (andimm_match a); intros.
   InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto.
   set (n' := Int.and n n2). 
   destruct (Int.eq (Int.shru (Int.shl n' amount) amount) n' &&
@@ -376,8 +384,12 @@ Qed.
 Theorem eval_orimm:
   forall n, unary_constructor_sound (orimm n) (fun x => Val.or x (Vint n)).
 Proof.
-  intros; red; intros until x.
-  unfold orimm. destruct (orimm_match a); intros; InvEval.
+  intros; red; intros until x. unfold orimm.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.or_zero; auto.
+  predSpec Int.eq Int.eq_spec n Int.mone.
+  intros. subst. exists (Vint Int.mone); split. EvalOp. destruct x; simpl; auto. rewrite Int.or_mone; auto.
+  clear H H0. destruct (orimm_match a); intros; InvEval.
   TrivialExists. simpl. rewrite Int.or_commut; auto.
   subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists. 
   TrivialExists.
@@ -433,8 +445,10 @@ Qed.
 Theorem eval_xorimm:
   forall n, unary_constructor_sound (xorimm n) (fun x => Val.xor x (Vint n)).
 Proof.
-  intros; red; intros until x.
-  unfold xorimm. destruct (xorimm_match a); intros; InvEval.
+  intros; red; intros until x. unfold xorimm.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.xor_zero; auto.
+  clear H. destruct (xorimm_match a); intros; InvEval.
   TrivialExists. simpl. rewrite Int.xor_commut; auto.
   subst. rewrite Val.xor_assoc. simpl. rewrite Int.xor_commut. TrivialExists. 
   TrivialExists.
@@ -445,9 +459,9 @@ Proof.
   red; intros until y; unfold xor; case (xor_match a b); intros; InvEval.
   rewrite Val.xor_commut. apply eval_xorimm; auto.
   apply eval_xorimm; auto.
-  subst x. rewrite Val.xor_commut. rewrite Val.not_xor. rewrite <- Val.xor_assoc. 
+  subst. rewrite Val.xor_commut. rewrite Val.not_xor. rewrite <- Val.xor_assoc. 
   rewrite <- Val.not_xor. rewrite Val.xor_commut. TrivialExists.
-  subst y. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. TrivialExists.
+  subst. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. TrivialExists.
   TrivialExists.
 Qed.
 
@@ -843,7 +857,11 @@ Proof.
   exists (@nil val). split. eauto with evalexpr. simpl. auto.
   exists (v0 :: nil). split. eauto with evalexpr. simpl. congruence.
   exists (v1 :: nil). split. eauto with evalexpr. simpl. congruence.
+  destruct (can_use_Aindexed2 chunk). 
   exists (v1 :: v0 :: nil). split. eauto with evalexpr. simpl. congruence.
+  exists (Vptr b ofs :: nil). split.
+  constructor. EvalOp. simpl; congruence. constructor. 
+  simpl. rewrite Int.add_zero. auto.
   exists (v :: nil). split. eauto with evalexpr. subst v. simpl. 
   rewrite Int.add_zero. auto.
 Qed.
diff --git a/powerpc/eabi/Conventions1.v b/powerpc/eabi/Conventions1.v
index 652f4a1..8ff0dc0 100644
--- a/powerpc/eabi/Conventions1.v
+++ b/powerpc/eabi/Conventions1.v
@@ -21,21 +21,18 @@ Require Import Locations.
 
 (** Machine registers (type [mreg] in module [Locations]) are divided in
   the following groups:
-- Temporaries used for spilling, reloading, and parallel move operations.
-- Allocatable registers, that can be assigned to RTL pseudo-registers.
-  These are further divided into:
--- Callee-save registers, whose value is preserved across a function call.
--- Caller-save registers that can be modified during a function call.
+- Callee-save registers, whose value is preserved across a function call.
+- Caller-save registers that can be modified during a function call.
 
   We follow the PowerPC/EABI application binary interface (ABI) in our choice
   of callee- and caller-save registers.
 *)
 
 Definition int_caller_save_regs :=
-  R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil.
+  R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: R12 :: nil.
 
 Definition float_caller_save_regs :=
-  F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: F11 :: nil.
+  F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: F11 :: F12 :: F13 :: nil.
 
 Definition int_callee_save_regs :=
   R31 :: R30 :: R29 :: R28 :: R27 :: R26 :: R25 :: R24 :: R23 ::
@@ -45,26 +42,11 @@ Definition float_callee_save_regs :=
   F31 :: F30 :: F29 :: F28 :: F27 :: F26 :: F25 :: F24 :: F23 ::
   F22 :: F21 :: F20 :: F19 :: F18 :: F17 :: F16 :: F15 :: F14 :: nil.
 
-Definition destroyed_at_call_regs :=
-  int_caller_save_regs ++ float_caller_save_regs.
-
 Definition destroyed_at_call :=
-  List.map R destroyed_at_call_regs.
-
-Definition int_temporaries := IT1 :: IT2 :: nil.
-
-Definition float_temporaries := FT1 :: FT2 :: FT3 :: nil.
-  
-Definition temporary_regs := int_temporaries ++ float_temporaries.
-
-Definition temporaries := List.map R temporary_regs.
-
-Definition destroyed_at_move_regs: list mreg := nil.
-
-Definition destroyed_at_move := List.map R destroyed_at_move_regs.
+  int_caller_save_regs ++ float_caller_save_regs.
 
 Definition dummy_int_reg := R3.     (**r Used in [Coloring]. *)
-Definition dummy_float_reg := F1.   (**r Used in [Coloring]. *)
+Definition dummy_float_reg := F0.   (**r Used in [Coloring]. *)
 
 (** The [index_int_callee_save] and [index_float_callee_save] associate
   a unique positive integer to callee-save registers.  This integer is
@@ -175,34 +157,26 @@ Qed.
 
 Lemma register_classification:
   forall r, 
-  (In (R r) temporaries \/ In (R r) destroyed_at_call) \/
-  (In r int_callee_save_regs \/ In r float_callee_save_regs).
+  In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs.
 Proof.
   destruct r; 
-  try (left; left; simpl; OrEq);
-  try (left; right; simpl; OrEq);
+  try (left; simpl; OrEq);
   try (right; left; simpl; OrEq);
   try (right; right; simpl; OrEq).
 Qed.
 
 Lemma int_callee_save_not_destroyed:
   forall r, 
-    In (R r) temporaries \/ In (R r) destroyed_at_call ->
-    ~(In r int_callee_save_regs).
+    In r destroyed_at_call -> In r int_callee_save_regs -> False.
 Proof.
-  intros; red; intros. elim H.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
+  intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
 Qed.
 
 Lemma float_callee_save_not_destroyed:
   forall r, 
-    In (R r) temporaries \/ In (R r) destroyed_at_call ->
-    ~(In r float_callee_save_regs).
+    In r destroyed_at_call -> In r float_callee_save_regs -> False.
 Proof.
-  intros; red; intros. elim H.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
+  intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
 Qed.
 
 Lemma int_callee_save_type:
@@ -259,39 +233,27 @@ Qed.
 (** ** Location of function result *)
 
 (** The result value of a function is passed back to the caller in
-  registers [R3] or [F1], depending on the type of the returned value.
+  registers [R3] or [F1] or [R3, R4], depending on the type of the returned value.
   We treat a function without result as a function with one integer result. *)
 
-Definition loc_result (s: signature) : mreg :=
+Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
-  | None => R3
-  | Some Tint => R3
-  | Some Tfloat => F1
+  | None => R3 :: nil
+  | Some Tint => R3 :: nil
+  | Some Tfloat => F1 :: nil
+  | Some Tlong => R3 :: R4 :: nil
   end.
 
-(** The result location has the type stated in the signature. *)
-
-Lemma loc_result_type:
-  forall sig,
-  mreg_type (loc_result sig) =
-  match sig.(sig_res) with None => Tint | Some ty => ty end.
-Proof.
-  intros; unfold loc_result.
-  destruct (sig_res sig). 
-  destruct t; reflexivity.
-  reflexivity.
-Qed.
-
-(** The result location is a caller-save register or a temporary *)
+(** The result location is a caller-save register *)
 
 Lemma loc_result_caller_save:
-  forall (s: signature), 
-  In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries.
+  forall (s: signature) (r: mreg), 
+  In r (loc_result s) -> In r destroyed_at_call.
 Proof.
-  intros; unfold loc_result. left;
-  destruct (sig_res s). 
-  destruct t; simpl; OrEq.
-  simpl; OrEq.
+  intros.
+  assert (r = R3 \/ r = R4 \/ r = F1).
+    unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition.
+  destruct H0 as [A | [A | A]]; subst r; simpl; OrEq.
 Qed.
 
 (** ** Location of function arguments *)
@@ -300,213 +262,162 @@ Qed.
   to a function:
 - The first 8 integer arguments are passed in registers [R3] to [R10].
 - The first 8 float arguments are passed in registers [F1] to [F8].
+- The first 4 long integer arguments are passed in register pairs [R3,R4] ... [R9,R10].
 - Extra arguments are passed on the stack, in [Outgoing] slots, consecutively
   assigned (1 word for an integer argument, 2 words for a float),
   starting at word offset 0.
 - No stack space is reserved for the arguments that are passed in registers.
 *)
 
+Definition int_param_regs :=
+  R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil.
+Definition float_param_regs :=
+  F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: nil.
+
 Fixpoint loc_arguments_rec
-    (tyl: list typ) (iregl: list mreg) (fregl: list mreg)
-    (ofs: Z) {struct tyl} : list loc :=
+    (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
   | Tint :: tys =>
-      match iregl with
-      | nil =>
-          S (Outgoing ofs Tint) :: loc_arguments_rec tys nil fregl (ofs + 1)
-      | ireg :: iregs =>
-          R ireg :: loc_arguments_rec tys iregs fregl ofs
+      match list_nth_z int_param_regs ir with
+      | None =>
+          S Outgoing ofs Tint :: loc_arguments_rec tys ir fr (ofs + 1)
+      | Some ireg =>
+          R ireg :: loc_arguments_rec tys (ir + 1) fr ofs
       end
   | Tfloat :: tys =>
-      match fregl with
-      | nil =>
-          S (Outgoing ofs Tfloat) :: loc_arguments_rec tys iregl nil (ofs + 2)
-      | freg :: fregs =>
-          R freg :: loc_arguments_rec tys iregl fregs ofs
+      match list_nth_z float_param_regs fr with
+      | None =>
+          S Outgoing ofs Tfloat :: loc_arguments_rec tys ir fr (ofs + 2)
+      | Some freg =>
+          R freg :: loc_arguments_rec tys ir (fr + 1) ofs
+      end
+  | Tlong :: tys =>
+      let ir := align ir 2 in
+      match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with
+      | Some r1, Some r2 =>
+          R r1 :: R r2 :: loc_arguments_rec tys (ir + 2) fr ofs
+      | _, _ =>
+          S Outgoing ofs Tint :: S Outgoing (ofs + 1) Tint :: loc_arguments_rec tys ir fr (ofs + 2)
       end
   end.
 
-Definition int_param_regs :=
-  R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil.
-Definition float_param_regs :=
-  F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: nil.
-
 (** [loc_arguments s] returns the list of locations where to store arguments
   when calling a function with signature [s].  *)
 
 Definition loc_arguments (s: signature) : list loc :=
-  loc_arguments_rec s.(sig_args) int_param_regs float_param_regs 0.
+  loc_arguments_rec s.(sig_args) 0 0 0.
 
 (** [size_arguments s] returns the number of [Outgoing] slots used
   to call a function with signature [s]. *)
 
-Fixpoint size_arguments_rec
-    (tyl: list typ) (iregl: list mreg) (fregl: list mreg)
-    (ofs: Z) {struct tyl} : Z :=
+Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
   | Tint :: tys =>
-      match iregl with
-      | nil => size_arguments_rec tys nil fregl (ofs + 1)
-      | ireg :: iregs => size_arguments_rec tys iregs fregl ofs
+      match list_nth_z int_param_regs ir with
+      | None => size_arguments_rec tys ir fr (ofs + 1)
+      | Some ireg => size_arguments_rec tys (ir + 1) fr ofs
       end
   | Tfloat :: tys =>
-      match fregl with
-      | nil => size_arguments_rec tys iregl nil (ofs + 2)
-      | freg :: fregs => size_arguments_rec tys iregl fregs ofs
+      match list_nth_z float_param_regs fr with
+      | None => size_arguments_rec tys ir fr (ofs + 2)
+      | Some freg => size_arguments_rec tys ir (fr + 1) ofs
+      end
+  | Tlong :: tys =>
+      let ir := align ir 2 in
+      match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with
+      | Some r1, Some r2 => size_arguments_rec tys (ir + 2) fr ofs
+      | _, _ => size_arguments_rec tys ir fr (ofs + 2)
       end
   end.
 
+
 Definition size_arguments (s: signature) : Z :=
-  size_arguments_rec s.(sig_args) int_param_regs float_param_regs 0.
+  size_arguments_rec s.(sig_args) 0 0 0.
 
 (** A tail-call is possible for a signature if the corresponding
     arguments are all passed in registers. *)
 
 Definition tailcall_possible (s: signature) : Prop :=
   forall l, In l (loc_arguments s) ->
-  match l with R _ => True | S _ => False end.
+  match l with R _ => True | S _ _ _ => False end.
 
-(** Argument locations are either non-temporary registers or [Outgoing] 
+(** Argument locations are either caller-save registers or [Outgoing] 
   stack slots at nonnegative offsets. *)
 
 Definition loc_argument_acceptable (l: loc) : Prop :=
   match l with
-  | R r => ~(In l temporaries)
-  | S (Outgoing ofs ty) => ofs >= 0
+  | R r => In r destroyed_at_call
+  | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong
   | _ => False
   end.
 
 Remark loc_arguments_rec_charact:
-  forall tyl iregl fregl ofs l,
-  In l (loc_arguments_rec tyl iregl fregl ofs) ->
+  forall tyl ir fr ofs l,
+  In l (loc_arguments_rec tyl ir fr ofs) ->
   match l with
-  | R r => In r iregl \/ In r fregl
-  | S (Outgoing ofs' ty) => ofs' >= ofs
-  | S _ => False
+  | R r => In r int_param_regs \/ In r float_param_regs
+  | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong
+  | S _ _ _ => False
   end.
 Proof.
+Opaque list_nth_z.
   induction tyl; simpl loc_arguments_rec; intros.
   elim H.
-  destruct a. 
-  destruct iregl; elim H; intro. 
-  subst l. omega.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
-  subst l. auto with coqlib.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto. simpl; intuition.
-  destruct fregl; elim H; intro. 
-  subst l. omega.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
-  subst l. auto with coqlib.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto.
-  intros [A|B]. left; auto. right; auto with coqlib.
+  destruct a.
+  destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H. 
+  subst. left. eapply list_nth_z_in; eauto.
+  eapply IHtyl; eauto.
+  subst. split. omega. congruence.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+  destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H. 
+  subst. right. eapply list_nth_z_in; eauto.
+  eapply IHtyl; eauto.
+  subst. split. omega. congruence.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+  set (ir' := align ir 2) in *.
+  destruct (list_nth_z int_param_regs ir') as [r1|] eqn:E1.
+  destruct (list_nth_z int_param_regs (ir' + 1)) as [r2|] eqn:E2.
+  destruct H. subst; left; eapply list_nth_z_in; eauto.
+  destruct H. subst; left; eapply list_nth_z_in; eauto.
+  eapply IHtyl; eauto.
+  destruct H. subst. split. omega. congruence.
+  destruct H. subst. split. omega. congruence.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. 
+  destruct H. subst. split. omega. congruence. 
+  destruct H. subst. split. omega. congruence.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. 
 Qed.
 
 Lemma loc_arguments_acceptable:
-  forall (s: signature) (r: loc),
-  In r (loc_arguments s) -> loc_argument_acceptable r.
+  forall (s: signature) (l: loc),
+  In l (loc_arguments s) -> loc_argument_acceptable l.
 Proof.
   unfold loc_arguments; intros.
   generalize (loc_arguments_rec_charact _ _ _ _ _ H).
-  destruct r.
-  intro H0; elim H0. simpl. unfold not. ElimOrEq; NotOrEq.
-  simpl. unfold not. ElimOrEq; NotOrEq.
-  destruct s0; try contradiction.
-  simpl. omega. 
+  destruct l.
+  intro H0; elim H0; simpl; ElimOrEq; OrEq.
+  destruct sl; try contradiction. simpl. intuition omega.
 Qed. 
 Hint Resolve loc_arguments_acceptable: locs.
 
-(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *)
-
-Remark loc_arguments_rec_notin_reg:
-  forall tyl iregl fregl ofs r,
-  ~(In r iregl) -> ~(In r fregl) ->
-  Loc.notin (R r) (loc_arguments_rec tyl iregl fregl ofs).
-Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a. 
-  destruct iregl; simpl. auto.
-  simpl in H. split. apply sym_not_equal. tauto.
-  apply IHtyl. tauto. tauto.
-  destruct fregl; simpl. auto.
-  simpl in H0. split. apply sym_not_equal. tauto.
-  apply IHtyl. 
-  red; intro. apply H. auto.
-  tauto.
-Qed.
-
-Remark loc_arguments_rec_notin_local:
-  forall tyl iregl fregl ofs ofs0 ty0,
-  Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs).
-Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a.
-  destruct iregl; simpl; auto.
-  destruct fregl; simpl; auto.
-Qed.
-
-Remark loc_arguments_rec_notin_outgoing:
-  forall tyl iregl fregl ofs ofs0 ty0,
-  ofs0 + typesize ty0 <= ofs ->
-  Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs).
-Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a.
-  destruct iregl; simpl. 
-  split. omega. eapply IHtyl. omega.
-  auto.
-  destruct fregl; simpl. 
-  split. omega. eapply IHtyl. omega.
-  auto.
-Qed.
-
-Lemma loc_arguments_norepet:
-  forall (s: signature), Loc.norepet (loc_arguments s).
-Proof.
-  assert (forall tyl iregl fregl ofs,
-    list_norepet iregl ->
-    list_norepet fregl ->
-    list_disjoint iregl fregl ->
-    Loc.norepet (loc_arguments_rec tyl iregl fregl ofs)).
-  induction tyl; simpl; intros.
-  constructor.
-  destruct a. 
-  destruct iregl; constructor.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-  apply loc_arguments_rec_notin_reg. inversion H. auto.
-  apply list_disjoint_notin with (m :: iregl); auto with coqlib.
-  apply IHtyl. inv H; auto. auto.
-  eapply list_disjoint_cons_left; eauto.
-  destruct fregl; constructor.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-  apply loc_arguments_rec_notin_reg.
-  red; intro. apply (H1 m m). auto. 
-  auto with coqlib. auto. inv H0; auto.
-  apply IHtyl. auto.
-  inv H0; auto. 
-  red; intros. apply H1. auto. auto with coqlib.
-
-  intro. unfold loc_arguments. apply H.
-  unfold int_param_regs. NoRepet.
-  unfold float_param_regs. NoRepet.
-  red; intros x y; simpl. ElimOrEq; ElimOrEq; discriminate.
-Qed.
-
 (** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
 
 Remark size_arguments_rec_above:
-  forall tyl iregl fregl ofs0,
-  ofs0 <= size_arguments_rec tyl iregl fregl ofs0.
+  forall tyl ir fr ofs0,
+  ofs0 <= size_arguments_rec tyl ir fr ofs0.
 Proof.
   induction tyl; simpl; intros.
   omega.
   destruct a.
-  destruct iregl. apply Zle_trans with (ofs0 + 1); auto; omega. auto.
-  destruct fregl. apply Zle_trans with (ofs0 + 2); auto; omega. auto.
+  destruct (list_nth_z int_param_regs ir); eauto. apply Zle_trans with (ofs0 + 1); auto; omega.
+  destruct (list_nth_z float_param_regs fr); eauto. apply Zle_trans with (ofs0 + 2); auto; omega.
+  set (ir' := align ir 2).
+  destruct (list_nth_z int_param_regs ir'); eauto.
+  destruct (list_nth_z int_param_regs (ir' + 1)); eauto.
+  apply Zle_trans with (ofs0 + 2); auto; omega.
+  apply Zle_trans with (ofs0 + 2); auto; omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -518,81 +429,39 @@ Qed.
 
 Lemma loc_arguments_bounded:
   forall (s: signature) (ofs: Z) (ty: typ),
-  In (S (Outgoing ofs ty)) (loc_arguments s) ->
+  In (S Outgoing ofs ty) (loc_arguments s) ->
   ofs + typesize ty <= size_arguments s.
 Proof.
   intros.
-  assert (forall tyl iregl fregl ofs0,
-    In (S (Outgoing ofs ty)) (loc_arguments_rec tyl iregl fregl ofs0) ->
-    ofs + typesize ty <= size_arguments_rec tyl iregl fregl ofs0).
+  assert (forall tyl ir fr ofs0,
+    In (S Outgoing ofs ty) (loc_arguments_rec tyl ir fr ofs0) ->
+    ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0).
   induction tyl; simpl; intros.
   elim H0.
-  destruct a. destruct iregl; elim H0; intro.
-  inv H1. simpl. apply size_arguments_rec_above. auto.
-  discriminate. auto. 
-  destruct fregl; elim H0; intro.
-  inv H1. simpl. apply size_arguments_rec_above. auto.
-  discriminate. auto. 
-  unfold size_arguments. eapply H0. unfold loc_arguments in H. eauto.
-Qed.
-
-(** Temporary registers do not overlap with argument locations. *)
-
-Lemma loc_arguments_not_temporaries:
-  forall sig, Loc.disjoint (loc_arguments sig) temporaries.
-Proof.
-  intros; red; intros x1 x2 H.
-  generalize (loc_arguments_rec_charact _ _ _ _ _ H).
-  destruct x1. 
-  intro H0; elim H0; simpl; (ElimOrEq; ElimOrEq; congruence).
-  destruct s; try contradiction. intro.
-  simpl; ElimOrEq; auto.
-Qed.
-Hint Resolve loc_arguments_not_temporaries: locs.
-
-(** Argument registers are caller-save. *)
-
-Lemma arguments_caller_save:
-  forall sig r,
-  In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call.
-Proof.
-  unfold loc_arguments; intros.
-  elim (loc_arguments_rec_charact _ _ _ _ _ H); simpl.
-  ElimOrEq; intuition.
-  ElimOrEq; intuition.
-Qed.
-
-(** Argument locations agree in number with the function signature. *)
-
-Lemma loc_arguments_length:
-  forall sig,
-  List.length (loc_arguments sig) = List.length sig.(sig_args).
-Proof.
-  assert (forall tyl iregl fregl ofs,
-    List.length (loc_arguments_rec tyl iregl fregl ofs) = List.length tyl).
-  induction tyl; simpl; intros.
-  auto.
-  destruct a. 
-  destruct iregl; simpl; decEq; auto.
-  destruct fregl; simpl; decEq; auto.
-  intros. unfold loc_arguments. auto.
-Qed.
-
-(** Argument locations agree in types with the function signature. *)
-
-Lemma loc_arguments_type:
-  forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args).
-Proof.
-  assert (forall tyl iregl fregl ofs,
-    (forall r, In r iregl -> mreg_type r = Tint) ->
-    (forall r, In r fregl -> mreg_type r = Tfloat) ->
-    List.map Loc.type (loc_arguments_rec tyl iregl fregl ofs) = tyl).
-  induction tyl; simpl; intros.
-  auto.
-  destruct a; [destruct iregl|destruct fregl]; simpl;
-  f_equal; eauto with coqlib.
-
-  intros. unfold loc_arguments. apply H. 
-  intro; simpl. ElimOrEq; reflexivity.
-  intro; simpl. ElimOrEq; reflexivity.
+  destruct a.
+  destruct (list_nth_z int_param_regs ir); destruct H0. 
+  congruence.
+  eauto.
+  inv H0. apply size_arguments_rec_above.
+  eauto.
+  destruct (list_nth_z float_param_regs fr); destruct H0. 
+  congruence.
+  eauto.
+  inv H0. apply size_arguments_rec_above.
+  eauto.
+  set (ir' := align ir 2) in *.
+  destruct (list_nth_z int_param_regs ir').
+  destruct (list_nth_z int_param_regs (ir' + 1)).
+  destruct H0. congruence. destruct H0. congruence. eauto.
+  destruct H0. inv H0. 
+  transitivity (ofs + 2). simpl; omega. eauto.  apply size_arguments_rec_above.
+  destruct H0. inv H0.
+  transitivity (ofs0 + 2). simpl; omega. eauto.  apply size_arguments_rec_above.
+  eauto. 
+  destruct H0. inv H0. 
+  transitivity (ofs + 2). simpl; omega. eauto.  apply size_arguments_rec_above.
+  destruct H0. inv H0.
+  transitivity (ofs0 + 2). simpl; omega. eauto.  apply size_arguments_rec_above.
+  eauto. 
+  eauto.
 Qed.
diff --git a/powerpc/eabi/Stacklayout.v b/powerpc/eabi/Stacklayout.v
index cd4d9ae..be823c1 100644
--- a/powerpc/eabi/Stacklayout.v
+++ b/powerpc/eabi/Stacklayout.v
@@ -24,12 +24,8 @@ Require Import Bounds.
   Since we would rather store our return address in our own
   frame, we will not use these 4 bytes, and just reserve them.
 - Space for outgoing arguments to function calls.
-- Local stack slots of integer type.
-- Saved return address into caller.
+- Local stack slots.
 - Saved values of integer callee-save registers used by the function.
-- One word of padding, if necessary to align the following data
-  on a 8-byte boundary.
-- Local stack slots of float type.
 - Saved values of float callee-save registers used by the function.
 - Space for the stack-allocated data declared in Cminor.
 
@@ -43,10 +39,9 @@ Record frame_env : Type := mk_frame_env {
   fe_size: Z;
   fe_ofs_link: Z;
   fe_ofs_retaddr: Z;
-  fe_ofs_int_local: Z;
+  fe_ofs_local: Z;
   fe_ofs_int_callee_save: Z;
   fe_num_int_callee_save: Z;
-  fe_ofs_float_local: Z;
   fe_ofs_float_callee_save: Z;
   fe_num_float_callee_save: Z;
   fe_stack_data: Z
@@ -56,17 +51,17 @@ Record frame_env : Type := mk_frame_env {
   function. *)
 
 Definition make_env (b: bounds) :=
-  let oil := 8 + 4 * b.(bound_outgoing) in    (* integer locals *)
-  let ora := oil + 4 * b.(bound_int_local) in (* saved return address *)
+  let ol := align (8 + 4 * b.(bound_outgoing)) 8 in    (* locals *)
+  let ora := ol + 4 * b.(bound_local) in (* saved return address *)
   let oics := ora + 4 in            (* integer callee-saves *)
   let oendi := oics + 4 * b.(bound_int_callee_save) in
-  let ofl := align oendi 8 in       (* float locals *)
-  let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *)
+  let ofcs := align oendi 8 in       (* float callee-saves *)
   let ostkdata := ofcs + 8 * b.(bound_float_callee_save) in (* stack data *)
   let sz := align (ostkdata + b.(bound_stack_data)) 16 in
   mk_frame_env sz 0 ora
-                  oil oics b.(bound_int_callee_save)
-                  ofl ofcs b.(bound_float_callee_save)
+                  ol
+                  oics b.(bound_int_callee_save)
+                  ofcs b.(bound_float_callee_save)
                   ostkdata.
 
 (** Separation property *)
@@ -76,25 +71,25 @@ Remark frame_env_separated:
   let fe := make_env b in
   0 <= fe.(fe_ofs_link)
   /\ fe.(fe_ofs_link) + 4 <= fe_ofs_arg
-  /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_int_local)
-  /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_retaddr)
+  /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_local)
+  /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_ofs_retaddr)
   /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_ofs_int_callee_save)
-  /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local)
-  /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save)
+  /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save)
   /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_stack_data)
-  /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size).
+  /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size)
+  /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_size).
 Proof.
   intros.
+  generalize (align_le (8 + 4 * b.(bound_outgoing)) 8 (refl_equal _)).
   generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)).
   generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 16 (refl_equal _)).
   unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
-    fe_ofs_int_local, fe_ofs_int_callee_save,
+    fe_ofs_local, fe_ofs_int_callee_save,
     fe_num_int_callee_save,
-    fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+    fe_ofs_float_callee_save, fe_num_float_callee_save,
     fe_stack_data, fe_ofs_arg.
   intros.
-  generalize (bound_int_local_pos b); intro;
-  generalize (bound_float_local_pos b); intro;
+  generalize (bound_local_pos b); intro;
   generalize (bound_int_callee_save_pos b); intro;
   generalize (bound_float_callee_save_pos b); intro;
   generalize (bound_outgoing_pos b); intro;
@@ -108,9 +103,8 @@ Remark frame_env_aligned:
   forall b,
   let fe := make_env b in
   (4 | fe.(fe_ofs_link))
-  /\ (4 | fe.(fe_ofs_int_local))
+  /\ (8 | fe.(fe_ofs_local))
   /\ (4 | fe.(fe_ofs_int_callee_save))
-  /\ (8 | fe.(fe_ofs_float_local))
   /\ (8 | fe.(fe_ofs_float_callee_save))
   /\ (4 | fe.(fe_ofs_retaddr))
   /\ (8 | fe.(fe_stack_data))
@@ -118,24 +112,23 @@ Remark frame_env_aligned:
 Proof.
   intros.
   unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
-    fe_ofs_int_local, fe_ofs_int_callee_save,
+    fe_ofs_local, fe_ofs_int_callee_save,
     fe_num_int_callee_save,
-    fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+    fe_ofs_float_callee_save, fe_num_float_callee_save,
     fe_stack_data.
-  set (x1 := 8 + 4 * bound_outgoing b).
-  assert (4 | x1). unfold x1; apply Zdivide_plus_r. exists 2; auto. exists (bound_outgoing b); ring.
-  set (x2 := x1 + 4 * bound_int_local b).
-  assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring.
+  set (x1 := align (8 + 4 * bound_outgoing b) 8).
+  assert (8 | x1). unfold x1; apply align_divides. omega. 
+  set (x2 := x1 + 4 * bound_local b).
+  assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto.
+  apply Zdivides_trans with 8. exists 2; auto. auto. 
+  exists (bound_local b); ring.
   set (x3 := x2 + 4).
   assert (4 | x3). unfold x3; apply Zdivide_plus_r; auto. exists 1; auto.
   set (x4 := align (x3 + 4 * bound_int_callee_save b) 8).
   assert (8 | x4). unfold x4. apply align_divides. omega.
-  set (x5 := x4 + 8 * bound_float_local b).
-  assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring.
-  set (x6 := x5 + 8 * bound_float_callee_save b).
-  assert (8 | x6).
-    unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
-  set (x7 := align (x6 + bound_stack_data b) 16).
-  assert (16 | x7). unfold x7; apply align_divides. omega.
+  set (x5 := x4 + 8 * bound_float_callee_save b).
+  assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
+  set (x6 := align (x5 + bound_stack_data b) 16).
+  assert (16 | x6). unfold x6; apply align_divides. omega.
   intuition.
 Qed.
diff --git a/runtime/Makefile b/runtime/Makefile
index c128ba2..5550e6b 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -2,26 +2,34 @@ include ../Makefile.config
 
 CFLAGS=-O1 -g -Wall
 INCLUDES=
-OBJS=
+OBJS=int64.o
 LIB=libcompcert.a
 
-ifeq ($(strip $(HAS_RUNTIME_LIB)),true)
 all: $(LIB) $(INCLUDES)
-else
-all:
-endif
 
 $(LIB): $(OBJS)
 	rm -f $(LIB)
 	ar rcs $(LIB) $(OBJS)
 
-clean:
+%.o: $(ARCH)/%.s
+	$(CASM) $^
+
+clean::
 	rm -f *.o $(LIB)
 
-ifeq ($(strip $(HAS_RUNTIME_LIB)),true)
 install:
 	install -d $(LIBDIR)
 	install -c $(LIB) $(INCLUDES) $(LIBDIR)
-else
-install:
-endif
+
+test/test_int64: test/test_int64.c $(LIB)
+	$(CC) -o $@ test/test_int64.c $(LIB)
+
+clean::
+	rm -f test/test_int64
+
+.PHONY: test
+
+test: FORCE test/test_int64
+	test/test_int64
+
+FORCE:
diff --git a/runtime/arm/int64.s b/runtime/arm/int64.s
new file mode 100644
index 0000000..6b03351
--- /dev/null
+++ b/runtime/arm/int64.s
@@ -0,0 +1,424 @@
+@ *****************************************************************
+@
+@               The Compcert verified compiler
+@
+@           Xavier Leroy, INRIA Paris-Rocquencourt
+@
+@ Copyright (c) 2013 Institut National de Recherche en Informatique et
+@  en Automatique.
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions are met:
+@     * Redistributions of source code must retain the above copyright
+@       notice, this list of conditions and the following disclaimer.
+@     * Redistributions in binary form must reproduce the above copyright
+@       notice, this list of conditions and the following disclaimer in the
+@       documentation and/or other materials provided with the distribution.
+@     * Neither the name of the <organization> nor the
+@       names of its contributors may be used to endorse or promote products
+@       derived from this software without specific prior written permission.
+@ 
+@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+@ HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+@
+@ *********************************************************************
+
+@ Helper functions for 64-bit integer arithmetic.  ARM version.
+
+@ Calling conventions for R = F(X) or R = F(X,Y):
+@   one or two long arguments:   XL in r0, XH in r1, YL in r2, YH in r3
+@   one long argument, one int:  XL in r0, XH in r1, Y in r2
+@   one float argument:          X in r0, r1
+@   one long result:             RL in r0, RH in r1
+@   one float result:            R in r0, r1
+@ This is a little-endian convention: the low word is in the
+@ low-numbered register.
+@ Can use r0...r3 and f0...f7 as temporary registers (caller-save)	
+	
+	.text
+        
+@@@ Unsigned comparison	
+	
+	.global __i64_ucmp
+__i64_ucmp:
+        cmp r1, r3      @ compare high words
+        cmpeq r0, r2    @ if equal, compare low words instead
+        moveq r0, #0    @ res = 0 if eq
+        movhi r0, #1    @ res = 1 if unsigned higher
+        mvnlo r0, #0    @ res = -1 if unsigned lower
+        bx lr
+	.type __i64_ucmp, %function
+	.size __i64_ucmp, . - __i64_ucmp
+	
+@@@ Signed comparison	
+	
+	.global __i64_scmp
+__i64_scmp:
+        cmp r0, r2              @ compare low words (unsigned)
+        moveq r0, #0            @ res = 0 if eq
+        movhi r0, #1            @ res = 1 if unsigned higher
+        mvnlo r0, #0            @ res = -1 if unsigned lower
+        cmp r1, r3              @ compare high words (signed)
+	addgt r0, r0, #2        @ res += 2 if signed greater
+	sublt r0, r0, #2        @ res -= 2 if signed less
+   @ here, r0 = 0 if X == Y
+   @       r0 = -3, -2, -1 if X < Y
+   @       r0 = 1, 2, 3 if X > Y
+        bx      lr
+	.type __i64_scmp, %function
+	.size __i64_scmp, . - __i64_scmp
+	
+@@@ Opposite
+	
+        .global __i64_neg
+__i64_neg:
+        rsbs r0, r0, #0
+        rsc r1, r1, #0
+        bx lr
+	.type __i64_neg, %function
+	.size __i64_neg, . - __i64_neg
+
+@@@ Addition	
+	
+	.global __i64_add
+__i64_add:
+        adds r0, r0, r2
+        adc r1, r1, r3
+        bx lr
+	.type __i64_add, %function
+	.size __i64_add, . - __i64_add
+	
+@@@ Subtraction
+	
+	.global __i64_sub
+__i64_sub:
+        subs r0, r0, r2
+        sbc r1, r1, r3
+        bx lr
+	.type __i64_sub, %function
+	.size __i64_sub, . - __i64_sub
+	
+@ Note on ARM shifts: the shift amount is taken modulo 256.
+@ Therefore, unsigned shifts by 32 bits or more produce 0.        
+	
+@@@ Shift left	
+
+	.global __i64_shl
+__i64_shl:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r1, r1, lsl r2
+        orr r1, r0, lsr r3
+        mov r0, r0, lsl r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r1, r0, lsl r2
+        mov r0, #0
+        bx lr
+	.type __i64_shl, %function
+	.size __i64_shl, . - __i64_shl
+
+@@@ Shift right unsigned	
+
+	.global __i64_shr
+__i64_shr:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r0, r0, lsr r2
+        orr r0, r1, lsl r3
+        mov r1, r1, lsr r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r0, r1, lsr r2
+        mov r1, #0
+        bx lr
+	.type __i64_shr, %function
+	.size __i64_shr, . - __i64_shr
+	
+@@@ Shift right signed
+
+	.global __i64_sar
+__i64_sar:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r0, r0, lsr r2
+        orr r0, r1, lsl r3
+        mov r1, r1, asr r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r0, r1, asr r2
+        mov r1, r1, asr #31
+        bx lr
+	.type __i64_sar, %function
+	.size __i64_sar, . - __i64_sar
+	
+@@@ Multiplication
+	
+	.global __i64_mul
+__i64_mul:
+        push {r4, r5}
+        mov r4, r0              @ save first arg in r4,r5
+        mov r5, r1
+        umull r0, r1, r2, r4    @ 64-bit product of low halves
+        mla r1, r2, r5, r1      @ add 32-bit products low half * high half
+        mla r1, r3, r4, r1      @ to high half of result
+        pop {r4, r5}
+        bx lr
+	.type __i64_mul, %function
+	.size __i64_mul, . - __i64_mul
+	
+@@@ Auxiliary function for division and modulus. Not exported.
+
+@ On entry:  N = (r0, r1) numerator    D = (r2, r3) divisor	
+@ On exit:   Q = (r4, r5) quotient     R = (r0, r1) remainder	
+@ Locals:    M = (r6, r7) mask         TMP = r8 temporary	
+	
+__i64_udivmod:
+        orrs r8, r2, r3         @ is D == 0?
+        bxeq lr                 @ if so, return with unspecified results
+        mov r4, #0              @ Q = 0
+        mov r5, #0
+        mov r6, #1              @ M = 1
+        mov r7, #0
+1:      cmp r3, #0              @ while ((signed) D >= 0) ...
+        blt 2f
+        subs r8, r0, r2         @ ... and N >= D ...
+        sbcs r8, r1, r3
+        blo 2f
+        adds r2, r2, r2         @ D = D << 1
+        adc r3, r3, r3
+        adds r6, r6, r6         @ M = M << 1
+        adc r7, r7, r7
+        b 1b
+2:      subs r0, r0, r2         @ N = N - D
+        sbcs r1, r1, r3
+        orr r4, r4, r6          @ Q = Q | M
+        orr r5, r5, r7
+        bhs 3f                  @ if N was >= D, continue
+        adds r0, r0, r2         @ otherwise, undo what we just did
+        adc r1, r1, r3          @ N = N + D
+        bic r4, r4, r6          @ Q = Q & ~M
+        bic r5, r5, r7
+3:      movs r7, r7, lsr #1     @ M = M >> 1
+        mov r6, r6, rrx
+        movs r3, r3, lsr #1     @ D = D >> 1
+        mov r2, r2, rrx
+        orrs r8, r6, r7         @ repeat while (M != 0) ...
+        bne 2b
+        bx lr
+	
+@@@ Unsigned division	
+	
+	.global __i64_udiv
+__i64_udiv:
+        push {r4, r5, r6, r7, r8, lr}
+        bl __i64_udivmod
+        mov r0, r4
+        mov r1, r5
+        pop {r4, r5, r6, r7, r8, lr}
+        bx lr
+	.type __i64_udiv, %function
+	.size __i64_udiv, . - __i64_udiv
+	
+@@@ Unsigned modulus	
+	
+	.global __i64_umod
+__i64_umod:
+        push {r4, r5, r6, r7, r8, lr}
+        bl __i64_udivmod         @ remainder is already in r0,r1
+        pop {r4, r5, r6, r7, r8, lr}
+        bx lr
+	.type __i64_umod, %function
+	.size __i64_umod, . - __i64_umod
+	
+	.global __i64_sdiv
+__i64_sdiv:
+        push {r4, r5, r6, r7, r8, r10, lr}
+        eor r10, r1, r3          @ r10 = sign of result
+        mov r4, r1, asr #31      @ take absolute value of N
+        eor r0, r0, r4           @ N = (N ^ (N >>s 31)) - (N >>s 31)
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        mov r4, r3, asr #31      @ take absolute value of D
+        eor r2, r2, r4
+        eor r3, r3, r4
+        subs r2, r2, r4
+        sbc r3, r3, r4
+        bl __i64_udivmod         @ do unsigned division
+        mov r0, r4
+        mov r1, r5
+        eor r0, r0, r10, asr#31  @ apply expected sign
+        eor r1, r1, r10, asr#31
+        subs r0, r0, r10, asr#31
+        sbc r1, r1, r10, asr#31
+        pop {r4, r5, r6, r7, r8, r10, lr}
+        bx lr
+	.type __i64_sdiv, %function
+	.size __i64_sdiv, . - __i64_sdiv
+	
+@@@ Signed modulus	
+
+	.global __i64_smod
+__i64_smod:
+        push {r4, r5, r6, r7, r8, r10, lr}
+	mov r10, r1              @ r10 = sign of result
+        mov r4, r1, asr#31       @ take absolute value of N
+        eor r0, r0, r4           @ N = (N ^ (N >>s 31)) - (N >>s 31)
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        mov r4, r3, asr #31      @ take absolute value of D
+        eor r2, r2, r4
+        eor r3, r3, r4
+        subs r2, r2, r4
+        sbc r3, r3, r4
+        bl __i64_udivmod         @ do unsigned division
+        eor r0, r0, r10, asr#31  @ apply expected sign
+        eor r1, r1, r10, asr#31
+        subs r0, r0, r10, asr#31
+        sbc r1, r1, r10, asr#31
+        pop {r4, r5, r6, r7, r8, r10, lr}
+        bx lr
+	.type __i64_smod, %function
+	.size __i64_smod, . - __i64_smod
+	
+@@@ Conversion from unsigned 64-bit integer to double float
+	
+	.global __i64_utod
+__i64_utod:
+        fmsr s0, r0
+        fuitod d0, s0           @ convert low half to double (unsigned)
+        fmsr s2, r1
+        fuitod d1, s2           @ convert high half to double (unsigned)
+        fldd d2, .LC1           @ d2 = 2^32
+        fmacd d0, d1, d2        @ d0 = d0 + d1 * d2 = double value of int64
+	fmrrd r0, r1, d0        @ return result in r0, r1
+        bx lr
+	.type __i64_utod, %function
+	.size __i64_utod, . - __i64_utod
+	
+        .balign 8
+.LC1:   .quad 0x41f0000000000000 @ 2^32 in double precision
+	
+@@@ Conversion from signed 64-bit integer to double float
+	
+	.global __i64_stod
+__i64_stod:
+        fmsr s0, r0
+        fuitod d0, s0           @ convert low half to double (unsigned)
+        fmsr s2, r1
+        fsitod d1, s2           @ convert high half to double (signed)
+        fldd d2, .LC1           @ d2 = 2^32
+        fmacd d0, d1, d2        @ d0 = d0 + d1 * d2 = double value of int64
+	fmrrd r0, r1, d0        @ return result in r0, r1
+        bx lr
+	.type __i64_stod, %function
+	.size __i64_stod, . - __i64_stod
+
+@@@ Conversion from double float to unsigned 64-bit integer	
+
+	.global __i64_dtou
+__i64_dtou:
+        cmp r1, #0              @ is double < 0 ?
+        blt 1f                  @ then it converts to 0
+  @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+  @ note: 1023 + 52 = 1075 = 1024 + 51
+  @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+        mov r2, r1, lsl #1
+        mov r2, r2, lsr #21
+        sub r2, r2, #51
+        sub r2, r2, #1024
+  @ check range of exponent
+        cmn r2, #52             @ if EXP < -52, double is < 1.0
+        blt 1f
+        cmp r2, #12             @ if EXP >= 64 - 52, double is >= 2^64
+        bge 2f
+  @ extract true mantissa
+        bic r1, r1, #0xFF000000
+        bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+        orr r1, r1, #0x00100000 @ HI |= 0x00100000
+  @ shift it appropriately
+        cmp r2, #0
+        bge __i64_shl           @ if EXP >= 0, shift left by EXP
+        rsb r2, r2, #0
+        b __i64_shr             @ otherwise, shift right by -EXP
+  @ special cases
+1:      mov r0, #0              @ result is 0
+        mov r1, #0
+        bx lr
+2:      mvn r0, #0              @ result is 0xFF....FF (MAX_UINT)
+        mvn r1, #0
+        bx lr
+	.type __i64_dtou, %function
+        .size __i64_dtou, . - __i64_dtou
+	
+@@@ Conversion from double float to signed 64-bit integer
+	
+	.global __i64_dtos
+__i64_dtos:
+        push {r4, lr}
+        mov r4, r1, asr #31     @ save sign in r4
+  @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+  @ note: 1023 + 52 = 1075 = 1024 + 51
+  @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+        mov r2, r1, lsl #1
+        mov r2, r2, lsr #21
+        sub r2, r2, #51
+        sub r2, r2, #1024
+  @ check range of exponent
+        cmn r2, #52             @ if EXP < -52, |double| is < 1.0
+        blt 1f
+        cmp r2, #11             @ if EXP >= 63 - 52, |double| is >= 2^63
+        bge 2f
+  @ extract true mantissa
+        bic r1, r1, #0xFF000000
+        bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+        orr r1, r1, #0x00100000 @ HI |= 0x00100000
+  @ shift it appropriately
+        cmp r2, #0
+        blt 3f
+        bl __i64_shl            @ if EXP >= 0, shift left by EXP
+        b 4f
+3:      rsb r2, r2, #0
+        bl __i64_shr            @ otherwise, shift right by -EXP
+  @ apply sign to result
+4:      eor r0, r0, r4
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        pop {r4, lr}
+        bx lr
+  @ special cases
+1:      mov r0, #0 @ result is 0
+        mov r1, #0
+        pop {r4, lr}
+        bx lr
+2:      cmp r4, #0
+        blt 6f
+        mvn r0, #0              @ result is 0x7F....FF (MAX_SINT)
+        mov r1, r0, lsr #1
+        pop {r4, lr}
+        bx lr
+6:      mov r0, #0              @ result is 0x80....00 (MIN_SINT)
+        mov r1, #0x80000000
+        pop {r4, lr}
+        bx lr
+	.type __i64_dtos, %function
+        .size __i64_dtos, . - __i64_dtos
+
diff --git a/runtime/ia32/int64.s b/runtime/ia32/int64.s
new file mode 100644
index 0000000..8fd8151
--- /dev/null
+++ b/runtime/ia32/int64.s
@@ -0,0 +1,471 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  IA32 version.
+	
+        .text
+
+# Opposite
+
+        .globl __i64_neg
+        .balign 16
+__i64_neg:
+        movl 4(%esp), %eax
+        movl 8(%esp), %edx
+        negl %eax
+        adcl $0, %edx
+        negl %edx
+        ret
+        .type __i64_neg, @function
+        .size __i64_neg, . - __i64_neg
+
+# Addition
+
+        .globl __i64_add
+        .balign 16
+__i64_add:
+        movl 4(%esp), %eax
+        movl 8(%esp), %edx
+        addl 12(%esp), %eax
+        adcl 16(%esp), %edx
+        ret
+        .type __i64_add, @function
+        .size __i64_add, . - __i64_add
+
+# Subtraction
+
+        .globl __i64_sub
+        .balign 16
+__i64_sub:
+        movl 4(%esp), %eax
+        movl 8(%esp), %edx
+        subl 12(%esp), %eax
+        sbbl 16(%esp), %edx
+        ret
+        .type __i64_sub, @function
+        .size __i64_sub, . - __i64_sub
+
+# Multiplication
+
+        .globl __i64_mul
+        .balign 16
+__i64_mul:
+        movl 4(%esp), %eax
+        mull 12(%esp)            # edx:eax = xlo * ylo 
+        movl 4(%esp), %ecx
+        imull 16(%esp), %ecx     # ecx = xlo * yhi
+        addl %ecx, %edx
+        movl 12(%esp), %ecx      # ecx = xhi * ylo
+        imull 8(%esp), %ecx
+        addl %ecx, %edx
+        ret
+        .type __i64_mul, @function
+        .size __i64_mul, . - __i64_mul
+
+# Division and remainder
+
+# Auxiliary function, not exported.
+# Input:   20(esp), 24(esp)  is dividend N
+#          28(esp), 32(esp)  is divisor  D
+# Output:  esi:edi is quotient Q
+#          eax:edx is remainder R
+# ebp is preserved
+
+        .balign 16
+__i64_udivmod:
+        cmpl $0, 32(%esp)        # single-word divisor? (DH = 0)
+        jne 1f
+  # Special case 64 bits divided by 32 bits
+        movl 28(%esp), %ecx      # divide NH by DL
+        movl 24(%esp), %eax      # (will trap if D = 0)
+        xorl %edx, %edx
+        divl %ecx                # eax = quotient, edx = remainder
+        movl %eax, %edi          # high word of quotient in edi
+        movl 20(%esp), %eax      # divide rem : NL by DL
+        divl %ecx                # eax = quotient, edx = remainder
+        movl %eax, %esi          # low word of quotient in esi */
+        movl %edx, %eax          # low word of remainder in eax
+        xorl %edx, %edx          # high word of remainder is 0, in edx
+        ret
+  # The general case
+1:      movl 28(%esp), %ecx      # esi:ecx = D
+        movl 32(%esp), %esi
+        movl 20(%esp), %eax      # edx:eax = N
+        movl 24(%esp), %edx
+  # Scale D and N down, giving D' and N', until D' fits in 32 bits
+2:      shrl $1, %esi            # shift D' right by one
+        rcrl $1, %ecx
+        shrl $1, %edx            # shift N' right by one
+        rcrl $1, %eax
+        testl %esi, %esi         # repeat until D'H = 0
+        jnz 2b
+  # Divide N' by D' to get an approximate quotient
+        divl %ecx                # eax = quotient, edx = remainder
+        movl %eax, %esi          # save tentative quotient Q in esi
+  # Check for off by one quotient
+  # Compute Q * D
+3:      movl 32(%esp), %ecx
+        imull %esi, %ecx         # ecx = Q * DH
+        movl 28(%esp), %eax
+        mull %esi                # edx:eax = Q * DL
+        add %ecx, %edx           # edx:eax = Q * D
+        jc 5f                    # overflow in addition means Q is too high
+  # Compare Q * D with N, computing the remainder in the process
+        movl %eax, %ecx
+        movl 20(%esp), %eax
+        subl %ecx, %eax
+        movl %edx, %ecx
+        movl 24(%esp), %edx
+        sbbl %ecx, %edx          # edx:eax = N - Q * D
+        jnc 4f                   # no carry: N >= Q * D, we are fine
+        decl %esi                # carry: N < Q * D, adjust Q down by 1
+        addl 28(%esp), %eax      # and remainder up by D
+        adcl 32(%esp), %edx
+  # Finished
+4:      xorl %edi, %edi          # high half of quotient is 0
+        ret
+  # Special case when Q * D overflows
+5:      decl %esi                # adjust Q down by 1
+        jmp 3b                   # and redo check & computation of remainder
+
+# Unsigned division
+
+        .globl __i64_udiv
+        .balign 16
+__i64_udiv:
+        pushl %ebp
+        pushl %esi
+        pushl %edi
+        call __i64_udivmod
+        movl %esi, %eax
+        movl %edi, %edx
+        popl %edi
+        popl %esi
+        popl %ebp
+        ret
+        .type __i64_udiv, @function
+        .size __i64_udiv, . - __i64_udiv
+
+# Unsigned remainder
+
+        .globl __i64_umod
+        .balign 16
+__i64_umod:
+        pushl %ebp
+        pushl %esi
+        pushl %edi
+        call __i64_udivmod
+        popl %edi
+        popl %esi
+        popl %ebp
+        ret
+        .type __i64_umod, @function
+        .size __i64_umod, . - __i64_umod
+
+# Signed division
+
+        .globl __i64_sdiv
+        .balign 16
+__i64_sdiv:
+        pushl %ebp
+        pushl %esi
+        pushl %edi
+        movl 20(%esp), %esi          # esi = NH
+        movl %esi, %ebp              # save sign of N in ebp
+        testl %esi, %esi
+        jge 1f                       # if N < 0,
+        negl 16(%esp)                # N = -N
+        adcl $0, %esi
+        negl %esi
+        movl %esi, 20(%esp)
+1:      movl 28(%esp), %esi          # esi = DH
+        xorl %esi, %ebp              # sign of result in ebp
+        testl %esi, %esi
+        jge 2f                       # if D < 0,
+        negl 24(%esp)                # D = -D
+        adcl $0, %esi
+        negl %esi
+        movl %esi, 28(%esp)
+2:      call __i64_udivmod
+        testl %ebp, %ebp             # apply sign to result
+        jge 3f
+        negl %esi
+        adcl $0, %edi
+        negl %edi
+3:      movl %esi, %eax
+        movl %edi, %edx
+        popl %edi
+        popl %esi
+        popl %ebp
+        ret
+        .type __i64_sdiv, @function
+        .size __i64_sdiv, . - __i64_sdiv
+
+# Signed remainder
+
+        .globl __i64_smod
+        .balign 16
+__i64_smod:
+        pushl %ebp
+        pushl %esi
+        pushl %edi
+        movl 20(%esp), %esi            # esi = NH
+        movl %esi, %ebp                # save sign of result in ebp
+        testl %esi, %esi
+        jge 1f                         # if N < 0,
+        negl 16(%esp)                  # N = -N
+        adcl $0, %esi
+        negl %esi
+        movl %esi, 20(%esp)
+1:      movl 28(%esp), %esi            # esi = DH
+        testl %esi, %esi
+        jge 2f                         # if D < 0,
+        negl 24(%esp)                  # D = -D
+        adcl $0, %esi
+        negl %esi
+        movl %esi, 28(%esp)
+2:      call __i64_udivmod
+        testl %ebp, %ebp               # apply sign to result
+        jge 3f
+        negl %eax
+        adcl $0, %edx
+        negl %edx
+3:      popl %edi
+        popl %esi
+        popl %ebp
+        ret
+        .type __i64_sdiv, @function
+        .size __i64_sdiv, . - __i64_sdiv
+
+# Note on shifts:
+# IA32 shift instructions treat their amount (in %cl) modulo 32
+
+# Shift left
+
+        .globl __i64_shl
+        .balign 16
+__i64_shl:
+        movl 12(%esp), %ecx             # ecx = shift amount, treated mod 64
+        testb $32, %cl
+        jne 1f
+  # shift amount < 32  
+        movl 4(%esp), %eax
+        movl 8(%esp), %edx
+        shldl %cl, %eax, %edx           # edx = high(XH:XL << amount)
+        shll %cl, %eax                  # eax = XL << amount
+        ret
+  # shift amount >= 32
+1:      movl 4(%esp), %edx
+        shll %cl, %edx                  # edx = XL << (amount - 32)
+        xorl %eax, %eax                 # eax = 0
+        ret
+        .type __i64_shl, @function
+        .size __i64_shl, . - __i64_shl
+
+# Shift right unsigned
+
+        .globl __i64_shr
+        .balign 16
+__i64_shr:
+        movl 12(%esp), %ecx             # ecx = shift amount, treated mod 64
+        testb $32, %cl
+        jne 1f
+  # shift amount < 32
+        movl 4(%esp), %eax
+        movl 8(%esp), %edx
+        shrdl %cl, %edx, %eax           # eax = low(XH:XL >> amount)
+        shrl %cl, %edx                  # edx = XH >> amount
+        ret
+  # shift amount >= 32
+1:      movl 8(%esp), %eax
+        shrl %cl, %eax                  # eax = XH >> (amount - 32)
+        xorl %edx, %edx                 # edx = 0
+        ret
+        .type __i64_shr, @function
+        .size __i64_shr, . - __i64_shr
+
+# Shift right signed
+
+        .globl __i64_sar
+        .balign 16
+__i64_sar:
+        movl 12(%esp), %ecx             # ecx = shift amount, treated mod 64
+        testb $32, %cl
+        jne 1f
+  # shift amount < 32
+        movl 4(%esp), %eax
+        movl 8(%esp), %edx
+        shrdl %cl, %edx, %eax           # eax = low(XH:XL >> amount)
+        sarl %cl, %edx                  # edx = XH >> amount (signed)
+        ret
+  # shift amount >= 32
+1:      movl 8(%esp), %eax
+        movl %eax, %edx
+        sarl %cl, %eax                  # eax = XH >> (amount - 32)
+        sarl $31, %edx                  # edx = sign of X
+        ret
+        .type __i64_sar, @function
+        .size __i64_sar, . - __i64_sar
+
+# Unsigned comparison
+
+        .globl __i64_ucmp
+        .balign 16
+__i64_ucmp:
+        movl 8(%esp), %eax              # compare high words
+        cmpl 16(%esp), %eax
+        jne 1f                          # if high words equal,
+        movl 4(%esp), %eax              # compare low words
+        cmpl 12(%esp), %eax
+1:      seta %al                        # AL = 1 if >, 0 if <=
+        setb %dl                        # DL = 1 if <, 0 if >=
+        subb %dl, %al                   # AL = 0 if same, 1 if >, -1 if <
+        movsbl %al, %eax
+        ret
+        .type __i64_ucmp, @function
+        .size __i64_ucmp, . - __i64_ucmp
+
+# Signed comparison
+
+        .globl __i64_scmp
+        .balign 16
+__i64_scmp:
+        movl 8(%esp), %eax              # compare high words (signed)
+        cmpl 16(%esp), %eax
+        je 1f                           # if different,
+        setg %al                        # extract result 
+        setl %dl
+        subb %dl, %al
+        movsbl %al, %eax
+        ret
+1:      movl 4(%esp), %eax              # if high words equal,
+        cmpl 12(%esp), %eax             # compare low words (unsigned)
+        seta %al                        # and extract result
+        setb %dl
+        subb %dl, %al
+        movsbl %al, %eax
+        ret
+        .type __i64_scmp, @function
+        .size __i64_scmp, . - __i64_scmp
+
+# Conversion signed long -> float
+
+        .globl __i64_stod
+        .balign 16
+__i64_stod:
+        fildll 4(%esp)
+        ret
+        .type __i64_stod, @function
+        .size __i64_stod, . - __i64_stod
+
+# Conversion unsigned long -> float
+
+        .globl __i64_utod
+        .balign 16
+__i64_utod:
+        fildll 4(%esp)                  # convert as if signed
+        cmpl $0, 8(%esp)                # is argument >= 2^63?
+        jns 1f
+        fadds LC1                       # adjust by 2^64
+1:      ret
+        .type __i64_stod, @function
+        .size __i64_stod, . - __i64_stod
+
+        .balign 4
+LC1:    .long 0x5f800000                # 2^64 in single precision
+
+# Conversion float -> signed long
+
+        .globl __i64_dtos
+        .balign 16
+__i64_dtos:
+        subl $4, %esp
+  # Change rounding mode to "round towards zero"
+        fnstcw 0(%esp)
+        movw 0(%esp), %ax
+        movb $12, %ah
+        movw %ax, 2(%esp)
+        fldcw 2(%esp)
+  # Convert
+        fldl 8(%esp)
+        fistpll 8(%esp)
+  # Restore rounding mode
+        fldcw 0(%esp)
+  # Load result in edx:eax
+        movl 8(%esp), %eax
+        movl 12(%esp), %edx
+        addl $4, %esp
+        ret
+        .type __i64_dtos, @function
+        .size __i64_dtos, . - __i64_dtos
+
+# Conversion float -> unsigned long
+
+        .globl __i64_dtou
+        .balign 16
+__i64_dtou:
+        subl $4, %esp
+  # Change rounding mode to "round towards zero"
+        fnstcw 0(%esp)
+        movw 0(%esp), %ax
+        movb $12, %ah
+        movw %ax, 2(%esp)
+        fldcw 2(%esp)
+  # Compare argument with 2^63
+        fldl (4+4)(%esp)
+        flds LC2
+        fucomp
+        fnstsw %ax
+        sahf
+        jbe 1f                  # branch if not (ARG < 2^63)
+  # Argument < 2^63: convert as is
+        fistpll 8(%esp)
+        movl 8(%esp), %eax
+        movl 12(%esp), %edx
+        jmp 2f
+  # Argument > 2^63: offset ARG by -2^63, then convert, then offset RES by 2^63
+1:      fsubs LC2
+        fistpll 8(%esp)
+        movl 8(%esp), %eax
+        movl 12(%esp), %edx
+        addl $0x80000000, %edx
+  # Restore rounding mode
+2:      fldcw 0(%esp)
+        addl $4, %esp
+        ret
+        .type __i64_dtou, @function
+        .size __i64_dtou, . - __i64_dtou
+
+        .balign 4
+LC2:    .long 0x5f000000        # 2^63 in single precision
diff --git a/runtime/powerpc/int64.s b/runtime/powerpc/int64.s
new file mode 100644
index 0000000..34b65b5
--- /dev/null
+++ b/runtime/powerpc/int64.s
@@ -0,0 +1,492 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+# Calling conventions for R = F(X) or R = F(X,Y):
+#   one or two long arguments:   XH in r3, XL in r4, YH in r5, YL in r6
+#   one long argument, one int:  XH in r3, XL in r4, Y in r5
+#   one float argument:          X in f1	
+#   one long result:             RH in r3, RL in r4
+#   one float result:            R in f1        
+# This is a big-endian convention: the high word is in the low-numbered register
+# Can use r3...r12 and f0...r13 as temporary registers (caller-save)	
+	
+	.text
+        
+### Opposite
+
+        .balign 16
+        .globl __i64_neg
+__i64_neg:
+        subfic r4, r4, 0        # RL = -XL and set borrow iff XL != 0
+        subfze r3, r3           # RH = -XH - borrow
+        blr
+        .type __i64_neg, @function
+        .size __i64_neg, .-__i64_neg
+
+### Addition	
+
+        .balign 16
+        .globl __i64_add
+__i64_add:
+        addc r4, r4, r6         # RL = XL + YL and set carry if overflow
+        adde r3, r3, r5         # RH = XH + YH + carry
+        blr
+        .type __i64_add, @function
+        .size __i64_add, .-__i64_add
+
+### Subtraction	
+
+        .balign 16
+        .globl __i64_sub
+__i64_sub:
+        subfc r4, r6, r4        # RL = XL - YL and set borrow if underflow
+        subfe r3, r5, r3        # RH = XH - YH - borrow
+        blr
+        .type __i64_sub, @function
+        .size __i64_sub, .-__i64_sub
+
+### Multiplication	
+
+        .balign 16
+        .globl __i64_mul
+__i64_mul:
+   # Form intermediate products
+        mulhwu r7, r4, r6       # r7 = high half of XL * YL
+        mullw r8, r3, r6        # r8 = low half of XH * YL
+        mullw r9, r4, r5        # r9 = low half of XL * YH
+        mullw r4, r4, r6        # r4 = low half of XL * YL = low half of result
+   # Reconstruct high half of result
+        add r3, r7, r8
+        add r3, r3, r9
+        blr
+        .type __i64_mul, @function
+        .size __i64_mul, .-__i64_mul
+
+### Helper function for division and modulus.  Not exported.	
+# Input:  numerator N in (r3,r4), divisor D in (r5,r6)
+# Output: quotient Q in (r7,r8),  remainder R in (r3,r4)
+	.balign 16
+__i64_udivmod:
+   # Set up quotient and mask
+        li r8, 0                # Q = 0
+        li r7, 0
+        li r10, 1               # M = 1
+        li r9, 0
+   # Check for zero divisor
+        or. r0, r6, r5
+        beqlr                   # return with unspecified quotient & remainder
+   # Scale divisor and mask
+1:      cmpwi r5, 0             # while top bit of D is zero...
+        blt 2f
+        subfc r0, r6, r4        # compute borrow out of N - D
+        subfe r0, r5, r3
+        subfe. r0, r0, r0       # EQ iff no borrow iff N >= D
+        bne 2f                  # ... and while N >= D ...
+        addc r6, r6, r6         # scale divisor: D = D << 1
+        adde r5, r5, r5
+        addc r10, r10, r10      # scale mask: M = M << 1
+        adde r9, r9, r9
+        b 1b                    # end while
+   # Long division
+2:      subfc r4, r6, r4        # Q = Q | M, N = N - D, and compute borrow
+ 	or r8, r8, r10
+        subfe r3, r5, r3
+        or r7, r7, r9
+        subfe. r0, r0, r0       # test borrow
+        beq 3f                  # no borrow: N >= D, continue
+        addc r4, r4, r6         # borrow: undo what we just did to N and Q
+        andc r8, r8, r10
+        adde r3, r3, r5
+        andc r7, r7, r9
+3:      slwi r0, r9, 31         # unscale mask: M = M >> 1
+        srwi r10, r10, 1
+        or r10, r10, r0
+        srwi r9, r9, 1
+        slwi r0, r5, 31         # unscale divisor: D = D >> 1
+        srwi r6, r6, 1
+        or r6, r6, r0
+        srwi r5, r5, 1
+        or. r0, r10, r9         # iterate while M != 0
+        bne 2b
+        blr
+
+### Unsigned division	
+
+        .balign 16
+        .globl __i64_udiv
+__i64_udiv:
+        mflr r11                # save return address in r11
+        bl __i64_udivmod        # unsigned divide
+        mtlr r11                # restore return address
+        mr r3, r7               # R = quotient
+        mr r4, r8
+        blr
+        .type __i64_udiv, @function
+        .size __i64_udiv, .-__i64_udiv
+
+### Unsigned remainder        
+
+        .balign 16
+        .globl __i64_umod
+__i64_umod:
+        mflr r11
+        bl __i64_udivmod
+        mtlr r11
+        blr                     # remainder is already in R
+        .type __i64_umod, @function
+        .size __i64_umod, .-__i64_umod
+
+### Signed division	
+	
+        .balign 16
+        .globl __i64_sdiv
+__i64_sdiv:
+        mflr r11                # save return address
+        xor r12, r3, r5         # save sign of result in r12 (top bit)
+        srawi r0, r3, 31        # take absolute value of N
+        xor r4, r4, r0          # (i.e.  N = N ^ r0 - r0,
+        xor r3, r3, r0          #  where r0 = 0 if N >= 0 and r0 = -1 if N < 0)
+        subfc r4, r0, r4
+        subfe r3, r0, r3
+        srawi r0, r5, 31        # take absolute value of D
+        xor r6, r6, r0          # (same trick)
+        xor r5, r5, r0
+        subfc r6, r0, r6
+        subfe r5, r0, r5
+        bl __i64_udivmod        # do unsigned division
+        mtlr r11                # restore return address
+        srawi r0, r12, 31       # apply expected sign to quotient
+        xor r8, r8, r0          # RES = Q if r12 >= 0, -Q if r12 < 0
+        xor r7, r7, r0
+        subfc r4, r0, r8
+        subfe r3, r0, r7
+        blr
+        .type __i64_sdiv, @function
+        .size __i64_sdiv, .-__i64_sdiv
+	
+## Signed remainder	
+	
+        .balign 16
+        .globl __i64_smod
+__i64_smod:
+        mflr r11                # save return address
+        srawi r12, r3, 31       # save sign of result in r12 (sign of N)
+        xor r4, r4, r12         # and take absolute value of N
+        xor r3, r3, r12
+        subfc r4, r12, r4
+        subfe r3, r12, r3
+        srawi r0, r5, 31        # take absolute value of D
+        xor r6, r6, r0
+        xor r5, r5, r0
+        subfc r6, r0, r6
+        subfe r5, r0, r5
+        bl __i64_udivmod        # do unsigned division
+        mtlr r11                # restore return address
+        xor r4, r4, r12         # apply expected sign to remainder
+        xor r3, r3, r12         # RES = R if r12 == 0, -R if r12 == -1
+        subfc r4, r12, r4
+        subfe r3, r12, r3
+        blr
+        .type __i64_smod, @function
+        .size __i64_smod, .-__i64_smod
+
+### Unsigned comparison        
+
+        .balign 16
+        .globl __i64_ucmp
+__i64_ucmp:
+        cmplw cr0, r3, r5       # compare high words (unsigned)
+	cmplw cr1, r4, r6       # compare low words (unsigned)
+        mfcr r0
+# At this point, the bits of r0 are as follow:	
+#   bit 31: XH < YH
+#   bit 30: XH > YH        
+#   bit 27: XL > YL	
+#   bit 26: XL < YL	
+        rlwinm r3, r0, 0, 0, 1  # r3 = r0 & 0xC000_0000
+        srawi r3, r3, 24        # r4 = r4 >>s 28
+# r3 = -0x80 if XH < YH
+#    = 0x40  if XH > YH
+#    = 0     if XH = YH        
+        rlwinm r4, r0, 4, 0, 1  # r4 = (r0 << 4) & 0xC000_0000
+        srawi r4, r4, 28        # r4 = r4 >>s 28
+# r4 = -8  if XL < YL
+#    =  4  if XL > YL
+#    =  0  if XL = YL        
+	add     r3, r3, r4
+# r3 = -0x80 or -0x80 - 8 or -0x80 + 4 or -8 if X < Y 
+#         (in all cases, r3 < 0 if X < Y)	
+#    = 0x40 or 0x40 - 8 or 0x40 + 4 or 4 if X > Y 
+#         (in all cases, r3 > 0 if X > Y)	
+#    = 0 if X = Y
+        blr
+        .type __i64_ucmp, @function
+        .size __i64_ucmp, .-__i64_ucmp
+	
+### Signed comparison        
+
+        .balign 16
+        .globl __i64_scmp
+__i64_scmp:
+        cmpw cr0, r3, r5        # compare high words (signed)
+	cmplw cr1, r4, r6       # compare low words (unsigned)
+        mfcr r0
+# Same trick as in __i64_ucmp	
+        rlwinm r3, r0, 0, 0, 1
+        srawi r3, r3, 24
+        rlwinm r4, r0, 4, 0, 1
+        srawi r4, r4, 28
+	add     r3, r3, r4
+        blr
+        .type __i64_scmp, @function
+        .size __i64_scmp, .-__i64_scmp
+
+### Shifts	
+	
+# On PowerPC, shift instructions with amount mod 64 >= 32 return 0
+
+        .balign 16
+        .globl __i64_shl
+__i64_shl:
+# hi = (hi << amount) | (lo >> (32 - amount)) | (lo << (amount - 32))
+# lo = lo << amount
+# if 0 <= amount < 32:
+#    (amount - 32) mod 64 >= 32, hence lo << (amount - 32) == 0
+# if 32 <= amount < 64:
+#    lo << amount == 0
+#    (32 - amount) mod 64 >= 32, hence lo >> (32 - amount) == 0
+        andi. r5, r5, 63        # take amount modulo 64
+        subfic r6, r5, 32       # r6 = 32 - amount
+        addi r7, r5, -32        # r7 = amount - 32
+        slw r3, r3, r5
+        srw r0, r4, r6
+        or r3, r3, r0
+        slw r0, r4, r7
+        or r3, r3, r0
+        slw r4, r4, r5
+        blr
+        .type __i64_shl, @function
+        .size __i64_shl, .-__i64_shl
+        
+        .balign 16
+        .globl __i64_shr
+__i64_shr:
+# lo = (lo >> amount) | (hi << (32 - amount)) | (hi >> (amount - 32))
+# hi = hi >> amount
+# if 0 <= amount < 32:
+#    (amount - 32) mod 64 >= 32, hence hi >> (amount - 32) == 0
+# if 32 <= amount < 64:
+#    hi >> amount == 0
+#    (32 - amount) mod 64 >= 32, hence hi << (32 - amount) == 0
+        andi. r5, r5, 63        # take amount modulo 64
+        subfic r6, r5, 32       # r6 = 32 - amount
+        addi r7, r5, -32        # r7 = amount - 32
+        srw r4, r4, r5
+        slw r0, r3, r6
+        or r4, r4, r0
+        srw r0, r3, r7
+        or r4, r4, r0
+        srw r3, r3, r5
+        blr
+        .type __i64_shr, @function
+        .size __i64_shr, .-__i64_shr
+	
+        .balign 16
+        .globl __i64_sar
+__i64_sar:
+        andi. r5, r5, 63        # take amount modulo 64
+        cmpwi r5, 32
+        bge 1f                  # amount < 32?
+        subfic r6, r5, 32       # r6 = 32 - amount
+        srw r4, r4, r5          # RH = XH >>s amount
+        slw r0, r3, r6          # RL = XL >>u amount | XH << (32 - amount)
+        or r4, r4, r0
+        sraw r3, r3, r5
+        blr
+1:      addi r6, r5, -32        # amount >= 32
+        sraw r4, r3, r6         # RL = XH >>s (amount - 32)
+        srawi r3, r3, 31        # RL = sign extension of XH
+        blr
+        .type __i64_sar, @function
+        .size __i64_sar, .-__i64_sar
+
+### Conversion from unsigned long to double float	
+
+        .balign 16
+        .globl __i64_utod
+__i64_utod:
+        addi r1, r1, -16
+        lis r5, 0x4330
+        li r6, 0
+        stw r5, 0(r1)
+        stw r4, 4(r1)
+        stw r5, 8(r1)
+        stw r6, 12(r1)
+        lfd f1, 0(r1)
+        lfd f2, 8(r1)
+        fsub f1, f1, f2         # f1 is XL as a double
+        lis r5, 0x4530
+        stw r5, 0(r1)
+        stw r3, 4(r1)
+        stw r5, 8(r1)
+        lfd f2, 0(r1)
+        lfd f3, 8(r1)
+        fsub f2, f2, f3         # f2 is XH * 2^32 as a double
+        fadd f1, f1, f2         # add both to get result
+        addi r1, r1, 16
+        blr
+        .type __i64_utod, @function
+        .size __i64_utod, .-__i64_utod
+	
+### Conversion from signed long to double float	
+
+        .balign 16
+        .globl __i64_stod
+__i64_stod:
+        addi r1, r1, -16
+        lis r5, 0x4330
+        li r6, 0
+        stw r5, 0(r1)
+        stw r4, 4(r1)
+        stw r5, 8(r1)
+        stw r6, 12(r1)
+        lfd f1, 0(r1)
+        lfd f2, 8(r1)
+        fsub f1, f1, f2         # f1 is XL (unsigned) as a double
+        lis r5, 0x4530
+        lis r6, 0x8000
+        stw r5, 0(r1)
+        add r3, r3, r6
+        stw r3, 4(r1)
+        stw r5, 8(r1)
+        stw r6, 12(r1)
+        lfd f2, 0(r1)
+        lfd f3, 8(r1)
+        fsub f2, f2, f3         # f2 is XH (signed) * 2^32 as a double
+        fadd f1, f1, f2         # add both to get result
+        addi r1, r1, 16
+        blr
+        .type __i64_stod, @function
+        .size __i64_stod, .-__i64_stod
+
+### Conversion from double float to unsigned long	
+
+        .balign 16
+        .globl __i64_dtou
+__i64_dtou:
+        stfdu f1, -16(r1)       # extract LO (r4) and HI (r3) halves of double
+        lwz r3, 0(r1)
+        lwz r4, 4(r1)
+        addi r1, r1, 16
+        cmpwi r3, 0             # is double < 0?
+        blt 1f                  # then it converts to 0
+  # extract unbiased exponent  ((HI & 0x7FF00000) >> 20) - (1023 + 52)
+        rlwinm r5, r3, 12, 21, 31
+        addi r5, r5, -1075
+  # check range of exponent
+        cmpwi r5, -52           # if EXP < -52, double is < 1.0
+        blt 1f
+        cmpwi r5, 12            # if EXP >= 64 - 52, double is >= 2^64
+        bge 2f
+  # extract true mantissa
+        rlwinm r3, r3, 0, 12, 31  # HI &= ~0xFFF00000
+        oris r3, r3, 0x10         # HI |=  0x00100000
+  # shift it appropriately
+        cmpwi r5, 0
+        blt 3f
+        b __i64_shl             # if EXP >= 0, shift left by EXP
+3:      subfic r5, r5, 0
+        b __i64_shr             # if EXP < 0, shift right by -EXP
+  # Special cases
+1:      li r3, 0                # result is 0
+        li r4, 0
+        blr
+2:      li r3, -1               # result is MAX_UINT
+        li r4, -1
+        blr
+        .type __i64_dtou, @function
+        .size __i64_dtou, .-__i64_dtou
+
+### Conversion from double float to signed long	
+
+        .balign 16
+        .globl __i64_dtos
+__i64_dtos:
+        stfdu f1, -16(r1)       # extract LO (r4) and HI (r3) halves of double
+        lwz r3, 0(r1)
+        lwz r4, 4(r1)
+        addi r1, r1, 16
+        srawi r10, r3, 31       # save sign of double in r10
+ # extract unbiased exponent  ((HI & 0x7FF00000) >> 20) - (1023 + 52)
+        rlwinm r5, r3, 12, 21, 31
+        addi r5, r5, -1075
+ # check range of exponent
+        cmpwi r5, -52           # if EXP < -52, abs(double) is < 1.0
+        blt 1f
+        cmpwi r5, 11            # if EXP >= 63 - 52, abs(double) is >= 2^63
+        bge 2f
+  # extract true mantissa
+        rlwinm r3, r3, 0, 12, 31  # HI &= ~0xFFF00000
+        oris r3, r3, 0x10         # HI |=  0x00100000
+  # shift it appropriately
+        mflr r9                 # save retaddr in r9
+        cmpwi r5, 0
+        blt 3f
+        bl __i64_shl            # if EXP >= 0, shift left by EXP
+        b 4f
+3:      subfic r5, r5, 0
+        bl __i64_shr            # if EXP < 0, shift right by -EXP
+  # apply sign to result	
+4:      mtlr r9
+        xor r4, r4, r10
+        xor r3, r3, r10
+        subfc r4, r10, r4
+        subfe r3, r10, r3
+        blr
+  # Special cases
+1:      li r3, 0                # result is 0
+        li r4, 0
+        blr
+2:      cmpwi r10, 0            # result is MAX_SINT or MIN_SINT
+        bge 5f                  # depending on sign
+        li r4, -1               # result is MAX_SINT =  0x7FFF_FFFF
+        srwi r3, r4, 1
+        blr
+5:      lis r3, 0x8000          # result is MIN_SINT = 0x8000_0000
+        li r4, 0
+        blr
+        .type __i64_dtos, @function
+        .size __i64_dtos, .-__i64_dtos
diff --git a/runtime/test/test_int64.c b/runtime/test/test_int64.c
new file mode 100644
index 0000000..11adce3
--- /dev/null
+++ b/runtime/test/test_int64.c
@@ -0,0 +1,238 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*               The Compcert verified compiler                               */
+/*                                                                            */
+/*           Xavier Leroy, INRIA Paris-Rocquencourt                           */
+/*                                                                            */
+/* Copyright (c) 2013 Institut National de Recherche en Informatique et       */
+/*  en Automatique.                                                           */
+/*                                                                            */
+/* Redistribution and use in source and binary forms, with or without         */
+/* modification, are permitted provided that the following conditions are met:*/
+/*     * Redistributions of source code must retain the above copyright       */
+/*       notice, this list of conditions and the following disclaimer.        */
+/*     * Redistributions in binary form must reproduce the above copyright    */
+/*       notice, this list of conditions and the following disclaimer in the  */
+/*       documentation and/or other materials provided with the distribution. */
+/*     * Neither the name of the <organization> nor the                       */
+/*       names of its contributors may be used to endorse or promote products */
+/*       derived from this software without specific prior written permission.*/
+/*                                                                            */
+/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS        */
+/* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT          */
+/* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR      */
+/* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT          */
+/* HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,           */
+/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,        */
+/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR         */
+/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING       */
+/* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS         */
+/* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.               */
+/*                                                                            */
+/* ************************************************************************** */
+
+/* Differential testing of 64-bit integer operations */
+/* This file is to be compiled by a C compiler other than CompCert C */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef unsigned long long u64;
+typedef signed long long s64;
+
+extern s64 __i64_neg(s64 x);
+extern s64 __i64_add(s64 x, s64 y);
+extern s64 __i64_sub(s64 x, s64 y);
+extern s64 __i64_mul(s64 x, s64 y);
+extern u64 __i64_udiv(u64 x, u64 y);
+extern u64 __i64_umod(u64 x, u64 y);
+extern s64 __i64_sdiv(s64 x, s64 y);
+extern s64 __i64_smod(s64 x, s64 y);
+
+extern u64 __i64_shl(u64 x, unsigned amount);
+extern u64 __i64_shr(u64 x, unsigned amount);
+extern s64 __i64_sar(s64 x, unsigned amount);
+
+extern int __i64_ucmp(u64 x, u64 y);
+extern int __i64_scmp(s64 x, s64 y);
+
+extern double __i64_utod(u64 x);
+extern double __i64_stod(s64 x);
+extern u64 __i64_dtou(double d);
+extern s64 __i64_dtos(double d);
+
+static u64 rnd64(void)
+{
+  static u64 seed = 0;
+  seed = seed * 6364136223846793005ULL + 1442695040888963407ULL;
+  return seed;
+}
+
+static int error = 0;
+
+static void test1(u64 x, u64 y)
+{
+  u64 z, uy;
+  s64 t, sy;
+  int i;
+  double f, g;
+
+  z = __i64_neg(x);
+  if (z != -x) 
+    error++, printf("- %016llx = %016llx, expected %016llx\n", x, z, -x);
+  z = __i64_add(x, y);
+  if (z != x + y) 
+    error++, printf("%016llx + %016llx = %016llx, expected %016llx\n", x, y, z, x + y);
+
+  z = __i64_sub(x, y);
+  if (z != x - y) 
+    error++, printf("%016llx - %016llx = %016llx, expected %016llx\n", x, y, z, x - y);
+
+  z = __i64_mul(x, y);
+  if (z != x * y) 
+    error++, printf("%016llx * %016llx = %016llx, expected %016llx\n", x, y, z, x * y);
+
+  if (y != 0) {
+
+  z = __i64_udiv(x, y);
+  if (z != x / y) 
+    error++, printf("%llu /u %llu = %llu, expected %llu\n", x, y, z, x / y);
+
+  z = __i64_umod(x, y);
+  if (z != x % y) 
+    error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, y, z, x % y);
+
+  }
+
+  if (y != 0 && !(x == 0x800000000000LLU && y == -1)) {
+
+  t = __i64_sdiv(x, y);
+  if (t != (s64) x / (s64) y) 
+    error++, printf("%lld /s %lld = %lld, expected %lld\n", x, y, t, (s64) x / (s64) y);
+
+  t = __i64_smod(x, y);
+  if (t != (s64) x % (s64) y) 
+    error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, y, t, (s64) x % (s64) y);
+
+  }
+
+  /* Test division with small (32-bit) divisors */
+  uy = y >> 32;
+  sy = (s64)y >> 32;
+
+  if (uy != 0) {
+
+  z = __i64_udiv(x, uy);
+  if (z != x / uy) 
+    error++, printf("%llu /u %llu = %llu, expected %llu\n", x, uy, z, x / uy);
+
+  z = __i64_umod(x, uy);
+  if (z != x % uy) 
+    error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, uy, z, x % uy);
+
+  }
+
+  if (sy != 0 && !(x == 0x800000000000LLU && sy == -1)) {
+
+  t = __i64_sdiv(x, sy);
+  if (t != (s64) x / sy) 
+    error++, printf("%lld /s %lld = %lld, expected %lld\n", x, sy, t, (s64) x / sy);
+
+  t = __i64_smod(x, sy);
+  if (t != (s64) x % sy) 
+    error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, sy, t, (s64) x % sy);
+
+  }
+
+  i = y & 63;
+
+  z = __i64_shl(x, i);
+  if (z != x << i) 
+    error++, printf("%016llx << %d = %016llx, expected %016llx\n", x, i, z, x << i);
+
+  z = __i64_shr(x, i);
+  if (z != x >> i) 
+    error++, printf("%016llx >>u %d = %016llx, expected %016llx\n", x, i, z, x >> i);
+
+  t = __i64_sar(x, i);
+  if (t != (s64) x >> i) 
+    error++, printf("%016llx >>s %d = %016llx, expected %016llx\n", x, i, t, (s64) x >> i);
+
+  i = __i64_ucmp(x, y);
+  if (x == y) {
+    if (! (i == 0))
+      error++, printf("ucmp(%016llx, %016llx) = %d, expected 0\n", x, y, i);
+  }
+  else if (x < y) {
+    if (! (i < 0))
+      error++, printf("ucmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i);
+  } else {
+    if (! (i > 0))
+      error++, printf("ucmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i);
+  }
+
+  i = __i64_scmp(x, y);
+  if (x == y) {
+    if (! (i == 0))
+      error++, printf("scmp(%016llx, %016llx) = %d, expected 0\n", x, y, i);
+  }
+  else if ((s64)x < (s64)y) {
+    if (! (i < 0))
+      error++, printf("scmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i);
+  } else {
+    if (! (i > 0))
+      error++, printf("scmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i);
+  }
+
+  f = __i64_utod(x);
+  g = (double) x;
+  if (f != g)
+    error++, printf("(double) %llu (u) = %a, expected %a\n", x, f, g);
+
+  f = __i64_stod(x);
+  g = (double) (s64) x;
+  if (f != g)
+    error++, printf("(double) %lld (s) = %a, expected %a\n", x, f, g);
+
+  f = ((double) x) * 0.0001;
+  z = __i64_dtou(f);
+  if (z != (u64) f)
+    error++, printf("(u64) %a = %llu, expected %llu\n", f, z, (u64) f);
+
+  f = ((double) (s64) x) * 0.0001;
+  t = __i64_dtos(f);
+  if (t != (s64) f)
+    error++, printf("(s64) %a = %lld, expected %lld\n", f, z, (s64) f);
+}
+
+#define NSPECIFIC 8
+
+unsigned long long specific[NSPECIFIC] = {
+  0, 1, -1, 0x7FFFFFFFULL, 0x80000000ULL, 0xFFFFFFFFULL,
+  0x7FFFFFFFFFFFULL, 0x8000000000000000ULL
+};
+
+int main()
+{
+  int i, j;
+
+  /* Some specific values */
+  for (i = 0; i < NSPECIFIC; i++)
+    for (j = 0; j < NSPECIFIC; j++)
+      test1(specific[i], specific[j]);
+
+  /* Random testing */
+  for (i = 0; i < 50; i++) {
+    for (j = 0; j < 1000000; j++)
+      test1(rnd64(), rnd64());
+    printf("."); fflush(stdout);
+  }
+  printf("\n");
+  if (error == 0)
+    printf ("Test passed\n");
+  else
+    printf ("TEST FAILED, %d error(s) detected\n", error);
+  return 0;
+}
+
diff --git a/test/c/Makefile b/test/c/Makefile
index 6839ac4..a11ab69 100644
--- a/test/c/Makefile
+++ b/test/c/Makefile
@@ -13,7 +13,7 @@ TIME=xtime -o /dev/null -mintime 1.0     # Xavier's hack
 PROGS=fib integr qsort fft sha1 aes almabench lists \
   binarytrees fannkuch knucleotide mandelbrot nbody \
   nsieve nsievebits spectral vmach \
-  bisect chomp perlin floats floats-basics
+  bisect chomp perlin siphash24 floats floats-basics
 
 PROGS_INTERP=floats floats-basics
 
diff --git a/test/c/Results/siphash24 b/test/c/Results/siphash24
new file mode 100644
index 0000000..a99b4f1
--- /dev/null
+++ b/test/c/Results/siphash24
@@ -0,0 +1 @@
+test vectors ok
diff --git a/test/c/siphash24.c b/test/c/siphash24.c
new file mode 100644
index 0000000..0ed1841
--- /dev/null
+++ b/test/c/siphash24.c
@@ -0,0 +1,255 @@
+/*
+   SipHash reference C implementation
+
+   Written in 2012 by 
+   Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+   Daniel J. Bernstein <djb@cr.yp.to>
+
+   To the extent possible under law, the author(s) have dedicated all copyright
+   and related and neighboring rights to this software to the public domain
+   worldwide. This software is distributed without any warranty.
+
+   You should have received a copy of the CC0 Public Domain Dedication along with
+   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+
+#include <stdio.h>
+#include <string.h>
+typedef unsigned long long u64;
+typedef unsigned long u32;
+typedef unsigned char u8;
+
+#define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) )
+
+#define U32TO8_LE(p, v)         \
+    (p)[0] = (u8)((v)      ); (p)[1] = (u8)((v) >>  8); \
+    (p)[2] = (u8)((v) >> 16); (p)[3] = (u8)((v) >> 24);
+
+#define U64TO8_LE(p, v)         \
+  U32TO8_LE((p),     (u32)((v)      ));   \
+  U32TO8_LE((p) + 4, (u32)((v) >> 32));
+
+#define U8TO64_LE(p) \
+  (((u64)((p)[0])      ) | \
+   ((u64)((p)[1]) <<  8) | \
+   ((u64)((p)[2]) << 16) | \
+   ((u64)((p)[3]) << 24) | \
+   ((u64)((p)[4]) << 32) | \
+   ((u64)((p)[5]) << 40) | \
+   ((u64)((p)[6]) << 48) | \
+   ((u64)((p)[7]) << 56))
+
+#define SIPROUND            \
+  do {              \
+    v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \
+    v2 += v3; v3=ROTL(v3,16); v3 ^= v2;     \
+    v0 += v3; v3=ROTL(v3,21); v3 ^= v0;     \
+    v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \
+  } while(0)
+
+/* SipHash-2-4 */
+int crypto_auth( unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k )
+{
+  /* "somepseudorandomlygeneratedbytes" */
+  u64 v0 = 0x736f6d6570736575ULL;
+  u64 v1 = 0x646f72616e646f6dULL;
+  u64 v2 = 0x6c7967656e657261ULL;
+  u64 v3 = 0x7465646279746573ULL;
+  u64 b;
+  u64 k0 = U8TO64_LE( k );
+  u64 k1 = U8TO64_LE( k + 8 );
+  u64 m;
+  const u8 *end = in + inlen - ( inlen % sizeof( u64 ) );
+  const int left = inlen & 7;
+  b = ( ( u64 )inlen ) << 56;
+  v3 ^= k1;
+  v2 ^= k0;
+  v1 ^= k1;
+  v0 ^= k0;
+
+  for ( ; in != end; in += 8 )
+  {
+    m = U8TO64_LE( in );
+#ifdef DEBUG
+    printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 );
+    printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 );
+    printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 );
+    printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 );
+    printf( "(%3d) compress %08x %08x\n", ( int )inlen, ( u32 )( m >> 32 ), ( u32 )m );
+#endif
+    v3 ^= m;
+    SIPROUND;
+    SIPROUND;
+    v0 ^= m;
+  }
+
+  switch( left )
+  {
+  case 7: b |= ( ( u64 )in[ 6] )  << 48;
+
+  case 6: b |= ( ( u64 )in[ 5] )  << 40;
+
+  case 5: b |= ( ( u64 )in[ 4] )  << 32;
+
+  case 4: b |= ( ( u64 )in[ 3] )  << 24;
+
+  case 3: b |= ( ( u64 )in[ 2] )  << 16;
+
+  case 2: b |= ( ( u64 )in[ 1] )  <<  8;
+
+  case 1: b |= ( ( u64 )in[ 0] ); break;
+
+  case 0: break;
+  }
+
+#ifdef DEBUG
+  printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 );
+  printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 );
+  printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 );
+  printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 );
+  printf( "(%3d) padding   %08x %08x\n", ( int )inlen, ( u32 )( b >> 32 ), ( u32 )b );
+#endif
+  v3 ^= b;
+  SIPROUND;
+  SIPROUND;
+  v0 ^= b;
+#ifdef DEBUG
+  printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 );
+  printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 );
+  printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 );
+  printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 );
+#endif
+  v2 ^= 0xff;
+  SIPROUND;
+  SIPROUND;
+  SIPROUND;
+  SIPROUND;
+  b = v0 ^ v1 ^ v2  ^ v3;
+  U64TO8_LE( out, b );
+  return 0;
+}
+
+/*
+   SipHash-2-4 output with
+   k = 00 01 02 ...
+   and
+   in = (empty string)
+   in = 00 (1 byte)
+   in = 00 01 (2 bytes)
+   in = 00 01 02 (3 bytes)
+   ...
+   in = 00 01 02 ... 3e (63 bytes)
+*/
+u8 vectors[64][8] =
+{
+  { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
+  { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
+  { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
+  { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
+  { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
+  { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
+  { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
+  { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
+  { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
+  { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
+  { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
+  { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
+  { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
+  { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
+  { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
+  { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
+  { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
+  { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
+  { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
+  { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
+  { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
+  { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
+  { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
+  { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
+  { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
+  { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
+  { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
+  { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
+  { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
+  { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
+  { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
+  { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
+  { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
+  { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
+  { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
+  { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
+  { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
+  { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
+  { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
+  { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
+  { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
+  { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
+  { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
+  { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
+  { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
+  { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
+  { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
+  { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
+  { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
+  { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
+  { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
+  { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
+  { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
+  { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
+  { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
+  { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
+  { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
+  { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
+  { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
+  { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
+  { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
+  { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
+  { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
+  { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, }
+};
+
+
+int test_vectors()
+{
+#define MAXLEN 64
+  u8 in[MAXLEN], out[8], k[16];
+  int i;
+  int ok = 1;
+
+  for( i = 0; i < 16; ++i ) k[i] = i;
+
+  for( i = 0; i < MAXLEN; ++i )
+  {
+    in[i] = i;
+    crypto_auth( out, in, i, k );
+
+    if ( memcmp( out, vectors[i], 8 ) )
+    {
+      printf( "test vector failed for %d bytes\n", i );
+      ok = 0;
+    }
+  }
+
+  return ok;
+}
+
+u8 testdata[100] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 34, 56, 78, 90 };
+
+int speed_test(void)
+{
+  u8 out[8], k[16];
+  int i;
+
+  for(i = 0; i < 16; ++i ) k[i] = i;
+  for(i = 0; i < 10000000; i++) {
+    crypto_auth(out, testdata, 100, k);
+  }
+  return out[0];
+}
+
+int main()
+{
+  if ( test_vectors() ) printf( "test vectors ok\n" );
+  (void) speed_test();
+  return 0;
+}
diff --git a/test/regression/Makefile b/test/regression/Makefile
index 48e3b97..2974444 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -12,7 +12,7 @@ TESTS=attribs1 bitfields1 bitfields2 bitfields3 bitfields4 \
   expr1 expr6 funptr2 initializers volatile1 volatile2 volatile3 \
   funct3 expr5 struct7 struct8 struct11 casts1 casts2 char1 \
   sizeof1 sizeof2 packedstruct1 packedstruct2 \
-  instrsel bool compar switch
+  instrsel bool compar switch int64
 
 # Other tests: should compile to .s without errors (but expect warnings)
 EXTRAS=annot1 commaprec expr2 expr3 expr4 extern1 funct2 funptr1 init1 \
diff --git a/test/regression/Results/int64 b/test/regression/Results/int64
new file mode 100644
index 0000000..15e4ecc
--- /dev/null
+++ b/test/regression/Results/int64
@@ -0,0 +1,3780 @@
+x = 0
+y = 0
+-x = 0
+x + y = 0
+x - y = 0
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffffffffffff
+x & y = 0
+x | y = 0
+x ^ y = 0
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = eq
+x cmps y = eq
+utod x = 0
+dtou f = 0
+stod x = 0
+dtos f = 0
+
+x = 0
+y = 1
+-x = 0
+x + y = 1
+x - y = ffffffffffffffff
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffffffffffff
+x & y = 0
+x | y = 1
+x ^ y = 1
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 0
+dtou f = 0
+stod x = 0
+dtos f = 0
+
+x = 0
+y = ffffffffffffffff
+-x = 0
+x + y = ffffffffffffffff
+x - y = 1
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffffffffffff
+x & y = 0
+x | y = ffffffffffffffff
+x ^ y = ffffffffffffffff
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = gt
+utod x = 0
+dtou f = 0
+stod x = 0
+dtos f = 0
+
+x = 0
+y = 7fffffff
+-x = 0
+x + y = 7fffffff
+x - y = ffffffff80000001
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffffffffffff
+x & y = 0
+x | y = 7fffffff
+x ^ y = 7fffffff
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 0
+dtou f = 0
+stod x = 0
+dtos f = 0
+
+x = 0
+y = 80000000
+-x = 0
+x + y = 80000000
+x - y = ffffffff80000000
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffffffffffff
+x & y = 0
+x | y = 80000000
+x ^ y = 80000000
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 0
+dtou f = 0
+stod x = 0
+dtos f = 0
+
+x = 0
+y = 14057b7ef767814f
+-x = 0
+x + y = 14057b7ef767814f
+x - y = ebfa848108987eb1
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffffffffffff
+x & y = 0
+x | y = 14057b7ef767814f
+x ^ y = 14057b7ef767814f
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 0
+dtou f = 0
+stod x = 0
+dtos f = 0
+
+x = 1a08ee1184ba6d32
+y = 0
+-x = e5f711ee7b4592ce
+x + y = 1a08ee1184ba6d32
+x - y = 1a08ee1184ba6d32
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = e5f711ee7b4592cd
+x & y = 0
+x | y = 1a08ee1184ba6d32
+x ^ y = 1a08ee1184ba6d32
+x << i = 1a08ee1184ba6d32
+x >>u i = 1a08ee1184ba6d32
+x >>s i = 1a08ee1184ba6d32
+x cmpu y = gt
+x cmps y = gt
+utod x = 43ba08ee1184ba6d
+dtou f = aa9f48f29aaf
+stod x = 43ba08ee1184ba6d
+dtos f = aa9f48f29aaf
+
+x = 1
+y = 0
+-x = ffffffffffffffff
+x + y = 1
+x - y = 1
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = fffffffffffffffe
+x & y = 0
+x | y = 1
+x ^ y = 1
+x << i = 1
+x >>u i = 1
+x >>s i = 1
+x cmpu y = gt
+x cmps y = gt
+utod x = 3ff0000000000000
+dtou f = 0
+stod x = 3ff0000000000000
+dtos f = 0
+
+x = 1
+y = 1
+-x = ffffffffffffffff
+x + y = 2
+x - y = 0
+x * y = 1
+x /u y = 1
+x %u y = 0
+x /s y = 1
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = fffffffffffffffe
+x & y = 1
+x | y = 1
+x ^ y = 0
+x << i = 2
+x >>u i = 0
+x >>s i = 0
+x cmpu y = eq
+x cmps y = eq
+utod x = 3ff0000000000000
+dtou f = 0
+stod x = 3ff0000000000000
+dtos f = 0
+
+x = 1
+y = ffffffffffffffff
+-x = ffffffffffffffff
+x + y = 0
+x - y = 2
+x * y = ffffffffffffffff
+x /u y = 0
+x %u y = 1
+x /s y = ffffffffffffffff
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 1
+x /s y3 = ffffffffffffffff
+x %s y3 = 0
+~x = fffffffffffffffe
+x & y = 1
+x | y = ffffffffffffffff
+x ^ y = fffffffffffffffe
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = gt
+utod x = 3ff0000000000000
+dtou f = 0
+stod x = 3ff0000000000000
+dtos f = 0
+
+x = 1
+y = 7fffffff
+-x = ffffffffffffffff
+x + y = 80000000
+x - y = ffffffff80000002
+x * y = 7fffffff
+x /u y = 0
+x %u y = 1
+x /s y = 0
+x %s y = 1
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = fffffffffffffffe
+x & y = 1
+x | y = 7fffffff
+x ^ y = 7ffffffe
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 3ff0000000000000
+dtou f = 0
+stod x = 3ff0000000000000
+dtos f = 0
+
+x = 1
+y = 80000000
+-x = ffffffffffffffff
+x + y = 80000001
+x - y = ffffffff80000001
+x * y = 80000000
+x /u y = 0
+x %u y = 1
+x /s y = 0
+x %s y = 1
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = fffffffffffffffe
+x & y = 0
+x | y = 80000001
+x ^ y = 80000001
+x << i = 1
+x >>u i = 1
+x >>s i = 1
+x cmpu y = lt
+x cmps y = lt
+utod x = 3ff0000000000000
+dtou f = 0
+stod x = 3ff0000000000000
+dtos f = 0
+
+x = 1
+y = 9af678222e728119
+-x = ffffffffffffffff
+x + y = 9af678222e72811a
+x - y = 650987ddd18d7ee8
+x * y = 9af678222e728119
+x /u y = 0
+x %u y = 1
+x /s y = 0
+x %s y = 1
+x /u y2 = 0
+x %u y2 = 1
+x /s y3 = 0
+x %s y3 = 1
+~x = fffffffffffffffe
+x & y = 1
+x | y = 9af678222e728119
+x ^ y = 9af678222e728118
+x << i = 2000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = gt
+utod x = 3ff0000000000000
+dtou f = 0
+stod x = 3ff0000000000000
+dtos f = 0
+
+x = 66b61ae97f2099b4
+y = 1
+-x = 9949e51680df664c
+x + y = 66b61ae97f2099b5
+x - y = 66b61ae97f2099b3
+x * y = 66b61ae97f2099b4
+x /u y = 66b61ae97f2099b4
+x %u y = 0
+x /s y = 66b61ae97f2099b4
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = 9949e51680df664b
+x & y = 0
+x | y = 66b61ae97f2099b5
+x ^ y = 66b61ae97f2099b5
+x << i = cd6c35d2fe413368
+x >>u i = 335b0d74bf904cda
+x >>s i = 335b0d74bf904cda
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d9ad86ba5fc826
+dtou f = 2a1210c1f1b75
+stod x = 43d9ad86ba5fc826
+dtos f = 2a1210c1f1b75
+
+x = ffffffffffffffff
+y = 0
+-x = 1
+x + y = ffffffffffffffff
+x - y = ffffffffffffffff
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = 0
+x & y = 0
+x | y = ffffffffffffffff
+x ^ y = ffffffffffffffff
+x << i = ffffffffffffffff
+x >>u i = ffffffffffffffff
+x >>s i = ffffffffffffffff
+x cmpu y = gt
+x cmps y = lt
+utod x = 43f0000000000000
+dtou f = 68db8bac710cb
+stod x = bff0000000000000
+dtos f = 0
+
+x = ffffffffffffffff
+y = 1
+-x = 1
+x + y = 0
+x - y = fffffffffffffffe
+x * y = ffffffffffffffff
+x /u y = ffffffffffffffff
+x %u y = 0
+x /s y = ffffffffffffffff
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = 0
+x & y = 1
+x | y = ffffffffffffffff
+x ^ y = fffffffffffffffe
+x << i = fffffffffffffffe
+x >>u i = 7fffffffffffffff
+x >>s i = ffffffffffffffff
+x cmpu y = gt
+x cmps y = lt
+utod x = 43f0000000000000
+dtou f = 68db8bac710cb
+stod x = bff0000000000000
+dtos f = 0
+
+x = ffffffffffffffff
+y = ffffffffffffffff
+-x = 1
+x + y = fffffffffffffffe
+x - y = 0
+x * y = 1
+x /u y = 1
+x %u y = 0
+x /s y = 1
+x %s y = 0
+x /u y2 = 100000001
+x %u y2 = 0
+x /s y3 = 1
+x %s y3 = 0
+~x = 0
+x & y = ffffffffffffffff
+x | y = ffffffffffffffff
+x ^ y = 0
+x << i = 8000000000000000
+x >>u i = 1
+x >>s i = ffffffffffffffff
+x cmpu y = eq
+x cmps y = eq
+utod x = 43f0000000000000
+dtou f = 68db8bac710cb
+stod x = bff0000000000000
+dtos f = 0
+
+x = ffffffffffffffff
+y = 7fffffff
+-x = 1
+x + y = 7ffffffe
+x - y = ffffffff80000000
+x * y = ffffffff80000001
+x /u y = 200000004
+x %u y = 3
+x /s y = 0
+x %s y = ffffffffffffffff
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = 0
+x & y = 7fffffff
+x | y = ffffffffffffffff
+x ^ y = ffffffff80000000
+x << i = 8000000000000000
+x >>u i = 1
+x >>s i = ffffffffffffffff
+x cmpu y = gt
+x cmps y = lt
+utod x = 43f0000000000000
+dtou f = 68db8bac710cb
+stod x = bff0000000000000
+dtos f = 0
+
+x = ffffffffffffffff
+y = 80000000
+-x = 1
+x + y = 7fffffff
+x - y = ffffffff7fffffff
+x * y = ffffffff80000000
+x /u y = 1ffffffff
+x %u y = 7fffffff
+x /s y = 0
+x %s y = ffffffffffffffff
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = 0
+x & y = 80000000
+x | y = ffffffffffffffff
+x ^ y = ffffffff7fffffff
+x << i = ffffffffffffffff
+x >>u i = ffffffffffffffff
+x >>s i = ffffffffffffffff
+x cmpu y = gt
+x cmps y = lt
+utod x = 43f0000000000000
+dtou f = 68db8bac710cb
+stod x = bff0000000000000
+dtos f = 0
+
+x = ffffffffffffffff
+y = 62354cda6226d1f3
+-x = 1
+x + y = 62354cda6226d1f2
+x - y = 9dcab3259dd92e0c
+x * y = 9dcab3259dd92e0d
+x /u y = 2
+x %u y = 3b95664b3bb25c19
+x /s y = 0
+x %s y = ffffffffffffffff
+x /u y2 = 29b51243c
+x %u y2 = 2db954e7
+x /s y3 = 0
+x %s y3 = ffffffffffffffff
+~x = 0
+x & y = 62354cda6226d1f3
+x | y = ffffffffffffffff
+x ^ y = 9dcab3259dd92e0c
+x << i = fff8000000000000
+x >>u i = 1fff
+x >>s i = ffffffffffffffff
+x cmpu y = gt
+x cmps y = lt
+utod x = 43f0000000000000
+dtou f = 68db8bac710cb
+stod x = bff0000000000000
+dtos f = 0
+
+x = 8f947f36d0d0f606
+y = ffffffffffffffff
+-x = 706b80c92f2f09fa
+x + y = 8f947f36d0d0f605
+x - y = 8f947f36d0d0f607
+x * y = 706b80c92f2f09fa
+x /u y = 0
+x %u y = 8f947f36d0d0f606
+x /s y = 706b80c92f2f09fa
+x %s y = 0
+x /u y2 = 8f947f37
+x %u y2 = 6065753d
+x /s y3 = 706b80c92f2f09fa
+x %s y3 = 0
+~x = 706b80c92f2f09f9
+x & y = 8f947f36d0d0f606
+x | y = ffffffffffffffff
+x ^ y = 706b80c92f2f09f9
+x << i = 0
+x >>u i = 1
+x >>s i = ffffffffffffffff
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e1f28fe6da1a1f
+dtou f = 3acf760d70f97
+stod x = c3dc1ae0324bcbc2
+dtos f = fffd1f3ea60ffecc
+
+x = 7fffffff
+y = 0
+-x = ffffffff80000001
+x + y = 7fffffff
+x - y = 7fffffff
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffff80000000
+x & y = 0
+x | y = 7fffffff
+x ^ y = 7fffffff
+x << i = 7fffffff
+x >>u i = 7fffffff
+x >>s i = 7fffffff
+x cmpu y = gt
+x cmps y = gt
+utod x = 41dfffffffc00000
+dtou f = 346dc
+stod x = 41dfffffffc00000
+dtos f = 346dc
+
+x = 7fffffff
+y = 1
+-x = ffffffff80000001
+x + y = 80000000
+x - y = 7ffffffe
+x * y = 7fffffff
+x /u y = 7fffffff
+x %u y = 0
+x /s y = 7fffffff
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffff80000000
+x & y = 1
+x | y = 7fffffff
+x ^ y = 7ffffffe
+x << i = fffffffe
+x >>u i = 3fffffff
+x >>s i = 3fffffff
+x cmpu y = gt
+x cmps y = gt
+utod x = 41dfffffffc00000
+dtou f = 346dc
+stod x = 41dfffffffc00000
+dtos f = 346dc
+
+x = 7fffffff
+y = ffffffffffffffff
+-x = ffffffff80000001
+x + y = 7ffffffe
+x - y = 80000000
+x * y = ffffffff80000001
+x /u y = 0
+x %u y = 7fffffff
+x /s y = ffffffff80000001
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 7fffffff
+x /s y3 = ffffffff80000001
+x %s y3 = 0
+~x = ffffffff80000000
+x & y = 7fffffff
+x | y = ffffffffffffffff
+x ^ y = ffffffff80000000
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = gt
+utod x = 41dfffffffc00000
+dtou f = 346dc
+stod x = 41dfffffffc00000
+dtos f = 346dc
+
+x = 7fffffff
+y = 7fffffff
+-x = ffffffff80000001
+x + y = fffffffe
+x - y = 0
+x * y = 3fffffff00000001
+x /u y = 1
+x %u y = 0
+x /s y = 1
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffff80000000
+x & y = 7fffffff
+x | y = 7fffffff
+x ^ y = 0
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = eq
+x cmps y = eq
+utod x = 41dfffffffc00000
+dtou f = 346dc
+stod x = 41dfffffffc00000
+dtos f = 346dc
+
+x = 7fffffff
+y = 80000000
+-x = ffffffff80000001
+x + y = ffffffff
+x - y = ffffffffffffffff
+x * y = 3fffffff80000000
+x /u y = 0
+x %u y = 7fffffff
+x /s y = 0
+x %s y = 7fffffff
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffff80000000
+x & y = 0
+x | y = ffffffff
+x ^ y = ffffffff
+x << i = 7fffffff
+x >>u i = 7fffffff
+x >>s i = 7fffffff
+x cmpu y = lt
+x cmps y = lt
+utod x = 41dfffffffc00000
+dtou f = 346dc
+stod x = 41dfffffffc00000
+dtos f = 346dc
+
+x = 7fffffff
+y = 144093704fadba5d
+-x = ffffffff80000001
+x + y = 14409370cfadba5c
+x - y = ebbf6c90305245a2
+x * y = 139649be305245a3
+x /u y = 0
+x %u y = 7fffffff
+x /s y = 0
+x %s y = 7fffffff
+x /u y2 = 6
+x %u y2 = 67c8b5f
+x /s y3 = 6
+x %s y3 = 67c8b5f
+~x = ffffffff80000000
+x & y = 4fadba5d
+x | y = 144093707fffffff
+x ^ y = 14409370305245a2
+x << i = fffffffe0000000
+x >>u i = 3
+x >>s i = 3
+x cmpu y = lt
+x cmps y = lt
+utod x = 41dfffffffc00000
+dtou f = 346dc
+stod x = 41dfffffffc00000
+dtos f = 346dc
+
+x = 5b21778e3c8666a8
+y = 7fffffff
+-x = a4de8871c3799958
+x + y = 5b21778ebc8666a7
+x - y = 5b21778dbc8666a9
+x * y = c321bbc5c3799958
+x /u y = b642ef1d
+x %u y = 72c955c5
+x /s y = b642ef1d
+x %s y = 72c955c5
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = a4de8871c3799957
+x & y = 3c8666a8
+x | y = 5b21778e7fffffff
+x ^ y = 5b21778e43799957
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d6c85de38f219a
+dtou f = 2553bfeb9de93
+stod x = 43d6c85de38f219a
+dtos f = 2553bfeb9de93
+
+x = 80000000
+y = 0
+-x = ffffffff80000000
+x + y = 80000000
+x - y = 80000000
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffff7fffffff
+x & y = 0
+x | y = 80000000
+x ^ y = 80000000
+x << i = 80000000
+x >>u i = 80000000
+x >>s i = 80000000
+x cmpu y = gt
+x cmps y = gt
+utod x = 41e0000000000000
+dtou f = 346dc
+stod x = 41e0000000000000
+dtos f = 346dc
+
+x = 80000000
+y = 1
+-x = ffffffff80000000
+x + y = 80000001
+x - y = 7fffffff
+x * y = 80000000
+x /u y = 80000000
+x %u y = 0
+x /s y = 80000000
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffff7fffffff
+x & y = 0
+x | y = 80000001
+x ^ y = 80000001
+x << i = 100000000
+x >>u i = 40000000
+x >>s i = 40000000
+x cmpu y = gt
+x cmps y = gt
+utod x = 41e0000000000000
+dtou f = 346dc
+stod x = 41e0000000000000
+dtos f = 346dc
+
+x = 80000000
+y = ffffffffffffffff
+-x = ffffffff80000000
+x + y = 7fffffff
+x - y = 80000001
+x * y = ffffffff80000000
+x /u y = 0
+x %u y = 80000000
+x /s y = ffffffff80000000
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 80000000
+x /s y3 = ffffffff80000000
+x %s y3 = 0
+~x = ffffffff7fffffff
+x & y = 80000000
+x | y = ffffffffffffffff
+x ^ y = ffffffff7fffffff
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = gt
+utod x = 41e0000000000000
+dtou f = 346dc
+stod x = 41e0000000000000
+dtos f = 346dc
+
+x = 80000000
+y = 7fffffff
+-x = ffffffff80000000
+x + y = ffffffff
+x - y = 1
+x * y = 3fffffff80000000
+x /u y = 1
+x %u y = 1
+x /s y = 1
+x %s y = 1
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffff7fffffff
+x & y = 0
+x | y = ffffffff
+x ^ y = ffffffff
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = gt
+x cmps y = gt
+utod x = 41e0000000000000
+dtou f = 346dc
+stod x = 41e0000000000000
+dtos f = 346dc
+
+x = 80000000
+y = 80000000
+-x = ffffffff80000000
+x + y = 100000000
+x - y = 0
+x * y = 4000000000000000
+x /u y = 1
+x %u y = 0
+x /s y = 1
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = ffffffff7fffffff
+x & y = 80000000
+x | y = 80000000
+x ^ y = 0
+x << i = 80000000
+x >>u i = 80000000
+x >>s i = 80000000
+x cmpu y = eq
+x cmps y = eq
+utod x = 41e0000000000000
+dtou f = 346dc
+stod x = 41e0000000000000
+dtos f = 346dc
+
+x = 80000000
+y = 7b985bc1e7bce4d7
+-x = ffffffff80000000
+x + y = 7b985bc267bce4d7
+x - y = 8467a43e98431b29
+x * y = f3de726b80000000
+x /u y = 0
+x %u y = 80000000
+x /s y = 0
+x %s y = 80000000
+x /u y2 = 1
+x %u y2 = 467a43f
+x /s y3 = 1
+x %s y3 = 467a43f
+~x = ffffffff7fffffff
+x & y = 80000000
+x | y = 7b985bc1e7bce4d7
+x ^ y = 7b985bc167bce4d7
+x << i = 40000000000000
+x >>u i = 100
+x >>s i = 100
+x cmpu y = lt
+x cmps y = lt
+utod x = 41e0000000000000
+dtou f = 346dc
+stod x = 41e0000000000000
+dtos f = 346dc
+
+x = 7252e9376e45641a
+y = 80000000
+-x = 8dad16c891ba9be6
+x + y = 7252e937ee45641a
+x - y = 7252e936ee45641a
+x * y = b722b20d00000000
+x /u y = e4a5d26e
+x %u y = 6e45641a
+x /s y = e4a5d26e
+x %s y = 6e45641a
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+~x = 8dad16c891ba9be5
+x & y = 0
+x | y = 7252e937ee45641a
+x ^ y = 7252e937ee45641a
+x << i = 7252e9376e45641a
+x >>u i = 7252e9376e45641a
+x >>s i = 7252e9376e45641a
+x cmpu y = gt
+x cmps y = gt
+utod x = 43dc94ba4ddb9159
+dtou f = 2ed3ba0c0e099
+stod x = 43dc94ba4ddb9159
+dtos f = 2ed3ba0c0e099
+
+x = a220229ec164ffe1
+y = 5d7d4da4cb0e1adc
+-x = 5ddfdd613e9b001f
+x + y = ff9d70438c731abd
+x - y = 44a2d4f9f656e505
+x * y = 52b323a3de16bf5c
+x /u y = 1
+x %u y = 44a2d4f9f656e505
+x /s y = ffffffffffffffff
+x %s y = ff9d70438c731abd
+x /u y2 = 1bbf1cb77
+x %u y2 = c30dca5
+x /s y3 = fffffffefef21c87
+x %s y3 = ffffffffa2cb1e65
+~x = 5ddfdd613e9b001e
+x & y = 200084c1041ac0
+x | y = ff7d6fbecb6efffd
+x ^ y = ff5d6f3a0a6ae53d
+x << i = ec164ffe10000000
+x >>u i = a220229ec
+x >>s i = fffffffa220229ec
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e4440453d82ca0
+dtou f = 42681802c45c2
+stod x = c3d777f7584fa6c0
+dtos f = fffd98c8c56534f8
+
+x = c73aa0d9a415dfb
+y = 18e9107ab99b8b6e
+-x = f38c55f265bea205
+x + y = 255cba8853dce969
+x - y = f38a9992e0a5d28d
+x * y = 9c3790b61016aada
+x /u y = 0
+x %u y = c73aa0d9a415dfb
+x /s y = 0
+x %s y = c73aa0d9a415dfb
+x /u y2 = 7ff714a4
+x %u y2 = 3f347d3
+x /s y3 = 7ff714a4
+x %s y3 = 3f347d3
+~x = f38c55f265bea204
+x & y = 86100089801096a
+x | y = 1cfbba7fbbdbdfff
+x ^ y = 149aba7723dad695
+x << i = 577ec00000000000
+x >>u i = 31ce
+x >>s i = 31ce
+x cmpu y = lt
+x cmps y = lt
+utod x = 43a8e7541b3482bc
+dtou f = 519aad330d8d
+stod x = 43a8e7541b3482bc
+dtos f = 519aad330d8d
+
+x = e9bcd26890f095a5
+y = 329cb23ce0f7aa50
+-x = 16432d976f0f6a5b
+x + y = 1c5984a571e83ff5
+x - y = b720202baff8eb55
+x * y = 9b6d58a9d0c15590
+x /u y = 4
+x %u y = 1f4a09750d11ec65
+x /s y = 0
+x %s y = e9bcd26890f095a5
+x /u y2 = 49e436778
+x %u y2 = 1e12e585
+x /s y3 = ffffffff8f651a9c
+x %s y3 = fffffffff9ade115
+~x = 16432d976f0f6a5a
+x & y = 209c922880f08000
+x | y = fbbcf27cf0f7bff5
+x ^ y = db20605470073ff5
+x << i = d26890f095a50000
+x >>u i = e9bcd26890f0
+x >>s i = ffffe9bcd26890f0
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ed379a4d121e13
+dtou f = 5fbd298972a9d
+stod x = c3b6432d976f0f6a
+dtos f = ffff6e19ddd019d2
+
+x = 8362aa9340fe215f
+y = f986342416ec8002
+-x = 7c9d556cbf01dea1
+x + y = 7ce8deb757eaa161
+x - y = 89dc766f2a11a15d
+x * y = e4a2b426803fc2be
+x /u y = 0
+x %u y = 8362aa9340fe215f
+x /s y = 13
+x %s y = fe6ccbe58d70a139
+x /u y2 = 86cb918b
+x %u y2 = 910b6dd3
+x /s y3 = 133e437097
+x %s y3 = fffffffffe99a023
+~x = 7c9d556cbf01dea0
+x & y = 8102200000ec0002
+x | y = fbe6beb756fea15f
+x ^ y = 7ae49eb75612a15d
+x << i = d8aaa4d03f8857c
+x >>u i = 20d8aaa4d03f8857
+x >>s i = e0d8aaa4d03f8857
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e06c5552681fc4
+dtou f = 35d0c262d14d7
+stod x = c3df27555b2fc078
+dtos f = fffccf536b66040d
+
+x = 368083376ba4ffa9
+y = 6912b247b79a4904
+-x = c97f7cc8945b0057
+x + y = 9f93357f233f48ad
+x - y = cd6dd0efb40ab6a5
+x * y = ec1ef87256252fa4
+x /u y = 0
+x %u y = 368083376ba4ffa9
+x /s y = 0
+x %s y = 368083376ba4ffa9
+x /u y2 = 84c9e8f3
+x %u y2 = 27966e44
+x /s y3 = 84c9e8f3
+x %s y3 = 27966e44
+~x = c97f7cc8945b0056
+x & y = 2000820723804900
+x | y = 7f92b377ffbeffad
+x ^ y = 5f923170dc3eb6ad
+x << i = 68083376ba4ffa90
+x >>u i = 368083376ba4ffa
+x >>s i = 368083376ba4ffa
+x cmpu y = lt
+x cmps y = lt
+utod x = 43cb40419bb5d280
+dtou f = 1652f2fb41ccc
+stod x = 43cb40419bb5d280
+dtos f = 1652f2fb41ccc
+
+x = aefab65d77135303
+y = bfc7666ab0ba95d6
+-x = 510549a288ecacfd
+x + y = 6ec21cc827cde8d9
+x - y = ef334ff2c658bd2d
+x * y = b8e736ca29a62382
+x /u y = 0
+x %u y = aefab65d77135303
+x /s y = 1
+x %s y = ef334ff2c658bd2d
+x /u y2 = e9932394
+x %u y2 = bed9fbb
+x /s y3 = 142f786e7
+x %s y3 = ffffffffe6446d5d
+~x = 510549a288ecacfc
+x & y = aec2264830121102
+x | y = bffff67ff7bbd7d7
+x ^ y = 113dd037c7a9c6d5
+x << i = 975dc4d4c0c00000
+x >>u i = 2bbead975dc
+x >>s i = fffffebbead975dc
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e5df56cbaee26a
+dtou f = 47abea07f9115
+stod x = c3d4415268a23b2b
+dtos f = fffded05e5b8804b
+
+x = dec3f99f561701ed
+y = 307892d5fe586af8
+-x = 213c0660a9e8fe13
+x + y = f3c8c75546f6ce5
+x - y = ae4b66c957be96f5
+x * y = f0bbfc03bc8dff98
+x /u y = 4
+x %u y = 1ce1ae475cb5560d
+x /s y = 0
+x %s y = dec3f99f561701ed
+x /u y2 = 49889cbcc
+x %u y2 = d961931
+x /s y3 = ffffffff5078c8f7
+x %s y3 = fffffffffb32ee6a
+~x = 213c0660a9e8fe12
+x & y = 10409095561000e8
+x | y = fefbfbdffe5f6bfd
+x ^ y = eebb6b4aa84f6b15
+x << i = ed00000000000000
+x >>u i = de
+x >>s i = ffffffffffffffde
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ebd87f33eac2e0
+dtou f = 5b3ea899bcdcc
+stod x = c3c09e033054f47f
+dtos f = ffff2631ced4bd02
+
+x = 7c15eb1a6c5b56e7
+y = 74078c767c0560ea
+-x = 83ea14e593a4a919
+x + y = f01d7790e860b7d1
+x - y = 80e5ea3f055f5fd
+x * y = fa248d23e2970f26
+x /u y = 1
+x %u y = 80e5ea3f055f5fd
+x /s y = 1
+x %s y = 80e5ea3f055f5fd
+x /u y2 = 111c647a7
+x %u y2 = 2a35fbed
+x /s y3 = 111c647a7
+x %s y3 = 2a35fbed
+~x = 83ea14e593a4a918
+x & y = 740588126c0140e2
+x | y = 7c17ef7e7c5f76ef
+x ^ y = 812676c105e360d
+x << i = 6d5b9c0000000000
+x >>u i = 1f057a
+x >>s i = 1f057a
+x cmpu y = gt
+x cmps y = gt
+utod x = 43df057ac69b16d6
+dtou f = 32d351f657ccf
+stod x = 43df057ac69b16d6
+dtos f = 32d351f657ccf
+
+x = 2d2acce24f9fa071
+y = 6f47682e14d3c42c
+-x = d2d5331db0605f8f
+x + y = 9c7235106473649d
+x - y = bde364b43acbdc45
+x * y = bb2c5143f769176c
+x /u y = 0
+x %u y = 2d2acce24f9fa071
+x /s y = 0
+x %s y = 2d2acce24f9fa071
+x /u y2 = 67e883b7
+x %u y2 = 4d949d8f
+x /s y3 = 67e883b7
+x %s y3 = 4d949d8f
+~x = d2d5331db0605f8e
+x & y = 2d02482204938020
+x | y = 6f6fecee5fdfe47d
+x ^ y = 426da4cc5b4c645d
+x << i = fa07100000000000
+x >>u i = 2d2ac
+x >>s i = 2d2ac
+x cmpu y = lt
+x cmps y = lt
+utod x = 43c695667127cfd0
+dtou f = 12801f7ddfe5a
+stod x = 43c695667127cfd0
+dtos f = 12801f7ddfe5a
+
+x = 13621127ec8ed10b
+y = 9a109dfc559db53e
+-x = ec9deed813712ef5
+x + y = ad72af24422c8649
+x - y = 7951732b96f11bcd
+x * y = f06266bf1f2267aa
+x /u y = 0
+x %u y = 13621127ec8ed10b
+x /s y = 0
+x %s y = 13621127ec8ed10b
+x /u y2 = 203527d3
+x %u y2 = 339f3657
+x /s y3 = ffffffffcf52411e
+x %s y3 = 47c75183
+~x = ec9deed813712ef4
+x & y = 12001124448c910a
+x | y = 9b729dfffd9ff53f
+x ^ y = 89728cdbb9136435
+x << i = c000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = gt
+utod x = 43b3621127ec8ed1
+dtou f = 7f076703304d
+stod x = 43b3621127ec8ed1
+dtos f = 7f076703304d
+
+x = 4ab009d226201f35
+y = da130a0806148a0
+-x = b54ff62dd9dfe0cb
+x + y = 58513a72a68167d5
+x - y = 3d0ed931a5bed695
+x * y = 27bc30f72fef6920
+x /u y = 5
+x %u y = 68a16afa439b415
+x /s y = 5
+x %s y = 68a16afa439b415
+x /u y2 = 57ad3b593
+x %u y2 = 7501355
+x /s y3 = 57ad3b593
+x %s y3 = 7501355
+~x = b54ff62dd9dfe0ca
+x & y = 8a0008000200820
+x | y = 4fb139f2a6615fb5
+x ^ y = 47113972a6415795
+x << i = 26201f3500000000
+x >>u i = 4ab009d2
+x >>s i = 4ab009d2
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d2ac0274898808
+dtou f = 1e979155aadac
+stod x = 43d2ac0274898808
+dtos f = 1e979155aadac
+
+x = a8f485b490a8a56f
+y = 2dd1b3c84cb9a6d2
+-x = 570b7a4b6f575a91
+x + y = d6c6397cdd624c41
+x - y = 7b22d1ec43eefe9d
+x * y = e3c43aa086d4af0e
+x /u y = 3
+x %u y = 1f7f6a5baa7bb0f9
+x /s y = ffffffffffffffff
+x %s y = d6c6397cdd624c41
+x /u y2 = 3affbc857
+x %u y2 = 2cd84c77
+x /s y3 = fffffffe19aa1e7c
+x %s y3 = ffffffffe134208f
+~x = 570b7a4b6f575a90
+x & y = 28d0818000a8a442
+x | y = adf5b7fcdcb9a7ff
+x ^ y = 8525367cdc1103bd
+x << i = 16d242a295bc0000
+x >>u i = 2a3d216d242a
+x >>s i = ffffea3d216d242a
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e51e90b6921515
+dtou f = 45343bae4fbb7
+stod x = c3d5c2de92dbd5d7
+dtos f = fffdc58b001deaec
+
+x = ac82b442fe060239
+y = ba09c177d0bd2c54
+-x = 537d4bbd01f9fdc7
+x + y = 668c75bacec32e8d
+x - y = f278f2cb2d48d5e5
+x * y = b0321761566f86b4
+x /u y = 0
+x %u y = ac82b442fe060239
+x /s y = 1
+x %s y = f278f2cb2d48d5e5
+x /u y2 = ed62a033
+x %u y2 = 9a9c1784
+x /s y3 = 1317fc5dc
+x %s y3 = ffffffffdaba2cf5
+~x = 537d4bbd01f9fdc6
+x & y = a8008042d0040010
+x | y = be8bf577febf2e7d
+x ^ y = 168b75352ebb2e6d
+x << i = 442fe06023900000
+x >>u i = ac82b442fe0
+x >>s i = fffffac82b442fe0
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e59056885fc0c0
+dtou f = 46a90b2a98617
+stod x = c3d4df52ef407e7f
+dtos f = fffddcd7f7e2754d
+
+x = b18070243e89f813
+y = 8b9a402e6ec889a6
+-x = 4e7f8fdbc17607ed
+x + y = 3d1ab052ad5281b9
+x - y = 25e62ff5cfc16e6d
+x * y = 4bb994c45c110752
+x /u y = 1
+x %u y = 25e62ff5cfc16e6d
+x /s y = 0
+x %s y = b18070243e89f813
+x /u y2 = 1457fa721
+x %u y2 = 62f7b025
+x /s y3 = aca563d9
+x %s y3 = fffffffffc51c715
+~x = 4e7f8fdbc17607ec
+x & y = 818040242e888802
+x | y = bb9a702e7ec9f9b7
+x ^ y = 3a1a300a504171b5
+x << i = a27e04c000000000
+x >>u i = 2c601c0
+x >>s i = fffffffffec601c0
+x cmpu y = gt
+x cmps y = gt
+utod x = 43e6300e0487d13f
+dtou f = 48b46746f5fb1
+stod x = c3d39fe3f6f05d82
+dtos f = fffdfd8db9a84ee6
+
+x = 81dc3a61528f0d7d
+y = 20670ecc67fee348
+-x = 7e23c59ead70f283
+x + y = a243492dba8df0c5
+x - y = 61752b94ea902a35
+x * y = 8c82c58cbe37a228
+x /u y = 4
+x %u y = 3fff2fb293805d
+x /s y = fffffffffffffffd
+x %s y = e31166c68a8bb755
+x /u y2 = 401f99d2e
+x %u y2 = 1b8b48d5
+x /s y3 = fffffffc1b6b5e46
+x %s y3 = ffffffffebae19b5
+~x = 7e23c59ead70f282
+x & y = 440a40428e0148
+x | y = a1ff3eed77ffef7d
+x ^ y = a1bb34ad3571ee35
+x << i = dc3a61528f0d7d00
+x >>u i = 81dc3a61528f0d
+x >>s i = ff81dc3a61528f0d
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e03b874c2a51e2
+dtou f = 3530d5f787ce7
+stod x = c3df88f167ab5c3d
+dtos f = fffcc554a4b16c1d
+
+x = 148943805ade2cf7
+y = b623237a886f1ba
+-x = eb76bc7fa521d309
+x + y = 1feb75b803651eb1
+x - y = 9271148b2573b3d
+x * y = 732a25abcf0b3276
+x /u y = 1
+x %u y = 9271148b2573b3d
+x /s y = 1
+x %s y = 9271148b2573b3d
+x /u y2 = 1cdd42781
+x %u y2 = 8307e40
+x /s y3 = 1cdd42781
+x %s y3 = 8307e40
+~x = eb76bc7fa521d308
+x & y = 200088620b2
+x | y = 1feb73b7fadefdff
+x ^ y = 1feb71b7f258dd4d
+x << i = dc00000000000000
+x >>u i = 5
+x >>s i = 5
+x cmpu y = gt
+x cmps y = gt
+utod x = 43b48943805ade2d
+dtou f = 869600d40a9a
+stod x = 43b48943805ade2d
+dtos f = 869600d40a9a
+
+x = 3596d7e2724d4501
+y = 200951a3d9cd217c
+-x = ca69281d8db2baff
+x + y = 55a029864c1a667d
+x - y = 158d863e98802385
+x * y = 12f0a6c6e1f8d7c
+x /u y = 1
+x %u y = 158d863e98802385
+x /s y = 1
+x %s y = 158d863e98802385
+x /u y2 = 1ac3a0a48
+x %u y2 = f8ff129
+x /s y3 = 1ac3a0a48
+x %s y3 = f8ff129
+~x = ca69281d8db2bafe
+x & y = 200051a2504d0100
+x | y = 359fd7e3fbcd657d
+x ^ y = 159f8641ab80647d
+x << i = 1000000000000000
+x >>u i = 3
+x >>s i = 3
+x cmpu y = gt
+x cmps y = gt
+utod x = 43cacb6bf13926a3
+dtou f = 15f33cfbad626
+stod x = 43cacb6bf13926a3
+dtos f = 15f33cfbad626
+
+x = 8364db7a513ee81b
+y = 39c53d919052b30e
+-x = 7c9b2485aec117e5
+x + y = bd2a190be1919b29
+x - y = 499f9de8c0ec350d
+x * y = 7192d67ff661927a
+x /u y = 2
+x %u y = fda6057309981ff
+x /s y = fffffffffffffffe
+x %s y = f6ef569d71e44e37
+x /u y2 = 2464000e3
+x %u y2 = d195088
+x /s y3 = fffffffdd7d46212
+x %s y3 = fffffffffcbb11e9
+~x = 7c9b2485aec117e4
+x & y = 14419101012a00a
+x | y = bbe5fffbd17efb1f
+x ^ y = baa1e6ebc16c5b15
+x << i = 36de944fba06c000
+x >>u i = 20d936de944fb
+x >>s i = fffe0d936de944fb
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e06c9b6f4a27dd
+dtou f = 35d1a821b799f
+stod x = c3df26c9216bb046
+dtos f = fffccf61c75468d5
+
+x = ca6ee36ebbeaecc5
+y = 7fe0e5d6d5d1daf0
+-x = 35911c914415133b
+x + y = 4a4fc94591bcc7b5
+x - y = 4a8dfd97e61911d5
+x * y = 28f18b4f6fb2bab0
+x /u y = 1
+x %u y = 4a8dfd97e61911d5
+x /s y = 0
+x %s y = ca6ee36ebbeaecc5
+x /u y2 = 195403f42
+x %u y2 = 67e00199
+x /s y3 = ffffffff94c3b859
+x %s y3 = ffffffffd787355f
+~x = 35911c914415133a
+x & y = 4a60e14691c0c8c0
+x | y = ffeee7fefffbfef5
+x ^ y = b58e06b86e3b3635
+x << i = ecc5000000000000
+x >>u i = ca6e
+x >>s i = ffffffffffffca6e
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e94ddc6dd77d5e
+dtou f = 52eaa7b41d752
+stod x = c3cac88e48a20a8a
+dtos f = fffea0f1c07ac687
+
+x = e9872a956980d7f
+y = 5abe9b0d2dbee1a2
+-x = f1678d56a967f281
+x + y = 69570db68456ef21
+x - y = b3d9d79c28d92bdd
+x * y = 374cf7f0c757295e
+x /u y = 0
+x %u y = e9872a956980d7f
+x /s y = 0
+x %s y = e9872a956980d7f
+x /u y2 = 292cea58
+x %u y2 = 301cdf07
+x /s y3 = 292cea58
+x %s y3 = 301cdf07
+~x = f1678d56a967f280
+x & y = a98120904980122
+x | y = 5ebefbad7fbeedff
+x ^ y = 5426e9a47b26ecdd
+x << i = 5a6035fc00000000
+x >>u i = 3a61caa
+x >>s i = 3a61caa
+x cmpu y = lt
+x cmps y = lt
+utod x = 43ad30e552ad301b
+dtou f = 5fa72f57d2bd
+stod x = 43ad30e552ad301b
+dtos f = 5fa72f57d2bd
+
+x = cc90a3111f2e88c9
+y = fdb739ffd16e43a4
+-x = 336f5ceee0d17737
+x + y = ca47dd10f09ccc6d
+x - y = ced969114dc04525
+x * y = cba4570306fa3bc4
+x /u y = 0
+x %u y = cc90a3111f2e88c9
+x /s y = 16
+x %s y = fed1a7151fb4b8b1
+x /u y2 = ce682040
+x %u y2 = 8882909
+x /s y3 = 16845c4311
+x %s y3 = fffffffffe31f1da
+~x = 336f5ceee0d17736
+x & y = cc902111112e0080
+x | y = fdb7bbffdf6ecbed
+x ^ y = 31279aeece40cb6d
+x << i = f2e88c9000000000
+x >>u i = cc90a31
+x >>s i = fffffffffcc90a31
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e992146223e5d1
+dtou f = 53ca3196c85e7
+stod x = c3c9b7ae777068bc
+dtos f = fffeaeea5ea5751c
+
+x = f6d06b7289cbc123
+y = f78231121267d176
+-x = 92f948d76343edd
+x + y = ee529c849c339299
+x - y = ff4e3a607763efad
+x * y = bda1155007ad9922
+x /u y = 0
+x %u y = f6d06b7289cbc123
+x /s y = 1
+x %s y = ff4e3a607763efad
+x /u y2 = ff48210c
+x %u y2 = 6c4e224b
+x /s y3 = 114ef78ac
+x %s y3 = ffffffffff84590b
+~x = 92f948d76343edc
+x & y = f68021120043c122
+x | y = f7d27b729befd177
+x ^ y = 1525a609bac1055
+x << i = 48c0000000000000
+x >>u i = 3db
+x >>s i = ffffffffffffffdb
+x cmpu y = lt
+x cmps y = lt
+utod x = 43eeda0d6e513978
+dtou f = 6518569bde544
+stod x = c3a25f291aec687e
+dtos f = ffffc3ccaef6d47a
+
+x = 1f75e527a63edd0d
+y = cab61a6fe4aecf98
+-x = e08a1ad859c122f3
+x + y = ea2bff978aedaca5
+x - y = 54bfcab7c1900d75
+x * y = f44145345ce6c2b8
+x /u y = 0
+x %u y = 1f75e527a63edd0d
+x /s y = 0
+x %s y = 1f75e527a63edd0d
+x /u y2 = 27bb1964
+x %u y2 = 7672b2b1
+x /s y3 = ffffffff68dce4e4
+x %s y3 = 1d207631
+~x = e08a1ad859c122f2
+x & y = a340027a42ecd08
+x | y = dff7ff6fe6bedf9d
+x ^ y = d5c3ff4842901295
+x << i = 27a63edd0d000000
+x >>u i = 1f75e527a6
+x >>s i = 1f75e527a6
+x cmpu y = lt
+x cmps y = gt
+utod x = 43bf75e527a63edd
+dtou f = ce2e01d58fbd
+stod x = 43bf75e527a63edd
+dtos f = ce2e01d58fbd
+
+x = 67d900b4d6966707
+y = 2953a6185807168a
+-x = 9826ff4b296998f9
+x + y = 912ca6cd2e9d7d91
+x - y = 3e855a9c7e8f507d
+x * y = faecb01bd31f23c6
+x /u y = 2
+x %u y = 1531b484268839f3
+x /s y = 2
+x %s y = 1531b484268839f3
+x /u y2 = 28349b7ce
+x %u y2 = 24b397b7
+x /s y3 = 28349b7ce
+x %s y3 = 24b397b7
+~x = 9826ff4b296998f8
+x & y = 2151001050060602
+x | y = 6fdba6bcde97778f
+x ^ y = 4e8aa6ac8e91718d
+x << i = 6402d35a599c1c00
+x >>u i = 19f6402d35a599
+x >>s i = 19f6402d35a599
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d9f6402d35a59a
+dtou f = 2a893795d8eb4
+stod x = 43d9f6402d35a59a
+dtos f = 2a893795d8eb4
+
+x = 89d196850b26ed91
+y = 756ef4168e7f32cc
+-x = 762e697af4d9126f
+x + y = ff408a9b99a6205d
+x - y = 1462a26e7ca7bac5
+x * y = a3aeacbec65aa18c
+x /u y = 1
+x %u y = 1462a26e7ca7bac5
+x /s y = ffffffffffffffff
+x %s y = ff408a9b99a6205d
+x /u y2 = 12c706881
+x %u y2 = 3974fe7b
+x /s y3 = fffffffefe5ea0d2
+x %s y3 = ffffffffe980f385
+~x = 762e697af4d9126e
+x & y = 14094040a262080
+x | y = fdfff6978f7fffdd
+x ^ y = fcbf62938559df5d
+x << i = 196850b26ed91000
+x >>u i = 89d196850b26e
+x >>s i = fff89d196850b26e
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e13a32d0a164de
+dtou f = 387356a372c01
+stod x = c3dd8b9a5ebd3645
+dtos f = fffcf97caf701b36
+
+x = 25ea5203eb2a32b
+y = d27bb23d57c784de
+-x = fda15adfc14d5cd5
+x + y = d4da575d967a2809
+x - y = 2fe2f2e2e6eb1e4d
+x * y = fe450bd9ee78ab4a
+x /u y = 0
+x %u y = 25ea5203eb2a32b
+x /s y = 0
+x %s y = 25ea5203eb2a32b
+x /u y2 = 2e1d4dc
+x %u y2 = cb2df2bf
+x /s y3 = fffffffff2ac0c93
+x %s y3 = 2a546e24
+~x = fda15adfc14d5cd4
+x & y = 25aa0201682800a
+x | y = d27fb73d7ff7a7ff
+x ^ y = d025171d697527f5
+x << i = faca8cac0000000
+x >>u i = 97a9480
+x >>s i = 97a9480
+x cmpu y = lt
+x cmps y = gt
+utod x = 4382f52901f59519
+dtou f = f87b5758598
+stod x = 4382f52901f59519
+dtos f = f87b5758598
+
+x = 57e7846b9d99fe55
+y = 933550ac541e6140
+-x = a8187b94626601ab
+x + y = eb1cd517f1b85f95
+x - y = c4b233bf497b9d15
+x * y = eb523a71d3ca40
+x /u y = 0
+x %u y = 57e7846b9d99fe55
+x /s y = 0
+x %s y = 57e7846b9d99fe55
+x /u y2 = 98de5aed
+x %u y2 = 81b9d719
+x /s y3 = ffffffff312686c0
+x %s y3 = 67db7555
+~x = a8187b94626601aa
+x & y = 1325002814186040
+x | y = d7f7d4efdd9fff55
+x ^ y = c4d2d4c7c9879f15
+x << i = 57e7846b9d99fe55
+x >>u i = 57e7846b9d99fe55
+x >>s i = 57e7846b9d99fe55
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d5f9e11ae76680
+dtou f = 240170cfeb5b5
+stod x = 43d5f9e11ae76680
+dtos f = 240170cfeb5b5
+
+x = 7f83a526d3d598f
+y = 3562759c2ed93072
+-x = f807c5ad92c2a671
+x + y = 3d5aafee9c168a01
+x - y = d295c4b63e64291d
+x * y = d607fa6cc253b1ae
+x /u y = 0
+x %u y = 7f83a526d3d598f
+x /s y = 0
+x %s y = 7f83a526d3d598f
+x /u y2 = 2637b00b
+x %u y2 = 4a30bdb
+x /s y3 = 2637b00b
+x %s y3 = 4a30bdb
+~x = f807c5ad92c2a670
+x & y = 56030102c191002
+x | y = 37fa7fde6ffd79ff
+x ^ y = 329a4fce43e469fd
+x << i = 663c000000000000
+x >>u i = 1fe
+x >>s i = 1fe
+x cmpu y = lt
+x cmps y = lt
+utod x = 439fe0e949b4f566
+dtou f = 343ad6486db4
+stod x = 439fe0e949b4f566
+dtos f = 343ad6486db4
+
+x = e2e88e05fcf79359
+y = 6d0fe1876fd28ef4
+-x = 1d1771fa03086ca7
+x + y = 4ff86f8d6cca224d
+x - y = 75d8ac7e8d250465
+x * y = e44e78efe5b5ced4
+x /u y = 2
+x %u y = 8c8caf71d527571
+x /s y = 0
+x %s y = e2e88e05fcf79359
+x /u y2 = 2149e9078
+x %u y2 = 3157ec11
+x /s y3 = ffffffffbbb6b39e
+x %s y3 = ffffffff9f7efd07
+~x = 1d1771fa03086ca6
+x & y = 600880056cd28250
+x | y = efefef87fff79ffd
+x ^ y = 8fe76f8293251dad
+x << i = 3590000000000000
+x >>u i = e2e
+x >>s i = fffffffffffffe2e
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ec5d11c0bf9ef2
+dtou f = 5cf112710b749
+stod x = c3bd1771fa03086d
+dtos f = ffff41586c49a67e
+
+x = d3099a8b859ae33
+y = 8fa8587c64456d46
+-x = f2cf665747a651cd
+x + y = 9cd8f2251c9f1b79
+x - y = 7d88412c541440ed
+x * y = 8cfd7b8c777058f2
+x /u y = 0
+x %u y = d3099a8b859ae33
+x /s y = 0
+x %s y = d3099a8b859ae33
+x /u y2 = 178126fd
+x %u y2 = 75bd3a7
+x /s y3 = ffffffffe1f1945e
+x %s y3 = 5ca580ab
+~x = f2cf665747a651cc
+x & y = d20182820412c02
+x | y = 8fb8d9fcfc5def77
+x ^ y = 8298c1d4dc1cc375
+x << i = 4c266a2e166b8cc0
+x >>u i = 34c266a2e166b8
+x >>s i = 34c266a2e166b8
+x cmpu y = lt
+x cmps y = gt
+utod x = 43aa61335170b35c
+dtou f = 5670e3244231
+stod x = 43aa61335170b35c
+dtos f = 5670e3244231
+
+x = b62ea5426a8f709d
+y = 31b8858d02dd2fe8
+-x = 49d15abd95708f63
+x + y = e7e72acf6d6ca085
+x - y = 84761fb567b240b5
+x * y = e22a9ef5933e148
+x /u y = 3
+x %u y = 2105149b61f7e0e5
+x /s y = ffffffffffffffff
+x %s y = e7e72acf6d6ca085
+x /u y2 = 3aa02d06b
+x %u y2 = 8d30eae
+x /s y3 = fffffffe83ee35ed
+x %s y3 = ffffffffdeff9c14
+~x = 49d15abd95708f62
+x & y = 30288500028d2088
+x | y = b7bea5cf6adf7ffd
+x ^ y = 879620cf68525f75
+x << i = 8f709d0000000000
+x >>u i = b62ea5
+x >>s i = ffffffffffb62ea5
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e6c5d4a84d51ee
+dtou f = 4a9f307066a12
+stod x = c3d27456af655c24
+dtos f = fffe1c3a4c3f5948
+
+x = 2e94ca71f5150517
+y = 5f844de64402cf5a
+-x = d16b358e0aeafae9
+x + y = 8e1918583917d471
+x - y = cf107c8bb11235bd
+x * y = 36454e6a4eaf6316
+x /u y = 0
+x %u y = 2e94ca71f5150517
+x /s y = 0
+x %s y = 2e94ca71f5150517
+x /u y2 = 7cd84df1
+x %u y2 = 1c098191
+x /s y3 = 7cd84df1
+x %s y3 = 1c098191
+~x = d16b358e0aeafae8
+x & y = e84486044000512
+x | y = 7f94cff7f517cf5f
+x ^ y = 71108797b117ca4d
+x << i = c7d454145c000000
+x >>u i = ba5329c7d
+x >>s i = ba5329c7d
+x cmpu y = lt
+x cmps y = lt
+utod x = 43c74a6538fa8a83
+dtou f = 131464f1a5831
+stod x = 43c74a6538fa8a83
+dtos f = 131464f1a5831
+
+x = 61bd0eb9b8259a21
+y = 134717f07eaef81c
+-x = 9e42f14647da65df
+x + y = 750426aa36d4923d
+x - y = 4e75f6c93976a205
+x * y = 780fa80f91dad39c
+x /u y = 5
+x %u y = 15997073ebac195
+x /s y = 5
+x %s y = 15997073ebac195
+x /u y2 = 511ed4ac8
+x %u y2 = 27f86a1
+x /s y3 = 511ed4ac8
+x %s y3 = 27f86a1
+~x = 9e42f14647da65de
+x & y = 10506b038249800
+x | y = 73ff1ff9feaffa3d
+x ^ y = 72fa1949c68b623d
+x << i = 9b8259a210000000
+x >>u i = 61bd0eb9b
+x >>s i = 61bd0eb9b
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d86f43ae6e0967
+dtou f = 2808a00a88ddd
+stod x = 43d86f43ae6e0967
+dtos f = 2808a00a88ddd
+
+x = 3b12fa08c18b023b
+y = 812e51fdc3492aae
+-x = c4ed05f73e74fdc5
+x + y = bc414c0684d42ce9
+x - y = b9e4a80afe41d78d
+x * y = 9f875459eeac321a
+x /u y = 0
+x %u y = 3b12fa08c18b023b
+x /s y = 0
+x %s y = 3b12fa08c18b023b
+x /u y2 = 751173e7
+x %u y2 = 4f1d5ff0
+x /s y3 = ffffffff88c0657c
+x %s y3 = 3027aaf
+~x = c4ed05f73e74fdc4
+x & y = 1025008c109022a
+x | y = bb3efbfdc3cb2abf
+x ^ y = ba3cabf502c22895
+x << i = c08ec00000000000
+x >>u i = ec4b
+x >>s i = ec4b
+x cmpu y = lt
+x cmps y = gt
+utod x = 43cd897d0460c581
+dtou f = 18325f0a8cb71
+stod x = 43cd897d0460c581
+dtos f = 18325f0a8cb71
+
+x = 341b83d316b653e5
+y = 5153bc26395bdb90
+-x = cbe47c2ce949ac1b
+x + y = 856f3ff950122f75
+x - y = e2c7c7acdd5a7855
+x * y = a24306a68fbb17d0
+x /u y = 0
+x %u y = 341b83d316b653e5
+x /s y = 0
+x %s y = 341b83d316b653e5
+x /u y2 = a405e4b4
+x %u y2 = 4f86312d
+x /s y3 = a405e4b4
+x %s y3 = 4f86312d
+~x = cbe47c2ce949ac1a
+x & y = 1013800210125380
+x | y = 755bbff73fffdbf5
+x ^ y = 65483ff52fed8875
+x << i = 83d316b653e50000
+x >>u i = 341b83d316b6
+x >>s i = 341b83d316b6
+x cmpu y = lt
+x cmps y = lt
+utod x = 43ca0dc1e98b5b2a
+dtou f = 1557dd8590e16
+stod x = 43ca0dc1e98b5b2a
+dtos f = 1557dd8590e16
+
+x = 605604a12949899f
+y = c5e6d8ea02259342
+-x = 9fa9fb5ed6b67661
+x + y = 263cdd8b2b6f1ce1
+x - y = 9a6f2bb72723f65d
+x * y = f1a33a4c00f6c7fe
+x /u y = 0
+x %u y = 605604a12949899f
+x /s y = ffffffffffffffff
+x %s y = 263cdd8b2b6f1ce1
+x /u y2 = 7c9e1383
+x %u y2 = 89a32be1
+x /s y3 = fffffffe57830a86
+x %s y3 = 83cdb23
+~x = 9fa9fb5ed6b67660
+x & y = 404600a000018102
+x | y = e5f6dceb2b6d9bdf
+x ^ y = a5b0dc4b2b6c1add
+x << i = 81581284a526267c
+x >>u i = 181581284a526267
+x >>s i = 181581284a526267
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d81581284a5262
+dtou f = 27759007077d8
+stod x = 43d81581284a5262
+dtos f = 27759007077d8
+
+x = 94e3e5e2497a21e9
+y = 6f2836d5614f0e44
+-x = 6b1c1a1db685de17
+x + y = 40c1cb7aac9302d
+x - y = 25bbaf0ce82b13a5
+x * y = eaf4c086f232bfe4
+x /u y = 1
+x %u y = 25bbaf0ce82b13a5
+x /s y = 0
+x %s y = 94e3e5e2497a21e9
+x /u y2 = 156e6b4cb
+x %u y2 = caae302
+x /s y3 = ffffffff09523710
+x %s y3 = ffffffffa5f4f199
+~x = 6b1c1a1db685de16
+x & y = 42024c0414a0040
+x | y = ffebf7f7697f2fed
+x ^ y = fbcbd33728352fad
+x << i = 4e3e5e2497a21e90
+x >>u i = 94e3e5e2497a21e
+x >>s i = f94e3e5e2497a21e
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e29c7cbc492f44
+dtou f = 3cfc45959f1ec
+stod x = c3dac706876da178
+dtos f = fffd420b9e92e122
+
+x = 2c62677ac7f4bf43
+y = 58ff1a48be4e5d16
+-x = d39d9885380b40bd
+x + y = 856181c386431c59
+x - y = d3634d3209a6622d
+x * y = dbdcaa8f18edc6c2
+x /u y = 0
+x %u y = 2c62677ac7f4bf43
+x /s y = 0
+x %s y = 2c62677ac7f4bf43
+x /u y2 = 7fac2866
+x %u y2 = 15d50693
+x /s y3 = 7fac2866
+x %s y3 = 15d50693
+~x = d39d9885380b40bc
+x & y = 862024886441d02
+x | y = 7cff7f7afffeff57
+x ^ y = 749d7d3279bae255
+x << i = deb1fd2fd0c00000
+x >>u i = b1899deb1f
+x >>s i = b1899deb1f
+x cmpu y = lt
+x cmps y = lt
+utod x = 43c63133bd63fa60
+dtou f = 122e0a6fbb2b5
+stod x = 43c63133bd63fa60
+dtos f = 122e0a6fbb2b5
+
+x = 54328cce0129c82d
+y = e66196f0c43f0438
+-x = abcd7331fed637d3
+x + y = 3a9423bec568cc65
+x - y = 6dd0f5dd3ceac3f5
+x * y = 6e5b53cf9f577dd8
+x /u y = 0
+x %u y = 54328cce0129c82d
+x /s y = fffffffffffffffd
+x %s y = 75751a04de6d4d5
+x /u y2 = 5d8f7413
+x %u y2 = 2f76d45d
+x /s y3 = fffffffcb6a49899
+x %s y3 = d7812bd
+~x = abcd7331fed637d2
+x & y = 442084c000290028
+x | y = f6739efec53fcc3d
+x ^ y = b2531a3ec516cc15
+x << i = 2d00000000000000
+x >>u i = 54
+x >>s i = 54
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d50ca333804a72
+dtou f = 227cbe624e4ce
+stod x = 43d50ca333804a72
+dtos f = 227cbe624e4ce
+
+x = 764480f2ce2b0727
+y = e847cd7a4b371c2a
+-x = 89bb7f0d31d4f8d9
+x + y = 5e8c4e6d19622351
+x - y = 8dfcb37882f3eafd
+x * y = c4f5404f7e387066
+x /u y = 0
+x %u y = 764480f2ce2b0727
+x /s y = fffffffffffffffc
+x %s y = 1763b6dbfb0777cf
+x /u y2 = 82583705
+x %u y2 = 41aecdc5
+x /s y3 = fffffffb038fcf10
+x %s y3 = 5628987
+~x = 89bb7f0d31d4f8d8
+x & y = 604480724a230422
+x | y = fe47cdfacf3f1f2f
+x ^ y = 9e034d88851c1b0d
+x << i = ac1c9c0000000000
+x >>u i = 1d9120
+x >>s i = 1d9120
+x cmpu y = lt
+x cmps y = gt
+utod x = 43dd91203cb38ac2
+dtou f = 30714183cfbc6
+stod x = 43dd91203cb38ac2
+dtos f = 30714183cfbc6
+
+x = 86e8642063824ab1
+y = 200894ad1d61716c
+-x = 79179bdf9c7db54f
+x + y = a6f0f8cd80e3bc1d
+x - y = 66dfcf734620d945
+x * y = 218ace07d800a3ac
+x /u y = 4
+x %u y = 6c6116bedfc8501
+x /s y = fffffffffffffffd
+x %s y = e7022227bba69ef5
+x /u y2 = 43622075f
+x %u y2 = 748637e
+x /s y3 = fffffffc38469f85
+x %s y3 = ffffffffeb6799d0
+~x = 79179bdf9c7db54e
+x & y = 8042001004020
+x | y = a6e8f4ad7fe37bfd
+x ^ y = a6e0f08d7ee33bdd
+x << i = 24ab100000000000
+x >>u i = 86e86
+x >>s i = fffffffffff86e86
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e0dd0c840c7049
+dtou f = 37421b15de363
+stod x = c3de45e6f7e71f6d
+dtos f = fffce668f696d298
+
+x = f22c66df8ca9054b
+y = 3237edb3e364a47e
+-x = dd399207356fab5
+x + y = 24645493700da9c9
+x - y = bff4792ba94460cd
+x * y = aa00cc4c14e0a6ea
+x /u y = 4
+x %u y = 294cb00fff167353
+x /s y = 0
+x %s y = f22c66df8ca9054b
+x /u y2 = 4d288aa25
+x %u y2 = 9a2cc6c
+x /s y3 = ffffffffb9841949
+x %s y3 = ffffffffd933c240
+~x = dd399207356fab4
+x & y = 322464938020044a
+x | y = f23fefffefeda57f
+x ^ y = c01b8b6c6fcda135
+x << i = c000000000000000
+x >>u i = 3
+x >>s i = ffffffffffffffff
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ee458cdbf19521
+dtou f = 6331b9e80f041
+stod x = c3aba73240e6adf5
+dtos f = ffffa562e3b9df77
+
+x = a952be7c0308ed75
+y = f80f290ededf49e0
+-x = 56ad4183fcf7128b
+x + y = a161e78ae1e83755
+x - y = b143956d2429a395
+x * y = 13c0257882712360
+x /u y = 0
+x %u y = a952be7c0308ed75
+x /s y = a
+x %s y = f8bb23e74e500ab5
+x /u y2 = aebe580f
+x %u y2 = 47a4b5a3
+x /s y3 = aea5798d9
+x %s y3 = fffffffffe0cd097
+~x = 56ad4183fcf7128a
+x & y = a802280c02084960
+x | y = f95fbf7edfdfedf5
+x ^ y = 515d9772ddd7a495
+x << i = 308ed7500000000
+x >>u i = a952be7c
+x >>s i = ffffffffa952be7c
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e52a57cf80611e
+dtou f = 455ad38d511d7
+stod x = c3d5ab5060ff3dc5
+dtos f = fffdc7f47e0e010d
+
+x = e36991f169ad9daf
+y = 240b76fb67010a12
+-x = 1c966e0e96526251
+x + y = 77508ecd0aea7c1
+x - y = bf5e1af602ac939d
+x * y = d70339ce3d0cec4e
+x /u y = 6
+x %u y = b24c80cffa76143
+x /s y = 0
+x %s y = e36991f169ad9daf
+x /u y2 = 64f253db1
+x %u y2 = 1b1f8b24
+x /s y3 = ffffffff34f67dfd
+x %s y3 = ffffffffe00e78a0
+~x = 1c966e0e96526250
+x & y = 200910f161010802
+x | y = e76bf7fb6fad9fbf
+x ^ y = c762e70a0eac97bd
+x << i = 47c5a6b676bc0000
+x >>u i = 38da647c5a6b
+x >>s i = fffff8da647c5a6b
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ec6d323e2d35b4
+dtou f = 5d25eaad6e0cf
+stod x = c3bc966e0e965262
+dtos f = ffff44a5f00fd005
+
+x = 75714d8bcb0f3479
+y = f306b780aa88c194
+-x = 8a8eb27434f0cb87
+x + y = 6878050c7597f60d
+x - y = 826a960b208672e5
+x * y = d405ed5415a18ef4
+x /u y = 0
+x %u y = 75714d8bcb0f3479
+x /s y = fffffffffffffff7
+x %s y = adc111c9de02ad
+x /u y2 = 7bb650b1
+x %u y2 = b51254f9
+x /s y3 = fffffff6f29b761c
+x %s y3 = 33e2279
+~x = 8a8eb27434f0cb86
+x & y = 710005808a080010
+x | y = f777ff8beb8ff5fd
+x ^ y = 8677fa0b6187f5ed
+x << i = d8bcb0f347900000
+x >>u i = 75714d8bcb0
+x >>s i = 75714d8bcb0
+x cmpu y = lt
+x cmps y = gt
+utod x = 43dd5c5362f2c3cd
+dtou f = 301abf81c22cb
+stod x = 43dd5c5362f2c3cd
+dtos f = 301abf81c22cb
+
+x = 941f83cb649df453
+y = ea181038dbafa0e6
+-x = 6be07c349b620bad
+x + y = 7e379404404d9539
+x - y = aa07739288ee536d
+x * y = 856711c4245a6292
+x /u y = 0
+x %u y = 941f83cb649df453
+x /s y = 4
+x %s y = ebbf42e7f5df70bb
+x /u y2 = a1fbf096
+x %u y2 = 8c67f383
+x /s y3 = 4ecae6205
+x %s y3 = fffffffff5e0333b
+~x = 6be07c349b620bac
+x & y = 80180008408da042
+x | y = fe1f93fbffbff4f7
+x ^ y = 7e0793f3bf3254b5
+x << i = 277d14c000000000
+x >>u i = 2507e0f
+x >>s i = fffffffffe507e0f
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e283f0796c93bf
+dtou f = 3cabd55143047
+stod x = c3daf81f0d26d883
+dtos f = fffd3d049a4d1f7d
+
+x = 789d0be4a3f6e3bd
+y = 278ae68eedc94c88
+-x = 8762f41b5c091c43
+x + y = a027f27391c03045
+x - y = 51122555b62d9735
+x * y = 7b6a0a92f2a1868
+x /u y = 3
+x %u y = 1fc5837da9afe25
+x /s y = 3
+x %s y = 1fc5837da9afe25
+x /u y2 = 30cdb0a56
+x %u y2 = ad1e409
+x /s y3 = 30cdb0a56
+x %s y3 = ad1e409
+~x = 8762f41b5c091c42
+x & y = 20880284a1c04088
+x | y = 7f9fefeeefffefbd
+x ^ y = 5f17ed6a4e3faf35
+x << i = 9d0be4a3f6e3bd00
+x >>u i = 789d0be4a3f6e3
+x >>s i = 789d0be4a3f6e3
+x cmpu y = gt
+x cmps y = gt
+utod x = 43de2742f928fdb9
+dtou f = 31673cfc93710
+stod x = 43de2742f928fdb9
+dtos f = 31673cfc93710
+
+x = 2ff5275f8be96d37
+y = f35327a741a0fcfa
+-x = d00ad8a0741692c9
+x + y = 23484f06cd8a6a31
+x - y = 3ca1ffb84a48703d
+x * y = 7926d788a2d6cbb6
+x /u y = 0
+x %u y = 2ff5275f8be96d37
+x /s y = fffffffffffffffd
+x %s y = 9ee9e5550cc6425
+x /u y2 = 3274b0b2
+x %u y2 = 9b290b19
+x /s y3 = fffffffc3766d779
+x %s y3 = 5c46e48
+~x = d00ad8a0741692c8
+x & y = 2351270701a06c32
+x | y = fff727ffcbe9fdff
+x ^ y = dca600f8ca4991cd
+x << i = dc00000000000000
+x >>u i = b
+x >>s i = b
+x cmpu y = lt
+x cmps y = gt
+utod x = 43c7fa93afc5f4b7
+dtou f = 13a4b8e1e9d87
+stod x = 43c7fa93afc5f4b7
+dtos f = 13a4b8e1e9d87
+
+x = cba92b33d3b5ff41
+y = 1f0ddcc454db9ebc
+-x = 3456d4cc2c4a00bf
+x + y = eab707f828919dfd
+x - y = ac9b4e6f7eda6085
+x * y = 43b3d4f07dcc91bc
+x /u y = 6
+x %u y = 1155fe99d69046d9
+x /s y = ffffffffffffffff
+x %s y = eab707f828919dfd
+x /u y2 = 68ee956da
+x %u y2 = 135a2859
+x /s y3 = fffffffe50888709
+x %s y3 = ffffffffe4ade05d
+~x = 3456d4cc2c4a00be
+x & y = b09080050919e00
+x | y = dfadfff7d7fffffd
+x ^ y = d4a4f7f7876e61fd
+x << i = 1000000000000000
+x >>u i = c
+x >>s i = fffffffffffffffc
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e97525667a76c0
+dtou f = 536b62630d660
+stod x = c3ca2b6a66162500
+dtos f = fffea8fd6b69c595
+
+x = 48d35e1a092dac5b
+y = 32d5df5f91e6f24e
+-x = b72ca1e5f6d253a5
+x + y = 7ba93d799b149ea9
+x - y = 15fd7eba7746ba0d
+x * y = a8bfa1095d9a89ba
+x /u y = 1
+x %u y = 15fd7eba7746ba0d
+x /s y = 1
+x %s y = 15fd7eba7746ba0d
+x /u y2 = 16ebd54de
+x %u y2 = b47cbf9
+x /s y3 = 16ebd54de
+x %s y3 = b47cbf9
+~x = b72ca1e5f6d253a4
+x & y = d15e1a0124a04a
+x | y = 7ad7df5f99effe5f
+x ^ y = 7a06814598cb5e15
+x << i = d786824b6b16c000
+x >>u i = 1234d786824b6
+x >>s i = 1234d786824b6
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d234d786824b6b
+dtou f = 1dd452c7e644a
+stod x = 43d234d786824b6b
+dtos f = 1dd452c7e644a
+
+x = be9793bd5e9acb05
+y = 1b8eddf6093dac30
+-x = 41686c42a16534fb
+x + y = da2671b367d87735
+x - y = a308b5c7555d1ed5
+x * y = 1dea52e54a9e6cf0
+x /u y = 6
+x %u y = 193e5ff92728c1e5
+x /s y = fffffffffffffffe
+x %s y = f5b54fa971162365
+x /u y2 = 6ea800f5e
+x %u y2 = a23e0b1
+x /s y3 = fffffffda06520cf
+x %s y3 = ffffffffe848911b
+~x = 41686c42a16534fa
+x & y = 1a8691b408188800
+x | y = bf9fdfff5fbfef35
+x ^ y = a5194e4b57a76735
+x << i = cb05000000000000
+x >>u i = be97
+x >>s i = ffffffffffffbe97
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e7d2f277abd359
+dtou f = 4e1107ad00a84
+stod x = c3d05a1b10a8594d
+dtos f = fffe5357c008f9b9
+
+x = 6a5642c7a79a95bf
+y = 8a9c14bdfa0894e2
+-x = 95a9bd3858656a41
+x + y = f4f25785a1a32aa1
+x - y = dfba2e09ad9200dd
+x * y = d5e40e3b89029e9e
+x /u y = 0
+x %u y = 6a5642c7a79a95bf
+x /s y = 0
+x %s y = 6a5642c7a79a95bf
+x /u y2 = c46523b0
+x %u y2 = 22e57ccf
+x /s y3 = ffffffff181acd99
+x %s y3 = 84d7ca
+~x = 95a9bd3858656a40
+x & y = a140085a20894a2
+x | y = eade56ffff9a95ff
+x ^ y = e0ca567a5d92015d
+x << i = 9e6a56fc00000000
+x >>u i = 1a9590b1
+x >>s i = 1a9590b1
+x cmpu y = lt
+x cmps y = gt
+utod x = 43da9590b1e9e6a5
+dtou f = 2b8e3cf0b40fc
+stod x = 43da9590b1e9e6a5
+dtos f = 2b8e3cf0b40fc
+
+x = 457806d9ec4fcb09
+y = 71e74b55ef64a8e4
+-x = ba87f92613b034f7
+x + y = b75f522fdbb473ed
+x - y = d390bb83fceb2225
+x * y = c93cf7e08ad2bc04
+x /u y = 0
+x %u y = 457806d9ec4fcb09
+x /s y = 0
+x %s y = 457806d9ec4fcb09
+x /u y2 = 9c21e58a
+x %u y2 = 1d492637
+x /s y3 = 9c21e58a
+x %s y3 = 1d492637
+~x = ba87f92613b034f6
+x & y = 41600251ec448800
+x | y = 75ff4fddef6febed
+x ^ y = 349f4d8c032b63ed
+x << i = c4fcb09000000000
+x >>u i = 457806d
+x >>s i = 457806d
+x cmpu y = lt
+x cmps y = lt
+utod x = 43d15e01b67b13f3
+dtou f = 1c74565d5b77e
+stod x = 43d15e01b67b13f3
+dtos f = 1c74565d5b77e
+
+x = 9f2932af8964d63
+y = cfbf03aa8d638b6
+-x = f60d6cd50769b29d
+x + y = 16ee8365a16c8619
+x - y = fcf6a2f04fc014ad
+x * y = 1a5fcfc9448aac62
+x /u y = 0
+x %u y = 9f2932af8964d63
+x /s y = 0
+x %s y = 9f2932af8964d63
+x /u y2 = c4213692
+x %u y2 = cc1104f
+x /s y3 = c4213692
+x %s y3 = cc1104f
+~x = f60d6cd50769b29c
+x & y = 8f2902aa8960822
+x | y = dfbf33af8d67df7
+x ^ y = 5096310504075d5
+x << i = 58c0000000000000
+x >>u i = 27
+x >>s i = 27
+x cmpu y = lt
+x cmps y = lt
+utod x = 43a3e52655f12c9b
+dtou f = 41313bac4077
+stod x = 43a3e52655f12c9b
+dtos f = 41313bac4077
+
+x = f20b1d13f51fc34d
+y = 9ce1509c57b108d8
+-x = df4e2ec0ae03cb3
+x + y = 8eec6db04cd0cc25
+x - y = 5529cc779d6eba75
+x * y = 547381c1042430f8
+x /u y = 1
+x %u y = 5529cc779d6eba75
+x /s y = 0
+x %s y = f20b1d13f51fc34d
+x /u y2 = 18af89756
+x %u y2 = 2bc2aae5
+x /s y3 = 240bce76
+x %s y3 = ffffffffefb31365
+~x = df4e2ec0ae03cb2
+x & y = 9001101055110048
+x | y = feeb5d9ff7bfcbdd
+x ^ y = 6eea4d8fa2aecb95
+x << i = 13f51fc34d000000
+x >>u i = f20b1d13f5
+x >>s i = fffffff20b1d13f5
+x cmpu y = gt
+x cmps y = gt
+utod x = 43ee4163a27ea3f8
+dtou f = 632417610ecbe
+stod x = c3abe9c5d815c079
+dtos f = ffffa488bb49dbf3
+
+x = f68a3d1d7aa13747
+y = 6db1793cc07d71ca
+-x = 975c2e2855ec8b9
+x + y = 643bb65a3b1ea911
+x - y = 88d8c3e0ba23c57d
+x * y = e56c8a352a46f506
+x /u y = 2
+x %u y = 1b274aa3f9a653b3
+x /s y = 0
+x %s y = f68a3d1d7aa13747
+x /u y2 = 23f5eca36
+x %u y2 = 5804c9f
+x /s y3 = ffffffffe9ec23f1
+x %s y3 = ffffffff93aae1cb
+~x = 975c2e2855ec8b8
+x & y = 6480391c40213142
+x | y = ffbb7d3dfafd77cf
+x ^ y = 9b3b4421badc468d
+x << i = 28f475ea84dd1c00
+x >>u i = 3da28f475ea84d
+x >>s i = fffda28f475ea84d
+x cmpu y = gt
+x cmps y = lt
+utod x = 43eed147a3af5427
+dtou f = 64fb979962f8b
+stod x = c3a2eb85c50abd91
+dtos f = ffffc200becf1ec1
+
+x = d849bdf83b79b7d1
+y = 6a1649c9d6a2800c
+-x = 27b64207c486482f
+x + y = 426007c2121c37dd
+x - y = 6e33742e64d737c5
+x * y = 79305427addf1dcc
+x /u y = 2
+x %u y = 41d2a648e34b7b9
+x /s y = 0
+x %s y = d849bdf83b79b7d1
+x /u y2 = 209ed682d
+x %u y2 = 2780177c
+x /s y3 = ffffffffa02bad7e
+x %s y3 = ffffffffefe191e3
+~x = 27b64207c486482e
+x & y = 480009c812208000
+x | y = fa5ffdf9fffbb7dd
+x ^ y = b25ff431eddb37dd
+x << i = 9bdf83b79b7d1000
+x >>u i = d849bdf83b79b
+x >>s i = fffd849bdf83b79b
+x cmpu y = gt
+x cmps y = lt
+utod x = 43eb0937bf076f37
+dtou f = 5897724416b90
+stod x = c3c3db2103e24324
+dtos f = fffefbbe697a5ac6
+
+x = e725b2286679f76b
+y = 6d152baf41dd141e
+-x = 18da4dd799860895
+x + y = 543addd7a8570b89
+x - y = 7a108679249ce34d
+x * y = 7bf192ff4bfe5a8a
+x /u y = 2
+x %u y = cfb5ac9e2bfcf2f
+x /s y = 0
+x %s y = e725b2286679f76b
+x /u y2 = 21e776484
+x %u y2 = 1226152f
+x /s y3 = ffffffffc5acab53
+x %s y3 = ffffffffdfdae8ae
+~x = 18da4dd799860894
+x & y = 650522284059140a
+x | y = ef35bbaf67fdf77f
+x ^ y = 8a30998727a4e375
+x << i = 199e7ddac0000000
+x >>u i = 39c96c8a1
+x >>s i = ffffffff9c96c8a1
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ece4b6450ccf3f
+dtou f = 5ead8bbcfd53f
+stod x = c3b8da4dd7998609
+dtos f = ffff5d200108c475
+
+x = 2e8033e5d5b4ec95
+y = 5ff552a5384c0280
+-x = d17fcc1a2a4b136b
+x + y = 8e75868b0e00ef15
+x - y = ce8ae1409d68ea15
+x * y = 5679fa16188b7480
+x /u y = 0
+x %u y = 2e8033e5d5b4ec95
+x /s y = 0
+x %s y = 2e8033e5d5b4ec95
+x /u y2 = 7c0e5692
+x %u y2 = 41025c7b
+x /s y3 = 7c0e5692
+x %s y3 = 41025c7b
+~x = d17fcc1a2a4b136a
+x & y = e8012a510040080
+x | y = 7ff573e5fdfcee95
+x ^ y = 71756140edf8ee15
+x << i = 2e8033e5d5b4ec95
+x >>u i = 2e8033e5d5b4ec95
+x >>s i = 2e8033e5d5b4ec95
+x cmpu y = lt
+x cmps y = lt
+utod x = 43c74019f2eada76
+dtou f = 130bf620b348e
+stod x = 43c74019f2eada76
+dtos f = 130bf620b348e
+
+x = ef43bef3068171cf
+y = b7c320f5051933b2
+-x = 10bc410cf97e8e31
+x + y = a706dfe80b9aa581
+x - y = 37809dfe01683e1d
+x * y = 52ca243e77e45eee
+x /u y = 1
+x %u y = 37809dfe01683e1d
+x /s y = 0
+x %s y = ef43bef3068171cf
+x /u y2 = 14d521718
+x %u y2 = 4c657d7
+x /s y3 = 3b4ec343
+x %s y3 = ffffffffcaaf32b0
+~x = 10bc410cf97e8e30
+x & y = a74320f104013182
+x | y = ffc3bef7079973ff
+x ^ y = 58809e060398427d
+x << i = c73c000000000000
+x >>u i = 3bd0
+x >>s i = fffffffffffffbd0
+x cmpu y = gt
+x cmps y = gt
+utod x = 43ede877de60d02e
+dtou f = 6200b71208662
+stod x = c3b0bc410cf97e8e
+dtos f = ffff9252b6597598
+
+x = 90d105664c14e599
+y = 457cda70c307c434
+-x = 6f2efa99b3eb1a67
+x + y = d64ddfd70f1ca9cd
+x - y = 4b542af5890d2165
+x * y = c1c276d4636c714
+x /u y = 2
+x %u y = 5d75084c6055d31
+x /s y = ffffffffffffffff
+x %s y = d64ddfd70f1ca9cd
+x /u y2 = 21584e541
+x %u y2 = 263b3f29
+x /s y3 = fffffffe66636700
+x %s y3 = fffffffff6e1d599
+~x = 6f2efa99b3eb1a66
+x & y = 5000604004c410
+x | y = d5fddf76cf17e5bd
+x ^ y = d5addf168f1321ad
+x << i = 5990000000000000
+x >>u i = 90d
+x >>s i = fffffffffffff90d
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e21a20acc9829d
+dtou f = 3b511c0437bd3
+stod x = c3dbcbbea66cfac7
+dtos f = fffd2759057c6b08
+
+x = 7cb0b0fabc5eca73
+y = 24bef482136f2486
+-x = 834f4f0543a1358d
+x + y = a16fa57ccfcdeef9
+x - y = 57f1bc78a8efa5ed
+x * y = e48506423ef32432
+x /u y = 3
+x %u y = e73d37482115ce1
+x /s y = 3
+x %s y = e73d37482115ce1
+x /u y2 = 364afcce3
+x %u y2 = f56632d
+x /s y3 = 364afcce3
+x %s y3 = f56632d
+~x = 834f4f0543a1358c
+x & y = 24b0b082104e0002
+x | y = 7cbef4fabf7feef7
+x ^ y = 580e4478af31eef5
+x << i = 2c2c3eaf17b29cc0
+x >>u i = 1f2c2c3eaf17b29
+x >>s i = 1f2c2c3eaf17b29
+x cmpu y = gt
+x cmps y = gt
+utod x = 43df2c2c3eaf17b3
+dtou f = 3312b71530e4e
+stod x = 43df2c2c3eaf17b3
+dtos f = 3312b71530e4e
+
+x = 2a1c70168f0d66dd
+y = f58c569d3d6b3928
+-x = d5e38fe970f29923
+x + y = 1fa8c6b3cc78a005
+x - y = 3490197951a22db5
+x * y = 208cd905fd5e4788
+x /u y = 0
+x %u y = 2a1c70168f0d66dd
+x /s y = fffffffffffffffc
+x %s y = 4dca8b84ba4b7d
+x /u y2 = 2be74f23
+x %u y2 = 27781c66
+x /s y3 = fffffffbf88ea18f
+x %s y3 = 1a482a
+~x = d5e38fe970f29922
+x & y = 200c50140d092008
+x | y = ff9c769fbf6f7ffd
+x ^ y = df90268bb2665ff5
+x << i = d66dd0000000000
+x >>u i = 2a1c70
+x >>s i = 2a1c70
+x cmpu y = lt
+x cmps y = gt
+utod x = 43c50e380b4786b3
+dtou f = 113faad6dbfa0
+stod x = 43c50e380b4786b3
+dtos f = 113faad6dbfa0
+
+x = c864770318e36557
+y = 6a00ab1df6497a9a
+-x = 379b88fce71c9aa9
+x + y = 326522210f2cdff1
+x - y = 5e63cbe52299eabd
+x * y = 136745cdd6e56c56
+x /u y = 1
+x %u y = 5e63cbe52299eabd
+x /s y = 0
+x %s y = c864770318e36557
+x /u y2 = 1e3f46921
+x %u y2 = 59fa719a
+x /s y3 = ffffffff79b4afc3
+x %s y3 = ffffffffe0043b40
+~x = 379b88fce71c9aa8
+x & y = 4800230110416012
+x | y = ea64ff1ffeeb7fdf
+x ^ y = a264dc1eeeaa1fcd
+x << i = c638d955c000000
+x >>u i = 32191dc0c6
+x >>s i = fffffff2191dc0c6
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e90c8ee0631c6d
+dtou f = 5214aba09a5f9
+stod x = c3cbcdc47e738e4d
+dtos f = fffe9391ff42952f
+
+x = 9714eb2dc9c67461
+y = 466bcdc06b7b155c
+-x = 68eb14d236398b9f
+x + y = dd80b8ee354189bd
+x - y = 50a91d6d5e4b5f05
+x * y = 8fb920c04078c7dc
+x /u y = 2
+x %u y = a3d4facf2d049a9
+x /s y = ffffffffffffffff
+x %s y = dd80b8ee354189bd
+x /u y2 = 225392f08
+x %u y2 = 3be1c661
+x /s y3 = fffffffe8297e39a
+x %s y3 = fffffffff93b6ee1
+~x = 68eb14d236398b9e
+x & y = 600c90049421440
+x | y = d77fefedebff757d
+x ^ y = d17f26eda2bd613d
+x << i = dc9c674610000000
+x >>u i = 9714eb2dc
+x >>s i = fffffff9714eb2dc
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e2e29d65b938cf
+dtou f = 3de210ddea9bd
+stod x = c3da3ac5348d8e63
+dtos f = fffd5068531798f3
+
+x = 680ca5caaa2ee67b
+y = 20cfe3e3cf9409ee
+-x = 97f35a3555d11985
+x + y = 88dc89ae79c2f069
+x - y = 473cc1e6da9adc8d
+x * y = 21c701f191d0995a
+x /u y = 3
+x %u y = 59cfa1f3b72c8b1
+x /s y = 3
+x %s y = 59cfa1f3b72c8b1
+x /u y2 = 32bcb4e0b
+x %u y2 = 13cff1ba
+x /s y3 = 32bcb4e0b
+x %s y3 = 13cff1ba
+~x = 97f35a3555d11984
+x & y = 200ca1c28a04006a
+x | y = 68cfe7ebefbeefff
+x ^ y = 48c3462965baef95
+x << i = b99ec00000000000
+x >>u i = 1a032
+x >>s i = 1a032
+x cmpu y = gt
+x cmps y = gt
+utod x = 43da032972aa8bba
+dtou f = 2a9e5ef11df9d
+stod x = 43da032972aa8bba
+dtos f = 2a9e5ef11df9d
+
+x = caa0e97754e05225
+y = 45366c07f71f4cd0
+-x = 355f1688ab1faddb
+x + y = fd7557f4bff9ef5
+x - y = 856a7d6f5dc10555
+x * y = 11fcaf963420ba10
+x /u y = 2
+x %u y = 4034116766a1b885
+x /s y = 0
+x %s y = caa0e97754e05225
+x /u y2 = 2ed78e4eb
+x %u y2 = 356cebb8
+x /s y3 = ffffffff3a97a9d7
+x %s y3 = fffffffff8b9f944
+~x = 355f1688ab1fadda
+x & y = 4020680754004000
+x | y = cfb6ed77f7ff5ef5
+x ^ y = 8f968570a3ff1ef5
+x << i = e97754e052250000
+x >>u i = caa0e97754e0
+x >>s i = ffffcaa0e97754e0
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e9541d2eea9c0a
+dtou f = 52ff250e0fec7
+stod x = c3caaf8b44558fd7
+dtos f = fffea2399619edfd
+
+x = 8fb54d13641331df
+y = 3896e8ba4f4fe682
+-x = 704ab2ec9becce21
+x + y = c84c35cdb3631861
+x - y = 571e645914c34b5d
+x * y = bdfcb218455ead3e
+x /u y = 2
+x %u y = 1e877b9ec57364db
+x /s y = ffffffffffffffff
+x %s y = c84c35cdb3631861
+x /u y2 = 28a1bbf24
+x %u y2 = 799b1b7
+x /s y3 = fffffffe040371a9
+x %s y3 = ffffffffc98b7515
+~x = 704ab2ec9becce20
+x & y = 894481244032082
+x | y = bfb7edbb6f5ff7df
+x ^ y = b723a5a92b5cd75d
+x << i = 3ed5344d904cc77c
+x >>u i = 23ed5344d904cc77
+x >>s i = e3ed5344d904cc77
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e1f6a9a26c8266
+dtou f = 3adce5d10e5de
+stod x = c3dc12acbb26fb34
+dtos f = fffd2015a249d513
+
+x = 5fde20614d778429
+y = 4cb55308f7d71384
+-x = a021df9eb2887bd7
+x + y = ac93736a454e97ad
+x - y = 1328cd5855a070a5
+x * y = c6f8b7175dde3024
+x /u y = 1
+x %u y = 1328cd5855a070a5
+x /s y = 1
+x %s y = 1328cd5855a070a5
+x /u y2 = 13ff0e2ae
+x %u y2 = 476c04b9
+x /s y3 = 13ff0e2ae
+x %s y3 = 476c04b9
+~x = a021df9eb2887bd6
+x & y = 4c94000045570000
+x | y = 5fff7369fff797ad
+x ^ y = 136b7369baa097ad
+x << i = fde20614d7784290
+x >>u i = 5fde20614d77842
+x >>s i = 5fde20614d77842
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d7f78818535de1
+dtou f = 2744747b69de3
+stod x = 43d7f78818535de1
+dtos f = 2744747b69de3
+
+x = 9e0edf33c6b86b83
+y = 46d59276da676456
+-x = 61f120cc3947947d
+x + y = e4e471aaa11fcfd9
+x - y = 57394cbcec51072d
+x * y = d98e79279ba84a02
+x /u y = 2
+x %u y = 1063ba4611e9a2d7
+x /s y = ffffffffffffffff
+x %s y = e4e471aaa11fcfd9
+x /u y2 = 23b3ba758
+x %u y2 = 279118f3
+x /s y3 = fffffffe9e07f55d
+x %s y3 = ffffffffd0bd48a5
+~x = 61f120cc3947947c
+x & y = 6049232c2206002
+x | y = dedfdf77deff6fd7
+x ^ y = d8db4d451cdf0fd5
+x << i = ccf1ae1ae0c00000
+x >>u i = 2783b7ccf1a
+x >>s i = fffffe783b7ccf1a
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e3c1dbe678d70d
+dtou f = 40bd97a285016
+stod x = c3d87c48330e51e5
+dtos f = fffd7e20bf613f4c
+
+x = 42419bd19468ce6d
+y = f28e83678dacdd78
+-x = bdbe642e6b973193
+x + y = 34d01f392215abe5
+x - y = 4fb3186a06bbf0f5
+x * y = b4153c53c590dc18
+x /u y = 0
+x %u y = 42419bd19468ce6d
+x /s y = fffffffffffffffc
+x %s y = c7ba96fcb1c444d
+x /u y2 = 45edad94
+x %u y2 = a1dd3be1
+x /s y3 = fffffffb12493a09
+x %s y3 = d41d9ce
+~x = bdbe642e6b973192
+x & y = 420083418428cc68
+x | y = f2cf9bf79decdf7d
+x ^ y = b0cf18b619c41315
+x << i = 6d00000000000000
+x >>u i = 42
+x >>s i = 42
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d09066f4651a34
+dtou f = 1b237993b1864
+stod x = 43d09066f4651a34
+dtos f = 1b237993b1864
+
+x = a10909702780f767
+y = 355cb86c76c2176a
+-x = 5ef6f68fd87f0899
+x + y = d665c1dc9e430ed1
+x - y = 6bac5103b0bedffd
+x * y = ef08c535e7aeb1a6
+x /u y = 3
+x %u y = f2e02ac33ab129
+x /s y = ffffffffffffffff
+x %s y = d665c1dc9e430ed1
+x /u y2 = 3048d2c3e
+x %u y2 = 7ddbd3f
+x /s y3 = fffffffe386a7a90
+x %s y3 = fffffffff6bdc2a7
+~x = 5ef6f68fd87f0898
+x & y = 2108086026801762
+x | y = b55db97c77c2f76f
+x ^ y = 9455b11c5142e00d
+x << i = 3dd9c0000000000
+x >>u i = 284242
+x >>s i = ffffffffffe84242
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e421212e04f01f
+dtou f = 41f5c66d044d6
+stod x = c3d7bdbda3f61fc2
+dtos f = fffd91a3ac09340c
+
+x = b62b990739d534f1
+y = ea2aeea1436a5eac
+-x = 49d466f8c62acb0f
+x + y = a05687a87d3f939d
+x - y = cc00aa65f66ad645
+x * y = 387490cb237a0fec
+x /u y = 0
+x %u y = b62b990739d534f1
+x /s y = 3
+x %s y = f7aacd236f9618ed
+x /u y2 = c7279a22
+x %u y2 = cb0da98f
+x /s y3 = 361b5597a
+x %s y3 = fffffffffb948337
+~x = 49d466f8c62acb0e
+x & y = a22a8801014014a0
+x | y = fe2bffa77bff7efd
+x ^ y = 5c0177a67abf6a5d
+x << i = 534f100000000000
+x >>u i = b62b9
+x >>s i = fffffffffffb62b9
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e6c57320e73aa7
+dtou f = 4a9df0db3a812
+stod x = c3d27519be318ab3
+dtos f = fffe1c2652ec9747
+
+x = 9e2dd389c02d798b
+y = ff1135085098d3be
+-x = 61d22c763fd28675
+x + y = 9d3f089210c64d49
+x - y = 9f1c9e816f94a5cd
+x * y = c4066df5b775c62a
+x /u y = 0
+x %u y = 9e2dd389c02d798b
+x /s y = 68
+x %s y = ff3048290217745b
+x /u y2 = 9ec1e9af
+x %u y2 = cf1df113
+x /s y3 = 68deaf7ef9
+x %s y3 = ffffffffffdef4c3
+~x = 61d22c763fd28674
+x & y = 9e0111084008518a
+x | y = ff3df789d0bdfbbf
+x ^ y = 613ce68190b5aa35
+x << i = c000000000000000
+x >>u i = 2
+x >>s i = fffffffffffffffe
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e3c5ba713805af
+dtou f = 40ca45715b69e
+stod x = c3d8748b1d8ff4a2
+dtos f = fffd7eeb9c4ea5d3
+
+x = eceb862698e5fbb5
+y = b6f3d882c0c8b20
+-x = 131479d9671a044b
+x + y = f85ac3aec4f286d5
+x - y = e17c489e6cd97095
+x * y = 5d01450ae4e6bda0
+x /u y = 14
+x %u y = 83ab78327eb1d35
+x /s y = ffffffffffffffff
+x %s y = f85ac3aec4f286d5
+x /u y2 = 14b83ddb43
+x %u y2 = 7bd891d
+x /s y3 = fffffffe54d4c750
+x %s y3 = fffffffff8ae0935
+~x = 131479d9671a044a
+x & y = 86b040008048b20
+x | y = efefbfaebcedfbb5
+x ^ y = e784bbaeb4e97095
+x << i = 98e5fbb500000000
+x >>u i = eceb8626
+x >>s i = ffffffffeceb8626
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ed9d70c4d31cbf
+dtou f = 610add3ee9d70
+stod x = c3b31479d9671a04
+dtos f = ffff82f519278ca6
+
+x = 3b5e2b836840d5ef
+y = 8984a692ed09ad52
+-x = c4a1d47c97bf2a11
+x + y = c4e2d216554a8341
+x - y = b1d984f07b37289d
+x * y = 8ca36a2efdbe098e
+x /u y = 0
+x %u y = 3b5e2b836840d5ef
+x /s y = 0
+x %s y = 3b5e2b836840d5ef
+x /u y2 = 6e847dbc
+x %u y2 = 63938b7
+x /s y3 = ffffffff7fb9c9d5
+x %s y3 = 8979c75
+~x = c4a1d47c97bf2a10
+x & y = 904228268008542
+x | y = bbdeaf93ed49fdff
+x ^ y = b2da8d11854978bd
+x << i = ae0da10357bc0000
+x >>u i = ed78ae0da10
+x >>s i = ed78ae0da10
+x cmpu y = lt
+x cmps y = gt
+utod x = 43cdaf15c1b4206b
+dtou f = 18512ba0bae1a
+stod x = 43cdaf15c1b4206b
+dtos f = 18512ba0bae1a
+
+x = 8bb41c6aa7d0a6b9
+y = e7cba3f3ef7796d4
+-x = 744be395582f5947
+x + y = 737fc05e97483d8d
+x - y = a3e87876b8590fe5
+x * y = 4af6e0e671797734
+x /u y = 0
+x %u y = 8bb41c6aa7d0a6b9
+x /s y = 4
+x %s y = ec858c9ae9f24b69
+x /u y2 = 9a4aab1a
+x %u y2 = 8460af0b
+x /s y3 = 4ce035bf5
+x %s y3 = fffffffff8cc5e2a
+~x = 744be395582f5946
+x & y = 83800062a7508690
+x | y = efffbffbeff7b6fd
+x ^ y = 6c7fbf9948a7306d
+x << i = c6aa7d0a6b900000
+x >>u i = 8bb41c6aa7d
+x >>s i = fffff8bb41c6aa7d
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e176838d54fa15
+dtou f = 3938fad8898cc
+stod x = c3dd12f8e5560bd6
+dtos f = fffd05d6f2c18801
+
+x = 5e14c5d21ca43093
+y = dd468d765debf826
+-x = a1eb3a2de35bcf6d
+x + y = 3b5b53487a9028b9
+x - y = 80ce385bbeb8386d
+x * y = d4a2340f4d5e9dd2
+x /u y = 0
+x %u y = 5e14c5d21ca43093
+x /s y = fffffffffffffffe
+x %s y = 18a1e0bed87c20df
+x /u y2 = 6cd85af3
+x %u y2 = 21606d91
+x /s y3 = fffffffd4a66f1cd
+x %s y3 = 6f4d315
+~x = a1eb3a2de35bcf6c
+x & y = 5c0485521ca03002
+x | y = df56cdf65deff8b7
+x ^ y = 835248a4414fc8b5
+x << i = 290c24c000000000
+x >>u i = 1785317
+x >>s i = 1785317
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d785317487290c
+dtou f = 26891f773d4b0
+stod x = 43d785317487290c
+dtos f = 26891f773d4b0
+
+x = 7265a096401af9fd
+y = b918a7193a6af5c8
+-x = 8d9a5f69bfe50603
+x + y = 2b7e47af7a85efc5
+x - y = b94cf97d05b00435
+x * y = 4b405d3fbb146ea8
+x /u y = 0
+x %u y = 7265a096401af9fd
+x /s y = ffffffffffffffff
+x %s y = 2b7e47af7a85efc5
+x /u y2 = 9e37e46d
+x %u y2 = 276a9058
+x /s y3 = fffffffe62f7568d
+x %s y3 = 3a458b38
+~x = 8d9a5f69bfe50602
+x & y = 3000a010000af1c8
+x | y = fb7da79f7a7afdfd
+x ^ y = cb7d078f7a700c35
+x << i = 65a096401af9fd00
+x >>u i = 7265a096401af9
+x >>s i = 7265a096401af9
+x cmpu y = lt
+x cmps y = gt
+utod x = 43dc9968259006be
+dtou f = 2edb6497a791c
+stod x = 43dc9968259006be
+dtos f = 2edb6497a791c
+
+x = 529ef449b98aed77
+y = 998d65c70ae4483a
+-x = ad610bb646751289
+x + y = ec2c5a10c46f35b1
+x - y = b9118e82aea6a53d
+x * y = d3da4c66403f44f6
+x /u y = 0
+x %u y = 529ef449b98aed77
+x /s y = 0
+x %s y = 529ef449b98aed77
+x /u y2 = 89be8881
+x %u y2 = 3286ec30
+x /s y3 = ffffffff318b3773
+x %s y3 = 41b7412
+~x = ad610bb646751288
+x & y = 108c644108804832
+x | y = db9ff5cfbbeeed7f
+x ^ y = cb13918eb36ea54d
+x << i = dc00000000000000
+x >>u i = 14
+x >>s i = 14
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d4a7bd126e62bb
+dtou f = 21d76e4ce45e9
+stod x = 43d4a7bd126e62bb
+dtos f = 21d76e4ce45e9
+
+x = d4c53464631ef981
+y = d79a311745b55bfc
+-x = 2b3acb9b9ce1067f
+x + y = ac5f657ba8d4557d
+x - y = fd2b034d1d699d85
+x * y = e994cbcac26375fc
+x /u y = 0
+x %u y = d4c53464631ef981
+x /s y = 1
+x %s y = fd2b034d1d699d85
+x /u y2 = fca32bce
+x %u y2 = 19279bff
+x /s y3 = 111f23d93
+x %s y3 = ffffffffe9244e4c
+~x = 2b3acb9b9ce1067e
+x & y = d480300441145980
+x | y = d7df357767bffbfd
+x ^ y = 35f057326aba27d
+x << i = 1000000000000000
+x >>u i = d
+x >>s i = fffffffffffffffd
+x cmpu y = lt
+x cmps y = lt
+utod x = 43ea98a68c8c63df
+dtou f = 57269613000cb
+stod x = c3c59d65cdce7083
+dtos f = fffee4b0a668f000
+
+x = cb016d548a96b09b
+y = 59b7d17de3b8718e
+-x = 34fe92ab75694f65
+x + y = 24b93ed26e4f2229
+x - y = 71499bd6a6de3f0d
+x * y = 9b43f7a8c3f260fa
+x /u y = 2
+x %u y = 1791ca58c325cd7f
+x /s y = 0
+x %s y = cb016d548a96b09b
+x /u y2 = 24340b83f
+x %u y2 = 41884ad8
+x /s y3 = ffffffff68c95c6b
+x %s y3 = ffffffffb854355c
+~x = 34fe92ab75694f64
+x & y = 490141548290308a
+x | y = dbb7fd7debbef19f
+x ^ y = 92b6bc29692ec115
+x << i = 5b5522a5ac26c000
+x >>u i = 32c05b5522a5a
+x >>s i = ffff2c05b5522a5a
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e9602daa9152d6
+dtou f = 5326ad656497d
+stod x = c3ca7f4955bab4a8
+dtos f = fffea4b21b8f38b3
+
+x = e9b85c9ebecee945
+y = eff4c4de68a8bd70
+-x = 1647a361413116bb
+x + y = d9ad217d2777a6b5
+x - y = f9c397c056262bd5
+x * y = 45861cb55a05ff30
+x /u y = 0
+x %u y = e9b85c9ebecee945
+x /s y = 1
+x %s y = f9c397c056262bd5
+x /u y2 = f958dab3
+x %u y2 = ecb4360b
+x /s y3 = 16380abc0
+x %s y3 = fffffffffebaf8c5
+~x = 1647a361413116ba
+x & y = e9b0449e2888a940
+x | y = effcdcdefeeefd75
+x ^ y = 64c9840d6665435
+x << i = e945000000000000
+x >>u i = e9b8
+x >>s i = ffffffffffffe9b8
+x cmpu y = lt
+x cmps y = lt
+utod x = 43ed370b93d7d9dd
+dtou f = 5fbb55dc42afb
+stod x = c3b647a361413117
+dtos f = ffff6dfca2fd1a30
+
+x = f6e37ba6953b5dff
+y = f751e9180fe38822
+-x = 91c84596ac4a201
+x + y = ee3564bea51ee621
+x - y = ff91928e8557d5dd
+x * y = d828a075a5eef3de
+x /u y = 0
+x %u y = f6e37ba6953b5dff
+x /s y = 1
+x %s y = ff91928e8557d5dd
+x /u y2 = ff8db266
+x %u y2 = cc4dce6f
+x /s y3 = 10cb8d59a
+x %s y3 = fffffffffdc42d8f
+~x = 91c84596ac4a200
+x & y = f641690005230822
+x | y = f7f3fbbe9ffbddff
+x ^ y = 1b292be9ad8d5dd
+x << i = 54ed77fc00000000
+x >>u i = 3db8dee9
+x >>s i = fffffffffdb8dee9
+x cmpu y = lt
+x cmps y = lt
+utod x = 43eedc6f74d2a76c
+dtou f = 6520258a4622f
+stod x = c3a23908b2d58944
+dtos f = ffffc4499ddd5165
+
+x = 6b48866df6b94d49
+y = 560a70d9eace4e24
+-x = 94b779920946b2b7
+x + y = c152f747e1879b6d
+x - y = 153e15940beaff25
+x * y = 9ee6c54512591c44
+x /u y = 1
+x %u y = 153e15940beaff25
+x /s y = 1
+x %s y = 153e15940beaff25
+x /u y2 = 13f341cd4
+x %u y2 = 3ea81d95
+x /s y3 = 13f341cd4
+x %s y3 = 3ea81d95
+~x = 94b779920946b2b6
+x & y = 42080049e2884c00
+x | y = 7f4af6fdfeff4f6d
+x ^ y = 3d42f6b41c77036d
+x << i = 6b94d49000000000
+x >>u i = 6b48866
+x >>s i = 6b48866
+x cmpu y = gt
+x cmps y = gt
+utod x = 43dad2219b7dae53
+dtou f = 2bf178305165b
+stod x = 43dad2219b7dae53
+dtos f = 2bf178305165b
+
+x = c48462e3c16319a3
+y = fc0b2696f69dff6
+-x = 3b7b9d1c3e9ce65d
+x + y = d445154d30ccf999
+x - y = b4c3b07a51f939ad
+x * y = 7a588fef596a9fa2
+x /u y = c
+x %u y = 77c05f2886c9a1b
+x /s y = fffffffffffffffd
+x %s y = f3c67a200fa0b985
+x /u y2 = c79a1999e
+x %u y2 = 8cb3bd5
+x /s y3 = fffffffc39559faa
+x %s y3 = fffffffffec068e9
+~x = 3b7b9d1c3e9ce65c
+x & y = 4802261416119a2
+x | y = cfc4f2ebef6bdff7
+x ^ y = cb44d08aae0ac655
+x << i = 68c0000000000000
+x >>u i = 312
+x >>s i = ffffffffffffff12
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e8908c5c782c63
+dtou f = 507e50a560d2f
+stod x = c3cdbdce8e1f4e73
+dtos f = fffe7a2c4f8efc65
+
+x = 81c143a7754ce98d
+y = 2595000288da8218
+-x = 7e3ebc588ab31673
+x + y = a75643a9fe276ba5
+x - y = 5c2c43a4ec726775
+x * y = 1c0ef3efd7e17f38
+x /u y = 3
+x %u y = 1102439fdabd6345
+x /s y = fffffffffffffffd
+x %s y = f28043af0fdc6fd5
+x /u y2 = 373dc3a72
+x %u y2 = f3a74a9
+x /s y3 = fffffffca40c567c
+x %s y3 = ffffffffeb083c95
+~x = 7e3ebc588ab31672
+x & y = 181000200488008
+x | y = a5d543a7fddeeb9d
+x ^ y = a45443a5fd966b95
+x << i = a7754ce98d000000
+x >>u i = 81c143a775
+x >>s i = ffffff81c143a775
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e0382874eea99d
+dtou f = 3525ca9c3ecab
+stod x = c3df8faf1622acc6
+dtos f = fffcc4a3eefcdbe1
+
+x = 185201da44524787
+y = 6207869c80ed0d0a
+-x = e7adfe25bbadb879
+x + y = 7a598876c53f5491
+x - y = b64a7b3dc3653a7d
+x * y = 4ccbcc3290d3a646
+x /u y = 0
+x %u y = 185201da44524787
+x /s y = 0
+x %s y = 185201da44524787
+x /u y2 = 3f82fa73
+x %u y2 = 22447773
+x /s y3 = 3f82fa73
+x %s y3 = 22447773
+~x = e7adfe25bbadb878
+x & y = 2009800400502
+x | y = 7a5787dec4ff4f8f
+x ^ y = 7a558746c4bf4a8d
+x << i = 48076911491e1c00
+x >>u i = 6148076911491
+x >>s i = 6148076911491
+x cmpu y = lt
+x cmps y = lt
+utod x = 43b85201da445248
+dtou f = 9f62c2d2a520
+stod x = 43b85201da445248
+dtos f = 9f62c2d2a520
+
+x = 4c00590c95cc211
+y = 733d576e60e10d4c
+-x = fb3ffa6f36a33def
+x + y = 77fd5cff2a3dcf5d
+x - y = 9182ae22687bb4c5
+x * y = 45e9eed06e557a0c
+x /u y = 0
+x %u y = 4c00590c95cc211
+x /s y = 0
+x %s y = 4c00590c95cc211
+x /u y2 = a8d5772
+x %u y2 = 2bbf7115
+x /s y3 = a8d5772
+x %s y3 = 2bbf7115
+~x = fb3ffa6f36a33dee
+x & y = 50040400000
+x | y = 77fd57fee9fdcf5d
+x ^ y = 77fd52fea9bdcf5d
+x << i = 590c95cc211000
+x >>u i = 4c00590c95cc
+x >>s i = 4c00590c95cc
+x cmpu y = lt
+x cmps y = lt
+utod x = 4393001643257308
+dtou f = 1f2151f0acc4
+stod x = 4393001643257308
+dtos f = 1f2151f0acc4
+
+x = 49769540f1cb8bab
+y = 737fa1db80ffe35e
+-x = b6896abf0e347455
+x + y = bcf6371c72cb6f09
+x - y = d5f6f36570cba84d
+x * y = 3b460a1ee4eae9ca
+x /u y = 0
+x %u y = 49769540f1cb8bab
+x /s y = 0
+x %s y = 49769540f1cb8bab
+x /u y2 = a2d45f9b
+x %u y2 = 7184712
+x /s y3 = a2d45f9b
+x %s y3 = 7184712
+~x = b6896abf0e347454
+x & y = 4176814080cb830a
+x | y = 7b7fb5dbf1ffebff
+x ^ y = 3a09349b713468f5
+x << i = 3c72e2eac0000000
+x >>u i = 125da5503
+x >>s i = 125da5503
+x cmpu y = lt
+x cmps y = lt
+utod x = 43d25da5503c72e3
+dtou f = 1e172d28eff11
+stod x = 43d25da5503c72e3
+dtos f = 1e172d28eff11
+
+x = e43d7008d3e41ad5
+y = df82555639c8e3c0
+-x = 1bc28ff72c1be52b
+x + y = c3bfc55f0dacfe95
+x - y = 4bb1ab29a1b3715
+x * y = 3d32eb3e9246fec0
+x /u y = 1
+x %u y = 4bb1ab29a1b3715
+x /s y = 0
+x %s y = e43d7008d3e41ad5
+x /u y2 = 1056b28b9
+x %u y2 = 986cffaf
+x /s y3 = dab98c69
+x %s y3 = fffffffff99e128f
+~x = 1bc28ff72c1be52a
+x & y = c400500011c002c0
+x | y = ffbf755efbecfbd5
+x ^ y = 3bbf255eea2cf915
+x << i = e43d7008d3e41ad5
+x >>u i = e43d7008d3e41ad5
+x >>s i = e43d7008d3e41ad5
+x cmpu y = gt
+x cmps y = gt
+utod x = 43ec87ae011a7c83
+dtou f = 5d7cb29987887
+stod x = c3bbc28ff72c1be5
+dtos f = ffff4a126ed167bc
+
+x = fbbe909b5773ca0f
+y = f9520e33d6ba76f2
+-x = 4416f64a88c35f1
+x + y = f5109ecf2e2e4101
+x - y = 26c826780b9531d
+x * y = cdb0b52e637dec2e
+x /u y = 1
+x %u y = 26c826780b9531d
+x /s y = 0
+x %s y = fbbe909b5773ca0f
+x /u y2 = 1027d221e
+x %u y2 = e50c5a15
+x /s y3 = a31a1d6d
+x %s y3 = fffffffff9b9f758
+~x = 4416f64a88c35f0
+x & y = f912001356324202
+x | y = fbfe9ebbd7fbfeff
+x ^ y = 2ec9ea881c9bcfd
+x << i = 283c000000000000
+x >>u i = 3eef
+x >>s i = fffffffffffffeef
+x cmpu y = gt
+x cmps y = gt
+utod x = 43ef77d2136aee79
+dtou f = 671d501edc23f
+stod x = c39105bd92a230d8
+dtos f = ffffe41c4726b175
+
+x = 8f2098834a0a77d9
+y = a889851f5a003974
+-x = 70df677cb5f58827
+x + y = 37aa1da2a40ab14d
+x - y = e6971363f00a3e65
+x * y = 376b45632b6d9f54
+x /u y = 0
+x %u y = 8f2098834a0a77d9
+x /s y = 1
+x %s y = e6971363f00a3e65
+x /u y2 = d967540f
+x %u y2 = 73d47f08
+x /s y3 = 14a5fa443
+x %s y3 = ffffffffb843c4bc
+~x = 70df677cb5f58826
+x & y = 880080034a003150
+x | y = afa99d9f5a0a7ffd
+x ^ y = 27a91d9c100a4ead
+x << i = 7d90000000000000
+x >>u i = 8f2
+x >>s i = fffffffffffff8f2
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e1e4131069414f
+dtou f = 3a9ffceeea4ed
+stod x = c3dc37d9df2d7d62
+dtos f = fffd1c4714279423
+
+x = cdb48c3bc67626b3
+y = 7253c92e218e1bc6
+-x = 324b73c43989d94d
+x + y = 40085569e8044279
+x - y = 5b60c30da4e80aed
+x * y = 51d293da7fc0cf72
+x /u y = 1
+x %u y = 5b60c30da4e80aed
+x /s y = 0
+x %s y = cdb48c3bc67626b3
+x /u y2 = 1cc9cbb61
+x %u y2 = ba5245
+x /s y3 = ffffffff8f617990
+x %s y3 = fffffffff0d03ed3
+~x = 324b73c43989d94c
+x & y = 4010882a00060282
+x | y = fff7cd3fe7fe3ff7
+x ^ y = bfe74515e7f83d75
+x << i = 6d230ef19d89acc0
+x >>u i = 336d230ef19d89a
+x >>s i = ff36d230ef19d89a
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e9b6918778cec5
+dtou f = 5441c2a7d80c5
+stod x = c3c925b9e21cc4ed
+dtos f = fffeb6636fb66ffb
+
+x = b9770d034f679d1d
+y = b10aa58461708268
+-x = 4688f2fcb09862e3
+x + y = 6a81b287b0d81f85
+x - y = 86c677eedf71ab5
+x * y = a954ab7b99908dc8
+x /u y = 1
+x %u y = 86c677eedf71ab5
+x /s y = 0
+x %s y = b9770d034f679d1d
+x /u y2 = 10c2e2273
+x %u y2 = 6febbad1
+x /s y3 = e4b07a4a
+x %s y3 = ffffffffb4b2dcf5
+~x = 4688f2fcb09862e2
+x & y = b102050041608008
+x | y = b97fad876f779f7d
+x ^ y = 87da8872e171f75
+x << i = 679d1d0000000000
+x >>u i = b9770d
+x >>s i = ffffffffffb9770d
+x cmpu y = gt
+x cmps y = gt
+utod x = 43e72ee1a069ecf4
+dtou f = 4bf76b520c0b5
+stod x = c3d1a23cbf2c2619
+dtos f = fffe31bdfa59afea
+
+x = 7df56995af680597
+y = 83202d927c5965da
+-x = 820a966a5097fa69
+x + y = 11597282bc16b71
+x - y = fad53c03330e9fbd
+x * y = 95d310a87e485596
+x /u y = 0
+x %u y = 7df56995af680597
+x /s y = ffffffffffffffff
+x %s y = 11597282bc16b71
+x /u y2 = f5e9880c
+x %u y2 = 3dce52bf
+x /s y3 = fffffffefdc6ec2b
+x %s y3 = 7611c611
+~x = 820a966a5097fa68
+x & y = 12029902c480592
+x | y = fff56d97ff7965df
+x ^ y = fed54407d331604d
+x << i = 56bda0165c000000
+x >>u i = 1f7d5a656b
+x >>s i = 1f7d5a656b
+x cmpu y = lt
+x cmps y = gt
+utod x = 43df7d5a656bda01
+dtou f = 3397b88f3b261
+stod x = 43df7d5a656bda01
+dtos f = 3397b88f3b261
+
+x = 553ac3862c878ea1
+y = 596019b6dab2729c
+-x = aac53c79d378715f
+x + y = ae9add3d073a013d
+x - y = fbdaa9cf51d51c05
+x * y = aac46ac3c6109c1c
+x /u y = 0
+x %u y = 553ac3862c878ea1
+x /s y = 0
+x %s y = 553ac3862c878ea1
+x /u y2 = f41ffd27
+x %u y2 = 70c5e7
+x /s y3 = f41ffd27
+x %s y3 = 70c5e7
+~x = aac53c79d378715e
+x & y = 5120018608820280
+x | y = 5d7adbb6feb7febd
+x ^ y = c5ada30f635fc3d
+x << i = 62c878ea10000000
+x >>u i = 553ac3862
+x >>s i = 553ac3862
+x cmpu y = lt
+x cmps y = lt
+utod x = 43d54eb0e18b21e4
+dtou f = 22e8f73417278
+stod x = 43d54eb0e18b21e4
+dtos f = 22e8f73417278
+
+x = 20fcd070f46d0abb
+y = b8bcea21c9bc292e
+-x = df032f8f0b92f545
+x + y = d9b9ba92be2933e9
+x - y = 683fe64f2ab0e18d
+x * y = a4ddf90d16a3e09a
+x /u y = 0
+x %u y = 20fcd070f46d0abb
+x /s y = 0
+x %s y = 20fcd070f46d0abb
+x /u y2 = 2db65f96
+x %u y2 = 5a639c65
+x /s y3 = ffffffff897f0ff8
+x %s y3 = 1c534bc3
+~x = df032f8f0b92f544
+x & y = 20bcc020c02c082a
+x | y = b8fcfa71fdfd2bbf
+x ^ y = 98403a513dd12395
+x << i = 42aec00000000000
+x >>u i = 83f3
+x >>s i = 83f3
+x cmpu y = lt
+x cmps y = gt
+utod x = 43c07e68387a3685
+dtou f = d82fef3b1880
+stod x = 43c07e68387a3685
+dtos f = d82fef3b1880
+
+x = 37d0c25765ae9065
+y = 2c73a7867781fe10
+-x = c82f3da89a516f9b
+x + y = 644469dddd308e75
+x - y = b5d1ad0ee2c9255
+x * y = 7f1db64d44123c50
+x /u y = 1
+x %u y = b5d1ad0ee2c9255
+x /s y = 1
+x %s y = b5d1ad0ee2c9255
+x /u y2 = 14171ae1b
+x %u y2 = 2178d143
+x /s y3 = 14171ae1b
+x %s y3 = 2178d143
+~x = c82f3da89a516f9a
+x & y = 2450820665809000
+x | y = 3ff3e7d777affe75
+x ^ y = 1ba365d1122f6e75
+x << i = c25765ae90650000
+x >>u i = 37d0c25765ae
+x >>s i = 37d0c25765ae
+x cmpu y = gt
+x cmps y = gt
+utod x = 43cbe8612bb2d748
+dtou f = 16dcacfdb480d
+stod x = 43cbe8612bb2d748
+dtos f = 16dcacfdb480d
+
+x = 7c44b251159b1a1f
+y = 6922cff21dab79c2
+-x = 83bb4daeea64e5e1
+x + y = e5678243334693e1
+x - y = 1321e25ef7efa05d
+x * y = f5cf3786a497727e
+x /u y = 1
+x %u y = 1321e25ef7efa05d
+x /s y = 1
+x %s y = 1321e25ef7efa05d
+x /u y2 = 12e961017
+x %u y2 = 22af4b61
+x /s y3 = 12e961017
+x %s y3 = 22af4b61
+~x = 83bb4daeea64e5e0
+x & y = 68008250158b1802
+x | y = 7d66fff31dbb7bdf
+x ^ y = 15667da3083063dd
+x << i = f112c944566c687c
+x >>u i = 1f112c944566c687
+x >>s i = 1f112c944566c687
+x cmpu y = gt
+x cmps y = gt
+utod x = 43df112c944566c7
+dtou f = 32e67b027dfed
+stod x = 43df112c944566c7
+dtos f = 32e67b027dfed
+
diff --git a/test/regression/Results/volatile2 b/test/regression/Results/volatile2
index e6a9903..23a53f0 100644
--- a/test/regression/Results/volatile2
+++ b/test/regression/Results/volatile2
@@ -12,6 +12,8 @@ float 1: OK
 float 2: OK
 double 1: OK
 double 2: OK
+long long 1: OK
+long long 2: OK
 global signed char 1: OK
 global signed char 2: OK
 global unsigned char 1: OK
@@ -26,3 +28,5 @@ global float 1: OK
 global float 2: OK
 global double 1: OK
 global double 2: OK
+global long long 1: OK
+global long long 2: OK
diff --git a/test/regression/int64.c b/test/regression/int64.c
new file mode 100644
index 0000000..55a4f88
--- /dev/null
+++ b/test/regression/int64.c
@@ -0,0 +1,115 @@
+/* Semi-random testing of 64-bit integer operations */
+
+#include <stdio.h>
+
+typedef unsigned long long u64;
+typedef signed long long s64;
+
+static u64 rnd64(void)
+{
+  static u64 seed = 0;
+  seed = seed * 6364136223846793005ULL + 1442695040888963407ULL;
+  return seed;
+}
+
+static inline u64 safe_udiv64(u64 x, u64 y)
+{
+  if (y == 0) return 0; else return x / y;
+}
+
+static inline u64 safe_umod64(u64 x, u64 y)
+{
+  if (y == 0) return 0; else return x % y;
+}
+
+static inline s64 safe_sdiv64(s64 x, s64 y)
+{
+  if (y == 0 || (y == -1 && x == (-1LL << 63))) return 0; else return x / y;
+}
+
+static inline s64 safe_smod64(s64 x, s64 y)
+{
+  if (y == 0 || (y == -1 && x == (-1LL << 63))) return 0; else return x % y;
+}
+
+static void test1(u64 x, u64 y)
+{
+  u64 y2;
+  s64 y3;
+  int i;
+  double f;
+
+  printf("x = %llx\n", x);
+  printf("y = %llx\n", y);
+  printf("-x = %llx\n", -x);
+  printf("x + y = %llx\n", x + y);
+  printf("x - y = %llx\n", x - y);
+  printf("x * y = %llx\n", x * y);
+  printf("x /u y = %llx\n", safe_udiv64(x, y));
+  printf("x %%u y = %llx\n", safe_umod64(x, y));
+  printf("x /s y = %llx\n", safe_sdiv64(x, y));
+  printf("x %%s y = %llx\n", safe_smod64(x, y));
+  y2 = y >> 32;
+  printf("x /u y2 = %llx\n", safe_udiv64(x, y2));
+  printf("x %%u y2 = %llx\n", safe_umod64(x, y2));
+  y3 = ((s64)y) >> 32;
+  printf("x /s y3 = %llx\n", safe_sdiv64(x, y3));
+  printf("x %%s y3 = %llx\n", safe_smod64(x, y3));
+  printf("~x = %llx\n", ~x);
+  printf("x & y = %llx\n", x & y);
+  printf("x | y = %llx\n", x | y);
+  printf("x ^ y = %llx\n", x ^ y);
+  i = y & 63;
+  printf("x << i = %llx\n", x << i);
+  printf("x >>u i = %llx\n", x >> i);
+  printf("x >>s i = %llx\n", (s64) x >> i);
+  printf("x cmpu y = %s\n",
+         x == y ? "eq" : x < y ? "lt" : "gt");
+  printf("x cmps y = %s\n",
+         x == y ? "eq" : (s64)x < (s64)y ? "lt" : "gt");
+  f = (double) x;
+  printf("utod x = %llx\n", *((u64*) &f));
+  f = f * 0.0001;
+  printf("dtou f = %llx\n", (u64) f);
+  f = (double) ((s64) x);
+  printf("stod x = %llx\n", *((u64*) &f));
+  f = f * 0.0001;
+  printf("dtos f = %llx\n", (s64) f);
+  printf("\n");
+}
+
+u64 special_values[] = {
+  0,
+  1,
+  -1,
+  0x7FFFFFFFLLU,
+  0x80000000LLU,
+  0x7FFFFFFFFFFFFFFFLLU,
+  0x8000000000000000LLU
+};
+
+int main()
+{
+  int i, j;
+  u64 x, y;
+
+  for (i = 0; i <= 4; i++) {
+    for (j = 0; j <= 4; j++) {
+      test1(special_values[i], special_values[j]);
+    }
+    test1(special_values[i], rnd64());
+    test1(rnd64(), special_values[i]);
+  }
+  for (i = 0; i < 100; i++) {
+    x = rnd64(); y = rnd64();
+    test1(x, y);
+  }
+  return 0;
+}
+
+
+
+
+
+
+
diff --git a/test/regression/volatile2.c b/test/regression/volatile2.c
index 306eb8c..3bad6ae 100644
--- a/test/regression/volatile2.c
+++ b/test/regression/volatile2.c
@@ -15,6 +15,7 @@ unsigned short gus;
 int gi;
 float gf;
 double gd;
+long long gll;
 
 int main()
 {
@@ -25,6 +26,7 @@ int main()
   int i;
   float f;
   double d;
+  long long ll;
 
   TEST("signed char", signed char, sc, 12, 34);
   TEST("unsigned char", unsigned char, uc, 56, 78);
@@ -33,6 +35,7 @@ int main()
   TEST("int", int, i, 0x123456, 0x7890AB);
   TEST("float", float, f, 0.5, 256.0);
   TEST("double", double, d, 3.1415, 2.718);
+  TEST("long long", long long, ll, 0x123456789ABCDEFLL, 0x789ABCDEF1234567LL);
   TEST("global signed char", signed char, gsc, 12, 34);
   TEST("global unsigned char", unsigned char, guc, 56, 78);
   TEST("global signed short", signed short, gss, 1234, 5678);
@@ -40,6 +43,7 @@ int main()
   TEST("global int", int, gi, 0x123456, 0x7890AB);
   TEST("global float", float, gf, 0.5, 256.0);
   TEST("global double", double, gd, 3.1415, 2.718);
+  TEST("global long long", long long, gll, 0x123456789ABCDEFLL, 0x789ABCDEF1234567LL);
   return 0;
 }
author	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2013-04-20 07:54:52 +0000
committer	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2013-04-20 07:54:52 +0000
commit	255cee09b71255051c2b40eae0c88bffce1f6f32 (patch)
tree	7951b1b13e8fd5e525b9223e8be0580e83550f55
parent	6e5041958df01c56762e90770abd704b95a36e5d (diff)