diff options
author | xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> | 2013-04-20 07:54:52 +0000 |
---|---|---|
committer | xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> | 2013-04-20 07:54:52 +0000 |
commit | 255cee09b71255051c2b40eae0c88bffce1f6f32 (patch) | |
tree | 7951b1b13e8fd5e525b9223e8be0580e83550f55 | |
parent | 6e5041958df01c56762e90770abd704b95a36e5d (diff) |
Big merge of the newregalloc-int64 branch. Lots of changes in two directions:
1- new register allocator (+ live range splitting, spilling&reloading, etc)
based on a posteriori validation using the Rideau-Leroy algorithm
2- support for 64-bit integer arithmetic (type "long long").
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
166 files changed, 20598 insertions, 10626 deletions
@@ -12,6 +12,7 @@ lib/Parmov.vo lib/Parmov.glob lib/Parmov.v.beautified: lib/Parmov.v lib/Axioms.v lib/UnionFind.vo lib/UnionFind.glob lib/UnionFind.v.beautified: lib/UnionFind.v lib/Coqlib.vo lib/Wfsimpl.vo lib/Wfsimpl.glob lib/Wfsimpl.v.beautified: lib/Wfsimpl.v lib/Axioms.vo lib/Postorder.vo lib/Postorder.glob lib/Postorder.v.beautified: lib/Postorder.v lib/Coqlib.vo lib/Maps.vo lib/Iteration.vo +lib/FSetAVLplus.vo lib/FSetAVLplus.glob lib/FSetAVLplus.v.beautified: lib/FSetAVLplus.v lib/Coqlib.vo common/Errors.vo common/Errors.glob common/Errors.v.beautified: common/Errors.v lib/Coqlib.vo common/AST.vo common/AST.glob common/AST.v.beautified: common/AST.v lib/Coqlib.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Events.vo common/Events.glob common/Events.v.beautified: common/Events.v lib/Coqlib.vo lib/Intv.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Errors.vo @@ -28,9 +29,11 @@ backend/Cminor.vo backend/Cminor.glob backend/Cminor.v.beautified: backend/Cmino $(ARCH)/Op.vo $(ARCH)/Op.glob $(ARCH)/Op.v.beautified: $(ARCH)/Op.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo backend/CminorSel.vo backend/CminorSel.glob backend/CminorSel.v.beautified: backend/CminorSel.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Events.vo common/Values.vo common/Memory.vo backend/Cminor.vo $(ARCH)/Op.vo common/Globalenvs.vo common/Switch.vo common/Smallstep.vo $(ARCH)/SelectOp.vo $(ARCH)/SelectOp.glob $(ARCH)/SelectOp.v.beautified: $(ARCH)/SelectOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/CminorSel.vo -backend/Selection.vo backend/Selection.glob backend/Selection.v.beautified: backend/Selection.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo -$(ARCH)/SelectOpproof.vo $(ARCH)/SelectOpproof.glob $(ARCH)/SelectOpproof.v.beautified: $(ARCH)/SelectOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo -backend/Selectionproof.vo backend/Selectionproof.glob backend/Selectionproof.v.beautified: backend/Selectionproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo backend/Selection.vo $(ARCH)/SelectOpproof.vo +backend/SelectLong.vo backend/SelectLong.glob backend/SelectLong.v.beautified: backend/SelectLong.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo common/Errors.vo +backend/Selection.vo backend/Selection.glob backend/Selection.v.beautified: backend/Selection.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo backend/SelectLong.vo +$(ARCH)/SelectOpproof.vo $(ARCH)/SelectOpproof.glob $(ARCH)/SelectOpproof.v.beautified: $(ARCH)/SelectOpproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo +backend/SelectLongproof.vo backend/SelectLongproof.glob backend/SelectLongproof.v.beautified: backend/SelectLongproof.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo $(ARCH)/SelectOpproof.vo backend/SelectLong.vo +backend/Selectionproof.vo backend/Selectionproof.glob backend/Selectionproof.v.beautified: backend/Selectionproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo backend/SelectLong.vo backend/Selection.vo $(ARCH)/SelectOpproof.vo backend/SelectLongproof.vo backend/Registers.vo backend/Registers.glob backend/Registers.v.beautified: backend/Registers.v lib/Coqlib.vo common/AST.vo lib/Maps.vo lib/Ordered.vo backend/RTL.vo backend/RTL.glob backend/RTL.v.beautified: backend/RTL.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTLgen.vo backend/RTLgen.glob backend/RTLgen.v.beautified: backend/RTLgen.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Switch.vo $(ARCH)/Op.vo backend/Registers.vo backend/CminorSel.vo backend/RTL.vo @@ -43,59 +46,42 @@ backend/Inliningspec.vo backend/Inliningspec.glob backend/Inliningspec.v.beautif backend/Inliningproof.vo backend/Inliningproof.glob backend/Inliningproof.v.beautified: backend/Inliningproof.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/Inlining.vo backend/Inliningspec.vo backend/RTL.vo backend/Renumber.vo backend/Renumber.glob backend/Renumber.v.beautified: backend/Renumber.v lib/Coqlib.vo lib/Maps.vo lib/Postorder.vo backend/RTL.vo backend/Renumberproof.vo backend/Renumberproof.glob backend/Renumberproof.v.beautified: backend/Renumberproof.v lib/Coqlib.vo lib/Maps.vo lib/Postorder.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Renumber.vo -backend/RTLtyping.vo backend/RTLtyping.glob backend/RTLtyping.v.beautified: backend/RTLtyping.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo common/Globalenvs.vo common/Values.vo lib/Integers.vo common/Events.vo backend/RTL.vo backend/Conventions.vo +backend/RTLtyping.vo backend/RTLtyping.glob backend/RTLtyping.v.beautified: backend/RTLtyping.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo common/Globalenvs.vo common/Values.vo lib/Integers.vo common/Memory.vo common/Events.vo backend/RTL.vo backend/Conventions.vo backend/Kildall.vo backend/Kildall.glob backend/Kildall.v.beautified: backend/Kildall.v lib/Coqlib.vo lib/Iteration.vo lib/Maps.vo lib/Lattice.vo lib/Heaps.vo backend/Liveness.vo backend/Liveness.glob backend/Liveness.v.beautified: backend/Liveness.v lib/Coqlib.vo lib/Maps.vo lib/Lattice.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo $(ARCH)/ConstpropOp.vo $(ARCH)/ConstpropOp.glob $(ARCH)/ConstpropOp.v.beautified: $(ARCH)/ConstpropOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Registers.vo backend/Constprop.vo backend/Constprop.glob backend/Constprop.v.beautified: backend/Constprop.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo backend/Liveness.vo $(ARCH)/ConstpropOp.vo -$(ARCH)/ConstpropOpproof.vo $(ARCH)/ConstpropOpproof.glob $(ARCH)/ConstpropOpproof.v.beautified: $(ARCH)/ConstpropOpproof.v lib/Coqlib.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo +$(ARCH)/ConstpropOpproof.vo $(ARCH)/ConstpropOpproof.glob $(ARCH)/ConstpropOpproof.v.beautified: $(ARCH)/ConstpropOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo backend/Constpropproof.vo backend/Constpropproof.glob backend/Constpropproof.v.beautified: backend/Constpropproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo backend/Liveness.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo $(ARCH)/ConstpropOpproof.vo $(ARCH)/CombineOp.vo $(ARCH)/CombineOp.glob $(ARCH)/CombineOp.v.beautified: $(ARCH)/CombineOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo $(ARCH)/SelectOp.vo backend/CSE.vo backend/CSE.glob backend/CSE.v.beautified: backend/CSE.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo $(ARCH)/CombineOp.vo -$(ARCH)/CombineOpproof.vo $(ARCH)/CombineOpproof.glob $(ARCH)/CombineOpproof.v.beautified: $(ARCH)/CombineOpproof.v lib/Coqlib.vo lib/Integers.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/RTL.vo $(ARCH)/CombineOp.vo backend/CSE.vo +$(ARCH)/CombineOpproof.vo $(ARCH)/CombineOpproof.glob $(ARCH)/CombineOpproof.v.beautified: $(ARCH)/CombineOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo $(ARCH)/CombineOp.vo backend/CSE.vo backend/CSEproof.vo backend/CSEproof.glob backend/CSEproof.v.beautified: backend/CSEproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo $(ARCH)/CombineOp.vo $(ARCH)/CombineOpproof.vo backend/CSE.vo -$(ARCH)/Machregs.vo $(ARCH)/Machregs.glob $(ARCH)/Machregs.v.beautified: $(ARCH)/Machregs.v lib/Coqlib.vo lib/Maps.vo common/AST.vo -backend/Locations.vo backend/Locations.glob backend/Locations.v.beautified: backend/Locations.v lib/Coqlib.vo common/AST.vo common/Values.vo $(ARCH)/Machregs.vo +$(ARCH)/Machregs.vo $(ARCH)/Machregs.glob $(ARCH)/Machregs.v.beautified: $(ARCH)/Machregs.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo +backend/Locations.vo backend/Locations.glob backend/Locations.v.beautified: backend/Locations.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo common/Values.vo $(ARCH)/Machregs.vo $(ARCH)/$(VARIANT)/Conventions1.vo $(ARCH)/$(VARIANT)/Conventions1.glob $(ARCH)/$(VARIANT)/Conventions1.v.beautified: $(ARCH)/$(VARIANT)/Conventions1.v lib/Coqlib.vo common/AST.vo backend/Locations.vo backend/Conventions.vo backend/Conventions.glob backend/Conventions.v.beautified: backend/Conventions.v lib/Coqlib.vo common/AST.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Conventions1.vo backend/LTL.vo backend/LTL.glob backend/LTL.v.beautified: backend/LTL.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo -backend/LTLtyping.vo backend/LTLtyping.glob backend/LTLtyping.v.beautified: backend/LTLtyping.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Conventions.vo -backend/InterfGraph.vo backend/InterfGraph.glob backend/InterfGraph.v.beautified: backend/InterfGraph.v lib/Coqlib.vo lib/Ordered.vo backend/Registers.vo backend/Locations.vo -backend/Coloring.vo backend/Coloring.glob backend/Coloring.v.beautified: backend/Coloring.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Locations.vo backend/Conventions.vo backend/InterfGraph.vo -backend/Coloringproof.vo backend/Coloringproof.glob backend/Coloringproof.v.beautified: backend/Coloringproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Locations.vo backend/Conventions.vo backend/InterfGraph.vo backend/Coloring.vo -backend/Allocation.vo backend/Allocation.glob backend/Allocation.v.beautified: backend/Allocation.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/Lattice.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Liveness.vo backend/Locations.vo backend/LTL.vo backend/Coloring.vo -backend/Allocproof.vo backend/Allocproof.glob backend/Allocproof.v.beautified: backend/Allocproof.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Smallstep.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Liveness.vo backend/Locations.vo backend/LTL.vo backend/Conventions.vo backend/Coloring.vo backend/Coloringproof.vo backend/Allocation.vo -backend/Alloctyping.vo backend/Alloctyping.glob backend/Alloctyping.v.beautified: backend/Alloctyping.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Locations.vo backend/LTL.vo backend/Coloring.vo backend/Coloringproof.vo backend/Allocation.vo backend/Allocproof.vo backend/RTLtyping.vo backend/LTLtyping.vo backend/Conventions.vo +backend/Allocation.vo backend/Allocation.glob backend/Allocation.v.beautified: backend/Allocation.v lib/FSetAVLplus.vo lib/Coqlib.vo lib/Ordered.vo common/Errors.vo lib/Maps.vo lib/Lattice.vo common/AST.vo lib/Integers.vo common/Memdata.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo backend/Locations.vo backend/Conventions.vo backend/RTLtyping.vo backend/LTL.vo +backend/Allocproof.vo backend/Allocproof.glob backend/Allocproof.v.beautified: backend/Allocproof.v lib/Coqlib.vo lib/Ordered.vo common/Errors.vo lib/Maps.vo lib/Lattice.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo backend/Locations.vo backend/Conventions.vo backend/LTL.vo backend/Allocation.vo backend/Tunneling.vo backend/Tunneling.glob backend/Tunneling.v.beautified: backend/Tunneling.v lib/Coqlib.vo lib/Maps.vo lib/UnionFind.vo common/AST.vo backend/LTL.vo backend/Tunnelingproof.vo backend/Tunnelingproof.glob backend/Tunnelingproof.v.beautified: backend/Tunnelingproof.v lib/Coqlib.vo lib/Maps.vo lib/UnionFind.vo common/AST.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Tunneling.vo -backend/Tunnelingtyping.vo backend/Tunnelingtyping.glob backend/Tunnelingtyping.v.beautified: backend/Tunnelingtyping.v lib/Coqlib.vo lib/Maps.vo common/AST.vo backend/LTL.vo backend/LTLtyping.vo backend/Tunneling.vo backend/Tunnelingproof.vo -backend/LTLin.vo backend/LTLin.glob backend/LTLin.v.beautified: backend/LTLin.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo -backend/LTLintyping.vo backend/LTLintyping.glob backend/LTLintyping.v.beautified: backend/LTLintyping.v lib/Coqlib.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/LTLtyping.vo backend/Conventions.vo -backend/Linearize.vo backend/Linearize.glob backend/Linearize.v.beautified: backend/Linearize.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo common/Errors.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/LTLin.vo backend/Kildall.vo lib/Lattice.vo -backend/Linearizeproof.vo backend/Linearizeproof.glob backend/Linearizeproof.v.beautified: backend/Linearizeproof.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Errors.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/LTLtyping.vo backend/LTLin.vo backend/Linearize.vo lib/Lattice.vo -backend/Linearizetyping.vo backend/Linearizetyping.glob backend/Linearizetyping.v.beautified: backend/Linearizetyping.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/LTLtyping.vo backend/LTLin.vo backend/Linearize.vo backend/LTLintyping.vo backend/Conventions.vo -backend/CleanupLabels.vo backend/CleanupLabels.glob backend/CleanupLabels.v.beautified: backend/CleanupLabels.v lib/Coqlib.vo lib/Ordered.vo backend/LTLin.vo -backend/CleanupLabelsproof.vo backend/CleanupLabelsproof.glob backend/CleanupLabelsproof.v.beautified: backend/CleanupLabelsproof.v lib/Coqlib.vo lib/Ordered.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/CleanupLabels.vo -backend/CleanupLabelstyping.vo backend/CleanupLabelstyping.glob backend/CleanupLabelstyping.v.beautified: backend/CleanupLabelstyping.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/CleanupLabels.vo backend/LTLintyping.vo backend/Linear.vo backend/Linear.glob backend/Linear.v.beautified: backend/Linear.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Conventions.vo -backend/Lineartyping.vo backend/Lineartyping.glob backend/Lineartyping.v.beautified: backend/Lineartyping.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Conventions.vo -backend/Parallelmove.vo backend/Parallelmove.glob backend/Parallelmove.v.beautified: backend/Parallelmove.v lib/Coqlib.vo lib/Parmov.vo common/Values.vo common/AST.vo backend/Locations.vo backend/Conventions.vo -backend/Reload.vo backend/Reload.glob backend/Reload.v.beautified: backend/Reload.v lib/Coqlib.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/Conventions.vo backend/Parallelmove.vo backend/Linear.vo -backend/Reloadproof.vo backend/Reloadproof.glob backend/Reloadproof.v.beautified: backend/Reloadproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/RTLtyping.vo backend/LTLin.vo backend/LTLintyping.vo backend/Linear.vo backend/Parallelmove.vo backend/Reload.vo -backend/Reloadtyping.vo backend/Reloadtyping.glob backend/Reloadtyping.v.beautified: backend/Reloadtyping.v lib/Coqlib.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTLin.vo backend/LTLintyping.vo backend/Linear.vo backend/Lineartyping.vo backend/Conventions.vo backend/Parallelmove.vo backend/Reload.vo backend/Reloadproof.vo -backend/RRE.vo backend/RRE.glob backend/RRE.v.beautified: backend/RRE.v lib/Coqlib.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Linear.vo -backend/RREproof.vo backend/RREproof.glob backend/RREproof.v.beautified: backend/RREproof.v lib/Axioms.vo lib/Coqlib.vo common/AST.vo common/Values.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Linear.vo backend/RRE.vo -backend/RREtyping.vo backend/RREtyping.glob backend/RREtyping.v.beautified: backend/RREtyping.v lib/Coqlib.vo common/AST.vo backend/Locations.vo backend/Linear.vo backend/Lineartyping.vo backend/Conventions.vo backend/RRE.vo backend/RREproof.vo +backend/Lineartyping.vo backend/Lineartyping.glob backend/Lineartyping.v.beautified: backend/Lineartyping.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/LTL.vo backend/Linear.vo +backend/Linearize.vo backend/Linearize.glob backend/Linearize.v.beautified: backend/Linearize.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo common/Errors.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Kildall.vo lib/Lattice.vo +backend/Linearizeproof.vo backend/Linearizeproof.glob backend/Linearizeproof.v.beautified: backend/Linearizeproof.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo lib/Lattice.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Errors.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Linearize.vo +backend/CleanupLabels.vo backend/CleanupLabels.glob backend/CleanupLabels.v.beautified: backend/CleanupLabels.v lib/Coqlib.vo lib/Ordered.vo backend/Linear.vo +backend/CleanupLabelsproof.vo backend/CleanupLabelsproof.glob backend/CleanupLabelsproof.v.beautified: backend/CleanupLabelsproof.v lib/Coqlib.vo lib/Ordered.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/CleanupLabels.vo backend/Mach.vo backend/Mach.glob backend/Mach.v.beautified: backend/Mach.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo -backend/Bounds.vo backend/Bounds.glob backend/Bounds.v.beautified: backend/Bounds.v lib/Coqlib.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Lineartyping.vo backend/Conventions.vo +backend/Bounds.vo backend/Bounds.glob backend/Bounds.v.beautified: backend/Bounds.v lib/Coqlib.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo $(ARCH)/$(VARIANT)/Stacklayout.glob $(ARCH)/$(VARIANT)/Stacklayout.v.beautified: $(ARCH)/$(VARIANT)/Stacklayout.v lib/Coqlib.vo backend/Bounds.vo -backend/Stacking.vo backend/Stacking.glob backend/Stacking.v.beautified: backend/Stacking.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Bounds.vo backend/Mach.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo +backend/Stacking.vo backend/Stacking.glob backend/Stacking.v.beautified: backend/Stacking.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Bounds.vo backend/Mach.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Lineartyping.vo backend/Stackingproof.vo backend/Stackingproof.glob backend/Stackingproof.v.beautified: backend/Stackingproof.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo common/Values.vo $(ARCH)/Op.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Lineartyping.vo backend/Mach.vo backend/Bounds.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Stacking.vo $(ARCH)/Asm.vo $(ARCH)/Asm.glob $(ARCH)/Asm.v.beautified: $(ARCH)/Asm.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Conventions.vo $(ARCH)/Asmgen.vo $(ARCH)/Asmgen.glob $(ARCH)/Asmgen.v.beautified: $(ARCH)/Asmgen.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo backend/Asmgenproof0.vo backend/Asmgenproof0.glob backend/Asmgenproof0.v.beautified: backend/Asmgenproof0.v lib/Coqlib.vo lib/Intv.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo -$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob $(ARCH)/Asmgenproof1.v.beautified: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo backend/Conventions.vo -$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob $(ARCH)/Asmgenproof.v.beautified: $(ARCH)/Asmgenproof.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Conventions.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo $(ARCH)/Asmgenproof1.vo +$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob $(ARCH)/Asmgenproof1.v.beautified: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo backend/Asmgenproof0.vo +$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob $(ARCH)/Asmgenproof.v.beautified: $(ARCH)/Asmgenproof.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo $(ARCH)/Asmgenproof1.vo cfrontend/Ctypes.vo cfrontend/Ctypes.glob cfrontend/Ctypes.v.beautified: cfrontend/Ctypes.v lib/Coqlib.vo common/AST.vo common/Errors.vo cfrontend/Cop.vo cfrontend/Cop.glob cfrontend/Cop.v.beautified: cfrontend/Cop.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo cfrontend/Ctypes.vo cfrontend/Csyntax.vo cfrontend/Csyntax.glob cfrontend/Csyntax.v.beautified: cfrontend/Csyntax.v lib/Coqlib.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo cfrontend/Ctypes.vo cfrontend/Cop.vo @@ -116,7 +102,7 @@ cfrontend/Cshmgenproof.vo cfrontend/Cshmgenproof.glob cfrontend/Cshmgenproof.v.b cfrontend/Csharpminor.vo cfrontend/Csharpminor.glob cfrontend/Csharpminor.v.beautified: cfrontend/Csharpminor.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo backend/Cminor.vo common/Smallstep.vo cfrontend/Cminorgen.vo cfrontend/Cminorgen.glob cfrontend/Cminorgen.v.beautified: cfrontend/Cminorgen.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/Ordered.vo common/AST.vo lib/Integers.vo lib/Floats.vo cfrontend/Csharpminor.vo backend/Cminor.vo cfrontend/Cminorgenproof.vo cfrontend/Cminorgenproof.glob cfrontend/Cminorgenproof.v.beautified: cfrontend/Cminorgenproof.v lib/Coqlib.vo lib/Intv.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo common/Switch.vo cfrontend/Csharpminor.vo backend/Cminor.vo cfrontend/Cminorgen.vo -driver/Compiler.vo driver/Compiler.glob driver/Compiler.v.beautified: driver/Compiler.v lib/Coqlib.vo common/Errors.vo common/AST.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo cfrontend/Cstrategy.vo cfrontend/Cexec.vo cfrontend/Clight.vo cfrontend/Csharpminor.vo backend/Cminor.vo backend/CminorSel.vo backend/RTL.vo backend/LTL.vo backend/LTLin.vo backend/Linear.vo backend/Mach.vo $(ARCH)/Asm.vo cfrontend/Initializers.vo cfrontend/SimplExpr.vo cfrontend/SimplLocals.vo cfrontend/Cshmgen.vo cfrontend/Cminorgen.vo backend/Selection.vo backend/RTLgen.vo backend/Tailcall.vo backend/Inlining.vo backend/Renumber.vo backend/Constprop.vo backend/CSE.vo backend/Allocation.vo backend/Tunneling.vo backend/Linearize.vo backend/CleanupLabels.vo backend/Reload.vo backend/RRE.vo backend/Stacking.vo $(ARCH)/Asmgen.vo backend/RTLtyping.vo backend/LTLtyping.vo backend/LTLintyping.vo backend/Lineartyping.vo cfrontend/SimplExprproof.vo cfrontend/SimplLocalsproof.vo cfrontend/Cshmgenproof.vo cfrontend/Cminorgenproof.vo backend/Selectionproof.vo backend/RTLgenproof.vo backend/Tailcallproof.vo backend/Inliningproof.vo backend/Renumberproof.vo backend/Constpropproof.vo backend/CSEproof.vo backend/Allocproof.vo backend/Alloctyping.vo backend/Tunnelingproof.vo backend/Tunnelingtyping.vo backend/Linearizeproof.vo backend/Linearizetyping.vo backend/CleanupLabelsproof.vo backend/CleanupLabelstyping.vo backend/Reloadproof.vo backend/Reloadtyping.vo backend/RREproof.vo backend/RREtyping.vo backend/Stackingproof.vo $(ARCH)/Asmgenproof.vo +driver/Compiler.vo driver/Compiler.glob driver/Compiler.v.beautified: driver/Compiler.v lib/Coqlib.vo common/Errors.vo common/AST.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo cfrontend/Cstrategy.vo cfrontend/Cexec.vo cfrontend/Clight.vo cfrontend/Csharpminor.vo backend/Cminor.vo backend/CminorSel.vo backend/RTL.vo backend/LTL.vo backend/Linear.vo backend/Mach.vo $(ARCH)/Asm.vo cfrontend/Initializers.vo cfrontend/SimplExpr.vo cfrontend/SimplLocals.vo cfrontend/Cshmgen.vo cfrontend/Cminorgen.vo backend/Selection.vo backend/RTLgen.vo backend/Tailcall.vo backend/Inlining.vo backend/Renumber.vo backend/Constprop.vo backend/CSE.vo backend/Allocation.vo backend/Tunneling.vo backend/Linearize.vo backend/CleanupLabels.vo backend/Stacking.vo $(ARCH)/Asmgen.vo cfrontend/SimplExprproof.vo cfrontend/SimplLocalsproof.vo cfrontend/Cshmgenproof.vo cfrontend/Cminorgenproof.vo backend/Selectionproof.vo backend/RTLgenproof.vo backend/Tailcallproof.vo backend/Inliningproof.vo backend/Renumberproof.vo backend/Constpropproof.vo backend/CSEproof.vo backend/Allocproof.vo backend/Tunnelingproof.vo backend/Linearizeproof.vo backend/CleanupLabelsproof.vo backend/Stackingproof.vo $(ARCH)/Asmgenproof.vo driver/Complements.vo driver/Complements.glob driver/Complements.v.beautified: driver/Complements.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo common/Behaviors.vo cfrontend/Csyntax.vo cfrontend/Csem.vo cfrontend/Cstrategy.vo cfrontend/Clight.vo backend/Cminor.vo backend/RTL.vo $(ARCH)/Asm.vo driver/Compiler.vo common/Errors.vo flocq/Core/Fcore_float_prop.vo flocq/Core/Fcore_float_prop.glob flocq/Core/Fcore_float_prop.v.beautified: flocq/Core/Fcore_float_prop.v flocq/Core/Fcore_Raux.vo flocq/Core/Fcore_defs.vo flocq/Core/Fcore_Zaux.vo flocq/Core/Fcore_Zaux.glob flocq/Core/Fcore_Zaux.v.beautified: flocq/Core/Fcore_Zaux.v @@ -55,7 +55,7 @@ FLOCQ=$(FLOCQ_CORE) $(FLOCQ_PROP) $(FLOCQ_CALC) $(FLOCQ_APPLI) LIB=Axioms.v Coqlib.v Intv.v Maps.v Heaps.v Lattice.v Ordered.v \ Iteration.v Integers.v Floats.v Parmov.v UnionFind.v Wfsimpl.v \ - Postorder.v + Postorder.v FSetAVLplus.v # Parts common to the front-ends and the back-end (in common/) @@ -66,7 +66,8 @@ COMMON=Errors.v AST.v Events.v Globalenvs.v Memdata.v Memtype.v Memory.v \ BACKEND=\ Cminor.v Op.v CminorSel.v \ - SelectOp.v Selection.v SelectOpproof.v Selectionproof.v \ + SelectOp.v SelectLong.v Selection.v \ + SelectOpproof.v SelectLongproof.v Selectionproof.v \ Registers.v RTL.v \ RTLgen.v RTLgenspec.v RTLgenproof.v \ Tailcall.v Tailcallproof.v \ @@ -76,16 +77,12 @@ BACKEND=\ Kildall.v Liveness.v \ ConstpropOp.v Constprop.v ConstpropOpproof.v Constpropproof.v \ CombineOp.v CSE.v CombineOpproof.v CSEproof.v \ - Machregs.v Locations.v Conventions1.v Conventions.v LTL.v LTLtyping.v \ - InterfGraph.v Coloring.v Coloringproof.v \ - Allocation.v Allocproof.v Alloctyping.v \ - Tunneling.v Tunnelingproof.v Tunnelingtyping.v \ - LTLin.v LTLintyping.v \ - Linearize.v Linearizeproof.v Linearizetyping.v \ - CleanupLabels.v CleanupLabelsproof.v CleanupLabelstyping.v \ + Machregs.v Locations.v Conventions1.v Conventions.v LTL.v \ + Allocation.v Allocproof.v \ + Tunneling.v Tunnelingproof.v \ Linear.v Lineartyping.v \ - Parallelmove.v Reload.v Reloadproof.v Reloadtyping.v \ - RRE.v RREproof.v RREtyping.v \ + Linearize.v Linearizeproof.v \ + CleanupLabels.v CleanupLabelsproof.v \ Mach.v \ Bounds.v Stacklayout.v Stacking.v Stackingproof.v \ Asm.v Asmgen.v Asmgenproof0.v Asmgenproof1.v Asmgenproof.v @@ -141,6 +141,7 @@ Inductive instruction : Type := | Pldrh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *) | Pldrsb: ireg -> ireg -> shift_addr -> instruction (**r signed int8 load *) | Pldrsh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *) + | Pmla: ireg -> ireg -> ireg -> ireg -> instruction (**r integer multiply-add *) | Pmov: ireg -> shift_op -> instruction (**r integer move *) | Pmovc: crbit -> ireg -> shift_op -> instruction (**r integer conditional move *) | Pmul: ireg -> ireg -> ireg -> instruction (**r integer multiplication *) @@ -180,7 +181,7 @@ Inductive instruction : Type := | Plabel: label -> instruction (**r define a code label *) | Ploadsymbol: ireg -> ident -> int -> instruction (**r load the address of a symbol *) | Pbtbl: ireg -> list label -> instruction (**r N-way branch through a jump table *) - | Pbuiltin: external_function -> list preg -> preg -> instruction (**r built-in function *) + | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *) | Pannot: external_function -> list annot_param -> instruction (**r annotation statement *) with annot_param : Type := @@ -261,6 +262,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset := | r :: l' => undef_regs l' (rs#r <- Vundef) end. +(** Assigning multiple registers *) + +Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset := + match rl, vl with + | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1) + | _, _ => rs + end. + Section RELSEM. (** Looking up instructions in a code sequence by position. *) @@ -461,6 +470,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out exec_load Mint8signed (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m | Pldrsh r1 r2 sa => exec_load Mint16signed (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + | Pmla r1 r2 r3 r4 => + Next (nextinstr (rs#r1 <- (Val.add (Val.mul rs#r2 rs#r3) rs#r4))) m | Pmov r1 so => Next (nextinstr (rs#r1 <- (eval_shift_op so rs))) m | Pmovc bit r1 so => @@ -522,9 +533,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pfuitod r1 r2 => Next (nextinstr (rs#r1 <- (Val.maketotal (Val.floatofintu rs#r2)))) m | Pftosizd r1 r2 => - Next (nextinstr (rs#r1 <- (Val.maketotal (Val.intoffloat rs#r2)))) m + Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intoffloat rs#r2)))) m | Pftouizd r1 r2 => - Next (nextinstr (rs#r1 <- (Val.maketotal (Val.intuoffloat rs#r2)))) m + Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intuoffloat rs#r2)))) m | Pfcvtsd r1 r2 => Next (nextinstr (rs#r1 <- (Val.singleoffloat rs#r2))) m | Pfldd r1 r2 n => @@ -535,7 +546,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out exec_store Mfloat64al32 (Val.add rs#r2 (Vint n)) r1 rs m | Pfsts r1 r2 n => match exec_store Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m with - | Next rs' m' => Next (rs'#FR7 <- Vundef) m' + | Next rs' m' => Next (rs'#FR6 <- Vundef) m' | Stuck => Stuck end (* Pseudo-instructions *) @@ -544,7 +555,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out let sp := (Vptr stk Int.zero) in match Mem.storev Mint32 m1 (Val.add sp (Vint pos)) rs#IR13 with | None => Stuck - | Some m2 => Next (nextinstr (rs #IR10 <- (rs#IR13) #IR13 <- sp)) m2 + | Some m2 => Next (nextinstr (rs #IR12 <- (rs#IR13) #IR13 <- sp)) m2 end | Pfreeframe sz pos => match Mem.loadv Mint32 m (Val.add rs#IR13 (Vint pos)) with @@ -585,37 +596,36 @@ Definition preg_of (r: mreg) : preg := match r with | R0 => IR0 | R1 => IR1 | R2 => IR2 | R3 => IR3 | R4 => IR4 | R5 => IR5 | R6 => IR6 | R7 => IR7 - | R8 => IR8 | R9 => IR9 | R11 => IR11 - | IT1 => IR10 | IT2 => IR12 + | R8 => IR8 | R9 => IR9 | R10 => IR10 | R11 => IR11 + | R12 => IR12 | F0 => FR0 | F1 => FR1 | F2 => FR2 | F3 => FR3 - | F4 => FR4 | F5 => FR5 + | F4 => FR4 | F5 => FR5 | F6 => FR6 | F7 => FR7 | F8 => FR8 | F9 => FR9 | F10 => FR10 | F11 => FR11 | F12 => FR12 | F13 => FR13 | F14 => FR14 | F15 => FR15 - | FT1 => FR6 | FT2 => FR7 end. (** Extract the values of the arguments of an external call. We exploit the calling conventions from module [Conventions], except that we use ARM registers instead of locations. *) +Definition chunk_of_type (ty: typ) := + match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end. + Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop := | extcall_arg_reg: forall r, extcall_arg rs m (R r) (rs (preg_of r)) - | extcall_arg_int_stack: forall ofs bofs v, - bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv Mint32 m (Val.add (rs (IR IR13)) (Vint (Int.repr bofs))) = Some v -> - extcall_arg rs m (S (Outgoing ofs Tint)) v - | extcall_arg_float_stack: forall ofs bofs v, + | extcall_arg_stack: forall ofs ty bofs v, bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv Mfloat64al32 m (Val.add (rs (IR IR13)) (Vint (Int.repr bofs))) = Some v -> - extcall_arg rs m (S (Outgoing ofs Tfloat)) v. + Mem.loadv (chunk_of_type ty) m + (Val.add (rs (IR IR13)) (Vint (Int.repr bofs))) = Some v -> + extcall_arg rs m (S Outgoing ofs ty) v. Definition extcall_arguments (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop := list_forall2 (extcall_arg rs m) (loc_arguments sg) args. -Definition loc_external_result (sg: signature) : preg := - preg_of (loc_result sg). +Definition loc_external_result (sg: signature) : list preg := + map preg_of (loc_result sg). (** Extract the values of the arguments of an annotation. *) @@ -645,28 +655,30 @@ Inductive step: state -> trace -> state -> Prop := exec_instr f i rs m = Next rs' m' -> step (State rs m) E0 (State rs' m') | exec_step_builtin: - forall b ofs f ef args res rs m t v m', + forall b ofs f ef args res rs m t vl rs' m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal f) -> find_instr (Int.unsigned ofs) (fn_code f) = Some (Pbuiltin ef args res) -> - external_call ef ge (map rs args) m t v m' -> - step (State rs m) t (State (nextinstr(rs # res <- v)) m') + external_call' ef ge (map rs args) m t vl m' -> + rs' = nextinstr + (set_regs res vl + (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) -> + step (State rs m) t (State rs' m') | exec_step_annot: forall b ofs f ef args rs m vargs t v m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal f) -> find_instr (Int.unsigned ofs) (fn_code f) = Some (Pannot ef args) -> annot_arguments rs m args vargs -> - external_call ef ge vargs m t v m' -> + external_call' ef ge vargs m t v m' -> step (State rs m) t (State (nextinstr rs) m') | exec_step_external: forall b ef args res rs m t rs' m', rs PC = Vptr b Int.zero -> Genv.find_funct_ptr ge b = Some (External ef) -> - external_call ef ge args m t res m' -> + external_call' ef ge args m t res m' -> extcall_arguments rs m (ef_sig ef) args -> - rs' = (rs#(loc_external_result (ef_sig ef)) <- res - #PC <- (rs IR14)) -> + rs' = (set_regs (loc_external_result (ef_sig ef) ) res rs)#PC <- (rs IR14) -> step (State rs m) t (State rs' m'). End RELSEM. @@ -734,21 +746,21 @@ Ltac Equalities := discriminate. discriminate. inv H11. - exploit external_call_determ. eexact H4. eexact H11. intros [A B]. + exploit external_call_determ'. eexact H4. eexact H9. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. inv H12. assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0. - exploit external_call_determ. eexact H5. eexact H13. intros [A B]. + exploit external_call_determ'. eexact H5. eexact H13. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0. - exploit external_call_determ. eexact H3. eexact H8. intros [A B]. + exploit external_call_determ'. eexact H3. eexact H8. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. (* trace length *) red; intros; inv H; simpl. omega. - eapply external_call_trace_length; eauto. - eapply external_call_trace_length; eauto. - eapply external_call_trace_length; eauto. + inv H3; eapply external_call_trace_length; eauto. + inv H4; eapply external_call_trace_length; eauto. + inv H2; eapply external_call_trace_length; eauto. (* initial states *) inv H; inv H0. f_equal. congruence. (* final no step *) @@ -768,15 +780,3 @@ Definition data_preg (r: preg) : bool := | PC => false end. -Definition nontemp_preg (r: preg) : bool := - match r with - | IR IR14 => false - | IR IR10 => false - | IR IR12 => false - | IR _ => true - | FR FR6 => false - | FR FR7 => false - | FR _ => true - | CR _ => false - | PC => false - end. diff --git a/arm/Asmgen.v b/arm/Asmgen.v index d158c77..1ff28d9 100644 --- a/arm/Asmgen.v +++ b/arm/Asmgen.v @@ -290,9 +290,15 @@ Definition transl_op OK (rsubimm r r1 n k) | Omul, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (if ireg_eq r r1 || ireg_eq r r2 - then Pmul IR14 r1 r2 :: Pmov r (SOreg IR14) :: k - else Pmul r r1 r2 :: k) + OK (if negb (ireg_eq r r1) then Pmul r r1 r2 :: k + else if negb (ireg_eq r r2) then Pmul r r2 r1 :: k + else Pmul IR14 r1 r2 :: Pmov r (SOreg IR14) :: k) + | Omla, a1 :: a2 :: a3 :: nil => + do r <- ireg_of res; do r1 <- ireg_of a1; + do r2 <- ireg_of a2; do r3 <- ireg_of a3; + OK (if negb (ireg_eq r r1) then Pmla r r1 r2 r3 :: k + else if negb (ireg_eq r r2) then Pmla r r2 r1 r3 :: k + else Pmla IR14 r1 r2 r3 :: Pmov r (SOreg IR14) :: k) | Odiv, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Psdiv r r1 r2 :: k) @@ -420,6 +426,7 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) := match ty with | Tint => do r <- ireg_of dst; OK (loadind_int base ofs r k) | Tfloat => do r <- freg_of dst; OK (loadind_float base ofs r k) + | Tlong => Error (msg "Asmgen.loadind") end. Definition storeind_int (src: ireg) (base: ireg) (ofs: int) (k: code) := @@ -432,6 +439,7 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := match ty with | Tint => do r <- ireg_of src; OK (storeind_int r base ofs k) | Tfloat => do r <- freg_of src; OK (storeind_float r base ofs k) + | Tlong => Error (msg "Asmgen.storeind") end. (** Translation of memory accesses *) @@ -512,6 +520,8 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing) transl_memory_access_float Pflds mk_immed_mem_float dst addr args k | Mfloat64 | Mfloat64al32 => transl_memory_access_float Pfldd mk_immed_mem_float dst addr args k + | Mint64 => + Error (msg "Asmgen.transl_load") end. Definition transl_store (chunk: memory_chunk) (addr: addressing) @@ -531,6 +541,8 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing) transl_memory_access_float Pfsts mk_immed_mem_float src addr args k | Mfloat64 | Mfloat64al32 => transl_memory_access_float Pfstd mk_immed_mem_float src addr args k + | Mint64 => + Error (msg "Asmgen.transl_store") end. (** Translation of arguments to annotations *) @@ -544,17 +556,17 @@ Definition transl_annot_param (p: Mach.annot_param) : Asm.annot_param := (** Translation of a Mach instruction. *) Definition transl_instr (f: Mach.function) (i: Mach.instruction) - (r10_is_parent: bool) (k: code) := + (r12_is_parent: bool) (k: code) := match i with | Mgetstack ofs ty dst => loadind IR13 ofs ty dst k | Msetstack src ofs ty => storeind src IR13 ofs ty k | Mgetparam ofs ty dst => - do c <- loadind IR10 ofs ty dst k; - OK (if r10_is_parent + do c <- loadind IR12 ofs ty dst k; + OK (if r12_is_parent then c - else loadind_int IR13 f.(fn_link_ofs) IR10 c) + else loadind_int IR13 f.(fn_link_ofs) IR12 c) | Mop op args res => transl_op op args res k | Mload chunk addr args dst => @@ -573,7 +585,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) OK (loadind_int IR13 f.(fn_retaddr_ofs) IR14 (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbsymb symb sig :: k)) | Mbuiltin ef args res => - OK (Pbuiltin ef (map preg_of args) (preg_of res) :: k) + OK (Pbuiltin ef (map preg_of args) (map preg_of res) :: k) | Mannot ef args => OK (Pannot ef (map transl_annot_param args) :: k) | Mlabel lbl => @@ -596,8 +608,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool := match i with | Msetstack src ofs ty => before - | Mgetparam ofs ty dst => negb (mreg_eq dst IT1) - | Mop Omove args res => before && negb (mreg_eq res IT1) + | Mgetparam ofs ty dst => negb (mreg_eq dst R12) + | Mop Omove args res => before && negb (mreg_eq res R12) | _ => false end. diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v index 986d474..aede0da 100644 --- a/arm/Asmgenproof.v +++ b/arm/Asmgenproof.v @@ -264,7 +264,8 @@ Proof. Opaque Int.eq. unfold transl_op; intros; destruct op; TailNoLabel. destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel. - destruct (ireg_eq x x0 || ireg_eq x x1); TailNoLabel. + destruct (negb (ireg_eq x x0)). TailNoLabel. destruct (negb (ireg_eq x x1)); TailNoLabel. + destruct (negb (ireg_eq x x0)). TailNoLabel. destruct (negb (ireg_eq x x1)); TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel. Qed. @@ -420,7 +421,7 @@ Inductive match_states: Mach.state -> Asm.state -> Prop := (MEXT: Mem.extends m m') (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc) (AG: agree ms sp rs) - (DXP: ep = true -> rs#IR10 = parent_sp s), + (DXP: ep = true -> rs#IR12 = parent_sp s), match_states (Mach.State s fb sp c ms m) (Asm.State rs m') | match_states_call: @@ -451,7 +452,7 @@ Lemma exec_straight_steps: exists rs2, exec_straight tge tf c rs1 m1' k rs2 m2' /\ agree ms2 sp rs2 - /\ (it1_is_parent ep i = true -> rs2#IR10 = parent_sp s)) -> + /\ (it1_is_parent ep i = true -> rs2#IR12 = parent_sp s)) -> exists st', plus step tge (State rs1 m1') E0 st' /\ match_states (Mach.State s fb sp c ms2 m2) st'. @@ -515,9 +516,9 @@ Definition measure (s: Mach.state) : nat := | Mach.Returnstate _ _ _ => 1%nat end. -Remark preg_of_not_R10: forall r, negb (mreg_eq r IT1) = true -> IR IR10 <> preg_of r. +Remark preg_of_not_R12: forall r, negb (mreg_eq r R12) = true -> IR IR12 <> preg_of r. Proof. - intros. change (IR IR10) with (preg_of IT1). red; intros. + intros. change (IR IR12) with (preg_of R12). red; intros. exploit preg_of_injective; eauto. intros; subst r. unfold proj_sumbool in H; rewrite dec_eq_true in H; discriminate. Qed. @@ -555,7 +556,7 @@ Proof. rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR. exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]]. exists rs'; split. eauto. - split. change (undef_setstack rs) with rs. apply agree_exten with rs0; auto with asmgen. + split. change (Mach.undef_regs (destroyed_by_op Omove) rs) with rs. apply agree_exten with rs0; auto with asmgen. simpl; intros. rewrite Q; auto with asmgen. - (* Mgetparam *) @@ -569,24 +570,24 @@ Proof. Opaque loadind. left; eapply exec_straight_steps; eauto; intros. destruct ep; monadInv TR. -(* R10 contains parent *) +(* R12 contains parent *) exploit loadind_correct. eexact EQ. instantiate (2 := rs0). rewrite DXP; eauto. intros [rs1 [P [Q R]]]. exists rs1; split. eauto. split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen. simpl; intros. rewrite R; auto with asmgen. - apply preg_of_not_R10; auto. + apply preg_of_not_R12; auto. (* GPR11 does not contain parent *) - exploit loadind_int_correct. eexact A. instantiate (1 := IR10). intros [rs1 [P [Q R]]]. + exploit loadind_int_correct. eexact A. instantiate (1 := IR12). intros [rs1 [P [Q R]]]. exploit loadind_correct. eexact EQ. instantiate (2 := rs1). rewrite Q. eauto. intros [rs2 [S [T U]]]. exists rs2; split. eapply exec_straight_trans; eauto. split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto. - instantiate (1 := rs1#IR10 <- (rs2#IR10)). intros. + instantiate (1 := rs1#IR12 <- (rs2#IR12)). intros. rewrite Pregmap.gso; auto with asmgen. - congruence. intros. unfold Pregmap.set. destruct (PregEq.eq r' IR10). congruence. auto with asmgen. + congruence. intros. unfold Pregmap.set. destruct (PregEq.eq r' IR12). congruence. auto with asmgen. simpl; intros. rewrite U; auto with asmgen. - apply preg_of_not_R10; auto. + apply preg_of_not_R12; auto. - (* Mop *) assert (eval_operation tge sp op rs##args m = Some v). @@ -597,12 +598,9 @@ Opaque loadind. exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. assert (S: Val.lessdef v (rs2 (preg_of res))) by (eapply Val.lessdef_trans; eauto). exists rs2; split. eauto. split. - assert (agree (Regmap.set res v (undef_temps rs)) sp rs2). - eapply agree_set_undef_mreg; eauto with asmgen. - unfold undef_op; destruct op; auto. - change (undef_move rs) with rs. eapply agree_set_mreg; eauto. + eapply agree_set_undef_mreg; eauto with asmgen. simpl. destruct op; try congruence. destruct ep; simpl; try congruence. intros. - rewrite R; auto. apply preg_of_not_R10; auto. + rewrite R; auto. apply preg_of_not_R12; auto. exact I. - (* Mload *) assert (eval_addressing tge sp addr rs##args = Some a). @@ -627,7 +625,7 @@ Opaque loadind. intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]]. exists rs2; split. eauto. - split. eapply agree_exten_temps; eauto. + split. eapply agree_undef_regs; eauto. simpl; congruence. - (* Mcall *) @@ -695,7 +693,7 @@ Opaque loadind. exploit loadind_int_correct. eexact C. intros [rs1 [P [Q R]]]. econstructor; split. eapply exec_straight_trans. eexact P. apply exec_straight_one. - simpl. rewrite R; auto with asmgen. rewrite A. + simpl. rewrite R; auto with asmgen. unfold Mach.chunk_of_type in A. rewrite A. rewrite <- (sp_val _ _ _ AG). rewrite E. eauto. auto. split. Simpl. split. Simpl. @@ -741,19 +739,21 @@ Opaque loadind. inv AT. monadInv H3. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H2); intro NOOV. - exploit external_call_mem_extends; eauto. eapply preg_vals; eauto. + exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto. intros [vres' [m2' [A [B [C D]]]]]. left. econstructor; split. apply plus_one. eapply exec_step_builtin. eauto. eauto. eapply find_instr_tail; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. + eauto. econstructor; eauto. - Simpl. rewrite <- H0. simpl. econstructor; eauto. + Simpl. rewrite set_pregs_other_2. rewrite undef_regs_other_2. rewrite <- H0. simpl. econstructor; eauto. eapply code_tail_next_int; eauto. - apply agree_nextinstr. eapply agree_set_undef_mreg; eauto. - rewrite Pregmap.gss. auto. - intros. Simpl. + apply preg_notin_charact. auto with asmgen. + apply preg_notin_charact. auto with asmgen. + apply agree_nextinstr. eapply agree_set_mregs; auto. + eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto. congruence. - (* Mannot *) @@ -761,12 +761,12 @@ Opaque loadind. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H3); intro NOOV. exploit annot_arguments_match; eauto. intros [vargs' [P Q]]. - exploit external_call_mem_extends; eauto. + exploit external_call_mem_extends'; eauto. intros [vres' [m2' [A [B [C D]]]]]. left. econstructor; split. apply plus_one. eapply exec_step_annot. eauto. eauto. eapply find_instr_tail; eauto. eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. eapply match_states_intro with (ep := false); eauto with coqlib. unfold nextinstr. rewrite Pregmap.gss. @@ -796,7 +796,7 @@ Opaque loadind. destruct (transl_cond_correct tge tf cond args _ rs0 m' _ TR) as [rs' [A [B C]]]. rewrite EC in B. econstructor; econstructor; econstructor; split. eexact A. - split. eapply agree_exten_temps; eauto with asmgen. + split. eapply agree_undef_regs; eauto with asmgen. simpl. rewrite B. reflexivity. - (* Mcond false *) @@ -807,7 +807,7 @@ Opaque loadind. econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B. reflexivity. auto. - split. eapply agree_exten_temps; eauto with asmgen. + split. eapply agree_undef_regs; eauto with asmgen. intros; Simpl. simpl. congruence. @@ -827,7 +827,7 @@ Opaque loadind. eapply find_instr_tail; eauto. simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A. econstructor; eauto. - eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simpl. + eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen. Simpl. congruence. - (* Mreturn *) @@ -887,7 +887,7 @@ Opaque loadind. exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto. intros [m3' [P Q]]. (* Execution of function prologue *) - set (rs2 := nextinstr (rs0#IR10 <- (parent_sp s) #IR13 <- (Vptr stk Int.zero))). + set (rs2 := nextinstr (rs0#IR12 <- (parent_sp s) #IR13 <- (Vptr stk Int.zero))). set (rs3 := nextinstr rs2). assert (EXEC_PROLOGUE: exec_straight tge x @@ -896,7 +896,7 @@ Opaque loadind. rewrite <- H5 at 2; unfold fn_code. apply exec_straight_two with rs2 m2'. unfold exec_instr. rewrite C. fold sp. - rewrite <- (sp_val _ _ _ AG). unfold chunk_of_type in F. rewrite F. auto. + rewrite <- (sp_val _ _ _ AG). unfold Mach.chunk_of_type in F. rewrite F. auto. simpl. auto. simpl. unfold exec_store. change (rs2 IR14) with (rs0 IR14). rewrite Int.add_zero_l. simpl. unfold chunk_of_type in P. simpl in P. @@ -911,7 +911,7 @@ Opaque loadind. unfold rs3, rs2. apply agree_nextinstr. apply agree_nextinstr. eapply agree_change_sp. - apply agree_exten_temps with rs0; eauto. + apply agree_undef_regs with rs0; eauto. intros. Simpl. congruence. - (* external function *) @@ -919,16 +919,15 @@ Opaque loadind. intros [tf [A B]]. simpl in B. inv B. exploit extcall_arguments_match; eauto. intros [args' [C D]]. - exploit external_call_mem_extends; eauto. + exploit external_call_mem_extends'; eauto. intros [res' [m2' [P [Q [R S]]]]]. left; econstructor; split. apply plus_one. eapply exec_step_external; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto. - eapply agree_set_mreg; eauto. - rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto. - intros; Simpl. + apply agree_set_other; auto with asmgen. + eapply agree_set_mregs; eauto. - (* return *) inv STACKS. simpl in *. @@ -962,8 +961,8 @@ Lemma transf_final_states: Proof. intros. inv H0. inv H. inv STACKS. constructor. auto. - compute in H1. - generalize (preg_val _ _ _ R0 AG). rewrite H1. intros LD; inv LD. auto. + compute in H1. inv H1. + generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto. Qed. Theorem transf_program_correct: diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v index 06d6d17..e27ee80 100644 --- a/arm/Asmgenproof1.v +++ b/arm/Asmgenproof1.v @@ -541,7 +541,8 @@ Ltac ArgsInv := | [ H: Error _ = OK _ |- _ ] => discriminate | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args | [ H: bind _ _ = OK _ |- _ ] => monadInv H - | [ H: assertion _ = OK _ |- _ ] => monadInv H + | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H + | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H end); subst; repeat (match goal with @@ -685,7 +686,7 @@ Lemma transl_op_correct_same: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of res) = v - /\ forall r, data_preg r = true -> r <> preg_of res -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs'#r = rs#r. Proof. intros until v; intros TR EV NOCMP. unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; inv EV; try (TranslOpSimpl; fail). @@ -713,11 +714,21 @@ Proof. intros [rs' [A [B C]]]. exists rs'; auto with asmgen. (* Omul *) - destruct (ireg_eq x x0 || ireg_eq x x1). + destruct (negb (ireg_eq x x0)). + TranslOpSimpl. + destruct (negb (ireg_eq x x1)). + rewrite Val.mul_commut. TranslOpSimpl. econstructor; split. eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. intuition Simpl. + (* Omla *) + destruct (negb (ireg_eq x x0)). TranslOpSimpl. + destruct (negb (ireg_eq x x1)). + rewrite Val.mul_commut. TranslOpSimpl. + econstructor; split. + eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. + intuition Simpl. (* divs *) econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto. intuition Simpl. @@ -767,10 +778,11 @@ Proof. intros. unfold rs4, rs3; Simpl. destruct islt; Simpl; rewrite OTH2; auto with asmgen. (* intoffloat *) econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto. - intuition Simpl. +Transparent destroyed_by_op. + simpl. intuition Simpl. (* intuoffloat *) econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto. - intuition Simpl. + simpl. intuition Simpl. (* floatofint *) econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto. intuition Simpl. @@ -788,7 +800,7 @@ Lemma transl_op_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ Val.lessdef v rs'#(preg_of res) - /\ forall r, data_preg r = true -> r <> preg_of res -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs'#r = rs#r. Proof. intros. assert (EITHER: match op with Ocmp _ => False | _ => True end \/ exists cmp, op = Ocmp cmp). @@ -878,7 +890,7 @@ Lemma transl_load_int_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v - /\ forall r, nontemp_preg r = true -> r <> preg_of dst -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r. Proof. intros. monadInv H. erewrite ireg_of_eq by eauto. eapply transl_memory_access_correct; eauto. @@ -902,7 +914,7 @@ Lemma transl_load_float_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v - /\ forall r, nontemp_preg r = true -> r <> preg_of dst -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r. Proof. intros. monadInv H. erewrite freg_of_eq by eauto. eapply transl_memory_access_correct; eauto. @@ -913,7 +925,7 @@ Proof. Qed. Lemma transl_store_int_correct: - forall mk_instr is_immed src addr args k c (rs: regset) a chunk m m', + forall mr mk_instr is_immed src addr args k c (rs: regset) a chunk m m', transl_memory_access_int mk_instr is_immed src addr args k = OK c -> eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a -> Mem.storev chunk m a rs#(preg_of src) = Some m' -> @@ -922,7 +934,7 @@ Lemma transl_store_int_correct: exec_store chunk (Val.add rs1#r2 (eval_shift_addr sa rs1)) r1 rs1 m) -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, nontemp_preg r = true -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> preg_notin r mr -> rs'#r = rs#r. Proof. intros. monadInv H. erewrite ireg_of_eq in * by eauto. eapply transl_memory_access_correct; eauto. @@ -937,7 +949,7 @@ Proof. Qed. Lemma transl_store_float_correct: - forall mk_instr is_immed src addr args k c (rs: regset) a chunk m m', + forall mr mk_instr is_immed src addr args k c (rs: regset) a chunk m m', transl_memory_access_float mk_instr is_immed src addr args k = OK c -> eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a -> Mem.storev chunk m a rs#(preg_of src) = Some m' -> @@ -946,7 +958,7 @@ Lemma transl_store_float_correct: exec_store chunk (Val.add rs1#r2 (Vint n)) r1 rs1 m) -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, nontemp_preg r = true -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> preg_notin r mr -> rs'#r = rs#r. Proof. intros. monadInv H. erewrite freg_of_eq in * by eauto. eapply transl_memory_access_correct; eauto. @@ -964,7 +976,7 @@ Lemma transl_load_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v - /\ forall r, nontemp_preg r = true -> r <> preg_of dst -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r. Proof. intros. destruct chunk; simpl in H. eapply transl_load_int_correct; eauto. @@ -972,6 +984,7 @@ Proof. eapply transl_load_int_correct; eauto. eapply transl_load_int_correct; eauto. eapply transl_load_int_correct; eauto. + discriminate. eapply transl_load_float_correct; eauto. apply Mem.loadv_float64al32 in H1. eapply transl_load_float_correct; eauto. eapply transl_load_float_correct; eauto. @@ -984,7 +997,7 @@ Lemma transl_store_correct: Mem.storev chunk m a rs#(preg_of src) = Some m' -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, nontemp_preg r = true -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_store chunk addr) -> rs'#r = rs#r. Proof. intros. destruct chunk; simpl in H. - assert (Mem.storev Mint8unsigned m a (rs (preg_of src)) = Some m'). @@ -996,11 +1009,12 @@ Proof. clear H1. eapply transl_store_int_correct; eauto. - eapply transl_store_int_correct; eauto. - eapply transl_store_int_correct; eauto. +- discriminate. - unfold transl_memory_access_float in H. monadInv H. rewrite (freg_of_eq _ _ EQ) in *. eapply transl_memory_access_correct; eauto. intros. econstructor; split. apply exec_straight_one. simpl. unfold exec_store. rewrite H. rewrite H2; eauto with asmgen. - rewrite H1. eauto. auto. intros. Simpl. + rewrite H1. eauto. auto. intros. Simpl. simpl; auto. - apply Mem.storev_float64al32 in H1. eapply transl_store_float_correct; eauto. - eapply transl_store_float_correct; eauto. diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp index 7e3217e..9bf066b 100644 --- a/arm/ConstpropOp.vp +++ b/arm/ConstpropOp.vp @@ -31,6 +31,7 @@ Inductive approx : Type := no compile-time information is available. *) | I: int -> approx (** A known integer value. *) | F: float -> approx (** A known floating-point value. *) + | L: int64 -> approx (** A know 64-bit integer value. *) | G: ident -> int -> approx (** The value is the address of the given global symbol plus the given integer offset. *) @@ -145,6 +146,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) := | Ointuoffloat, F n1 :: nil => eval_static_intuoffloat n1 | Ofloatofint, I n1 :: nil => if propagate_float_constants tt then F(Float.floatofint n1) else Unknown | Ofloatofintu, I n1 :: nil => if propagate_float_constants tt then F(Float.floatofintu n1) else Unknown + | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2) + | Olowlong, L n :: nil => I(Int64.loword n) + | Ohighlong, L n :: nil => I(Int64.hiword n) | Ocmp c, vl => eval_static_condition_val c vl | _, _ => Unknown end. diff --git a/arm/ConstpropOpproof.v b/arm/ConstpropOpproof.v index c7de86d..687e08f 100644 --- a/arm/ConstpropOpproof.v +++ b/arm/ConstpropOpproof.v @@ -45,9 +45,10 @@ Definition val_match_approx (a: approx) (v: val) : Prop := | Unknown => True | I p => v = Vint p | F p => v = Vfloat p + | L p => v = Vlong p | G symb ofs => v = symbol_address ge symb ofs | S ofs => v = Val.add sp (Vint ofs) - | _ => False + | Novalue => False end. Inductive val_list_match_approx: list approx -> list val -> Prop := @@ -65,6 +66,8 @@ Ltac SimplVMA := simpl in H; (try subst v); SimplVMA | H: (val_match_approx (F _) ?v) |- _ => simpl in H; (try subst v); SimplVMA + | H: (val_match_approx (L _) ?v) |- _ => + simpl in H; (try subst v); SimplVMA | H: (val_match_approx (G _ _) ?v) |- _ => simpl in H; (try subst v); SimplVMA | H: (val_match_approx (S _) ?v) |- _ => diff --git a/arm/Machregs.v b/arm/Machregs.v index 317515c..4906eb0 100644 --- a/arm/Machregs.v +++ b/arm/Machregs.v @@ -13,6 +13,7 @@ Require Import Coqlib. Require Import Maps. Require Import AST. +Require Import Op. (** ** Machine registers *) @@ -21,43 +22,36 @@ Require Import AST. - Integer registers that can be allocated to RTL pseudo-registers ([Rxx]). - Floating-point registers that can be allocated to RTL pseudo-registers ([Fxx]). -- Two integer registers, not allocatable, reserved as temporaries for - spilling and reloading ([ITx]). -- Two float registers, not allocatable, reserved as temporaries for - spilling and reloading ([FTx]). - The type [mreg] does not include special-purpose machine registers - such as the stack pointer and the condition codes. *) + The type [mreg] does not include reserved machine registers + such as the stack pointer, the link register, and the condition codes. *) Inductive mreg: Type := (** Allocatable integer regs *) | R0: mreg | R1: mreg | R2: mreg | R3: mreg | R4: mreg | R5: mreg | R6: mreg | R7: mreg - | R8: mreg | R9: mreg | R11: mreg + | R8: mreg | R9: mreg | R10: mreg | R11: mreg + | R12: mreg (** Allocatable double-precision float regs *) | F0: mreg | F1: mreg | F2: mreg | F3: mreg - | F4: mreg | F5: mreg + | F4: mreg | F5: mreg | F6: mreg | F7: mreg | F8: mreg | F9: mreg | F10: mreg | F11: mreg - | F12: mreg | F13: mreg | F14: mreg | F15: mreg - (** Integer temporaries *) - | IT1: mreg (* R10 *) | IT2: mreg (* R12 *) - (** Float temporaries *) - | FT1: mreg (* F6 *) | FT2: mreg (* F7 *). + | F12: mreg | F13: mreg | F14: mreg | F15: mreg. Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}. Proof. decide equality. Defined. +Global Opaque mreg_eq. Definition mreg_type (r: mreg): typ := match r with | R0 => Tint | R1 => Tint | R2 => Tint | R3 => Tint | R4 => Tint | R5 => Tint | R6 => Tint | R7 => Tint - | R8 => Tint | R9 => Tint | R11 => Tint + | R8 => Tint | R9 => Tint | R10 => Tint | R11 => Tint + | R12 => Tint | F0 => Tfloat | F1 => Tfloat | F2 => Tfloat | F3 => Tfloat - | F4 => Tfloat| F5 => Tfloat + | F4 => Tfloat| F5 => Tfloat | F6 => Tfloat | F7 => Tfloat | F8 => Tfloat | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat | F12 => Tfloat | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat - | IT1 => Tint | IT2 => Tint - | FT1 => Tfloat | FT2 => Tfloat end. Open Scope positive_scope. @@ -69,13 +63,12 @@ Module IndexedMreg <: INDEXED_TYPE. match r with | R0 => 1 | R1 => 2 | R2 => 3 | R3 => 4 | R4 => 5 | R5 => 6 | R6 => 7 | R7 => 8 - | R8 => 9 | R9 => 10 | R11 => 11 - | F0 => 12 | F1 => 13 | F2 => 14 | F3 => 15 - | F4 => 16 | F5 => 17 - | F8 => 18 | F9 => 19 | F10 => 20 | F11 => 21 - | F12 => 22 | F13 => 23 | F14 => 24 | F15 => 25 - | IT1 => 26 | IT2 => 27 - | FT1 => 28 | FT2 => 29 + | R8 => 9 | R9 => 10 | R10 => 11 | R11 => 12 + | R12 => 13 + | F0 => 14 | F1 => 15 | F2 => 16 | F3 => 17 + | F4 => 18 | F5 => 19 | F6 => 20 | F7 => 21 + | F8 => 22 | F9 => 23 | F10 => 24 | F11 => 25 + | F12 => 26 | F13 => 27 | F14 => 28 | F15 => 29 end. Lemma index_inj: forall r1 r2, index r1 = index r2 -> r1 = r2. @@ -84,3 +77,67 @@ Module IndexedMreg <: INDEXED_TYPE. Qed. End IndexedMreg. +(** ** Destroyed registers, preferred registers *) + +Definition destroyed_by_op (op: operation): list mreg := + match op with + | Odiv | Odivu => R0 :: R1 :: R2 :: R3 :: R12 :: nil + | Ointoffloat | Ointuoffloat => F6 :: nil + | _ => nil + end. + +Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg := + nil. + +Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := + match chunk with + | Mfloat32 => F6 :: nil + | _ => nil + end. + +Definition destroyed_by_cond (cond: condition): list mreg := + nil. + +Definition destroyed_by_jumptable: list mreg := + nil. + +Definition destroyed_by_builtin (ef: external_function): list mreg := + match ef with + | EF_memcpy sz al => if zle sz 32 then nil else R2 :: R3 :: R12 :: nil + | _ => R12 :: F6 :: nil + end. + +Definition destroyed_at_function_entry: list mreg := + R12 :: nil. + +Definition temp_for_parent_frame: mreg := + R12. + +Definition mregs_for_operation (op: operation): list (option mreg) * option mreg := + match op with + | Odiv | Odivu => (Some R0 :: Some R1 :: nil, Some R0) + | _ => (nil, None) + end. + +Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(option mreg) := + match ef with + | EF_memcpy sz al => + if zle sz 32 then (nil, nil) else (Some R3 :: Some R2 :: nil, nil) + | _ => (nil, nil) + end. + +Global Opaque + destroyed_by_op destroyed_by_load destroyed_by_store + destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin + destroyed_at_function_entry temp_for_parent_frame + mregs_for_operation mregs_for_builtin. + +(** Two-address operations. Return [true] if the first argument and + the result must be in the same location *and* are unconstrained + by [mregs_for_operation]. There are none for ARM. *) + +Definition two_address_op (op: operation) : bool := + false. + +Global Opaque two_address_op. + diff --git a/arm/Machregsaux.ml b/arm/Machregsaux.ml index 642437e..5486c4b 100644 --- a/arm/Machregsaux.ml +++ b/arm/Machregsaux.ml @@ -17,13 +17,12 @@ open Machregs let register_names = [ ("R0", R0); ("R1", R1); ("R2", R2); ("R3", R3); ("R4", R4); ("R5", R5); ("R6", R6); ("R7", R7); - ("R8", R8); ("R9", R9); ("R11", R11); + ("R8", R8); ("R9", R9); ("R10", R10); ("R11", R11); + ("R12", R12); ("F0", F0); ("F1", F1); ("F2", F2); ("F3", F3); - ("F4", F4); ("F5", F5); + ("F4", F4); ("F5", F5); ("F6", F6); ("F7", F7); ("F8", F8); ("F9", F9); ("F10", F10); ("F11", F11); - ("F12", F12);("F13", F13);("F14", F14); ("F15", F15); - ("R10", IT1); ("R12", IT2); - ("F6", FT1); ("F7", FT2) + ("F12", F12);("F13", F13);("F14", F14); ("F15", F15) ] let name_of_register r = @@ -81,6 +81,7 @@ Inductive operation : Type := | Orsubshift: shift -> operation (**r [rd = shifted r2 - r1] *) | Orsubimm: int -> operation (**r [rd = n - r1] *) | Omul: operation (**r [rd = r1 * r2] *) + | Omla: operation (**r [rd = r1 * r2 + r3] *) | Odiv: operation (**r [rd = r1 / r2] (signed) *) | Odivu: operation (**r [rd = r1 / r2] (unsigned) *) | Oand: operation (**r [rd = r1 & r2] *) @@ -114,6 +115,10 @@ Inductive operation : Type := | Ointuoffloat: operation (**r [rd = unsigned_int_of_float(r1)] *) | Ofloatofint: operation (**r [rd = float_of_signed_int(r1)] *) | Ofloatofintu: operation (**r [rd = float_of_unsigned_int(r1)] *) +(*c Manipulating 64-bit integers: *) + | Omakelong: operation (**r [rd = r1 << 32 | r2] *) + | Olowlong: operation (**r [rd = low-word(r1)] *) + | Ohighlong: operation (**r [rd = high-word(r1)] *) (*c Boolean tests: *) | Ocmp: condition -> operation. (**r [rd = 1] if condition holds, [rd = 0] otherwise. *) @@ -213,6 +218,7 @@ Definition eval_operation | Orsubshift s, v1 :: v2 :: nil => Some (Val.sub (eval_shift s v2) v1) | Orsubimm n, v1 :: nil => Some (Val.sub (Vint n) v1) | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2) + | Omla, v1 :: v2 :: v3 :: nil => Some (Val.add (Val.mul v1 v2) v3) | Odiv, v1 :: v2 :: nil => Val.divs v1 v2 | Odivu, v1 :: v2 :: nil => Val.divu v1 v2 | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2) @@ -244,6 +250,9 @@ Definition eval_operation | Ointuoffloat, v1::nil => Val.intuoffloat v1 | Ofloatofint, v1::nil => Val.floatofint v1 | Ofloatofintu, v1::nil => Val.floatofintu v1 + | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2) + | Olowlong, v1::nil => Some(Val.loword v1) + | Ohighlong, v1::nil => Some(Val.hiword v1) | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m)) | _, _ => None end. @@ -302,6 +311,7 @@ Definition type_of_operation (op: operation) : list typ * typ := | Orsubshift _ => (Tint :: Tint :: nil, Tint) | Orsubimm _ => (Tint :: nil, Tint) | Omul => (Tint :: Tint :: nil, Tint) + | Omla => (Tint :: Tint :: Tint :: nil, Tint) | Odiv => (Tint :: Tint :: nil, Tint) | Odivu => (Tint :: Tint :: nil, Tint) | Oand => (Tint :: Tint :: nil, Tint) @@ -333,6 +343,9 @@ Definition type_of_operation (op: operation) : list typ * typ := | Ointuoffloat => (Tfloat :: nil, Tint) | Ofloatofint => (Tint :: nil, Tfloat) | Ofloatofintu => (Tint :: nil, Tfloat) + | Omakelong => (Tint :: Tint :: nil, Tlong) + | Olowlong => (Tlong :: nil, Tint) + | Ohighlong => (Tlong :: nil, Tint) | Ocmp c => (type_of_condition c, Tint) end. @@ -374,6 +387,7 @@ Proof with (try exact I). generalize (S s v1). destruct v0; destruct (eval_shift s v1); simpl; intuition. destruct (zeq b0 b)... destruct v0... destruct v0; destruct v1... + destruct v0... destruct v1... destruct v2... destruct v0; destruct v1; simpl in H0; inv H0. destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2... destruct v0; destruct v1; simpl in H0; inv H0. destruct (Int.eq i0 Int.zero); inv H2... @@ -406,6 +420,9 @@ Proof with (try exact I). destruct v0; simpl in H0; inv H0. destruct (Float.intuoffloat f); simpl in H2; inv H2... destruct v0; simpl in H0; inv H0... destruct v0; simpl in H0; inv H0... + destruct v0; destruct v1... + destruct v0... + destruct v0... destruct (eval_condition c vl m)... destruct b... Qed. @@ -544,6 +561,29 @@ Proof. rewrite Val.add_assoc. simpl. auto. Qed. +(** Offset an addressing mode [addr] by a quantity [delta], so that + it designates the pointer [delta] bytes past the pointer designated + by [addr]. May be undefined, in which case [None] is returned. *) + +Definition offset_addressing (addr: addressing) (delta: int) : option addressing := + match addr with + | Aindexed n => Some(Aindexed (Int.add n delta)) + | Aindexed2 => None + | Aindexed2shift s => None + | Ainstack n => Some(Ainstack (Int.add n delta)) + end. + +Lemma eval_offset_addressing: + forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v, + offset_addressing addr delta = Some addr' -> + eval_addressing ge sp addr args = Some v -> + eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)). +Proof. + intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst. + rewrite Val.add_assoc; auto. + rewrite Val.add_assoc. auto. +Qed. + (** Transformation of addressing modes with two operands or more into an equivalent arithmetic operation. This is used in the [Reload] pass when a store instruction cannot be reloaded directly because @@ -787,6 +827,7 @@ Proof. apply (@Values.val_sub_inject f (Vint i) (Vint i) v v'); auto. inv H4; inv H2; simpl; auto. + apply Values.val_add_inject; auto. inv H4; inv H2; simpl; auto. inv H4; inv H3; simpl in H1; inv H1. simpl. destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. TrivialExists. inv H4; inv H3; simpl in H1; inv H1. simpl. @@ -828,6 +869,10 @@ Proof. inv H4; simpl in *; inv H1. TrivialExists. inv H4; simpl in *; inv H1. TrivialExists. + inv H4; inv H2; simpl; auto. + inv H4; simpl; auto. + inv H4; simpl; auto. + subst v1. destruct (eval_condition c vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. destruct b; simpl; constructor. diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml index 5778286..1bac715 100644 --- a/arm/PrintAsm.ml +++ b/arm/PrintAsm.ml @@ -181,47 +181,11 @@ let emit_constants oc = symbol_labels; reset_constants () -(* Simulate instructions by calling helper functions *) - -let print_list_ireg oc l = - match l with - | [] -> assert false - | r1 :: rs -> ireg oc r1; List.iter (fun r -> fprintf oc ", %a" ireg r) rs - -let rec remove l r = - match l with - | [] -> [] - | hd :: tl -> if hd = r then remove tl r else hd :: remove tl r - -let call_helper oc fn dst arg1 arg2 = - (* Preserve caller-save registers r0...r3 except dst *) - let tosave = remove [IR0; IR1; IR2; IR3] dst in - fprintf oc " stmfd sp!, {%a}\n" print_list_ireg tosave; - (* Copy arg1 to R0 and arg2 to R1 *) - let moves = - Parmov.parmove2 (=) (fun _ -> IR14) [arg1; arg2] [IR0; IR1] in - List.iter - (fun (s, d) -> - fprintf oc " mov %a, %a\n" ireg d ireg s) - moves; - (* Call the helper function *) - fprintf oc " bl %s\n" fn; - (* Move result to dst *) - begin match dst with - | IR0 -> () - | _ -> fprintf oc " mov %a, r0\n" ireg dst - end; - (* Restore the other caller-save registers *) - fprintf oc " ldmfd sp!, {%a}\n" print_list_ireg tosave; - (* ... for a total of at most 7 instructions *) - 7 - (* Built-ins. They come in two flavors: - annotation statements: take their arguments in registers or stack locations; generate no code; - inlined by the compiler: take their arguments in arbitrary - registers; preserve all registers the temporaries - (IR10, IR12, IR14, FP2, FP4) + registers; preserve all registers except IR2, IR3, IR12 and FP6. *) (* Handling of annotations *) @@ -234,9 +198,9 @@ let print_annot_val oc txt args res = fprintf oc "%s annotation: " comment; PrintAnnot.print_annot_val preg oc txt args; match args, res with - | IR src :: _, IR dst -> + | [IR src], [IR dst] -> if dst = src then 0 else (fprintf oc " mov %a, %a\n" ireg dst ireg src; 1) - | FR src :: _, FR dst -> + | [FR src], [FR dst] -> if dst = src then 0 else (fprintf oc " fcpy %a, %a\n" freg dst freg src; 1) | _, _ -> assert false @@ -265,16 +229,12 @@ let print_builtin_memcpy_small oc sz al src dst = let print_builtin_memcpy_big oc sz al src dst = assert (sz >= al); assert (sz mod al = 0); + assert (src = IR2); + assert (dst = IR3); let (load, store, chunksize) = if al >= 4 then ("ldr", "str", 4) else if al = 2 then ("ldrh", "strh", 2) else ("ldrb", "strb", 1) in - let tmp = - if src <> IR0 && dst <> IR0 then IR0 - else if src <> IR1 && dst <> IR1 then IR1 - else IR2 in - let tosave = List.sort compare [tmp;src;dst] in - fprintf oc " stmfd sp!, {%a}\n" print_list_ireg tosave; begin match Asmgen.decompose_int (coqint_of_camlint (Int32.of_int (sz / chunksize))) with | [] -> assert false @@ -286,12 +246,11 @@ let print_builtin_memcpy_big oc sz al src dst = tl end; let lbl = new_label() in - fprintf oc ".L%d: %s %a, [%a], #%d\n" lbl load ireg tmp ireg src chunksize; + fprintf oc ".L%d: %s %a, [%a], #%d\n" lbl load ireg IR12 ireg src chunksize; fprintf oc " subs %a, %a, #1\n" ireg IR14 ireg IR14; - fprintf oc " %s %a, [%a], #%d\n" store ireg tmp ireg dst chunksize; + fprintf oc " %s %a, [%a], #%d\n" store ireg IR12 ireg dst chunksize; fprintf oc " bne .L%d\n" lbl; - fprintf oc " ldmfd sp!, {%a}\n" print_list_ireg tosave; - 9 + 8 let print_builtin_memcpy oc sz al args = let (dst, src) = @@ -308,20 +267,28 @@ let print_builtin_memcpy oc sz al args = let print_builtin_vload_common oc chunk args res = match chunk, args, res with - | Mint8unsigned, [IR addr], IR res -> + | Mint8unsigned, [IR addr], [IR res] -> fprintf oc " ldrb %a, [%a, #0]\n" ireg res ireg addr; 1 - | Mint8signed, [IR addr], IR res -> + | Mint8signed, [IR addr], [IR res] -> fprintf oc " ldrsb %a, [%a, #0]\n" ireg res ireg addr; 1 - | Mint16unsigned, [IR addr], IR res -> + | Mint16unsigned, [IR addr], [IR res] -> fprintf oc " ldrh %a, [%a, #0]\n" ireg res ireg addr; 1 - | Mint16signed, [IR addr], IR res -> + | Mint16signed, [IR addr], [IR res] -> fprintf oc " ldrsh %a, [%a, #0]\n" ireg res ireg addr; 1 - | Mint32, [IR addr], IR res -> + | Mint32, [IR addr], [IR res] -> fprintf oc " ldr %a, [%a, #0]\n" ireg res ireg addr; 1 - | Mfloat32, [IR addr], FR res -> + | Mint64, [IR addr], [IR res1; IR res2] -> + if addr <> res2 then begin + fprintf oc " ldr %a, [%a, #0]\n" ireg res2 ireg addr; + fprintf oc " ldr %a, [%a, #4]\n" ireg res1 ireg addr + end else begin + fprintf oc " ldr %a, [%a, #4]\n" ireg res1 ireg addr; + fprintf oc " ldr %a, [%a, #0]\n" ireg res2 ireg addr + end; 2 + | Mfloat32, [IR addr], [FR res] -> fprintf oc " flds %a, [%a, #0]\n" freg_single res ireg addr; fprintf oc " fcvtds %a, %a\n" freg res freg_single res; 2 - | (Mfloat64 | Mfloat64al32), [IR addr], FR res -> + | (Mfloat64 | Mfloat64al32), [IR addr], [FR res] -> fprintf oc " fldd %a, [%a, #0]\n" freg res ireg addr; 1 | _ -> assert false @@ -346,6 +313,9 @@ let print_builtin_vstore_common oc chunk args = fprintf oc " strh %a, [%a, #0]\n" ireg src ireg addr; 1 | Mint32, [IR addr; IR src] -> fprintf oc " str %a, [%a, #0]\n" ireg src ireg addr; 1 + | Mint64, [IR addr; IR src1; IR src2] -> + fprintf oc " str %a, [%a, #0]\n" ireg src2 ireg addr; + fprintf oc " str %a, [%a, #4]\n" ireg src1 ireg addr; 2 | Mfloat32, [IR addr; FR src] -> fprintf oc " fcvtsd %a, %a\n" freg_single FR6 freg src; fprintf oc " fsts %a, [%a, #0]\n" freg_single FR6 ireg addr; 2 @@ -386,22 +356,57 @@ let print_builtin_inline oc name args res = fprintf oc "%s begin %s\n" comment name; let n = match name, args, res with (* Integer arithmetic *) - | "__builtin_bswap", [IR a1], IR res -> + | "__builtin_bswap", [IR a1], [IR res] -> print_bswap oc a1 IR14 res; 4 - | "__builtin_cntlz", [IR a1], IR res -> + | "__builtin_cntlz", [IR a1], [IR res] -> fprintf oc " clz %a, %a\n" ireg res ireg a1; 1 (* Float arithmetic *) - | "__builtin_fabs", [FR a1], FR res -> + | "__builtin_fabs", [FR a1], [FR res] -> fprintf oc " fabsd %a, %a\n" freg res freg a1; 1 - | "__builtin_fsqrt", [FR a1], FR res -> + | "__builtin_fsqrt", [FR a1], [FR res] -> fprintf oc " fsqrtd %a, %a\n" freg res freg a1; 1 + (* 64-bit integer arithmetic *) + | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> + if rl = ah then begin + fprintf oc " rsbs %a, %a, #0\n" ireg IR14 ireg al; + fprintf oc " rsc %a, %a, #0\n" ireg rh ireg ah; + fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3 + end else begin + fprintf oc " rsbs %a, %a, #0\n" ireg rl ireg al; + fprintf oc " rsc %a, %a, #0\n" ireg rh ireg ah; 2 + end + | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + if rl = ah || rl = bh then begin + fprintf oc " adds %a, %a, %a\n" ireg IR14 ireg al ireg bl; + fprintf oc " adc %a, %a, %a\n" ireg rh ireg ah ireg bh; + fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3 + end else begin + fprintf oc " adds %a, %a, %a\n" ireg rl ireg al ireg bl; + fprintf oc " adc %a, %a, %a\n" ireg rh ireg ah ireg bh; 2 + end + | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + if rl = ah || rl = bh then begin + fprintf oc " subs %a, %a, %a\n" ireg IR14 ireg al ireg bl; + fprintf oc " sbc %a, %a, %a\n" ireg rh ireg ah ireg bh; + fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3 + end else begin + fprintf oc " subs %a, %a, %a\n" ireg rl ireg al ireg bl; + fprintf oc " sbc %a, %a, %a\n" ireg rh ireg ah ireg bh; 2 + end + | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> + if rl = a || rh = a then begin + fprintf oc " mov %a, %a\n" ireg IR14 ireg a; + fprintf oc " umull %a, %a, %a, %a\n" ireg rl ireg rh ireg IR14 ireg b; 2 + end else begin + fprintf oc " umull %a, %a, %a, %a\n" ireg rl ireg rh ireg a ireg b; 1 + end (* Memory accesses *) - | "__builtin_read16_reversed", [IR a1], IR res -> + | "__builtin_read16_reversed", [IR a1], [IR res] -> fprintf oc " ldrh %a, [%a, #0]\n" ireg res ireg a1; fprintf oc " mov %a, %a, lsl #8\n" ireg IR14 ireg res; fprintf oc " and %a, %a, #0xFF00\n" ireg IR14 ireg IR14; fprintf oc " orr %a, %a, %a, lsr #8\n" ireg res ireg IR14 ireg res; 4 - | "__builtin_read32_reversed", [IR a1], IR res -> + | "__builtin_read32_reversed", [IR a1], [IR res] -> fprintf oc " ldr %a, [%a, #0]\n" ireg res ireg a1; print_bswap oc res IR14 res; 5 | "__builtin_write16_reversed", [IR a1; IR a2], _ -> @@ -410,9 +415,19 @@ let print_builtin_inline oc name args res = fprintf oc " orr %a, %a, %a, lsl #8\n" ireg IR14 ireg IR14 ireg a2; fprintf oc " strh %a, [%a, #0]\n" ireg IR14 ireg a1; 4 | "__builtin_write32_reversed", [IR a1; IR a2], _ -> - let tmp = if a1 = IR10 then IR12 else IR10 in - print_bswap oc a2 IR14 tmp; - fprintf oc " str %a, [%a, #0]\n" ireg tmp ireg a1; 5 + if a1 <> IR12 then begin + print_bswap oc a2 IR14 IR12; + fprintf oc " str %a, [%a, #0]\n" ireg IR12 ireg a1; 5 + end else begin + fprintf oc " mov %a, %a, lsr #24\n" ireg IR14 ireg a2; + fprintf oc " str %a, [%a, #0]\n" ireg IR14 ireg a1; + fprintf oc " mov %a, %a, lsr #16\n" ireg IR14 ireg a2; + fprintf oc " str %a, [%a, #1]\n" ireg IR14 ireg a1; + fprintf oc " mov %a, %a, lsr #8\n" ireg IR14 ireg a2; + fprintf oc " str %a, [%a, #2]\n" ireg IR14 ireg a1; + fprintf oc " str %a, [%a, #3]\n" ireg IR14 ireg a1; + 7 + end (* Catch-all *) | _ -> invalid_arg ("unrecognized builtin " ^ name) @@ -516,6 +531,8 @@ let print_instruction oc = function fprintf oc " ldrsb %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 | Pldrsh(r1, r2, sa) -> fprintf oc " ldrsh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 + | Pmla(r1, r2, r3, r4) -> + fprintf oc " mla %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1 | Pmov(r1, so) -> fprintf oc " mov %a, %a\n" ireg r1 shift_op so; 1 | Pmovc(bit, r1, so) -> @@ -535,11 +552,13 @@ let print_instruction oc = function | Pstrh(r1, r2, sa) -> fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 | Psdiv(r1, r2, r3) -> - call_helper oc "__aeabi_idiv" r1 r2 r3 + assert (r1 = IR0 && r2 = IR0 && r3 = IR1); + fprintf oc " bl __aeabi_idiv\n"; 1 | Psub(r1, r2, so) -> fprintf oc " sub %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 | Pudiv(r1, r2, r3) -> - call_helper oc "__aeabi_uidiv" r1 r2 r3 + assert (r1 = IR0 && r2 = IR0 && r3 = IR1); + fprintf oc " bl __aeabi_uidiv\n"; 1 (* Floating-point coprocessor instructions *) | Pfcpyd(r1, r2) -> fprintf oc " fcpyd %a, %a\n" freg r1 freg r2; 1 @@ -593,14 +612,14 @@ let print_instruction oc = function fprintf oc " fsts %a, [%a, #%a]\n" freg_single FR6 ireg r2 coqint n; 2 (* Pseudo-instructions *) | Pallocframe(sz, ofs) -> - fprintf oc " mov r10, sp\n"; + fprintf oc " mov r12, sp\n"; let ninstr = ref 0 in List.iter (fun n -> fprintf oc " sub sp, sp, #%a\n" coqint n; incr ninstr) (Asmgen.decompose_int sz); - fprintf oc " str r10, [sp, #%a]\n" coqint ofs; + fprintf oc " str r12, [sp, #%a]\n" coqint ofs; 2 + !ninstr | Pfreeframe(sz, ofs) -> if Asmgen.is_immed_arith sz @@ -620,7 +639,7 @@ let print_instruction oc = function List.iter (fun l -> fprintf oc " .word %a\n" print_label l) tbl; - 2 + List.length tbl + 3 + List.length tbl | Pbuiltin(ef, args, res) -> begin match ef with | EF_builtin(name, sg) -> @@ -713,6 +732,8 @@ let print_init oc = function fprintf oc " .short %ld\n" (camlint_of_coqint n) | Init_int32 n -> fprintf oc " .word %ld\n" (camlint_of_coqint n) + | Init_int64 n -> + fprintf oc " .quad %Ld\n" (camlint64_of_coqint n) | Init_float32 n -> fprintf oc " .word 0x%lx %s %.15g \n" (camlint_of_coqint (Floats.Float.bits_of_single n)) comment (camlfloat_of_coqfloat n) diff --git a/arm/PrintOp.ml b/arm/PrintOp.ml index b5a8d75..eff3959 100644 --- a/arm/PrintOp.ml +++ b/arm/PrintOp.ml @@ -71,6 +71,7 @@ let print_operation reg pp = function | Orsubshift s, [r1;r2] -> fprintf pp "%a %a - %a" reg r2 shift s reg r1 | Orsubimm n, [r1] -> fprintf pp "%ld - %a" (camlint_of_coqint n) reg r1 | Omul, [r1;r2] -> fprintf pp "%a * %a" reg r1 reg r2 + | Omla, [r1;r2;r3] -> fprintf pp "%a * %a + %a" reg r1 reg r2 reg r3 | Odiv, [r1;r2] -> fprintf pp "%a /s %a" reg r1 reg r2 | Odivu, [r1;r2] -> fprintf pp "%a /u %a" reg r1 reg r2 | Oand, [r1;r2] -> fprintf pp "%a & %a" reg r1 reg r2 @@ -100,6 +101,9 @@ let print_operation reg pp = function | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1 | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1 | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1 + | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2 + | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1 + | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1 | Ocmp c, args -> print_condition reg pp (c, args) | _ -> fprintf pp "<bad operator>" diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp index 22ef88d..d81328b 100644 --- a/arm/SelectOp.vp +++ b/arm/SelectOp.vp @@ -85,6 +85,8 @@ Nondetfunction add (e1: expr) (e2: expr) := | t1, Eop (Oaddimm n2) (t2:::Enil) => addimm n2 (Eop Oadd (t1:::t2:::Enil)) | Eop (Oshift s) (t1:::Enil), t2 => Eop (Oaddshift s) (t2:::t1:::Enil) | t1, Eop (Oshift s) (t2:::Enil) => Eop (Oaddshift s) (t1:::t2:::Enil) + | Eop Omul (t1:::t2:::Enil), t3 => Eop Omla (t1:::t2:::t3:::Enil) + | t1, Eop Omul (t2:::t3:::Enil) => Eop Omla (t2:::t3:::t1:::Enil) | _, _ => Eop Oadd (e1:::e2:::Enil) end. @@ -185,6 +187,7 @@ Nondetfunction mul (e1: expr) (e2: expr) := Nondetfunction andimm (n1: int) (e2: expr) := if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else + if Int.eq n1 Int.mone then e2 else match e2 with | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.and n1 n2)) Enil @@ -215,6 +218,7 @@ Definition same_expr_pure (e1 e2: expr) := Nondetfunction orimm (n1: int) (e2: expr) := if Int.eq n1 Int.zero then e2 else + if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil else match e2 with | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil) @@ -407,6 +411,7 @@ Definition can_use_Aindexed2 (chunk: memory_chunk): bool := | Mint16signed => true | Mint16unsigned => true | Mint32 => true + | Mint64 => false | Mfloat32 => false | Mfloat64 => false | Mfloat64al32 => false @@ -419,6 +424,7 @@ Definition can_use_Aindexed2shift (chunk: memory_chunk): bool := | Mint16signed => false | Mint16unsigned => false | Mint32 => true + | Mint64 => false | Mfloat32 => false | Mfloat64 => false | Mfloat64al32 => false diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v index ecc758f..a71ead7 100644 --- a/arm/SelectOpproof.v +++ b/arm/SelectOpproof.v @@ -27,13 +27,6 @@ Require Import SelectOp. Open Local Scope cminorsel_scope. -Section CMCONSTR. - -Variable ge: genv. -Variable sp: val. -Variable e: env. -Variable m: mem. - (** * Useful lemmas and tactics *) (** The following are trivial lemmas and custom tactics that help @@ -81,6 +74,13 @@ Ltac TrivialExists := (** * Correctness of the smart constructors *) +Section CMCONSTR. + +Variable ge: genv. +Variable sp: val. +Variable e: env. +Variable m: mem. + (** We now show that the code generated by "smart constructor" functions such as [Selection.notint] behaves as expected. Continuing the [notint] example, we show that if the expression [e] @@ -172,6 +172,8 @@ Proof. subst. rewrite <- Val.add_assoc. apply eval_addimm. EvalOp. subst. rewrite Val.add_commut. TrivialExists. subst. TrivialExists. + subst. TrivialExists. + subst. rewrite Val.add_commut. TrivialExists. TrivialExists. Qed. @@ -325,6 +327,9 @@ Proof. predSpec Int.eq Int.eq_spec n Int.zero. intros. exists (Vint Int.zero); split. EvalOp. destruct x; simpl; auto. subst n. rewrite Int.and_zero. auto. + predSpec Int.eq Int.eq_spec n Int.mone. + intros. exists x; split; auto. + subst. destruct x; simpl; auto. rewrite Int.and_mone; auto. case (andimm_match a); intros. InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto. InvEval. subst. rewrite Val.and_assoc. simpl. rewrite Int.and_commut. TrivialExists. @@ -352,6 +357,9 @@ Proof. predSpec Int.eq Int.eq_spec n Int.zero. intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.or_zero; auto. + predSpec Int.eq Int.eq_spec n Int.mone. + intros. exists (Vint Int.mone); split. EvalOp. + destruct x; simpl; auto. subst n. rewrite Int.or_mone. auto. destruct (orimm_match a); intros; InvEval. TrivialExists. simpl. rewrite Int.or_commut; auto. subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists. diff --git a/arm/linux/Conventions1.v b/arm/linux/Conventions1.v index f1ddc23..1731eba 100644 --- a/arm/linux/Conventions1.v +++ b/arm/linux/Conventions1.v @@ -32,34 +32,19 @@ Require Import Locations. *) Definition int_caller_save_regs := - R0 :: R1 :: R2 :: R3 :: nil. + R0 :: R1 :: R2 :: R3 :: R12 :: nil. Definition float_caller_save_regs := - F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: nil. + F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: nil. Definition int_callee_save_regs := - R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R11 :: nil. + R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: nil. Definition float_callee_save_regs := F8 :: F9 :: F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: nil. -Definition destroyed_at_call_regs := - int_caller_save_regs ++ float_caller_save_regs. - Definition destroyed_at_call := - List.map R destroyed_at_call_regs. - -Definition int_temporaries := IT1 :: IT2 :: nil. - -Definition float_temporaries := FT1 :: FT2 :: nil. - -Definition temporary_regs := int_temporaries ++ float_temporaries. - -Definition temporaries := List.map R temporary_regs. - -Definition destroyed_at_move_regs: list mreg := nil. - -Definition destroyed_at_move := List.map R destroyed_at_move_regs. + int_caller_save_regs ++ float_caller_save_regs. Definition dummy_int_reg := R0. (**r Used in [Coloring]. *) Definition dummy_float_reg := F0. (**r Used in [Coloring]. *) @@ -72,7 +57,7 @@ Definition dummy_float_reg := F0. (**r Used in [Coloring]. *) Definition index_int_callee_save (r: mreg) := match r with | R4 => 0 | R5 => 1 | R6 => 2 | R7 => 3 - | R8 => 4 | R9 => 5 | R11 => 6 + | R8 => 4 | R9 => 5 | R10 => 6 | R11 => 7 | _ => -1 end. @@ -169,34 +154,27 @@ Qed. Lemma register_classification: forall r, - (In (R r) temporaries \/ In (R r) destroyed_at_call) \/ - (In r int_callee_save_regs \/ In r float_callee_save_regs). + In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs. Proof. destruct r; - try (left; left; simpl; OrEq); - try (left; right; simpl; OrEq); + try (left; simpl; OrEq); try (right; left; simpl; OrEq); try (right; right; simpl; OrEq). Qed. + Lemma int_callee_save_not_destroyed: forall r, - In (R r) temporaries \/ In (R r) destroyed_at_call -> - ~(In r int_callee_save_regs). + In r destroyed_at_call -> In r int_callee_save_regs -> False. Proof. - intros; red; intros. elim H. - generalize H0. simpl; ElimOrEq; NotOrEq. - generalize H0. simpl; ElimOrEq; NotOrEq. + intros. revert H0 H. simpl. ElimOrEq; NotOrEq. Qed. Lemma float_callee_save_not_destroyed: forall r, - In (R r) temporaries \/ In (R r) destroyed_at_call -> - ~(In r float_callee_save_regs). + In r destroyed_at_call -> In r float_callee_save_regs -> False. Proof. - intros; red; intros. elim H. - generalize H0. simpl; ElimOrEq; NotOrEq. - generalize H0. simpl; ElimOrEq; NotOrEq. + intros. revert H0 H. simpl. ElimOrEq; NotOrEq. Qed. Lemma int_callee_save_type: @@ -245,51 +223,33 @@ Qed. Calling conventions are largely arbitrary: they must respect the properties proved in this section (such as no overlapping between the locations of function arguments), but this leaves much liberty in choosing actual - locations. To ensure binary interoperability of code generated by our - compiler with libraries compiled by another ARM EABI compiler, we - implement *almost* the standard conventions defined in the ARM EABI application - binary interface, with two exceptions: -- Double-precision arguments and results are passed in VFP double registers - instead of pairs of integer registers. -- Single-precision arguments and results are passed as double-precision floats. -*) + locations. *) (** ** Location of function result *) (** The result value of a function is passed back to the caller in - registers [R0] or [F0], depending on the type of the returned value. - We treat a function without result as a function with one integer result. *) + registers [R0] or [F0] or [R0,R1], depending on the type of the + returned value. We treat a function without result as a function + with one integer result. *) -Definition loc_result (s: signature) : mreg := +Definition loc_result (s: signature) : list mreg := match s.(sig_res) with - | None => R0 - | Some Tint => R0 - | Some Tfloat => F0 + | None => R0 :: nil + | Some Tint => R0 :: nil + | Some Tfloat => F0 :: nil + | Some Tlong => R1 :: R0 :: nil end. -(** The result location has the type stated in the signature. *) - -Lemma loc_result_type: - forall sig, - mreg_type (loc_result sig) = - match sig.(sig_res) with None => Tint | Some ty => ty end. -Proof. - intros; unfold loc_result. - destruct (sig_res sig). - destruct t; reflexivity. - reflexivity. -Qed. - (** The result location is a caller-save register or a temporary *) Lemma loc_result_caller_save: - forall (s: signature), - In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries. + forall (s: signature) (r: mreg), + In r (loc_result s) -> In r destroyed_at_call. Proof. - intros; unfold loc_result. left; - destruct (sig_res s). - destruct t; simpl; OrEq. - simpl; OrEq. + intros. + assert (r = R0 \/ r = R1 \/ r = F0). + unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition. + destruct H0 as [A | [A | A]]; subst r; simpl; OrEq. Qed. (** ** Location of function arguments *) @@ -299,34 +259,37 @@ Qed. - The first 2 float arguments are passed in registers [F0] and [F1]. - Each float argument passed in a float register ``consumes'' an aligned pair of two integer registers. +- Each long integer argument is passed in an aligned pair of two integer + registers. - Extra arguments are passed on the stack, in [Outgoing] slots, consecutively - assigned (1 word for an integer argument, 2 words for a float), + assigned (1 word for an integer argument, 2 words for a float or a long), starting at word offset 0. *) -Function ireg_param (n: Z) : mreg := +Definition ireg_param (n: Z) : mreg := if zeq n (-4) then R0 else if zeq n (-3) then R1 else if zeq n (-2) then R2 - else if zeq n (-1) then R3 - else R4. (**r should not happen *) - -Function freg_param (n: Z) : mreg := - if zeq n (-4) then F0 - else if zeq n (-3) then F1 - else if zeq n (-2) then F1 - else F2. (**r should not happen *) + else R3. +Definition freg_param (n: Z) : mreg := + if zeq n (-4) then F0 else F1. Fixpoint loc_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : list loc := match tyl with | nil => nil | Tint :: tys => - (if zle 0 ofs then S (Outgoing ofs Tint) else R (ireg_param ofs)) + (if zle 0 ofs then S Outgoing ofs Tint else R (ireg_param ofs)) :: loc_arguments_rec tys (ofs + 1) | Tfloat :: tys => - (if zle (-1) ofs then S (Outgoing (align ofs 2) Tfloat) else R (freg_param ofs)) - :: loc_arguments_rec tys (align ofs 2 + 2) + let ofs := align ofs 2 in + (if zle 0 ofs then S Outgoing ofs Tfloat else R (freg_param ofs)) + :: loc_arguments_rec tys (ofs + 2) + | Tlong :: tys => + let ofs := align ofs 2 in + (if zle 0 ofs then S Outgoing (ofs + 1) Tint else R (ireg_param (ofs + 1))) + :: (if zle 0 ofs then S Outgoing ofs Tint else R (ireg_param ofs)) + :: loc_arguments_rec tys (ofs + 2) end. (** [loc_arguments s] returns the list of locations where to store arguments @@ -342,7 +305,7 @@ Fixpoint size_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : Z := match tyl with | nil => ofs | Tint :: tys => size_arguments_rec tys (ofs + 1) - | Tfloat :: tys => size_arguments_rec tys (align ofs 2 + 2) + | (Tfloat | Tlong) :: tys => size_arguments_rec tys (align ofs 2 + 2) end. Definition size_arguments (s: signature) : Z := @@ -353,109 +316,85 @@ Definition size_arguments (s: signature) : Z := Definition loc_argument_acceptable (l: loc) : Prop := match l with - | R r => ~(In l temporaries) - | S (Outgoing ofs ty) => ofs >= 0 + | R r => In r destroyed_at_call + | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong | _ => False end. +(* Lemma align_monotone: forall x1 x2 y, y > 0 -> x1 <= x2 -> align x1 y <= align x2 y. Proof. intros. unfold align. apply Zmult_le_compat_r. apply Z_div_le. omega. omega. omega. Qed. +*) + +Remark ireg_param_caller_save: + forall n, In (ireg_param n) destroyed_at_call. +Proof. + unfold ireg_param; intros. + destruct (zeq n (-4)). simpl; auto. + destruct (zeq n (-3)). simpl; auto. + destruct (zeq n (-2)); simpl; auto. +Qed. + +Remark freg_param_caller_save: + forall n, In (freg_param n) destroyed_at_call. +Proof. + unfold freg_param; intros. destruct (zeq n (-4)); simpl; OrEq. +Qed. Remark loc_arguments_rec_charact: forall tyl ofs l, In l (loc_arguments_rec tyl ofs) -> match l with - | R r => - (exists n, ofs <= n < 0 /\ r = ireg_param n) - \/ (exists n, ofs <= n < -1 /\ r = freg_param n) - | S (Outgoing ofs' ty) => ofs' >= 0 /\ ofs' >= ofs - | S _ => False + | R r => In r destroyed_at_call + | S Outgoing ofs' ty => ofs' >= 0 /\ ofs <= ofs' /\ ty <> Tlong + | S _ _ _ => False end. Proof. induction tyl; simpl loc_arguments_rec; intros. elim H. - destruct a; elim H; intro. - subst l. destruct (zle 0 ofs). omega. - left. exists ofs; split; auto; omega. - generalize (IHtyl _ _ H0). - destruct l. intros [[n A] | [n A]]; [left|right]; exists n; intuition omega. - destruct s; auto; omega. - subst l. destruct (zle (-1) ofs). - split. apply Zle_ge. change 0 with (align (-1) 2). apply align_monotone; omega. - apply Zle_ge. apply align_le. omega. - right. exists ofs. intuition. + destruct a. +- (* Tint *) + destruct H. + subst l. destruct (zle 0 ofs). + split. omega. split. omega. congruence. + apply ireg_param_caller_save. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. +- (* Tfloat *) assert (ofs <= align ofs 2) by (apply align_le; omega). - generalize (IHtyl _ _ H0). - destruct l. intros [[n A] | [n A]]; [left|right]; exists n; intuition omega. - destruct s; auto; omega. -Qed. - -Lemma loc_notin_in_diff: - forall l ll, - Loc.notin l ll <-> (forall l', In l' ll -> Loc.diff l l'). -Proof. - induction ll; simpl; intuition. subst l'. auto. -Qed. - -Remark loc_arguments_rec_notin_local: - forall tyl ofs ofs0 ty0, - Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl ofs). -Proof. - intros. rewrite loc_notin_in_diff. intros. - exploit loc_arguments_rec_charact; eauto. - destruct l'; intros; simpl; auto. destruct s; auto; contradiction. + destruct H. + subst l. destruct (zle 0 (align ofs 2)). + split. omega. split. auto. congruence. + apply freg_param_caller_save. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. +- (* Tlong *) + assert (ofs <= align ofs 2) by (apply align_le; omega). + destruct H. + subst l. destruct (zle 0 (align ofs 2)). + split. omega. split. omega. congruence. + apply ireg_param_caller_save. + destruct H. + subst l. destruct (zle 0 (align ofs 2)). + split. omega. split. omega. congruence. + apply ireg_param_caller_save. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. Qed. Lemma loc_arguments_acceptable: forall (s: signature) (r: loc), In r (loc_arguments s) -> loc_argument_acceptable r. Proof. - unfold loc_arguments; intros. + unfold loc_arguments, loc_argument_acceptable; intros. generalize (loc_arguments_rec_charact _ _ _ H). - destruct r; simpl. - intros [[n [A B]] | [n [A B]]]; subst m. - functional induction (ireg_param n); intuition congruence. - functional induction (freg_param n); intuition congruence. - destruct s0; tauto. + destruct r; auto. + destruct sl; auto. + tauto. Qed. Hint Resolve loc_arguments_acceptable: locs. -(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *) - -Lemma loc_arguments_norepet: - forall (s: signature), Loc.norepet (loc_arguments s). -Proof. - unfold loc_arguments; intros. - assert (forall tyl ofs, -4 <= ofs -> Loc.norepet (loc_arguments_rec tyl ofs)). - induction tyl; simpl; intros. - constructor. - destruct a; constructor. - rewrite loc_notin_in_diff. intros. exploit loc_arguments_rec_charact; eauto. - destruct (zle 0 ofs); destruct l'; simpl; auto. - destruct s0; intuition. - intros [[n [A B]] | [n [A B]]]; subst m. - functional induction (ireg_param ofs); functional induction (ireg_param n); congruence || omegaContradiction. - functional induction (ireg_param ofs); functional induction (freg_param n); congruence || omegaContradiction. - apply IHtyl. omega. - rewrite loc_notin_in_diff. intros. exploit loc_arguments_rec_charact; eauto. - destruct (zle (-1) ofs); destruct l'; simpl; auto. - destruct s0; intuition. - intros [[n [A B]] | [n [A B]]]; subst m. - functional induction (freg_param ofs); functional induction (ireg_param n); congruence || omegaContradiction. - functional induction (freg_param ofs); functional induction (freg_param n); try (congruence || omegaContradiction). - compute in A. intuition. - compute in A. intuition. - compute in A. intuition. - compute in A. intuition. - compute in A. intuition. - apply IHtyl. assert (ofs <= align ofs 2) by (apply align_le; omega). omega. - apply H. omega. -Qed. - (** The offsets of [Outgoing] arguments are below [size_arguments s]. *) Remark size_arguments_rec_above: @@ -468,6 +407,8 @@ Proof. apply Zle_trans with (ofs + 1); auto; omega. assert (ofs <= align ofs 2) by (apply align_le; omega). apply Zle_trans with (align ofs 2 + 2); auto; omega. + assert (ofs <= align ofs 2) by (apply align_le; omega). + apply Zle_trans with (align ofs 2 + 2); auto; omega. Qed. Lemma size_arguments_above: @@ -478,79 +419,35 @@ Qed. Lemma loc_arguments_bounded: forall (s: signature) (ofs: Z) (ty: typ), - In (S (Outgoing ofs ty)) (loc_arguments s) -> + In (S Outgoing ofs ty) (loc_arguments s) -> ofs + typesize ty <= size_arguments s. Proof. intros. assert (forall tyl ofs0, 0 <= ofs0 -> ofs0 <= Zmax 0 (size_arguments_rec tyl ofs0)). + { intros. generalize (size_arguments_rec_above tyl ofs0). intros. - rewrite Zmax_spec. rewrite zlt_false. auto. omega. + rewrite Zmax_spec. rewrite zlt_false. auto. omega. + } assert (forall tyl ofs0, - In (S (Outgoing ofs ty)) (loc_arguments_rec tyl ofs0) -> + In (S Outgoing ofs ty) (loc_arguments_rec tyl ofs0) -> ofs + typesize ty <= Zmax 0 (size_arguments_rec tyl ofs0)). - induction tyl; simpl; intros. - elim H1. - destruct a; elim H1; intros. - destruct (zle 0 ofs0); inv H2. apply H0. omega. auto. - destruct (zle (-1) ofs0); inv H2. apply H0. - assert (align (-1) 2 <= align ofs0 2). apply align_monotone. omega. auto. - change (align (-1) 2) with 0 in H2. omega. - auto. - + { + induction tyl; simpl; intros. + elim H1. + destruct a. + - (* Tint *) + destruct H1; auto. destruct (zle 0 ofs0); inv H1. apply H0. omega. + - (* Tfloat *) + destruct H1; auto. destruct (zle 0 (align ofs0 2)); inv H1. apply H0. omega. + - (* Tlong *) + destruct H1. + destruct (zle 0 (align ofs0 2)); inv H1. + eapply Zle_trans. 2: apply H0. simpl typesize; omega. omega. + destruct H1; auto. + destruct (zle 0 (align ofs0 2)); inv H1. + eapply Zle_trans. 2: apply H0. simpl typesize; omega. omega. + } unfold size_arguments. apply H1. auto. Qed. - -(** Temporary registers do not overlap with argument locations. *) - -Lemma loc_arguments_not_temporaries: - forall sig, Loc.disjoint (loc_arguments sig) temporaries. -Proof. - intros; red; intros x1 x2 A B. - exploit loc_arguments_acceptable; eauto. unfold loc_argument_acceptable. - destruct x1; intros. simpl. destruct x2; auto. intuition congruence. - destruct s; try contradiction. revert B. simpl. ElimOrEq; auto. -Qed. -Hint Resolve loc_arguments_not_temporaries: locs. - -(** Argument registers are caller-save. *) - -Lemma arguments_caller_save: - forall sig r, - In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call. -Proof. - unfold loc_arguments; intros. - destruct (loc_arguments_rec_charact _ _ _ H) as [[n [A B]] | [n [A B]]]; subst r. - functional induction (ireg_param n); simpl; auto. omegaContradiction. - functional induction (freg_param n); simpl; auto 10. -Qed. - -(** Argument locations agree in number with the function signature. *) - -Lemma loc_arguments_length: - forall sig, - List.length (loc_arguments sig) = List.length sig.(sig_args). -Proof. - assert (forall tyl ofs, List.length (loc_arguments_rec tyl ofs) = List.length tyl). - induction tyl; simpl; intros. - auto. - destruct a; simpl; decEq; auto. - - intros. unfold loc_arguments. auto. -Qed. - -(** Argument locations agree in types with the function signature. *) - -Lemma loc_arguments_type: - forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args). -Proof. - assert (forall tyl ofs, List.map Loc.type (loc_arguments_rec tyl ofs) = tyl). - induction tyl; simpl; intros. - auto. - destruct a; simpl; decEq; auto. - destruct (zle 0 ofs). auto. functional induction (ireg_param ofs); auto. - destruct (zle (-1) ofs). auto. functional induction (freg_param ofs); auto. - - intros. unfold loc_arguments. apply H. -Qed. diff --git a/arm/linux/Stacklayout.v b/arm/linux/Stacklayout.v index d84da6b..7694dcf 100644 --- a/arm/linux/Stacklayout.v +++ b/arm/linux/Stacklayout.v @@ -18,11 +18,8 @@ Require Import Bounds. (** The general shape of activation records is as follows, from bottom (lowest offsets) to top: - Space for outgoing arguments to function calls. -- Local stack slots of integer type. +- Local stack slots. - Saved values of integer callee-save registers used by the function. -- One word of padding, if necessary to align the following data - on a 8-byte boundary. -- Local stack slots of float type. - Saved values of float callee-save registers used by the function. - Saved return address into caller. - Pointer to activation record of the caller. @@ -38,10 +35,9 @@ Record frame_env : Type := mk_frame_env { fe_size: Z; fe_ofs_link: Z; fe_ofs_retaddr: Z; - fe_ofs_int_local: Z; + fe_ofs_local: Z; fe_ofs_int_callee_save: Z; fe_num_int_callee_save: Z; - fe_ofs_float_local: Z; fe_ofs_float_callee_save: Z; fe_num_float_callee_save: Z; fe_stack_data: Z @@ -51,18 +47,17 @@ Record frame_env : Type := mk_frame_env { function. *) Definition make_env (b: bounds) := - let oil := 4 * b.(bound_outgoing) in (* integer locals *) - let oics := oil + 4 * b.(bound_int_local) in (* integer callee-saves *) + let ol := align (4 * b.(bound_outgoing)) 8 in (* locals *) + let oics := ol + 4 * b.(bound_local) in (* integer callee-saves *) let oendi := oics + 4 * b.(bound_int_callee_save) in - let ofl := align oendi 8 in (* float locals *) - let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *) + let ofcs := align oendi 8 in (* float callee-saves *) let ora := ofcs + 8 * b.(bound_float_callee_save) in (* retaddr *) let olink := ora + 4 in (* back link *) let ostkdata := olink + 4 in (* stack data *) let sz := align (ostkdata + b.(bound_stack_data)) 8 in - mk_frame_env sz olink ora - oil oics b.(bound_int_callee_save) - ofl ofcs b.(bound_float_callee_save) + mk_frame_env sz olink ora ol + oics b.(bound_int_callee_save) + ofcs b.(bound_float_callee_save) ostkdata. (** Separation property *) @@ -71,26 +66,24 @@ Remark frame_env_separated: forall b, let fe := make_env b in 0 <= fe_ofs_arg - /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_int_local) - /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_int_callee_save) - /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local) - /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save) + /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_local) + /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_ofs_int_callee_save) + /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save) /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_ofs_retaddr) /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_ofs_link) /\ fe.(fe_ofs_link) + 4 <= fe.(fe_stack_data) /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size). Proof. intros. - generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)). - generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 8 (refl_equal _)). + generalize (align_le (4 * bound_outgoing b) 8 (refl_equal)). + generalize (align_le (fe_ofs_int_callee_save fe + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)). + generalize (align_le (fe_stack_data fe + b.(bound_stack_data)) 8 (refl_equal)). unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr, - fe_ofs_int_local, fe_ofs_int_callee_save, - fe_num_int_callee_save, - fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save, + fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save, + fe_ofs_float_callee_save, fe_num_float_callee_save, fe_stack_data, fe_ofs_arg. intros. - generalize (bound_int_local_pos b); intro; - generalize (bound_float_local_pos b); intro; + generalize (bound_local_pos b); intro; generalize (bound_int_callee_save_pos b); intro; generalize (bound_float_callee_save_pos b); intro; generalize (bound_outgoing_pos b); intro; @@ -104,9 +97,8 @@ Remark frame_env_aligned: forall b, let fe := make_env b in (4 | fe.(fe_ofs_link)) - /\ (4 | fe.(fe_ofs_int_local)) + /\ (8 | fe.(fe_ofs_local)) /\ (4 | fe.(fe_ofs_int_callee_save)) - /\ (8 | fe.(fe_ofs_float_local)) /\ (8 | fe.(fe_ofs_float_callee_save)) /\ (4 | fe.(fe_ofs_retaddr)) /\ (8 | fe.(fe_stack_data)) @@ -114,30 +106,27 @@ Remark frame_env_aligned: Proof. intros. unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr, - fe_ofs_int_local, fe_ofs_int_callee_save, - fe_num_int_callee_save, - fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save, + fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save, + fe_ofs_float_callee_save, fe_num_float_callee_save, fe_stack_data. set (x1 := 4 * bound_outgoing b). assert (4 | x1). unfold x1; exists (bound_outgoing b); ring. - set (x2 := x1 + 4 * bound_int_local b). - assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring. - set (x3 := x2 + 4 * bound_int_callee_save b). - set (x4 := align x3 8). - assert (8 | x4). unfold x4. apply align_divides. omega. - set (x5 := x4 + 8 * bound_float_local b). - assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring. - set (x6 := x5 + 8 * bound_float_callee_save b). - assert (8 | x6). - unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring. - assert (4 | x6). - apply Zdivides_trans with 8. exists 2; auto. auto. + set (x2 := align x1 8). + assert (8 | x2). apply align_divides. omega. + set (x3 := x2 + 4 * bound_local b). + assert (4 | x3). apply Zdivide_plus_r. apply Zdivides_trans with 8; auto. exists 2; auto. + exists (bound_local b); ring. + set (x4 := align (x3 + 4 * bound_int_callee_save b) 8). + assert (8 | x4). apply align_divides. omega. + set (x5 := x4 + 8 * bound_float_callee_save b). + assert (8 | x5). apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring. + assert (4 | x5). apply Zdivides_trans with 8; auto. exists 2; auto. + set (x6 := x5 + 4). + assert (4 | x6). apply Zdivide_plus_r; auto. exists 1; auto. set (x7 := x6 + 4). - assert (4 | x7). unfold x7; apply Zdivide_plus_r; auto. exists 1; auto. - set (x8 := x7 + 4). - assert (8 | x8). unfold x8, x7. replace (x6 + 4 + 4) with (x6 + 8) by omega. - apply Zdivide_plus_r; auto. exists 1; auto. - set (x9 := align (x8 + bound_stack_data b) 8). - assert (8 | x9). unfold x9; apply align_divides. omega. + assert (8 | x7). unfold x7, x6. replace (x5 + 4 + 4) with (x5 + 8) by omega. + apply Zdivide_plus_r; auto. exists 1; auto. + set (x8 := align (x7 + bound_stack_data b) 8). + assert (8 | x8). apply align_divides. omega. tauto. Qed. diff --git a/backend/Allocation.v b/backend/Allocation.v index caaf09d..8674335 100644 --- a/backend/Allocation.v +++ b/backend/Allocation.v @@ -10,107 +10,1169 @@ (* *) (* *********************************************************************) -(** Register allocation. *) +(** Register allocation by external oracle and a posteriori validation. *) +Require Import FSets. +Require FSetAVLplus. Require Import Coqlib. +Require Import Ordered. Require Import Errors. Require Import Maps. Require Import Lattice. Require Import AST. +Require Import Integers. +Require Import Memdata. Require Import Op. Require Import Registers. Require Import RTL. -Require Import RTLtyping. -Require Import Liveness. +Require Import Kildall. Require Import Locations. +Require Import Conventions. +Require Import RTLtyping. Require Import LTL. -Require Import Coloring. - -(** * Translation from RTL to LTL *) - -(** Each [RTL] instruction translates to an [LTL] instruction. - The register assignment [assign] returned by register allocation - is applied to the arguments and results of the RTL - instruction. Moreover, dead instructions and redundant moves - are eliminated (turned into a [Lnop] instruction). - Dead instructions are instructions without side-effects ([Iop] and - [Iload]) whose result register is dead, i.e. whose result value - is never used. Redundant moves are moves whose source and destination - are assigned the same location. *) - -Definition is_redundant_move - (op: operation) (args: list reg) (res: reg) (assign: reg -> loc) : bool := - match is_move_operation op args with - | None => false - | Some src => if Loc.eq (assign src) (assign res) then true else false - end. - -Definition transf_instr - (f: RTL.function) (live: PMap.t Regset.t) (assign: reg -> loc) - (pc: node) (instr: RTL.instruction) : LTL.instruction := - match instr with + +(** The validation algorithm used here is described in + "Validating register allocation and spilling", + by Silvain Rideau and Xavier Leroy, + in Compiler Construction (CC 2010), LNCS 6011, Springer, 2010. *) + +(** * Structural checks *) + +(** As a first pass, we check the LTL code returned by the external oracle + against the original RTL code for structural conformance. + Each RTL instruction was transformed into a LTL basic block whose + shape must agree with the RTL instruction. For example, if the RTL + instruction is [Istore(Mint32, addr, args, src, s)], the LTL basic block + must be of the following shape: +- zero, one or several "move" instructions +- a store instruction [Lstore(Mint32, addr, args', src')] +- a [Lbranch s] instruction. + + The [block_shape] type below describes all possible cases of structural + maching between an RTL instruction and an LTL basic block. +*) + +Definition moves := list (loc * loc)%type. + +Inductive block_shape: Type := + | BSnop (mv: moves) (s: node) + | BSmove (src: reg) (dst: reg) (mv: moves) (s: node) + | BSmakelong (src1 src2: reg) (dst: reg) (mv: moves) (s: node) + | BSlowlong (src: reg) (dst: reg) (mv: moves) (s: node) + | BShighlong (src: reg) (dst: reg) (mv: moves) (s: node) + | BSop (op: operation) (args: list reg) (res: reg) + (mv1: moves) (args': list mreg) (res': mreg) + (mv2: moves) (s: node) + | BSopdead (op: operation) (args: list reg) (res: reg) + (mv: moves) (s: node) + | BSload (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg) + (mv1: moves) (args': list mreg) (dst': mreg) + (mv2: moves) (s: node) + | BSloaddead (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg) + (mv: moves) (s: node) + | BSload2 (addr1 addr2: addressing) (args: list reg) (dst: reg) + (mv1: moves) (args1': list mreg) (dst1': mreg) + (mv2: moves) (args2': list mreg) (dst2': mreg) + (mv3: moves) (s: node) + | BSload2_1 (addr: addressing) (args: list reg) (dst: reg) + (mv1: moves) (args': list mreg) (dst': mreg) + (mv2: moves) (s: node) + | BSload2_2 (addr addr': addressing) (args: list reg) (dst: reg) + (mv1: moves) (args': list mreg) (dst': mreg) + (mv2: moves) (s: node) + | BSstore (chunk: memory_chunk) (addr: addressing) (args: list reg) (src: reg) + (mv1: moves) (args': list mreg) (src': mreg) + (s: node) + | BSstore2 (addr1 addr2: addressing) (args: list reg) (src: reg) + (mv1: moves) (args1': list mreg) (src1': mreg) + (mv2: moves) (args2': list mreg) (src2': mreg) + (s: node) + | BScall (sg: signature) (ros: reg + ident) (args: list reg) (res: reg) + (mv1: moves) (ros': mreg + ident) (mv2: moves) (s: node) + | BStailcall (sg: signature) (ros: reg + ident) (args: list reg) + (mv1: moves) (ros': mreg + ident) + | BSbuiltin (ef: external_function) (args: list reg) (res: reg) + (mv1: moves) (args': list mreg) (res': list mreg) + (mv2: moves) (s: node) + | BSannot (text: ident) (targs: list annot_arg) (args: list reg) (res: reg) + (mv: moves) (args': list loc) (s: node) + | BScond (cond: condition) (args: list reg) + (mv: moves) (args': list mreg) (s1 s2: node) + | BSjumptable (arg: reg) + (mv: moves) (arg': mreg) (tbl: list node) + | BSreturn (arg: option reg) + (mv: moves). + +(** Extract the move instructions at the beginning of block [b]. + Return the list of moves and the suffix of [b] after the moves. *) + +Fixpoint extract_moves (accu: moves) (b: bblock) {struct b} : moves * bblock := + match b with + | Lgetstack sl ofs ty dst :: b' => + extract_moves ((S sl ofs ty, R dst) :: accu) b' + | Lsetstack src sl ofs ty :: b' => + extract_moves ((R src, S sl ofs ty) :: accu) b' + | Lop op args res :: b' => + match is_move_operation op args with + | Some arg => extract_moves ((R arg, R res) :: accu) b' + | None => (List.rev accu, b) + end + | _ => + (List.rev accu, b) + end. + +Definition check_succ (s: node) (b: LTL.bblock) : bool := + match b with + | Lbranch s' :: _ => peq s s' + | _ => false + end. + +Parameter eq_operation: forall (x y: operation), {x=y} + {x<>y}. +Parameter eq_addressing: forall (x y: addressing), {x=y} + {x<>y}. +Parameter eq_opt_addressing: forall (x y: option addressing), {x=y} + {x<>y}. +Parameter eq_condition: forall (x y: condition), {x=y} + {x<>y}. +Parameter eq_chunk: forall (x y: memory_chunk), {x=y} + {x<>y}. +Parameter eq_external_function: forall (x y: external_function), {x=y} + {x<>y}. +Parameter eq_signature: forall (x y: signature), {x=y} + {x<>y}. + +Notation "'do' X <- A ; B" := (match A with Some X => B | None => None end) + (at level 200, X ident, A at level 100, B at level 200) + : option_monad_scope. + +Notation "'assertion' A ; B" := (if A then B else None) + (at level 200, A at level 100, B at level 200) + : option_monad_scope. + +Local Open Scope option_monad_scope. + +(** Classify operations into moves, 64-bit integer operations, and other + arithmetic/logical operations. *) + +Inductive operation_kind: operation -> list reg -> Type := + | operation_Omove: forall arg, operation_kind Omove (arg :: nil) + | operation_Omakelong: forall arg1 arg2, operation_kind Omakelong (arg1 :: arg2 :: nil) + | operation_Olowlong: forall arg, operation_kind Olowlong (arg :: nil) + | operation_Ohighlong: forall arg, operation_kind Ohighlong (arg :: nil) + | operation_other: forall op args, operation_kind op args. + +Definition classify_operation (op: operation) (args: list reg) : operation_kind op args := + match op, args with + | Omove, arg::nil => operation_Omove arg + | Omakelong, arg1::arg2::nil => operation_Omakelong arg1 arg2 + | Olowlong, arg::nil => operation_Olowlong arg + | Ohighlong, arg::nil => operation_Ohighlong arg + | op, args => operation_other op args + end. + +(** Check RTL instruction [i] against LTL basic block [b]. + On success, return [Some] with a [block_shape] describing the correspondence. + On error, return [None]. *) + +Definition pair_instr_block + (i: RTL.instruction) (b: LTL.bblock) : option block_shape := + match i with | Inop s => - Lnop s + let (mv, b1) := extract_moves nil b in + assertion (check_succ s b1); Some(BSnop mv s) | Iop op args res s => - if Regset.mem res live!!pc then - if is_redundant_move op args res assign then - Lnop s - else - Lop op (List.map assign args) (assign res) s - else - Lnop s + match classify_operation op args with + | operation_Omove arg => + let (mv, b1) := extract_moves nil b in + assertion (check_succ s b1); Some(BSmove arg res mv s) + | operation_Omakelong arg1 arg2 => + let (mv, b1) := extract_moves nil b in + assertion (check_succ s b1); Some(BSmakelong arg1 arg2 res mv s) + | operation_Olowlong arg => + let (mv, b1) := extract_moves nil b in + assertion (check_succ s b1); Some(BSlowlong arg res mv s) + | operation_Ohighlong arg => + let (mv, b1) := extract_moves nil b in + assertion (check_succ s b1); Some(BShighlong arg res mv s) + | operation_other _ _ => + let (mv1, b1) := extract_moves nil b in + match b1 with + | Lop op' args' res' :: b2 => + let (mv2, b3) := extract_moves nil b2 in + assertion (eq_operation op op'); + assertion (check_succ s b3); + Some(BSop op args res mv1 args' res' mv2 s) + | _ => + assertion (check_succ s b1); + Some(BSopdead op args res mv1 s) + end + end | Iload chunk addr args dst s => - if Regset.mem dst live!!pc then - Lload chunk addr (List.map assign args) (assign dst) s - else - Lnop s + let (mv1, b1) := extract_moves nil b in + match b1 with + | Lload chunk' addr' args' dst' :: b2 => + if eq_chunk chunk Mint64 then + assertion (eq_chunk chunk' Mint32); + let (mv2, b3) := extract_moves nil b2 in + match b3 with + | Lload chunk'' addr'' args'' dst'' :: b4 => + let (mv3, b5) := extract_moves nil b4 in + assertion (eq_chunk chunk'' Mint32); + assertion (eq_addressing addr addr'); + assertion (eq_opt_addressing (offset_addressing addr (Int.repr 4)) (Some addr'')); + assertion (check_succ s b5); + Some(BSload2 addr addr'' args dst mv1 args' dst' mv2 args'' dst'' mv3 s) + | _ => + assertion (check_succ s b3); + if (eq_addressing addr addr') then + Some(BSload2_1 addr args dst mv1 args' dst' mv2 s) + else + (assertion (eq_opt_addressing (offset_addressing addr (Int.repr 4)) (Some addr')); + Some(BSload2_2 addr addr' args dst mv1 args' dst' mv2 s)) + end + else ( + let (mv2, b3) := extract_moves nil b2 in + assertion (eq_chunk chunk chunk'); + assertion (eq_addressing addr addr'); + assertion (check_succ s b3); + Some(BSload chunk addr args dst mv1 args' dst' mv2 s)) + | _ => + assertion (check_succ s b1); + Some(BSloaddead chunk addr args dst mv1 s) + end | Istore chunk addr args src s => - Lstore chunk addr (List.map assign args) (assign src) s - | Icall sig ros args res s => - Lcall sig (sum_left_map assign ros) (List.map assign args) - (assign res) s - | Itailcall sig ros args => - Ltailcall sig (sum_left_map assign ros) (List.map assign args) + let (mv1, b1) := extract_moves nil b in + match b1 with + | Lstore chunk' addr' args' src' :: b2 => + if eq_chunk chunk Mint64 then + let (mv2, b3) := extract_moves nil b2 in + match b3 with + | Lstore chunk'' addr'' args'' src'' :: b4 => + assertion (eq_chunk chunk' Mint32); + assertion (eq_chunk chunk'' Mint32); + assertion (eq_addressing addr addr'); + assertion (eq_opt_addressing (offset_addressing addr (Int.repr 4)) (Some addr'')); + assertion (check_succ s b4); + Some(BSstore2 addr addr'' args src mv1 args' src' mv2 args'' src'' s) + | _ => None + end + else ( + assertion (eq_chunk chunk chunk'); + assertion (eq_addressing addr addr'); + assertion (check_succ s b2); + Some(BSstore chunk addr args src mv1 args' src' s)) + | _ => None + end + | Icall sg ros args res s => + let (mv1, b1) := extract_moves nil b in + match b1 with + | Lcall sg' ros' :: b2 => + let (mv2, b3) := extract_moves nil b2 in + assertion (eq_signature sg sg'); + assertion (check_succ s b3); + Some(BScall sg ros args res mv1 ros' mv2 s) + | _ => None + end + | Itailcall sg ros args => + let (mv1, b1) := extract_moves nil b in + match b1 with + | Ltailcall sg' ros' :: b2 => + assertion (eq_signature sg sg'); + Some(BStailcall sg ros args mv1 ros') + | _ => None + end | Ibuiltin ef args res s => - Lbuiltin ef (List.map assign args) (assign res) s - | Icond cond args ifso ifnot => - Lcond cond (List.map assign args) ifso ifnot + let (mv1, b1) := extract_moves nil b in + match b1 with + | Lbuiltin ef' args' res' :: b2 => + let (mv2, b3) := extract_moves nil b2 in + assertion (eq_external_function ef ef'); + assertion (check_succ s b3); + Some(BSbuiltin ef args res mv1 args' res' mv2 s) + | Lannot ef' args' :: b2 => + assertion (eq_external_function ef ef'); + assertion (check_succ s b2); + match ef with + | EF_annot txt typ => Some(BSannot txt typ args res mv1 args' s) + | _ => None + end + | _ => None + end + | Icond cond args s1 s2 => + let (mv1, b1) := extract_moves nil b in + match b1 with + | Lcond cond' args' s1' s2' :: b2 => + assertion (eq_condition cond cond'); + assertion (peq s1 s1'); + assertion (peq s2 s2'); + Some(BScond cond args mv1 args' s1 s2) + | _ => None + end | Ijumptable arg tbl => - Ljumptable (assign arg) tbl - | Ireturn optarg => - Lreturn (option_map assign optarg) + let (mv1, b1) := extract_moves nil b in + match b1 with + | Ljumptable arg' tbl' :: b2 => + assertion (list_eq_dec peq tbl tbl'); + Some(BSjumptable arg mv1 arg' tbl) + | _ => None + end + | Ireturn arg => + let (mv1, b1) := extract_moves nil b in + match b1 with + | Lreturn :: b2 => Some(BSreturn arg mv1) + | _ => None + end + end. + +(** Check all instructions of the RTL function [f1] against the corresponding + basic blocks of LTL function [f2]. Return a map from CFG nodes to + [block_shape] info. *) + +Definition pair_codes (f1: RTL.function) (f2: LTL.function) : PTree.t block_shape := + PTree.combine + (fun opti optb => do i <- opti; do b <- optb; pair_instr_block i b) + (RTL.fn_code f1) (LTL.fn_code f2). + +(** Check the entry point code of the LTL function [f2]. It must be + a sequence of moves that branches to the same node as the entry point + of RTL function [f1]. *) + +Definition pair_entrypoints (f1: RTL.function) (f2: LTL.function) : option moves := + do b <- (LTL.fn_code f2)!(LTL.fn_entrypoint f2); + let (mv, b1) := extract_moves nil b in + assertion (check_succ (RTL.fn_entrypoint f1) b1); + Some mv. + +(** * Representing sets of equations between RTL registers and LTL locations. *) + +(** The Rideau-Leroy validation algorithm manipulates sets of equations of + the form [pseudoreg = location [kind]], meaning: +- if [kind = Full], the value of [location] in the generated LTL code is + the same as (or more defined than) the value of [pseudoreg] in the original + RTL code; +- if [kind = Low], the value of [location] in the generated LTL code is + the same as (or more defined than) the low 32 bits of the 64-bit + integer value of [pseudoreg] in the original RTL code; +- if [kind = High], the value of [location] in the generated LTL code is + the same as (or more defined than) the high 32 bits of the 64-bit + integer value of [pseudoreg] in the original RTL code. +*) + +Inductive equation_kind : Type := Full | Low | High. + +Record equation := Eq { + ekind: equation_kind; + ereg: reg; + eloc: loc +}. + +(** We use AVL finite sets to represent sets of equations. Therefore, we need + total orders over equations and their components. *) + +Module IndexedEqKind <: INDEXED_TYPE. + Definition t := equation_kind. + Definition index (x: t) := + match x with Full => 1%positive | Low => 2%positive | High => 3%positive end. + Lemma index_inj: forall x y, index x = index y -> x = y. + Proof. destruct x; destruct y; simpl; congruence. Qed. + Definition eq (x y: t) : {x=y} + {x<>y}. + Proof. decide equality. Defined. +End IndexedEqKind. + +Module OrderedEqKind := OrderedIndexed(IndexedEqKind). + +(** This is an order over equations that is lexicgraphic on [ereg], then + [eloc], then [ekind]. *) + +Module OrderedEquation <: OrderedType. + Definition t := equation. + Definition eq (x y: t) := x = y. + Definition lt (x y: t) := + Plt (ereg x) (ereg y) \/ (ereg x = ereg y /\ + (OrderedLoc.lt (eloc x) (eloc y) \/ (eloc x = eloc y /\ + OrderedEqKind.lt (ekind x) (ekind y)))). + Lemma eq_refl : forall x : t, eq x x. + Proof (@refl_equal t). + Lemma eq_sym : forall x y : t, eq x y -> eq y x. + Proof (@sym_equal t). + Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z. + Proof (@trans_equal t). + Lemma lt_trans : forall x y z : t, lt x y -> lt y z -> lt x z. + Proof. + unfold lt; intros. + destruct H. + destruct H0. left; eapply Plt_trans; eauto. + destruct H0. rewrite <- H0. auto. + destruct H. rewrite H. + destruct H0. auto. + destruct H0. right; split; auto. + intuition. + left; eapply OrderedLoc.lt_trans; eauto. + left; congruence. + left; congruence. + right; split. congruence. eapply OrderedEqKind.lt_trans; eauto. + Qed. + Lemma lt_not_eq : forall x y : t, lt x y -> ~ eq x y. + Proof. + unfold lt, eq; intros; red; intros. subst y. intuition. + eelim Plt_strict; eauto. + eelim OrderedLoc.lt_not_eq; eauto. red; auto. + eelim OrderedEqKind.lt_not_eq; eauto. red; auto. + Qed. + Definition compare : forall x y : t, Compare lt eq x y. + Proof. + intros. + destruct (OrderedPositive.compare (ereg x) (ereg y)). + - apply LT. red; auto. + - destruct (OrderedLoc.compare (eloc x) (eloc y)). + + apply LT. red; auto. + + destruct (OrderedEqKind.compare (ekind x) (ekind y)). + * apply LT. red; auto. + * apply EQ. red in e; red in e0; red in e1; red. + destruct x; destruct y; simpl in *; congruence. + * apply GT. red; auto. + + apply GT. red; auto. + - apply GT. red; auto. + Defined. + Definition eq_dec (x y: t) : {x = y} + {x <> y}. + Proof. + intros. decide equality. + apply Loc.eq. + apply peq. + apply IndexedEqKind.eq. + Defined. +End OrderedEquation. + +(** This is an alternate order over equations that is lexicgraphic on + [eloc], then [ereg], then [ekind]. *) + +Module OrderedEquation' <: OrderedType. + Definition t := equation. + Definition eq (x y: t) := x = y. + Definition lt (x y: t) := + OrderedLoc.lt (eloc x) (eloc y) \/ (eloc x = eloc y /\ + (Plt (ereg x) (ereg y) \/ (ereg x = ereg y /\ + OrderedEqKind.lt (ekind x) (ekind y)))). + Lemma eq_refl : forall x : t, eq x x. + Proof (@refl_equal t). + Lemma eq_sym : forall x y : t, eq x y -> eq y x. + Proof (@sym_equal t). + Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z. + Proof (@trans_equal t). + Lemma lt_trans : forall x y z : t, lt x y -> lt y z -> lt x z. + Proof. + unfold lt; intros. + destruct H. + destruct H0. left; eapply OrderedLoc.lt_trans; eauto. + destruct H0. rewrite <- H0. auto. + destruct H. rewrite H. + destruct H0. auto. + destruct H0. right; split; auto. + intuition. + left; eapply Plt_trans; eauto. + left; congruence. + left; congruence. + right; split. congruence. eapply OrderedEqKind.lt_trans; eauto. + Qed. + Lemma lt_not_eq : forall x y : t, lt x y -> ~ eq x y. + Proof. + unfold lt, eq; intros; red; intros. subst y. intuition. + eelim OrderedLoc.lt_not_eq; eauto. red; auto. + eelim Plt_strict; eauto. + eelim OrderedEqKind.lt_not_eq; eauto. red; auto. + Qed. + Definition compare : forall x y : t, Compare lt eq x y. + Proof. + intros. + destruct (OrderedLoc.compare (eloc x) (eloc y)). + - apply LT. red; auto. + - destruct (OrderedPositive.compare (ereg x) (ereg y)). + + apply LT. red; auto. + + destruct (OrderedEqKind.compare (ekind x) (ekind y)). + * apply LT. red; auto. + * apply EQ. red in e; red in e0; red in e1; red. + destruct x; destruct y; simpl in *; congruence. + * apply GT. red; auto. + + apply GT. red; auto. + - apply GT. red; auto. + Defined. + Definition eq_dec: forall (x y: t), {x = y} + {x <> y} := OrderedEquation.eq_dec. +End OrderedEquation'. + +Module EqSet := FSetAVLplus.Make(OrderedEquation). +Module EqSet2 := FSetAVLplus.Make(OrderedEquation'). + +(** We use a redundant representation for sets of equations, comprising + two AVL finite sets, containing the same elements, but ordered along + the two orders defined above. Playing on properties of lexicographic + orders, this redundant representation enables us to quickly find + all equations involving a given RTL pseudoregister, or all equations + involving a given LTL location or overlapping location. *) + +Record eqs := mkeqs { + eqs1 :> EqSet.t; + eqs2 : EqSet2.t; + eqs_same: forall q, EqSet2.In q eqs2 <-> EqSet.In q eqs1 +}. + +(** * Operations on sets of equations *) + +(** The empty set of equations. *) + +Program Definition empty_eqs := mkeqs EqSet.empty EqSet2.empty _. +Next Obligation. + split; intros. eelim EqSet2.empty_1; eauto. eelim EqSet.empty_1; eauto. +Qed. + +(** Adding or removing an equation from a set. *) + +Program Definition add_equation (q: equation) (e: eqs) := + mkeqs (EqSet.add q (eqs1 e)) (EqSet2.add q (eqs2 e)) _. +Next Obligation. + split; intros. + destruct (OrderedEquation'.eq_dec q q0). + apply EqSet.add_1; auto. + apply EqSet.add_2. apply (eqs_same e). apply EqSet2.add_3 with q; auto. + destruct (OrderedEquation.eq_dec q q0). + apply EqSet2.add_1; auto. + apply EqSet2.add_2. apply (eqs_same e). apply EqSet.add_3 with q; auto. +Qed. + +Program Definition remove_equation (q: equation) (e: eqs) := + mkeqs (EqSet.remove q (eqs1 e)) (EqSet2.remove q (eqs2 e)) _. +Next Obligation. + split; intros. + destruct (OrderedEquation'.eq_dec q q0). + eelim EqSet2.remove_1; eauto. + apply EqSet.remove_2; auto. apply (eqs_same e). apply EqSet2.remove_3 with q; auto. + destruct (OrderedEquation.eq_dec q q0). + eelim EqSet.remove_1; eauto. + apply EqSet2.remove_2; auto. apply (eqs_same e). apply EqSet.remove_3 with q; auto. +Qed. + +(** [reg_unconstrained r e] is true if [e] contains no equations involving + the RTL pseudoregister [r]. In other words, all equations [r' = l [kind]] + in [e] are such that [r' <> r]. *) + +Definition select_reg_l (r: reg) (q: equation) := Pos.leb r (ereg q). +Definition select_reg_h (r: reg) (q: equation) := Pos.leb (ereg q) r. + +Definition reg_unconstrained (r: reg) (e: eqs) : bool := + negb (EqSet.mem_between (select_reg_l r) (select_reg_h r) (eqs1 e)). + +(** [loc_unconstrained l e] is true if [e] contains no equations involving + the LTL location [l] or a location that partially overlaps with [l]. + In other words, all equations [r = l' [kind]] in [e] are such that + [Loc.diff l' l]. *) + +Definition select_loc_l (l: loc) := + let lb := OrderedLoc.diff_low_bound l in + fun (q: equation) => match OrderedLoc.compare (eloc q) lb with LT _ => false | _ => true end. +Definition select_loc_h (l: loc) := + let lh := OrderedLoc.diff_high_bound l in + fun (q: equation) => match OrderedLoc.compare (eloc q) lh with GT _ => false | _ => true end. + +Definition loc_unconstrained (l: loc) (e: eqs) : bool := + negb (EqSet2.mem_between (select_loc_l l) (select_loc_h l) (eqs2 e)). + +Definition reg_loc_unconstrained (r: reg) (l: loc) (e: eqs) : bool := + reg_unconstrained r e && loc_unconstrained l e. + +(** [subst_reg r1 r2 e] simulates the effect of assigning [r2] to [r1] on [e]. + All equations of the form [r1 = l [kind]] are replaced by [r2 = l [kind]]. +*) + +Definition subst_reg (r1 r2: reg) (e: eqs) : eqs := + EqSet.fold + (fun q e => add_equation (Eq (ekind q) r2 (eloc q)) (remove_equation q e)) + (EqSet.elements_between (select_reg_l r1) (select_reg_h r1) (eqs1 e)) + e. + +(** [subst_reg_kind r1 k1 r2 k2 e] simulates the effect of assigning + the [k2] part of [r2] to the [k1] part of [r1] on [e]. + All equations of the form [r1 = l [k1]] are replaced by [r2 = l [k2]]. +*) + +Definition subst_reg_kind (r1: reg) (k1: equation_kind) (r2: reg) (k2: equation_kind) (e: eqs) : eqs := + EqSet.fold + (fun q e => + if IndexedEqKind.eq (ekind q) k1 + then add_equation (Eq k2 r2 (eloc q)) (remove_equation q e) + else e) + (EqSet.elements_between (select_reg_l r1) (select_reg_h r1) (eqs1 e)) + e. + +(** [subst_loc l1 l2 e] simulates the effect of assigning [l2] to [l1] on [e]. + All equations of the form [r = l1 [kind]] are replaced by [r = l2 [kind]]. + Return [None] if [e] contains an equation of the form [r = l] with [l] + partially overlapping [l1]. +*) + +Definition subst_loc (l1 l2: loc) (e: eqs) : option eqs := + EqSet2.fold + (fun q opte => + match opte with + | None => None + | Some e => + if Loc.eq l1 (eloc q) then + Some (add_equation (Eq (ekind q) (ereg q) l2) (remove_equation q e)) + else + None + end) + (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e)) + (Some e). + +(** [add_equations [r1...rN] [m1...mN] e] adds to [e] the [N] equations + [ri = R mi [Full]]. Return [None] if the two lists have different lengths. +*) + +Fixpoint add_equations (rl: list reg) (ml: list mreg) (e: eqs) : option eqs := + match rl, ml with + | nil, nil => Some e + | r1 :: rl, m1 :: ml => add_equations rl ml (add_equation (Eq Full r1 (R m1)) e) + | _, _ => None + end. + +(** [add_equations_args] is similar but additionally handles the splitting + of pseudoregisters of type [Tlong] in two locations containing the + two 32-bit halves of the 64-bit integer. *) + +Function add_equations_args (rl: list reg) (tyl: list typ) (ll: list loc) (e: eqs) : option eqs := + match rl, tyl, ll with + | nil, nil, nil => Some e + | r1 :: rl, Tlong :: tyl, l1 :: l2 :: ll => + add_equations_args rl tyl ll (add_equation (Eq Low r1 l2) (add_equation (Eq High r1 l1) e)) + | r1 :: rl, (Tint|Tfloat) :: tyl, l1 :: ll => + add_equations_args rl tyl ll (add_equation (Eq Full r1 l1) e) + | _, _, _ => None + end. + +(** [add_equations_res] is similar but is specialized to the case where + there is only one pseudo-register. *) + +Function add_equations_res (r: reg) (oty: option typ) (ll: list loc) (e: eqs) : option eqs := + match oty with + | Some Tlong => + match ll with + | l1 :: l2 :: nil => Some (add_equation (Eq Low r l2) (add_equation (Eq High r l1) e)) + | _ => None + end + | _ => + match ll with + | l1 :: nil => Some (add_equation (Eq Full r l1) e) + | _ => None + end end. -Definition transf_fun (f: RTL.function) (live: PMap.t Regset.t) - (assign: reg -> loc) : LTL.function := - LTL.mkfunction - (RTL.fn_sig f) - (List.map assign (RTL.fn_params f)) - (RTL.fn_stacksize f) - (PTree.map (transf_instr f live assign) (RTL.fn_code f)) - (RTL.fn_entrypoint f). +(** [remove_equations_res] is similar to [add_equations_res] but removes + equations instead of adding them. *) + +Function remove_equations_res (r: reg) (oty: option typ) (ll: list loc) (e: eqs) : option eqs := + match oty with + | Some Tlong => + match ll with + | l1 :: l2 :: nil => + if Loc.diff_dec l2 l1 + then Some (remove_equation (Eq Low r l2) (remove_equation (Eq High r l1) e)) + else None + | _ => None + end + | _ => + match ll with + | l1 :: nil => Some (remove_equation (Eq Full r l1) e) + | _ => None + end + end. + +(** [add_equations_ros] adds an equation, if needed, between an optional + pseudoregister and an optional machine register. It is used for the + function argument of the [Icall] and [Itailcall] instructions. *) + +Definition add_equation_ros (ros: reg + ident) (ros': mreg + ident) (e: eqs) : option eqs := + match ros, ros' with + | inl r, inl mr => Some(add_equation (Eq Full r (R mr)) e) + | inr id, inr id' => assertion (ident_eq id id'); Some e + | _, _ => None + end. + +(** [can_undef ml] returns true if all machine registers in [ml] are + unconstrained and can harmlessly be undefined. *) + +Fixpoint can_undef (ml: list mreg) (e: eqs) : bool := + match ml with + | nil => true + | m1 :: ml => loc_unconstrained (R m1) e && can_undef ml e + end. + +Fixpoint can_undef_except (l: loc) (ml: list mreg) (e: eqs) : bool := + match ml with + | nil => true + | m1 :: ml => + (Loc.eq l (R m1) || loc_unconstrained (R m1) e) && can_undef_except l ml e + end. + +(** [no_caller_saves e] returns [e] if all caller-save locations are + unconstrained in [e]. In other words, [e] contains no equations + involving a caller-save register or [Outgoing] stack slot. *) + +Definition no_caller_saves (e: eqs) : bool := + EqSet.for_all + (fun eq => + match eloc eq with + | R r => + zle 0 (index_int_callee_save r) || zle 0 (index_float_callee_save r) + | S Outgoing _ _ => false + | S _ _ _ => true + end) + e. + +(** [compat_left r l e] returns true if all equations in [e] that involve + [r] are of the form [r = l [Full]]. *) + +Definition compat_left (r: reg) (l: loc) (e: eqs) : bool := + EqSet.for_all_between + (fun q => + match ekind q with + | Full => Loc.eq l (eloc q) + | _ => false + end) + (select_reg_l r) (select_reg_h r) + (eqs1 e). + +(** [compat_left2 r l1 l2 e] returns true if all equations in [e] that involve + [r] are of the form [r = l1 [High]] or [r = l2 [Low]]. *) + +Definition compat_left2 (r: reg) (l1 l2: loc) (e: eqs) : bool := + EqSet.for_all_between + (fun q => + match ekind q with + | High => Loc.eq l1 (eloc q) + | Low => Loc.eq l2 (eloc q) + | _ => false + end) + (select_reg_l r) (select_reg_h r) + (eqs1 e). + +(** [ros_compatible_tailcall ros] returns true if [ros] is a function + name or a caller-save register. This is used to check [Itailcall] + instructions. *) + +Definition ros_compatible_tailcall (ros: mreg + ident) : bool := + match ros with + | inl r => In_dec mreg_eq r destroyed_at_call + | inr id => true + end. + +(** * The validator *) + +Definition destroyed_by_move (src dst: loc) := + match src with + | S sl ofs ty => destroyed_by_getstack sl + | _ => destroyed_by_op Omove + end. + +(** Simulate the effect of a sequence of moves [mv] on a set of + equations [e]. The set [e] is the equations that must hold + after the sequence of moves. Return the set of equations that + must hold before the sequence of moves. Return [None] if the + set of equations [e] cannot hold after the sequence of moves. *) + +Fixpoint track_moves (mv: moves) (e: eqs) : option eqs := + match mv with + | nil => Some e + | (src, dst) :: mv => + do e1 <- track_moves mv e; + do e2 <- subst_loc dst src e1; + assertion (can_undef_except dst (destroyed_by_move src dst)) e1; + Some e2 + end. + +(** [transfer_use_def args res args' res' undefs e] returns the set + of equations that must hold "before" in order for the equations [e] + to hold "after" the execution of RTL and LTL code of the following form: +<< + RTL LTL + use pseudoregs args use machine registers args' + define pseudoreg res undefine machine registers undef + define machine register res' +>> + As usual, [None] is returned if the equations [e] cannot hold after + this execution. +*) + +Definition transfer_use_def (args: list reg) (res: reg) (args': list mreg) (res': mreg) + (undefs: list mreg) (e: eqs) : option eqs := + let e1 := remove_equation (Eq Full res (R res')) e in + assertion (reg_loc_unconstrained res (R res') e1); + assertion (can_undef undefs e1); + add_equations args args' e1. + +Definition kind_first_word := if big_endian then High else Low. +Definition kind_second_word := if big_endian then Low else High. + +(** The core transfer function. It takes a set [e] of equations that must + hold "after" and a block shape [shape] representing a matching pair + of an RTL instruction and an LTL basic block. It returns the set of + equations that must hold "before" these instructions, or [None] if + impossible. *) + +Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option eqs := + match shape with + | BSnop mv s => + track_moves mv e + | BSmove src dst mv s => + track_moves mv (subst_reg dst src e) + | BSmakelong src1 src2 dst mv s => + let e1 := subst_reg_kind dst High src1 Full e in + let e2 := subst_reg_kind dst Low src2 Full e1 in + assertion (reg_unconstrained dst e2); + track_moves mv e2 + | BSlowlong src dst mv s => + let e1 := subst_reg_kind dst Full src Low e in + assertion (reg_unconstrained dst e1); + track_moves mv e1 + | BShighlong src dst mv s => + let e1 := subst_reg_kind dst Full src High e in + assertion (reg_unconstrained dst e1); + track_moves mv e1 + | BSop op args res mv1 args' res' mv2 s => + do e1 <- track_moves mv2 e; + do e2 <- transfer_use_def args res args' res' (destroyed_by_op op) e1; + track_moves mv1 e2 + | BSopdead op args res mv s => + assertion (reg_unconstrained res e); + track_moves mv e + | BSload chunk addr args dst mv1 args' dst' mv2 s => + do e1 <- track_moves mv2 e; + do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1; + track_moves mv1 e2 + | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s => + do e1 <- track_moves mv3 e; + let e2 := remove_equation (Eq kind_second_word dst (R dst2')) e1 in + assertion (loc_unconstrained (R dst2') e2); + assertion (can_undef (destroyed_by_load Mint32 addr') e2); + do e3 <- add_equations args args2' e2; + do e4 <- track_moves mv2 e3; + let e5 := remove_equation (Eq kind_first_word dst (R dst1')) e4 in + assertion (loc_unconstrained (R dst1') e5); + assertion (can_undef (destroyed_by_load Mint32 addr) e5); + assertion (reg_unconstrained dst e5); + do e6 <- add_equations args args1' e5; + track_moves mv1 e6 + | BSload2_1 addr args dst mv1 args' dst' mv2 s => + do e1 <- track_moves mv2 e; + let e2 := remove_equation (Eq kind_first_word dst (R dst')) e1 in + assertion (reg_loc_unconstrained dst (R dst') e2); + assertion (can_undef (destroyed_by_load Mint32 addr) e2); + do e3 <- add_equations args args' e2; + track_moves mv1 e3 + | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s => + do e1 <- track_moves mv2 e; + let e2 := remove_equation (Eq kind_second_word dst (R dst')) e1 in + assertion (reg_loc_unconstrained dst (R dst') e2); + assertion (can_undef (destroyed_by_load Mint32 addr') e2); + do e3 <- add_equations args args' e2; + track_moves mv1 e3 + | BSloaddead chunk addr args dst mv s => + assertion (reg_unconstrained dst e); + track_moves mv e + | BSstore chunk addr args src mv args' src' s => + assertion (can_undef (destroyed_by_store chunk addr) e); + do e1 <- add_equations (src :: args) (src' :: args') e; + track_moves mv e1 + | BSstore2 addr addr' args src mv1 args1' src1' mv2 args2' src2' s => + assertion (can_undef (destroyed_by_store Mint32 addr') e); + do e1 <- add_equations args args2' + (add_equation (Eq kind_second_word src (R src2')) e); + do e2 <- track_moves mv2 e1; + assertion (can_undef (destroyed_by_store Mint32 addr) e2); + do e3 <- add_equations args args1' + (add_equation (Eq kind_first_word src (R src1')) e2); + track_moves mv1 e3 + | BScall sg ros args res mv1 ros' mv2 s => + let args' := loc_arguments sg in + let res' := map R (loc_result sg) in + do e1 <- track_moves mv2 e; + do e2 <- remove_equations_res res (sig_res sg) res' e1; + assertion (forallb (fun l => reg_loc_unconstrained res l e2) res'); + assertion (no_caller_saves e2); + do e3 <- add_equation_ros ros ros' e2; + do e4 <- add_equations_args args (sig_args sg) args' e3; + track_moves mv1 e4 + | BStailcall sg ros args mv1 ros' => + let args' := loc_arguments sg in + assertion (tailcall_is_possible sg); + assertion (opt_typ_eq sg.(sig_res) f.(RTL.fn_sig).(sig_res)); + assertion (ros_compatible_tailcall ros'); + do e1 <- add_equation_ros ros ros' empty_eqs; + do e2 <- add_equations_args args (sig_args sg) args' e1; + track_moves mv1 e2 + | BSbuiltin ef args res mv1 args' res' mv2 s => + do e1 <- track_moves mv2 e; + let args' := map R args' in + let res' := map R res' in + do e2 <- remove_equations_res res (sig_res (ef_sig ef)) res' e1; + assertion (reg_unconstrained res e2); + assertion (forallb (fun l => loc_unconstrained l e2) res'); + assertion (can_undef (destroyed_by_builtin ef) e2); + do e3 <- add_equations_args args (sig_args (ef_sig ef)) args' e2; + track_moves mv1 e3 + | BSannot txt typ args res mv1 args' s => + do e1 <- add_equations_args args (annot_args_typ typ) args' e; + track_moves mv1 e1 + | BScond cond args mv args' s1 s2 => + assertion (can_undef (destroyed_by_cond cond) e); + do e1 <- add_equations args args' e; + track_moves mv e1 + | BSjumptable arg mv arg' tbl => + assertion (can_undef destroyed_by_jumptable e); + track_moves mv (add_equation (Eq Full arg (R arg')) e) + | BSreturn None mv => + track_moves mv empty_eqs + | BSreturn (Some arg) mv => + let arg' := map R (loc_result (RTL.fn_sig f)) in + do e1 <- add_equations_res arg (sig_res (RTL.fn_sig f)) arg' empty_eqs; + track_moves mv e1 + end. + +(** The main transfer function for the dataflow analysis. Like [transfer_aux], + it infers the equations that must hold "before" as a function of the + equations that must hold "after". It also handles error propagation + and reporting. *) + +Definition transfer (f: RTL.function) (shapes: PTree.t block_shape) + (pc: node) (after: res eqs) : res eqs := + match after with + | Error _ => after + | OK e => + match shapes!pc with + | None => Error(MSG "At PC " :: POS pc :: MSG ": unmatched block" :: nil) + | Some shape => + match transfer_aux f shape e with + | None => Error(MSG "At PC " :: POS pc :: MSG ": invalid register allocation" :: nil) + | Some e' => OK e' + end + end + end. + +(** The semilattice for dataflow analysis. Operates on analysis results + of type [res eqs], that is, either a set of equations or an error + message. Errors correspond to [Top]. Sets of equations are ordered + by inclusion. *) + +Module LEq <: SEMILATTICE. + + Definition t := res eqs. + + Definition eq (x y: t) := + match x, y with + | OK a, OK b => EqSet.Equal a b + | Error _, Error _ => True + | _, _ => False + end. + + Lemma eq_refl: forall x, eq x x. + Proof. + intros; destruct x; simpl; auto. red; tauto. + Qed. + + Lemma eq_sym: forall x y, eq x y -> eq y x. + Proof. + unfold eq; intros; destruct x; destruct y; auto. + red in H; red; intros. rewrite H; tauto. + Qed. + + Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z. + Proof. + unfold eq; intros. destruct x; destruct y; try contradiction; destruct z; auto. + red in H; red in H0; red; intros. rewrite H. auto. + Qed. + + Definition beq (x y: t) := + match x, y with + | OK a, OK b => EqSet.equal a b + | Error _, Error _ => true + | _, _ => false + end. + + Lemma beq_correct: forall x y, beq x y = true -> eq x y. + Proof. + unfold beq, eq; intros. destruct x; destruct y. + apply EqSet.equal_2. auto. + discriminate. + discriminate. + auto. + Qed. + + Definition ge (x y: t) := + match x, y with + | OK a, OK b => EqSet.Subset b a + | Error _, _ => True + | _, Error _ => False + end. + + Lemma ge_refl: forall x y, eq x y -> ge x y. + Proof. + unfold eq, ge, EqSet.Equal, EqSet.Subset; intros. + destruct x; destruct y; auto. intros; rewrite H; auto. + Qed. + Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z. + Proof. + unfold ge, EqSet.Subset; intros. + destruct x; auto; destruct y; try contradiction. + destruct z; eauto. + Qed. + + Definition bot: t := OK empty_eqs. + + Lemma ge_bot: forall x, ge x bot. + Proof. + unfold ge, bot, EqSet.Subset; simpl; intros. + destruct x; auto. intros. elim (EqSet.empty_1 H). + Qed. + + Program Definition lub (x y: t) : t := + match x, y return _ with + | OK a, OK b => + OK (mkeqs (EqSet.union (eqs1 a) (eqs1 b)) + (EqSet2.union (eqs2 a) (eqs2 b)) _) + | OK _, Error _ => y + | Error _, _ => x + end. + Next Obligation. + split; intros. + apply EqSet2.union_1 in H. destruct H; rewrite eqs_same in H. + apply EqSet.union_2; auto. apply EqSet.union_3; auto. + apply EqSet.union_1 in H. destruct H; rewrite <- eqs_same in H. + apply EqSet2.union_2; auto. apply EqSet2.union_3; auto. + Qed. + + Lemma ge_lub_left: forall x y, ge (lub x y) x. + Proof. + unfold lub, ge, EqSet.Subset; intros. + destruct x; destruct y; auto. + intros; apply EqSet.union_2; auto. + Qed. + + Lemma ge_lub_right: forall x y, ge (lub x y) y. + Proof. + unfold lub, ge, EqSet.Subset; intros. + destruct x; destruct y; auto. + intros; apply EqSet.union_3; auto. + Qed. + +End LEq. + +(** The backward dataflow solver is an instantiation of Kildall's algorithm. *) + +Module DS := Backward_Dataflow_Solver(LEq)(NodeSetBackward). + +(** The control-flow graph that the solver operates on is the CFG of + block shapes built by the structural check phase. Here is its notion + of successors. *) + +Definition successors_block_shape (bsh: block_shape) : list node := + match bsh with + | BSnop mv s => s :: nil + | BSmove src dst mv s => s :: nil + | BSmakelong src1 src2 dst mv s => s :: nil + | BSlowlong src dst mv s => s :: nil + | BShighlong src dst mv s => s :: nil + | BSop op args res mv1 args' res' mv2 s => s :: nil + | BSopdead op args res mv s => s :: nil + | BSload chunk addr args dst mv1 args' dst' mv2 s => s :: nil + | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s => s :: nil + | BSload2_1 addr args dst mv1 args' dst' mv2 s => s :: nil + | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s => s :: nil + | BSloaddead chunk addr args dst mv s => s :: nil + | BSstore chunk addr args src mv1 args' src' s => s :: nil + | BSstore2 addr addr' args src mv1 args1' src1' mv2 args2' src2' s => s :: nil + | BScall sg ros args res mv1 ros' mv2 s => s :: nil + | BStailcall sg ros args mv1 ros' => nil + | BSbuiltin ef args res mv1 args' res' mv2 s => s :: nil + | BSannot txt typ args res mv1 args' s => s :: nil + | BScond cond args mv args' s1 s2 => s1 :: s2 :: nil + | BSjumptable arg mv arg' tbl => tbl + | BSreturn optarg mv => nil + end. + +Definition analyze (f: RTL.function) (bsh: PTree.t block_shape) := + DS.fixpoint (PTree.map1 successors_block_shape bsh) (transfer f bsh) nil. + +(** * Validating and translating functions and programs *) + +(** Checking equations at function entry point. The RTL function receives + its arguments in the list [rparams] of pseudoregisters. The LTL function + receives them in the list [lparams] of locations dictated by the + calling conventions, with arguments of type [Tlong] being split in + two 32-bit halves. We check that the equations [e] that must hold + at the beginning of the functions are compatible with these calling + conventions, in the sense that all equations involving a pseudoreg + [r] from [rparams] is of the form [r = l [Full]] or [r = l [Low]] + or [r = l [High]], where [l] is the corresponding element of [lparams]. + + Note that [e] can contain additional equations [r' = l [kind]] + involving pseudoregs [r'] not in [rparams]: these equations are + automatically satisfied since the initial value of [r'] is [Vundef]. *) + +Function compat_entry (rparams: list reg) (tys: list typ) (lparams: list loc) (e: eqs) + {struct rparams} : bool := + match rparams, tys, lparams with + | nil, nil, nil => true + | r1 :: rl, Tlong :: tyl, l1 :: l2 :: ll => + compat_left2 r1 l1 l2 e && compat_entry rl tyl ll e + | r1 :: rl, (Tint|Tfloat) :: tyl, l1 :: ll => + compat_left r1 l1 e && compat_entry rl tyl ll e + | _, _, _ => false + end. + +(** Checking the satisfiability of equations inferred at function entry + point. We also check that the RTL and LTL functions agree in signature + and stack size. *) + +Definition check_entrypoints_aux (rtl: RTL.function) (ltl: LTL.function) (e1: eqs) : option unit := + do mv <- pair_entrypoints rtl ltl; + do e2 <- track_moves mv e1; + assertion (compat_entry (RTL.fn_params rtl) + (sig_args (RTL.fn_sig rtl)) + (loc_parameters (RTL.fn_sig rtl)) e2); + assertion (can_undef destroyed_at_function_entry e2); + assertion (zeq (RTL.fn_stacksize rtl) (LTL.fn_stacksize ltl)); + assertion (eq_signature (RTL.fn_sig rtl) (LTL.fn_sig ltl)); + Some tt. + +Local Close Scope option_monad_scope. +Local Open Scope error_monad_scope. + +Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function) + (bsh: PTree.t block_shape) + (a: PMap.t LEq.t): res unit := + do e1 <- transfer rtl bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl); + match check_entrypoints_aux rtl ltl e1 with + | None => Error (msg "invalid register allocation at entry point") + | Some _ => OK tt + end. + +(** Putting it all together, this is the validation function for + a source RTL function and an LTL function generated by the external + register allocator. *) + +Definition check_function (rtl: RTL.function) (ltl: LTL.function) : res unit := + let bsh := pair_codes rtl ltl in + match analyze rtl bsh with + | None => Error (msg "allocation analysis diverges") + | Some a => check_entrypoints rtl ltl bsh a + end. -(** The translation of a function performs liveness analysis, - construction and coloring of the inference graph, and per-instruction - transformation as described above. *) +(** [regalloc] is the external register allocator. It is written in OCaml + in file [backend/Regalloc.ml]. *) -Definition live0 (f: RTL.function) (live: PMap.t Regset.t) := - transfer f f.(RTL.fn_entrypoint) live!!(f.(RTL.fn_entrypoint)). +Parameter regalloc: RTL.function -> res LTL.function. -Open Scope string_scope. +(** Register allocation followed by validation. *) Definition transf_function (f: RTL.function) : res LTL.function := match type_function f with - | Error msg => Error msg - | OK env => - match analyze f with - | None => Error (msg "Liveness analysis failure") - | Some live => - match regalloc f live (live0 f live) env with - | None => Error (msg "Incorrect graph coloring") - | Some assign => OK (transf_fun f live assign) - end - end + | Error m => Error m + | OK tyenv => + match regalloc f with + | Error m => Error m + | OK tf => do x <- check_function f tf; OK tf + end end. Definition transf_fundef (fd: RTL.fundef) : res LTL.fundef := diff --git a/backend/Allocproof.v b/backend/Allocproof.v index 8dfa2d4..76e9744 100644 --- a/backend/Allocproof.v +++ b/backend/Allocproof.v @@ -10,403 +10,1335 @@ (* *) (* *********************************************************************) -(** Correctness proof for the [Allocation] pass (translation from +(** Correctness proof for the [Allocation] pass (validated translation from RTL to LTL). *) Require Import FSets. Require Import Coqlib. +Require Import Ordered. Require Import Errors. Require Import Maps. +Require Import Lattice. Require Import AST. Require Import Integers. Require Import Values. Require Import Memory. Require Import Events. -Require Import Smallstep. Require Import Globalenvs. +Require Import Smallstep. Require Import Op. Require Import Registers. Require Import RTL. Require Import RTLtyping. -Require Import Liveness. +Require Import Kildall. Require Import Locations. -Require Import LTL. Require Import Conventions. -Require Import Coloring. -Require Import Coloringproof. +Require Import LTL. Require Import Allocation. -(** * Properties of allocated locations *) +(** * Soundness of structural checks *) -(** We list here various properties of the locations [alloc r], - where [r] is an RTL pseudo-register and [alloc] is the register - assignment returned by [regalloc]. *) +Definition expand_move (sd: loc * loc) : instruction := + match sd with + | (R src, R dst) => Lop Omove (src::nil) dst + | (S sl ofs ty, R dst) => Lgetstack sl ofs ty dst + | (R src, S sl ofs ty) => Lsetstack src sl ofs ty + | (S _ _ _, S _ _ _) => Lreturn (**r should never happen *) + end. -Section REGALLOC_PROPERTIES. +Definition expand_moves (mv: moves) (k: bblock) : bblock := + List.map expand_move mv ++ k. -Variable f: RTL.function. -Variable env: regenv. -Variable live: PMap.t Regset.t. -Variable alloc: reg -> loc. -Hypothesis ALLOC: regalloc f live (live0 f live) env = Some alloc. +Definition wf_move (sd: loc * loc) : Prop := + match sd with + | (S _ _ _, S _ _ _) => False + | _ => True + end. -Lemma regalloc_noteq_diff: - forall r1 l2, - alloc r1 <> l2 -> Loc.diff (alloc r1) l2. +Definition wf_moves (mv: moves) : Prop := + forall sd, In sd mv -> wf_move sd. + +Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Prop := + | ebs_nop: forall mv s k, + wf_moves mv -> + expand_block_shape (BSnop mv s) + (Inop s) + (expand_moves mv (Lbranch s :: k)) + | ebs_move: forall src dst mv s k, + wf_moves mv -> + expand_block_shape (BSmove src dst mv s) + (Iop Omove (src :: nil) dst s) + (expand_moves mv (Lbranch s :: k)) + | ebs_makelong: forall src1 src2 dst mv s k, + wf_moves mv -> + expand_block_shape (BSmakelong src1 src2 dst mv s) + (Iop Omakelong (src1 :: src2 :: nil) dst s) + (expand_moves mv (Lbranch s :: k)) + | ebs_lowlong: forall src dst mv s k, + wf_moves mv -> + expand_block_shape (BSlowlong src dst mv s) + (Iop Olowlong (src :: nil) dst s) + (expand_moves mv (Lbranch s :: k)) + | ebs_highlong: forall src dst mv s k, + wf_moves mv -> + expand_block_shape (BShighlong src dst mv s) + (Iop Ohighlong (src :: nil) dst s) + (expand_moves mv (Lbranch s :: k)) + | ebs_op: forall op args res mv1 args' res' mv2 s k, + wf_moves mv1 -> wf_moves mv2 -> + expand_block_shape (BSop op args res mv1 args' res' mv2 s) + (Iop op args res s) + (expand_moves mv1 + (Lop op args' res' :: expand_moves mv2 (Lbranch s :: k))) + | ebs_op_dead: forall op args res mv s k, + wf_moves mv -> + expand_block_shape (BSopdead op args res mv s) + (Iop op args res s) + (expand_moves mv (Lbranch s :: k)) + | ebs_load: forall chunk addr args dst mv1 args' dst' mv2 s k, + wf_moves mv1 -> wf_moves mv2 -> + expand_block_shape (BSload chunk addr args dst mv1 args' dst' mv2 s) + (Iload chunk addr args dst s) + (expand_moves mv1 + (Lload chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k))) + | ebs_load2: forall addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s k, + wf_moves mv1 -> wf_moves mv2 -> wf_moves mv3 -> + offset_addressing addr (Int.repr 4) = Some addr2 -> + expand_block_shape (BSload2 addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s) + (Iload Mint64 addr args dst s) + (expand_moves mv1 + (Lload Mint32 addr args1' dst1' :: + expand_moves mv2 + (Lload Mint32 addr2 args2' dst2' :: + expand_moves mv3 (Lbranch s :: k)))) + | ebs_load2_1: forall addr args dst mv1 args' dst' mv2 s k, + wf_moves mv1 -> wf_moves mv2 -> + expand_block_shape (BSload2_1 addr args dst mv1 args' dst' mv2 s) + (Iload Mint64 addr args dst s) + (expand_moves mv1 + (Lload Mint32 addr args' dst' :: + expand_moves mv2 (Lbranch s :: k))) + | ebs_load2_2: forall addr addr2 args dst mv1 args' dst' mv2 s k, + wf_moves mv1 -> wf_moves mv2 -> + offset_addressing addr (Int.repr 4) = Some addr2 -> + expand_block_shape (BSload2_2 addr addr2 args dst mv1 args' dst' mv2 s) + (Iload Mint64 addr args dst s) + (expand_moves mv1 + (Lload Mint32 addr2 args' dst' :: + expand_moves mv2 (Lbranch s :: k))) + | ebs_load_dead: forall chunk addr args dst mv s k, + wf_moves mv -> + expand_block_shape (BSloaddead chunk addr args dst mv s) + (Iload chunk addr args dst s) + (expand_moves mv (Lbranch s :: k)) + | ebs_store: forall chunk addr args src mv1 args' src' s k, + wf_moves mv1 -> + expand_block_shape (BSstore chunk addr args src mv1 args' src' s) + (Istore chunk addr args src s) + (expand_moves mv1 + (Lstore chunk addr args' src' :: Lbranch s :: k)) + | ebs_store2: forall addr addr2 args src mv1 args1' src1' mv2 args2' src2' s k, + wf_moves mv1 -> wf_moves mv2 -> + offset_addressing addr (Int.repr 4) = Some addr2 -> + expand_block_shape (BSstore2 addr addr2 args src mv1 args1' src1' mv2 args2' src2' s) + (Istore Mint64 addr args src s) + (expand_moves mv1 + (Lstore Mint32 addr args1' src1' :: + expand_moves mv2 + (Lstore Mint32 addr2 args2' src2' :: + Lbranch s :: k))) + | ebs_call: forall sg ros args res mv1 ros' mv2 s k, + wf_moves mv1 -> wf_moves mv2 -> + expand_block_shape (BScall sg ros args res mv1 ros' mv2 s) + (Icall sg ros args res s) + (expand_moves mv1 + (Lcall sg ros' :: expand_moves mv2 (Lbranch s :: k))) + | ebs_tailcall: forall sg ros args mv ros' k, + wf_moves mv -> + expand_block_shape (BStailcall sg ros args mv ros') + (Itailcall sg ros args) + (expand_moves mv (Ltailcall sg ros' :: k)) + | ebs_builtin: forall ef args res mv1 args' res' mv2 s k, + wf_moves mv1 -> wf_moves mv2 -> + expand_block_shape (BSbuiltin ef args res mv1 args' res' mv2 s) + (Ibuiltin ef args res s) + (expand_moves mv1 + (Lbuiltin ef args' res' :: expand_moves mv2 (Lbranch s :: k))) + | ebs_annot: forall txt typ args res mv args' s k, + wf_moves mv -> + expand_block_shape (BSannot txt typ args res mv args' s) + (Ibuiltin (EF_annot txt typ) args res s) + (expand_moves mv + (Lannot (EF_annot txt typ) args' :: Lbranch s :: k)) + | ebs_cond: forall cond args mv args' s1 s2 k, + wf_moves mv -> + expand_block_shape (BScond cond args mv args' s1 s2) + (Icond cond args s1 s2) + (expand_moves mv (Lcond cond args' s1 s2 :: k)) + | ebs_jumptable: forall arg mv arg' tbl k, + wf_moves mv -> + expand_block_shape (BSjumptable arg mv arg' tbl) + (Ijumptable arg tbl) + (expand_moves mv (Ljumptable arg' tbl :: k)) + | ebs_return: forall optarg mv k, + wf_moves mv -> + expand_block_shape (BSreturn optarg mv) + (Ireturn optarg) + (expand_moves mv (Lreturn :: k)). + +Ltac MonadInv := + match goal with + | [ H: match ?x with Some _ => _ | None => None end = Some _ |- _ ] => + destruct x as [] eqn:? ; [MonadInv|discriminate] + | [ H: match ?x with left _ => _ | right _ => None end = Some _ |- _ ] => + destruct x; [MonadInv|discriminate] + | [ H: match negb (proj_sumbool ?x) with true => _ | false => None end = Some _ |- _ ] => + destruct x; [discriminate|simpl in H; MonadInv] + | [ H: match negb ?x with true => _ | false => None end = Some _ |- _ ] => + destruct x; [discriminate|simpl in H; MonadInv] + | [ H: match ?x with true => _ | false => None end = Some _ |- _ ] => + destruct x as [] eqn:? ; [MonadInv|discriminate] + | [ H: match ?x with (_, _) => _ end = Some _ |- _ ] => + destruct x as [] eqn:? ; MonadInv + | [ H: Some _ = Some _ |- _ ] => + inv H; MonadInv + | [ H: None = Some _ |- _ ] => + discriminate + | _ => + idtac + end. + +Lemma extract_moves_sound: + forall b mv b', + extract_moves nil b = (mv, b') -> + wf_moves mv /\ b = expand_moves mv b'. Proof. - intros. apply loc_acceptable_noteq_diff. - eapply regalloc_acceptable; eauto. - auto. -Qed. + assert (BASE: + forall accu b, + wf_moves accu -> + wf_moves (List.rev accu) /\ expand_moves (List.rev accu) b = expand_moves (List.rev accu) b). + intros; split; auto. + red; intros. apply H. rewrite <- in_rev in H0; auto. + + assert (IND: forall b accu mv b', + extract_moves accu b = (mv, b') -> + wf_moves accu -> + wf_moves mv /\ expand_moves (List.rev accu) b = expand_moves mv b'). + induction b; simpl; intros. + inv H. auto. + destruct a; try (inv H; apply BASE; auto; fail). + destruct (is_move_operation op args) as [arg|] eqn:E. + exploit is_move_operation_correct; eauto. intros [A B]; subst. + (* reg-reg move *) + exploit IHb; eauto. + red; intros. destruct H1; auto. subst sd; exact I. + intros [P Q]. + split; auto. rewrite <- Q. simpl. unfold expand_moves. rewrite map_app. + rewrite app_ass. simpl. auto. + inv H; apply BASE; auto. + (* stack-reg move *) + exploit IHb; eauto. + red; intros. destruct H1; auto. subst sd; exact I. + intros [P Q]. + split; auto. rewrite <- Q. simpl. unfold expand_moves. rewrite map_app. + rewrite app_ass. simpl. auto. + (* reg-stack move *) + exploit IHb; eauto. + red; intros. destruct H1; auto. subst sd; exact I. + intros [P Q]. + split; auto. rewrite <- Q. simpl. unfold expand_moves. rewrite map_app. + rewrite app_ass. simpl. auto. + + intros. exploit IND; eauto. red; intros. elim H0. +Qed. + +Lemma check_succ_sound: + forall s b, check_succ s b = true -> exists k, b = Lbranch s :: k. +Proof. + intros. destruct b; simpl in H; try discriminate. + destruct i; try discriminate. + destruct (peq s s0); simpl in H; inv H. exists b; auto. +Qed. + +Ltac UseParsingLemmas := + match goal with + | [ H: extract_moves nil _ = (_, _) |- _ ] => + destruct (extract_moves_sound _ _ _ H); clear H; subst; UseParsingLemmas + | [ H: check_succ _ _ = true |- _ ] => + try discriminate; + destruct (check_succ_sound _ _ H); clear H; subst; UseParsingLemmas + | _ => idtac + end. -Lemma regalloc_notin_notin: - forall r ll, - ~(In (alloc r) ll) -> Loc.notin (alloc r) ll. +Lemma pair_instr_block_sound: + forall i b bsh, + pair_instr_block i b = Some bsh -> expand_block_shape bsh i b. Proof. - intros. apply loc_acceptable_notin_notin. - eapply regalloc_acceptable; eauto. auto. + intros; destruct i; simpl in H; MonadInv; UseParsingLemmas. +(* nop *) + econstructor; eauto. +(* op *) + destruct (classify_operation o l). + (* move *) + MonadInv; UseParsingLemmas. econstructor; eauto. + (* makelong *) + MonadInv; UseParsingLemmas. econstructor; eauto. + (* lowlong *) + MonadInv; UseParsingLemmas. econstructor; eauto. + (* highlong *) + MonadInv; UseParsingLemmas. econstructor; eauto. + (* other ops *) + MonadInv. destruct b0. + MonadInv; UseParsingLemmas. + destruct i; MonadInv; UseParsingLemmas. + eapply ebs_op; eauto. + inv H0. eapply ebs_op_dead; eauto. +(* load *) + destruct b0. + MonadInv; UseParsingLemmas. + destruct i; MonadInv; UseParsingLemmas. + destruct (eq_chunk m Mint64). + MonadInv; UseParsingLemmas. + destruct b; MonadInv; UseParsingLemmas. destruct i; MonadInv; UseParsingLemmas. + eapply ebs_load2; eauto. + destruct (eq_addressing a addr). + MonadInv. inv H2. eapply ebs_load2_1; eauto. + MonadInv. inv H2. eapply ebs_load2_2; eauto. + MonadInv; UseParsingLemmas. eapply ebs_load; eauto. + inv H. eapply ebs_load_dead; eauto. +(* store *) + destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. + destruct (eq_chunk m Mint64). + MonadInv; UseParsingLemmas. + destruct b; MonadInv. destruct i; MonadInv; UseParsingLemmas. + eapply ebs_store2; eauto. + MonadInv; UseParsingLemmas. + eapply ebs_store; eauto. +(* call *) + destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto. +(* tailcall *) + destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto. +(* builtin *) + destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. + econstructor; eauto. + destruct ef; inv H. econstructor; eauto. +(* cond *) + destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto. +(* jumptable *) + destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto. +(* return *) + destruct b0; MonadInv. destruct i; MonadInv; UseParsingLemmas. econstructor; eauto. Qed. -Lemma regalloc_notin_notin_2: - forall l rl, - ~(In l (map alloc rl)) -> Loc.notin l (map alloc rl). +Lemma matching_instr_block: + forall f1 f2 pc bsh i, + (pair_codes f1 f2)!pc = Some bsh -> + (RTL.fn_code f1)!pc = Some i -> + exists b, (LTL.fn_code f2)!pc = Some b /\ expand_block_shape bsh i b. Proof. - induction rl; simpl; intros. auto. - split. apply Loc.diff_sym. apply regalloc_noteq_diff. tauto. - apply IHrl. tauto. + intros. unfold pair_codes in H. rewrite PTree.gcombine in H; auto. rewrite H0 in H. + destruct (LTL.fn_code f2)!pc as [b|]. + exists b; split; auto. apply pair_instr_block_sound; auto. + discriminate. Qed. - -Lemma regalloc_norepet_norepet: - forall rl, - list_norepet (List.map alloc rl) -> - Loc.norepet (List.map alloc rl). + +(** * Properties of equations *) + +Module ESF := FSetFacts.Facts(EqSet). +Module ESP := FSetProperties.Properties(EqSet). +Module ESD := FSetDecide.Decide(EqSet). + +Definition sel_val (k: equation_kind) (v: val) : val := + match k with + | Full => v + | Low => Val.loword v + | High => Val.hiword v + end. + +(** A set of equations [e] is satisfied in a RTL pseudoreg state [rs] + and an LTL location state [ls] if, for every equation [r = l [k]] in [e], + [sel_val k (rs#r)] (the [k] fragment of [r]'s value in the RTL code) + is less defined than [ls l] (the value of [l] in the LTL code). *) + +Definition satisf (rs: regset) (ls: locset) (e: eqs) : Prop := + forall q, EqSet.In q e -> Val.lessdef (sel_val (ekind q) rs#(ereg q)) (ls (eloc q)). + +Lemma empty_eqs_satisf: + forall rs ls, satisf rs ls empty_eqs. +Proof. + unfold empty_eqs; intros; red; intros. ESD.fsetdec. +Qed. + +Lemma satisf_incr: + forall rs ls (e1 e2: eqs), + satisf rs ls e2 -> EqSet.Subset e1 e2 -> satisf rs ls e1. Proof. - induction rl; simpl; intros. - apply Loc.norepet_nil. - inversion H. - apply Loc.norepet_cons. - eapply regalloc_notin_notin; eauto. + unfold satisf; intros. apply H. ESD.fsetdec. +Qed. + +Lemma satisf_undef_reg: + forall rs ls e r, + satisf rs ls e -> + satisf (rs#r <- Vundef) ls e. +Proof. + intros; red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r); auto. + destruct (ekind q); simpl; auto. +Qed. + +Lemma add_equation_lessdef: + forall rs ls q e, + satisf rs ls (add_equation q e) -> Val.lessdef (sel_val (ekind q) rs#(ereg q)) (ls (eloc q)). +Proof. + intros. apply H. unfold add_equation. simpl. apply EqSet.add_1. auto. +Qed. + +Lemma add_equation_satisf: + forall rs ls q e, + satisf rs ls (add_equation q e) -> satisf rs ls e. +Proof. + intros. eapply satisf_incr; eauto. unfold add_equation. simpl. ESD.fsetdec. +Qed. + +Lemma add_equations_satisf: + forall rs ls rl ml e e', + add_equations rl ml e = Some e' -> + satisf rs ls e' -> satisf rs ls e. +Proof. + induction rl; destruct ml; simpl; intros; MonadInv. auto. + eapply add_equation_satisf; eauto. Qed. -Lemma regalloc_not_temporary: - forall (r: reg), - Loc.notin (alloc r) temporaries. +Lemma add_equations_lessdef: + forall rs ls rl ml e e', + add_equations rl ml e = Some e' -> + satisf rs ls e' -> + Val.lessdef_list (rs##rl) (reglist ls ml). Proof. - intros. apply temporaries_not_acceptable. - eapply regalloc_acceptable; eauto. + induction rl; destruct ml; simpl; intros; MonadInv. + constructor. + constructor; eauto. + apply add_equation_lessdef with (e := e) (q := Eq Full a (R m)). + eapply add_equations_satisf; eauto. Qed. -Lemma regalloc_disj_temporaries: - forall (rl: list reg), - Loc.disjoint (List.map alloc rl) temporaries. +Lemma add_equations_args_satisf: + forall rs ls rl tyl ll e e', + add_equations_args rl tyl ll e = Some e' -> + satisf rs ls e' -> satisf rs ls e. Proof. - intros. - apply Loc.notin_disjoint. intros. - generalize (list_in_map_inv _ _ _ H). intros [r [EQ IN]]. - subst x. apply regalloc_not_temporary; auto. + intros until e'. functional induction (add_equations_args rl tyl ll e); intros. + inv H; auto. + eapply add_equation_satisf. eapply add_equation_satisf. eauto. + eapply add_equation_satisf. eauto. + eapply add_equation_satisf. eauto. + congruence. Qed. -End REGALLOC_PROPERTIES. +Lemma val_longofwords_eq: + forall v, + Val.has_type v Tlong -> + Val.longofwords (Val.hiword v) (Val.loword v) = v. +Proof. + intros. red in H. destruct v; try contradiction. + reflexivity. + simpl. rewrite Int64.ofwords_recompose. auto. +Qed. -(** * Semantic agreement between RTL registers and LTL locations *) +Lemma add_equations_args_lessdef: + forall rs ls rl tyl ll e e', + add_equations_args rl tyl ll e = Some e' -> + satisf rs ls e' -> + Val.has_type_list (rs##rl) tyl -> + Val.lessdef_list (rs##rl) (decode_longs tyl (map ls ll)). +Proof. + intros until e'. functional induction (add_equations_args rl tyl ll e); simpl; intros. +- inv H; auto. +- destruct H1. constructor; auto. + rewrite <- (val_longofwords_eq (rs#r1)); auto. apply Val.longofwords_lessdef. + eapply add_equation_lessdef with (q := Eq High r1 l1). + eapply add_equation_satisf. eapply add_equations_args_satisf; eauto. + eapply add_equation_lessdef with (q := Eq Low r1 l2). + eapply add_equations_args_satisf; eauto. +- destruct H1. constructor; auto. + eapply add_equation_lessdef with (q := Eq Full r1 l1). eapply add_equations_args_satisf; eauto. +- destruct H1. constructor; auto. + eapply add_equation_lessdef with (q := Eq Full r1 l1). eapply add_equations_args_satisf; eauto. +- discriminate. +Qed. -Require Import LTL. -Module RegsetP := Properties(Regset). - -Section AGREE. - -Variable f: RTL.function. -Variable env: regenv. -Variable flive: PMap.t Regset.t. -Variable assign: reg -> loc. -Hypothesis REGALLOC: regalloc f flive (live0 f flive) env = Some assign. - -(** Remember the core of the code transformation performed in module - [Allocation]: every reference to register [r] is replaced by - a reference to location [assign r]. We will shortly prove - the semantic equivalence between the original code and the transformed code. - The key tool to do this is the following relation between - a register set [rs] in the original RTL program and a location set - [ls] in the transformed LTL program. The two sets agree if - they assign identical values to matching registers and locations, - that is, the value of register [r] in [rs] is the same as - the value of location [assign r] in [ls]. However, this equality - needs to hold only for live registers [r]. If [r] is dead at - the current point, its value is never used later, hence the value - of [assign r] can be arbitrary. *) - -Definition agree (live: Regset.t) (rs: regset) (ls: locset) : Prop := - forall (r: reg), Regset.In r live -> rs#r = ls (assign r). - -(** What follows is a long list of lemmas expressing properties - of the [agree_live_regs] predicate that are useful for the - semantic equivalence proof. First: two register sets that agree - on a given set of live registers also agree on a subset of - those live registers. *) - -Lemma agree_increasing: - forall live1 live2 rs ls, - Regset.Subset live2 live1 -> agree live1 rs ls -> - agree live2 rs ls. -Proof. - unfold agree; intros. - apply H0. apply H. auto. -Qed. - -Lemma agree_succ: - forall n s rs ls live i, - analyze f = Some live -> - f.(RTL.fn_code)!n = Some i -> - In s (RTL.successors_instr i) -> - agree live!!n rs ls -> - agree (transfer f s live!!s) rs ls. -Proof. - intros. - apply agree_increasing with (live!!n). - eapply Liveness.analyze_solution; eauto. +Lemma add_equation_ros_satisf: + forall rs ls ros mos e e', + add_equation_ros ros mos e = Some e' -> + satisf rs ls e' -> satisf rs ls e. +Proof. + unfold add_equation_ros; intros. destruct ros; destruct mos; MonadInv. + eapply add_equation_satisf; eauto. auto. Qed. -(** Some useful special cases of [agree_increasing]. *) +Lemma remove_equation_satisf: + forall rs ls q e, + satisf rs ls e -> satisf rs ls (remove_equation q e). +Proof. + intros. eapply satisf_incr; eauto. unfold remove_equation; simpl. ESD.fsetdec. +Qed. -Lemma agree_reg_live: - forall r live rs ls, - agree (reg_live r live) rs ls -> agree live rs ls. +Lemma remove_equation_res_satisf: + forall rs ls r oty ll e e', + remove_equations_res r oty ll e = Some e' -> + satisf rs ls e -> satisf rs ls e'. Proof. - intros. apply agree_increasing with (reg_live r live); auto. - red. apply RegsetP.subset_add_2. apply RegsetP.subset_refl. + intros. functional inversion H. + apply remove_equation_satisf. apply remove_equation_satisf; auto. + apply remove_equation_satisf; auto. Qed. -Lemma agree_reg_list_live: - forall rl live rs ls, - agree (reg_list_live rl live) rs ls -> agree live rs ls. +Remark select_reg_l_monotone: + forall r q1 q2, + OrderedEquation.eq q1 q2 \/ OrderedEquation.lt q1 q2 -> + select_reg_l r q1 = true -> select_reg_l r q2 = true. Proof. - induction rl; simpl; intros. - assumption. - apply agree_reg_live with a. apply IHrl. assumption. + unfold select_reg_l; intros. destruct H. + red in H. congruence. + rewrite Pos.leb_le in *. red in H. destruct H as [A | [A B]]. + red in A. zify; omega. + rewrite <- A; auto. Qed. -Lemma agree_reg_sum_live: - forall ros live rs ls, - agree (reg_sum_live ros live) rs ls -> agree live rs ls. +Remark select_reg_h_monotone: + forall r q1 q2, + OrderedEquation.eq q1 q2 \/ OrderedEquation.lt q2 q1 -> + select_reg_h r q1 = true -> select_reg_h r q2 = true. Proof. - intros. destruct ros; simpl in H. - apply agree_reg_live with r; auto. - auto. + unfold select_reg_h; intros. destruct H. + red in H. congruence. + rewrite Pos.leb_le in *. red in H. destruct H as [A | [A B]]. + red in A. zify; omega. + rewrite A; auto. Qed. -(** Agreement over a set of live registers just extended with [r] - implies equality of the values of [r] and [assign r]. *) +Remark select_reg_charact: + forall r q, select_reg_l r q = true /\ select_reg_h r q = true <-> ereg q = r. +Proof. + unfold select_reg_l, select_reg_h; intros; split. + rewrite ! Pos.leb_le. unfold reg; zify; omega. + intros. rewrite H. rewrite ! Pos.leb_refl; auto. +Qed. -Lemma agree_eval_reg: - forall r live rs ls, - agree (reg_live r live) rs ls -> rs#r = ls (assign r). +Lemma reg_unconstrained_sound: + forall r e q, + reg_unconstrained r e = true -> + EqSet.In q e -> + ereg q <> r. Proof. - intros. apply H. apply Regset.add_1. auto. + unfold reg_unconstrained; intros. red; intros. + apply select_reg_charact in H1. + assert (EqSet.mem_between (select_reg_l r) (select_reg_h r) e = true). + { + apply EqSet.mem_between_2 with q; auto. + exact (select_reg_l_monotone r). + exact (select_reg_h_monotone r). + tauto. + tauto. + } + rewrite H2 in H; discriminate. Qed. -(** Same, for a list of registers. *) +Lemma reg_unconstrained_satisf: + forall r e rs ls v, + reg_unconstrained r e = true -> + satisf rs ls e -> + satisf (rs#r <- v) ls e. +Proof. + red; intros. rewrite PMap.gso. auto. eapply reg_unconstrained_sound; eauto. +Qed. -Lemma agree_eval_regs: - forall rl live rs ls, - agree (reg_list_live rl live) rs ls -> - rs##rl = List.map ls (List.map assign rl). +Remark select_loc_l_monotone: + forall l q1 q2, + OrderedEquation'.eq q1 q2 \/ OrderedEquation'.lt q1 q2 -> + select_loc_l l q1 = true -> select_loc_l l q2 = true. Proof. - induction rl; simpl; intros. + unfold select_loc_l; intros. set (lb := OrderedLoc.diff_low_bound l) in *. + destruct H. + red in H. subst q2; auto. + assert (eloc q1 = eloc q2 \/ OrderedLoc.lt (eloc q1) (eloc q2)). + red in H. tauto. + destruct H1. rewrite <- H1; auto. + destruct (OrderedLoc.compare (eloc q2) lb); auto. + assert (OrderedLoc.lt (eloc q1) lb) by (eapply OrderedLoc.lt_trans; eauto). + destruct (OrderedLoc.compare (eloc q1) lb). auto. - f_equal. - apply agree_eval_reg with live. - apply agree_reg_list_live with rl. auto. - eapply IHrl. eexact H. -Qed. - -(** Agreement is insensitive to the current values of the temporary - machine registers. *) - -Lemma agree_exten: - forall live rs ls ls', - agree live rs ls -> - (forall l, Loc.notin l temporaries -> ls' l = ls l) -> - agree live rs ls'. -Proof. - unfold agree; intros. - rewrite H0. apply H. auto. eapply regalloc_not_temporary; eauto. -Qed. - -Lemma agree_undef_temps: - forall live rs ls, - agree live rs ls -> agree live rs (undef_temps ls). -Proof. - intros. apply agree_exten with ls; auto. - intros. apply Locmap.guo; auto. -Qed. - -(** If a register is dead, assigning it an arbitrary value in [rs] - and leaving [ls] unchanged preserves agreement. (This corresponds - to an operation over a dead register in the original program - that is turned into a no-op in the transformed program.) *) - -Lemma agree_assign_dead: - forall live r rs ls v, - ~Regset.In r live -> - agree live rs ls -> - agree live (rs#r <- v) ls. -Proof. - unfold agree; intros. - case (Reg.eq r r0); intro. - subst r0. contradiction. - rewrite Regmap.gso; auto. -Qed. - -(** Setting [r] to value [v] in [rs] - and simultaneously setting [assign r] to value [v] in [ls] - preserves agreement, provided that all live registers except [r] - are mapped to locations other than that of [r]. *) - -Lemma agree_assign_live: - forall live r rs ls v, - (forall s, - Regset.In s live -> s <> r -> assign s <> assign r) -> - agree (reg_dead r live) rs ls -> - agree live (rs#r <- v) (Locmap.set (assign r) v ls). -Proof. - unfold agree; intros. rewrite Regmap.gsspec. - destruct (peq r0 r). - subst r0. rewrite Locmap.gss. auto. - rewrite Locmap.gso. apply H0. apply Regset.remove_2; auto. - eapply regalloc_noteq_diff. eauto. apply sym_not_equal. apply H. auto. auto. -Qed. - -(** This is a special case of the previous lemma where the value [v] - being stored is not arbitrary, but is the value of - another register [arg]. (This corresponds to a register-register move - instruction.) In this case, the condition can be weakened: - it suffices that all live registers except [arg] and [res] - are mapped to locations other than that of [res]. *) - -Lemma agree_move_live: - forall live arg res rs (ls: locset), - (forall r, - Regset.In r live -> r <> res -> r <> arg -> - assign r <> assign res) -> - agree (reg_live arg (reg_dead res live)) rs ls -> - agree live (rs#res <- (rs#arg)) (Locmap.set (assign res) (ls (assign arg)) ls). -Proof. - unfold agree; intros. rewrite Regmap.gsspec. destruct (peq r res). - subst r. rewrite Locmap.gss. apply H0. - apply Regset.add_1; auto. - destruct (Reg.eq r arg). - subst r. - replace (Locmap.set (assign res) (ls (assign arg)) ls (assign arg)) - with (ls (assign arg)). - apply H0. apply Regset.add_1. auto. - symmetry. destruct (Loc.eq (assign arg) (assign res)). - rewrite <- e. apply Locmap.gss. - apply Locmap.gso. eapply regalloc_noteq_diff; eauto. - - rewrite Locmap.gso. apply H0. apply Regset.add_2. apply Regset.remove_2. auto. auto. - eapply regalloc_noteq_diff; eauto. apply sym_not_equal. apply H; auto. -Qed. - -(** Yet another special case corresponding to the case of - a redundant move. *) - -Lemma agree_redundant_move_live: - forall live arg res rs (ls: locset), - (forall r, - Regset.In r live -> r <> res -> r <> arg -> - assign r <> assign res) -> - agree (reg_live arg (reg_dead res live)) rs ls -> - assign res = assign arg -> - agree live (rs#res <- (rs#arg)) ls. + eelim OrderedLoc.lt_not_eq; eauto. + eelim OrderedLoc.lt_not_eq. eapply OrderedLoc.lt_trans. eexact l1. eexact H2. red; auto. +Qed. + +Remark select_loc_h_monotone: + forall l q1 q2, + OrderedEquation'.eq q1 q2 \/ OrderedEquation'.lt q2 q1 -> + select_loc_h l q1 = true -> select_loc_h l q2 = true. Proof. - intros. - apply agree_exten with (Locmap.set (assign res) (ls (assign arg)) ls). - eapply agree_move_live; eauto. - intros. symmetry. rewrite H1. destruct (Loc.eq l (assign arg)). - subst l. apply Locmap.gss. - apply Locmap.gso. eapply regalloc_noteq_diff; eauto. -Qed. - -(** This complicated lemma states agreement between the states after - a function call, provided that the states before the call agree - and that calling conventions are respected. *) - -Lemma agree_postcall: - forall live args ros res rs v (ls: locset), - (forall r, - Regset.In r live -> r <> res -> - ~(In (assign r) destroyed_at_call)) -> - (forall r, - Regset.In r live -> r <> res -> assign r <> assign res) -> - agree (reg_list_live args (reg_sum_live ros (reg_dead res live))) rs ls -> - agree live (rs#res <- v) (Locmap.set (assign res) v (postcall_locs ls)). + unfold select_loc_h; intros. set (lb := OrderedLoc.diff_high_bound l) in *. + destruct H. + red in H. subst q2; auto. + assert (eloc q2 = eloc q1 \/ OrderedLoc.lt (eloc q2) (eloc q1)). + red in H. tauto. + destruct H1. rewrite H1; auto. + destruct (OrderedLoc.compare (eloc q2) lb); auto. + assert (OrderedLoc.lt lb (eloc q1)) by (eapply OrderedLoc.lt_trans; eauto). + destruct (OrderedLoc.compare (eloc q1) lb). + eelim OrderedLoc.lt_not_eq. eapply OrderedLoc.lt_trans. eexact l1. eexact H2. red; auto. + eelim OrderedLoc.lt_not_eq. eexact H2. apply OrderedLoc.eq_sym; auto. + auto. +Qed. + +Remark select_loc_charact: + forall l q, + select_loc_l l q = false \/ select_loc_h l q = false <-> Loc.diff l (eloc q). Proof. - intros. - assert (agree (reg_dead res live) rs ls). - apply agree_reg_sum_live with ros. - apply agree_reg_list_live with args. assumption. - red; intros. rewrite Regmap.gsspec. destruct (peq r res). - subst r. rewrite Locmap.gss. auto. - rewrite Locmap.gso. transitivity (ls (assign r)). - apply H2. apply Regset.remove_2; auto. - unfold postcall_locs. - assert (~In (assign r) temporaries). - apply Loc.notin_not_in. eapply regalloc_not_temporary; eauto. - assert (~In (assign r) destroyed_at_call). - apply H. auto. auto. - caseEq (assign r); auto. intros m ASG. rewrite <- ASG. - destruct (In_dec Loc.eq (assign r) temporaries). contradiction. - destruct (In_dec Loc.eq (assign r) destroyed_at_call). contradiction. + unfold select_loc_l, select_loc_h; intros; split; intros. + apply OrderedLoc.outside_interval_diff. + destruct H. + left. destruct (OrderedLoc.compare (eloc q) (OrderedLoc.diff_low_bound l)); assumption || discriminate. + right. destruct (OrderedLoc.compare (eloc q) (OrderedLoc.diff_high_bound l)); assumption || discriminate. + exploit OrderedLoc.diff_outside_interval. eauto. + intros [A | A]. + left. destruct (OrderedLoc.compare (eloc q) (OrderedLoc.diff_low_bound l)). + auto. + eelim OrderedLoc.lt_not_eq; eauto. + eelim OrderedLoc.lt_not_eq. eapply OrderedLoc.lt_trans; eauto. red; auto. + right. destruct (OrderedLoc.compare (eloc q) (OrderedLoc.diff_high_bound l)). + eelim OrderedLoc.lt_not_eq. eapply OrderedLoc.lt_trans; eauto. red; auto. + eelim OrderedLoc.lt_not_eq; eauto. apply OrderedLoc.eq_sym; auto. auto. - eapply regalloc_noteq_diff; eauto. apply sym_not_eq. auto. Qed. -(** Agreement between the initial register set at RTL function entry - and the location set at LTL function entry. *) +Lemma loc_unconstrained_sound: + forall l e q, + loc_unconstrained l e = true -> + EqSet.In q e -> + Loc.diff l (eloc q). +Proof. + unfold loc_unconstrained; intros. + destruct (select_loc_l l q) eqn:SL. + destruct (select_loc_h l q) eqn:SH. + assert (EqSet2.mem_between (select_loc_l l) (select_loc_h l) (eqs2 e) = true). + { + apply EqSet2.mem_between_2 with q; auto. + exact (select_loc_l_monotone l). + exact (select_loc_h_monotone l). + apply eqs_same. auto. + } + rewrite H1 in H; discriminate. + apply select_loc_charact; auto. + apply select_loc_charact; auto. +Qed. + +Lemma loc_unconstrained_satisf: + forall rs ls k r l e v, + satisf rs ls (remove_equation (Eq k r l) e) -> + loc_unconstrained l (remove_equation (Eq k r l) e) = true -> + Val.lessdef (sel_val k rs#r) v -> + satisf rs (Locmap.set l v ls) e. +Proof. + intros; red; intros. + destruct (OrderedEquation.eq_dec q (Eq k r l)). + subst q; simpl. rewrite Locmap.gss. auto. + assert (EqSet.In q (remove_equation (Eq k r l) e)). + simpl. ESD.fsetdec. + rewrite Locmap.gso. apply H; auto. eapply loc_unconstrained_sound; eauto. +Qed. + +Lemma reg_loc_unconstrained_sound: + forall r l e q, + reg_loc_unconstrained r l e = true -> + EqSet.In q e -> + ereg q <> r /\ Loc.diff l (eloc q). +Proof. + intros. destruct (andb_prop _ _ H). + split. eapply reg_unconstrained_sound; eauto. eapply loc_unconstrained_sound; eauto. +Qed. + +Lemma parallel_assignment_satisf: + forall k r l e rs ls v v', + Val.lessdef (sel_val k v) v' -> + reg_loc_unconstrained r l (remove_equation (Eq k r l) e) = true -> + satisf rs ls (remove_equation (Eq k r l) e) -> + satisf (rs#r <- v) (Locmap.set l v' ls) e. +Proof. + intros; red; intros. + destruct (OrderedEquation.eq_dec q (Eq k r l)). + subst q; simpl. rewrite Regmap.gss; rewrite Locmap.gss; auto. + assert (EqSet.In q (remove_equation {| ekind := k; ereg := r; eloc := l |} e)). + simpl. ESD.fsetdec. + exploit reg_loc_unconstrained_sound; eauto. intros [A B]. + rewrite Regmap.gso; auto. rewrite Locmap.gso; auto. +Qed. + +Lemma parallel_assignment_satisf_2: + forall rs ls res res' oty e e' v v', + remove_equations_res res oty res' e = Some e' -> + satisf rs ls e' -> + reg_unconstrained res e' = true -> + forallb (fun l => loc_unconstrained l e') res' = true -> + Val.lessdef v v' -> + satisf (rs#res <- v) (Locmap.setlist res' (encode_long oty v') ls) e. +Proof. + intros; red; intros. + functional inversion H. +- (* Two 32-bit halves *) + subst. + set (e' := remove_equation {| ekind := Low; ereg := res; eloc := l2 |} + (remove_equation {| ekind := High; ereg := res; eloc := l1 |} e)) in *. + simpl in H2. InvBooleans. simpl. + destruct (OrderedEquation.eq_dec q (Eq Low res l2)). + subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gss. + apply Val.loword_lessdef; auto. + destruct (OrderedEquation.eq_dec q (Eq High res l1)). + subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gso by auto. rewrite Locmap.gss. + apply Val.hiword_lessdef; auto. + assert (EqSet.In q e'). unfold e', remove_equation; simpl; ESD.fsetdec. + rewrite Regmap.gso. rewrite ! Locmap.gso. auto. + eapply loc_unconstrained_sound; eauto. + eapply loc_unconstrained_sound; eauto. + eapply reg_unconstrained_sound; eauto. +- (* One location *) + subst. simpl in H2. InvBooleans. + replace (encode_long oty v') with (v' :: nil). + set (e' := remove_equation {| ekind := Full; ereg := res; eloc := l1 |} e) in *. + destruct (OrderedEquation.eq_dec q (Eq Full res l1)). + subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gss. auto. + assert (EqSet.In q e'). unfold e', remove_equation; simpl. ESD.fsetdec. + simpl. rewrite Regmap.gso. rewrite Locmap.gso. auto. + eapply loc_unconstrained_sound; eauto. + eapply reg_unconstrained_sound; eauto. + destruct oty as [[]|]; reflexivity || contradiction. +Qed. + +Lemma in_subst_reg: + forall r1 r2 q (e: eqs), + EqSet.In q e -> + ereg q = r1 /\ EqSet.In (Eq (ekind q) r2 (eloc q)) (subst_reg r1 r2 e) + \/ ereg q <> r1 /\ EqSet.In q (subst_reg r1 r2 e). +Proof. + intros r1 r2 q e0 IN0. unfold subst_reg. + set (f := fun (q: EqSet.elt) e => add_equation (Eq (ekind q) r2 (eloc q)) (remove_equation q e)). + set (elt := EqSet.elements_between (select_reg_l r1) (select_reg_h r1) e0). + assert (IN_ELT: forall q, EqSet.In q elt <-> EqSet.In q e0 /\ ereg q = r1). + { + intros. unfold elt. rewrite EqSet.elements_between_iff. + rewrite select_reg_charact. tauto. + exact (select_reg_l_monotone r1). + exact (select_reg_h_monotone r1). + } + set (P := fun e1 e2 => + EqSet.In q e1 -> + EqSet.In (Eq (ekind q) r2 (eloc q)) e2). + assert (P elt (EqSet.fold f elt e0)). + { + apply ESP.fold_rec; unfold P; intros. + - ESD.fsetdec. + - simpl. red in H1. apply H1 in H3. destruct H3. + + subst x. ESD.fsetdec. + + rewrite ESF.add_iff. rewrite ESF.remove_iff. + destruct (OrderedEquation.eq_dec x {| ekind := ekind q; ereg := r2; eloc := eloc q |}); auto. + left. subst x; auto. + } + set (Q := fun e1 e2 => + ~EqSet.In q e1 -> + EqSet.In q e2). + assert (Q elt (EqSet.fold f elt e0)). + { + apply ESP.fold_rec; unfold Q; intros. + - auto. + - simpl. red in H2. rewrite H2 in H4. + rewrite ESF.add_iff. rewrite ESF.remove_iff. + right. split. apply H3. tauto. tauto. + } + destruct (ESP.In_dec q elt). + left. split. apply IN_ELT. auto. apply H. auto. + right. split. red; intros. elim n. rewrite IN_ELT. auto. apply H0. auto. +Qed. + +Lemma subst_reg_satisf: + forall src dst rs ls e, + satisf rs ls (subst_reg dst src e) -> + satisf (rs#dst <- (rs#src)) ls e. +Proof. + intros; red; intros. + destruct (in_subst_reg dst src q e H0) as [[A B] | [A B]]. + subst dst. rewrite Regmap.gss. exploit H; eauto. + rewrite Regmap.gso; auto. +Qed. + +Lemma in_subst_reg_kind: + forall r1 k1 r2 k2 q (e: eqs), + EqSet.In q e -> + (ereg q, ekind q) = (r1, k1) /\ EqSet.In (Eq k2 r2 (eloc q)) (subst_reg_kind r1 k1 r2 k2 e) + \/ EqSet.In q (subst_reg_kind r1 k1 r2 k2 e). +Proof. + intros r1 k1 r2 k2 q e0 IN0. unfold subst_reg. + set (f := fun (q: EqSet.elt) e => + if IndexedEqKind.eq (ekind q) k1 + then add_equation (Eq k2 r2 (eloc q)) (remove_equation q e) + else e). + set (elt := EqSet.elements_between (select_reg_l r1) (select_reg_h r1) e0). + assert (IN_ELT: forall q, EqSet.In q elt <-> EqSet.In q e0 /\ ereg q = r1). + { + intros. unfold elt. rewrite EqSet.elements_between_iff. + rewrite select_reg_charact. tauto. + exact (select_reg_l_monotone r1). + exact (select_reg_h_monotone r1). + } + set (P := fun e1 e2 => + EqSet.In q e1 -> ekind q = k1 -> + EqSet.In (Eq k2 r2 (eloc q)) e2). + assert (P elt (EqSet.fold f elt e0)). + { + intros; apply ESP.fold_rec; unfold P; intros. + - ESD.fsetdec. + - simpl. red in H1. apply H1 in H3. destruct H3. + + subst x. unfold f. destruct (IndexedEqKind.eq (ekind q) k1). + simpl. ESD.fsetdec. contradiction. + + unfold f. destruct (IndexedEqKind.eq (ekind x) k1). + simpl. rewrite ESF.add_iff. rewrite ESF.remove_iff. + destruct (OrderedEquation.eq_dec x {| ekind := k2; ereg := r2; eloc := eloc q |}); auto. + left. subst x; auto. + auto. + } + set (Q := fun e1 e2 => + ~EqSet.In q e1 \/ ekind q <> k1 -> + EqSet.In q e2). + assert (Q elt (EqSet.fold f elt e0)). + { + apply ESP.fold_rec; unfold Q; intros. + - auto. + - unfold f. red in H2. rewrite H2 in H4. + destruct (IndexedEqKind.eq (ekind x) k1). + simpl. rewrite ESF.add_iff. rewrite ESF.remove_iff. + right. split. apply H3. tauto. intuition congruence. + apply H3. intuition. + } + destruct (ESP.In_dec q elt). + destruct (IndexedEqKind.eq (ekind q) k1). + left. split. f_equal. apply IN_ELT. auto. auto. apply H. auto. auto. + right. apply H0. auto. + right. apply H0. auto. +Qed. + +Lemma subst_reg_kind_satisf_makelong: + forall src1 src2 dst rs ls e, + let e1 := subst_reg_kind dst High src1 Full e in + let e2 := subst_reg_kind dst Low src2 Full e1 in + reg_unconstrained dst e2 = true -> + satisf rs ls e2 -> + satisf (rs#dst <- (Val.longofwords rs#src1 rs#src2)) ls e. +Proof. + intros; red; intros. + destruct (in_subst_reg_kind dst High src1 Full q e H1) as [[A B] | B]; fold e1 in B. + destruct (in_subst_reg_kind dst Low src2 Full _ e1 B) as [[C D] | D]; fold e2 in D. + simpl in C; simpl in D. inv C. + inversion A. rewrite H3; rewrite H4. rewrite Regmap.gss. + apply Val.lessdef_trans with (rs#src1). + simpl. destruct (rs#src1); simpl; auto. destruct (rs#src2); simpl; auto. + rewrite Int64.hi_ofwords. auto. + exploit H0. eexact D. simpl. auto. + destruct (in_subst_reg_kind dst Low src2 Full q e1 B) as [[C D] | D]; fold e2 in D. + inversion C. rewrite H3; rewrite H4. rewrite Regmap.gss. + apply Val.lessdef_trans with (rs#src2). + simpl. destruct (rs#src1); simpl; auto. destruct (rs#src2); simpl; auto. + rewrite Int64.lo_ofwords. auto. + exploit H0. eexact D. simpl. auto. + rewrite Regmap.gso. apply H0; auto. eapply reg_unconstrained_sound; eauto. +Qed. + +Lemma subst_reg_kind_satisf_lowlong: + forall src dst rs ls e, + let e1 := subst_reg_kind dst Full src Low e in + reg_unconstrained dst e1 = true -> + satisf rs ls e1 -> + satisf (rs#dst <- (Val.loword rs#src)) ls e. +Proof. + intros; red; intros. + destruct (in_subst_reg_kind dst Full src Low q e H1) as [[A B] | B]; fold e1 in B. + inversion A. rewrite H3; rewrite H4. simpl. rewrite Regmap.gss. + exploit H0. eexact B. simpl. auto. + rewrite Regmap.gso. apply H0; auto. eapply reg_unconstrained_sound; eauto. +Qed. + +Lemma subst_reg_kind_satisf_highlong: + forall src dst rs ls e, + let e1 := subst_reg_kind dst Full src High e in + reg_unconstrained dst e1 = true -> + satisf rs ls e1 -> + satisf (rs#dst <- (Val.hiword rs#src)) ls e. +Proof. + intros; red; intros. + destruct (in_subst_reg_kind dst Full src High q e H1) as [[A B] | B]; fold e1 in B. + inversion A. rewrite H3; rewrite H4. simpl. rewrite Regmap.gss. + exploit H0. eexact B. simpl. auto. + rewrite Regmap.gso. apply H0; auto. eapply reg_unconstrained_sound; eauto. +Qed. + +Module ESF2 := FSetFacts.Facts(EqSet2). +Module ESP2 := FSetProperties.Properties(EqSet2). +Module ESD2 := FSetDecide.Decide(EqSet2). + +Lemma in_subst_loc: + forall l1 l2 q (e e': eqs), + EqSet.In q e -> + subst_loc l1 l2 e = Some e' -> + (eloc q = l1 /\ EqSet.In (Eq (ekind q) (ereg q) l2) e') \/ (Loc.diff l1 (eloc q) /\ EqSet.In q e'). +Proof. + intros l1 l2 q e0 e0'. + unfold subst_loc. + set (f := fun (q0 : EqSet2.elt) (opte : option eqs) => + match opte with + | Some e => + if Loc.eq l1 (eloc q0) + then + Some + (add_equation {| ekind := ekind q0; ereg := ereg q0; eloc := l2 |} + (remove_equation q0 e)) + else None + | None => None + end). + set (elt := EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e0)). + intros IN SUBST. + set (P := fun e1 (opte: option eqs) => + match opte with + | None => True + | Some e2 => + EqSet2.In q e1 -> + eloc q = l1 /\ EqSet.In (Eq (ekind q) (ereg q) l2) e2 + end). + assert (P elt (EqSet2.fold f elt (Some e0))). + { + apply ESP2.fold_rec; unfold P; intros. + - ESD2.fsetdec. + - destruct a as [e2|]; simpl; auto. + destruct (Loc.eq l1 (eloc x)); auto. + unfold add_equation, remove_equation; simpl. + red in H1. rewrite H1. intros [A|A]. + + subst x. split. auto. ESD.fsetdec. + + exploit H2; eauto. intros [B C]. split. auto. + rewrite ESF.add_iff. rewrite ESF.remove_iff. + destruct (OrderedEquation.eq_dec x {| ekind := ekind q; ereg := ereg q; eloc := l2 |}). + left. rewrite e1; auto. + right; auto. + } + set (Q := fun e1 (opte: option eqs) => + match opte with + | None => True + | Some e2 => ~EqSet2.In q e1 -> EqSet.In q e2 + end). + assert (Q elt (EqSet2.fold f elt (Some e0))). + { + apply ESP2.fold_rec; unfold Q; intros. + - auto. + - destruct a as [e2|]; simpl; auto. + destruct (Loc.eq l1 (eloc x)); auto. + red in H2. rewrite H2; intros. + unfold add_equation, remove_equation; simpl. + rewrite ESF.add_iff. rewrite ESF.remove_iff. + right; split. apply H3. tauto. tauto. + } + rewrite SUBST in H; rewrite SUBST in H0; simpl in *. + destruct (ESP2.In_dec q elt). + left. apply H; auto. + right. split; auto. + rewrite <- select_loc_charact. + destruct (select_loc_l l1 q) eqn: LL; auto. + destruct (select_loc_h l1 q) eqn: LH; auto. + elim n. eapply EqSet2.elements_between_iff. + exact (select_loc_l_monotone l1). + exact (select_loc_h_monotone l1). + split. apply eqs_same; auto. auto. +Qed. + +Lemma subst_loc_satisf: + forall src dst rs ls e e', + subst_loc dst src e = Some e' -> + satisf rs ls e' -> + satisf rs (Locmap.set dst (ls src) ls) e. +Proof. + intros; red; intros. + exploit in_subst_loc; eauto. intros [[A B] | [A B]]. + subst dst. rewrite Locmap.gss. apply (H0 _ B). + rewrite Locmap.gso; auto. +Qed. + +Lemma can_undef_sound: + forall e ml q, + can_undef ml e = true -> EqSet.In q e -> Loc.notin (eloc q) (map R ml). +Proof. + induction ml; simpl; intros. + tauto. + InvBooleans. split. + apply Loc.diff_sym. eapply loc_unconstrained_sound; eauto. + eauto. +Qed. + +Lemma undef_regs_outside: + forall ml ls l, + Loc.notin l (map R ml) -> undef_regs ml ls l = ls l. +Proof. + induction ml; simpl; intros. auto. + rewrite Locmap.gso. apply IHml. tauto. apply Loc.diff_sym. tauto. +Qed. + +Lemma can_undef_satisf: + forall ml e rs ls, + can_undef ml e = true -> + satisf rs ls e -> + satisf rs (undef_regs ml ls) e. +Proof. + intros; red; intros. rewrite undef_regs_outside. eauto. + eapply can_undef_sound; eauto. +Qed. + +Lemma can_undef_except_sound: + forall lx e ml q, + can_undef_except lx ml e = true -> EqSet.In q e -> Loc.diff (eloc q) lx -> Loc.notin (eloc q) (map R ml). +Proof. + induction ml; simpl; intros. + tauto. + InvBooleans. split. + destruct (orb_true_elim _ _ H2). + apply proj_sumbool_true in e0. congruence. + apply Loc.diff_sym. eapply loc_unconstrained_sound; eauto. + eapply IHml; eauto. +Qed. + +Lemma subst_loc_undef_satisf: + forall src dst rs ls ml e e', + subst_loc dst src e = Some e' -> + can_undef_except dst ml e = true -> + satisf rs ls e' -> + satisf rs (Locmap.set dst (ls src) (undef_regs ml ls)) e. +Proof. + intros; red; intros. + exploit in_subst_loc; eauto. intros [[A B] | [A B]]. + rewrite A. rewrite Locmap.gss. apply (H1 _ B). + rewrite Locmap.gso; auto. rewrite undef_regs_outside. eauto. + eapply can_undef_except_sound; eauto. apply Loc.diff_sym; auto. +Qed. + +Lemma transfer_use_def_satisf: + forall args res args' res' und e e' rs ls, + transfer_use_def args res args' res' und e = Some e' -> + satisf rs ls e' -> + Val.lessdef_list rs##args (reglist ls args') /\ + (forall v v', Val.lessdef v v' -> + satisf (rs#res <- v) (Locmap.set (R res') v' (undef_regs und ls)) e). +Proof. + unfold transfer_use_def; intros. MonadInv. + split. eapply add_equations_lessdef; eauto. + intros. eapply parallel_assignment_satisf; eauto. assumption. + eapply can_undef_satisf; eauto. + eapply add_equations_satisf; eauto. +Qed. + +Lemma add_equations_res_lessdef: + forall r oty ll e e' rs ls, + add_equations_res r oty ll e = Some e' -> + satisf rs ls e' -> + Val.lessdef_list (encode_long oty rs#r) (map ls ll). +Proof. + intros. functional inversion H. +- subst. simpl. constructor. + eapply add_equation_lessdef with (q := Eq High r l1). + eapply add_equation_satisf. eauto. + constructor. + eapply add_equation_lessdef with (q := Eq Low r l2). eauto. + constructor. +- subst. replace (encode_long oty rs#r) with (rs#r :: nil). simpl. constructor; auto. + eapply add_equation_lessdef with (q := Eq Full r l1); eauto. + destruct oty as [[]|]; reflexivity || contradiction. +Qed. + +Definition callee_save_loc (l: loc) := + match l with + | R r => ~(In r destroyed_at_call) + | S sl ofs ty => sl <> Outgoing + end. + +Definition agree_callee_save (ls1 ls2: locset) : Prop := + forall l, callee_save_loc l -> ls1 l = ls2 l. + +Lemma return_regs_agree_callee_save: + forall caller callee, + agree_callee_save caller (return_regs caller callee). +Proof. + intros; red; intros. unfold return_regs. red in H. + destruct l. + rewrite pred_dec_false; auto. + destruct sl; auto || congruence. +Qed. + +Lemma no_caller_saves_sound: + forall e q, + no_caller_saves e = true -> + EqSet.In q e -> + callee_save_loc (eloc q). +Proof. + unfold no_caller_saves, callee_save_loc; intros. + exploit EqSet.for_all_2; eauto. + hnf. intros. simpl in H1. rewrite H1. auto. + lazy beta. destruct (eloc q). + intros; red; intros. destruct (orb_true_elim _ _ H1); InvBooleans. + eapply int_callee_save_not_destroyed; eauto. + apply index_int_callee_save_pos2. omega. + eapply float_callee_save_not_destroyed; eauto. + apply index_float_callee_save_pos2. omega. + destruct sl; congruence. +Qed. + +Lemma function_return_satisf: + forall rs ls_before ls_after res res' sg e e' v, + res' = map R (loc_result sg) -> + remove_equations_res res (sig_res sg) res' e = Some e' -> + satisf rs ls_before e' -> + forallb (fun l => reg_loc_unconstrained res l e') res' = true -> + no_caller_saves e' = true -> + Val.lessdef_list (encode_long (sig_res sg) v) (map ls_after res') -> + agree_callee_save ls_before ls_after -> + satisf (rs#res <- v) ls_after e. +Proof. + intros; red; intros. + functional inversion H0. +- subst. rewrite <- H11 in *. unfold encode_long in H4. rewrite <- H7 in H4. + simpl in H4. inv H4. inv H14. + set (e' := remove_equation {| ekind := Low; ereg := res; eloc := l2 |} + (remove_equation {| ekind := High; ereg := res; eloc := l1 |} e)) in *. + simpl in H2. InvBooleans. + destruct (OrderedEquation.eq_dec q (Eq Low res l2)). + subst q; simpl. rewrite Regmap.gss. auto. + destruct (OrderedEquation.eq_dec q (Eq High res l1)). + subst q; simpl. rewrite Regmap.gss. auto. + assert (EqSet.In q e'). unfold e', remove_equation; simpl; ESD.fsetdec. + exploit reg_loc_unconstrained_sound. eexact H. eauto. intros [A B]. + exploit reg_loc_unconstrained_sound. eexact H2. eauto. intros [C D]. + rewrite Regmap.gso; auto. + exploit no_caller_saves_sound; eauto. intros. + red in H5. rewrite <- H5; auto. +- subst. rewrite <- H11 in *. + replace (encode_long (sig_res sg) v) with (v :: nil) in H4. + simpl in H4. inv H4. + simpl in H2. InvBooleans. + set (e' := remove_equation {| ekind := Full; ereg := res; eloc := l1 |} e) in *. + destruct (OrderedEquation.eq_dec q (Eq Full res l1)). + subst q; simpl. rewrite Regmap.gss; auto. + assert (EqSet.In q e'). unfold e', remove_equation; simpl. ESD.fsetdec. + exploit reg_loc_unconstrained_sound; eauto. intros [A B]. + rewrite Regmap.gso; auto. + exploit no_caller_saves_sound; eauto. intros. + red in H5. rewrite <- H5; auto. + destruct (sig_res sg) as [[]|]; reflexivity || contradiction. +Qed. -Lemma agree_init_regs: - forall live rl vl, - (forall r1 r2, - In r1 rl -> Regset.In r2 live -> r1 <> r2 -> - assign r1 <> assign r2) -> - agree live (RTL.init_regs vl rl) - (LTL.init_locs vl (List.map assign rl)). +Lemma compat_left_sound: + forall r l e q, + compat_left r l e = true -> EqSet.In q e -> ereg q = r -> ekind q = Full /\ eloc q = l. Proof. - intro live. - assert (agree live (Regmap.init Vundef) (Locmap.init Vundef)). - red; intros. rewrite Regmap.gi. auto. - induction rl; simpl; intros. + unfold compat_left; intros. + rewrite EqSet.for_all_between_iff in H. + apply select_reg_charact in H1. destruct H1. + exploit H; eauto. intros. + destruct (ekind q); try discriminate. + destruct (Loc.eq l (eloc q)); try discriminate. auto. - destruct vl. auto. - assert (agree live (init_regs vl rl) (init_locs vl (map assign rl))). - apply IHrl. intros. apply H0. tauto. auto. auto. - red; intros. rewrite Regmap.gsspec. destruct (peq r a). - subst r. rewrite Locmap.gss. auto. - rewrite Locmap.gso. apply H1; auto. - eapply regalloc_noteq_diff; eauto. + intros. subst x2. auto. + exact (select_reg_l_monotone r). + exact (select_reg_h_monotone r). Qed. -Lemma agree_parameters: - forall vl, - let params := f.(RTL.fn_params) in - agree (live0 f flive) - (RTL.init_regs vl params) - (LTL.init_locs vl (List.map assign params)). +Lemma compat_left2_sound: + forall r l1 l2 e q, + compat_left2 r l1 l2 e = true -> EqSet.In q e -> ereg q = r -> + (ekind q = High /\ eloc q = l1) \/ (ekind q = Low /\ eloc q = l2). Proof. - intros. apply agree_init_regs. - intros. eapply regalloc_correct_3; eauto. + unfold compat_left2; intros. + rewrite EqSet.for_all_between_iff in H. + apply select_reg_charact in H1. destruct H1. + exploit H; eauto. intros. + destruct (ekind q); try discriminate. + InvBooleans. auto. + InvBooleans. auto. + intros. subst x2. auto. + exact (select_reg_l_monotone r). + exact (select_reg_h_monotone r). Qed. -End AGREE. +Lemma val_hiword_longofwords: + forall v1 v2, Val.lessdef (Val.hiword (Val.longofwords v1 v2)) v1. +Proof. + intros. destruct v1; simpl; auto. destruct v2; auto. unfold Val.hiword. + rewrite Int64.hi_ofwords. auto. +Qed. -(** * Preservation of semantics *) +Lemma val_loword_longofwords: + forall v1 v2, Val.lessdef (Val.loword (Val.longofwords v1 v2)) v2. +Proof. + intros. destruct v1; simpl; auto. destruct v2; auto. unfold Val.loword. + rewrite Int64.lo_ofwords. auto. +Qed. -(** We now show that the LTL code reflecting register allocation has - the same semantics as the original RTL code. We start with - standard properties of translated functions and - global environments in the original and translated code. *) +Lemma compat_entry_satisf: + forall rl tyl ll e, + compat_entry rl tyl ll e = true -> + forall vl ls, + Val.lessdef_list vl (decode_longs tyl (map ls ll)) -> + satisf (init_regs vl rl) ls e. +Proof. + intros until e. functional induction (compat_entry rl tyl ll e); intros. +- (* no params *) + simpl. red; intros. rewrite Regmap.gi. destruct (ekind q); simpl; auto. +- (* a param of type Tlong *) + InvBooleans. simpl in H0. inv H0. simpl. + red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r1). + exploit compat_left2_sound; eauto. + intros [[A B] | [A B]]; rewrite A; rewrite B; simpl. + apply Val.lessdef_trans with (Val.hiword (Val.longofwords (ls l1) (ls l2))). + apply Val.hiword_lessdef; auto. apply val_hiword_longofwords. + apply Val.lessdef_trans with (Val.loword (Val.longofwords (ls l1) (ls l2))). + apply Val.loword_lessdef; auto. apply val_loword_longofwords. + eapply IHb; eauto. +- (* a param of type Tint *) + InvBooleans. simpl in H0. inv H0. simpl. + red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r1). + exploit compat_left_sound; eauto. intros [A B]. rewrite A; rewrite B; auto. + eapply IHb; eauto. +- (* a param of type Tfloat *) + InvBooleans. simpl in H0. inv H0. simpl. + red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r1). + exploit compat_left_sound; eauto. intros [A B]. rewrite A; rewrite B; auto. + eapply IHb; eauto. +- (* error case *) + discriminate. +Qed. + +Lemma call_regs_param_values: + forall sg ls, + map (call_regs ls) (loc_parameters sg) = map ls (loc_arguments sg). +Proof. + intros. unfold loc_parameters. rewrite list_map_compose. + apply list_map_exten; intros. unfold call_regs, parameter_of_argument. + exploit loc_arguments_acceptable; eauto. unfold loc_argument_acceptable. + destruct x; auto. destruct sl; tauto. +Qed. + +Lemma return_regs_arg_values: + forall sg ls1 ls2, + tailcall_is_possible sg = true -> + map (return_regs ls1 ls2) (loc_arguments sg) = map ls2 (loc_arguments sg). +Proof. + intros. apply list_map_exten; intros. + exploit loc_arguments_acceptable; eauto. + exploit tailcall_is_possible_correct; eauto. + unfold loc_argument_acceptable, return_regs. + destruct x; intros. + rewrite pred_dec_true; auto. + contradiction. +Qed. + +Lemma find_function_tailcall: + forall tge ros ls1 ls2, + ros_compatible_tailcall ros = true -> + find_function tge ros (return_regs ls1 ls2) = find_function tge ros ls2. +Proof. + unfold ros_compatible_tailcall, find_function; intros. + destruct ros as [r|id]; auto. + unfold return_regs. destruct (in_dec mreg_eq r destroyed_at_call); simpl in H. + auto. congruence. +Qed. + +(** * Properties of the dataflow analysis *) + +Lemma analyze_successors: + forall f bsh an pc bs s e, + analyze f bsh = Some an -> + bsh!pc = Some bs -> + In s (successors_block_shape bs) -> + an!!pc = OK e -> + exists e', transfer f bsh s an!!s = OK e' /\ EqSet.Subset e' e. +Proof. + unfold analyze; intros. exploit DS.fixpoint_solution; eauto. + instantiate (1 := pc). instantiate (1 := s). + unfold successors_list. rewrite PTree.gmap1. rewrite H0. simpl. auto. + rewrite H2. unfold DS.L.ge. destruct (transfer f bsh s an#s); intros. + exists e0; auto. + contradiction. +Qed. + +Lemma satisf_successors: + forall f bsh an pc bs s e rs ls, + analyze f bsh = Some an -> + bsh!pc = Some bs -> + In s (successors_block_shape bs) -> + an!!pc = OK e -> + satisf rs ls e -> + exists e', transfer f bsh s an!!s = OK e' /\ satisf rs ls e'. +Proof. + intros. exploit analyze_successors; eauto. intros [e' [A B]]. + exists e'; split; auto. eapply satisf_incr; eauto. +Qed. + +(** Inversion on [transf_function] *) + +Inductive transf_function_spec (f: RTL.function) (tf: LTL.function) : Prop := + | transf_function_spec_intro: + forall tyenv an mv k e1 e2, + wt_function f tyenv -> + analyze f (pair_codes f tf) = Some an -> + (LTL.fn_code tf)!(LTL.fn_entrypoint tf) = Some(expand_moves mv (Lbranch (RTL.fn_entrypoint f) :: k)) -> + wf_moves mv -> + transfer f (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 -> + track_moves mv e1 = Some e2 -> + compat_entry (RTL.fn_params f) (sig_args (RTL.fn_sig f)) (loc_parameters (fn_sig tf)) e2 = true -> + can_undef destroyed_at_function_entry e2 = true -> + RTL.fn_stacksize f = LTL.fn_stacksize tf -> + RTL.fn_sig f = LTL.fn_sig tf -> + transf_function_spec f tf. + +Lemma transf_function_inv: + forall f tf, + transf_function f = OK tf -> + transf_function_spec f tf. +Proof. + unfold transf_function; intros. + destruct (type_function f) as [tyenv|] eqn:TY; try discriminate. + destruct (regalloc f); try discriminate. + destruct (check_function f f0) as [] eqn:?; inv H. + unfold check_function in Heqr. + destruct (analyze f (pair_codes f tf)) as [an|] eqn:?; try discriminate. + monadInv Heqr. + destruct (check_entrypoints_aux f tf x) as [y|] eqn:?; try discriminate. + unfold check_entrypoints_aux, pair_entrypoints in Heqo0. MonadInv. + exploit extract_moves_sound; eauto. intros [A B]. subst b. + exploit check_succ_sound; eauto. intros [k EQ1]. subst b0. + econstructor; eauto. eapply type_function_correct; eauto. congruence. +Qed. + +Lemma invert_code: + forall f tf pc i opte e, + (RTL.fn_code f)!pc = Some i -> + transfer f (pair_codes f tf) pc opte = OK e -> + exists eafter, exists bsh, exists bb, + opte = OK eafter /\ + (pair_codes f tf)!pc = Some bsh /\ + (LTL.fn_code tf)!pc = Some bb /\ + expand_block_shape bsh i bb /\ + transfer_aux f bsh eafter = Some e. +Proof. + intros. destruct opte as [eafter|]; simpl in H0; try discriminate. exists eafter. + destruct (pair_codes f tf)!pc as [bsh|] eqn:?; try discriminate. exists bsh. + exploit matching_instr_block; eauto. intros [bb [A B]]. + destruct (transfer_aux f bsh eafter) as [e1|] eqn:?; inv H0. + exists bb. auto. +Qed. + +(** * Semantic preservation *) Section PRESERVATION. @@ -452,348 +1384,726 @@ Lemma sig_function_translated: transf_fundef f = OK tf -> LTL.funsig tf = RTL.funsig f. Proof. - intros f tf. destruct f; simpl. - unfold transf_function. - destruct (type_function f). - destruct (analyze f). - destruct (regalloc f t). - intro. monadInv H. inv EQ. auto. - simpl; congruence. simpl; congruence. simpl; congruence. - intro EQ; inv EQ. auto. -Qed. - -(** The proof of semantic preservation is a simulation argument - based on diagrams of the following form: -<< - st1 --------------- st2 - | | - t| |t - | | - v v - st1'--------------- st2' ->> - Hypotheses: the left vertical arrow represents a transition in the - original RTL code. The top horizontal bar is the [match_states] - relation defined below. It implies agreement between - the RTL register map [rs] and the LTL location map [ls] - over the pseudo-registers live before the RTL instruction at [pc]. - - Conclusions: the right vertical arrow is an [exec_instrs] transition - in the LTL code generated by translation of the current function. - The bottom horizontal bar is the [match_states] relation. -*) - -Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> Prop := - | match_stackframes_nil: - match_stackframes nil nil + intros; destruct f; monadInv H. + destruct (transf_function_inv _ _ EQ). simpl; auto. + auto. +Qed. + +Lemma find_function_translated: + forall ros rs fd ros' e e' ls, + RTL.find_function ge ros rs = Some fd -> + add_equation_ros ros ros' e = Some e' -> + satisf rs ls e' -> + exists tfd, + LTL.find_function tge ros' ls = Some tfd /\ transf_fundef fd = OK tfd. +Proof. + unfold RTL.find_function, LTL.find_function; intros. + destruct ros as [r|id]; destruct ros' as [r'|id']; simpl in H0; MonadInv. + (* two regs *) + exploit add_equation_lessdef; eauto. intros LD. inv LD. + eapply functions_translated; eauto. + rewrite <- H2 in H. simpl in H. congruence. + (* two symbols *) + rewrite symbols_preserved. rewrite Heqo. + eapply function_ptr_translated; eauto. +Qed. + +Lemma exec_moves: + forall mv rs s f sp bb m e e' ls, + track_moves mv e = Some e' -> + wf_moves mv -> + satisf rs ls e' -> + exists ls', + star step tge (Block s f sp (expand_moves mv bb) ls m) + E0 (Block s f sp bb ls' m) + /\ satisf rs ls' e. +Proof. +Opaque destroyed_by_op. + induction mv; simpl; intros. + (* base *) +- unfold expand_moves; simpl. inv H. exists ls; split. apply star_refl. auto. + (* step *) +- destruct a as [src dst]. unfold expand_moves. simpl. + destruct (track_moves mv e) as [e1|] eqn:?; MonadInv. + assert (wf_moves mv). red; intros. apply H0; auto with coqlib. + destruct src as [rsrc | ssrc]; destruct dst as [rdst | sdst]. + (* reg-reg *) ++ exploit IHmv; eauto. eapply subst_loc_undef_satisf; eauto. + intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto. + econstructor. simpl. eauto. auto. auto. + (* reg->stack *) ++ exploit IHmv; eauto. eapply subst_loc_undef_satisf; eauto. + intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto. + econstructor. simpl. eauto. auto. + (* stack->reg *) ++ simpl in Heqb. exploit IHmv; eauto. eapply subst_loc_undef_satisf; eauto. + intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto. + econstructor. auto. auto. + (* stack->stack *) ++ exploit H0; auto with coqlib. unfold wf_move. tauto. +Qed. + +(** The simulation relation *) + +Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signature -> Prop := + | match_stackframes_nil: forall sg, + sg.(sig_res) = Some Tint -> + match_stackframes nil nil sg | match_stackframes_cons: - forall s ts res f sp pc rs ls env live assign, - match_stackframes s ts -> - wt_function f env -> - analyze f = Some live -> - regalloc f live (live0 f live) env = Some assign -> - (forall rv, - agree assign (transfer f pc live!!pc) - (rs#res <- rv) - (Locmap.set (assign res) rv ls)) -> + forall res f sp pc rs s tf bb ls ts sg an e tyenv + (STACKS: match_stackframes s ts (fn_sig tf)) + (FUN: transf_function f = OK tf) + (ANL: analyze f (pair_codes f tf) = Some an) + (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e) + (WTF: wt_function f tyenv) + (WTRS: wt_regset tyenv rs) + (WTRES: tyenv res = proj_sig_res sg) + (STEPS: forall v ls1 m, + Val.lessdef_list (encode_long (sig_res sg) v) (map ls1 (map R (loc_result sg))) -> + agree_callee_save ls ls1 -> + exists ls2, + star LTL.step tge (Block ts tf sp bb ls1 m) + E0 (State ts tf sp pc ls2 m) + /\ satisf (rs#res <- v) ls2 e), match_stackframes (RTL.Stackframe res f sp pc rs :: s) - (LTL.Stackframe (assign res) (transf_fun f live assign) sp ls pc :: ts). + (LTL.Stackframe tf sp ls bb :: ts) + sg. Inductive match_states: RTL.state -> LTL.state -> Prop := | match_states_intro: - forall s f sp pc rs m ts ls live assign env - (STACKS: match_stackframes s ts) - (WT: wt_function f env) - (ANL: analyze f = Some live) - (ASG: regalloc f live (live0 f live) env = Some assign) - (AG: agree assign (transfer f pc live!!pc) rs ls), + forall s f sp pc rs m ts tf ls m' an e tyenv + (STACKS: match_stackframes s ts (fn_sig tf)) + (FUN: transf_function f = OK tf) + (ANL: analyze f (pair_codes f tf) = Some an) + (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e) + (SAT: satisf rs ls e) + (MEM: Mem.extends m m') + (WTF: wt_function f tyenv) + (WTRS: wt_regset tyenv rs), match_states (RTL.State s f sp pc rs m) - (LTL.State ts (transf_fun f live assign) sp pc ls m) + (LTL.State ts tf sp pc ls m') | match_states_call: - forall s f args m ts tf, - match_stackframes s ts -> - transf_fundef f = OK tf -> + forall s f args m ts tf ls m' + (STACKS: match_stackframes s ts (funsig tf)) + (FUN: transf_fundef f = OK tf) + (ARGS: Val.lessdef_list args (decode_longs (sig_args (funsig tf)) (map ls (loc_arguments (funsig tf))))) + (AG: agree_callee_save (parent_locset ts) ls) + (MEM: Mem.extends m m') + (WTARGS: Val.has_type_list args (sig_args (funsig tf))), match_states (RTL.Callstate s f args m) - (LTL.Callstate ts tf args m) + (LTL.Callstate ts tf ls m') | match_states_return: - forall s v m ts, - match_stackframes s ts -> - match_states (RTL.Returnstate s v m) - (LTL.Returnstate ts v m). - -(** The simulation proof is by case analysis over the RTL transition - taken in the source program. *) - -Ltac CleanupHyps := - match goal with - | H: (match_states _ _) |- _ => - inv H; CleanupHyps - | H1: (PTree.get _ _ = Some _), - H2: (agree _ (transfer _ _ _) _ _) |- _ => - unfold transfer in H2; rewrite H1 in H2; simpl in H2; CleanupHyps - | _ => idtac - end. - -Ltac WellTypedHyp := - match goal with - | H1: (PTree.get _ _ = Some _), - H2: (wt_function _ _) |- _ => - let R := fresh "WTI" in ( - generalize (wt_instrs _ _ H2 _ _ H1); intro R) - | _ => idtac - end. + forall s res m ts ls m' sg + (STACKS: match_stackframes s ts sg) + (RES: Val.lessdef_list (encode_long (sig_res sg) res) (map ls (map R (loc_result sg)))) + (AG: agree_callee_save (parent_locset ts) ls) + (MEM: Mem.extends m m') + (WTRES: Val.has_type res (proj_sig_res sg)), + match_states (RTL.Returnstate s res m) + (LTL.Returnstate ts ls m'). + +Lemma match_stackframes_change_sig: + forall s ts sg sg', + match_stackframes s ts sg -> + sg'.(sig_res) = sg.(sig_res) -> + match_stackframes s ts sg'. +Proof. + intros. inv H. + constructor. congruence. + econstructor; eauto. + unfold proj_sig_res in *. rewrite H0; auto. + intros. unfold loc_result in H; rewrite H0 in H; eauto. +Qed. -Ltac TranslInstr := +Ltac UseShape := match goal with - | H: (PTree.get _ _ = Some _) |- _ => - simpl in H; simpl; rewrite PTree.gmap; rewrite H; simpl; auto + | [ CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ = OK _ |- _ ] => + destruct (invert_code _ _ _ _ _ _ CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR); + inv EBS; unfold transfer_aux in TR; MonadInv end. -Ltac MatchStates := +Ltac UseType := match goal with - | |- match_states (RTL.State _ _ _ _ _ _) (LTL.State _ _ _ _ _ _) => - eapply match_states_intro; eauto; MatchStates - | H: (PTree.get ?pc _ = Some _) |- agree _ _ _ _ => - eapply agree_succ with (n := pc); eauto; MatchStates - | |- In _ (RTL.successors_instr _) => - unfold RTL.successors_instr; auto with coqlib - | _ => idtac + | [ CODE: (RTL.fn_code _)!_ = Some _, WTF: wt_function _ _ |- _ ] => + generalize (wt_instrs _ _ WTF _ _ CODE); intro WTI end. -Lemma transl_find_function: - forall ros f args lv rs ls alloc, - RTL.find_function ge ros rs = Some f -> - agree alloc (reg_list_live args (reg_sum_live ros lv)) rs ls -> - exists tf, - LTL.find_function tge (sum_left_map alloc ros) ls = Some tf /\ - transf_fundef f = OK tf. +Remark addressing_not_long: + forall env f addr args dst s r, + wt_instr env f (Iload Mint64 addr args dst s) -> + In r args -> r <> dst. Proof. - intros; destruct ros; simpl in *. - assert (rs#r = ls (alloc r)). - eapply agree_eval_reg. eapply agree_reg_list_live; eauto. - rewrite <- H1. apply functions_translated. auto. - rewrite symbols_preserved. destruct (Genv.find_symbol ge i). - apply function_ptr_translated. auto. discriminate. + intros. + assert (forall ty, In ty (type_of_addressing addr) -> ty = Tint). + { intros. destruct addr; simpl in H1; intuition. } + inv H. + assert (env r = Tint). + { apply H1. rewrite <- H5. apply in_map; auto. } + simpl in H8; congruence. Qed. -Theorem transl_step_correct: - forall s1 t s2, RTL.step ge s1 t s2 -> - forall s1', match_states s1 s1' -> - exists s2', LTL.step tge s1' t s2' /\ match_states s2 s2'. +(** The proof of semantic preservation is a simulation argument of the + "plus" kind. *) + +Lemma step_simulation: + forall S1 t S2, RTL.step ge S1 t S2 -> + forall S1', match_states S1 S1' -> + exists S2', plus LTL.step tge S1' t S2' /\ match_states S2 S2'. Proof. - induction 1; intros; CleanupHyps; WellTypedHyp. + induction 1; intros S1' MS; inv MS; try UseType; try UseShape. + +(* nop *) + exploit exec_moves; eauto. intros [ls1 [X Y]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact X. econstructor; eauto. + eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]]. + econstructor; eauto. + +(* op move *) +- simpl in H0. inv H0. + exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact X. econstructor; eauto. + eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. eapply subst_reg_satisf; eauto. + intros [enext [U V]]. + econstructor; eauto. + inv WTI. apply wt_regset_assign; auto. rewrite <- H2. apply WTRS. congruence. - (* Inop *) +(* op makelong *) +- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'. + simpl in H0. inv H0. + exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. econstructor; split. - eapply exec_Lnop. TranslInstr. MatchStates. - - (* Iop *) - generalize (PTree.gmap (transf_instr f live assign) pc (RTL.fn_code f)). - rewrite H. simpl. - caseEq (Regset.mem res live!!pc); intro LV; - rewrite LV in AG. - generalize (Regset.mem_2 LV). intro LV'. - generalize (regalloc_correct_1 f env live _ _ _ _ ASG H). - unfold correct_alloc_instr, is_redundant_move. - caseEq (is_move_operation op args). - (* Special case for moves *) - intros arg IMO CORR. - generalize (is_move_operation_correct _ _ IMO). - intros [EQ1 EQ2]. subst op; subst args. - injection H0; intro. - destruct (Loc.eq (assign arg) (assign res)); intro CODE. - (* sub-case: redundant move *) - econstructor; split. eapply exec_Lnop; eauto. - MatchStates. - rewrite <- H1. eapply agree_redundant_move_live; eauto. - (* sub-case: non-redundant move *) - econstructor; split. eapply exec_Lop; eauto. simpl. eauto. - MatchStates. - rewrite <- H1. set (ls1 := undef_temps ls). - replace (ls (assign arg)) with (ls1 (assign arg)). - eapply agree_move_live; eauto. - unfold ls1. eapply agree_undef_temps; eauto. - unfold ls1. simpl. apply Locmap.guo. eapply regalloc_not_temporary; eauto. - (* Not a move *) - intros INMO CORR CODE. - assert (eval_operation tge sp op (map ls (map assign args)) m = Some v). - replace (map ls (map assign args)) with (rs##args). - rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved. - eapply agree_eval_regs; eauto. - econstructor; split. eapply exec_Lop; eauto. MatchStates. - apply agree_assign_live with f env live; auto. - eapply agree_undef_temps; eauto. - eapply agree_reg_list_live; eauto. - (* Result is not live, instruction turned into a nop *) - intro CODE. econstructor; split. eapply exec_Lnop; eauto. - MatchStates. apply agree_assign_dead; auto. - red; intro. exploit Regset.mem_1; eauto. congruence. - - (* Iload *) - caseEq (Regset.mem dst live!!pc); intro LV; - rewrite LV in AG. - (* dst is live *) - exploit Regset.mem_2; eauto. intro LV'. - assert (eval_addressing tge sp addr (map ls (map assign args)) = Some a). - replace (map ls (map assign args)) with (rs##args). - rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. - eapply agree_eval_regs; eauto. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact X. econstructor; eauto. + eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. + eapply subst_reg_kind_satisf_makelong. eauto. eauto. + intros [enext [U V]]. + econstructor; eauto. + +(* op lowlong *) +- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'. + simpl in H0. inv H0. + exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. econstructor; split. - eapply exec_Lload; eauto. TranslInstr. rewrite LV; auto. - generalize (regalloc_correct_1 f env live _ _ _ _ ASG H). - unfold correct_alloc_instr. intro CORR. - MatchStates. - eapply agree_assign_live; eauto. - eapply agree_undef_temps; eauto. - eapply agree_reg_list_live; eauto. - (* dst is dead *) + eapply plus_left. econstructor; eauto. + eapply star_right. eexact X. econstructor; eauto. + eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. + eapply subst_reg_kind_satisf_lowlong. eauto. eauto. + intros [enext [U V]]. + econstructor; eauto. + +(* op highlong *) +- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'. + simpl in H0. inv H0. + exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. econstructor; split. - eapply exec_Lnop. TranslInstr. rewrite LV; auto. - MatchStates. apply agree_assign_dead; auto. - red; intro; exploit Regset.mem_1; eauto. congruence. - - (* Istore *) - assert (eval_addressing tge sp addr (map ls (map assign args)) = Some a). - replace (map ls (map assign args)) with (rs##args). - rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. - eapply agree_eval_regs; eauto. - assert (ESRC: rs#src = ls (assign src)). - eapply agree_eval_reg. eapply agree_reg_list_live. eauto. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact X. econstructor; eauto. + eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. + eapply subst_reg_kind_satisf_highlong. eauto. eauto. + intros [enext [U V]]. + econstructor; eauto. + +(* op regular *) +- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. + exploit transfer_use_def_satisf; eauto. intros [X Y]. + exploit eval_operation_lessdef; eauto. intros [v' [F G]]. + exploit (exec_moves mv2); eauto. intros [ls2 [A2 B2]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact A1. + eapply star_left. econstructor. instantiate (1 := v'). rewrite <- F. + apply eval_operation_preserved. exact symbols_preserved. + eauto. eapply star_right. eexact A2. constructor. + eauto. eauto. eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]]. + econstructor; eauto. + eapply wt_exec_Iop; eauto. + +(* op dead *) +- exploit exec_moves; eauto. intros [ls1 [X Y]]. econstructor; split. - eapply exec_Lstore; eauto. TranslInstr. - rewrite <- ESRC. eauto. - MatchStates. - eapply agree_undef_temps; eauto. - eapply agree_reg_live. eapply agree_reg_list_live. eauto. - - (* Icall *) - exploit transl_find_function; eauto. intros [tf [TFIND TF]]. - generalize (regalloc_correct_1 f env live _ _ _ _ ASG H).
unfold correct_alloc_instr. intros [CORR1 [CORR2 CORR3]]. - assert (rs##args = map ls (map assign args)). - eapply agree_eval_regs; eauto. - econstructor; split. - eapply exec_Lcall; eauto. TranslInstr. - rewrite (sig_function_translated _ _ TF). eauto. - rewrite H1. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact X. econstructor; eauto. + eauto. traceEq. + exploit satisf_successors. eauto. eauto. simpl; eauto. eauto. + eapply reg_unconstrained_satisf; eauto. + intros [enext [U V]]. econstructor; eauto. + eapply wt_exec_Iop; eauto. + +(* load regular *) +- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. + exploit transfer_use_def_satisf; eauto. intros [X Y]. + exploit eval_addressing_lessdef; eauto. intros [a' [F G]]. + exploit Mem.loadv_extends; eauto. intros [v' [P Q]]. + exploit (exec_moves mv2); eauto. intros [ls2 [A2 B2]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact A1. + eapply star_left. econstructor. instantiate (1 := a'). rewrite <- F. + apply eval_addressing_preserved. exact symbols_preserved. eauto. eauto. + eapply star_right. eexact A2. constructor. + eauto. eauto. eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]]. + econstructor; eauto. + eapply wt_exec_Iload; eauto. + +(* load pair *) +- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12). + set (v2' := if big_endian then v2 else v1) in *. + set (v1' := if big_endian then v1 else v2) in *. + exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. + assert (LD1: Val.lessdef_list rs##args (reglist ls1 args1')). + { eapply add_equations_lessdef; eauto. } + exploit eval_addressing_lessdef. eexact LD1. eauto. intros [a1' [F1 G1]]. + exploit Mem.loadv_extends. eauto. eexact LOAD1. eexact G1. intros (v1'' & LOAD1' & LD2). + set (ls2 := Locmap.set (R dst1') v1'' (undef_regs (destroyed_by_load Mint32 addr) ls1)). + assert (SAT2: satisf (rs#dst <- v) ls2 e2). + { eapply loc_unconstrained_satisf. eapply can_undef_satisf; eauto. + eapply reg_unconstrained_satisf. eauto. + eapply add_equations_satisf; eauto. assumption. + rewrite Regmap.gss. apply Val.lessdef_trans with v1'; auto. + subst v. unfold v1', kind_first_word. + destruct big_endian; apply val_hiword_longofwords || apply val_loword_longofwords. + } + exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. + assert (LD3: Val.lessdef_list rs##args (reglist ls3 args2')). + { replace (rs##args) with ((rs#dst<-v)##args). + eapply add_equations_lessdef; eauto. + apply list_map_exten; intros. rewrite Regmap.gso; auto. + eapply addressing_not_long; eauto. + } + exploit eval_addressing_lessdef. eexact LD3. + eapply eval_offset_addressing; eauto. intros [a2' [F2 G2]]. + exploit Mem.loadv_extends. eauto. eexact LOAD2. eexact G2. intros (v2'' & LOAD2' & LD4). + set (ls4 := Locmap.set (R dst2') v2'' (undef_regs (destroyed_by_load Mint32 addr2) ls3)). + assert (SAT4: satisf (rs#dst <- v) ls4 e0). + { eapply loc_unconstrained_satisf. eapply can_undef_satisf; eauto. + eapply add_equations_satisf; eauto. assumption. + apply Val.lessdef_trans with v2'; auto. + rewrite Regmap.gss. subst v. unfold v2', kind_second_word. + destruct big_endian; apply val_hiword_longofwords || apply val_loword_longofwords. + } + exploit (exec_moves mv3); eauto. intros [ls5 [A5 B5]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact A1. + eapply star_left. econstructor. + instantiate (1 := a1'). rewrite <- F1. apply eval_addressing_preserved. exact symbols_preserved. + eexact LOAD1'. instantiate (1 := ls2); auto. + eapply star_trans. eexact A3. + eapply star_left. econstructor. + instantiate (1 := a2'). rewrite <- F2. apply eval_addressing_preserved. exact symbols_preserved. + eexact LOAD2'. instantiate (1 := ls4); auto. + eapply star_right. eexact A5. + constructor. + eauto. eauto. eauto. eauto. eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. econstructor; eauto. - intros. eapply agree_succ with (n := pc); eauto. - simpl. auto. - eapply agree_postcall; eauto. - - (* Itailcall *) - exploit transl_find_function; eauto. intros [tf [TFIND TF]]. - assert (rs##args = map ls (map assign args)). - eapply agree_eval_regs; eauto. - econstructor; split. - eapply exec_Ltailcall; eauto. TranslInstr. - rewrite (sig_function_translated _ _ TF). eauto. - rewrite H1. econstructor; eauto. - - (* Ibuiltin *) + eapply wt_exec_Iload; eauto. + +(* load first word of a pair *) +- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12). + set (v2' := if big_endian then v2 else v1) in *. + set (v1' := if big_endian then v1 else v2) in *. + exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. + assert (LD1: Val.lessdef_list rs##args (reglist ls1 args')). + { eapply add_equations_lessdef; eauto. } + exploit eval_addressing_lessdef. eexact LD1. eauto. intros [a1' [F1 G1]]. + exploit Mem.loadv_extends. eauto. eexact LOAD1. eexact G1. intros (v1'' & LOAD1' & LD2). + set (ls2 := Locmap.set (R dst') v1'' (undef_regs (destroyed_by_load Mint32 addr) ls1)). + assert (SAT2: satisf (rs#dst <- v) ls2 e0). + { eapply parallel_assignment_satisf; eauto. + apply Val.lessdef_trans with v1'; auto. + subst v. unfold v1', kind_first_word. + destruct big_endian; apply val_hiword_longofwords || apply val_loword_longofwords. + eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto. + } + exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. econstructor; split. - eapply exec_Lbuiltin; eauto. TranslInstr. - replace (map ls (@map reg loc assign args)) with (rs##args). - eapply external_call_symbols_preserved; eauto. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact A1. + eapply star_left. econstructor. + instantiate (1 := a1'). rewrite <- F1. apply eval_addressing_preserved. exact symbols_preserved. + eexact LOAD1'. instantiate (1 := ls2); auto. + eapply star_right. eexact A3. + constructor. + eauto. eauto. eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. + econstructor; eauto. + eapply wt_exec_Iload; eauto. + +(* load second word of a pair *) +- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12). + set (v2' := if big_endian then v2 else v1) in *. + set (v1' := if big_endian then v1 else v2) in *. + exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. + assert (LD1: Val.lessdef_list rs##args (reglist ls1 args')). + { eapply add_equations_lessdef; eauto. } + exploit eval_addressing_lessdef. eexact LD1. + eapply eval_offset_addressing; eauto. intros [a1' [F1 G1]]. + exploit Mem.loadv_extends. eauto. eexact LOAD2. eexact G1. intros (v2'' & LOAD2' & LD2). + set (ls2 := Locmap.set (R dst') v2'' (undef_regs (destroyed_by_load Mint32 addr2) ls1)). + assert (SAT2: satisf (rs#dst <- v) ls2 e0). + { eapply parallel_assignment_satisf; eauto. + apply Val.lessdef_trans with v2'; auto. + subst v. unfold v2', kind_second_word. + destruct big_endian; apply val_hiword_longofwords || apply val_loword_longofwords. + eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto. + } + exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact A1. + eapply star_left. econstructor. + instantiate (1 := a1'). rewrite <- F1. apply eval_addressing_preserved. exact symbols_preserved. + eexact LOAD2'. instantiate (1 := ls2); auto. + eapply star_right. eexact A3. + constructor. + eauto. eauto. eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. + econstructor; eauto. + eapply wt_exec_Iload; eauto. + +(* load dead *) +- exploit exec_moves; eauto. intros [ls1 [X Y]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact X. econstructor; eauto. + eauto. traceEq. + exploit satisf_successors. eauto. eauto. simpl; eauto. eauto. + eapply reg_unconstrained_satisf; eauto. + intros [enext [U V]]. + econstructor; eauto. + eapply wt_exec_Iload; eauto. + +(* store *) +- exploit exec_moves; eauto. intros [ls1 [X Y]]. + exploit add_equations_lessdef; eauto. intros LD. simpl in LD. inv LD. + exploit eval_addressing_lessdef; eauto. intros [a' [F G]]. + exploit Mem.storev_extends; eauto. intros [m'' [P Q]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact X. + eapply star_two. econstructor. instantiate (1 := a'). rewrite <- F. + apply eval_addressing_preserved. exact symbols_preserved. eauto. eauto. + constructor. eauto. eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. + eapply can_undef_satisf; eauto. eapply add_equations_satisf; eauto. intros [enext [U V]]. + econstructor; eauto. + +(* store 2 *) +- exploit Mem.storev_int64_split; eauto. + replace (if big_endian then Val.hiword rs#src else Val.loword rs#src) + with (sel_val kind_first_word rs#src) + by (unfold kind_first_word; destruct big_endian; reflexivity). + replace (if big_endian then Val.loword rs#src else Val.hiword rs#src) + with (sel_val kind_second_word rs#src) + by (unfold kind_second_word; destruct big_endian; reflexivity). + intros [m1 [STORE1 STORE2]]. + exploit (exec_moves mv1); eauto. intros [ls1 [X Y]]. + exploit add_equations_lessdef. eexact Heqo1. eexact Y. intros LD1. + exploit add_equation_lessdef. eapply add_equations_satisf. eexact Heqo1. eexact Y. + simpl. intros LD2. + set (ls2 := undef_regs (destroyed_by_store Mint32 addr) ls1). + assert (SAT2: satisf rs ls2 e1). + eapply can_undef_satisf. eauto. + eapply add_equation_satisf. eapply add_equations_satisf; eauto. + exploit eval_addressing_lessdef. eexact LD1. eauto. intros [a1' [F1 G1]]. + assert (F1': eval_addressing tge sp addr (reglist ls1 args1') = Some a1'). + rewrite <- F1. apply eval_addressing_preserved. exact symbols_preserved. + exploit Mem.storev_extends. eauto. eexact STORE1. eexact G1. eauto. + intros [m1' [STORE1' EXT1]]. + exploit (exec_moves mv2); eauto. intros [ls3 [U V]]. + exploit add_equations_lessdef. eexact Heqo. eexact V. intros LD3. + exploit add_equation_lessdef. eapply add_equations_satisf. eexact Heqo. eexact V. + simpl. intros LD4. + exploit eval_addressing_lessdef. eexact LD3. eauto. intros [a2' [F2 G2]]. + assert (F2': eval_addressing tge sp addr (reglist ls3 args2') = Some a2'). + rewrite <- F2. apply eval_addressing_preserved. exact symbols_preserved. + exploit eval_offset_addressing. eauto. eexact F2'. intros F2''. + exploit Mem.storev_extends. eexact EXT1. eexact STORE2. + apply Val.add_lessdef. eexact G2. eauto. eauto. + intros [m2' [STORE2' EXT2]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact X. + eapply star_left. + econstructor. eexact F1'. eexact STORE1'. instantiate (1 := ls2). auto. + eapply star_trans. eexact U. + eapply star_two. + econstructor. eexact F2''. eexact STORE2'. eauto. + constructor. eauto. eauto. eauto. eauto. traceEq. + exploit satisf_successors; eauto. simpl; eauto. + eapply can_undef_satisf. eauto. + eapply add_equation_satisf. eapply add_equations_satisf; eauto. + intros [enext [P Q]]. + econstructor; eauto. + +(* call *) +- set (sg := RTL.funsig fd) in *. + set (args' := loc_arguments sg) in *. + set (res' := map R (loc_result sg)) in *. + exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. + exploit find_function_translated. eauto. eauto. eapply add_equations_args_satisf; eauto. + intros [tfd [E F]]. + assert (SIG: funsig tfd = sg). eapply sig_function_translated; eauto. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact A1. econstructor; eauto. + eauto. traceEq. + exploit analyze_successors; eauto. simpl. left; eauto. intros [enext [U V]]. + econstructor; eauto. + econstructor; eauto. + inv WTI. rewrite SIG. auto. + intros. exploit (exec_moves mv2); eauto. + eapply function_return_satisf with (v := v) (ls_before := ls1) (ls_after := ls0); eauto. + eapply add_equation_ros_satisf; eauto. + eapply add_equations_args_satisf; eauto. + congruence. intros [ls2 [A2 B2]]. + exists ls2; split. + eapply star_right. eexact A2. constructor. traceEq. + apply satisf_incr with eafter; auto. + rewrite SIG. eapply add_equations_args_lessdef; eauto. + inv WTI. rewrite <- H7. apply wt_regset_list; auto. + simpl. red; auto. + rewrite SIG. inv WTI. rewrite <- H7. apply wt_regset_list; auto. + +(* tailcall *) +- set (sg := RTL.funsig fd) in *. + set (args' := loc_arguments sg) in *. + exploit Mem.free_parallel_extends; eauto. intros [m'' [P Q]]. + exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. + exploit find_function_translated. eauto. eauto. eapply add_equations_args_satisf; eauto. + intros [tfd [E F]]. + assert (SIG: funsig tfd = sg). eapply sig_function_translated; eauto. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact A1. econstructor; eauto. + rewrite <- E. apply find_function_tailcall; auto. + replace (fn_stacksize tf) with (RTL.fn_stacksize f); eauto. + destruct (transf_function_inv _ _ FUN); auto. + eauto. traceEq. + econstructor; eauto. + eapply match_stackframes_change_sig; eauto. rewrite SIG. rewrite e0. decEq. + destruct (transf_function_inv _ _ FUN); auto. + rewrite SIG. rewrite return_regs_arg_values; auto. eapply add_equations_args_lessdef; eauto. + inv WTI. rewrite <- H8. apply wt_regset_list; auto. + apply return_regs_agree_callee_save. + rewrite SIG. inv WTI. rewrite <- H8. apply wt_regset_list; auto. + +(* builtin *) +- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. + exploit external_call_mem_extends; eauto. + eapply add_equations_args_lessdef; eauto. + inv WTI. rewrite <- H4. apply wt_regset_list; auto. + intros [v' [m'' [F [G [J K]]]]]. + assert (E: map ls1 (map R args') = reglist ls1 args'). + { unfold reglist. rewrite list_map_compose. auto. } + rewrite E in F. clear E. + set (vl' := encode_long (sig_res (ef_sig ef)) v'). + set (ls2 := Locmap.setlist (map R res') vl' (undef_regs (destroyed_by_builtin ef) ls1)). + assert (satisf (rs#res <- v) ls2 e0). + { eapply parallel_assignment_satisf_2; eauto. + eapply can_undef_satisf; eauto. + eapply add_equations_args_satisf; eauto. } + exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact A1. + eapply star_left. econstructor. + econstructor. unfold reglist. eapply external_call_symbols_preserved; eauto. exact symbols_preserved. exact varinfo_preserved. - eapply agree_eval_regs; eauto. - generalize (regalloc_correct_1 f env live _ _ _ _ ASG H). - unfold correct_alloc_instr. intro CORR. - MatchStates. - eapply agree_assign_live; eauto. - eapply agree_reg_list_live; eauto. - - (* Icond *) - assert (COND: eval_condition cond (map ls (map assign args)) m = Some b). - replace (map ls (map assign args)) with (rs##args). auto. - eapply agree_eval_regs; eauto. + instantiate (1 := vl'); auto. + instantiate (1 := ls2); auto. + eapply star_right. eexact A3. + econstructor. + reflexivity. reflexivity. reflexivity. traceEq. + exploit satisf_successors; eauto. simpl; eauto. + intros [enext [U V]]. + econstructor; eauto. + inv WTI. apply wt_regset_assign; auto. rewrite H9. + eapply external_call_well_typed; eauto. + +(* annot *) +- exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. + exploit external_call_mem_extends; eauto. eapply add_equations_args_lessdef; eauto. + inv WTI. simpl in H4. rewrite <- H4. apply wt_regset_list; auto. + intros [v' [m'' [F [G [J K]]]]]. + assert (v = Vundef). red in H0; inv H0. auto. econstructor; split. - eapply exec_Lcond; eauto. TranslInstr. - MatchStates. destruct b; simpl; auto. - eapply agree_undef_temps; eauto. - eapply agree_reg_list_live. eauto. + eapply plus_left. econstructor; eauto. + eapply star_trans. eexact A1. + eapply star_two. econstructor. + eapply external_call_symbols_preserved' with (ge1 := ge). + econstructor; eauto. + exact symbols_preserved. exact varinfo_preserved. + eauto. constructor. eauto. eauto. traceEq. + exploit satisf_successors. eauto. eauto. simpl; eauto. eauto. + eapply satisf_undef_reg with (r := res). + eapply add_equations_args_satisf; eauto. + intros [enext [U V]]. + econstructor; eauto. + change (destroyed_by_builtin (EF_annot txt typ)) with (@nil mreg). + simpl. subst v. assumption. + apply wt_regset_assign; auto. subst v. constructor. - (* Ijumptable *) - assert (rs#arg = ls (assign arg)). apply AG. apply Regset.add_1. auto. +(* cond *) +- exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. econstructor; split. - eapply exec_Ljumptable; eauto. TranslInstr. congruence. - MatchStates. eapply list_nth_z_in; eauto. - eapply agree_undef_temps; eauto. - eapply agree_reg_live; eauto. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact A1. + econstructor. eapply eval_condition_lessdef; eauto. eapply add_equations_lessdef; eauto. + eauto. eauto. eauto. traceEq. + exploit satisf_successors; eauto. + instantiate (1 := if b then ifso else ifnot). simpl. destruct b; auto. + eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto. + intros [enext [U V]]. + econstructor; eauto. - (* Ireturn *) +(* jumptable *) +- exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. + assert (Val.lessdef (Vint n) (ls1 (R arg'))). + rewrite <- H0. eapply add_equation_lessdef with (q := Eq Full arg (R arg')); eauto. + inv H2. econstructor; split. - eapply exec_Lreturn; eauto. TranslInstr; eauto. - replace (regmap_optget or Vundef rs) - with (locmap_optget (option_map assign or) Vundef ls). + eapply plus_left. econstructor; eauto. + eapply star_right. eexact A1. + econstructor. eauto. eauto. eauto. eauto. traceEq. + exploit satisf_successors; eauto. + instantiate (1 := pc'). simpl. eapply list_nth_z_in; eauto. + eapply can_undef_satisf. eauto. eapply add_equation_satisf; eauto. + intros [enext [U V]]. econstructor; eauto. - inv WTI. destruct or; simpl in *. - symmetry; eapply agree_eval_reg; eauto. - auto. - (* internal function *) - generalize H7. simpl. unfold transf_function. - caseEq (type_function f); simpl; try congruence. intros env TYP. - assert (WTF: wt_function f env). apply type_function_correct; auto. - caseEq (analyze f); simpl; try congruence. intros live ANL. - caseEq (regalloc f live (live0 f live) env); simpl; try congruence. - intros alloc ALLOC EQ. inv EQ. simpl in *. +(* return *) +- destruct (transf_function_inv _ _ FUN). + exploit Mem.free_parallel_extends; eauto. rewrite H10. intros [m'' [P Q]]. + destruct or as [r|]; MonadInv. + (* with an argument *) ++ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. econstructor; split. - eapply exec_function_internal; simpl; eauto. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact A1. + econstructor. eauto. eauto. traceEq. + simpl. econstructor; eauto. rewrite <- H11. + replace (map (return_regs (parent_locset ts) ls1) (map R (loc_result (RTL.fn_sig f)))) + with (map ls1 (map R (loc_result (RTL.fn_sig f)))). + eapply add_equations_res_lessdef; eauto. + rewrite !list_map_compose. apply list_map_exten; intros. + unfold return_regs. apply pred_dec_true. eapply loc_result_caller_save; eauto. + apply return_regs_agree_callee_save. + inv WTI. simpl in H13. unfold proj_sig_res. rewrite <- H11; rewrite <- H13. apply WTRS. + (* without an argument *) ++ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_right. eexact A1. + econstructor. eauto. eauto. traceEq. simpl. econstructor; eauto. - change (transfer f (RTL.fn_entrypoint f) live !! (RTL.fn_entrypoint f)) - with (live0 f live). - eapply agree_parameters; eauto. + unfold encode_long, loc_result. destruct (sig_res (fn_sig tf)) as [[]|]; simpl; auto. + apply return_regs_agree_callee_save. + constructor. + +(* internal function *) +- monadInv FUN. simpl in *. + destruct (transf_function_inv _ _ EQ). + exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H8; apply Zle_refl. + intros [m'' [U V]]. + exploit (exec_moves mv). eauto. eauto. + eapply can_undef_satisf; eauto. eapply compat_entry_satisf; eauto. + rewrite call_regs_param_values. rewrite H9. eexact ARGS. + intros [ls1 [A B]]. + econstructor; split. + eapply plus_left. econstructor; eauto. + eapply star_left. econstructor; eauto. + eapply star_right. eexact A. + econstructor; eauto. + eauto. eauto. traceEq. + econstructor; eauto. + apply wt_init_regs. inv H0. rewrite wt_params. congruence. - (* external function *) - injection H7; intro EQ; inv EQ. +(* external function *) +- exploit external_call_mem_extends; eauto. intros [v' [m'' [F [G [J K]]]]]. + simpl in FUN; inv FUN. econstructor; split. - eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. + apply plus_one. econstructor; eauto. + eapply external_call_symbols_preserved' with (ge1 := ge). + econstructor; eauto. exact symbols_preserved. exact varinfo_preserved. - eapply match_states_return; eauto. - - (* return *) - inv H4. + econstructor; eauto. simpl. + replace (map + (Locmap.setlist (map R (loc_result (ef_sig ef))) + (encode_long (sig_res (ef_sig ef)) v') ls) + (map R (loc_result (ef_sig ef)))) + with (encode_long (sig_res (ef_sig ef)) v'). + apply encode_long_lessdef; auto. + unfold encode_long, loc_result. + destruct (sig_res (ef_sig ef)) as [[]|]; simpl; symmetry; f_equal; auto. + red; intros. rewrite Locmap.gsetlisto. apply AG; auto. + apply Loc.notin_iff. intros. + exploit list_in_map_inv; eauto. intros [r [A B]]; subst l'. + destruct l; simpl; auto. red; intros; subst r0; elim H0. + eapply loc_result_caller_save; eauto. + simpl. eapply external_call_well_typed; eauto. + +(* return *) +- inv STACKS. + exploit STEPS; eauto. intros [ls2 [A B]]. econstructor; split. - eapply exec_return; eauto. + eapply plus_left. constructor. eexact A. traceEq. econstructor; eauto. + apply wt_regset_assign; auto. congruence. Qed. -(** The semantic equivalence between the original and transformed programs - follows easily. *) - -Lemma transf_initial_states: +Lemma initial_states_simulation: forall st1, RTL.initial_state prog st1 -> exists st2, LTL.initial_state tprog st2 /\ match_states st1 st2. Proof. - intros. inversion H. + intros. inv H. exploit function_ptr_translated; eauto. intros [tf [FIND TR]]. - exists (LTL.Callstate nil tf nil m0); split. + exploit sig_function_translated; eauto. intros SIG. + exists (LTL.Callstate nil tf (Locmap.init Vundef) m0); split. econstructor; eauto. eapply Genv.init_mem_transf_partial; eauto. rewrite symbols_preserved. rewrite (transform_partial_program_main _ _ TRANSF). auto. - rewrite <- H3. apply sig_function_translated; auto. - constructor; auto. constructor. + congruence. + constructor; auto. + constructor. rewrite SIG; rewrite H3; auto. + rewrite SIG; rewrite H3; simpl; auto. + red; auto. + apply Mem.extends_refl. + rewrite SIG; rewrite H3; simpl; auto. Qed. -Lemma transf_final_states: +Lemma final_states_simulation: forall st1 st2 r, match_states st1 st2 -> RTL.final_state st1 r -> LTL.final_state st2 r. Proof. - intros. inv H0. inv H. inv H4. econstructor. + intros. inv H0. inv H. inv STACKS. + econstructor. simpl; reflexivity. + unfold loc_result in RES; rewrite H in RES. simpl in RES. inv RES. inv H3; auto. Qed. Theorem transf_program_correct: forward_simulation (RTL.semantics prog) (LTL.semantics tprog). Proof. - eapply forward_simulation_step. + eapply forward_simulation_plus. eexact symbols_preserved. - eexact transf_initial_states. - eexact transf_final_states. - exact transl_step_correct. + eexact initial_states_simulation. + eexact final_states_simulation. + exact step_simulation. Qed. End PRESERVATION. diff --git a/backend/Alloctyping.v b/backend/Alloctyping.v deleted file mode 100644 index 59bf621..0000000 --- a/backend/Alloctyping.v +++ /dev/null @@ -1,205 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Preservation of typing during register allocation. *) - -Require Import Coqlib. -Require Import Maps. -Require Import Errors. -Require Import AST. -Require Import Op. -Require Import Registers. -Require Import RTL. -Require Import Liveness. -Require Import Locations. -Require Import LTL. -Require Import Coloring. -Require Import Coloringproof. -Require Import Allocation. -Require Import Allocproof. -Require Import RTLtyping. -Require Import LTLtyping. -Require Import Conventions. - -(** This file proves that register allocation (the translation from - RTL to LTL defined in file [Allocation]) preserves typing: - given a well-typed RTL input, it produces LTL code that is - well-typed. *) - -Section TYPING_FUNCTION. - -Variable f: RTL.function. -Variable env: regenv. -Variable live: PMap.t Regset.t. -Variable alloc: reg -> loc. -Variable tf: LTL.function. - -Hypothesis TYPE_RTL: type_function f = OK env. -Hypothesis LIVE: analyze f = Some live. -Hypothesis ALLOC: regalloc f live (live0 f live) env = Some alloc. -Hypothesis TRANSL: transf_function f = OK tf. - -Lemma wt_rtl_function: RTLtyping.wt_function f env. -Proof. - apply type_function_correct; auto. -Qed. - -Lemma alloc_type: forall r, Loc.type (alloc r) = env r. -Proof. - intro. eapply regalloc_preserves_types; eauto. -Qed. - -Lemma alloc_types: - forall rl, List.map Loc.type (List.map alloc rl) = List.map env rl. -Proof. - intros. rewrite list_map_compose. apply list_map_exten. - intros. symmetry. apply alloc_type. -Qed. - -Lemma alloc_acceptable: - forall r, loc_acceptable (alloc r). -Proof. - intros. eapply regalloc_acceptable; eauto. -Qed. - -Lemma allocs_acceptable: - forall rl, locs_acceptable (List.map alloc rl). -Proof. - intros. eapply regsalloc_acceptable; eauto. -Qed. - -Remark transf_unroll: - tf = transf_fun f live alloc. -Proof. - generalize TRANSL. unfold transf_function. - rewrite TYPE_RTL. rewrite LIVE. rewrite ALLOC. congruence. -Qed. - -Lemma valid_successor_transf: - forall s, - RTLtyping.valid_successor f s -> - LTLtyping.valid_successor tf s. -Proof. - unfold RTLtyping.valid_successor, LTLtyping.valid_successor. - intros s [i AT]. - rewrite transf_unroll; simpl. rewrite PTree.gmap. - rewrite AT. exists (transf_instr f live alloc s i). auto. -Qed. - -Hint Resolve alloc_acceptable allocs_acceptable: allocty. -Hint Rewrite alloc_type alloc_types: allocty. -Hint Resolve valid_successor_transf: allocty. - -(** * Type preservation during translation from RTL to LTL *) - -Ltac WT := - constructor; auto with allocty; autorewrite with allocty; auto. - -Lemma wt_transf_instr: - forall pc instr, - RTLtyping.wt_instr env f instr -> - f.(RTL.fn_code)!pc = Some instr -> - wt_instr tf (transf_instr f live alloc pc instr). -Proof. - intros. inv H; simpl. - (* nop *) - WT. - (* move *) - destruct (Regset.mem r live!!pc). - destruct (is_redundant_move Omove (r1 :: nil) r alloc); WT. - WT. - (* other ops *) - destruct (Regset.mem res live!!pc). - destruct (is_redundant_move op args res alloc); WT. - WT. - (* load *) - destruct (Regset.mem dst live!!pc); WT. - (* store *) - WT. - (* call *) - exploit regalloc_correct_1; eauto. unfold correct_alloc_instr. - intros [A1 [A2 A3]]. - WT. - destruct ros; simpl; auto. - split. autorewrite with allocty; auto. - split. auto with allocty. auto. - (* tailcall *) - exploit regalloc_correct_1; eauto. unfold correct_alloc_instr. - intro A1. - WT. - destruct ros; simpl; auto. - split. autorewrite with allocty; auto. - split. auto with allocty. auto. - rewrite transf_unroll; auto. - (* builtin *) - WT. - (* cond *) - WT. - (* jumptable *) - WT. - (* return *) - WT. - rewrite transf_unroll; simpl. - destruct optres; simpl. autorewrite with allocty. auto. auto. - destruct optres; simpl; auto with allocty. -Qed. - -End TYPING_FUNCTION. - -Lemma wt_transf_function: - forall f tf, - transf_function f = OK tf -> wt_function tf. -Proof. - intros. generalize H; unfold transf_function. - caseEq (type_function f). intros env TYP. - caseEq (analyze f). intros live ANL. - change (transfer f (RTL.fn_entrypoint f) - live!!(RTL.fn_entrypoint f)) - with (live0 f live). - caseEq (regalloc f live (live0 f live) env). - intros alloc ALLOC. - intro EQ; injection EQ; intro. - assert (RTLtyping.wt_function f env). apply type_function_correct; auto. - inversion H1. - constructor; rewrite <- H0; simpl. - rewrite (alloc_types _ _ _ _ ALLOC). auto. - eapply regsalloc_acceptable; eauto. - eapply regalloc_norepet_norepet; eauto. - eapply regalloc_correct_2; eauto. - intros until instr. rewrite PTree.gmap. - caseEq (RTL.fn_code f)!pc; simpl; intros. - inversion H3. eapply wt_transf_instr; eauto. congruence. discriminate. - eapply valid_successor_transf; eauto. congruence. - congruence. congruence. congruence. -Qed. - -Lemma wt_transf_fundef: - forall f tf, - transf_fundef f = OK tf -> wt_fundef tf. -Proof. - intros until tf; destruct f; simpl. - caseEq (transf_function f); simpl. intros g TF EQ. inversion EQ. - constructor. eapply wt_transf_function; eauto. - congruence. - intros. inversion H. constructor. -Qed. - -Lemma program_typing_preserved: - forall (p: RTL.program) (tp: LTL.program), - transf_program p = OK tp -> - LTLtyping.wt_program tp. -Proof. - intros; red; intros. - generalize (transform_partial_program_function transf_fundef p i f H H0). - intros [f0 [IN TRANSF]]. - apply wt_transf_fundef with f0; auto. -Qed. diff --git a/backend/Asmgenproof0.v b/backend/Asmgenproof0.v index 72de80a..f74fba8 100644 --- a/backend/Asmgenproof0.v +++ b/backend/Asmgenproof0.v @@ -80,29 +80,6 @@ Qed. Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen. -Lemma nontemp_diff: - forall r r', - nontemp_preg r = true -> nontemp_preg r' = false -> r <> r'. -Proof. - congruence. -Qed. -Hint Resolve nontemp_diff: asmgen. - -Lemma temporaries_temp_preg: - forall r, In r temporary_regs -> nontemp_preg (preg_of r) = false. -Proof. - assert (List.forallb (fun r => negb (nontemp_preg (preg_of r))) temporary_regs = true) by reflexivity. - rewrite List.forallb_forall in H. intros. generalize (H r H0). - destruct (nontemp_preg (preg_of r)); simpl; congruence. -Qed. - -Lemma nontemp_data_preg: - forall r, nontemp_preg r = true -> data_preg r = true. -Proof. - destruct r; try (destruct i); try (destruct f); simpl; congruence. -Qed. -Hint Resolve nontemp_data_preg: asmgen. - Lemma nextinstr_pc: forall rs, (nextinstr rs)#PC = Val.add rs#PC Vone. Proof. @@ -121,12 +98,6 @@ Proof. intros. apply nextinstr_inv. red; intro; subst; discriminate. Qed. -Lemma nextinstr_inv2: - forall r rs, nontemp_preg r = true -> (nextinstr rs)#r = rs#r. -Proof. - intros. apply nextinstr_inv1; auto with asmgen. -Qed. - Lemma nextinstr_set_preg: forall rs m v, (nextinstr (rs#(preg_of m) <- v))#PC = Val.add rs#PC Vone. @@ -135,6 +106,58 @@ Proof. rewrite Pregmap.gso. auto. apply sym_not_eq. apply preg_of_not_PC. Qed. +Lemma undef_regs_other: + forall r rl rs, + (forall r', In r' rl -> r <> r') -> + undef_regs rl rs r = rs r. +Proof. + induction rl; simpl; intros. auto. + rewrite IHrl by auto. rewrite Pregmap.gso; auto. +Qed. + +Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop := + match rl with + | nil => True + | r1 :: nil => r <> preg_of r1 + | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl + end. + +Remark preg_notin_charact: + forall r rl, + preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr). +Proof. + induction rl; simpl; intros. + tauto. + destruct rl. + simpl. split. intros. intuition congruence. auto. + rewrite IHrl. split. + intros [A B]. intros. destruct H. congruence. auto. + auto. +Qed. + +Lemma undef_regs_other_2: + forall r rl rs, + preg_notin r rl -> + undef_regs (map preg_of rl) rs r = rs r. +Proof. + intros. apply undef_regs_other. intros. + exploit list_in_map_inv; eauto. intros [mr [A B]]. subst. + rewrite preg_notin_charact in H. auto. +Qed. + +Lemma set_pregs_other_2: + forall r rl vl rs, + preg_notin r rl -> + set_regs (map preg_of rl) vl rs r = rs r. +Proof. + induction rl; simpl; intros. + auto. + destruct vl; auto. + assert (r <> preg_of a) by (destruct rl; tauto). + assert (preg_notin r rl) by (destruct rl; simpl; tauto). + rewrite IHrl by auto. apply Pregmap.gso; auto. +Qed. + (** * Agreement between Mach registers and processor registers *) Record agree (ms: Mach.regset) (sp: val) (rs: Asm.regset) : Prop := mkagree { @@ -224,7 +247,21 @@ Proof. intros. unfold nextinstr. apply agree_set_other. auto. auto. Qed. -Lemma agree_undef_regs: +Lemma agree_set_mregs: + forall sp rl vl vl' ms rs, + agree ms sp rs -> + Val.lessdef_list vl vl' -> + agree (Mach.set_regs rl vl ms) sp (set_regs (map preg_of rl) vl' rs). +Proof. + induction rl; simpl; intros. + auto. + inv H0. auto. apply IHrl; auto. + eapply agree_set_mreg. eexact H. + rewrite Pregmap.gss. auto. + intros. apply Pregmap.gso; auto. +Qed. + +Lemma agree_undef_nondata_regs: forall ms sp rl rs, agree ms sp rs -> (forall r, In r rl -> data_preg r = false) -> @@ -237,31 +274,33 @@ Proof. intros. apply H0; auto. Qed. -Lemma agree_exten_temps: - forall ms sp rs rs', +Lemma agree_undef_regs: + forall ms sp rl rs rs', agree ms sp rs -> - (forall r, nontemp_preg r = true -> rs'#r = rs#r) -> - agree (undef_temps ms) sp rs'. + (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') -> + agree (Mach.undef_regs rl ms) sp rs'. Proof. intros. destruct H. split; auto. - rewrite H0; auto. auto. - intros. unfold undef_temps. - destruct (In_dec mreg_eq r temporary_regs). - rewrite Mach.undef_regs_same; auto. - rewrite Mach.undef_regs_other; auto. rewrite H0; auto. - simpl in n. destruct r; auto; intuition. + rewrite <- agree_sp0. apply H0; auto. + rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP. + intros. destruct (In_dec mreg_eq r rl). + rewrite Mach.undef_regs_same; auto. + rewrite Mach.undef_regs_other; auto. rewrite H0; auto. + apply preg_of_data. + rewrite preg_notin_charact. intros; red; intros. elim n. + exploit preg_of_injective; eauto. congruence. Qed. Lemma agree_set_undef_mreg: - forall ms sp rs r v rs', + forall ms sp rs r v rl rs', agree ms sp rs -> Val.lessdef v (rs'#(preg_of r)) -> - (forall r', nontemp_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r') -> - agree (Regmap.set r v (undef_temps ms)) sp rs'. + (forall r', data_preg r' = true -> r' <> preg_of r -> preg_notin r' rl -> rs'#r' = rs#r') -> + agree (Regmap.set r v (Mach.undef_regs rl ms)) sp rs'. Proof. intros. apply agree_set_mreg with (rs'#(preg_of r) <- (rs#(preg_of r))); auto. - eapply agree_exten_temps; eauto. - intros. unfold Pregmap.set. destruct (PregEq.eq r0 (preg_of r)). + apply agree_undef_regs with rs; auto. + intros. unfold Pregmap.set. destruct (PregEq.eq r' (preg_of r)). congruence. auto. intros. rewrite Pregmap.gso; auto. Qed. @@ -291,9 +330,7 @@ Proof. exploit Mem.loadv_extends; eauto. intros [v' [A B]]. rewrite (sp_val _ _ _ H) in A. exists v'; split; auto. - destruct ty; econstructor. - reflexivity. assumption. - reflexivity. assumption. + econstructor. eauto. assumption. Qed. Lemma extcall_args_match: @@ -667,6 +704,7 @@ Ltac TailNoLabel := match goal with | [ |- tail_nolabel _ (_ :: _) ] => apply tail_nolabel_cons; [auto; exact I | TailNoLabel] | [ H: Error _ = OK _ |- _ ] => discriminate + | [ H: assertion_failed = OK _ |- _ ] => discriminate | [ H: OK _ = OK _ |- _ ] => inv H; TailNoLabel | [ H: bind _ _ = OK _ |- _ ] => monadInv H; TailNoLabel | [ H: (if ?x then _ else _) = OK _ |- _ ] => destruct x; TailNoLabel diff --git a/backend/Bounds.v b/backend/Bounds.v index ef78b2e..bcd2848 100644 --- a/backend/Bounds.v +++ b/backend/Bounds.v @@ -17,7 +17,6 @@ Require Import AST. Require Import Op. Require Import Locations. Require Import Linear. -Require Import Lineartyping. Require Import Conventions. (** * Resource bounds for a function *) @@ -30,14 +29,12 @@ Require Import Conventions. the activation record. *) Record bounds : Type := mkbounds { - bound_int_local: Z; - bound_float_local: Z; + bound_local: Z; bound_int_callee_save: Z; bound_float_callee_save: Z; bound_outgoing: Z; bound_stack_data: Z; - bound_int_local_pos: bound_int_local >= 0; - bound_float_local_pos: bound_float_local >= 0; + bound_local_pos: bound_local >= 0; bound_int_callee_save_pos: bound_int_callee_save >= 0; bound_float_callee_save_pos: bound_float_callee_save >= 0; bound_outgoing_pos: bound_outgoing >= 0; @@ -47,41 +44,39 @@ Record bounds : Type := mkbounds { (** The following predicates define the correctness of a set of bounds for the code of a function. *) -Section BELOW. +Section WITHIN_BOUNDS. -Variable funct: function. Variable b: bounds. Definition mreg_within_bounds (r: mreg) := - match mreg_type r with - | Tint => index_int_callee_save r < bound_int_callee_save b - | Tfloat => index_float_callee_save r < bound_float_callee_save b - end. - -Definition slot_within_bounds (s: slot) := - match s with - | Local ofs Tint => 0 <= ofs < bound_int_local b - | Local ofs Tfloat => 0 <= ofs < bound_float_local b - | Outgoing ofs ty => 0 <= ofs /\ ofs + typesize ty <= bound_outgoing b - | Incoming ofs ty => In (S s) (loc_parameters funct.(fn_sig)) + index_int_callee_save r < bound_int_callee_save b + /\ index_float_callee_save r < bound_float_callee_save b. + +Definition slot_within_bounds (sl: slot) (ofs: Z) (ty: typ) := + match sl with + | Local => ofs + typesize ty <= bound_local b + | Outgoing => ofs + typesize ty <= bound_outgoing b + | Incoming => True end. Definition instr_within_bounds (i: instruction) := match i with - | Lgetstack s r => slot_within_bounds s /\ mreg_within_bounds r - | Lsetstack r s => slot_within_bounds s + | Lgetstack sl ofs ty r => slot_within_bounds sl ofs ty /\ mreg_within_bounds r + | Lsetstack r sl ofs ty => slot_within_bounds sl ofs ty | Lop op args res => mreg_within_bounds res | Lload chunk addr args dst => mreg_within_bounds dst | Lcall sig ros => size_arguments sig <= bound_outgoing b - | Lbuiltin ef args res => mreg_within_bounds res - | Lannot ef args => forall s, In (S s) args -> slot_within_bounds s + | Lbuiltin ef args res => + forall r, In r res \/ In r (destroyed_by_builtin ef) -> mreg_within_bounds r + | Lannot ef args => + forall sl ofs ty, In (S sl ofs ty) args -> slot_within_bounds sl ofs ty | _ => True end. -End BELOW. +End WITHIN_BOUNDS. Definition function_within_bounds (f: function) (b: bounds) : Prop := - forall instr, In instr f.(fn_code) -> instr_within_bounds f b instr. + forall instr, In instr f.(fn_code) -> instr_within_bounds b instr. (** * Inference of resource bounds for a function *) @@ -99,14 +94,14 @@ Variable f: function. Definition regs_of_instr (i: instruction) : list mreg := match i with - | Lgetstack s r => r :: nil - | Lsetstack r s => r :: nil + | Lgetstack sl ofs ty r => r :: nil + | Lsetstack r sl ofs ty => r :: nil | Lop op args res => res :: nil | Lload chunk addr args dst => dst :: nil | Lstore chunk addr args src => nil | Lcall sig ros => nil | Ltailcall sig ros => nil - | Lbuiltin ef args res => res :: nil + | Lbuiltin ef args res => res ++ destroyed_by_builtin ef | Lannot ef args => nil | Llabel lbl => nil | Lgoto lbl => nil @@ -115,58 +110,55 @@ Definition regs_of_instr (i: instruction) : list mreg := | Lreturn => nil end. -Fixpoint slots_of_locs (l: list loc) : list slot := +Fixpoint slots_of_locs (l: list loc) : list (slot * Z * typ) := match l with | nil => nil - | S s :: l' => s :: slots_of_locs l' + | S sl ofs ty :: l' => (sl, ofs, ty) :: slots_of_locs l' | R r :: l' => slots_of_locs l' end. -Definition slots_of_instr (i: instruction) : list slot := +Definition slots_of_instr (i: instruction) : list (slot * Z * typ) := match i with - | Lgetstack s r => s :: nil - | Lsetstack r s => s :: nil + | Lgetstack sl ofs ty r => (sl, ofs, ty) :: nil + | Lsetstack r sl ofs ty => (sl, ofs, ty) :: nil | Lannot ef args => slots_of_locs args | _ => nil end. -Definition max_over_list (A: Type) (valu: A -> Z) (l: list A) : Z := +Definition max_over_list {A: Type} (valu: A -> Z) (l: list A) : Z := List.fold_left (fun m l => Zmax m (valu l)) l 0. Definition max_over_instrs (valu: instruction -> Z) : Z := - max_over_list instruction valu f.(fn_code). + max_over_list valu f.(fn_code). Definition max_over_regs_of_instr (valu: mreg -> Z) (i: instruction) : Z := - max_over_list mreg valu (regs_of_instr i). + max_over_list valu (regs_of_instr i). -Definition max_over_slots_of_instr (valu: slot -> Z) (i: instruction) : Z := - max_over_list slot valu (slots_of_instr i). +Definition max_over_slots_of_instr (valu: slot * Z * typ -> Z) (i: instruction) : Z := + max_over_list valu (slots_of_instr i). Definition max_over_regs_of_funct (valu: mreg -> Z) : Z := max_over_instrs (max_over_regs_of_instr valu). -Definition max_over_slots_of_funct (valu: slot -> Z) : Z := +Definition max_over_slots_of_funct (valu: slot * Z * typ -> Z) : Z := max_over_instrs (max_over_slots_of_instr valu). Definition int_callee_save (r: mreg) := 1 + index_int_callee_save r. Definition float_callee_save (r: mreg) := 1 + index_float_callee_save r. -Definition int_local (s: slot) := - match s with Local ofs Tint => 1 + ofs | _ => 0 end. - -Definition float_local (s: slot) := - match s with Local ofs Tfloat => 1 + ofs | _ => 0 end. +Definition local_slot (s: slot * Z * typ) := + match s with (Local, ofs, ty) => ofs + typesize ty | _ => 0 end. -Definition outgoing_slot (s: slot) := - match s with Outgoing ofs ty => ofs + typesize ty | _ => 0 end. +Definition outgoing_slot (s: slot * Z * typ) := + match s with (Outgoing, ofs, ty) => ofs + typesize ty | _ => 0 end. Definition outgoing_space (i: instruction) := match i with Lcall sig _ => size_arguments sig | _ => 0 end. Lemma max_over_list_pos: forall (A: Type) (valu: A -> Z) (l: list A), - max_over_list A valu l >= 0. + max_over_list valu l >= 0. Proof. intros until valu. unfold max_over_list. assert (forall l z, fold_left (fun x y => Zmax x (valu y)) l z >= z). @@ -176,7 +168,7 @@ Proof. Qed. Lemma max_over_slots_of_funct_pos: - forall (valu: slot -> Z), max_over_slots_of_funct valu >= 0. + forall (valu: slot * Z * typ -> Z), max_over_slots_of_funct valu >= 0. Proof. intros. unfold max_over_slots_of_funct. unfold max_over_instrs. apply max_over_list_pos. @@ -188,18 +180,16 @@ Proof. intros. unfold max_over_regs_of_funct. unfold max_over_instrs. apply max_over_list_pos. Qed. - + Program Definition function_bounds := mkbounds - (max_over_slots_of_funct int_local) - (max_over_slots_of_funct float_local) + (max_over_slots_of_funct local_slot) (max_over_regs_of_funct int_callee_save) (max_over_regs_of_funct float_callee_save) (Zmax (max_over_instrs outgoing_space) (max_over_slots_of_funct outgoing_slot)) (Zmax f.(fn_stacksize) 0) - (max_over_slots_of_funct_pos int_local) - (max_over_slots_of_funct_pos float_local) + (max_over_slots_of_funct_pos local_slot) (max_over_regs_of_funct_pos int_callee_save) (max_over_regs_of_funct_pos float_callee_save) _ _. @@ -215,7 +205,7 @@ Qed. Lemma max_over_list_bound: forall (A: Type) (valu: A -> Z) (l: list A) (x: A), - In x l -> valu x <= max_over_list A valu l. + In x l -> valu x <= max_over_list valu l. Proof. intros until x. unfold max_over_list. assert (forall c z, @@ -250,7 +240,7 @@ Proof. Qed. Lemma max_over_slots_of_funct_bound: - forall (valu: slot -> Z) i s, + forall (valu: slot * Z * typ -> Z) i s, In i f.(fn_code) -> In s (slots_of_instr i) -> valu s <= max_over_slots_of_funct valu. Proof. @@ -282,34 +272,23 @@ Proof. eapply max_over_regs_of_funct_bound; eauto. Qed. -Lemma int_local_slot_bound: - forall i ofs, - In i f.(fn_code) -> In (Local ofs Tint) (slots_of_instr i) -> - ofs < bound_int_local function_bounds. -Proof. - intros. apply Zlt_le_trans with (int_local (Local ofs Tint)). - unfold int_local. omega. - unfold function_bounds, bound_int_local. - eapply max_over_slots_of_funct_bound; eauto. -Qed. - -Lemma float_local_slot_bound: - forall i ofs, - In i f.(fn_code) -> In (Local ofs Tfloat) (slots_of_instr i) -> - ofs < bound_float_local function_bounds. +Lemma local_slot_bound: + forall i ofs ty, + In i f.(fn_code) -> In (Local, ofs, ty) (slots_of_instr i) -> + ofs + typesize ty <= bound_local function_bounds. Proof. - intros. apply Zlt_le_trans with (float_local (Local ofs Tfloat)). - unfold float_local. omega. - unfold function_bounds, bound_float_local. + intros. + unfold function_bounds, bound_local. + change (ofs + typesize ty) with (local_slot (Local, ofs, ty)). eapply max_over_slots_of_funct_bound; eauto. Qed. Lemma outgoing_slot_bound: forall i ofs ty, - In i f.(fn_code) -> In (Outgoing ofs ty) (slots_of_instr i) -> + In i f.(fn_code) -> In (Outgoing, ofs, ty) (slots_of_instr i) -> ofs + typesize ty <= bound_outgoing function_bounds. Proof. - intros. change (ofs + typesize ty) with (outgoing_slot (Outgoing ofs ty)). + intros. change (ofs + typesize ty) with (outgoing_slot (Outgoing, ofs, ty)). unfold function_bounds, bound_outgoing. apply Zmax_bound_r. eapply max_over_slots_of_funct_bound; eauto. Qed. @@ -332,28 +311,25 @@ Lemma mreg_is_within_bounds: forall r, In r (regs_of_instr i) -> mreg_within_bounds function_bounds r. Proof. - intros. unfold mreg_within_bounds. - case (mreg_type r). + intros. unfold mreg_within_bounds. split. eapply int_callee_save_bound; eauto. eapply float_callee_save_bound; eauto. Qed. Lemma slot_is_within_bounds: forall i, In i f.(fn_code) -> - forall s, In s (slots_of_instr i) -> Lineartyping.slot_valid f s -> - slot_within_bounds f function_bounds s. + forall sl ty ofs, In (sl, ofs, ty) (slots_of_instr i) -> + slot_within_bounds function_bounds sl ofs ty. Proof. intros. unfold slot_within_bounds. - destruct s. - destruct t. - split. exact H1. eapply int_local_slot_bound; eauto. - split. exact H1. eapply float_local_slot_bound; eauto. - exact H1. - split. simpl in H1. exact H1. eapply outgoing_slot_bound; eauto. + destruct sl. + eapply local_slot_bound; eauto. + auto. + eapply outgoing_slot_bound; eauto. Qed. Lemma slots_of_locs_charact: - forall s l, In s (slots_of_locs l) <-> In (S s) l. + forall sl ofs ty l, In (sl, ofs, ty) (slots_of_locs l) <-> In (S sl ofs ty) l. Proof. induction l; simpl; intros. tauto. @@ -366,27 +342,21 @@ Qed. Lemma instr_is_within_bounds: forall i, In i f.(fn_code) -> - Lineartyping.wt_instr f i -> - instr_within_bounds f function_bounds i. + instr_within_bounds function_bounds i. Proof. intros; destruct i; generalize (mreg_is_within_bounds _ H); generalize (slot_is_within_bounds _ H); simpl; intros; auto. -(* getstack *) - inv H0. split; auto. -(* setstack *) - inv H0; auto. (* call *) eapply size_arguments_bound; eauto. +(* builtin *) + apply H1. apply in_or_app; auto. (* annot *) - inv H0. apply H1. rewrite slots_of_locs_charact; auto. - generalize (H8 _ H3). unfold loc_acceptable, slot_valid. - destruct s; (contradiction || omega). + apply H0. rewrite slots_of_locs_charact; auto. Qed. Lemma function_is_within_bounds: - Lineartyping.wt_code f f.(fn_code) -> function_within_bounds f function_bounds. Proof. intros; red; intros. apply instr_is_within_bounds; auto. diff --git a/backend/CMtypecheck.ml b/backend/CMtypecheck.ml index 39e8c51..1a19f71 100644 --- a/backend/CMtypecheck.ml +++ b/backend/CMtypecheck.ml @@ -24,15 +24,16 @@ open Cminor exception Error of string -let name_of_typ = function Tint -> "int" | Tfloat -> "float" +let name_of_typ = function Tint -> "int" | Tfloat -> "float" | Tlong -> "long" type ty = Base of typ | Var of ty option ref let newvar () = Var (ref None) let tint = Base Tint let tfloat = Base Tfloat +let tlong = Base Tlong -let ty_of_typ = function Tint -> tint | Tfloat -> tfloat +let ty_of_typ = function Tint -> tint | Tfloat -> tfloat | Tlong -> tlong let ty_of_sig_args tyl = List.map ty_of_typ tyl @@ -81,6 +82,7 @@ let name_of_comparison = function let type_constant = function | Ointconst _ -> tint | Ofloatconst _ -> tfloat + | Olongconst _ -> tlong | Oaddrsymbol _ -> tint | Oaddrstack _ -> tint @@ -98,6 +100,15 @@ let type_unary_operation = function | Ointuoffloat -> tfloat, tint | Ofloatofint -> tint, tfloat | Ofloatofintu -> tint, tfloat + | Onegl -> tlong, tlong + | Onotl -> tlong, tlong + | Ointoflong -> tlong, tint + | Olongofint -> tint, tlong + | Olongofintu -> tint, tlong + | Olongoffloat -> tfloat, tlong + | Olonguoffloat -> tfloat, tlong + | Ofloatoflong -> tlong, tfloat + | Ofloatoflongu -> tlong, tfloat let type_binary_operation = function | Oadd -> tint, tint, tint @@ -117,52 +128,28 @@ let type_binary_operation = function | Osubf -> tfloat, tfloat, tfloat | Omulf -> tfloat, tfloat, tfloat | Odivf -> tfloat, tfloat, tfloat + | Oaddl -> tlong, tlong, tlong + | Osubl -> tlong, tlong, tlong + | Omull -> tlong, tlong, tlong + | Odivl -> tlong, tlong, tlong + | Odivlu -> tlong, tlong, tlong + | Omodl -> tlong, tlong, tlong + | Omodlu -> tlong, tlong, tlong + | Oandl -> tlong, tlong, tlong + | Oorl -> tlong, tlong, tlong + | Oxorl -> tlong, tlong, tlong + | Oshll -> tlong, tint, tlong + | Oshrl -> tlong, tint, tlong + | Oshrlu -> tlong, tint, tlong | Ocmp _ -> tint, tint, tint | Ocmpu _ -> tint, tint, tint | Ocmpf _ -> tfloat, tfloat, tint + | Ocmpl _ -> tlong, tlong, tint + | Ocmplu _ -> tlong, tlong, tint -let name_of_constant = function - | Ointconst n -> sprintf "intconst %ld" (camlint_of_coqint n) - | Ofloatconst n -> sprintf "floatconst %g" (camlfloat_of_coqfloat n) - | Oaddrsymbol (s, ofs) -> sprintf "addrsymbol %s %ld" (extern_atom s) (camlint_of_coqint ofs) - | Oaddrstack n -> sprintf "addrstack %ld" (camlint_of_coqint n) +let name_of_unary_operation = PrintCminor.name_of_unop -let name_of_unary_operation = function - | Ocast8signed -> "cast8signed" - | Ocast16signed -> "cast16signed" - | Ocast8unsigned -> "cast8unsigned" - | Ocast16unsigned -> "cast16unsigned" - | Onegint -> "negint" - | Onotint -> "notint" - | Onegf -> "negf" - | Oabsf -> "absf" - | Osingleoffloat -> "singleoffloat" - | Ointoffloat -> "intoffloat" - | Ointuoffloat -> "intuoffloat" - | Ofloatofint -> "floatofint" - | Ofloatofintu -> "floatofintu" - -let name_of_binary_operation = function - | Oadd -> "add" - | Osub -> "sub" - | Omul -> "mul" - | Odiv -> "div" - | Odivu -> "divu" - | Omod -> "mod" - | Omodu -> "modu" - | Oand -> "and" - | Oor -> "or" - | Oxor -> "xor" - | Oshl -> "shl" - | Oshr -> "shr" - | Oshru -> "shru" - | Oaddf -> "addf" - | Osubf -> "subf" - | Omulf -> "mulf" - | Odivf -> "divf" - | Ocmp c -> sprintf "cmp %s" (name_of_comparison c) - | Ocmpu c -> sprintf "cmpu %s" (name_of_comparison c) - | Ocmpf c -> sprintf "cmpf %s" (name_of_comparison c) +let name_of_binary_operation = PrintCminor.name_of_binop let type_chunk = function | Mint8signed -> tint @@ -170,19 +157,12 @@ let type_chunk = function | Mint16signed -> tint | Mint16unsigned -> tint | Mint32 -> tint + | Mint64 -> tlong | Mfloat32 -> tfloat | Mfloat64 -> tfloat | Mfloat64al32 -> tfloat -let name_of_chunk = function - | Mint8signed -> "int8signed" - | Mint8unsigned -> "int8unsigned" - | Mint16signed -> "int16signed" - | Mint16unsigned -> "int16unsigned" - | Mint32 -> "int32" - | Mfloat32 -> "float32" - | Mfloat64 -> "float64" - | Mfloat64al32 -> "float64al32" +let name_of_chunk = PrintAST.name_of_chunk let rec type_expr env lenv e = match e with diff --git a/backend/CSEproof.v b/backend/CSEproof.v index 8fc9407..1e269f8 100644 --- a/backend/CSEproof.v +++ b/backend/CSEproof.v @@ -996,12 +996,7 @@ Proof. rewrite <- RES. apply eval_operation_preserved. exact symbols_preserved. (* state matching *) econstructor; eauto. - apply wt_regset_assign; auto. - generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. - simpl in H0. inv H0. rewrite <- H3. apply WTREGS. - replace (tyenv res) with (snd (type_of_operation op)). - eapply type_of_operation_sound; eauto. - rewrite <- H6. reflexivity. + eapply wt_exec_Iop; eauto. eapply wt_instrs; eauto. eapply analysis_correct_1; eauto. simpl; auto. unfold transfer; rewrite H. eapply add_op_satisfiable; eauto. eapply wf_analyze; eauto. @@ -1028,8 +1023,7 @@ Proof. eapply exec_Iload; eauto. (* state matching *) econstructor; eauto. - generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. - apply wt_regset_assign. auto. rewrite H8. eapply type_of_chunk_correct; eauto. + eapply wt_exec_Iload; eauto. eapply wt_instrs; eauto. eapply analysis_correct_1; eauto. simpl; auto. unfold transfer; rewrite H. eapply add_load_satisfiable; eauto. eapply wf_analyze; eauto. diff --git a/backend/CleanupLabels.v b/backend/CleanupLabels.v index 8db871e..5eaa81e 100644 --- a/backend/CleanupLabels.v +++ b/backend/CleanupLabels.v @@ -16,15 +16,15 @@ control-flow graph. Many of these labels are never branched to, which can complicate further optimizations over linearized code. (There are no such optimizations yet.) In preparation for these - further optimizations, and to make the generated LTLin code + further optimizations, and to make the generated Linear code better-looking, the present pass removes labels that cannot be branched to. *) -Require Import Coqlib. -Require Import Ordered. Require Import FSets. Require FSetAVL. -Require Import LTLin. +Require Import Coqlib. +Require Import Ordered. +Require Import Linear. Module Labelset := FSetAVL.Make(OrderedPositive). @@ -63,7 +63,6 @@ Definition cleanup_labels (c: code) := Definition transf_function (f: function) : function := mkfunction (fn_sig f) - (fn_params f) (fn_stacksize f) (cleanup_labels (fn_code f)). diff --git a/backend/CleanupLabelsproof.v b/backend/CleanupLabelsproof.v index 70f0eb3..65ba61c 100644 --- a/backend/CleanupLabelsproof.v +++ b/backend/CleanupLabelsproof.v @@ -24,7 +24,7 @@ Require Import Globalenvs. Require Import Smallstep. Require Import Op. Require Import Locations. -Require Import LTLin. +Require Import Linear. Require Import CleanupLabels. Module LabelsetFacts := FSetFacts.Facts(Labelset). @@ -103,7 +103,7 @@ Proof. intros; red; intros; destruct i; simpl; auto. apply Labelset.add_2; auto. apply Labelset.add_2; auto. - revert H; induction l0; simpl. auto. intros; apply Labelset.add_2; auto. + revert H; induction l; simpl. auto. intros; apply Labelset.add_2; auto. Qed. Remark add_label_branched_to_contains: @@ -114,7 +114,7 @@ Proof. destruct i; simpl; intros; try contradiction. apply Labelset.add_1; auto. apply Labelset.add_1; auto. - revert H. induction l0; simpl; intros. + revert H. induction l; simpl; intros. contradiction. destruct H. apply Labelset.add_1; auto. apply Labelset.add_2; auto. Qed. @@ -200,11 +200,11 @@ Qed. Inductive match_stackframes: stackframe -> stackframe -> Prop := | match_stackframe_intro: - forall res f sp ls c, + forall f sp ls c, incl c f.(fn_code) -> match_stackframes - (Stackframe res f sp ls c) - (Stackframe res (transf_function f) sp ls + (Stackframe f sp ls c) + (Stackframe (transf_function f) sp ls (remove_unused_labels (labels_branched_to f.(fn_code)) c)). Inductive match_states: state -> state -> Prop := @@ -231,6 +231,14 @@ Definition measure (st: state) : nat := | _ => O end. +Lemma match_parent_locset: + forall s ts, + list_forall2 match_stackframes s ts -> + parent_locset ts = parent_locset s. +Proof. + induction 1; simpl. auto. inv H; auto. +Qed. + Theorem transf_step_correct: forall s1 t s2, step ge s1 t s2 -> forall s1' (MS: match_states s1 s1'), @@ -238,19 +246,27 @@ Theorem transf_step_correct: \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat. Proof. induction 1; intros; inv MS; try rewrite remove_unused_labels_cons. +(* Lgetstack *) + left; econstructor; split. + econstructor; eauto. + econstructor; eauto with coqlib. +(* Lsetstack *) + left; econstructor; split. + econstructor; eauto. + econstructor; eauto with coqlib. (* Lop *) left; econstructor; split. econstructor; eauto. instantiate (1 := v). rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. econstructor; eauto with coqlib. (* Lload *) - assert (eval_addressing tge sp addr (map rs args) = Some a). + assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a). rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. left; econstructor; split. econstructor; eauto. econstructor; eauto with coqlib. (* Lstore *) - assert (eval_addressing tge sp addr (map rs args) = Some a). + assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a). rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. left; econstructor; split. econstructor; eauto. @@ -262,13 +278,18 @@ Proof. econstructor; eauto. constructor; auto. constructor; eauto with coqlib. (* Ltailcall *) left; econstructor; split. - econstructor. eapply find_function_translated; eauto. + econstructor. erewrite match_parent_locset; eauto. eapply find_function_translated; eauto. symmetry; apply sig_function_translated. simpl. eauto. econstructor; eauto. (* Lbuiltin *) left; econstructor; split. - econstructor; eauto. eapply external_call_symbols_preserved; eauto. + econstructor; eauto. eapply external_call_symbols_preserved'; eauto. + exact symbols_preserved. exact varinfo_preserved. + econstructor; eauto with coqlib. +(* Lannot *) + left; econstructor; split. + econstructor; eauto. eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto with coqlib. (* Llabel *) @@ -285,7 +306,7 @@ Proof. econstructor; eauto. eapply find_label_incl; eauto. (* Lcond taken *) left; econstructor; split. - econstructor. auto. eapply find_label_translated; eauto. red; auto. + econstructor. auto. eauto. eapply find_label_translated; eauto. red; auto. econstructor; eauto. eapply find_label_incl; eauto. (* Lcond not taken *) left; econstructor; split. @@ -294,11 +315,12 @@ Proof. (* Ljumptable *) left; econstructor; split. econstructor. eauto. eauto. eapply find_label_translated; eauto. - red. eapply list_nth_z_in; eauto. + red. eapply list_nth_z_in; eauto. eauto. econstructor; eauto. eapply find_label_incl; eauto. (* Lreturn *) left; econstructor; split. econstructor; eauto. + erewrite <- match_parent_locset; eauto. econstructor; eauto with coqlib. (* internal function *) left; econstructor; split. @@ -306,7 +328,7 @@ Proof. econstructor; eauto with coqlib. (* external function *) left; econstructor; split. - econstructor; eauto. eapply external_call_symbols_preserved; eauto. + econstructor; eauto. eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto with coqlib. (* return *) @@ -333,11 +355,11 @@ Lemma transf_final_states: forall st1 st2 r, match_states st1 st2 -> final_state st1 r -> final_state st2 r. Proof. - intros. inv H0. inv H. inv H4. constructor. + intros. inv H0. inv H. inv H6. econstructor; eauto. Qed. Theorem transf_program_correct: - forward_simulation (LTLin.semantics prog) (LTLin.semantics tprog). + forward_simulation (Linear.semantics prog) (Linear.semantics tprog). Proof. eapply forward_simulation_opt. eexact symbols_preserved. diff --git a/backend/CleanupLabelstyping.v b/backend/CleanupLabelstyping.v deleted file mode 100644 index 11b516f..0000000 --- a/backend/CleanupLabelstyping.v +++ /dev/null @@ -1,59 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Type preservation for the CleanupLabels pass *) - -Require Import Coqlib. -Require Import Maps. -Require Import AST. -Require Import Op. -Require Import Locations. -Require Import LTLin. -Require Import CleanupLabels. -Require Import LTLintyping. - -Lemma in_remove_unused_labels: - forall bto i c, In i (remove_unused_labels bto c) -> In i c. -Proof. - unfold remove_unused_labels, remove_unused. induction c; simpl. - auto. - rewrite list_fold_right_eq. destruct a; simpl; intuition. - destruct (Labelset.mem l bto); simpl in H; intuition. -Qed. - -Lemma wt_transf_function: - forall f, - wt_function f -> - wt_function (transf_function f). -Proof. - intros. inv H. constructor; simpl; auto. - unfold cleanup_labels; red; intros. - apply wt_instrs. eapply in_remove_unused_labels; eauto. -Qed. - -Lemma wt_transf_fundef: - forall f, - wt_fundef f -> - wt_fundef (transf_fundef f). -Proof. - induction 1. constructor. constructor. apply wt_transf_function; auto. -Qed. - -Lemma program_typing_preserved: - forall p, - wt_program p -> - wt_program (transf_program p). -Proof. - intros; red; intros. - exploit transform_program_function; eauto. intros [f1 [A B]]. subst f. - apply wt_transf_fundef. eapply H; eauto. -Qed. diff --git a/backend/Cminor.v b/backend/Cminor.v index 3d177e4..3963e76 100644 --- a/backend/Cminor.v +++ b/backend/Cminor.v @@ -36,6 +36,7 @@ Require Import Switch. Inductive constant : Type := | Ointconst: int -> constant (**r integer constant *) | Ofloatconst: float -> constant (**r floating-point constant *) + | Olongconst: int64 -> constant (**r long integer constant *) | Oaddrsymbol: ident -> int -> constant (**r address of the symbol plus the offset *) | Oaddrstack: int -> constant. (**r stack pointer plus the given offset *) @@ -52,7 +53,16 @@ Inductive unary_operation : Type := | Ointoffloat: unary_operation (**r signed integer to float *) | Ointuoffloat: unary_operation (**r unsigned integer to float *) | Ofloatofint: unary_operation (**r float to signed integer *) - | Ofloatofintu: unary_operation. (**r float to unsigned integer *) + | Ofloatofintu: unary_operation (**r float to unsigned integer *) + | Onegl: unary_operation (**r long integer opposite *) + | Onotl: unary_operation (**r long bitwise complement *) + | Ointoflong: unary_operation (**r long to int *) + | Olongofint: unary_operation (**r signed int to long *) + | Olongofintu: unary_operation (**r unsigned int to long *) + | Olongoffloat: unary_operation (**r signed long to float *) + | Olonguoffloat: unary_operation (**r unsigned long to float *) + | Ofloatoflong: unary_operation (**r float to signed long *) + | Ofloatoflongu: unary_operation. (**r float to unsigned long *) Inductive binary_operation : Type := | Oadd: binary_operation (**r integer addition *) @@ -62,19 +72,34 @@ Inductive binary_operation : Type := | Odivu: binary_operation (**r integer unsigned division *) | Omod: binary_operation (**r integer signed modulus *) | Omodu: binary_operation (**r integer unsigned modulus *) - | Oand: binary_operation (**r bitwise ``and'' *) - | Oor: binary_operation (**r bitwise ``or'' *) - | Oxor: binary_operation (**r bitwise ``xor'' *) - | Oshl: binary_operation (**r left shift *) - | Oshr: binary_operation (**r right signed shift *) - | Oshru: binary_operation (**r right unsigned shift *) + | Oand: binary_operation (**r integer bitwise ``and'' *) + | Oor: binary_operation (**r integer bitwise ``or'' *) + | Oxor: binary_operation (**r integer bitwise ``xor'' *) + | Oshl: binary_operation (**r integer left shift *) + | Oshr: binary_operation (**r integer right signed shift *) + | Oshru: binary_operation (**r integer right unsigned shift *) | Oaddf: binary_operation (**r float addition *) | Osubf: binary_operation (**r float subtraction *) | Omulf: binary_operation (**r float multiplication *) | Odivf: binary_operation (**r float division *) + | Oaddl: binary_operation (**r long addition *) + | Osubl: binary_operation (**r long subtraction *) + | Omull: binary_operation (**r long multiplication *) + | Odivl: binary_operation (**r long signed division *) + | Odivlu: binary_operation (**r long unsigned division *) + | Omodl: binary_operation (**r long signed modulus *) + | Omodlu: binary_operation (**r long unsigned modulus *) + | Oandl: binary_operation (**r long bitwise ``and'' *) + | Oorl: binary_operation (**r long bitwise ``or'' *) + | Oxorl: binary_operation (**r long bitwise ``xor'' *) + | Oshll: binary_operation (**r long left shift *) + | Oshrl: binary_operation (**r long right signed shift *) + | Oshrlu: binary_operation (**r long right unsigned shift *) | Ocmp: comparison -> binary_operation (**r integer signed comparison *) | Ocmpu: comparison -> binary_operation (**r integer unsigned comparison *) - | Ocmpf: comparison -> binary_operation. (**r float comparison *) + | Ocmpf: comparison -> binary_operation (**r float comparison *) + | Ocmpl: comparison -> binary_operation (**r long signed comparison *) + | Ocmplu: comparison -> binary_operation. (**r long unsigned comparison *) (** Expressions include reading local variables, constants and arithmetic operations, reading store locations, and conditional @@ -214,6 +239,7 @@ Definition eval_constant (sp: val) (cst: constant) : option val := match cst with | Ointconst n => Some (Vint n) | Ofloatconst n => Some (Vfloat n) + | Olongconst n => Some (Vlong n) | Oaddrsymbol s ofs => Some(match Genv.find_symbol ge s with | None => Vundef @@ -236,6 +262,15 @@ Definition eval_unop (op: unary_operation) (arg: val) : option val := | Ointuoffloat => Val.intuoffloat arg | Ofloatofint => Val.floatofint arg | Ofloatofintu => Val.floatofintu arg + | Onegl => Some (Val.negl arg) + | Onotl => Some (Val.notl arg) + | Ointoflong => Some (Val.loword arg) + | Olongofint => Some (Val.longofint arg) + | Olongofintu => Some (Val.longofintu arg) + | Olongoffloat => Val.longoffloat arg + | Olonguoffloat => Val.longuoffloat arg + | Ofloatoflong => Val.floatoflong arg + | Ofloatoflongu => Val.floatoflongu arg end. Definition eval_binop @@ -258,9 +293,24 @@ Definition eval_binop | Osubf => Some (Val.subf arg1 arg2) | Omulf => Some (Val.mulf arg1 arg2) | Odivf => Some (Val.divf arg1 arg2) + | Oaddl => Some (Val.addl arg1 arg2) + | Osubl => Some (Val.subl arg1 arg2) + | Omull => Some (Val.mull arg1 arg2) + | Odivl => Val.divls arg1 arg2 + | Odivlu => Val.divlu arg1 arg2 + | Omodl => Val.modls arg1 arg2 + | Omodlu => Val.modlu arg1 arg2 + | Oandl => Some (Val.andl arg1 arg2) + | Oorl => Some (Val.orl arg1 arg2) + | Oxorl => Some (Val.xorl arg1 arg2) + | Oshll => Some (Val.shll arg1 arg2) + | Oshrl => Some (Val.shrl arg1 arg2) + | Oshrlu => Some (Val.shrlu arg1 arg2) | Ocmp c => Some (Val.cmp c arg1 arg2) | Ocmpu c => Some (Val.cmpu (Mem.valid_pointer m) c arg1 arg2) | Ocmpf c => Some (Val.cmpf c arg1 arg2) + | Ocmpl c => Some (Val.cmpl c arg1 arg2) + | Ocmplu c => Some (Val.cmplu c arg1 arg2) end. (** Evaluation of an expression: [eval_expr ge sp e m a v] diff --git a/backend/CminorSel.v b/backend/CminorSel.v index b5a0d39..3538dda 100644 --- a/backend/CminorSel.v +++ b/backend/CminorSel.v @@ -43,6 +43,8 @@ Inductive expr : Type := | Econdition : condition -> exprlist -> expr -> expr -> expr | Elet : expr -> expr -> expr | Eletvar : nat -> expr + | Ebuiltin : external_function -> exprlist -> expr + | Eexternal : ident -> signature -> exprlist -> expr with exprlist : Type := | Enil: exprlist @@ -171,6 +173,17 @@ Inductive eval_expr: letenv -> expr -> val -> Prop := | eval_Eletvar: forall le n v, nth_error le n = Some v -> eval_expr le (Eletvar n) v + | eval_Ebuiltin: forall le ef al vl v, + eval_exprlist le al vl -> + external_call ef ge vl m E0 v m -> + eval_expr le (Ebuiltin ef al) v + | eval_Eexternal: forall le id sg al b ef vl v, + Genv.find_symbol ge id = Some b -> + Genv.find_funct_ptr ge b = Some (External ef) -> + ef_sig ef = sg -> + eval_exprlist le al vl -> + external_call ef ge vl m E0 v m -> + eval_expr le (Eexternal id sg al) v with eval_exprlist: letenv -> exprlist -> list val -> Prop := | eval_Enil: forall le, @@ -388,6 +401,8 @@ Fixpoint lift_expr (p: nat) (a: expr) {struct a}: expr := | Elet b c => Elet (lift_expr p b) (lift_expr (S p) c) | Eletvar n => if le_gt_dec p n then Eletvar (S n) else Eletvar n + | Ebuiltin ef bl => Ebuiltin ef (lift_exprlist p bl) + | Eexternal id sg bl => Eexternal id sg (lift_exprlist p bl) end with lift_exprlist (p: nat) (a: exprlist) {struct a}: exprlist := diff --git a/backend/Constprop.v b/backend/Constprop.v index f85405d..0575079 100644 --- a/backend/Constprop.v +++ b/backend/Constprop.v @@ -47,6 +47,7 @@ Module Approx <: SEMILATTICE_WITH_TOP. decide equality. apply Int.eq_dec. apply Float.eq_dec. + apply Int64.eq_dec. apply Int.eq_dec. apply ident_eq. apply Int.eq_dec. @@ -139,6 +140,10 @@ Fixpoint eval_load_init (chunk: memory_chunk) (pos: Z) (il: list init_data): app if zeq pos 0 then match chunk with Mint32 => I n | _ => Unknown end else eval_load_init chunk (pos - 4) il' + | Init_int64 n :: il' => + if zeq pos 0 + then match chunk with Mint64 => L n | _ => Unknown end + else eval_load_init chunk (pos - 8) il' | Init_float32 n :: il' => if zeq pos 0 then match chunk with diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v index 580d551..a6385f4 100644 --- a/backend/Constpropproof.v +++ b/backend/Constpropproof.v @@ -186,6 +186,11 @@ Proof. destruct chunk; simpl; auto. congruence. eapply IHil; eauto. omega. + (* Init_int64 *) + destruct H. destruct (zeq pos 0). subst. rewrite Zplus_0_r in H0. + destruct chunk; simpl; auto. + congruence. + eapply IHil; eauto. omega. (* Init_float32 *) destruct H. destruct (zeq pos 0). subst. rewrite Zplus_0_r in H0. destruct chunk; simpl; auto. destruct (propagate_float_constants tt); simpl; auto. diff --git a/backend/Conventions.v b/backend/Conventions.v index c11bf47..abfe4ee 100644 --- a/backend/Conventions.v +++ b/backend/Conventions.v @@ -22,110 +22,6 @@ Require Export Conventions1. [arch/abi/Conventions1.v]. This file adds various processor-independent definitions and lemmas. *) -(** * Acceptable locations for register allocation *) - -(** The following predicate describes the locations that can be assigned - to an RTL pseudo-register during register allocation: a non-temporary - machine register or a [Local] stack slot are acceptable. *) - -Definition loc_acceptable (l: loc) : Prop := - match l with - | R r => ~(In l temporaries) - | S (Local ofs ty) => ofs >= 0 - | S (Incoming _ _) => False - | S (Outgoing _ _) => False - end. - -Definition locs_acceptable (ll: list loc) : Prop := - forall l, In l ll -> loc_acceptable l. - -Lemma temporaries_not_acceptable: - forall l, loc_acceptable l -> Loc.notin l temporaries. -Proof. - unfold loc_acceptable; destruct l. - simpl. intuition congruence. - destruct s; try contradiction. - intro. simpl. tauto. -Qed. -Hint Resolve temporaries_not_acceptable: locs. - -Lemma locs_acceptable_disj_temporaries: - forall ll, locs_acceptable ll -> Loc.disjoint ll temporaries. -Proof. - intros. apply Loc.notin_disjoint. intros. - apply temporaries_not_acceptable. auto. -Qed. - -Lemma loc_acceptable_noteq_diff: - forall l1 l2, - loc_acceptable l1 -> l1 <> l2 -> Loc.diff l1 l2. -Proof. - unfold loc_acceptable, Loc.diff; destruct l1; destruct l2; - try (destruct s); try (destruct s0); intros; auto; try congruence. - case (zeq z z0); intro. - compare t t0; intro. - subst z0; subst t0; tauto. - tauto. tauto. - contradiction. contradiction. -Qed. - -Lemma loc_acceptable_notin_notin: - forall r ll, - loc_acceptable r -> - ~(In r ll) -> Loc.notin r ll. -Proof. - induction ll; simpl; intros. - auto. - split. apply loc_acceptable_noteq_diff. assumption. - apply sym_not_equal. tauto. - apply IHll. assumption. tauto. -Qed. - -(** * Additional properties of result and argument locations *) - -(** The result location is not a callee-save register. *) - -Lemma loc_result_not_callee_save: - forall (s: signature), - ~(In (loc_result s) int_callee_save_regs \/ In (loc_result s) float_callee_save_regs). -Proof. - intros. generalize (loc_result_caller_save s). - generalize (int_callee_save_not_destroyed (loc_result s)). - generalize (float_callee_save_not_destroyed (loc_result s)). - tauto. -Qed. - -(** Callee-save registers do not overlap with argument locations. *) - -Lemma arguments_not_preserved: - forall sig l, - Loc.notin l destroyed_at_call -> loc_acceptable l -> - Loc.notin l (loc_arguments sig). -Proof. - intros. destruct l; red in H0. - apply Loc.reg_notin. red; intros. - exploit Loc.notin_not_in; eauto. eapply arguments_caller_save; eauto. - destruct s; try contradiction. - unfold loc_arguments. apply loc_arguments_rec_notin_local. -Qed. - -(** There is no partial overlap between arguments and acceptable locations. *) - -Lemma no_overlap_arguments: - forall args sg, - locs_acceptable args -> - Loc.no_overlap args (loc_arguments sg). -Proof. - unfold Loc.no_overlap; intros. - generalize (H r H0). - generalize (loc_arguments_acceptable _ _ H1). - destruct s; destruct r; simpl. - intros. case (mreg_eq m0 m); intro. left; congruence. tauto. - intros. right; destruct s; auto. - intros. right. auto. - destruct s; try tauto. destruct s0; tauto. -Qed. - (** ** Location of function parameters *) (** A function finds the values of its parameter in the same locations @@ -135,86 +31,39 @@ Qed. Definition parameter_of_argument (l: loc) : loc := match l with - | S (Outgoing n ty) => S (Incoming n ty) + | S Outgoing n ty => S Incoming n ty | _ => l end. Definition loc_parameters (s: signature) := List.map parameter_of_argument (loc_arguments s). -Lemma loc_parameters_type: - forall sig, List.map Loc.type (loc_parameters sig) = sig.(sig_args). -Proof. - intros. unfold loc_parameters. - rewrite list_map_compose. - rewrite <- loc_arguments_type. - apply list_map_exten. - intros. destruct x; simpl. auto. - destruct s; reflexivity. -Qed. - -Lemma loc_parameters_length: - forall sg, List.length (loc_parameters sg) = List.length sg.(sig_args). -Proof. - intros. unfold loc_parameters. rewrite list_length_map. - apply loc_arguments_length. -Qed. - -Lemma loc_parameters_not_temporaries: - forall sig, Loc.disjoint (loc_parameters sig) temporaries. -Proof. - intro; red; intros. - unfold loc_parameters in H. - elim (list_in_map_inv _ _ _ H). intros y [EQ IN]. - generalize (loc_arguments_not_temporaries sig y x2 IN H0). - subst x1. destruct x2. - destruct y; simpl. auto. destruct s; auto. - byContradiction. generalize H0. simpl. NotOrEq. -Qed. - -Lemma no_overlap_parameters: - forall params sg, - locs_acceptable params -> - Loc.no_overlap (loc_parameters sg) params. -Proof. - unfold Loc.no_overlap; intros. - unfold loc_parameters in H0. - elim (list_in_map_inv _ _ _ H0). intros t [EQ IN]. - rewrite EQ. - generalize (loc_arguments_acceptable _ _ IN). - generalize (H s H1). - destruct s; destruct t; simpl. - intros. case (mreg_eq m0 m); intro. left; congruence. tauto. - intros. right; destruct s; simpl; auto. - intros; right; auto. - destruct s; try tauto. destruct s0; try tauto. - intros; simpl. tauto. -Qed. - Lemma incoming_slot_in_parameters: forall ofs ty sg, - In (S (Incoming ofs ty)) (loc_parameters sg) -> - In (S (Outgoing ofs ty)) (loc_arguments sg). + In (S Incoming ofs ty) (loc_parameters sg) -> + In (S Outgoing ofs ty) (loc_arguments sg). Proof. intros. unfold loc_parameters in H. - change (S (Incoming ofs ty)) with (parameter_of_argument (S (Outgoing ofs ty))) in H. + change (S Incoming ofs ty) with (parameter_of_argument (S Outgoing ofs ty)) in H. exploit list_in_map_inv. eexact H. intros [x [A B]]. simpl in A. exploit loc_arguments_acceptable; eauto. unfold loc_argument_acceptable; intros. destruct x; simpl in A; try discriminate. - destruct s; try contradiction. + destruct sl; try contradiction. inv A. auto. Qed. - (** * Tail calls *) (** A tail-call is possible for a signature if the corresponding arguments are all passed in registers. *) +(** A tail-call is possible for a signature if the corresponding + arguments are all passed in registers. *) + Definition tailcall_possible (s: signature) : Prop := forall l, In l (loc_arguments s) -> - match l with R _ => True | S _ => False end. + match l with R _ => True | S _ _ _ => False end. (** Decide whether a tailcall is possible. *) @@ -223,7 +72,7 @@ Definition tailcall_is_possible (sg: signature) : bool := match l with | nil => true | R _ :: l' => tcisp l' - | S _ :: l' => false + | S _ _ _ :: l' => false end in tcisp (loc_arguments sg). @@ -237,31 +86,12 @@ Proof. destruct H0. subst l0. auto. apply IHl. auto. auto. discriminate. Qed. -(** * Counting temporaries *) - -(** Given a list [tys] of types representing arguments to an operator, - [arity_ok tys] returns [true] if there are enough temporaries to - reload all arguments into temporaries. *) - -Fixpoint arity_ok_rec (tys: list typ) (itmps ftmps: list mreg) - {struct tys} : bool := - match tys with - | nil => true - | Tint :: ts => - match itmps with - | nil => false - | it1 :: its => arity_ok_rec ts its ftmps - end - | Tfloat :: ts => - match ftmps with - | nil => false - | ft1 :: fts => arity_ok_rec ts itmps fts - end - end. - -Definition arity_ok (tys: list typ) := - arity_ok_rec tys int_temporaries float_temporaries. - - - - +Lemma zero_size_arguments_tailcall_possible: + forall sg, size_arguments sg = 0 -> tailcall_possible sg. +Proof. + intros; red; intros. exploit loc_arguments_acceptable; eauto. + unfold loc_argument_acceptable. + destruct l; intros. auto. destruct sl; try contradiction. destruct H1. + generalize (loc_arguments_bounded _ _ _ H0). + generalize (typesize_pos ty). omega. +Qed. diff --git a/backend/IRC.ml b/backend/IRC.ml new file mode 100644 index 0000000..573c3d7 --- /dev/null +++ b/backend/IRC.ml @@ -0,0 +1,894 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +open Printf +open Camlcoq +open Datatypes +open AST +open Registers +open Machregs +open Locations +open Conventions1 +open Conventions +open XTL + +(* Iterated Register Coalescing: George and Appel's graph coloring algorithm *) + +type var_stats = { + mutable cost: int; (* estimated cost of a spill *) + mutable usedefs: int (* number of uses and defs *) +} + +(* Representation of the interference graph. Each node of the graph + (i.e. each variable) is represented as follows. *) + +type node = + { ident: int; (*r unique identifier *) + typ: typ; (*r its type *) + var: var; (*r the XTL variable it comes from *) + regclass: int; (*r identifier of register class *) + mutable accesses: int; (*r number of defs and uses *) + mutable spillcost: float; (*r estimated cost of spilling *) + mutable adjlist: node list; (*r all nodes it interferes with *) + mutable degree: int; (*r number of adjacent nodes *) + mutable movelist: move list; (*r list of moves it is involved in *) + mutable extra_adj: node list; (*r extra interferences (see below) *) + mutable extra_pref: move list; (*r extra preferences (see below) *) + mutable alias: node option; (*r [Some n] if coalesced with [n] *) + mutable color: loc option; (*r chosen color *) + mutable nstate: nodestate; (*r in which set of nodes it is *) + mutable nprev: node; (*r for double linking *) + mutable nnext: node (*r for double linking *) + } + +(* These are the possible states for nodes. *) + +and nodestate = + | Colored + | Initial + | SimplifyWorklist + | FreezeWorklist + | SpillWorklist + | CoalescedNodes + | SelectStack + +(* Each move (i.e. wish to be put in the same location) is represented + as follows. *) + +and move = + { src: node; (*r source of the move *) + dst: node; (*r destination of the move *) + mutable mstate: movestate; (*r in which set of moves it is *) + mutable mprev: move; (*r for double linking *) + mutable mnext: move (*r for double linking *) + } + +(* These are the possible states for moves *) + +and movestate = + | CoalescedMoves + | ConstrainedMoves + | FrozenMoves + | WorklistMoves + | ActiveMoves + +(* Note on "precolored" nodes and how they are handled: + +The register allocator can express interferences and preferences between +any two values of type [var]: either pseudoregisters, to be colored by IRC, +or fixed, "precolored" locations. + +I and P between two pseudoregisters are recorded in the graph that IRC +modifies, via the [adjlist] and [movelist] fields. + +I and P between a pseudoregister and a machine register are also +recorded in the IRC graph, but only in the [adjlist] and [movelist] +fields of the pseudoregister. This is the special case described +in George and Appel's papers. + +I and P between a pseudoregister and a stack slot +are omitted from the IRC graph, as they contribute nothing to the +simplification and coalescing process. We record them in the +[extra_adj] and [extra_pref] fields, where they can be honored +after IRC elimination, when assigning a stack slot to a spilled variable. *) + +let name_of_loc = function + | R r -> + begin match Machregsaux.name_of_register r with + | None -> "fixed-reg" + | Some s -> s + end + | S (Local, ofs, ty) -> + sprintf "L%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs) + | S (Incoming, ofs, ty) -> + sprintf "I%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs) + | S (Outgoing, ofs, ty) -> + sprintf "O%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs) + +let name_of_node n = + match n.var with + | V(r, ty) -> sprintf "x%ld" (P.to_int32 r) + | L l -> name_of_loc l + +(* The algorithm manipulates partitions of the nodes and of the moves + according to their states, frequently moving a node or a move from + a state to another, and frequently enumerating all nodes or all moves + of a given state. To support these operations efficiently, + nodes or moves having the same state are put into imperative doubly-linked + lists, allowing for constant-time insertion and removal, and linear-time + scanning. We now define the operations over these doubly-linked lists. *) + +module DLinkNode = struct + type t = node + let make state = + let rec empty = + { ident = 0; typ = Tint; var = V(P.one, Tint); regclass = 0; + adjlist = []; degree = 0; accesses = 0; spillcost = 0.0; + movelist = []; extra_adj = []; extra_pref = []; + alias = None; color = None; + nstate = state; nprev = empty; nnext = empty } + in empty + let dummy = make Colored + let clear dl = dl.nnext <- dl; dl.nprev <- dl + let notempty dl = dl.nnext != dl + let insert n dl = + n.nstate <- dl.nstate; + n.nnext <- dl.nnext; n.nprev <- dl; + dl.nnext.nprev <- n; dl.nnext <- n + let remove n dl = + assert (n.nstate = dl.nstate); + n.nnext.nprev <- n.nprev; n.nprev.nnext <- n.nnext + let move n dl1 dl2 = + remove n dl1; insert n dl2 + let pick dl = + let n = dl.nnext in remove n dl; n + let iter f dl = + let rec iter n = if n != dl then (f n; iter n.nnext) + in iter dl.nnext + let fold f dl accu = + let rec fold n accu = if n == dl then accu else fold n.nnext (f n accu) + in fold dl.nnext accu +end + +module DLinkMove = struct + type t = move + let make state = + let rec empty = + { src = DLinkNode.dummy; dst = DLinkNode.dummy; + mstate = state; mprev = empty; mnext = empty } + in empty + let dummy = make CoalescedMoves + let clear dl = dl.mnext <- dl; dl.mprev <- dl + let notempty dl = dl.mnext != dl + let insert m dl = + m.mstate <- dl.mstate; + m.mnext <- dl.mnext; m.mprev <- dl; + dl.mnext.mprev <- m; dl.mnext <- m + let remove m dl = + assert (m.mstate = dl.mstate); + m.mnext.mprev <- m.mprev; m.mprev.mnext <- m.mnext + let move m dl1 dl2 = + remove m dl1; insert m dl2 + let pick dl = + let m = dl.mnext in remove m dl; m + let iter f dl = + let rec iter m = if m != dl then (f m; iter m.mnext) + in iter dl.mnext + let fold f dl accu = + let rec fold m accu = if m == dl then accu else fold m.mnext (f m accu) + in fold dl.mnext accu +end + +(* Auxiliary data structures *) + +module IntSet = Set.Make(struct + type t = int + let compare (x:int) (y:int) = compare x y +end) + +module IntPairSet = Set.Make(struct + type t = int * int + let compare ((x1, y1): (int * int)) (x2, y2) = + if x1 < x2 then -1 else + if x1 > x2 then 1 else + if y1 < y2 then -1 else + if y1 > y2 then 1 else + 0 + end) + +(* The global state of the algorithm *) + +type graph = { + (* Machine registers available for allocation *) + caller_save_registers: mreg array array; + callee_save_registers: mreg array array; + num_available_registers: int array; + start_points: int array; + allocatable_registers: mreg list; + (* Costs for pseudo-registers *) + stats_of_reg: reg -> var_stats; + (* Mapping from XTL variables to nodes *) + varTable: (var, node) Hashtbl.t; + mutable nextIdent: int; + (* The adjacency set *) + mutable adjSet: IntPairSet.t; + (* Low-degree, non-move-related nodes *) + simplifyWorklist: DLinkNode.t; + (* Low-degree, move-related nodes *) + freezeWorklist: DLinkNode.t; + (* High-degree nodes *) + spillWorklist: DLinkNode.t; + (* Nodes that have been coalesced *) + coalescedNodes: DLinkNode.t; + (* Moves that have been coalesced *) + coalescedMoves: DLinkMove.t; + (* Moves whose source and destination interfere *) + constrainedMoves: DLinkMove.t; + (* Moves that will no longer be considered for coalescing *) + frozenMoves: DLinkMove.t; + (* Moves enabled for possible coalescing *) + worklistMoves: DLinkMove.t; + (* Moves not yet ready for coalescing *) + activeMoves: DLinkMove.t +} + +(* Register classes and reserved registers *) + +let num_register_classes = 2 + +let class_of_type = function Tint -> 0 | Tfloat -> 1 | Tlong -> assert false + +let reserved_registers = ref ([]: mreg list) + +let rec remove_reserved = function + | [] -> [] + | hd :: tl -> + if List.mem hd !reserved_registers + then remove_reserved tl + else hd :: remove_reserved tl + +(* Initialize and return an empty graph *) + +let init costs = + let int_caller_save = remove_reserved int_caller_save_regs + and float_caller_save = remove_reserved float_caller_save_regs + and int_callee_save = remove_reserved int_callee_save_regs + and float_callee_save = remove_reserved float_callee_save_regs in + { + caller_save_registers = + [| Array.of_list int_caller_save; Array.of_list float_caller_save |]; + callee_save_registers = + [| Array.of_list int_callee_save; Array.of_list float_callee_save |]; + num_available_registers = + [| List.length int_caller_save + List.length int_callee_save; + List.length float_caller_save + List.length float_callee_save |]; + start_points = + [| 0; 0 |]; + allocatable_registers = + int_caller_save @ int_callee_save @ float_caller_save @ float_callee_save; + stats_of_reg = costs; + varTable = Hashtbl.create 253; + nextIdent = 0; + adjSet = IntPairSet.empty; + simplifyWorklist = DLinkNode.make SimplifyWorklist; + freezeWorklist = DLinkNode.make FreezeWorklist; + spillWorklist = DLinkNode.make SpillWorklist; + coalescedNodes = DLinkNode.make CoalescedNodes; + coalescedMoves = DLinkMove.make CoalescedMoves; + constrainedMoves = DLinkMove.make ConstrainedMoves; + frozenMoves = DLinkMove.make FrozenMoves; + worklistMoves = DLinkMove.make WorklistMoves; + activeMoves = DLinkMove.make ActiveMoves + } + +(* Create nodes corresponding to XTL variables *) + +let weightedSpillCost st = + if st.cost < max_int + then float_of_int st.cost + else infinity + +let newNodeOfReg g r ty = + let st = g.stats_of_reg r in + g.nextIdent <- g.nextIdent + 1; + { ident = g.nextIdent; typ = ty; + var = V(r, ty); regclass = class_of_type ty; + accesses = st.usedefs; + spillcost = weightedSpillCost st; + adjlist = []; degree = 0; movelist = []; extra_adj = []; extra_pref = []; + alias = None; + color = None; + nstate = Initial; + nprev = DLinkNode.dummy; nnext = DLinkNode.dummy } + +let newNodeOfLoc g l = + let ty = Loc.coq_type l in + g.nextIdent <- g.nextIdent + 1; + { ident = g.nextIdent; typ = ty; + var = L l; regclass = class_of_type ty; + accesses = 0; spillcost = 0.0; + adjlist = []; degree = 0; movelist = []; extra_adj = []; extra_pref = []; + alias = None; + color = Some l; + nstate = Colored; + nprev = DLinkNode.dummy; nnext = DLinkNode.dummy } + +let nodeOfVar g v = + try + Hashtbl.find g.varTable v + with Not_found -> + let n = + match v with V(r, ty) -> newNodeOfReg g r ty | L l -> newNodeOfLoc g l in + Hashtbl.add g.varTable v n; + n + +(* Determine if two nodes interfere *) + +let interfere g n1 n2 = + let i1 = n1.ident and i2 = n2.ident in + let p = if i1 < i2 then (i1, i2) else (i2, i1) in + IntPairSet.mem p g.adjSet + +(* Add an edge to the graph. *) + +let recordInterf n1 n2 = + match n2.color with + | None | Some (R _) -> + if n1.regclass = n2.regclass then begin + n1.adjlist <- n2 :: n1.adjlist; + n1.degree <- 1 + n1.degree + end else begin + n1.extra_adj <- n2 :: n1.extra_adj + end + | Some (S _) -> + (*i printf "extra adj %s to %s\n" (name_of_node n1) (name_of_node n2); *) + n1.extra_adj <- n2 :: n1.extra_adj + +let addEdge g n1 n2 = + (*i printf "edge %s -- %s;\n" (name_of_node n1) (name_of_node n2);*) + assert (n1 != n2); + if not (interfere g n1 n2) then begin + let i1 = n1.ident and i2 = n2.ident in + let p = if i1 < i2 then (i1, i2) else (i2, i1) in + g.adjSet <- IntPairSet.add p g.adjSet; + if n1.nstate <> Colored then recordInterf n1 n2; + if n2.nstate <> Colored then recordInterf n2 n1 + end + +(* Add a move preference. *) + +let recordMove g n1 n2 = + let m = + { src = n1; dst = n2; mstate = WorklistMoves; + mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in + n1.movelist <- m :: n1.movelist; + n2.movelist <- m :: n2.movelist; + DLinkMove.insert m g.worklistMoves + +let recordExtraPref n1 n2 = + let m = + { src = n1; dst = n2; mstate = FrozenMoves; + mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in + n1.extra_pref <- m :: n1.extra_pref + +let addMovePref g n1 n2 = + assert (n1.regclass = n2.regclass); + match n1.color, n2.color with + | None, None -> + recordMove g n1 n2 + | Some (R mr1), None -> + if List.mem mr1 g.allocatable_registers then recordMove g n1 n2 + | None, Some (R mr2) -> + if List.mem mr2 g.allocatable_registers then recordMove g n1 n2 + | Some (S _), None -> + recordExtraPref n2 n1 + | None, Some (S _) -> + recordExtraPref n1 n2 + | _, _ -> + () + +(* Apply the given function to the relevant adjacent nodes of a node *) + +let iterAdjacent f n = + List.iter + (fun n -> + match n.nstate with + | SelectStack | CoalescedNodes -> () + | _ -> f n) + n.adjlist + +(* Determine the moves affecting a node *) + +let moveIsActiveOrWorklist m = + match m.mstate with + | ActiveMoves | WorklistMoves -> true + | _ -> false + +let nodeMoves n = + List.filter moveIsActiveOrWorklist n.movelist + +(* Determine whether a node is involved in a move *) + +let moveRelated n = + List.exists moveIsActiveOrWorklist n.movelist + +(* Initial partition of nodes into spill / freeze / simplify *) + +let initialNodePartition g = + let part_node v n = + match n.nstate with + | Initial -> + let k = g.num_available_registers.(n.regclass) in + if n.degree >= k then + DLinkNode.insert n g.spillWorklist + else if moveRelated n then + DLinkNode.insert n g.freezeWorklist + else + DLinkNode.insert n g.simplifyWorklist + | Colored -> () + | _ -> assert false in + Hashtbl.iter part_node g.varTable + + +(* Check invariants *) + +let degreeInvariant g n = + let c = ref 0 in + iterAdjacent (fun n -> incr c) n; + if !c <> n.degree then + failwith("degree invariant violated by " ^ name_of_node n) + +let simplifyWorklistInvariant g n = + if n.degree < g.num_available_registers.(n.regclass) + && not (moveRelated n) + then () + else failwith("simplify worklist invariant violated by " ^ name_of_node n) + +let freezeWorklistInvariant g n = + if n.degree < g.num_available_registers.(n.regclass) + && moveRelated n + then () + else failwith("freeze worklist invariant violated by " ^ name_of_node n) + +let spillWorklistInvariant g n = + if n.degree >= g.num_available_registers.(n.regclass) + then () + else failwith("spill worklist invariant violated by " ^ name_of_node n) + +let checkInvariants g = + DLinkNode.iter + (fun n -> degreeInvariant g n; simplifyWorklistInvariant g n) + g.simplifyWorklist; + DLinkNode.iter + (fun n -> degreeInvariant g n; freezeWorklistInvariant g n) + g.freezeWorklist; + DLinkNode.iter + (fun n -> degreeInvariant g n; spillWorklistInvariant g n) + g.spillWorklist + +(* Enable moves that have become low-degree related *) + +let enableMoves g n = + List.iter + (fun m -> + if m.mstate = ActiveMoves + then DLinkMove.move m g.activeMoves g.worklistMoves) + (nodeMoves n) + +(* Simulate the removal of a node from the graph *) + +let decrementDegree g n = + let k = g.num_available_registers.(n.regclass) in + let d = n.degree in + n.degree <- d - 1; + if d = k then begin + enableMoves g n; + iterAdjacent (enableMoves g) n; + if moveRelated n + then DLinkNode.move n g.spillWorklist g.freezeWorklist + else DLinkNode.move n g.spillWorklist g.simplifyWorklist + end + +(* Simulate the effect of combining nodes [n1] and [n3] on [n2], + where [n2] is a node adjacent to [n3]. *) + +let combineEdge g n1 n2 = + assert (n1 != n2); + if interfere g n1 n2 then begin + (* The two edges n2--n3 and n2--n1 become one, so degree of n2 decreases *) + decrementDegree g n2 + end else begin + (* Add new edge *) + let i1 = n1.ident and i2 = n2.ident in + let p = if i1 < i2 then (i1, i2) else (i2, i1) in + g.adjSet <- IntPairSet.add p g.adjSet; + if n1.nstate <> Colored then begin + n1.adjlist <- n2 :: n1.adjlist; + n1.degree <- 1 + n1.degree + end; + if n2.nstate <> Colored then begin + n2.adjlist <- n1 :: n2.adjlist; + (* n2's degree stays the same because the old edge n2--n3 disappears + and becomes the new edge n2--n1 *) + end + end + +(* Simplification of a low-degree node *) + +let simplify g = + let n = DLinkNode.pick g.simplifyWorklist in + (*i printf "Simplifying %s\n" (name_of_node n); *) + n.nstate <- SelectStack; + iterAdjacent (decrementDegree g) n; + n + +(* Briggs's conservative coalescing criterion. In the terminology of + Hailperin, "Comparing Conservative Coalescing Criteria", + TOPLAS 27(3) 2005, this is the full Briggs criterion, slightly + more powerful than the one in George and Appel's paper. *) + +let canCoalesceBriggs g u v = + let seen = ref IntSet.empty in + let k = g.num_available_registers.(u.regclass) in + let c = ref 0 in + let consider other n = + if not (IntSet.mem n.ident !seen) then begin + seen := IntSet.add n.ident !seen; + (* if n interferes with both u and v, its degree will decrease by one + after coalescing *) + let degree_after_coalescing = + if interfere g n other then n.degree - 1 else n.degree in + if degree_after_coalescing >= k || n.nstate = Colored then begin + incr c; + if !c >= k then raise Exit + end + end in + try + iterAdjacent (consider v) u; + iterAdjacent (consider u) v; + (*i printf " Briggs: OK for %s and %s\n" (name_of_node u) (name_of_node v); *) + true + with Exit -> + (*i printf " Briggs: no\n"; *) + false + +(* George's conservative coalescing criterion: all high-degree neighbors + of [v] are neighbors of [u]. *) + +let canCoalesceGeorge g u v = + let k = g.num_available_registers.(u.regclass) in + let isOK t = + if t.nstate = Colored then + if u.nstate = Colored || interfere g t u then () else raise Exit + else + if t.degree < k || interfere g t u then () else raise Exit + in + try + iterAdjacent isOK v; + (*i printf " George: OK for %s and %s\n" (name_of_node u) (name_of_node v); *) + true + with Exit -> + (*i printf " George: no\n"; *) + false + +(* The combined coalescing criterion. [u] can be precolored, but + [v] is not. According to George and Appel's paper: +- If [u] is precolored, use George's criterion. +- If [u] is not precolored, use Briggs's criterion. + + As noted by Hailperin, for non-precolored nodes, George's criterion + is incomparable with Briggs's: there are cases where G says yes + and B says no. Typically, [u] is a long-lived variable with many + interferences, and [v] is a short-lived temporary copy of [u] + that has no more interferences than [u]. Coalescing [u] and [v] + is "weakly safe" in Hailperin's terminology: [u] is no harder to color, + [u]'s neighbors are no harder to color either, but if we end up + spilling [u], we'll spill [v] as well. So, we restrict this heuristic + to [v] having a small number of uses. +*) + +let thresholdGeorge = 3 + +let canCoalesce g u v = + (*i printf "canCoalesce %s[%.2f] %s[%.2f]\n" + (name_of_node u) u.spillcost (name_of_node v) v.spillcost; *) + if u.nstate = Colored + then canCoalesceGeorge g u v + else canCoalesceBriggs g u v + || (u.spillcost < infinity && v.spillcost < infinity && + ((v.accesses <= thresholdGeorge && canCoalesceGeorge g u v) + || (u.accesses <= thresholdGeorge && canCoalesceGeorge g v u))) + +(* Update worklists after a move was processed *) + +let addWorkList g u = + if (not (u.nstate = Colored)) + && u.degree < g.num_available_registers.(u.regclass) + && (not (moveRelated u)) + then DLinkNode.move u g.freezeWorklist g.simplifyWorklist + +(* Return the canonical representative of a possibly coalesced node *) + +let rec getAlias n = + match n.alias with None -> n | Some n' -> getAlias n' + +(* Combine two nodes *) + +let combine g u v = + (*i printf "Combining %s and %s\n" (name_of_node u) (name_of_node v); *) + (*i if u.spillcost = infinity then + printf "Warning: combining unspillable %s\n" (name_of_node u); + if v.spillcost = infinity then + printf "Warning: combining unspillable %s\n" (name_of_node v);*) + if v.nstate = FreezeWorklist + then DLinkNode.move v g.freezeWorklist g.coalescedNodes + else DLinkNode.move v g.spillWorklist g.coalescedNodes; + v.alias <- Some u; + (* Precolored nodes often have big movelists, and if one of [u] and [v] + is precolored, it is []u. So, append [v.movelist] to [u.movelist] + instead of the other way around. *) + u.movelist <- List.rev_append v.movelist u.movelist; + u.spillcost <- u.spillcost +. v.spillcost; + iterAdjacent (combineEdge g u) v; (*r original code using [decrementDegree] is buggy *) + u.extra_adj <- u.extra_adj @ v.extra_adj; + u.extra_pref <- u.extra_pref @ v.extra_pref; + enableMoves g v; (*r added as per Appel's book erratum *) + if u.degree >= g.num_available_registers.(u.regclass) + && u.nstate = FreezeWorklist + then DLinkNode.move u g.freezeWorklist g.spillWorklist + +(* Attempt coalescing *) + +let coalesce g = + let m = DLinkMove.pick g.worklistMoves in + let x = getAlias m.src and y = getAlias m.dst in + let (u, v) = if y.nstate = Colored then (y, x) else (x, y) in + (*i printf "Attempt coalescing %s and %s\n" (name_of_node u) (name_of_node v);*) + if u == v then begin + DLinkMove.insert m g.coalescedMoves; + addWorkList g u + end else if v.nstate = Colored || interfere g u v then begin + DLinkMove.insert m g.constrainedMoves; + addWorkList g u; + addWorkList g v + end else if canCoalesce g u v then begin + DLinkMove.insert m g.coalescedMoves; + combine g u v; + addWorkList g u + end else begin + DLinkMove.insert m g.activeMoves + end + +(* Freeze moves associated with node [u] *) + +let freezeMoves g u = + let u' = getAlias u in + let freeze m = + let y = getAlias m.src in + let v = if y == u' then getAlias m.dst else y in + DLinkMove.move m g.activeMoves g.frozenMoves; + if not (moveRelated v) + && v.degree < g.num_available_registers.(v.regclass) + && v.nstate <> Colored + then DLinkNode.move v g.freezeWorklist g.simplifyWorklist in + List.iter freeze (nodeMoves u) + +(* Pick a move and freeze it *) + +let freeze g = + let u = DLinkNode.pick g.freezeWorklist in + (*i printf "Freezing %s\n" (name_of_node u); *) + DLinkNode.insert u g.simplifyWorklist; + freezeMoves g u + +(* This is the original spill cost function from Chaitin 1982 *) + +(* +let spillCost n = +(*i + printf "spillCost %s: cost = %.2f degree = %d rank = %.2f\n" + (name_of_node n) n.spillcost n.degree + (n.spillcost /. float n.degree); +*) + n.spillcost /. float n.degree +*) + +(* This is spill cost function h_0 from Bernstein et al 1989. It performs + slightly better than Chaitin's and than functions h_1 and h_2. *) + +let spillCost n = + let deg = float n.degree in n.spillcost /. (deg *. deg) + +(* Spill a node *) + +let selectSpill g = + (*i printf "Attempt spilling\n"; *) + (* Find a spillable node of minimal cost *) + let (n, cost) = + DLinkNode.fold + (fun n (best_node, best_cost as best) -> + let cost = spillCost n in + if cost <= best_cost then (n, cost) else best) + g.spillWorklist (DLinkNode.dummy, infinity) in + assert (n != DLinkNode.dummy); + if cost = infinity then begin + printf "Warning: spilling unspillable %s\n" (name_of_node n); + printf " spill queue is:"; + DLinkNode.iter (fun n -> printf " %s" (name_of_node n)) g.spillWorklist; + printf "\n" + end; + DLinkNode.remove n g.spillWorklist; + (*i printf "Spilling %s\n" (name_of_node n); *) + freezeMoves g n; + n.nstate <- SelectStack; + iterAdjacent (decrementDegree g) n; + n + +(* Produce the order of nodes that we'll use for coloring *) + +let rec nodeOrder g stack = + (*i checkInvariants g; *) + if DLinkNode.notempty g.simplifyWorklist then + (let n = simplify g in nodeOrder g (n :: stack)) + else if DLinkMove.notempty g.worklistMoves then + (coalesce g; nodeOrder g stack) + else if DLinkNode.notempty g.freezeWorklist then + (freeze g; nodeOrder g stack) + else if DLinkNode.notempty g.spillWorklist then + (let n = selectSpill g in nodeOrder g (n :: stack)) + else + stack + +(* Assign a color (i.e. a hardware register or a stack location) + to a node. The color is chosen among the colors that are not + assigned to nodes with which this node interferes. The choice + is guided by the following heuristics: consider first caller-save + hardware register of the correct type; second, callee-save registers; + third, a stack location. Callee-save registers and stack locations + are ``expensive'' resources, so we try to minimize their number + by picking the smallest available callee-save register or stack location. + In contrast, caller-save registers are ``free'', so we pick an + available one pseudo-randomly. *) + +module Regset = + Set.Make(struct type t = mreg let compare = compare end) + +let find_reg g conflicts regclass = + let rec find avail curr last = + if curr >= last then None else begin + let r = avail.(curr) in + if Regset.mem r conflicts + then find avail (curr + 1) last + else Some (R r) + end in + let caller_save = g.caller_save_registers.(regclass) + and callee_save = g.callee_save_registers.(regclass) + and start = g.start_points.(regclass) in + match find caller_save start (Array.length caller_save) with + | Some _ as res -> + g.start_points.(regclass) <- + (if start + 1 < Array.length caller_save then start + 1 else 0); + res + | None -> + match find caller_save 0 start with + | Some _ as res -> + g.start_points.(regclass) <- + (if start + 1 < Array.length caller_save then start + 1 else 0); + res + | None -> + find callee_save 0 (Array.length callee_save) + +(* Aggressive coalescing of stack slots. When assigning a slot, + try first the slots assigned to the pseudoregs for which we + have a preference, provided no conflict occurs. *) + +let rec reuse_slot conflicts n mvlist = + match mvlist with + | [] -> None + | mv :: rem -> + let attempt_reuse n' = + match n'.color with + | Some(S(Local, _, _) as l) + when List.for_all (Loc.diff_dec l) conflicts -> Some l + | _ -> reuse_slot conflicts n rem in + let src = getAlias mv.src and dst = getAlias mv.dst in + if n == src then attempt_reuse dst + else if n == dst then attempt_reuse src + else reuse_slot conflicts n rem (* should not happen? *) + +(* If no reuse possible, assign lowest nonconflicting stack slot. *) + +let compare_slots s1 s2 = + match s1, s2 with + | S(_, ofs1, _), S(_, ofs2, _) -> Z.compare ofs1 ofs2 + | _, _ -> assert false + +let find_slot conflicts typ = + let rec find curr = function + | [] -> + S(Local, curr, typ) + | S(Local, ofs, typ') :: l -> + if Z.le (Z.add curr (typesize typ)) ofs then + S(Local, curr, typ) + else begin + let ofs' = Z.add ofs (typesize typ') in + find (if Z.le ofs' curr then curr else ofs') l + end + | _ :: l -> + find curr l + in find Z.zero (List.stable_sort compare_slots conflicts) + +(* Record locations assigned to interfering nodes *) + +let record_reg_conflict cnf n = + match (getAlias n).color with + | Some (R r) -> Regset.add r cnf + | _ -> cnf + +let record_slot_conflict cnf n = + match (getAlias n).color with + | Some (S _ as l) -> l :: cnf + | _ -> cnf + +(* Assign a location, the best we can *) + +let assign_color g n = + let reg_conflicts = + List.fold_left record_reg_conflict Regset.empty n.adjlist in + (* First, try to assign a register *) + match find_reg g reg_conflicts n.regclass with + | Some loc -> + n.color <- Some loc + | None -> + (* Add extra conflicts for nonallocatable and preallocated stack slots *) + let slot_conflicts = + List.fold_left record_slot_conflict + (List.fold_left record_slot_conflict [] n.adjlist) + n.extra_adj in + (* Second, try to coalesce stack slots *) + match reuse_slot slot_conflicts n (n.extra_pref @ n.movelist) with + | Some loc -> + n.color <- Some loc + | None -> + (* Last, pick a Local stack slot *) + n.color <- Some (find_slot slot_conflicts n.typ) + +(* Extract the location of a variable *) + +let location_of_var g v = + match v with + | L l -> l + | V(r, ty) -> + try + let n = Hashtbl.find g.varTable v in + let n' = getAlias n in + match n'.color with + | None -> assert false + | Some l -> l + with Not_found -> + match ty with + | Tint -> R dummy_int_reg + | Tfloat -> R dummy_float_reg + | Tlong -> assert false + +(* The exported interface *) + +let add_interf g v1 v2 = + addEdge g (nodeOfVar g v1) (nodeOfVar g v2) + +let add_pref g v1 v2 = + addMovePref g (nodeOfVar g v1) (nodeOfVar g v2) + +let coloring g = + initialNodePartition g; + List.iter (assign_color g) (nodeOrder g []); + location_of_var g (* total function var -> location *) diff --git a/backend/IRC.mli b/backend/IRC.mli new file mode 100644 index 0000000..e81b6dc --- /dev/null +++ b/backend/IRC.mli @@ -0,0 +1,44 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(* Iterated Register Coalescing: George and Appel's graph coloring algorithm *) + +open AST +open Registers +open Machregs +open Locations +open XTL + +(* The abstract type of interference and preference graphs. *) +type graph + +(* Information associated to every variable. *) +type var_stats = { + mutable cost: int; (* estimated cost of a spill *) + mutable usedefs: int (* number of uses and defs *) +} + +(* Create an empty graph. The given function associates statistics to + every variable. *) +val init: (reg -> var_stats) -> graph + +(* Add an interference between two variables. *) +val add_interf: graph -> var -> var -> unit + +(* Add a preference between two variables. *) +val add_pref: graph -> var -> var -> unit + +(* Color the graph. Return an assignment of locations to variables. *) +val coloring: graph -> (var -> loc) + +(* Machine registers that are reserved and not available for allocation. *) +val reserved_registers: mreg list ref diff --git a/backend/Inlining.v b/backend/Inlining.v index a9b1e7e..7b19f80 100644 --- a/backend/Inlining.v +++ b/backend/Inlining.v @@ -180,11 +180,17 @@ Next Obligation. intros; constructor; simpl; xomega. Qed. -Fixpoint mlist_iter2 {A B: Type} (f: A -> B -> mon unit) (l: list (A*B)): mon unit := - match l with - | nil => ret tt - | (x,y) :: l' => do z <- f x y; mlist_iter2 f l' - end. +Program Definition ptree_mfold {A: Type} (f: positive -> A -> mon unit) (t: PTree.t A): mon unit := + fun s => + R tt + (PTree.fold (fun s1 k v => match f k v s1 return _ with R _ s2 _ => s2 end) t s) + _. +Next Obligation. + apply PTree_Properties.fold_rec. + auto. + apply sincr_refl. + intros. destruct (f k v a). eapply sincr_trans; eauto. +Qed. (** ** Inlining contexts *) @@ -422,7 +428,7 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit := Definition expand_cfg_rec (ctx: context) (f: function): mon unit := do x <- request_stack (ctx.(dstk) + ctx.(mstk)); - mlist_iter2 (expand_instr ctx) (PTree.elements f.(fn_code)). + ptree_mfold (expand_instr ctx) f.(fn_code). End EXPAND_CFG. diff --git a/backend/Inliningspec.v b/backend/Inliningspec.v index 82ef9cf..e8dba67 100644 --- a/backend/Inliningspec.v +++ b/backend/Inliningspec.v @@ -180,6 +180,35 @@ Ltac monadInv H := ((progress simpl in H) || unfold F in H); monadInv1 H end. +Fixpoint mlist_iter2 {A B: Type} (f: A -> B -> mon unit) (l: list (A*B)): mon unit := + match l with + | nil => ret tt + | (x,y) :: l' => do z <- f x y; mlist_iter2 f l' + end. + +Remark mlist_iter2_fold: + forall (A B: Type) (f: A -> B -> mon unit) l s, + exists i, + mlist_iter2 f l s = + R tt (fold_left (fun a p => match f (fst p) (snd p) a with R _ s2 _ => s2 end) l s) i. +Proof. + induction l; simpl; intros. + exists (sincr_refl s); auto. + destruct a as [x y]. unfold bind. simpl. destruct (f x y s) as [xx s1 i1]. + destruct (IHl s1) as [i2 EQ]. rewrite EQ. econstructor; eauto. +Qed. + +Lemma ptree_mfold_spec: + forall (A: Type) (f: positive -> A -> mon unit) t s x s' i, + ptree_mfold f t s = R x s' i -> + exists i', mlist_iter2 f (PTree.elements t) s = R tt s' i'. +Proof. + intros. + destruct (mlist_iter2_fold _ _ f (PTree.elements t) s) as [i' EQ]. + unfold ptree_mfold in H. inv H. rewrite PTree.fold_spec. + econstructor. eexact EQ. +Qed. + (** ** Relational specification of the translation of moves *) Inductive tr_moves (c: code) : node -> list reg -> list reg -> node -> Prop := @@ -416,6 +445,7 @@ Lemma expand_cfg_rec_unchanged: Proof. intros. unfold expand_cfg_rec in H. monadInv H. inversion EQ. transitivity ((st_code s0)!pc). + exploit ptree_mfold_spec; eauto. intros [INCR' ITER]. eapply iter_expand_instr_unchanged; eauto. subst s0; auto. subst s0; simpl. xomega. @@ -596,7 +626,8 @@ Proof. intros. unfold expand_cfg_rec in H. monadInv H. inversion EQ. constructor. intros. rewrite H1. eapply max_def_function_params; eauto. - intros. eapply iter_expand_instr_spec; eauto. + intros. exploit ptree_mfold_spec; eauto. intros [INCR' ITER]. + eapply iter_expand_instr_spec; eauto. apply PTree.elements_keys_norepet. intros. rewrite H1. eapply max_def_function_instr; eauto. eapply PTree.elements_complete; eauto. diff --git a/backend/LTL.v b/backend/LTL.v index 422b0e0..de10845 100644 --- a/backend/LTL.v +++ b/backend/LTL.v @@ -30,27 +30,33 @@ Require Import Conventions. (** * Abstract syntax *) -(** LTL is close to RTL, but uses locations instead of pseudo-registers. *) +(** LTL is close to RTL, but uses machine registers and stack slots + instead of pseudo-registers. Also, the nodes of the control-flow + graph are basic blocks instead of single instructions. *) Definition node := positive. Inductive instruction: Type := - | Lnop: node -> instruction - | Lop: operation -> list loc -> loc -> node -> instruction - | Lload: memory_chunk -> addressing -> list loc -> loc -> node -> instruction - | Lstore: memory_chunk -> addressing -> list loc -> loc -> node -> instruction - | Lcall: signature -> loc + ident -> list loc -> loc -> node -> instruction - | Ltailcall: signature -> loc + ident -> list loc -> instruction - | Lbuiltin: external_function -> list loc -> loc -> node -> instruction - | Lcond: condition -> list loc -> node -> node -> instruction - | Ljumptable: loc -> list node -> instruction - | Lreturn: option loc -> instruction. - -Definition code: Type := PTree.t instruction. + | Lop (op: operation) (args: list mreg) (res: mreg) + | Lload (chunk: memory_chunk) (addr: addressing) (args: list mreg) (dst: mreg) + | Lgetstack (sl: slot) (ofs: Z) (ty: typ) (dst: mreg) + | Lsetstack (src: mreg) (sl: slot) (ofs: Z) (ty: typ) + | Lstore (chunk: memory_chunk) (addr: addressing) (args: list mreg) (src: mreg) + | Lcall (sg: signature) (ros: mreg + ident) + | Ltailcall (sg: signature) (ros: mreg + ident) + | Lbuiltin (ef: external_function) (args: list mreg) (res: list mreg) + | Lannot (ef: external_function) (args: list loc) + | Lbranch (s: node) + | Lcond (cond: condition) (args: list mreg) (s1 s2: node) + | Ljumptable (arg: mreg) (tbl: list node) + | Lreturn. + +Definition bblock := list instruction. + +Definition code: Type := PTree.t bblock. Record function: Type := mkfunction { fn_sig: signature; - fn_params: list loc; fn_stacksize: Z; fn_code: code; fn_entrypoint: node @@ -71,49 +77,56 @@ Definition funsig (fd: fundef) := Definition genv := Genv.t fundef unit. Definition locset := Locmap.t. -Definition locmap_optget (ol: option loc) (dfl: val) (ls: locset) : val := - match ol with - | None => dfl - | Some l => ls l - end. +(** Calling conventions are reflected at the level of location sets + (environments mapping locations to values) by the following two + functions. -Fixpoint init_locs (vl: list val) (rl: list loc) {struct rl} : locset := - match rl, vl with - | r1 :: rs, v1 :: vs => Locmap.set r1 v1 (init_locs vs rs) - | _, _ => Locmap.init Vundef - end. + [call_regs caller] returns the location set at function entry, + as a function of the location set [caller] of the calling function. +- Machine registers have the same values as in the caller. +- Incoming stack slots (used for parameter passing) have the same + values as the corresponding outgoing stack slots (used for argument + passing) in the caller. +- Local and outgoing stack slots are initialized to undefined values. +*) -(** [postcall_locs ls] returns the location set [ls] after a function - call. Caller-save registers and temporary registers - are set to [undef], reflecting the fact that the called - function can modify them freely. *) +Definition call_regs (caller: locset) : locset := + fun (l: loc) => + match l with + | R r => caller (R r) + | S Local ofs ty => Vundef + | S Incoming ofs ty => caller (S Outgoing ofs ty) + | S Outgoing ofs ty => Vundef + end. -Definition postcall_locs (ls: locset) : locset := +(** [return_regs caller callee] returns the location set after + a call instruction, as a function of the location set [caller] + of the caller before the call instruction and of the location + set [callee] of the callee at the return instruction. +- Callee-save machine registers have the same values as in the caller + before the call. +- Caller-save machine registers have the same values as in the callee. +- Stack slots have the same values as in the caller. +*) + +Definition return_regs (caller callee: locset) : locset := fun (l: loc) => match l with | R r => - if In_dec Loc.eq (R r) temporaries then - Vundef - else if In_dec Loc.eq (R r) destroyed_at_call then - Vundef - else - ls (R r) - | S s => ls (S s) + if In_dec mreg_eq r destroyed_at_call + then callee (R r) + else caller (R r) + | S sl ofs ty => caller (S sl ofs ty) end. -(** Temporaries destroyed across instructions *) - -Definition undef_temps (ls: locset) := Locmap.undef temporaries ls. - (** LTL execution states. *) Inductive stackframe : Type := | Stackframe: - forall (res: loc) (**r where to store the result *) - (f: function) (**r calling function *) + forall (f: function) (**r calling function *) (sp: val) (**r stack pointer in calling function *) (ls: locset) (**r location state in calling function *) - (pc: node), (**r program point in calling function *) + (bb: bblock), (**r continuation in calling function *) stackframe. Inductive state : Type := @@ -125,15 +138,23 @@ Inductive state : Type := (ls: locset) (**r location state *) (m: mem), (**r memory state *) state + | Block: + forall (stack: list stackframe) (**r call stack *) + (f: function) (**r function currently executing *) + (sp: val) (**r stack pointer *) + (bb: bblock) (**r current basic block *) + (ls: locset) (**r location state *) + (m: mem), (**r memory state *) + state | Callstate: forall (stack: list stackframe) (**r call stack *) (f: fundef) (**r function to call *) - (args: list val) (**r arguments to the call *) + (ls: locset) (**r location state of caller *) (m: mem), (**r memory state *) state | Returnstate: forall (stack: list stackframe) (**r call stack *) - (v: val) (**r return value for the call *) + (ls: locset) (**r location state of callee *) (m: mem), (**r memory state *) state. @@ -142,9 +163,24 @@ Section RELSEM. Variable ge: genv. -Definition find_function (los: loc + ident) (rs: locset) : option fundef := - match los with - | inl l => Genv.find_funct ge (rs l) +Definition reglist (rs: locset) (rl: list mreg) : list val := + List.map (fun r => rs (R r)) rl. + +Fixpoint undef_regs (rl: list mreg) (rs: locset) : locset := + match rl with + | nil => rs + | r1 :: rl => Locmap.set (R r1) Vundef (undef_regs rl rs) + end. + +Definition destroyed_by_getstack (s: slot) : list mreg := + match s with + | Incoming => temp_for_parent_frame :: nil + | _ => nil + end. + +Definition find_function (ros: mreg + ident) (rs: locset) : option fundef := + match ros with + | inl r => Genv.find_funct ge (rs (R r)) | inr symb => match Genv.find_symbol ge symb with | None => None @@ -152,95 +188,109 @@ Definition find_function (los: loc + ident) (rs: locset) : option fundef := end end. -(** The LTL transition relation is very similar to that of RTL, - with locations being used in place of pseudo-registers. - The main difference is for the [call] instruction: caller-save - registers are set to [Vundef] across the call, using the [postcall_locs] - function defined above. This forces the LTL producer to avoid - storing values live across the call in a caller-save register. *) +(** [parent_locset cs] returns the mapping of values for locations + of the caller function. *) + +Definition parent_locset (stack: list stackframe) : locset := + match stack with + | nil => Locmap.init Vundef + | Stackframe f sp ls bb :: stack' => ls + end. + +(* REVISE +(** [getslot sl ofs ty rs] looks up the value of location [S sl ofs ty] in [rs], + and normalizes it to the type [ty] of this location. *) + +Definition getslot (sl: slot) (ofs: Z) (ty: typ) (rs: locset) : val := + Val.load_result + (match ty with Tint => Mint32 | Tfloat => Mfloat64 | Tlong => Mint64 end) + (rs (S sl ofs ty)). +*) Inductive step: state -> trace -> state -> Prop := - | exec_Lnop: - forall s f sp pc rs m pc', - (fn_code f)!pc = Some(Lnop pc') -> + | exec_start_block: forall s f sp pc rs m bb, + (fn_code f)!pc = Some bb -> step (State s f sp pc rs m) - E0 (State s f sp pc' rs m) - | exec_Lop: - forall s f sp pc rs m op args res pc' v, - (fn_code f)!pc = Some(Lop op args res pc') -> - eval_operation ge sp op (map rs args) m = Some v -> - step (State s f sp pc rs m) - E0 (State s f sp pc' (Locmap.set res v (undef_temps rs)) m) - | exec_Lload: - forall s f sp pc rs m chunk addr args dst pc' a v, - (fn_code f)!pc = Some(Lload chunk addr args dst pc') -> - eval_addressing ge sp addr (map rs args) = Some a -> + E0 (Block s f sp bb rs m) + | exec_Lop: forall s f sp op args res bb rs m v rs', + eval_operation ge sp op (reglist rs args) m = Some v -> + rs' = Locmap.set (R res) v (undef_regs (destroyed_by_op op) rs) -> + step (Block s f sp (Lop op args res :: bb) rs m) + E0 (Block s f sp bb rs' m) + | exec_Lload: forall s f sp chunk addr args dst bb rs m a v rs', + eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = Some v -> - step (State s f sp pc rs m) - E0 (State s f sp pc' (Locmap.set dst v (undef_temps rs)) m) - | exec_Lstore: - forall s f sp pc rs m chunk addr args src pc' a m', - (fn_code f)!pc = Some(Lstore chunk addr args src pc') -> - eval_addressing ge sp addr (map rs args) = Some a -> - Mem.storev chunk m a (rs src) = Some m' -> - step (State s f sp pc rs m) - E0 (State s f sp pc' (undef_temps rs) m') - | exec_Lcall: - forall s f sp pc rs m sig ros args res pc' f', - (fn_code f)!pc = Some(Lcall sig ros args res pc') -> - find_function ros rs = Some f' -> - funsig f' = sig -> - step (State s f sp pc rs m) - E0 (Callstate (Stackframe res f sp (postcall_locs rs) pc' :: s) - f' (List.map rs args) m) - | exec_Ltailcall: - forall s f stk pc rs m sig ros args f' m', - (fn_code f)!pc = Some(Ltailcall sig ros args) -> - find_function ros rs = Some f' -> - funsig f' = sig -> - Mem.free m stk 0 f.(fn_stacksize) = Some m' -> - step (State s f (Vptr stk Int.zero) pc rs m) - E0 (Callstate s f' (List.map rs args) m') - | exec_Lbuiltin: - forall s f sp pc rs m ef args res pc' t v m', - (fn_code f)!pc = Some(Lbuiltin ef args res pc') -> - external_call ef ge (map rs args) m t v m' -> - step (State s f sp pc rs m) - t (State s f sp pc' (Locmap.set res v rs) m') - | exec_Lcond: - forall s f sp pc rs m cond args ifso ifnot b pc', - (fn_code f)!pc = Some(Lcond cond args ifso ifnot) -> - eval_condition cond (map rs args) m = Some b -> - pc' = (if b then ifso else ifnot) -> - step (State s f sp pc rs m) - E0 (State s f sp pc' (undef_temps rs) m) - | exec_Ljumptable: - forall s f sp pc rs m arg tbl n pc', - (fn_code f)!pc = Some(Ljumptable arg tbl) -> - rs arg = Vint n -> - list_nth_z tbl (Int.unsigned n) = Some pc' -> - step (State s f sp pc rs m) - E0 (State s f sp pc' (undef_temps rs) m) - | exec_Lreturn: - forall s f stk pc rs m or m', - (fn_code f)!pc = Some(Lreturn or) -> - Mem.free m stk 0 f.(fn_stacksize) = Some m' -> - step (State s f (Vptr stk Int.zero) pc rs m) - E0 (Returnstate s (locmap_optget or Vundef rs) m') - | exec_function_internal: - forall s f args m m' stk, - Mem.alloc m 0 f.(fn_stacksize) = (m', stk) -> - step (Callstate s (Internal f) args m) - E0 (State s f (Vptr stk Int.zero) f.(fn_entrypoint) (init_locs args f.(fn_params)) m') - | exec_function_external: - forall s ef t args res m m', - external_call ef ge args m t res m' -> - step (Callstate s (External ef) args m) - t (Returnstate s res m') - | exec_return: - forall res f sp rs pc s vres m, - step (Returnstate (Stackframe res f sp rs pc :: s) vres m) - E0 (State s f sp pc (Locmap.set res vres rs) m). + rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) -> + step (Block s f sp (Lload chunk addr args dst :: bb) rs m) + E0 (Block s f sp bb rs' m) + | exec_Lgetstack: forall s f sp sl ofs ty dst bb rs m rs', + rs' = Locmap.set (R dst) (rs (S sl ofs ty)) (undef_regs (destroyed_by_getstack sl) rs) -> + step (Block s f sp (Lgetstack sl ofs ty dst :: bb) rs m) + E0 (Block s f sp bb rs' m) + | exec_Lsetstack: forall s f sp src sl ofs ty bb rs m rs', + rs' = Locmap.set (S sl ofs ty) (rs (R src)) (undef_regs (destroyed_by_op Omove) rs) -> + step (Block s f sp (Lsetstack src sl ofs ty :: bb) rs m) + E0 (Block s f sp bb rs' m) + | exec_Lstore: forall s f sp chunk addr args src bb rs m a rs' m', + eval_addressing ge sp addr (reglist rs args) = Some a -> + Mem.storev chunk m a (rs (R src)) = Some m' -> + rs' = undef_regs (destroyed_by_store chunk addr) rs -> + step (Block s f sp (Lstore chunk addr args src :: bb) rs m) + E0 (Block s f sp bb rs' m') + | exec_Lcall: forall s f sp sig ros bb rs m fd, + find_function ros rs = Some fd -> + funsig fd = sig -> + step (Block s f sp (Lcall sig ros :: bb) rs m) + E0 (Callstate (Stackframe f sp rs bb :: s) fd rs m) + | exec_Ltailcall: forall s f sp sig ros bb rs m fd rs' m', + rs' = return_regs (parent_locset s) rs -> + find_function ros rs' = Some fd -> + funsig fd = sig -> + Mem.free m sp 0 f.(fn_stacksize) = Some m' -> + step (Block s f (Vptr sp Int.zero) (Ltailcall sig ros :: bb) rs m) + E0 (Callstate s fd rs' m') + | exec_Lbuiltin: forall s f sp ef args res bb rs m t vl rs' m', + external_call' ef ge (reglist rs args) m t vl m' -> + rs' = Locmap.setlist (map R res) vl (undef_regs (destroyed_by_builtin ef) rs) -> + step (Block s f sp (Lbuiltin ef args res :: bb) rs m) + t (Block s f sp bb rs' m') + | exec_Lannot: forall s f sp ef args bb rs m t vl m', + external_call' ef ge (map rs args) m t vl m' -> + step (Block s f sp (Lannot ef args :: bb) rs m) + t (Block s f sp bb rs m') + | exec_Lbranch: forall s f sp pc bb rs m, + step (Block s f sp (Lbranch pc :: bb) rs m) + E0 (State s f sp pc rs m) + | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m, + eval_condition cond (reglist rs args) m = Some b -> + pc = (if b then pc1 else pc2) -> + rs' = undef_regs (destroyed_by_cond cond) rs -> + step (Block s f sp (Lcond cond args pc1 pc2 :: bb) rs m) + E0 (State s f sp pc rs' m) + | exec_Ljumptable: forall s f sp arg tbl bb rs m n pc rs', + rs (R arg) = Vint n -> + list_nth_z tbl (Int.unsigned n) = Some pc -> + rs' = undef_regs (destroyed_by_jumptable) rs -> + step (Block s f sp (Ljumptable arg tbl :: bb) rs m) + E0 (State s f sp pc rs' m) + | exec_Lreturn: forall s f sp bb rs m m', + Mem.free m sp 0 f.(fn_stacksize) = Some m' -> + step (Block s f (Vptr sp Int.zero) (Lreturn :: bb) rs m) + E0 (Returnstate s (return_regs (parent_locset s) rs) m') + | exec_function_internal: forall s f rs m m' sp rs', + Mem.alloc m 0 f.(fn_stacksize) = (m', sp) -> + rs' = undef_regs destroyed_at_function_entry (call_regs rs) -> + step (Callstate s (Internal f) rs m) + E0 (State s f (Vptr sp Int.zero) f.(fn_entrypoint) rs' m') + | exec_function_external: forall s ef t args res rs m rs' m', + args = map rs (loc_arguments (ef_sig ef)) -> + external_call' ef ge args m t res m' -> + rs' = Locmap.setlist (map R (loc_result (ef_sig ef))) res rs -> + step (Callstate s (External ef) rs m) + t (Returnstate s rs' m') + | exec_return: forall f sp rs1 bb s rs m, + step (Returnstate (Stackframe f sp rs1 bb :: s) rs m) + E0 (Block s f sp bb rs m). End RELSEM. @@ -256,33 +306,32 @@ Inductive initial_state (p: program): state -> Prop := Genv.find_symbol ge p.(prog_main) = Some b -> Genv.find_funct_ptr ge b = Some f -> funsig f = mksignature nil (Some Tint) -> - initial_state p (Callstate nil f nil m0). + initial_state p (Callstate nil f (Locmap.init Vundef) m0). Inductive final_state: state -> int -> Prop := - | final_state_intro: forall n m, - final_state (Returnstate nil (Vint n) m) n. + | final_state_intro: forall rs m r retcode, + loc_result (mksignature nil (Some Tint)) = r :: nil -> + rs (R r) = Vint retcode -> + final_state (Returnstate nil rs m) retcode. Definition semantics (p: program) := Semantics step (initial_state p) final_state (Genv.globalenv p). (** * Operations over LTL *) -(** Computation of the possible successors of an instruction. +(** Computation of the possible successors of a block. This is used in particular for dataflow analyses. *) -Definition successors_instr (i: instruction) : list node := - match i with - | Lnop s => s :: nil - | Lop op args res s => s :: nil - | Lload chunk addr args dst s => s :: nil - | Lstore chunk addr args src s => s :: nil - | Lcall sig ros args res s => s :: nil - | Ltailcall sig ros args => nil - | Lbuiltin ef args res s => s :: nil - | Lcond cond args ifso ifnot => ifso :: ifnot :: nil - | Ljumptable arg tbl => tbl - | Lreturn optarg => nil +Fixpoint successors_block (b: bblock) : list node := + match b with + | nil => nil (**r should never happen *) + | Ltailcall _ _ :: _ => nil + | Lbranch s :: _ => s :: nil + | Lcond _ _ s1 s2 :: _ => s1 :: s2 :: nil + | Ljumptable _ tbl :: _ => tbl + | Lreturn :: _ => nil + | instr :: b' => successors_block b' end. Definition successors (f: function): PTree.t (list node) := - PTree.map1 successors_instr f.(fn_code). + PTree.map1 successors_block f.(fn_code). diff --git a/backend/LTLin.v b/backend/LTLin.v deleted file mode 100644 index e0d5ca2..0000000 --- a/backend/LTLin.v +++ /dev/null @@ -1,268 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** The LTLin intermediate language: abstract syntax and semantcs *) - -(** The LTLin language is a variant of LTL where control-flow is not - expressed as a graph of basic blocks, but as a linear list of - instructions with explicit labels and ``goto'' instructions. *) - -Require Import Coqlib. -Require Import AST. -Require Import Integers. -Require Import Values. -Require Import Memory. -Require Import Events. -Require Import Globalenvs. -Require Import Smallstep. -Require Import Op. -Require Import Locations. -Require Import LTL. - -(** * Abstract syntax *) - -Definition label := positive. - -(** LTLin instructions are similar to those of LTL. - Except the last three, these instructions continue in sequence - with the next instruction in the linear list of instructions. - Unconditional branches [Lgoto] and conditional branches [Lcond] - transfer control to the given code label. Labels are explicitly - inserted in the instruction list as pseudo-instructions [Llabel]. *) - -Inductive instruction: Type := - | Lop: operation -> list loc -> loc -> instruction - | Lload: memory_chunk -> addressing -> list loc -> loc -> instruction - | Lstore: memory_chunk -> addressing -> list loc -> loc -> instruction - | Lcall: signature -> loc + ident -> list loc -> loc -> instruction - | Ltailcall: signature -> loc + ident -> list loc -> instruction - | Lbuiltin: external_function -> list loc -> loc -> instruction - | Llabel: label -> instruction - | Lgoto: label -> instruction - | Lcond: condition -> list loc -> label -> instruction - | Ljumptable: loc -> list label -> instruction - | Lreturn: option loc -> instruction. - -Definition code: Type := list instruction. - -Record function: Type := mkfunction { - fn_sig: signature; - fn_params: list loc; - fn_stacksize: Z; - fn_code: code -}. - -Definition fundef := AST.fundef function. - -Definition program := AST.program fundef unit. - -Definition funsig (fd: fundef) := - match fd with - | Internal f => fn_sig f - | External ef => ef_sig ef - end. - -Definition genv := Genv.t fundef unit. -Definition locset := Locmap.t. - -(** * Operational semantics *) - -(** Looking up labels in the instruction list. *) - -Definition is_label (lbl: label) (instr: instruction) : bool := - match instr with - | Llabel lbl' => if peq lbl lbl' then true else false - | _ => false - end. - -Lemma is_label_correct: - forall lbl instr, - if is_label lbl instr then instr = Llabel lbl else instr <> Llabel lbl. -Proof. - intros. destruct instr; simpl; try discriminate. - case (peq lbl l); intro; congruence. -Qed. - -(** [find_label lbl c] returns a list of instruction, suffix of the - code [c], that immediately follows the [Llabel lbl] pseudo-instruction. - If the label [lbl] is multiply-defined, the first occurrence is - retained. If the label [lbl] is not defined, [None] is returned. *) - -Fixpoint find_label (lbl: label) (c: code) {struct c} : option code := - match c with - | nil => None - | i1 :: il => if is_label lbl i1 then Some il else find_label lbl il - end. - -(** The states of the dynamic semantics are similar to those used - in the LTL semantics (see module [LTL]). The only difference - is that program points [pc] (nodes of the CFG in LTL) become - code sequences [c] (suffixes of the code of the current function). -*) - -Inductive stackframe : Type := - | Stackframe: - forall (res: loc) (**r where to store the result *) - (f: function) (**r calling function *) - (sp: val) (**r stack pointer in calling function *) - (ls: locset) (**r location state in calling function *) - (c: code), (**r program point in calling function *) - stackframe. - -Inductive state : Type := - | State: - forall (stack: list stackframe) (**r call stack *) - (f: function) (**r function currently executing *) - (sp: val) (**r stack pointer *) - (c: code) (**r current program point *) - (ls: locset) (**r location state *) - (m: mem), (**r memory state *) - state - | Callstate: - forall (stack: list stackframe) (**r call stack *) - (f: fundef) (**r function to call *) - (args: list val) (**r arguments to the call *) - (m: mem), (**r memory state *) - state - | Returnstate: - forall (stack: list stackframe) (**r call stack *) - (v: val) (**r return value for the call *) - (m: mem), (**r memory state *) - state. - -Section RELSEM. - -Variable ge: genv. - -Definition find_function (ros: loc + ident) (rs: locset) : option fundef := - match ros with - | inl r => Genv.find_funct ge (rs r) - | inr symb => - match Genv.find_symbol ge symb with - | None => None - | Some b => Genv.find_funct_ptr ge b - end - end. - -Inductive step: state -> trace -> state -> Prop := - | exec_Lop: - forall s f sp op args res b rs m v, - eval_operation ge sp op (map rs args) m = Some v -> - step (State s f sp (Lop op args res :: b) rs m) - E0 (State s f sp b (Locmap.set res v (undef_temps rs)) m) - | exec_Lload: - forall s f sp chunk addr args dst b rs m a v, - eval_addressing ge sp addr (map rs args) = Some a -> - Mem.loadv chunk m a = Some v -> - step (State s f sp (Lload chunk addr args dst :: b) rs m) - E0 (State s f sp b (Locmap.set dst v (undef_temps rs)) m) - | exec_Lstore: - forall s f sp chunk addr args src b rs m m' a, - eval_addressing ge sp addr (map rs args) = Some a -> - Mem.storev chunk m a (rs src) = Some m' -> - step (State s f sp (Lstore chunk addr args src :: b) rs m) - E0 (State s f sp b (undef_temps rs) m') - | exec_Lcall: - forall s f sp sig ros args res b rs m f', - find_function ros rs = Some f' -> - sig = funsig f' -> - step (State s f sp (Lcall sig ros args res :: b) rs m) - E0 (Callstate (Stackframe res f sp (postcall_locs rs) b :: s) - f' (List.map rs args) m) - | exec_Ltailcall: - forall s f stk sig ros args b rs m f' m', - find_function ros rs = Some f' -> - sig = funsig f' -> - Mem.free m stk 0 f.(fn_stacksize) = Some m' -> - step (State s f (Vptr stk Int.zero) (Ltailcall sig ros args :: b) rs m) - E0 (Callstate s f' (List.map rs args) m') - | exec_Lbuiltin: - forall s f sp rs m ef args res b t v m', - external_call ef ge (map rs args) m t v m' -> - step (State s f sp (Lbuiltin ef args res :: b) rs m) - t (State s f sp b (Locmap.set res v rs) m') - | exec_Llabel: - forall s f sp lbl b rs m, - step (State s f sp (Llabel lbl :: b) rs m) - E0 (State s f sp b rs m) - | exec_Lgoto: - forall s f sp lbl b rs m b', - find_label lbl f.(fn_code) = Some b' -> - step (State s f sp (Lgoto lbl :: b) rs m) - E0 (State s f sp b' rs m) - | exec_Lcond_true: - forall s f sp cond args lbl b rs m b', - eval_condition cond (map rs args) m = Some true -> - find_label lbl f.(fn_code) = Some b' -> - step (State s f sp (Lcond cond args lbl :: b) rs m) - E0 (State s f sp b' (undef_temps rs) m) - | exec_Lcond_false: - forall s f sp cond args lbl b rs m, - eval_condition cond (map rs args) m = Some false -> - step (State s f sp (Lcond cond args lbl :: b) rs m) - E0 (State s f sp b (undef_temps rs) m) - | exec_Ljumptable: - forall s f sp arg tbl b rs m n lbl b', - rs arg = Vint n -> - list_nth_z tbl (Int.unsigned n) = Some lbl -> - find_label lbl f.(fn_code) = Some b' -> - step (State s f sp (Ljumptable arg tbl :: b) rs m) - E0 (State s f sp b' (undef_temps rs) m) - | exec_Lreturn: - forall s f stk rs m or b m', - Mem.free m stk 0 f.(fn_stacksize) = Some m' -> - step (State s f (Vptr stk Int.zero) (Lreturn or :: b) rs m) - E0 (Returnstate s (locmap_optget or Vundef rs) m') - | exec_function_internal: - forall s f args m m' stk, - Mem.alloc m 0 f.(fn_stacksize) = (m', stk) -> - step (Callstate s (Internal f) args m) - E0 (State s f (Vptr stk Int.zero) f.(fn_code) (init_locs args f.(fn_params)) m') - | exec_function_external: - forall s ef args t res m m', - external_call ef ge args m t res m' -> - step (Callstate s (External ef) args m) - t (Returnstate s res m') - | exec_return: - forall res f sp rs b s vres m, - step (Returnstate (Stackframe res f sp rs b :: s) vres m) - E0 (State s f sp b (Locmap.set res vres rs) m). - -End RELSEM. - -Inductive initial_state (p: program): state -> Prop := - | initial_state_intro: forall b f m0, - let ge := Genv.globalenv p in - Genv.init_mem p = Some m0 -> - Genv.find_symbol ge p.(prog_main) = Some b -> - Genv.find_funct_ptr ge b = Some f -> - funsig f = mksignature nil (Some Tint) -> - initial_state p (Callstate nil f nil m0). - -Inductive final_state: state -> int -> Prop := - | final_state_intro: forall n m, - final_state (Returnstate nil (Vint n) m) n. - -Definition semantics (p: program) := - Semantics step (initial_state p) final_state (Genv.globalenv p). - -(** * Properties of the operational semantics *) - -Lemma find_label_is_tail: - forall lbl c c', find_label lbl c = Some c' -> is_tail c' c. -Proof. - induction c; simpl; intros. - discriminate. - destruct (is_label lbl a). inv H. constructor. constructor. - constructor. auto. -Qed. - diff --git a/backend/LTLintyping.v b/backend/LTLintyping.v deleted file mode 100644 index 0338667..0000000 --- a/backend/LTLintyping.v +++ /dev/null @@ -1,122 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Typing rules for LTLin. *) - -Require Import Coqlib. -Require Import AST. -Require Import Integers. -Require Import Op. -Require Import Locations. -Require Import LTLin. -Require LTLtyping. -Require Import Conventions. - -(** The following predicates define a type system for LTLin similar to that - of LTL. *) - -Section WT_INSTR. - -Variable funsig: signature. - -Inductive wt_instr : instruction -> Prop := - | wt_Lopmove: - forall r1 r, - Loc.type r1 = Loc.type r -> loc_acceptable r1 -> loc_acceptable r -> - wt_instr (Lop Omove (r1 :: nil) r) - | wt_Lop: - forall op args res, - op <> Omove -> - (List.map Loc.type args, Loc.type res) = type_of_operation op -> - locs_acceptable args -> loc_acceptable res -> - wt_instr (Lop op args res) - | wt_Lload: - forall chunk addr args dst, - List.map Loc.type args = type_of_addressing addr -> - Loc.type dst = type_of_chunk chunk -> - locs_acceptable args -> loc_acceptable dst -> - wt_instr (Lload chunk addr args dst) - | wt_Lstore: - forall chunk addr args src, - List.map Loc.type args = type_of_addressing addr -> - Loc.type src = type_of_chunk chunk -> - locs_acceptable args -> loc_acceptable src -> - wt_instr (Lstore chunk addr args src) - | wt_Lcall: - forall sig ros args res, - List.map Loc.type args = sig.(sig_args) -> - Loc.type res = proj_sig_res sig -> - LTLtyping.call_loc_acceptable sig ros -> - locs_acceptable args -> loc_acceptable res -> - wt_instr (Lcall sig ros args res) - | wt_Ltailcall: - forall sig ros args, - List.map Loc.type args = sig.(sig_args) -> - LTLtyping.call_loc_acceptable sig ros -> - locs_acceptable args -> - sig.(sig_res) = funsig.(sig_res) -> - tailcall_possible sig -> - wt_instr (Ltailcall sig ros args) - | wt_Lbuiltin: - forall ef args res, - List.map Loc.type args = (ef_sig ef).(sig_args) -> - Loc.type res = proj_sig_res (ef_sig ef) -> - arity_ok (ef_sig ef).(sig_args) = true \/ ef_reloads ef = false -> - locs_acceptable args -> loc_acceptable res -> - wt_instr (Lbuiltin ef args res) - | wt_Llabel: forall lbl, - wt_instr (Llabel lbl) - | wt_Lgoto: forall lbl, - wt_instr (Lgoto lbl) - | wt_Lcond: - forall cond args lbl, - List.map Loc.type args = type_of_condition cond -> - locs_acceptable args -> - wt_instr (Lcond cond args lbl) - | wt_Ljumptable: - forall arg tbl, - Loc.type arg = Tint -> - loc_acceptable arg -> - list_length_z tbl * 4 <= Int.max_unsigned -> - wt_instr (Ljumptable arg tbl) - | wt_Lreturn: - forall optres, - option_map Loc.type optres = funsig.(sig_res) -> - match optres with None => True | Some r => loc_acceptable r end -> - wt_instr (Lreturn optres). - -Definition wt_code (c: code) : Prop := - forall i, In i c -> wt_instr i. - -End WT_INSTR. - -Record wt_function (f: function): Prop := - mk_wt_function { - wt_params: - List.map Loc.type f.(fn_params) = f.(fn_sig).(sig_args); - wt_acceptable: - locs_acceptable f.(fn_params); - wt_norepet: - Loc.norepet f.(fn_params); - wt_instrs: - wt_code f.(fn_sig) f.(fn_code) -}. - -Inductive wt_fundef: fundef -> Prop := - | wt_fundef_external: forall ef, - wt_fundef (External ef) - | wt_function_internal: forall f, - wt_function f -> - wt_fundef (Internal f). - -Definition wt_program (p: program): Prop := - forall i f, In (i, Gfun f) (prog_defs p) -> wt_fundef f. diff --git a/backend/LTLtyping.v b/backend/LTLtyping.v deleted file mode 100644 index 0c90514..0000000 --- a/backend/LTLtyping.v +++ /dev/null @@ -1,143 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Typing rules for LTL. *) - -Require Import Coqlib. -Require Import Maps. -Require Import AST. -Require Import Integers. -Require Import Op. -Require Import Locations. -Require Import LTL. -Require Import Conventions. - -(** The following predicates define a type system for LTL similar to that - of [RTL] (see file [RTLtyping]): it statically guarantees that operations - and addressing modes are applied to the right number of arguments - and that the arguments are of the correct types. Moreover, it also - guarantees that the locations of arguments and results are "acceptable", - i.e. either non-temporary registers or [Local] stack locations. *) - -Section WT_INSTR. - -Variable funct: function. - -Definition valid_successor (s: node) : Prop := - exists i, funct.(fn_code)!s = Some i. - -Definition call_loc_acceptable (sig: signature) (los: loc + ident) : Prop := - match los with - | inl l => Loc.type l = Tint /\ loc_acceptable l /\ ~In l (loc_arguments sig) - | inr s => True - end. - -Inductive wt_instr : instruction -> Prop := - | wt_Lnop: - forall s, - valid_successor s -> - wt_instr (Lnop s) - | wt_Lopmove: - forall r1 r s, - Loc.type r1 = Loc.type r -> loc_acceptable r1 -> loc_acceptable r -> - valid_successor s -> - wt_instr (Lop Omove (r1 :: nil) r s) - | wt_Lop: - forall op args res s, - op <> Omove -> - (List.map Loc.type args, Loc.type res) = type_of_operation op -> - locs_acceptable args -> loc_acceptable res -> - valid_successor s -> - wt_instr (Lop op args res s) - | wt_Lload: - forall chunk addr args dst s, - List.map Loc.type args = type_of_addressing addr -> - Loc.type dst = type_of_chunk chunk -> - locs_acceptable args -> loc_acceptable dst -> - valid_successor s -> - wt_instr (Lload chunk addr args dst s) - | wt_Lstore: - forall chunk addr args src s, - List.map Loc.type args = type_of_addressing addr -> - Loc.type src = type_of_chunk chunk -> - locs_acceptable args -> loc_acceptable src -> - valid_successor s -> - wt_instr (Lstore chunk addr args src s) - | wt_Lcall: - forall sig ros args res s, - List.map Loc.type args = sig.(sig_args) -> - Loc.type res = proj_sig_res sig -> - call_loc_acceptable sig ros -> - locs_acceptable args -> loc_acceptable res -> - valid_successor s -> - wt_instr (Lcall sig ros args res s) - | wt_Ltailcall: - forall sig ros args, - List.map Loc.type args = sig.(sig_args) -> - call_loc_acceptable sig ros -> - locs_acceptable args -> - sig.(sig_res) = funct.(fn_sig).(sig_res) -> - tailcall_possible sig -> - wt_instr (Ltailcall sig ros args) - | wt_Lbuiltin: - forall ef args res s, - List.map Loc.type args = (ef_sig ef).(sig_args) -> - Loc.type res = proj_sig_res (ef_sig ef) -> - arity_ok (ef_sig ef).(sig_args) = true \/ ef_reloads ef = false -> - locs_acceptable args -> loc_acceptable res -> - valid_successor s -> - wt_instr (Lbuiltin ef args res s) - | wt_Lcond: - forall cond args s1 s2, - List.map Loc.type args = type_of_condition cond -> - locs_acceptable args -> - valid_successor s1 -> valid_successor s2 -> - wt_instr (Lcond cond args s1 s2) - | wt_Ljumptable: - forall arg tbl, - Loc.type arg = Tint -> - loc_acceptable arg -> - (forall lbl, In lbl tbl -> valid_successor lbl) -> - list_length_z tbl * 4 <= Int.max_unsigned -> - wt_instr (Ljumptable arg tbl) - | wt_Lreturn: - forall optres, - option_map Loc.type optres = funct.(fn_sig).(sig_res) -> - match optres with None => True | Some r => loc_acceptable r end -> - wt_instr (Lreturn optres). - -End WT_INSTR. - -Record wt_function (f: function): Prop := - mk_wt_function { - wt_params: - List.map Loc.type f.(fn_params) = f.(fn_sig).(sig_args); - wt_acceptable: - locs_acceptable f.(fn_params); - wt_norepet: - Loc.norepet f.(fn_params); - wt_instrs: - forall pc instr, - f.(fn_code)!pc = Some instr -> wt_instr f instr; - wt_entrypoint: - valid_successor f f.(fn_entrypoint) -}. - -Inductive wt_fundef: fundef -> Prop := - | wt_fundef_external: forall ef, - wt_fundef (External ef) - | wt_function_internal: forall f, - wt_function f -> - wt_fundef (Internal f). - -Definition wt_program (p: program): Prop := - forall i f, In (i, Gfun f) (prog_defs p) -> wt_fundef f. diff --git a/backend/Linear.v b/backend/Linear.v index 52f5fd7..bdb08b4 100644 --- a/backend/Linear.v +++ b/backend/Linear.v @@ -12,10 +12,9 @@ (** The Linear intermediate language: abstract syntax and semantcs *) -(** The Linear language is a variant of LTLin where arithmetic - instructions operate on machine registers (type [mreg]) instead - of arbitrary locations. Special instructions [Lgetstack] and - [Lsetstack] are provided to access stack slots. *) +(** The Linear language is a variant of LTL where control-flow is not + expressed as a graph of basic blocks, but as a linear list of + instructions with explicit labels and ``goto'' instructions. *) Require Import Coqlib. Require Import AST. @@ -35,14 +34,14 @@ Require Import Conventions. Definition label := positive. Inductive instruction: Type := - | Lgetstack: slot -> mreg -> instruction - | Lsetstack: mreg -> slot -> instruction + | Lgetstack: slot -> Z -> typ -> mreg -> instruction + | Lsetstack: mreg -> slot -> Z -> typ -> instruction | Lop: operation -> list mreg -> mreg -> instruction | Lload: memory_chunk -> addressing -> list mreg -> mreg -> instruction | Lstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction | Lcall: signature -> mreg + ident -> instruction | Ltailcall: signature -> mreg + ident -> instruction - | Lbuiltin: external_function -> list mreg -> mreg -> instruction + | Lbuiltin: external_function -> list mreg -> list mreg -> instruction | Lannot: external_function -> list loc -> instruction | Llabel: label -> instruction | Lgoto: label -> instruction @@ -114,73 +113,6 @@ Definition find_function (ros: mreg + ident) (rs: locset) : option fundef := end end. -Definition reglist (rs: locset) (rl: list mreg) : list val := - List.map (fun r => rs (R r)) rl. - -(** Calling conventions are reflected at the level of location sets - (environments mapping locations to values) by the following two - functions. - - [call_regs caller] returns the location set at function entry, - as a function of the location set [caller] of the calling function. -- Temporary registers are undefined. -- Other machine registers have the same values as in the caller. -- Incoming stack slots (used for parameter passing) have the same - values as the corresponding outgoing stack slots (used for argument - passing) in the caller. -- Local and outgoing stack slots are initialized to undefined values. -*) - -Definition call_regs (caller: locset) : locset := - fun (l: loc) => - match l with - | R r => if In_dec Loc.eq (R r) temporaries then Vundef else caller (R r) - | S (Local ofs ty) => Vundef - | S (Incoming ofs ty) => caller (S (Outgoing ofs ty)) - | S (Outgoing ofs ty) => Vundef - end. - -(** [return_regs caller callee] returns the location set after - a call instruction, as a function of the location set [caller] - of the caller before the call instruction and of the location - set [callee] of the callee at the return instruction. -- Callee-save machine registers have the same values as in the caller - before the call. -- Caller-save machine registers have the same values - as in the callee at the return point. -- Stack slots have the same values as in the caller before the call. -*) - -Definition return_regs (caller callee: locset) : locset := - fun (l: loc) => - match l with - | R r => - if In_dec Loc.eq (R r) temporaries then - callee (R r) - else if In_dec Loc.eq (R r) destroyed_at_call then - callee (R r) - else - caller (R r) - | S s => caller (S s) - end. - -(** Temporaries destroyed across operations *) - -Definition undef_op (op: operation) (rs: locset) := - match op with - | Omove => Locmap.undef destroyed_at_move rs - | _ => Locmap.undef temporaries rs - end. - -Definition undef_getstack (s: slot) (rs: locset) := - match s with - | Incoming _ _ => Locmap.set (R IT1) Vundef rs - | _ => rs - end. - -Definition undef_setstack (rs: locset) := - Locmap.undef destroyed_at_move rs. - (** Linear execution states. *) Inductive stackframe: Type := @@ -214,73 +146,43 @@ Inductive state: Type := (** [parent_locset cs] returns the mapping of values for locations of the caller function. *) - Definition parent_locset (stack: list stackframe) : locset := match stack with | nil => Locmap.init Vundef | Stackframe f sp ls c :: stack' => ls end. -(** The main difference between the Linear transition relation - and the LTL transition relation is the handling of function calls. - In LTL, arguments and results to calls are transmitted via - [vargs] and [vres] components of [Callstate] and [Returnstate], - respectively. The semantics takes care of transferring these values - between the locations of the caller and of the callee. - - In Linear and lower-level languages (Mach, PPC), arguments and - results are transmitted implicitly: the generated code for the - caller arranges for arguments to be left in conventional registers - and stack locations, as determined by the calling conventions, where - the function being called will find them. Similarly, conventional - registers will be used to pass the result value back to the caller. - This is reflected in the definition of [Callstate] and [Returnstate] - above, where a whole location state [rs] is passed instead of just - the values of arguments or returns as in LTL. - - These location states passed across calls are treated in a way that - reflects the callee-save/caller-save convention: -- The [exec_Lcall] transition from [State] to [Callstate] - saves the current location state [ls] in the call stack, - and passes it to the callee. -- The [exec_function_internal] transition from [Callstate] to [State] - changes the view of stack slots ([Outgoing] slots slide to - [Incoming] slots as per [call_regs]). -- The [exec_Lreturn] transition from [State] to [Returnstate] - restores the values of callee-save locations from - the location state of the caller, using [return_regs]. - -This protocol makes it much easier to later prove the correctness of -the [Stacking] pass, which inserts actual code that performs the -saving and restoring of callee-save registers described above. -*) - Inductive step: state -> trace -> state -> Prop := | exec_Lgetstack: - forall s f sp sl r b rs m, - step (State s f sp (Lgetstack sl r :: b) rs m) - E0 (State s f sp b (Locmap.set (R r) (rs (S sl)) (undef_getstack sl rs)) m) + forall s f sp sl ofs ty dst b rs m rs', + rs' = Locmap.set (R dst) (rs (S sl ofs ty)) (undef_regs (destroyed_by_getstack sl) rs) -> + step (State s f sp (Lgetstack sl ofs ty dst :: b) rs m) + E0 (State s f sp b rs' m) | exec_Lsetstack: - forall s f sp r sl b rs m, - step (State s f sp (Lsetstack r sl :: b) rs m) - E0 (State s f sp b (Locmap.set (S sl) (rs (R r)) (undef_setstack rs)) m) + forall s f sp src sl ofs ty b rs m rs', + rs' = Locmap.set (S sl ofs ty) (rs (R src)) (undef_regs (destroyed_by_op Omove) rs) -> + step (State s f sp (Lsetstack src sl ofs ty :: b) rs m) + E0 (State s f sp b rs' m) | exec_Lop: - forall s f sp op args res b rs m v, + forall s f sp op args res b rs m v rs', eval_operation ge sp op (reglist rs args) m = Some v -> + rs' = Locmap.set (R res) v (undef_regs (destroyed_by_op op) rs) -> step (State s f sp (Lop op args res :: b) rs m) - E0 (State s f sp b (Locmap.set (R res) v (undef_op op rs)) m) + E0 (State s f sp b rs' m) | exec_Lload: - forall s f sp chunk addr args dst b rs m a v, + forall s f sp chunk addr args dst b rs m a v rs', eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = Some v -> + rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) -> step (State s f sp (Lload chunk addr args dst :: b) rs m) - E0 (State s f sp b (Locmap.set (R dst) v (undef_temps rs)) m) + E0 (State s f sp b rs' m) | exec_Lstore: - forall s f sp chunk addr args src b rs m m' a, + forall s f sp chunk addr args src b rs m m' a rs', eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.storev chunk m a (rs (R src)) = Some m' -> + rs' = undef_regs (destroyed_by_store chunk addr) rs -> step (State s f sp (Lstore chunk addr args src :: b) rs m) - E0 (State s f sp b (undef_temps rs) m') + E0 (State s f sp b rs' m') | exec_Lcall: forall s f sp sig ros b rs m f', find_function ros rs = Some f' -> @@ -288,20 +190,22 @@ Inductive step: state -> trace -> state -> Prop := step (State s f sp (Lcall sig ros :: b) rs m) E0 (Callstate (Stackframe f sp rs b:: s) f' rs m) | exec_Ltailcall: - forall s f stk sig ros b rs m f' m', - find_function ros rs = Some f' -> + forall s f stk sig ros b rs m rs' f' m', + rs' = return_regs (parent_locset s) rs -> + find_function ros rs' = Some f' -> sig = funsig f' -> Mem.free m stk 0 f.(fn_stacksize) = Some m' -> step (State s f (Vptr stk Int.zero) (Ltailcall sig ros :: b) rs m) - E0 (Callstate s f' (return_regs (parent_locset s) rs) m') + E0 (Callstate s f' rs' m') | exec_Lbuiltin: - forall s f sp rs m ef args res b t v m', - external_call ef ge (reglist rs args) m t v m' -> + forall s f sp rs m ef args res b t vl rs' m', + external_call' ef ge (reglist rs args) m t vl m' -> + rs' = Locmap.setlist (map R res) vl (undef_regs (destroyed_by_builtin ef) rs) -> step (State s f sp (Lbuiltin ef args res :: b) rs m) - t (State s f sp b (Locmap.set (R res) v (undef_temps rs)) m') + t (State s f sp b rs' m') | exec_Lannot: forall s f sp rs m ef args b t v m', - external_call ef ge (map rs args) m t v m' -> + external_call' ef ge (map rs args) m t v m' -> step (State s f sp (Lannot ef args :: b) rs m) t (State s f sp b rs m') | exec_Llabel: @@ -314,38 +218,42 @@ Inductive step: state -> trace -> state -> Prop := step (State s f sp (Lgoto lbl :: b) rs m) E0 (State s f sp b' rs m) | exec_Lcond_true: - forall s f sp cond args lbl b rs m b', + forall s f sp cond args lbl b rs m rs' b', eval_condition cond (reglist rs args) m = Some true -> + rs' = undef_regs (destroyed_by_cond cond) rs -> find_label lbl f.(fn_code) = Some b' -> step (State s f sp (Lcond cond args lbl :: b) rs m) - E0 (State s f sp b' (undef_temps rs) m) + E0 (State s f sp b' rs' m) | exec_Lcond_false: - forall s f sp cond args lbl b rs m, + forall s f sp cond args lbl b rs m rs', eval_condition cond (reglist rs args) m = Some false -> + rs' = undef_regs (destroyed_by_cond cond) rs -> step (State s f sp (Lcond cond args lbl :: b) rs m) - E0 (State s f sp b (undef_temps rs) m) + E0 (State s f sp b rs' m) | exec_Ljumptable: - forall s f sp arg tbl b rs m n lbl b', + forall s f sp arg tbl b rs m n lbl b' rs', rs (R arg) = Vint n -> list_nth_z tbl (Int.unsigned n) = Some lbl -> find_label lbl f.(fn_code) = Some b' -> + rs' = undef_regs (destroyed_by_jumptable) rs -> step (State s f sp (Ljumptable arg tbl :: b) rs m) - E0 (State s f sp b' (undef_temps rs) m) + E0 (State s f sp b' rs' m) | exec_Lreturn: forall s f stk b rs m m', Mem.free m stk 0 f.(fn_stacksize) = Some m' -> step (State s f (Vptr stk Int.zero) (Lreturn :: b) rs m) E0 (Returnstate s (return_regs (parent_locset s) rs) m') | exec_function_internal: - forall s f rs m m' stk, + forall s f rs m rs' m' stk, Mem.alloc m 0 f.(fn_stacksize) = (m', stk) -> + rs' = undef_regs destroyed_at_function_entry (call_regs rs) -> step (Callstate s (Internal f) rs m) - E0 (State s f (Vptr stk Int.zero) f.(fn_code) (call_regs rs) m') + E0 (State s f (Vptr stk Int.zero) f.(fn_code) rs' m') | exec_function_external: forall s ef args res rs1 rs2 m t m', - external_call ef ge args m t res m' -> - args = List.map rs1 (loc_arguments (ef_sig ef)) -> - rs2 = Locmap.set (R (loc_result (ef_sig ef))) res rs1 -> + args = map rs1 (loc_arguments (ef_sig ef)) -> + external_call' ef ge args m t res m' -> + rs2 = Locmap.setlist (map R (loc_result (ef_sig ef))) res rs1 -> step (Callstate s (External ef) rs1 m) t (Returnstate s rs2 m') | exec_return: @@ -365,9 +273,10 @@ Inductive initial_state (p: program): state -> Prop := initial_state p (Callstate nil f (Locmap.init Vundef) m0). Inductive final_state: state -> int -> Prop := - | final_state_intro: forall rs m r, - rs (R (loc_result (mksignature nil (Some Tint)))) = Vint r -> - final_state (Returnstate nil rs m) r. + | final_state_intro: forall rs m r retcode, + loc_result (mksignature nil (Some Tint)) = r :: nil -> + rs (R r) = Vint retcode -> + final_state (Returnstate nil rs m) retcode. Definition semantics (p: program) := Semantics step (initial_state p) final_state (Genv.globalenv p). diff --git a/backend/Linearize.v b/backend/Linearize.v index 6fc8e48..388f459 100644 --- a/backend/Linearize.v +++ b/backend/Linearize.v @@ -10,8 +10,7 @@ (* *) (* *********************************************************************) -(** Linearization of the control-flow graph: - translation from LTL to LTLin *) +(** Linearization of the control-flow graph: translation from LTL to Linear *) Require Import Coqlib. Require Import Maps. @@ -23,26 +22,26 @@ Require Import Errors. Require Import Op. Require Import Locations. Require Import LTL. -Require Import LTLin. +Require Import Linear. Require Import Kildall. Require Import Lattice. Open Scope error_monad_scope. -(** To translate from LTL to LTLin, we must lay out the nodes +(** To translate from LTL to Linear, we must lay out the nodes of the LTL control-flow graph in some linear order, and insert explicit branches and conditional branches to make sure that each node jumps to its successors as prescribed by the LTL control-flow graph. However, branches are not necessary if the fall-through behaviour of LTLin instructions already implements the desired flow of control. For instance, - consider the two LTL instructions + consider the two LTL blocks << - L1: Lop op args res L2 + L1: Lop op args res; Lbranch L2 L2: ... >> If the instructions [L1] and [L2] are laid out consecutively in the LTLin - code, we can generate the following LTLin code: + code, we can generate the following Linear code: << L1: Lop op args res L2: ... @@ -65,7 +64,7 @@ Open Scope error_monad_scope. - Choosing an order for the nodes. This returns an enumeration of CFG nodes stating that they must be laid out in the order shown in the list. -- Generate LTLin code where each node branches explicitly to its +- Generate Linear code where each node branches explicitly to its successors, except if one of these successors is the immediately following instruction. @@ -102,7 +101,7 @@ Definition reachable (f: LTL.function) : PMap.t bool := | Some rs => rs end. -(** We then enumerate the nodes of reachable instructions. +(** We then enumerate the nodes of reachable blocks. This task is performed by external, untrusted Caml code. *) Parameter enumerate_aux: LTL.function -> PMap.t bool -> list node. @@ -126,7 +125,7 @@ Fixpoint nodeset_of_list (l: list node) (s: Nodeset.t) Definition check_reachable_aux (reach: PMap.t bool) (s: Nodeset.t) - (ok: bool) (pc: node) (i: LTL.instruction) : bool := + (ok: bool) (pc: node) (bb: LTL.bblock) : bool := if reach!!pc then ok && Nodeset.mem pc s else ok. Definition check_reachable @@ -141,10 +140,10 @@ Definition enumerate (f: LTL.function) : res (list node) := then OK enum else Error (msg "Linearize: wrong enumeration"). -(** * Translation from LTL to LTLin *) +(** * Translation from LTL to Linear *) (** We now flatten the structure of the CFG graph, laying out - LTL instructions consecutively in the order computed by [enumerate], + LTL blocks consecutively in the order computed by [enumerate], and inserting branches to the labels of sucessors if necessary. Whether to insert a branch or not is determined by the [starts_with] function below. @@ -169,31 +168,38 @@ Fixpoint starts_with (lbl: label) (k: code) {struct k} : bool := Definition add_branch (s: label) (k: code) : code := if starts_with s k then k else Lgoto s :: k. -Definition linearize_instr (b: LTL.instruction) (k: code) : code := +Fixpoint linearize_block (b: LTL.bblock) (k: code) : code := match b with - | LTL.Lnop s => + | nil => k + | LTL.Lop op args res :: b' => + Lop op args res :: linearize_block b' k + | LTL.Lload chunk addr args dst :: b' => + Lload chunk addr args dst :: linearize_block b' k + | LTL.Lgetstack sl ofs ty dst :: b' => + Lgetstack sl ofs ty dst :: linearize_block b' k + | LTL.Lsetstack src sl ofs ty :: b' => + Lsetstack src sl ofs ty :: linearize_block b' k + | LTL.Lstore chunk addr args src :: b' => + Lstore chunk addr args src :: linearize_block b' k + | LTL.Lcall sig ros :: b' => + Lcall sig ros :: linearize_block b' k + | LTL.Ltailcall sig ros :: b' => + Ltailcall sig ros :: k + | LTL.Lbuiltin ef args res :: b' => + Lbuiltin ef args res :: linearize_block b' k + | LTL.Lannot ef args :: b' => + Lannot ef args :: linearize_block b' k + | LTL.Lbranch s :: b' => add_branch s k - | LTL.Lop op args res s => - Lop op args res :: add_branch s k - | LTL.Lload chunk addr args dst s => - Lload chunk addr args dst :: add_branch s k - | LTL.Lstore chunk addr args src s => - Lstore chunk addr args src :: add_branch s k - | LTL.Lcall sig ros args res s => - Lcall sig ros args res :: add_branch s k - | LTL.Ltailcall sig ros args => - Ltailcall sig ros args :: k - | LTL.Lbuiltin ef args res s => - Lbuiltin ef args res :: add_branch s k - | LTL.Lcond cond args s1 s2 => + | LTL.Lcond cond args s1 s2 :: b' => if starts_with s1 k then Lcond (negate_condition cond) args s2 :: add_branch s1 k else Lcond cond args s1 :: add_branch s2 k - | LTL.Ljumptable arg tbl => + | LTL.Ljumptable arg tbl :: b' => Ljumptable arg tbl :: k - | LTL.Lreturn or => - Lreturn or :: k + | LTL.Lreturn :: b' => + Lreturn :: k end. (** Linearize a function body according to an enumeration of its nodes. *) @@ -201,7 +207,7 @@ Definition linearize_instr (b: LTL.instruction) (k: code) : code := Definition linearize_node (f: LTL.function) (pc: node) (k: code) : code := match f.(LTL.fn_code)!pc with | None => k - | Some b => Llabel pc :: linearize_instr b k + | Some b => Llabel pc :: linearize_block b k end. Definition linearize_body (f: LTL.function) (enum: list node) : code := @@ -209,16 +215,15 @@ Definition linearize_body (f: LTL.function) (enum: list node) : code := (** * Entry points for code linearization *) -Definition transf_function (f: LTL.function) : res LTLin.function := +Definition transf_function (f: LTL.function) : res Linear.function := do enum <- enumerate f; OK (mkfunction (LTL.fn_sig f) - (LTL.fn_params f) (LTL.fn_stacksize f) (add_branch (LTL.fn_entrypoint f) (linearize_body f enum))). -Definition transf_fundef (f: LTL.fundef) : res LTLin.fundef := +Definition transf_fundef (f: LTL.fundef) : res Linear.fundef := AST.transf_partial_fundef transf_function f. -Definition transf_program (p: LTL.program) : res LTLin.program := +Definition transf_program (p: LTL.program) : res Linear.program := transform_partial_program transf_fundef p. diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml index ac47ae8..5bb5838 100644 --- a/backend/Linearizeaux.ml +++ b/backend/Linearizeaux.ml @@ -47,8 +47,12 @@ let join_points f = reached_twice := IntSet.add npc !reached_twice end else begin reached := IntSet.add npc !reached; - List.iter traverse (Kildall.successors_list succs pc) + traverse_succs (Kildall.successors_list succs pc) end + and traverse_succs = function + | [] -> () + | [pc] -> traverse pc + | pc :: l -> traverse pc; traverse_succs l in traverse f.fn_entrypoint; !reached_twice (* Cut into reachable basic blocks, annotated with the min value of the PC *) @@ -73,20 +77,18 @@ let basic_blocks f joins = let minpc = min npc minpc in match PTree.get pc f.fn_code with | None -> assert false - | Some i -> - match i with - | Lnop s -> next_in_block blk minpc s - | Lop (op, args, res, s) -> next_in_block blk minpc s - | Lload (chunk, addr, args, dst, s) -> next_in_block blk minpc s - | Lstore (chunk, addr, args, src, s) -> next_in_block blk minpc s - | Lcall (sig0, ros, args, res, s) -> next_in_block blk minpc s - | Ltailcall (sig0, ros, args) -> end_block blk minpc - | Lbuiltin (ef, args, res, s) -> next_in_block blk minpc s - | Lcond (cond, args, ifso, ifnot) -> + | Some b -> + let rec do_instr_list = function + | [] -> assert false + | Lbranch s :: _ -> next_in_block blk minpc s + | Ltailcall (sig0, ros) :: _ -> end_block blk minpc + | Lcond (cond, args, ifso, ifnot) :: _ -> end_block blk minpc; start_block ifso; start_block ifnot - | Ljumptable(arg, tbl) -> + | Ljumptable(arg, tbl) :: _ -> end_block blk minpc; List.iter start_block tbl - | Lreturn optarg -> end_block blk minpc + | Lreturn :: _ -> end_block blk minpc + | instr :: b' -> do_instr_list b' in + do_instr_list b (* next_in_block: check if join point and either extend block or start block *) and next_in_block blk minpc pc = diff --git a/backend/Linearizeproof.v b/backend/Linearizeproof.v index d368311..2548580 100644 --- a/backend/Linearizeproof.v +++ b/backend/Linearizeproof.v @@ -12,10 +12,11 @@ (** Correctness proof for code linearization *) +Require Import FSets. Require Import Coqlib. Require Import Maps. Require Import Ordered. -Require Import FSets. +Require Import Lattice. Require Import AST. Require Import Integers. Require Import Values. @@ -27,17 +28,15 @@ Require Import Smallstep. Require Import Op. Require Import Locations. Require Import LTL. -Require Import LTLtyping. -Require Import LTLin. +Require Import Linear. Require Import Linearize. -Require Import Lattice. Module NodesetFacts := FSetFacts.Facts(Nodeset). Section LINEARIZATION. Variable prog: LTL.program. -Variable tprog: LTLin.program. +Variable tprog: Linear.program. Hypothesis TRANSF: transf_program prog = OK tprog. @@ -70,7 +69,7 @@ Proof (Genv.find_var_info_transf_partial transf_fundef _ TRANSF). Lemma sig_preserved: forall f tf, transf_fundef f = OK tf -> - LTLin.funsig tf = LTL.funsig f. + Linear.funsig tf = LTL.funsig f. Proof. unfold transf_fundef, transf_partial_fundef; intros. destruct f. monadInv H. monadInv EQ. reflexivity. @@ -80,7 +79,7 @@ Qed. Lemma stacksize_preserved: forall f tf, transf_function f = OK tf -> - LTLin.fn_stacksize tf = LTL.fn_stacksize f. + Linear.fn_stacksize tf = LTL.fn_stacksize f. Proof. intros. monadInv H. auto. Qed. @@ -117,8 +116,8 @@ Qed. (** The successors of a reachable instruction are reachable. *) Lemma reachable_successors: - forall f pc pc' i, - f.(LTL.fn_code)!pc = Some i -> In pc' (successors_instr i) -> + forall f pc pc' b, + f.(LTL.fn_code)!pc = Some b -> In pc' (successors_block b) -> (reachable f)!!pc = true -> (reachable f)!!pc' = true. Proof. @@ -222,7 +221,7 @@ Qed. (** * Properties related to labels *) -(** If labels are globally unique and the LTLin code [c] contains +(** If labels are globally unique and the Linear code [c] contains a subsequence [Llabel lbl :: c1], then [find_label lbl c] returns [c1]. *) @@ -284,13 +283,13 @@ Proof. intros. unfold add_branch. destruct (starts_with s k); auto. Qed. -Lemma find_label_lin_instr: +Lemma find_label_lin_block: forall lbl k b, - find_label lbl (linearize_instr b k) = find_label lbl k. + find_label lbl (linearize_block b k) = find_label lbl k. Proof. intros lbl k. generalize (find_label_add_branch lbl k); intro. - induction b; simpl; auto. - case (starts_with n k); simpl; auto. + induction b; simpl; auto. destruct a; simpl; auto. + case (starts_with s1 k); simpl; auto. Qed. Remark linearize_body_cons: @@ -298,7 +297,7 @@ Remark linearize_body_cons: linearize_body f (pc :: enum) = match f.(LTL.fn_code)!pc with | None => linearize_body f enum - | Some b => Llabel pc :: linearize_instr b (linearize_body f enum) + | Some b => Llabel pc :: linearize_block b (linearize_body f enum) end. Proof. intros. unfold linearize_body. rewrite list_fold_right_eq. @@ -309,7 +308,7 @@ Lemma find_label_lin_rec: forall f enum pc b, In pc enum -> f.(LTL.fn_code)!pc = Some b -> - exists k, find_label pc (linearize_body f enum) = Some (linearize_instr b k). + exists k, find_label pc (linearize_body f enum) = Some (linearize_block b k). Proof. induction enum; intros. elim H. @@ -320,7 +319,7 @@ Proof. assert (In pc enum). simpl in H. tauto. destruct (IHenum pc b H1 H0) as [k FIND]. exists k. destruct (LTL.fn_code f)!a. - simpl. rewrite peq_false. rewrite find_label_lin_instr. auto. auto. + simpl. rewrite peq_false. rewrite find_label_lin_block. auto. auto. auto. Qed. @@ -330,7 +329,7 @@ Lemma find_label_lin: f.(LTL.fn_code)!pc = Some b -> (reachable f)!!pc = true -> exists k, - find_label pc (fn_code tf) = Some (linearize_instr b k). + find_label pc (fn_code tf) = Some (linearize_block b k). Proof. intros. monadInv H. simpl. rewrite find_label_add_branch. apply find_label_lin_rec. @@ -343,25 +342,12 @@ Lemma find_label_lin_inv: f.(LTL.fn_code)!pc = Some b -> (reachable f)!!pc = true -> find_label pc (fn_code tf) = Some k -> - exists k', k = linearize_instr b k'. + exists k', k = linearize_block b k'. Proof. intros. exploit find_label_lin; eauto. intros [k' FIND]. exists k'. congruence. Qed. -Lemma find_label_lin_succ: - forall f tf s, - transf_function f = OK tf -> - valid_successor f s -> - (reachable f)!!s = true -> - exists k, - find_label s (fn_code tf) = Some k. -Proof. - intros. destruct H0 as [i AT]. - exploit find_label_lin; eauto. intros [k FIND]. - exists (linearize_instr i k); auto. -Qed. - (** Unique label property for linearized code. *) Lemma label_in_add_branch: @@ -372,16 +358,16 @@ Proof. destruct (starts_with s k); simpl; intuition congruence. Qed. -Lemma label_in_lin_instr: +Lemma label_in_lin_block: forall lbl k b, - In (Llabel lbl) (linearize_instr b k) -> In (Llabel lbl) k. + In (Llabel lbl) (linearize_block b k) -> In (Llabel lbl) k. Proof. - induction b; simpl; intros; - try (apply label_in_add_branch with n; intuition congruence); - try (intuition congruence). - destruct (starts_with n k); simpl in H. - apply label_in_add_branch with n; intuition congruence. - apply label_in_add_branch with n0; intuition congruence. + induction b; simpl; intros. auto. + destruct a; simpl in H; try (intuition congruence). + apply label_in_add_branch with s; intuition congruence. + destruct (starts_with s1 k); simpl in H. + apply label_in_add_branch with s1; intuition congruence. + apply label_in_add_branch with s2; intuition congruence. Qed. Lemma label_in_lin_rec: @@ -392,7 +378,7 @@ Proof. simpl; auto. rewrite linearize_body_cons. destruct (LTL.fn_code f)!a. simpl. intros [A|B]. left; congruence. - right. apply IHenum. eapply label_in_lin_instr; eauto. + right. apply IHenum. eapply label_in_lin_block; eauto. intro; right; auto. Qed. @@ -404,12 +390,13 @@ Proof. destruct (starts_with lbl k); simpl; intuition. Qed. -Lemma unique_labels_lin_instr: +Lemma unique_labels_lin_block: forall k b, - unique_labels k -> unique_labels (linearize_instr b k). + unique_labels k -> unique_labels (linearize_block b k). Proof. - induction b; intro; simpl; auto; try (apply unique_labels_add_branch; auto). - case (starts_with n k); simpl; apply unique_labels_add_branch; auto. + induction b; intros; simpl. auto. + destruct a; auto; try (apply unique_labels_add_branch; auto). + case (starts_with s1 k); simpl; apply unique_labels_add_branch; auto. Qed. Lemma unique_labels_lin_rec: @@ -423,8 +410,8 @@ Proof. intro. destruct (LTL.fn_code f)!a. simpl. split. red. intro. inversion H. elim H3. apply label_in_lin_rec with f. - apply label_in_lin_instr with i. auto. - apply unique_labels_lin_instr. apply IHenum. inversion H; auto. + apply label_in_lin_block with b. auto. + apply unique_labels_lin_block. apply IHenum. inversion H; auto. apply IHenum. inversion H; auto. Qed. @@ -458,6 +445,17 @@ Proof. auto. eauto with coqlib. Qed. +Lemma is_tail_lin_block: + forall b c1 c2, + is_tail (linearize_block b c1) c2 -> is_tail c1 c2. +Proof. + induction b; simpl; intros. + auto. + destruct a; eauto with coqlib. + eapply is_tail_add_branch; eauto. + destruct (starts_with s1 c1); eapply is_tail_add_branch; eauto with coqlib. +Qed. + Lemma add_branch_correct: forall lbl c k s f tf sp ls m, transf_function f = OK tf -> @@ -475,12 +473,11 @@ Qed. (** * Correctness of linearization *) -(** The proof of semantic preservation is a simulation argument - based on diagrams of the following form: +(** The proof of semantic preservation is a simulation argument of the "star" kind: << st1 --------------- st2 | | - t| +|t + t| t| + or ( 0 \/ |st1'| < |st1| ) | | v v st1'--------------- st2' @@ -492,273 +489,260 @@ Qed. control-flow graph, the transformed state is at a code sequence [c] that starts with the label [pc]. *) -Inductive match_stackframes: LTL.stackframe -> LTLin.stackframe -> Prop := +Inductive match_stackframes: LTL.stackframe -> Linear.stackframe -> Prop := | match_stackframe_intro: - forall res f sp pc ls tf c, + forall f sp bb ls tf c, transf_function f = OK tf -> - (reachable f)!!pc = true -> - valid_successor f pc -> - is_tail c (fn_code tf) -> - wt_function f -> + (forall pc, In pc (successors_block bb) -> (reachable f)!!pc = true) -> + is_tail c tf.(fn_code) -> match_stackframes - (LTL.Stackframe res f sp ls pc) - (LTLin.Stackframe res tf sp ls (add_branch pc c)). + (LTL.Stackframe f sp ls bb) + (Linear.Stackframe tf sp ls (linearize_block bb c)). -Inductive match_states: LTL.state -> LTLin.state -> Prop := - | match_states_intro: +Inductive match_states: LTL.state -> Linear.state -> Prop := + | match_states_add_branch: forall s f sp pc ls m tf ts c (STACKS: list_forall2 match_stackframes s ts) (TRF: transf_function f = OK tf) (REACH: (reachable f)!!pc = true) - (AT: find_label pc (fn_code tf) = Some c) - (WTF: wt_function f), + (TAIL: is_tail c tf.(fn_code)), match_states (LTL.State s f sp pc ls m) - (LTLin.State ts tf sp c ls m) + (Linear.State ts tf sp (add_branch pc c) ls m) + | match_states_cond_taken: + forall s f sp pc ls m tf ts cond args c + (STACKS: list_forall2 match_stackframes s ts) + (TRF: transf_function f = OK tf) + (REACH: (reachable f)!!pc = true) + (JUMP: eval_condition cond (reglist ls args) m = Some true), + match_states (LTL.State s f sp pc (undef_regs (destroyed_by_cond cond) ls) m) + (Linear.State ts tf sp (Lcond cond args pc :: c) ls m) + | match_states_jumptable: + forall s f sp pc ls m tf ts arg tbl c n + (STACKS: list_forall2 match_stackframes s ts) + (TRF: transf_function f = OK tf) + (REACH: (reachable f)!!pc = true) + (ARG: ls (R arg) = Vint n) + (JUMP: list_nth_z tbl (Int.unsigned n) = Some pc), + match_states (LTL.State s f sp pc (undef_regs destroyed_by_jumptable ls) m) + (Linear.State ts tf sp (Ljumptable arg tbl :: c) ls m) + | match_states_block: + forall s f sp bb ls m tf ts c + (STACKS: list_forall2 match_stackframes s ts) + (TRF: transf_function f = OK tf) + (REACH: forall pc, In pc (successors_block bb) -> (reachable f)!!pc = true) + (TAIL: is_tail c tf.(fn_code)), + match_states (LTL.Block s f sp bb ls m) + (Linear.State ts tf sp (linearize_block bb c) ls m) | match_states_call: forall s f ls m tf ts, list_forall2 match_stackframes s ts -> transf_fundef f = OK tf -> - wt_fundef f -> match_states (LTL.Callstate s f ls m) - (LTLin.Callstate ts tf ls m) + (Linear.Callstate ts tf ls m) | match_states_return: forall s ls m ts, list_forall2 match_stackframes s ts -> match_states (LTL.Returnstate s ls m) - (LTLin.Returnstate ts ls m). + (Linear.Returnstate ts ls m). -Hypothesis wt_prog: wt_program prog. +Definition measure (S: LTL.state) : nat := + match S with + | LTL.State s f sp pc ls m => 0%nat + | LTL.Block s f sp bb ls m => 1%nat + | _ => 0%nat + end. + +Remark match_parent_locset: + forall s ts, list_forall2 match_stackframes s ts -> parent_locset ts = LTL.parent_locset s. +Proof. + induction 1; simpl. auto. inv H; auto. +Qed. Theorem transf_step_correct: forall s1 t s2, LTL.step ge s1 t s2 -> forall s1' (MS: match_states s1 s1'), - exists s2', plus LTLin.step tge s1' t s2' /\ match_states s2 s2'. -Proof. - induction 1; intros; try (inv MS); - try (generalize (wt_instrs _ WTF _ _ H); intro WTI). - (* Lnop *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - assert (REACH': (reachable f)!!pc' = true). - eapply reachable_successors; eauto. simpl; auto. - exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT']. - econstructor; split. - eapply add_branch_correct; eauto. - eapply is_tail_add_branch. eapply is_tail_find_label. eauto. + (exists s2', plus Linear.step tge s1' t s2' /\ match_states s2 s2') + \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat. +Proof. + induction 1; intros; try (inv MS). + + (* start of block, at an [add_branch] *) + exploit find_label_lin; eauto. intros [k F]. + left; econstructor; split. + eapply add_branch_correct; eauto. + econstructor; eauto. + intros; eapply reachable_successors; eauto. + eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto. + + (* start of block, target of an [Lcond] *) + exploit find_label_lin; eauto. intros [k F]. + left; econstructor; split. + apply plus_one. eapply exec_Lcond_true; eauto. + econstructor; eauto. + intros; eapply reachable_successors; eauto. + eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto. + + (* start of block, target of an [Ljumptable] *) + exploit find_label_lin; eauto. intros [k F]. + left; econstructor; split. + apply plus_one. eapply exec_Ljumptable; eauto. econstructor; eauto. + intros; eapply reachable_successors; eauto. + eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto. + (* Lop *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - assert (REACH': (reachable f)!!pc' = true). - eapply reachable_successors; eauto. simpl; auto. - exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT']. - econstructor; split. - eapply plus_left'. - eapply exec_Lop with (v := v); eauto. - rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved. - eapply add_branch_correct; eauto. - eapply is_tail_add_branch. eapply is_tail_cons_left. - eapply is_tail_find_label. eauto. - traceEq. - econstructor; eauto. + left; econstructor; split. simpl. + apply plus_one. econstructor; eauto. + instantiate (1 := v); rewrite <- H; apply eval_operation_preserved. + exact symbols_preserved. + econstructor; eauto. + (* Lload *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - assert (REACH': (reachable f)!!pc' = true). - eapply reachable_successors; eauto. simpl; auto. - exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT']. - econstructor; split. - eapply plus_left'. - apply exec_Lload with a. - rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. - eauto. - eapply add_branch_correct; eauto. - eapply is_tail_add_branch. eapply is_tail_cons_left. - eapply is_tail_find_label. eauto. - traceEq. + left; econstructor; split. simpl. + apply plus_one. econstructor. + instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved. + exact symbols_preserved. eauto. eauto. + econstructor; eauto. + + (* Lgetstack *) + left; econstructor; split. simpl. + apply plus_one. econstructor; eauto. + econstructor; eauto. + + (* Lsetstack *) + left; econstructor; split. simpl. + apply plus_one. econstructor; eauto. econstructor; eauto. + (* Lstore *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - assert (REACH': (reachable f)!!pc' = true). - eapply reachable_successors; eauto. simpl; auto. - exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT']. - econstructor; split. - eapply plus_left'. - apply exec_Lstore with a. - rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. - eauto. - eapply add_branch_correct; eauto. - eapply is_tail_add_branch. eapply is_tail_cons_left. - eapply is_tail_find_label. eauto. - traceEq. + left; econstructor; split. simpl. + apply plus_one. econstructor. + instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved. + exact symbols_preserved. eauto. eauto. econstructor; eauto. + (* Lcall *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - assert (REACH': (reachable f)!!pc' = true). - eapply reachable_successors; eauto. simpl; auto. - assert (VALID: valid_successor f pc'). inv WTI; auto. - exploit find_function_translated; eauto. intros [tf' [A B]]. - econstructor; split. - apply plus_one. eapply exec_Lcall with (f' := tf'); eauto. - symmetry; apply sig_preserved; auto. - econstructor; eauto. - constructor; auto. econstructor; eauto. - eapply is_tail_add_branch. eapply is_tail_cons_left. - eapply is_tail_find_label. eauto. - destruct ros; simpl in H0. - eapply Genv.find_funct_prop; eauto. - destruct (Genv.find_symbol ge i); try discriminate. - eapply Genv.find_funct_ptr_prop; eauto. + exploit find_function_translated; eauto. intros [tfd [A B]]. + left; econstructor; split. simpl. + apply plus_one. econstructor; eauto. + symmetry; eapply sig_preserved; eauto. + econstructor; eauto. constructor; auto. econstructor; eauto. (* Ltailcall *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - exploit find_function_translated; eauto. intros [tf' [A B]]. - econstructor; split. - apply plus_one. eapply exec_Ltailcall with (f' := tf'); eauto. - symmetry; apply sig_preserved; auto. - rewrite (stacksize_preserved _ _ TRF). eauto. + exploit find_function_translated; eauto. intros [tfd [A B]]. + left; econstructor; split. simpl. + apply plus_one. econstructor; eauto. + rewrite (match_parent_locset _ _ STACKS). eauto. + symmetry; eapply sig_preserved; eauto. + rewrite (stacksize_preserved _ _ TRF); eauto. + rewrite (match_parent_locset _ _ STACKS). econstructor; eauto. - destruct ros; simpl in H0. - eapply Genv.find_funct_prop; eauto. - destruct (Genv.find_symbol ge i); try discriminate. - eapply Genv.find_funct_ptr_prop; eauto. (* Lbuiltin *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - assert (REACH': (reachable f)!!pc' = true). - eapply reachable_successors; eauto. simpl; auto. - exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT']. - econstructor; split. - eapply plus_left'. - eapply exec_Lbuiltin. - eapply external_call_symbols_preserved; eauto. + left; econstructor; split. simpl. + apply plus_one. eapply exec_Lbuiltin; eauto. + eapply external_call_symbols_preserved'; eauto. + exact symbols_preserved. exact varinfo_preserved. + econstructor; eauto. + + (* Lannot *) + left; econstructor; split. simpl. + apply plus_one. eapply exec_Lannot; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. - eapply add_branch_correct; eauto. - eapply is_tail_add_branch. eapply is_tail_cons_left. - eapply is_tail_find_label. eauto. - traceEq. econstructor; eauto. + (* Lbranch *) + assert ((reachable f)!!pc = true). apply REACH; simpl; auto. + right; split. simpl; omega. split. auto. simpl. econstructor; eauto. + (* Lcond *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. + assert (REACH1: (reachable f)!!pc1 = true) by (apply REACH; simpl; auto). + assert (REACH2: (reachable f)!!pc2 = true) by (apply REACH; simpl; auto). + simpl linearize_block. + destruct (starts_with pc1 c). + (* branch if cond is false *) + assert (DC: destroyed_by_cond (negate_condition cond) = destroyed_by_cond cond). + destruct cond; reflexivity. destruct b. - (* true *) - assert (REACH': (reachable f)!!ifso = true). - eapply reachable_successors; eauto. simpl; auto. - exploit find_label_lin_succ; eauto. inv WTI; eauto. intros [c'' AT']. - destruct (starts_with ifso c'). - econstructor; split. - eapply plus_left'. - eapply exec_Lcond_false; eauto. - rewrite eval_negate_condition; rewrite H0; auto. - eapply add_branch_correct; eauto. - eapply is_tail_add_branch. eapply is_tail_cons_left. - eapply is_tail_find_label. eauto. - traceEq. - econstructor; eauto. - econstructor; split. - apply plus_one. eapply exec_Lcond_true; eauto. - econstructor; eauto. - (* false *) - assert (REACH': (reachable f)!!ifnot = true). - eapply reachable_successors; eauto. simpl; auto. - exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT']. - destruct (starts_with ifso c'). - econstructor; split. - apply plus_one. eapply exec_Lcond_true; eauto. - rewrite eval_negate_condition; rewrite H0; auto. - econstructor; eauto. - econstructor; split. - eapply plus_left'. - eapply exec_Lcond_false; eauto. - eapply add_branch_correct; eauto. - eapply is_tail_add_branch. eapply is_tail_cons_left. - eapply is_tail_find_label. eauto. - traceEq. + (* cond is true: no branch *) + left; econstructor; split. + apply plus_one. eapply exec_Lcond_false. + rewrite eval_negate_condition. rewrite H. auto. eauto. + rewrite DC. econstructor; eauto. + (* cond is false: branch is taken *) + right; split. simpl; omega. split. auto. rewrite <- DC. econstructor; eauto. + rewrite eval_negate_condition. rewrite H. auto. + (* branch if cond is true *) + destruct b. + (* cond is true: branch is taken *) + right; split. simpl; omega. split. auto. econstructor; eauto. + (* cond is false: no branch *) + left; econstructor; split. + apply plus_one. eapply exec_Lcond_false. eauto. eauto. econstructor; eauto. (* Ljumptable *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - assert (REACH': (reachable f)!!pc' = true). - eapply reachable_successors; eauto. simpl. eapply list_nth_z_in; eauto. - exploit find_label_lin_succ; eauto. - inv WTI. apply H6. eapply list_nth_z_in; eauto. - intros [c'' AT']. - econstructor; split. - apply plus_one. eapply exec_Ljumptable; eauto. - econstructor; eauto. + assert (REACH': (reachable f)!!pc = true). + apply REACH. simpl. eapply list_nth_z_in; eauto. + right; split. simpl; omega. split. auto. econstructor; eauto. (* Lreturn *) - destruct (find_label_lin_inv _ _ _ _ _ TRF H REACH AT) as [c' EQ]. - simpl in EQ. subst c. - econstructor; split. - apply plus_one. eapply exec_Lreturn; eauto. + left; econstructor; split. + simpl. apply plus_one. econstructor; eauto. rewrite (stacksize_preserved _ _ TRF). eauto. - econstructor; eauto. - - (* internal function *) + rewrite (match_parent_locset _ _ STACKS). econstructor; eauto. + + (* internal functions *) assert (REACH: (reachable f)!!(LTL.fn_entrypoint f) = true). apply reachable_entrypoint. - inv H7. monadInv H6. - exploit find_label_lin_succ; eauto. inv H1; auto. intros [c'' AT']. - generalize EQ; intro. monadInv EQ0. econstructor; simpl; split. - eapply plus_left'. - eapply exec_function_internal; eauto. - simpl. eapply add_branch_correct. eauto. - simpl. eapply is_tail_add_branch. constructor. eauto. - traceEq. - econstructor; eauto. + monadInv H7. + left; econstructor; split. + apply plus_one. eapply exec_function_internal; eauto. + rewrite (stacksize_preserved _ _ EQ). eauto. + generalize EQ; intro EQ'; monadInv EQ'. simpl. + econstructor; eauto. simpl. eapply is_tail_add_branch. constructor. (* external function *) - monadInv H6. econstructor; split. + monadInv H8. left; econstructor; split. apply plus_one. eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto. (* return *) inv H3. inv H1. - exploit find_label_lin_succ; eauto. intros [c' AT]. - econstructor; split. - eapply plus_left'. - eapply exec_return; eauto. - eapply add_branch_correct; eauto. traceEq. - econstructor; eauto. + left; econstructor; split. + apply plus_one. econstructor. + econstructor; eauto. Qed. Lemma transf_initial_states: forall st1, LTL.initial_state prog st1 -> - exists st2, LTLin.initial_state tprog st2 /\ match_states st1 st2. + exists st2, Linear.initial_state tprog st2 /\ match_states st1 st2. Proof. intros. inversion H. exploit function_ptr_translated; eauto. intros [tf [A B]]. - exists (Callstate nil tf nil m0); split. + exists (Callstate nil tf (Locmap.init Vundef) m0); split. econstructor; eauto. eapply Genv.init_mem_transf_partial; eauto. replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto. symmetry. apply (transform_partial_program_main transf_fundef _ TRANSF). rewrite <- H3. apply sig_preserved. auto. constructor. constructor. auto. - eapply Genv.find_funct_ptr_prop; eauto. Qed. Lemma transf_final_states: forall st1 st2 r, - match_states st1 st2 -> LTL.final_state st1 r -> LTLin.final_state st2 r. + match_states st1 st2 -> LTL.final_state st1 r -> Linear.final_state st2 r. Proof. - intros. inv H0. inv H. inv H4. constructor. + intros. inv H0. inv H. inv H6. econstructor; eauto. Qed. Theorem transf_program_correct: - forward_simulation (LTL.semantics prog) (LTLin.semantics tprog). + forward_simulation (LTL.semantics prog) (Linear.semantics tprog). Proof. - eapply forward_simulation_plus. + eapply forward_simulation_star. eexact symbols_preserved. eexact transf_initial_states. eexact transf_final_states. diff --git a/backend/Linearizetyping.v b/backend/Linearizetyping.v deleted file mode 100644 index b4e25de..0000000 --- a/backend/Linearizetyping.v +++ /dev/null @@ -1,112 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Type preservation for the Linearize pass *) - -Require Import Coqlib. -Require Import Maps. -Require Import Errors. -Require Import AST. -Require Import Op. -Require Import Locations. -Require Import LTL. -Require Import LTLtyping. -Require Import LTLin. -Require Import Linearize. -Require Import LTLintyping. -Require Import Conventions. - -(** * Type preservation for the linearization pass *) - -Lemma wt_add_instr: - forall f i k, wt_instr f i -> wt_code f k -> wt_code f (i :: k). -Proof. - intros; red; intros. elim H1; intro. subst i0; auto. auto. -Qed. - -Lemma wt_add_branch: - forall f s k, wt_code f k -> wt_code f (add_branch s k). -Proof. - intros; unfold add_branch. destruct (starts_with s k). - auto. apply wt_add_instr; auto. constructor. -Qed. - -Lemma wt_linearize_instr: - forall f instr, - LTLtyping.wt_instr f instr -> - forall k, - wt_code f.(LTL.fn_sig) k -> - wt_code f.(LTL.fn_sig) (linearize_instr instr k). -Proof. - induction 1; simpl; intros. - apply wt_add_branch; auto. - apply wt_add_instr. constructor; auto. apply wt_add_branch; auto. - apply wt_add_instr. constructor; auto. apply wt_add_branch; auto. - apply wt_add_instr. constructor; auto. apply wt_add_branch; auto. - apply wt_add_instr. constructor; auto. apply wt_add_branch; auto. - apply wt_add_instr. constructor; auto. apply wt_add_branch; auto. - apply wt_add_instr. constructor; auto. auto. - apply wt_add_instr. constructor; auto. apply wt_add_branch; auto. - destruct (starts_with s1 k); apply wt_add_instr. - constructor; auto. rewrite H. destruct cond; auto. - apply wt_add_branch; auto. - constructor; auto. apply wt_add_branch; auto. - apply wt_add_instr. constructor; auto. auto. - apply wt_add_instr. constructor; auto. auto. -Qed. - -Lemma wt_linearize_body: - forall f, - (forall pc instr, - f.(LTL.fn_code)!pc = Some instr -> LTLtyping.wt_instr f instr) -> - forall enum, - wt_code f.(LTL.fn_sig) (linearize_body f enum). -Proof. - unfold linearize_body; induction enum; rewrite list_fold_right_eq. - red; simpl; intros; contradiction. - unfold linearize_node. caseEq ((LTL.fn_code f)!a); intros. - apply wt_add_instr. constructor. apply wt_linearize_instr; eauto with coqlib. - auto. -Qed. - -Lemma wt_transf_function: - forall f tf, - LTLtyping.wt_function f -> - transf_function f = OK tf -> - wt_function tf. -Proof. - intros. inv H. monadInv H0. constructor; auto. - simpl. apply wt_add_branch. - apply wt_linearize_body. auto. -Qed. - -Lemma wt_transf_fundef: - forall f tf, - LTLtyping.wt_fundef f -> - transf_fundef f = OK tf -> - wt_fundef tf. -Proof. - induction 1; intros. monadInv H. constructor. - monadInv H0. constructor; eapply wt_transf_function; eauto. -Qed. - -Lemma program_typing_preserved: - forall (p: LTL.program) (tp: LTLin.program), - LTLtyping.wt_program p -> - transf_program p = OK tp -> - LTLintyping.wt_program tp. -Proof. - intros; red; intros. - generalize (transform_partial_program_function transf_fundef _ _ _ H0 H1). - intros [f0 [IN TR]]. - eapply wt_transf_fundef; eauto. -Qed. diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v index 32d6045..c51db6f 100644 --- a/backend/Lineartyping.v +++ b/backend/Lineartyping.v @@ -16,124 +16,87 @@ Require Import Coqlib. Require Import AST. Require Import Integers. Require Import Values. +Require Import Events. Require Import Op. Require Import Locations. +Require Import Conventions. Require Import LTL. Require Import Linear. -Require Import Conventions. -(** The typing rules for Linear are similar to those for LTLin: we check - that instructions receive the right number of arguments, - and that the types of the argument and result registers agree - with what the instruction expects. Moreover, we also - enforces some correctness conditions on the offsets of stack slots - accessed through [Lgetstack] and [Lsetstack] Linear instructions. *) +(** The typing rules for Linear enforce several properties useful for + the proof of the [Stacking] pass: +- for each instruction, the type of the result register or slot + agrees with the type of values produced by the instruction; +- correctness conditions on the offsets of stack slots + accessed through [Lgetstack] and [Lsetstack] Linear instructions. +*) + +(** The rules are presented as boolean-valued functions so that we + get an executable type-checker for free. *) Section WT_INSTR. Variable funct: function. -Definition slot_valid (s: slot) := - match s with - | Local ofs ty => 0 <= ofs - | Outgoing ofs ty => 0 <= ofs - | Incoming ofs ty => In (S s) (loc_parameters funct.(fn_sig)) +Definition slot_valid (sl: slot) (ofs: Z) (ty: typ): bool := + match sl with + | Local => zle 0 ofs + | Outgoing => zle 0 ofs + | Incoming => In_dec Loc.eq (S Incoming ofs ty) (loc_parameters funct.(fn_sig)) + end && + match ty with + | Tint | Tfloat => true + | Tlong => false end. -Definition slot_writable (s: slot) := - match s with - | Local ofs ty => True - | Outgoing ofs ty => True - | Incoming ofs ty => False +Definition slot_writable (sl: slot) : bool := + match sl with + | Local => true + | Outgoing => true + | Incoming => false end. -Inductive wt_instr : instruction -> Prop := - | wt_Lgetstack: - forall s r, - slot_type s = mreg_type r -> - slot_valid s -> - wt_instr (Lgetstack s r) - | wt_Lsetstack: - forall s r, - slot_type s = mreg_type r -> - slot_valid s -> slot_writable s -> - wt_instr (Lsetstack r s) - | wt_Lopmove: - forall r1 r, - mreg_type r1 = mreg_type r -> - wt_instr (Lop Omove (r1 :: nil) r) - | wt_Lop: - forall op args res, - op <> Omove -> - (List.map mreg_type args, mreg_type res) = type_of_operation op -> - wt_instr (Lop op args res) - | wt_Lload: - forall chunk addr args dst, - List.map mreg_type args = type_of_addressing addr -> - mreg_type dst = type_of_chunk chunk -> - wt_instr (Lload chunk addr args dst) - | wt_Lstore: - forall chunk addr args src, - List.map mreg_type args = type_of_addressing addr -> - mreg_type src = type_of_chunk chunk -> - wt_instr (Lstore chunk addr args src) - | wt_Lcall: - forall sig ros, - match ros with inl r => mreg_type r = Tint | _ => True end -> - wt_instr (Lcall sig ros) - | wt_Ltailcall: - forall sig ros, - tailcall_possible sig -> - match ros with inl r => r = IT1 | _ => True end -> - wt_instr (Ltailcall sig ros) - | wt_Lbuiltin: - forall ef args res, - List.map mreg_type args = (ef_sig ef).(sig_args) -> - mreg_type res = proj_sig_res (ef_sig ef) -> - arity_ok (ef_sig ef).(sig_args) = true -> - wt_instr (Lbuiltin ef args res) - | wt_Lannot: - forall ef args, - List.map Loc.type args = (ef_sig ef).(sig_args) -> - ef_reloads ef = false -> - locs_acceptable args -> - wt_instr (Lannot ef args) - | wt_Llabel: - forall lbl, - wt_instr (Llabel lbl) - | wt_Lgoto: - forall lbl, - wt_instr (Lgoto lbl) - | wt_Lcond: - forall cond args lbl, - List.map mreg_type args = type_of_condition cond -> - wt_instr (Lcond cond args lbl) - | wt_Ljumptable: - forall arg tbl, - mreg_type arg = Tint -> - list_length_z tbl * 4 <= Int.max_unsigned -> - wt_instr (Ljumptable arg tbl) - | wt_Lreturn: - wt_instr (Lreturn). +Definition loc_valid (l: loc) : bool := + match l with + | R r => true + | S Local ofs ty => slot_valid Local ofs ty + | S _ _ _ => false + end. + +Definition wt_instr (i: instruction) : bool := + match i with + | Lgetstack sl ofs ty r => + typ_eq ty (mreg_type r) && slot_valid sl ofs ty + | Lsetstack r sl ofs ty => + typ_eq ty (mreg_type r) && slot_valid sl ofs ty && slot_writable sl + | Lop op args res => + match is_move_operation op args with + | Some arg => + typ_eq (mreg_type arg) (mreg_type res) + | None => + let (targs, tres) := type_of_operation op in + typ_eq (mreg_type res) tres + end + | Lload chunk addr args dst => + typ_eq (mreg_type dst) (type_of_chunk chunk) + | Ltailcall sg ros => + zeq (size_arguments sg) 0 + | Lbuiltin ef args res => + list_typ_eq (map mreg_type res) (proj_sig_res' (ef_sig ef)) + | Lannot ef args => + forallb loc_valid args + | _ => + true + end. End WT_INSTR. -Definition wt_code (f: function) (c: code) : Prop := - forall instr, In instr c -> wt_instr f instr. +Definition wt_code (f: function) (c: code) : bool := + forallb (wt_instr f) c. -Definition wt_function (f: function) : Prop := +Definition wt_function (f: function) : bool := wt_code f f.(fn_code). -Inductive wt_fundef: fundef -> Prop := - | wt_fundef_external: forall ef, - wt_fundef (External ef) - | wt_function_internal: forall f, - wt_function f -> - wt_fundef (Internal f). - -Definition wt_program (p: program) : Prop := - forall i f, In (i, Gfun f) (prog_defs p) -> wt_fundef f. - (** Typing the run-time state. These definitions are used in [Stackingproof]. *) Require Import Values. @@ -148,48 +111,30 @@ Proof. intros; red; intros. unfold Locmap.set. destruct (Loc.eq l l0). congruence. - destruct (Loc.overlap l l0). red. auto. - auto. -Qed. - -Lemma wt_undef_locs: - forall locs ls, wt_locset ls -> wt_locset (Locmap.undef locs ls). -Proof. - induction locs; simpl; intros. auto. apply IHlocs. apply wt_setloc; auto. red; auto. + destruct (Loc.diff_dec l l0). auto. red. auto. Qed. -Lemma wt_undef_temps: - forall ls, wt_locset ls -> wt_locset (undef_temps ls). +Lemma wt_setlocs: + forall ll vl ls, + Val.has_type_list vl (map Loc.type ll) -> wt_locset ls -> wt_locset (Locmap.setlist ll vl ls). Proof. - intros; unfold undef_temps. apply wt_undef_locs; auto. + induction ll; destruct vl; simpl; intuition. + apply IHll; auto. apply wt_setloc; auto. Qed. -Lemma wt_undef_op: - forall op ls, wt_locset ls -> wt_locset (undef_op op ls). +Lemma wt_undef_regs: + forall rs ls, wt_locset ls -> wt_locset (undef_regs rs ls). Proof. - intros. generalize (wt_undef_temps ls H); intro. - unfold undef_op; destruct op; auto; apply wt_undef_locs; auto. -Qed. - -Lemma wt_undef_getstack: - forall s ls, wt_locset ls -> wt_locset (undef_getstack s ls). -Proof. - intros. unfold undef_getstack. destruct s; auto. apply wt_setloc; auto. red; auto. -Qed. - -Lemma wt_undef_setstack: - forall ls, wt_locset ls -> wt_locset (undef_setstack ls). -Proof. - intros. unfold undef_setstack. apply wt_undef_locs; auto. + induction rs; simpl; intros. auto. apply wt_setloc; auto. red; auto. Qed. Lemma wt_call_regs: forall ls, wt_locset ls -> wt_locset (call_regs ls). Proof. - intros; red; intros. unfold call_regs. destruct l. auto. - destruct (in_dec Loc.eq (R m) temporaries). red; auto. auto. - destruct s. red; auto. - change (Loc.type (S (Incoming z t))) with (Loc.type (S (Outgoing z t))). auto. + intros; red; intros. unfold call_regs. destruct l. auto. + destruct sl. + red; auto. + change (Loc.type (S Incoming pos ty)) with (Loc.type (S Outgoing pos ty)). auto. red; auto. Qed. @@ -198,9 +143,8 @@ Lemma wt_return_regs: wt_locset caller -> wt_locset callee -> wt_locset (return_regs caller callee). Proof. intros; red; intros. - unfold return_regs. destruct l; auto. - destruct (in_dec Loc.eq (R m) temporaries); auto. - destruct (in_dec Loc.eq (R m) destroyed_at_call); auto. + unfold return_regs. destruct l; auto. + destruct (in_dec mreg_eq r destroyed_at_call); auto. Qed. Lemma wt_init: @@ -208,3 +152,19 @@ Lemma wt_init: Proof. red; intros. unfold Locmap.init. red; auto. Qed. + +Lemma wt_setlist_result: + forall sg v rs, + Val.has_type v (proj_sig_res sg) -> + wt_locset rs -> + wt_locset (Locmap.setlist (map R (loc_result sg)) (encode_long (sig_res sg) v) rs). +Proof. + unfold loc_result, encode_long, proj_sig_res; intros. + destruct (sig_res sg) as [[] | ]; simpl. + apply wt_setloc; auto. + apply wt_setloc; auto. + destruct v; simpl in H; try contradiction; + simpl; apply wt_setloc; auto; apply wt_setloc; auto. + apply wt_setloc; auto. +Qed. + diff --git a/backend/Locations.v b/backend/Locations.v index 2381fea..2f2dae8 100644 --- a/backend/Locations.v +++ b/backend/Locations.v @@ -13,7 +13,10 @@ (** Locations are a refinement of RTL pseudo-registers, used to reflect the results of register allocation (file [Allocation]). *) +Require Import OrderedType. Require Import Coqlib. +Require Import Maps. +Require Import Ordered. Require Import AST. Require Import Values. Require Export Machregs. @@ -42,9 +45,9 @@ Require Export Machregs. calling conventions. *) Inductive slot: Type := - | Local: Z -> typ -> slot - | Incoming: Z -> typ -> slot - | Outgoing: Z -> typ -> slot. + | Local + | Incoming + | Outgoing. (** Morally, the [Incoming] slots of a function are the [Outgoing] slots of its caller function. @@ -56,46 +59,17 @@ as 32-bit integers/pointers (type [Tint]) or as 64-bit floats (type [Tfloat]). The offset of a slot, combined with its type and its kind, identifies uniquely the slot and will determine later where it resides within the memory-allocated activation record. Offsets are always positive. - -Conceptually, slots will be mapped to four non-overlapping memory areas -within activation records: -- The area for [Local] slots of type [Tint]. The offset is interpreted - as a 4-byte word index. -- The area for [Local] slots of type [Tfloat]. The offset is interpreted - as a 8-byte word index. Thus, two [Local] slots always refer either - to the same memory chunk (if they have the same types and offsets) - or to non-overlapping memory chunks (if the types or offsets differ). -- The area for [Outgoing] slots. The offset is a 4-byte word index. - Unlike [Local] slots, the PowerPC calling conventions demand that - integer and float [Outgoing] slots reside in the same memory area. - Therefore, [Outgoing Tint 0] and [Outgoing Tfloat 0] refer to - overlapping memory chunks and cannot be used simultaneously: one will - lose its value when the other is assigned. We will reflect this - overlapping behaviour in the environments mapping locations to values - defined later in this file. -- The area for [Incoming] slots. Same structure as the [Outgoing] slots. *) -Definition slot_type (s: slot): typ := - match s with - | Local ofs ty => ty - | Incoming ofs ty => ty - | Outgoing ofs ty => ty - end. - Lemma slot_eq: forall (p q: slot), {p = q} + {p <> q}. Proof. - assert (typ_eq: forall (t1 t2: typ), {t1 = t2} + {t1 <> t2}). - decide equality. - generalize zeq; intro. decide equality. Defined. -Global Opaque slot_eq. Open Scope Z_scope. Definition typesize (ty: typ) : Z := - match ty with Tint => 1 | Tfloat => 2 end. + match ty with Tint => 1 | Tlong => 2 | Tfloat => 2 end. Lemma typesize_pos: forall (ty: typ), typesize ty > 0. @@ -109,20 +83,24 @@ Qed. activation record slots. *) Inductive loc : Type := - | R: mreg -> loc - | S: slot -> loc. + | R (r: mreg) + | S (sl: slot) (pos: Z) (ty: typ). Module Loc. Definition type (l: loc) : typ := match l with | R r => mreg_type r - | S s => slot_type s + | S sl pos ty => ty end. Lemma eq: forall (p q: loc), {p = q} + {p <> q}. Proof. - decide equality. apply mreg_eq. apply slot_eq. + decide equality. + apply mreg_eq. + apply typ_eq. + apply zeq. + apply slot_eq. Defined. (** As mentioned previously, two locations can be different (in the sense @@ -138,13 +116,10 @@ Module Loc. *) Definition diff (l1 l2: loc) : Prop := match l1, l2 with - | R r1, R r2 => r1 <> r2 - | S (Local d1 t1), S (Local d2 t2) => - d1 <> d2 \/ t1 <> t2 - | S (Incoming d1 t1), S (Incoming d2 t2) => - d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1 - | S (Outgoing d1 t1), S (Outgoing d2 t2) => - d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1 + | R r1, R r2 => + r1 <> r2 + | S s1 d1 t1, S s2 d2 t2 => + s1 <> s2 \/ d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1 | _, _ => True end. @@ -152,11 +127,8 @@ Module Loc. Lemma same_not_diff: forall l, ~(diff l l). Proof. - destruct l; unfold diff; try tauto. - destruct s. - tauto. - generalize (typesize_pos t); omega. - generalize (typesize_pos t); omega. + destruct l; unfold diff; auto. + red; intros. destruct H; auto. generalize (typesize_pos ty); omega. Qed. Lemma diff_not_eq: @@ -169,124 +141,22 @@ Module Loc. forall l1 l2, diff l1 l2 -> diff l2 l1. Proof. destruct l1; destruct l2; unfold diff; auto. - destruct s; auto. - destruct s; destruct s0; intuition auto. - Qed. - - Lemma diff_reg_right: - forall l r, l <> R r -> diff (R r) l. - Proof. - intros. simpl. destruct l. congruence. auto. - Qed. - - Lemma diff_reg_left: - forall l r, l <> R r -> diff l (R r). - Proof. - intros. apply diff_sym. apply diff_reg_right. auto. - Qed. - -(** [Loc.overlap l1 l2] returns [false] if [l1] and [l2] are different and - non-overlapping, and [true] otherwise: either [l1 = l2] or they partially - overlap. *) - - Definition overlap_aux (t1: typ) (d1 d2: Z) : bool := - if zeq d1 d2 then true else - match t1 with - | Tint => false - | Tfloat => if zeq (d1 + 1) d2 then true else false - end. - - Definition overlap (l1 l2: loc) : bool := - match l1, l2 with - | S (Incoming d1 t1), S (Incoming d2 t2) => - overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1 - | S (Outgoing d1 t1), S (Outgoing d2 t2) => - overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1 - | _, _ => false - end. - - Lemma overlap_aux_true_1: - forall d1 t1 d2 t2, - overlap_aux t1 d1 d2 = true -> - ~(d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1). - Proof. - intros until t2. - generalize (typesize_pos t1); intro. - generalize (typesize_pos t2); intro. - unfold overlap_aux. - case (zeq d1 d2). - intros. omega. - case t1. intros; discriminate. - case (zeq (d1 + 1) d2); intros. - subst d2. simpl. omega. - discriminate. - Qed. - - Lemma overlap_aux_true_2: - forall d1 t1 d2 t2, - overlap_aux t2 d2 d1 = true -> - ~(d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1). - Proof. - intros. generalize (overlap_aux_true_1 d2 t2 d1 t1 H). - tauto. + intuition. Qed. - Lemma overlap_not_diff: - forall l1 l2, overlap l1 l2 = true -> ~(diff l1 l2). + Definition diff_dec (l1 l2: loc) : { Loc.diff l1 l2 } + { ~Loc.diff l1 l2 }. Proof. - unfold overlap, diff; destruct l1; destruct l2; intros; try discriminate. - destruct s; discriminate. - destruct s; destruct s0; try discriminate. - elim (orb_true_elim _ _ H); intro. - apply overlap_aux_true_1; auto. - apply overlap_aux_true_2; auto. - elim (orb_true_elim _ _ H); intro. - apply overlap_aux_true_1; auto. - apply overlap_aux_true_2; auto. - Qed. - - Lemma overlap_aux_false_1: - forall t1 d1 t2 d2, - overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1 = false -> - d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1. - Proof. - intros until d2. intro OV. - generalize (orb_false_elim _ _ OV). intro OV'. elim OV'. - unfold overlap_aux. - case (zeq d1 d2); intro. - intros; discriminate. - case (zeq d2 d1); intro. - intros; discriminate. - case t1; case t2; simpl. - intros; omega. - case (zeq (d2 + 1) d1); intros. discriminate. omega. - case (zeq (d1 + 1) d2); intros. discriminate. omega. - case (zeq (d1 + 1) d2); intros H1 H2. discriminate. - case (zeq (d2 + 1) d1); intros. discriminate. omega. - Qed. - - Lemma non_overlap_diff: - forall l1 l2, l1 <> l2 -> overlap l1 l2 = false -> diff l1 l2. - Proof. - intros. unfold diff; destruct l1; destruct l2. - congruence. - auto. - destruct s; auto. - destruct s; destruct s0; auto. - case (zeq z z0); intro. - compare t t0; intro. - congruence. tauto. tauto. - apply overlap_aux_false_1. exact H0. - apply overlap_aux_false_1. exact H0. - Qed. - - Definition diff_dec (l1 l2: loc) : { Loc.diff l1 l2 } + {~Loc.diff l1 l2}. - Proof. - intros. case (eq l1 l2); intros. - right. rewrite e. apply same_not_diff. - case_eq (overlap l1 l2); intros. - right. apply overlap_not_diff; auto. - left. apply non_overlap_diff; auto. + intros. destruct l1; destruct l2; simpl. + - destruct (mreg_eq r r0). right; tauto. left; auto. + - left; auto. + - left; auto. + - destruct (slot_eq sl sl0). + destruct (zle (pos + typesize ty) pos0). + left; auto. + destruct (zle (pos0 + typesize ty0) pos). + left; auto. + right; red; intros [P | [P | P]]. congruence. omega. omega. + left; auto. Defined. (** We now redefine some standard notions over lists, using the [Loc.diff] @@ -316,12 +186,16 @@ Module Loc. elim (diff_not_eq l l); auto. Qed. - Lemma reg_notin: - forall r ll, ~(In (R r) ll) -> notin (R r) ll. + Lemma notin_dec (l: loc) (ll: list loc) : {notin l ll} + {~notin l ll}. Proof. - intros. rewrite notin_iff; intros. - destruct l'; simpl. congruence. auto. - Qed. + induction ll; simpl. + left; auto. + destruct (diff_dec l a). + destruct IHll. + left; auto. + right; tauto. + right; tauto. + Defined. (** [Loc.disjoint l1 l2] is true if the locations in list [l1] are different from all locations in list [l2]. *) @@ -376,6 +250,17 @@ Module Loc. | norepet_cons: forall hd tl, notin hd tl -> norepet tl -> norepet (hd :: tl). + Lemma norepet_dec (ll: list loc) : {norepet ll} + {~norepet ll}. + Proof. + induction ll. + left; constructor. + destruct (notin_dec a ll). + destruct IHll. + left; constructor; auto. + right; red; intros P; inv P; contradiction. + right; red; intros P; inv P; contradiction. + Defined. + (** [Loc.no_overlap l1 l2] holds if elements of [l1] never overlap partially with elements of [l2]. *) @@ -384,8 +269,6 @@ Module Loc. End Loc. -Global Opaque Loc.eq Loc.diff_dec. - (** * Mappings from locations to values *) (** The [Locmap] module defines mappings from locations to values, @@ -413,20 +296,20 @@ Module Locmap. Definition set (l: loc) (v: val) (m: t) : t := fun (p: loc) => - if Loc.eq l p then v else if Loc.overlap l p then Vundef else m p. + if Loc.eq l p then v else if Loc.diff_dec l p then m p else Vundef. Lemma gss: forall l v m, (set l v m) l = v. Proof. - intros. unfold set. case (Loc.eq l l); tauto. + intros. unfold set. rewrite dec_eq_true. auto. Qed. Lemma gso: forall l v m p, Loc.diff l p -> (set l v m) p = m p. Proof. - intros. unfold set. case (Loc.eq l p); intro. - subst p. elim (Loc.same_not_diff _ H). - caseEq (Loc.overlap l p); intro. - elim (Loc.overlap_not_diff _ _ H0 H). + intros. unfold set. destruct (Loc.eq l p). + subst p. elim (Loc.same_not_diff _ H). + destruct (Loc.diff_dec l p). auto. + contradiction. Qed. Fixpoint undef (ll: list loc) (m: t) {struct ll} : t := @@ -446,10 +329,172 @@ Module Locmap. assert (P: forall ll l m, m l = Vundef -> (undef ll m) l = Vundef). induction ll; simpl; intros. auto. apply IHll. unfold set. destruct (Loc.eq a l); auto. - destruct (Loc.overlap a l); auto. + destruct (Loc.diff_dec a l); auto. induction ll; simpl; intros. contradiction. destruct H. apply P. subst a. apply gss. auto. Qed. + Fixpoint setlist (ll: list loc) (vl: list val) (m: t) {struct ll} : t := + match ll, vl with + | l1 :: ls, v1 :: vs => setlist ls vs (set l1 v1 m) + | _, _ => m + end. + + Lemma gsetlisto: forall l ll vl m, Loc.notin l ll -> (setlist ll vl m) l = m l. + Proof. + induction ll; simpl; intros. + auto. + destruct vl; auto. destruct H. rewrite IHll; auto. apply gso; auto. apply Loc.diff_sym; auto. + Qed. + End Locmap. + +(** * Total ordering over locations *) + +Module IndexedTyp <: INDEXED_TYPE. + Definition t := typ. + Definition index (x: t) := + match x with Tint => 1%positive | Tfloat => 2%positive | Tlong => 3%positive end. + Lemma index_inj: forall x y, index x = index y -> x = y. + Proof. destruct x; destruct y; simpl; congruence. Qed. + Definition eq := typ_eq. +End IndexedTyp. + +Module OrderedTyp := OrderedIndexed(IndexedTyp). + +Module IndexedSlot <: INDEXED_TYPE. + Definition t := slot. + Definition index (x: t) := + match x with Local => 1%positive | Incoming => 2%positive | Outgoing => 3%positive end. + Lemma index_inj: forall x y, index x = index y -> x = y. + Proof. destruct x; destruct y; simpl; congruence. Qed. + Definition eq := slot_eq. +End IndexedSlot. + +Module OrderedSlot := OrderedIndexed(IndexedSlot). + +Module OrderedLoc <: OrderedType. + Definition t := loc. + Definition eq (x y: t) := x = y. + Definition lt (x y: t) := + match x, y with + | R r1, R r2 => Plt (IndexedMreg.index r1) (IndexedMreg.index r2) + | R _, S _ _ _ => True + | S _ _ _, R _ => False + | S sl1 ofs1 ty1, S sl2 ofs2 ty2 => + OrderedSlot.lt sl1 sl2 \/ (sl1 = sl2 /\ + (ofs1 < ofs2 \/ (ofs1 = ofs2 /\ OrderedTyp.lt ty1 ty2))) + end. + Lemma eq_refl : forall x : t, eq x x. + Proof (@refl_equal t). + Lemma eq_sym : forall x y : t, eq x y -> eq y x. + Proof (@sym_equal t). + Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z. + Proof (@trans_equal t). + Lemma lt_trans : forall x y z : t, lt x y -> lt y z -> lt x z. + Proof. + unfold lt; intros. + destruct x; destruct y; destruct z; try tauto. + eapply Plt_trans; eauto. + destruct H. + destruct H0. left; eapply OrderedSlot.lt_trans; eauto. + destruct H0. subst sl0. auto. + destruct H. subst sl. + destruct H0. auto. + destruct H. + right. split. auto. + intuition. + right; split. congruence. eapply OrderedTyp.lt_trans; eauto. + Qed. + Lemma lt_not_eq : forall x y : t, lt x y -> ~ eq x y. + Proof. + unfold lt, eq; intros; red; intros. subst y. + destruct x. + eelim Plt_strict; eauto. + destruct H. eelim OrderedSlot.lt_not_eq; eauto. red; auto. + destruct H. destruct H0. omega. + destruct H0. eelim OrderedTyp.lt_not_eq; eauto. red; auto. + Qed. + Definition compare : forall x y : t, Compare lt eq x y. + Proof. + intros. destruct x; destruct y. + - destruct (OrderedPositive.compare (IndexedMreg.index r) (IndexedMreg.index r0)). + + apply LT. red. auto. + + apply EQ. red. f_equal. apply IndexedMreg.index_inj. auto. + + apply GT. red. auto. + - apply LT. red; auto. + - apply GT. red; auto. + - destruct (OrderedSlot.compare sl sl0). + + apply LT. red; auto. + + destruct (OrderedZ.compare pos pos0). + * apply LT. red. auto. + * destruct (OrderedTyp.compare ty ty0). + apply LT. red; auto. + apply EQ. red; red in e; red in e0; red in e1. congruence. + apply GT. red; auto. + * apply GT. red. auto. + + apply GT. red; auto. + Defined. + Definition eq_dec := Loc.eq. + +(** Connection between the ordering defined here and the [Loc.diff] predicate. *) + + Definition diff_low_bound (l: loc) : loc := + match l with + | R mr => l + | S sl ofs ty => S sl (ofs - 1) Tfloat + end. + + Definition diff_high_bound (l: loc) : loc := + match l with + | R mr => l + | S sl ofs ty => S sl (ofs + typesize ty - 1) Tlong + end. + + Lemma outside_interval_diff: + forall l l', lt l' (diff_low_bound l) \/ lt (diff_high_bound l) l' -> Loc.diff l l'. + Proof. + intros. + destruct l as [mr | sl ofs ty]; destruct l' as [mr' | sl' ofs' ty']; simpl in *; auto. + - assert (IndexedMreg.index mr <> IndexedMreg.index mr'). + { destruct H. apply sym_not_equal. apply Plt_ne; auto. apply Plt_ne; auto. } + congruence. + - assert (RANGE: forall ty, 1 <= typesize ty <= 2). + { intros; unfold typesize. destruct ty0; omega. } + destruct H. + + destruct H. left. apply sym_not_equal. apply OrderedSlot.lt_not_eq; auto. + destruct H. right. + destruct H0. right. generalize (RANGE ty'); omega. + destruct H0. + assert (ty' = Tint). + { unfold OrderedTyp.lt in H1. destruct ty'; compute in H1; congruence. } + subst ty'. right. simpl typesize. omega. + + destruct H. left. apply OrderedSlot.lt_not_eq; auto. + destruct H. right. + destruct H0. left; omega. + destruct H0. exfalso. destruct ty'; compute in H1; congruence. + Qed. + + Lemma diff_outside_interval: + forall l l', Loc.diff l l' -> lt l' (diff_low_bound l) \/ lt (diff_high_bound l) l'. + Proof. + intros. + destruct l as [mr | sl ofs ty]; destruct l' as [mr' | sl' ofs' ty']; simpl in *; auto. + - unfold Plt, Pos.lt. destruct (Pos.compare (IndexedMreg.index mr) (IndexedMreg.index mr')) eqn:C. + elim H. apply IndexedMreg.index_inj. apply Pos.compare_eq_iff. auto. + auto. + rewrite Pos.compare_antisym. rewrite C. auto. + - destruct (OrderedSlot.compare sl sl'); auto. + destruct H. contradiction. + destruct H. + right; right; split; auto. left; omega. + left; right; split; auto. destruct ty'; simpl in *. + destruct (zlt ofs' (ofs - 1)). left; auto. + right; split. omega. compute. auto. + left; omega. + left; omega. + Qed. + +End OrderedLoc. + diff --git a/backend/Mach.v b/backend/Mach.v index 0728c4d..5030de1 100644 --- a/backend/Mach.v +++ b/backend/Mach.v @@ -60,7 +60,7 @@ Inductive instruction: Type := | Mstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction | Mcall: signature -> mreg + ident -> instruction | Mtailcall: signature -> mreg + ident -> instruction - | Mbuiltin: external_function -> list mreg -> mreg -> instruction + | Mbuiltin: external_function -> list mreg -> list mreg -> instruction | Mannot: external_function -> list annot_param -> instruction | Mlabel: label -> instruction | Mgoto: label -> instruction @@ -124,7 +124,7 @@ store in the reserved location. *) Definition chunk_of_type (ty: typ) := - match ty with Tint => Mint32 | Tfloat => Mfloat64al32 end. + match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end. Definition load_stack (m: mem) (sp: val) (ty: typ) (ofs: int) := Mem.loadv (chunk_of_type ty) m (Val.add sp (Vint ofs)). @@ -147,38 +147,29 @@ Notation "a # b <- c" := (Regmap.set b c a) (at level 1, b at next level). Fixpoint undef_regs (rl: list mreg) (rs: regset) {struct rl} : regset := match rl with | nil => rs - | r1 :: rl' => undef_regs rl' (Regmap.set r1 Vundef rs) + | r1 :: rl' => Regmap.set r1 Vundef (undef_regs rl' rs) end. Lemma undef_regs_other: forall r rl rs, ~In r rl -> undef_regs rl rs r = rs r. Proof. - induction rl; simpl; intros. auto. rewrite IHrl. apply Regmap.gso. intuition. intuition. + induction rl; simpl; intros. auto. rewrite Regmap.gso. apply IHrl. intuition. intuition. Qed. Lemma undef_regs_same: - forall r rl rs, In r rl \/ rs r = Vundef -> undef_regs rl rs r = Vundef. + forall r rl rs, In r rl -> undef_regs rl rs r = Vundef. Proof. induction rl; simpl; intros. tauto. - destruct H. destruct H. apply IHrl. right. subst; apply Regmap.gss. - auto. - apply IHrl. right. unfold Regmap.set. destruct (RegEq.eq r a); auto. + destruct H. subst a. apply Regmap.gss. + unfold Regmap.set. destruct (RegEq.eq r a); auto. Qed. -Definition undef_temps (rs: regset) := - undef_regs temporary_regs rs. - -Definition undef_move (rs: regset) := - undef_regs destroyed_at_move_regs rs. - -Definition undef_op (op: operation) (rs: regset) := - match op with - | Omove => undef_move rs - | _ => undef_temps rs +Fixpoint set_regs (rl: list mreg) (vl: list val) (rs: regset) : regset := + match rl, vl with + | r1 :: rl', v1 :: vl' => set_regs rl' vl' (Regmap.set r1 v1 rs) + | _, _ => rs end. -Definition undef_setstack (rs: regset) := undef_move rs. - Definition is_label (lbl: label) (instr: instruction) : bool := match instr with | Mlabel lbl' => if peq lbl lbl' then true else false @@ -231,7 +222,7 @@ Inductive extcall_arg: regset -> mem -> val -> loc -> val -> Prop := extcall_arg rs m sp (R r) (rs r) | extcall_arg_stack: forall rs m sp ofs ty v, load_stack m sp ty (Int.repr (Stacklayout.fe_ofs_arg + 4 * ofs)) = Some v -> - extcall_arg rs m sp (S (Outgoing ofs ty)) v. + extcall_arg rs m sp (S Outgoing ofs ty) v. Definition extcall_arguments (rs: regset) (m: mem) (sp: val) (sg: signature) (args: list val) : Prop := @@ -306,34 +297,39 @@ Inductive step: state -> trace -> state -> Prop := step (State s f sp (Mgetstack ofs ty dst :: c) rs m) E0 (State s f sp c (rs#dst <- v) m) | exec_Msetstack: - forall s f sp src ofs ty c rs m m', + forall s f sp src ofs ty c rs m m' rs', store_stack m sp ty ofs (rs src) = Some m' -> + rs' = undef_regs (destroyed_by_op Omove) rs -> step (State s f sp (Msetstack src ofs ty :: c) rs m) - E0 (State s f sp c (undef_setstack rs) m') + E0 (State s f sp c rs' m') | exec_Mgetparam: - forall s fb f sp ofs ty dst c rs m v, + forall s fb f sp ofs ty dst c rs m v rs', Genv.find_funct_ptr ge fb = Some (Internal f) -> load_stack m sp Tint f.(fn_link_ofs) = Some (parent_sp s) -> load_stack m (parent_sp s) ty ofs = Some v -> + rs' = (rs # temp_for_parent_frame <- Vundef # dst <- v) -> step (State s fb sp (Mgetparam ofs ty dst :: c) rs m) - E0 (State s fb sp c (rs # IT1 <- Vundef # dst <- v) m) + E0 (State s fb sp c rs' m) | exec_Mop: - forall s f sp op args res c rs m v, + forall s f sp op args res c rs m v rs', eval_operation ge sp op rs##args m = Some v -> + rs' = ((undef_regs (destroyed_by_op op) rs)#res <- v) -> step (State s f sp (Mop op args res :: c) rs m) - E0 (State s f sp c ((undef_op op rs)#res <- v) m) + E0 (State s f sp c rs' m) | exec_Mload: - forall s f sp chunk addr args dst c rs m a v, + forall s f sp chunk addr args dst c rs m a v rs', eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = Some v -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- v) -> step (State s f sp (Mload chunk addr args dst :: c) rs m) - E0 (State s f sp c ((undef_temps rs)#dst <- v) m) + E0 (State s f sp c rs' m) | exec_Mstore: - forall s f sp chunk addr args src c rs m m' a, + forall s f sp chunk addr args src c rs m m' a rs', eval_addressing ge sp addr rs##args = Some a -> Mem.storev chunk m a (rs src) = Some m' -> + rs' = undef_regs (destroyed_by_store chunk addr) rs -> step (State s f sp (Mstore chunk addr args src :: c) rs m) - E0 (State s f sp c (undef_temps rs) m') + E0 (State s f sp c rs' m') | exec_Mcall: forall s fb sp sig ros c rs m f f' ra, find_function_ptr ge ros rs = Some f' -> @@ -352,14 +348,15 @@ Inductive step: state -> trace -> state -> Prop := step (State s fb (Vptr stk soff) (Mtailcall sig ros :: c) rs m) E0 (Callstate s f' rs m') | exec_Mbuiltin: - forall s f sp rs m ef args res b t v m', - external_call ef ge rs##args m t v m' -> + forall s f sp rs m ef args res b t vl rs' m', + external_call' ef ge rs##args m t vl m' -> + rs' = set_regs res vl (undef_regs (destroyed_by_builtin ef) rs) -> step (State s f sp (Mbuiltin ef args res :: b) rs m) - t (State s f sp b ((undef_temps rs)#res <- v) m') + t (State s f sp b rs' m') | exec_Mannot: forall s f sp rs m ef args b vargs t v m', annot_arguments rs m sp args vargs -> - external_call ef ge vargs m t v m' -> + external_call' ef ge vargs m t v m' -> step (State s f sp (Mannot ef args :: b) rs m) t (State s f sp b rs m') | exec_Mgoto: @@ -369,25 +366,28 @@ Inductive step: state -> trace -> state -> Prop := step (State s fb sp (Mgoto lbl :: c) rs m) E0 (State s fb sp c' rs m) | exec_Mcond_true: - forall s fb f sp cond args lbl c rs m c', + forall s fb f sp cond args lbl c rs m c' rs', eval_condition cond rs##args m = Some true -> Genv.find_funct_ptr ge fb = Some (Internal f) -> find_label lbl f.(fn_code) = Some c' -> + rs' = undef_regs (destroyed_by_cond cond) rs -> step (State s fb sp (Mcond cond args lbl :: c) rs m) - E0 (State s fb sp c' (undef_temps rs) m) + E0 (State s fb sp c' rs' m) | exec_Mcond_false: - forall s f sp cond args lbl c rs m, + forall s f sp cond args lbl c rs m rs', eval_condition cond rs##args m = Some false -> + rs' = undef_regs (destroyed_by_cond cond) rs -> step (State s f sp (Mcond cond args lbl :: c) rs m) - E0 (State s f sp c (undef_temps rs) m) + E0 (State s f sp c rs' m) | exec_Mjumptable: - forall s fb f sp arg tbl c rs m n lbl c', + forall s fb f sp arg tbl c rs m n lbl c' rs', rs arg = Vint n -> list_nth_z tbl (Int.unsigned n) = Some lbl -> Genv.find_funct_ptr ge fb = Some (Internal f) -> find_label lbl f.(fn_code) = Some c' -> + rs' = undef_regs destroyed_by_jumptable rs -> step (State s fb sp (Mjumptable arg tbl :: c) rs m) - E0 (State s fb sp c' (undef_temps rs) m) + E0 (State s fb sp c' rs' m) | exec_Mreturn: forall s fb stk soff c rs m f m', Genv.find_funct_ptr ge fb = Some (Internal f) -> @@ -397,20 +397,21 @@ Inductive step: state -> trace -> state -> Prop := step (State s fb (Vptr stk soff) (Mreturn :: c) rs m) E0 (Returnstate s rs m') | exec_function_internal: - forall s fb rs m f m1 m2 m3 stk, + forall s fb rs m f m1 m2 m3 stk rs', Genv.find_funct_ptr ge fb = Some (Internal f) -> Mem.alloc m 0 f.(fn_stacksize) = (m1, stk) -> let sp := Vptr stk Int.zero in store_stack m1 sp Tint f.(fn_link_ofs) (parent_sp s) = Some m2 -> store_stack m2 sp Tint f.(fn_retaddr_ofs) (parent_ra s) = Some m3 -> + rs' = undef_regs destroyed_at_function_entry rs -> step (Callstate s fb rs m) - E0 (State s fb sp f.(fn_code) (undef_temps rs) m3) + E0 (State s fb sp f.(fn_code) rs' m3) | exec_function_external: forall s fb rs m t rs' ef args res m', Genv.find_funct_ptr ge fb = Some (External ef) -> - external_call ef ge args m t res m' -> extcall_arguments rs m (parent_sp s) (ef_sig ef) args -> - rs' = (rs#(loc_result (ef_sig ef)) <- res) -> + external_call' ef ge args m t res m' -> + rs' = set_regs (loc_result (ef_sig ef)) res rs -> step (Callstate s fb rs m) t (Returnstate s rs' m') | exec_return: @@ -428,9 +429,10 @@ Inductive initial_state (p: program): state -> Prop := initial_state p (Callstate nil fb (Regmap.init Vundef) m0). Inductive final_state: state -> int -> Prop := - | final_state_intro: forall rs m r, - rs (loc_result (mksignature nil (Some Tint))) = Vint r -> - final_state (Returnstate nil rs m) r. + | final_state_intro: forall rs m r retcode, + loc_result (mksignature nil (Some Tint)) = r :: nil -> + rs r = Vint retcode -> + final_state (Returnstate nil rs m) retcode. Definition semantics (rao: function -> code -> int -> Prop) (p: program) := Semantics (step rao) (initial_state p) final_state (Genv.globalenv p). diff --git a/backend/Machtyping.v b/backend/Machtyping.v deleted file mode 100644 index 2dc19be..0000000 --- a/backend/Machtyping.v +++ /dev/null @@ -1,108 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Type system for the Mach intermediate language. *) - -Require Import Coqlib. -Require Import AST. -Require Import Integers. -Require Import Op. -Require Import Locations. -Require Import Conventions. -Require Import Mach. - -(** * Typing rules *) - -Inductive wt_instr : instruction -> Prop := - | wt_Mlabel: - forall lbl, - wt_instr (Mlabel lbl) - | wt_Mgetstack: - forall ofs ty r, - mreg_type r = ty -> - wt_instr (Mgetstack ofs ty r) - | wt_Msetstack: - forall ofs ty r, - mreg_type r = ty -> - wt_instr (Msetstack r ofs ty) - | wt_Mgetparam: - forall ofs ty r, - mreg_type r = ty -> - wt_instr (Mgetparam ofs ty r) - | wt_Mopmove: - forall r1 r, - mreg_type r1 = mreg_type r -> - wt_instr (Mop Omove (r1 :: nil) r) - | wt_Mop: - forall op args res, - op <> Omove -> - (List.map mreg_type args, mreg_type res) = type_of_operation op -> - wt_instr (Mop op args res) - | wt_Mload: - forall chunk addr args dst, - List.map mreg_type args = type_of_addressing addr -> - mreg_type dst = type_of_chunk chunk -> - wt_instr (Mload chunk addr args dst) - | wt_Mstore: - forall chunk addr args src, - List.map mreg_type args = type_of_addressing addr -> - mreg_type src = type_of_chunk chunk -> - wt_instr (Mstore chunk addr args src) - | wt_Mcall: - forall sig ros, - match ros with inl r => mreg_type r = Tint | inr s => True end -> - wt_instr (Mcall sig ros) - | wt_Mtailcall: - forall sig ros, - tailcall_possible sig -> - match ros with inl r => mreg_type r = Tint | inr s => True end -> - wt_instr (Mtailcall sig ros) - | wt_Mbuiltin: - forall ef args res, - List.map mreg_type args = (ef_sig ef).(sig_args) -> - mreg_type res = proj_sig_res (ef_sig ef) -> - wt_instr (Mbuiltin ef args res) - | wt_Mannot: - forall ef args, - ef_reloads ef = false -> - wt_instr (Mannot ef args) - | wt_Mgoto: - forall lbl, - wt_instr (Mgoto lbl) - | wt_Mcond: - forall cond args lbl, - List.map mreg_type args = type_of_condition cond -> - wt_instr (Mcond cond args lbl) - | wt_Mjumptable: - forall arg tbl, - mreg_type arg = Tint -> - list_length_z tbl * 4 <= Int.max_unsigned -> - wt_instr (Mjumptable arg tbl) - | wt_Mreturn: - wt_instr Mreturn. - -Record wt_function (f: function) : Prop := mk_wt_function { - wt_function_instrs: - forall instr, In instr f.(fn_code) -> wt_instr instr; - wt_function_stacksize: - 0 <= f.(fn_stacksize) <= Int.max_unsigned -}. - -Inductive wt_fundef: fundef -> Prop := - | wt_fundef_external: forall ef, - wt_fundef (External ef) - | wt_function_internal: forall f, - wt_function f -> - wt_fundef (Internal f). - -Definition wt_program (p: program) : Prop := - forall i f, In (i, Gfun f) (prog_defs p) -> wt_fundef f. diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml index 01ea1e6..8612b73 100644 --- a/backend/PrintCminor.ml +++ b/backend/PrintCminor.ml @@ -31,13 +31,13 @@ let rec precedence = function | Evar _ -> (16, NA) | Econst _ -> (16, NA) | Eunop _ -> (15, RtoL) - | Ebinop((Omul|Odiv|Odivu|Omod|Omodu|Omulf|Odivf), _, _) -> (13, LtoR) - | Ebinop((Oadd|Osub|Oaddf|Osubf), _, _) -> (12, LtoR) - | Ebinop((Oshl|Oshr|Oshru), _, _) -> (11, LtoR) - | Ebinop((Ocmp _|Ocmpu _|Ocmpf _), _, _) -> (10, LtoR) - | Ebinop(Oand, _, _) -> (8, LtoR) - | Ebinop(Oxor, _, _) -> (7, LtoR) - | Ebinop(Oor, _, _) -> (6, LtoR) + | Ebinop((Omul|Odiv|Odivu|Omod|Omodu|Omulf|Odivf|Omull|Odivl|Odivlu|Omodl|Omodlu), _, _) -> (13, LtoR) + | Ebinop((Oadd|Osub|Oaddf|Osubf|Oaddl|Osubl), _, _) -> (12, LtoR) + | Ebinop((Oshl|Oshr|Oshru|Oshll|Oshrl|Oshrlu), _, _) -> (11, LtoR) + | Ebinop((Ocmp _|Ocmpu _|Ocmpf _|Ocmpl _|Ocmplu _), _, _) -> (10, LtoR) + | Ebinop((Oand|Oandl), _, _) -> (8, LtoR) + | Ebinop((Oxor|Oxorl), _, _) -> (7, LtoR) + | Ebinop((Oor|Oorl), _, _) -> (6, LtoR) | Eload _ -> (15, RtoL) (* Naming idents. *) @@ -60,6 +60,15 @@ let name_of_unop = function | Ointuoffloat -> "intuoffloat" | Ofloatofint -> "floatofint" | Ofloatofintu -> "floatofintu" + | Onegl -> "-l" + | Onotl -> "~l" + | Ointoflong -> "intofflong" + | Olongofint -> "longofint" + | Olongofintu -> "longofintu" + | Olongoffloat -> "longoffloat" + | Olonguoffloat -> "longuoffloat" + | Ofloatoflong -> "floatoflong" + | Ofloatoflongu -> "floatoflongu" let comparison_name = function | Ceq -> "==" @@ -87,9 +96,24 @@ let name_of_binop = function | Osubf -> "-f" | Omulf -> "*f" | Odivf -> "/f" + | Oaddl -> "+l" + | Osubl -> "-l" + | Omull -> "*l" + | Odivl -> "/l" + | Odivlu -> "/lu" + | Omodl -> "%l" + | Omodlu -> "%lu" + | Oandl -> "&l" + | Oorl -> "|l" + | Oxorl -> "^l" + | Oshll -> "<<l" + | Oshrl -> ">>l" + | Oshrlu -> ">>lu" | Ocmp c -> comparison_name c | Ocmpu c -> comparison_name c ^ "u" | Ocmpf c -> comparison_name c ^ "f" + | Ocmpl c -> comparison_name c ^ "l" + | Ocmplu c -> comparison_name c ^ "lu" (* Expressions *) @@ -109,6 +133,8 @@ let rec expr p (prec, e) = fprintf p "%ld" (camlint_of_coqint n) | Econst(Ofloatconst f) -> fprintf p "%F" (camlfloat_of_coqfloat f) + | Econst(Olongconst n) -> + fprintf p "%LdLL" (camlint64_of_coqint n) | Econst(Oaddrsymbol(id, ofs)) -> let ofs = camlint_of_coqint ofs in if ofs = 0l @@ -141,6 +167,7 @@ let rec print_expr_list p (first, rl) = let name_of_type = function | Tint -> "int" | Tfloat -> "float" + | Tlong -> "long" let rec print_sig p = function | {sig_args = []; sig_res = None} -> fprintf p "void" @@ -262,6 +289,7 @@ let print_init_data p = function | Init_int8 i -> fprintf p "int8 %ld" (camlint_of_coqint i) | Init_int16 i -> fprintf p "int16 %ld" (camlint_of_coqint i) | Init_int32 i -> fprintf p "%ld" (camlint_of_coqint i) + | Init_int64 i -> fprintf p "%Ld" (camlint64_of_coqint i) | Init_float32 f -> fprintf p "float32 %F" (camlfloat_of_coqfloat f) | Init_float64 f -> fprintf p "%F" (camlfloat_of_coqfloat f) | Init_space i -> fprintf p "[%ld]" (camlint_of_coqint i) diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml index 94f5af0..1149dd0 100644 --- a/backend/PrintLTL.ml +++ b/backend/PrintLTL.ml @@ -22,21 +22,30 @@ open Locations open LTL open PrintAST open PrintOp -open PrintRTL + +let mreg pp r = + match Machregsaux.name_of_register r with + | Some s -> fprintf pp "%s" s + | None -> fprintf pp "<unknown machreg>" + +let rec mregs pp = function + | [] -> () + | [r] -> mreg pp r + | r1::rl -> fprintf pp "%a, %a" mreg r1 mregs rl + +let slot pp (sl, ofs, ty) = + match sl with + | Local -> + fprintf pp "local(%ld,%s)" (camlint_of_coqint ofs) (name_of_type ty) + | Incoming -> + fprintf pp "incoming(%ld,%s)" (camlint_of_coqint ofs) (name_of_type ty) + | Outgoing -> + fprintf pp "outgoing(%ld,%s)" (camlint_of_coqint ofs) (name_of_type ty) let loc pp l = match l with - | R r -> - begin match Machregsaux.name_of_register r with - | Some s -> fprintf pp "%s" s - | None -> fprintf pp "<unknown machreg>" - end - | S(Local(ofs, ty)) -> - fprintf pp "local(%ld,%s)" (camlint_of_coqint ofs) (name_of_typ ty) - | S(Incoming(ofs, ty)) -> - fprintf pp "incoming(%ld,%s)" (camlint_of_coqint ofs) (name_of_typ ty) - | S(Outgoing(ofs, ty)) -> - fprintf pp "outgoing(%ld,%s)" (camlint_of_coqint ofs) (name_of_typ ty) + | R r -> mreg pp r + | S(sl, ofs, ty) -> slot pp (sl, ofs, ty) let rec locs pp = function | [] -> () @@ -44,77 +53,93 @@ let rec locs pp = function | r1::rl -> fprintf pp "%a, %a" loc r1 locs rl let ros pp = function - | Coq_inl r -> loc pp r + | Coq_inl r -> mreg pp r | Coq_inr s -> fprintf pp "\"%s\"" (extern_atom s) let print_succ pp s dfl = - let s = camlint_of_positive s in - if s <> dfl then fprintf pp " goto %ld@ " s - -let print_instruction pp (pc, i) = - fprintf pp "%5ld: " pc; - match i with - | Lnop s -> - let s = camlint_of_positive s in - if s = Int32.pred pc - then fprintf pp "nop@ " - else fprintf pp "goto %ld@ " s - | Lop(op, args, res, s) -> - fprintf pp "%a = %a@ " loc res (print_operator loc) (op, args); - print_succ pp s (Int32.pred pc) - | Lload(chunk, addr, args, dst, s) -> - fprintf pp "%a = %s[%a]@ " - loc dst (name_of_chunk chunk) (print_addressing loc) (addr, args); - print_succ pp s (Int32.pred pc) - | Lstore(chunk, addr, args, src, s) -> - fprintf pp "%s[%a] = %a@ " - (name_of_chunk chunk) (print_addressing loc) (addr, args) loc src; - print_succ pp s (Int32.pred pc) - | Lcall(sg, fn, args, res, s) -> - fprintf pp "%a = %a(%a)@ " - loc res ros fn locs args; - print_succ pp s (Int32.pred pc) - | Ltailcall(sg, fn, args) -> - fprintf pp "tailcall %a(%a)@ " - ros fn locs args - | Lbuiltin(ef, args, res, s) -> - fprintf pp "%a = builtin %s(%a)@ " - loc res (name_of_external ef) locs args; - print_succ pp s (Int32.pred pc) + let s = P.to_int32 s in + if s <> dfl then fprintf pp "goto %ld" s + +let print_instruction pp succ = function + | Lop(op, args, res) -> + fprintf pp "%a = %a;" mreg res (print_operation mreg) (op, args) + | Lload(chunk, addr, args, dst) -> + fprintf pp "%a = %s[%a];" + mreg dst (name_of_chunk chunk) (print_addressing mreg) (addr, args) + | Lgetstack(sl, ofs, ty, dst) -> + fprintf pp "%a = %a;" mreg dst slot (sl, ofs, ty) + | Lsetstack(src, sl, ofs, ty) -> + fprintf pp "%a = %a;" slot (sl, ofs, ty) mreg src + | Lstore(chunk, addr, args, src) -> + fprintf pp "%s[%a] = %a;" + (name_of_chunk chunk) (print_addressing mreg) (addr, args) mreg src + | Lcall(sg, fn) -> + fprintf pp "call %a;" ros fn + | Ltailcall(sg, fn) -> + fprintf pp "tailcall %a;" ros fn + | Lbuiltin(ef, args, res) -> + fprintf pp "%a = builtin %s(%a);" + mregs res (name_of_external ef) mregs args + | Lannot(ef, args) -> + fprintf pp "builtin %s(%a);" + (name_of_external ef) locs args + | Lbranch s -> + print_succ pp s succ | Lcond(cond, args, s1, s2) -> - fprintf pp "if (%a) goto %ld else goto %ld@ " - (print_condition loc) (cond, args) - (camlint_of_positive s1) (camlint_of_positive s2) + fprintf pp "if (%a) goto %ld else goto %ld" + (print_condition mreg) (cond, args) + (P.to_int32 s1) (P.to_int32 s2) | Ljumptable(arg, tbl) -> let tbl = Array.of_list tbl in - fprintf pp "@[<v 2>jumptable (%a)" loc arg; + fprintf pp "@[<v 2>jumptable (%a)" mreg arg; for i = 0 to Array.length tbl - 1 do - fprintf pp "@ case %d: goto %ld" i (camlint_of_positive tbl.(i)) + fprintf pp "@ case %d: goto %ld" i (P.to_int32 tbl.(i)) done; - fprintf pp "@]@ " - | Lreturn None -> - fprintf pp "return@ " - | Lreturn (Some arg) -> - fprintf pp "return %a@ " loc arg - -let print_function pp f = - fprintf pp "@[<v 2>f(%a) {@ " locs f.fn_params; + fprintf pp "@]" + | Lreturn -> + fprintf pp "return" + +let rec print_instructions pp succ = function + | [] -> () + | [i] -> print_instruction pp succ i + | i :: il -> + print_instruction pp succ i; + fprintf pp "@ "; + print_instructions pp succ il + +let print_block pp (pc, blk) = + fprintf pp "%5ld: @[<hov 0>" pc; + print_instructions pp (Int32.pred pc) blk; + fprintf pp "@]@ " + +let print_function pp id f = + fprintf pp "@[<v 2>%s() {@ " (extern_atom id); let instrs = List.sort (fun (pc1, _) (pc2, _) -> Pervasives.compare pc2 pc1) (List.map - (fun (pc, i) -> (camlint_of_positive pc, i)) + (fun (pc, i) -> (P.to_int32 pc, i)) (PTree.elements f.fn_code)) in print_succ pp f.fn_entrypoint (match instrs with (pc1, _) :: _ -> pc1 | [] -> -1l); - List.iter (print_instruction pp) instrs; + List.iter (print_block pp) instrs; fprintf pp "@;<0 -2>}@]@." -let print_fundef fd = - begin match fd with - | Internal f -> print_function std_formatter f - | External _ -> () - end; - fd +let print_globdef pp (id, gd) = + match gd with + | Gfun(Internal f) -> print_function pp id f + | _ -> () + +let print_program pp (prog: LTL.program) = + List.iter (print_globdef pp) prog.prog_defs +let destination : string option ref = ref None +let print_if prog = + match !destination with + | None -> () + | Some f -> + let oc = open_out f in + let pp = formatter_of_out_channel oc in + print_program pp prog; + close_out oc diff --git a/backend/PrintMach.ml b/backend/PrintMach.ml index 7e6c343..e7cb947 100644 --- a/backend/PrintMach.ml +++ b/backend/PrintMach.ml @@ -76,7 +76,7 @@ let print_instruction pp i = fprintf pp "tailcall %a@ " ros fn | Mbuiltin(ef, args, res) -> fprintf pp "%a = builtin %s(%a)@ " - reg res (name_of_external ef) regs args + regs res (name_of_external ef) regs args | Mannot(ef, args) -> fprintf pp "%s(%a)@ " (name_of_external ef) annot_params args | Mlabel lbl -> diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml new file mode 100644 index 0000000..756fc58 --- /dev/null +++ b/backend/PrintXTL.ml @@ -0,0 +1,147 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Pretty-printer for XTL *) + +open Format +open Camlcoq +open Datatypes +open Maps +open AST +open Registers +open Op +open Locations +open PrintAST +open PrintOp +open XTL + +let mreg pp r = + match Machregsaux.name_of_register r with + | Some s -> fprintf pp "%s" s + | None -> fprintf pp "<unknown machreg>" + +let short_name_of_type = function Tint -> 'i' | Tfloat -> 'f' | Tlong -> 'l' + +let loc pp = function + | R r -> mreg pp r + | S(Local, ofs, ty) -> + fprintf pp "L%c%ld" (short_name_of_type ty) (camlint_of_coqint ofs) + | S(Incoming, ofs, ty) -> + fprintf pp "I%c%ld" (short_name_of_type ty) (camlint_of_coqint ofs) + | S(Outgoing, ofs, ty) -> + fprintf pp "O%c%ld" (short_name_of_type ty) (camlint_of_coqint ofs) + +let current_alloc = ref (None: (var -> loc) option) +let current_liveness = ref (None: VSet.t PMap.t option) + +let reg pp r ty = + match !current_alloc with + | None -> fprintf pp "x%ld" (P.to_int32 r) + | Some alloc -> fprintf pp "x%ld{%a}" (P.to_int32 r) loc (alloc (V(r, ty))) + +let var pp = function + | V(r, ty) -> reg pp r ty + | L l -> loc pp l + +let rec vars pp = function + | [] -> () + | [r] -> var pp r + | r1::rl -> fprintf pp "%a, %a" var r1 vars rl + +let ros pp = function + | Coq_inl r -> var pp r + | Coq_inr s -> fprintf pp "\"%s\"" (extern_atom s) + +let liveset pp lv = + fprintf pp "@[<hov 2>{"; + VSet.iter (function V(r, ty) -> fprintf pp "@ x%ld" (P.to_int32 r) + | L l -> ()) + lv; + fprintf pp " }@]" + +let print_succ pp s dfl = + let s = P.to_int32 s in + if s <> dfl then fprintf pp "goto %ld" s + +let print_instruction pp succ = function + | Xmove(src, dst) -> + fprintf pp "%a = %a;" var dst var src + | Xreload(src, dst) -> + fprintf pp "%a =r %a;" var dst var src + | Xspill(src, dst) -> + fprintf pp "%a =s %a;" var dst var src + | Xparmove(srcs, dsts, t1, t2) -> + fprintf pp "(%a) = (%a) using %a, %a;" vars dsts vars srcs var t1 var t2 + | Xop(op, args, res) -> + fprintf pp "%a = %a;" var res (print_operation var) (op, args) + | Xload(chunk, addr, args, dst) -> + fprintf pp "%a = %s[%a];" + var dst (name_of_chunk chunk) (print_addressing var) (addr, args) + | Xstore(chunk, addr, args, src) -> + fprintf pp "%s[%a] = %a;" + (name_of_chunk chunk) (print_addressing var) (addr, args) var src + | Xcall(sg, fn, args, res) -> + fprintf pp "%a = call %a(%a);" vars res ros fn vars args + | Xtailcall(sg, fn, args) -> + fprintf pp "tailcall %a(%a);" ros fn vars args + | Xbuiltin(ef, args, res) -> + fprintf pp "%a = builtin %s(%a);" + vars res (name_of_external ef) vars args + | Xbranch s -> + print_succ pp s succ + | Xcond(cond, args, s1, s2) -> + fprintf pp "if (%a) goto %ld else goto %ld" + (print_condition var) (cond, args) + (P.to_int32 s1) (P.to_int32 s2) + | Xjumptable(arg, tbl) -> + let tbl = Array.of_list tbl in + fprintf pp "@[<v 2>jumptable (%a)" var arg; + for i = 0 to Array.length tbl - 1 do + fprintf pp "@ case %d: goto %ld" i (P.to_int32 tbl.(i)) + done; + fprintf pp "@]" + | Xreturn args -> + fprintf pp "return %a" vars args + +let rec print_instructions pp succ = function + | [] -> () + | [i] -> print_instruction pp succ i + | i :: il -> + print_instruction pp succ i; + fprintf pp "@ "; + print_instructions pp succ il + +let print_block pp (pc, blk) = + fprintf pp "%5ld: @[<hov 0>" pc; + print_instructions pp (Int32.pred pc) blk; + fprintf pp "@]@ "; + match !current_liveness with + | None -> () + | Some liveness -> fprintf pp "%a@ " liveset (PMap.get (P.of_int32 pc) liveness) + +let print_function pp ?alloc ?live f = + current_alloc := alloc; + current_liveness := live; + fprintf pp "@[<v 2>f() {@ "; + let instrs = + List.sort + (fun (pc1, _) (pc2, _) -> Pervasives.compare pc2 pc1) + (List.map + (fun (pc, i) -> (P.to_int32 pc, i)) + (PTree.elements f.fn_code)) in + print_succ pp f.fn_entrypoint + (match instrs with (pc1, _) :: _ -> pc1 | [] -> -1l); + List.iter (print_block pp) instrs; + fprintf pp "@;<0 -2>}@]@."; + current_alloc := None; + current_liveness := None + diff --git a/backend/RRE.v b/backend/RRE.v deleted file mode 100644 index bee57f6..0000000 --- a/backend/RRE.v +++ /dev/null @@ -1,173 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Redundant Reloads Elimination *) - -Require Import Coqlib. -Require Import AST. -Require Import Op. -Require Import Locations. -Require Import Conventions. -Require Import Linear. - -(** * Equations between slots and machine registers *) - -(** The RRE pass keeps track of which register holds the value of which - stack slot, using sets of equations like the following. *) - -Record equation := mkeq { e_reg: mreg; e_slot: slot }. - -Definition equations : Type := list equation. - -Fixpoint find_reg_containing (s: slot) (eqs: equations) : option mreg := - match eqs with - | nil => None - | e :: eqs' => if slot_eq (e_slot e) s then Some (e_reg e) else find_reg_containing s eqs' - end. - -Definition eq_equation (eq1 eq2: equation) : {eq1=eq2} + {eq1<>eq2}. -Proof. - generalize slot_eq mreg_eq. decide equality. -Defined. - -Definition contains_equation (s: slot) (r: mreg) (eqs: equations) : bool := - In_dec eq_equation (mkeq r s) eqs. - -(** Remove equations that are invalidated by an assignment to location [l]. *) - -Fixpoint kill_loc (l: loc) (eqs: equations) : equations := - match eqs with - | nil => nil - | e :: eqs' => - if Loc.diff_dec (S (e_slot e)) l && Loc.diff_dec (R (e_reg e)) l - then e :: kill_loc l eqs' - else kill_loc l eqs' - end. - -(** Same, for a list of locations [ll]. *) - -Definition kill_locs (ll: list loc) (eqs: equations) : equations := - List.fold_left (fun eqs l => kill_loc l eqs) ll eqs. - -Definition kill_temps (eqs: equations) : equations := - kill_locs temporaries eqs. - -Definition kill_at_move (eqs: equations) : equations := - kill_locs destroyed_at_move eqs. - -Definition kill_op (op: operation) (eqs: equations) : equations := - match op with Omove => kill_at_move eqs | _ => kill_temps eqs end. - -(** * Safety criterion *) - -Definition is_incoming (s: slot) : bool := - match s with - | Incoming _ _ => true - | _ => false - end. - -(** Turning a [Lgetstack] into a register-to-register move is not always - safe: at least on x86, the move destroys some registers - (those from [destroyed_at_move] list) while the [Lgetstack] does not. - Therefore, we perform this transformation only if the destroyed - registers are not used before being destroyed by a later - [Lop], [Lload], [Lstore], [Lbuiltin], [Lcond] or [Ljumptable] operation. *) - -Fixpoint safe_move_insertion (c: code) : bool := - match c with - | Lgetstack s r :: c' => - negb(In_dec mreg_eq r destroyed_at_move_regs) && safe_move_insertion c' - | Lsetstack r s :: c' => - negb(In_dec mreg_eq r destroyed_at_move_regs) - | Lop op args res :: c' => - list_disjoint_dec mreg_eq args destroyed_at_move_regs - | Lload chunk addr args res :: c' => - list_disjoint_dec mreg_eq args destroyed_at_move_regs - | Lstore chunk addr args src :: c' => - list_disjoint_dec mreg_eq (src :: args) destroyed_at_move_regs - | Lbuiltin ef args res :: c' => - list_disjoint_dec mreg_eq args destroyed_at_move_regs - | Lcond cond args lbl :: c' => - list_disjoint_dec mreg_eq args destroyed_at_move_regs - | Ljumptable arg tbl :: c' => - negb(In_dec mreg_eq arg destroyed_at_move_regs) - | _ => false - end. - -(** * Code transformation *) - -(** The following function eliminates [Lgetstack] instructions or turn them - into register-to-register move whenever possible. Simultaneously, - it propagates valid (register, slot) equations across basic blocks. *) - -(** [transf_code] is written in accumulator-passing style so that it runs - in constant stack space. The [k] parameter accumulates the instructions - to be generated, in reverse order, and is then reversed at the end *) - -Fixpoint transf_code (eqs: equations) (c: code) (k: code) : code := - match c with - | nil => List.rev' k - | Lgetstack s r :: c => - if is_incoming s then - transf_code (kill_loc (R r) (kill_loc (R IT1) eqs)) c (Lgetstack s r :: k) - else if contains_equation s r eqs then - transf_code eqs c k - else - match find_reg_containing s eqs with - | Some r' => - if safe_move_insertion c then - transf_code (kill_at_move (mkeq r s :: kill_loc (R r) eqs)) c (Lop Omove (r' :: nil) r :: k) - else - transf_code (mkeq r s :: kill_loc (R r) eqs) c (Lgetstack s r :: k) - | None => - transf_code (mkeq r s :: kill_loc (R r) eqs) c (Lgetstack s r :: k) - end - | Lsetstack r s :: c => - transf_code (kill_at_move (mkeq r s :: kill_loc (S s) eqs)) c (Lsetstack r s :: k) - | Lop op args res :: c => - transf_code (kill_loc (R res) (kill_op op eqs)) c (Lop op args res :: k) - | Lload chunk addr args res :: c => - transf_code (kill_loc (R res) (kill_temps eqs)) c (Lload chunk addr args res :: k) - | Lstore chunk addr args src :: c => - transf_code (kill_temps eqs) c (Lstore chunk addr args src :: k) - | Lcall sg ros :: c => - transf_code nil c (Lcall sg ros :: k) - | Ltailcall sg ros :: c => - transf_code nil c (Ltailcall sg ros :: k) - | Lbuiltin ef args res :: c => - transf_code (kill_loc (R res) (kill_temps eqs)) c (Lbuiltin ef args res :: k) - | Lannot ef args :: c => - transf_code eqs c (Lannot ef args :: k) - | Llabel lbl :: c => - transf_code nil c (Llabel lbl :: k) - | Lgoto lbl :: c => - transf_code nil c (Lgoto lbl :: k) - | Lcond cond args lbl :: c => - transf_code (kill_temps eqs) c (Lcond cond args lbl :: k) - | Ljumptable arg lbls :: c => - transf_code nil c (Ljumptable arg lbls :: k) - | Lreturn :: c => - transf_code nil c (Lreturn :: k) - end. - -Definition transf_function (f: function) : function := - mkfunction - (fn_sig f) - (fn_stacksize f) - (transf_code nil (fn_code f) nil). - -Definition transf_fundef (fd: fundef) : fundef := - transf_fundef transf_function fd. - -Definition transf_program (p: program) : program := - transform_program transf_fundef p. - diff --git a/backend/RREproof.v b/backend/RREproof.v deleted file mode 100644 index 98de7a8..0000000 --- a/backend/RREproof.v +++ /dev/null @@ -1,658 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Correctness proof for the [RRE] pass. *) - -Require Import Axioms. -Require Import Coqlib. -Require Import AST. -Require Import Values. -Require Import Globalenvs. -Require Import Events. -Require Import Smallstep. -Require Import Op. -Require Import Locations. -Require Import Conventions. -Require Import Linear. -Require Import RRE. - -(** * Operations over equations *) - -Lemma find_reg_containing_sound: - forall s r eqs, find_reg_containing s eqs = Some r -> In (mkeq r s) eqs. -Proof. - induction eqs; simpl; intros. - congruence. - destruct (slot_eq (e_slot a) s). inv H. left; destruct a; auto. right; eauto. -Qed. - -Definition equations_hold (ls: locset) (eqs: equations) : Prop := - forall e, In e eqs -> ls (S (e_slot e)) = ls (R (e_reg e)). - -Lemma nil_hold: - forall ls, equations_hold ls nil. -Proof. - red; intros; contradiction. -Qed. - -Lemma In_kill_loc: - forall e l eqs, - In e (kill_loc l eqs) -> - In e eqs /\ Loc.diff (S (e_slot e)) l /\ Loc.diff (R (e_reg e)) l. -Proof. - induction eqs; simpl kill_loc; simpl In; intros. - tauto. - destruct (Loc.diff_dec (S (e_slot a)) l). - destruct (Loc.diff_dec (R (e_reg a)) l). - simpl in H. intuition congruence. - simpl in H. intuition. - simpl in H. intuition. -Qed. - -Lemma kill_loc_hold: - forall ls eqs l v, - equations_hold ls eqs -> - equations_hold (Locmap.set l v ls) (kill_loc l eqs). -Proof. - intros; red; intros. - exploit In_kill_loc; eauto. intros [A [B C]]. - repeat rewrite Locmap.gso; auto; apply Loc.diff_sym; auto. -Qed. - -Lemma In_kill_locs: - forall e ll eqs, - In e (kill_locs ll eqs) -> - In e eqs /\ Loc.notin (S (e_slot e)) ll /\ Loc.notin (R (e_reg e)) ll. -Proof. -Opaque Loc.diff. - induction ll; simpl; intros. - tauto. - exploit IHll; eauto. intros [A [B C]]. exploit In_kill_loc; eauto. intros [D [E F]]. - tauto. -Qed. - -Lemma kill_locs_hold: - forall ll ls eqs, - equations_hold ls eqs -> - equations_hold (Locmap.undef ll ls) (kill_locs ll eqs). -Proof. - intros; red; intros. exploit In_kill_locs; eauto. intros [A [B C]]. - repeat rewrite Locmap.guo; auto. -Qed. - -Lemma kill_temps_hold: - forall ls eqs, - equations_hold ls eqs -> - equations_hold (LTL.undef_temps ls) (kill_temps eqs). -Proof. - exact (kill_locs_hold temporaries). -Qed. - -Lemma kill_at_move_hold: - forall ls eqs, - equations_hold ls eqs -> - equations_hold (undef_setstack ls) (kill_at_move eqs). -Proof. - exact (kill_locs_hold destroyed_at_move). -Qed. - -Lemma kill_at_op_hold: - forall op ls eqs, - equations_hold ls eqs -> - equations_hold (undef_op op ls) (kill_op op eqs). -Proof. - intros op. - destruct op; exact kill_temps_hold || exact kill_at_move_hold. -Qed. - -Lemma eqs_getstack_hold: - forall rs r s eqs, - equations_hold rs eqs -> - equations_hold (Locmap.set (R r) (rs (S s)) rs) - (mkeq r s :: kill_loc (R r) eqs). -Proof. -Transparent Loc.diff. - intros; red; intros. simpl in H0; destruct H0. - subst e. simpl. rewrite Locmap.gss; rewrite Locmap.gso; auto. red; auto. - exploit In_kill_loc; eauto. intros [D [E F]]. - repeat rewrite Locmap.gso. auto. - apply Loc.diff_sym; auto. apply Loc.diff_sym; auto. -Qed. - -Lemma eqs_movestack_hold: - forall rs r s eqs, - equations_hold rs eqs -> - equations_hold (Locmap.set (R r) (rs (S s)) (undef_setstack rs)) - (kill_at_move (mkeq r s :: kill_loc (R r) eqs)). -Proof. - unfold undef_setstack, kill_at_move; intros; red; intros. - exploit In_kill_locs; eauto. intros [A [B C]]. - simpl in A; destruct A. - subst e. rewrite Locmap.gss. rewrite Locmap.gso. apply Locmap.guo. auto. - simpl; auto. - exploit In_kill_loc; eauto. intros [D [E F]]. - repeat rewrite Locmap.gso. repeat rewrite Locmap.guo; auto. - apply Loc.diff_sym; auto. apply Loc.diff_sym; auto. -Qed. - -Lemma eqs_setstack_hold: - forall rs r s eqs, - equations_hold rs eqs -> - equations_hold (Locmap.set (S s) (rs (R r)) (undef_setstack rs)) - (kill_at_move (mkeq r s :: kill_loc (S s) eqs)). -Proof. - unfold undef_setstack, kill_at_move; intros; red; intros. - exploit In_kill_locs; eauto. intros [A [B C]]. - simpl in A; destruct A. - subst e. rewrite Locmap.gss. rewrite Locmap.gso. rewrite Locmap.guo. auto. - auto. simpl. destruct s; auto. - exploit In_kill_loc; eauto. intros [D [E F]]. - repeat rewrite Locmap.gso. repeat rewrite Locmap.guo; auto. - apply Loc.diff_sym; auto. apply Loc.diff_sym; auto. -Qed. - -Lemma locmap_set_reg_same: - forall rs r, - Locmap.set (R r) (rs (R r)) rs = rs. -Proof. - intros. apply extensionality; intros. - destruct (Loc.eq x (R r)). - subst x. apply Locmap.gss. - apply Locmap.gso. apply Loc.diff_reg_right; auto. -Qed. - -(** * Agreement between values of locations *) - -(** Values of locations may differ between the original and transformed - program: after a [Lgetstack] is optimized to a [Lop Omove], - the values of [destroyed_at_move] temporaries differ. This - can only happen in parts of the code where the [safe_move_insertion] - function returns [true]. *) - -Definition agree (sm: bool) (rs rs': locset) : Prop := - forall l, sm = false \/ Loc.notin l destroyed_at_move -> rs' l = rs l. - -Lemma agree_false: - forall rs rs', - agree false rs rs' <-> rs' = rs. -Proof. - intros; split; intros. - apply extensionality; intros. auto. - subst rs'. red; auto. -Qed. - -Lemma agree_slot: - forall sm rs rs' s, - agree sm rs rs' -> rs' (S s) = rs (S s). -Proof. -Transparent Loc.diff. - intros. apply H. right. simpl; destruct s; tauto. -Qed. - -Lemma agree_reg: - forall sm rs rs' r, - agree sm rs rs' -> - sm = false \/ ~In r destroyed_at_move_regs -> rs' (R r) = rs (R r). -Proof. - intros. apply H. destruct H0; auto. right. - simpl in H0; simpl; intuition congruence. -Qed. - -Lemma agree_regs: - forall sm rs rs' rl, - agree sm rs rs' -> - sm = false \/ list_disjoint rl destroyed_at_move_regs -> reglist rs' rl = reglist rs rl. -Proof. - induction rl; intros; simpl. - auto. - decEq. apply agree_reg with sm. auto. - destruct H0. auto. right. eapply list_disjoint_notin; eauto with coqlib. - apply IHrl; auto. destruct H0; auto. right. eapply list_disjoint_cons_left; eauto. -Qed. - -Lemma agree_set: - forall sm rs rs' l v, - agree sm rs rs' -> - agree sm (Locmap.set l v rs) (Locmap.set l v rs'). -Proof. - intros; red; intros. - unfold Locmap.set. - destruct (Loc.eq l l0). auto. - destruct (Loc.overlap l l0). auto. - apply H; auto. -Qed. - -Lemma agree_undef_move_1: - forall sm rs rs', - agree sm rs rs' -> - agree true rs (undef_setstack rs'). -Proof. - intros. unfold undef_setstack. red; intros. - destruct H0. congruence. rewrite Locmap.guo; auto. -Qed. - -Remark locmap_undef_equal: - forall x ll rs rs', - (forall l, Loc.notin l ll -> rs' l = rs l) -> - Locmap.undef ll rs' x = Locmap.undef ll rs x. -Proof. - induction ll; intros; simpl. - apply H. simpl. auto. - apply IHll. intros. unfold Locmap.set. - destruct (Loc.eq a l). auto. destruct (Loc.overlap a l) eqn:?. auto. - apply H. simpl. split; auto. apply Loc.diff_sym. apply Loc.non_overlap_diff; auto. -Qed. - -Lemma agree_undef_move_2: - forall sm rs rs', - agree sm rs rs' -> - agree false (undef_setstack rs) (undef_setstack rs'). -Proof. - intros. rewrite agree_false. - apply extensionality; intros. unfold undef_setstack. apply locmap_undef_equal. auto. -Qed. - -Lemma agree_undef_temps: - forall sm rs rs', - agree sm rs rs' -> - agree false (LTL.undef_temps rs) (LTL.undef_temps rs'). -Proof. - intros. rewrite agree_false. - apply extensionality; intros. unfold LTL.undef_temps. apply locmap_undef_equal. - intros. apply H. right. simpl in H0; simpl; tauto. -Qed. - -Lemma agree_undef_op: - forall op sm rs rs', - agree sm rs rs' -> - agree false (undef_op op rs) (undef_op op rs'). -Proof. - intros op. - destruct op; exact agree_undef_temps || exact agree_undef_move_2. -Qed. - -Lemma transf_code_cont: - forall c eqs k1 k2, - transf_code eqs c (k1 ++ k2) = List.rev k2 ++ transf_code eqs c k1. -Proof. - induction c; simpl; intros. - unfold rev'; rewrite <- ! rev_alt; apply rev_app_distr. - destruct a; try (rewrite <- IHc; reflexivity). - destruct (is_incoming s). - rewrite <- IHc; reflexivity. - destruct (contains_equation s m eqs). - auto. - destruct (find_reg_containing s eqs). - destruct (safe_move_insertion c). - rewrite <- IHc; reflexivity. - rewrite <- IHc; reflexivity. - rewrite <- IHc; reflexivity. -Qed. - -Corollary transf_code_eq: - forall eqs c i, transf_code eqs c (i :: nil) = i :: transf_code eqs c nil. -Proof. - intros. change (i :: nil) with (nil ++ (i :: nil)). - rewrite transf_code_cont. auto. -Qed. - -Lemma transl_find_label: - forall lbl c eqs, - find_label lbl (transf_code eqs c nil) = - option_map (fun c => transf_code nil c nil) (find_label lbl c). -Proof. - induction c; intros. - auto. - destruct a; simpl; try (rewrite transf_code_eq; simpl; auto). - destruct (is_incoming s); simpl; auto. - destruct (contains_equation s m eqs). auto. - destruct (find_reg_containing s eqs); rewrite !transf_code_eq. - destruct (safe_move_insertion c); simpl; auto. - simpl; auto. - destruct (peq lbl l); simpl; auto. -Qed. - -(** * Semantic preservation *) - -Section PRESERVATION. - -Variable prog: program. -Let tprog := transf_program prog. - -Let ge := Genv.globalenv prog. -Let tge := Genv.globalenv tprog. - -Lemma functions_translated: - forall v f, - Genv.find_funct ge v = Some f -> - Genv.find_funct tge v = Some (transf_fundef f). -Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog). - -Lemma function_ptr_translated: - forall v f, - Genv.find_funct_ptr ge v = Some f -> - Genv.find_funct_ptr tge v = Some (transf_fundef f). -Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog). - -Lemma symbols_preserved: - forall id, - Genv.find_symbol tge id = Genv.find_symbol ge id. -Proof (@Genv.find_symbol_transf _ _ _ transf_fundef prog). - -Lemma varinfo_preserved: - forall b, Genv.find_var_info tge b = Genv.find_var_info ge b. -Proof (@Genv.find_var_info_transf _ _ _ transf_fundef prog). - -Lemma sig_preserved: - forall f, funsig (transf_fundef f) = funsig f. -Proof. - destruct f; reflexivity. -Qed. - -Lemma find_function_translated: - forall ros rs fd, - find_function ge ros rs = Some fd -> - find_function tge ros rs = Some (transf_fundef fd). -Proof. - intros. destruct ros; simpl in *. - apply functions_translated; auto. - rewrite symbols_preserved. destruct (Genv.find_symbol ge i). - apply function_ptr_translated; auto. - congruence. -Qed. - -Inductive match_frames: stackframe -> stackframe -> Prop := - | match_frames_intro: - forall f sp rs c, - match_frames (Stackframe f sp rs c) - (Stackframe (transf_function f) sp rs (transf_code nil c nil)). - -Inductive match_states: state -> state -> Prop := - | match_states_regular: - forall sm stk f sp c rs m stk' rs' eqs - (STK: list_forall2 match_frames stk stk') - (EQH: equations_hold rs' eqs) - (AG: agree sm rs rs') - (SAFE: sm = false \/ safe_move_insertion c = true), - match_states (State stk f sp c rs m) - (State stk' (transf_function f) sp (transf_code eqs c nil) rs' m) - | match_states_call: - forall stk f rs m stk' - (STK: list_forall2 match_frames stk stk'), - match_states (Callstate stk f rs m) - (Callstate stk' (transf_fundef f) rs m) - | match_states_return: - forall stk rs m stk' - (STK: list_forall2 match_frames stk stk'), - match_states (Returnstate stk rs m) - (Returnstate stk' rs m). - -Definition measure (S: state) : nat := - match S with - | State s f sp c rs m => List.length c - | _ => 0%nat - end. - -Remark match_parent_locset: - forall stk stk', - list_forall2 match_frames stk stk' -> - return_regs (parent_locset stk') = return_regs (parent_locset stk). -Proof. - intros. inv H; auto. inv H0; auto. -Qed. - -Theorem transf_step_correct: - forall S1 t S2, step ge S1 t S2 -> - forall S1' (MS: match_states S1 S1'), - (exists S2', step tge S1' t S2' /\ match_states S2 S2') - \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat. -Proof. -Opaque destroyed_at_move_regs. - induction 1; intros; inv MS; simpl. -(** getstack *) - simpl in SAFE. - assert (SAFE': sm = false \/ ~In r destroyed_at_move_regs /\ safe_move_insertion b = true). - destruct (in_dec mreg_eq r destroyed_at_move_regs); simpl in SAFE; intuition congruence. - destruct (is_incoming sl) eqn:?. - (* incoming, stays as getstack *) - assert (UGS: forall rs, undef_getstack sl rs = Locmap.set (R IT1) Vundef rs). - destruct sl; simpl in Heqb0; discriminate || auto. - left; econstructor; split. - rewrite transf_code_eq; constructor. - repeat rewrite UGS. - apply match_states_regular with sm. auto. - apply kill_loc_hold. apply kill_loc_hold; auto. - rewrite (agree_slot _ _ _ sl AG). apply agree_set. apply agree_set. auto. - tauto. - (* not incoming *) - assert (UGS: forall rs, undef_getstack sl rs = rs). - destruct sl; simpl in Heqb0; discriminate || auto. - unfold contains_equation. - destruct (in_dec eq_equation (mkeq r sl) eqs); simpl. - (* eliminated *) - right. split. omega. split. auto. rewrite UGS. - exploit EQH; eauto. simpl. intro EQ. - assert (EQ1: rs' (S sl) = rs (S sl)) by (eapply agree_slot; eauto). - assert (EQ2: rs' (R r) = rs (R r)) by (eapply agree_reg; eauto; tauto). - rewrite <- EQ1; rewrite EQ; rewrite EQ2. rewrite locmap_set_reg_same. - apply match_states_regular with sm; auto; tauto. - (* found an equation *) - destruct (find_reg_containing sl eqs) as [r'|] eqn:?. - exploit EQH. eapply find_reg_containing_sound; eauto. - simpl; intro EQ. - (* turned into a move *) - destruct (safe_move_insertion b) eqn:?. - left; econstructor; split. - rewrite transf_code_eq. constructor. simpl; eauto. - rewrite UGS. rewrite <- EQ. - apply match_states_regular with true; auto. - apply eqs_movestack_hold; auto. - rewrite (agree_slot _ _ _ sl AG). apply agree_set. eapply agree_undef_move_1; eauto. - (* left as a getstack *) - left; econstructor; split. - rewrite transf_code_eq. constructor. - repeat rewrite UGS. - apply match_states_regular with sm. auto. - apply eqs_getstack_hold; auto. - rewrite (agree_slot _ _ _ sl AG). apply agree_set. auto. - intuition congruence. - (* no equation, left as a getstack *) - left; econstructor; split. - rewrite transf_code_eq; constructor. - repeat rewrite UGS. - apply match_states_regular with sm. auto. - apply eqs_getstack_hold; auto. - rewrite (agree_slot _ _ _ sl AG). apply agree_set. auto. - tauto. - -(* setstack *) - left; econstructor; split. rewrite transf_code_eq; constructor. - apply match_states_regular with false; auto. - apply eqs_setstack_hold; auto. - rewrite (agree_reg _ _ _ r AG). apply agree_set. eapply agree_undef_move_2; eauto. - simpl in SAFE. destruct (in_dec mreg_eq r destroyed_at_move_regs); simpl in SAFE; intuition congruence. - -(* op *) - left; econstructor; split. rewrite transf_code_eq; constructor. - instantiate (1 := v). rewrite <- H. - rewrite (agree_regs _ _ _ args AG). - apply eval_operation_preserved. exact symbols_preserved. - simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence. - apply match_states_regular with false; auto. - apply kill_loc_hold; apply kill_at_op_hold; auto. - apply agree_set. eapply agree_undef_op; eauto. - -(* load *) - left; econstructor; split. - rewrite transf_code_eq; econstructor. instantiate (1 := a). rewrite <- H. - rewrite (agree_regs _ _ _ args AG). - apply eval_addressing_preserved. exact symbols_preserved. - simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence. - eauto. - apply match_states_regular with false; auto. - apply kill_loc_hold; apply kill_temps_hold; auto. - apply agree_set. eapply agree_undef_temps; eauto. - -(* store *) -Opaque list_disjoint_dec. - simpl in SAFE. - assert (sm = false \/ ~In src destroyed_at_move_regs /\ list_disjoint args destroyed_at_move_regs). - destruct SAFE. auto. right. - destruct (list_disjoint_dec mreg_eq (src :: args) destroyed_at_move_regs); try discriminate. - split. eapply list_disjoint_notin; eauto with coqlib. eapply list_disjoint_cons_left; eauto. - left; econstructor; split. - rewrite transf_code_eq; econstructor. instantiate (1 := a). rewrite <- H. - rewrite (agree_regs _ _ _ args AG). - apply eval_addressing_preserved. exact symbols_preserved. - tauto. - rewrite (agree_reg _ _ _ src AG). - eauto. - tauto. - apply match_states_regular with false; auto. - apply kill_temps_hold; auto. - eapply agree_undef_temps; eauto. - -(* call *) - simpl in SAFE. assert (sm = false) by intuition congruence. - subst sm. rewrite agree_false in AG. subst rs'. - left; econstructor; split. - rewrite transf_code_eq; constructor. eapply find_function_translated; eauto. - symmetry; apply sig_preserved. - constructor. constructor; auto. constructor. - -(* tailcall *) - simpl in SAFE. assert (sm = false) by intuition congruence. - subst sm. rewrite agree_false in AG. subst rs'. - left; econstructor; split. - rewrite transf_code_eq; constructor. eapply find_function_translated; eauto. - symmetry; apply sig_preserved. eauto. - rewrite (match_parent_locset _ _ STK). constructor; auto. - -(* builtin *) - left; econstructor; split. - rewrite transf_code_eq; constructor. - rewrite (agree_regs _ _ _ args AG). - eapply external_call_symbols_preserved; eauto. - exact symbols_preserved. exact varinfo_preserved. - simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence. - apply match_states_regular with false; auto. - apply kill_loc_hold; apply kill_temps_hold; auto. - apply agree_set. eapply agree_undef_temps; eauto. - -(* annot *) - simpl in SAFE. assert (sm = false) by intuition congruence. - subst sm. rewrite agree_false in AG. subst rs'. - left; econstructor; split. - rewrite transf_code_eq; econstructor. eapply external_call_symbols_preserved; eauto. - exact symbols_preserved. exact varinfo_preserved. - apply match_states_regular with false; auto. - rewrite agree_false; auto. - -(* label *) - left; econstructor; split. rewrite transf_code_eq; constructor. - apply match_states_regular with false; auto. - apply nil_hold. - simpl in SAFE. destruct SAFE. subst sm. auto. congruence. - -(* goto *) - generalize (transl_find_label lbl (fn_code f) nil). rewrite H. simpl. intros. - left; econstructor; split. rewrite transf_code_eq; constructor; eauto. - apply match_states_regular with false; auto. - apply nil_hold. - simpl in SAFE. destruct SAFE. subst sm. auto. congruence. - -(* cond true *) - generalize (transl_find_label lbl (fn_code f) nil). rewrite H0. simpl. intros. - left; econstructor; split. - rewrite transf_code_eq; apply exec_Lcond_true; auto. - rewrite (agree_regs _ _ _ args AG). auto. - simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence. - eauto. - apply match_states_regular with false; auto. - apply nil_hold. - eapply agree_undef_temps; eauto. - -(* cond false *) - left; econstructor; split. rewrite transf_code_eq; apply exec_Lcond_false; auto. - rewrite (agree_regs _ _ _ args AG). auto. - simpl in SAFE. destruct (list_disjoint_dec mreg_eq args destroyed_at_move_regs); simpl in SAFE; intuition congruence. - apply match_states_regular with false; auto. - apply kill_temps_hold; auto. - eapply agree_undef_temps; eauto. - -(* jumptable *) - generalize (transl_find_label lbl (fn_code f) nil). rewrite H1. simpl. intros. - left; econstructor; split. rewrite transf_code_eq; econstructor; eauto. - rewrite (agree_reg _ _ _ arg AG). auto. - simpl in SAFE. destruct (in_dec mreg_eq arg destroyed_at_move_regs); simpl in SAFE; intuition congruence. - apply match_states_regular with false; auto. - apply nil_hold. - eapply agree_undef_temps; eauto. - -(* return *) - simpl in SAFE. destruct SAFE; try discriminate. subst sm. rewrite agree_false in AG. subst rs'. - left; econstructor; split. - rewrite transf_code_eq; constructor. simpl. eauto. - rewrite (match_parent_locset _ _ STK). - constructor; auto. - -(* internal *) - left; econstructor; split. - constructor. simpl; eauto. - simpl. apply match_states_regular with false; auto. apply nil_hold. rewrite agree_false; auto. - -(* external *) - left; econstructor; split. - econstructor. eapply external_call_symbols_preserved; eauto. - exact symbols_preserved. exact varinfo_preserved. - auto. eauto. - constructor; auto. - -(* return *) - inv STK. inv H1. left; econstructor; split. constructor. - apply match_states_regular with false; auto. - apply nil_hold. - rewrite agree_false; auto. -Qed. - -Lemma transf_initial_states: - forall st1, initial_state prog st1 -> - exists st2, initial_state tprog st2 /\ match_states st1 st2. -Proof. - intros. inversion H. - econstructor; split. - econstructor. - apply Genv.init_mem_transf; eauto. - rewrite symbols_preserved. eauto. - apply function_ptr_translated; eauto. - rewrite sig_preserved. auto. - econstructor; eauto. constructor. -Qed. - -Lemma transf_final_states: - forall st1 st2 r, - match_states st1 st2 -> final_state st1 r -> final_state st2 r. -Proof. - intros. inv H0. inv H. inv STK. econstructor. auto. -Qed. - -Theorem transf_program_correct: - forward_simulation (Linear.semantics prog) (Linear.semantics tprog). -Proof. - eapply forward_simulation_opt. - eexact symbols_preserved. - eexact transf_initial_states. - eexact transf_final_states. - eexact transf_step_correct. -Qed. - -End PRESERVATION. diff --git a/backend/RREtyping.v b/backend/RREtyping.v deleted file mode 100644 index 170d8ad..0000000 --- a/backend/RREtyping.v +++ /dev/null @@ -1,110 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Proof of type preservation for the [RRE] pass. *) - -Require Import Coqlib. -Require Import AST. -Require Import Locations. -Require Import Linear. -Require Import Lineartyping. -Require Import Conventions. -Require Import RRE. -Require Import RREproof. - -Remark wt_cons: - forall f c i, wt_instr f i -> wt_code f c -> wt_code f (i::c). -Proof. - intros; red; intros. simpl in H1; destruct H1. congruence. auto. -Qed. - -Hint Constructors wt_instr : linearty. -Hint Resolve wt_cons: linearty. - -Definition wt_eqs (eqs: equations) := - forall e, In e eqs -> slot_type (e_slot e) = mreg_type (e_reg e). - -Lemma wt_eqs_nil: - wt_eqs nil. -Proof. red; simpl; tauto. Qed. - -Lemma wt_eqs_cons: - forall r s eqs, - slot_type s = mreg_type r -> wt_eqs eqs -> wt_eqs (mkeq r s :: eqs). -Proof. - intros; red; simpl; intros. destruct H1. subst; simpl; auto. auto. -Qed. - -Lemma wt_kill_loc: - forall l eqs, - wt_eqs eqs -> wt_eqs (kill_loc l eqs). -Proof. - intros; red; intros. exploit In_kill_loc; eauto. intros [A B]. auto. -Qed. - -Lemma wt_kill_locs: - forall ll eqs, - wt_eqs eqs -> wt_eqs (kill_locs ll eqs). -Proof. - intros; red; intros. exploit In_kill_locs; eauto. intros [A B]. auto. -Qed. - -Lemma wt_kill_temps: - forall eqs, wt_eqs eqs -> wt_eqs (kill_temps eqs). -Proof. - exact (wt_kill_locs temporaries). -Qed. - -Lemma wt_kill_at_move: - forall eqs, wt_eqs eqs -> wt_eqs (kill_at_move eqs). -Proof. - exact (wt_kill_locs destroyed_at_move). -Qed. - -Hint Resolve wt_eqs_nil wt_eqs_cons wt_kill_loc wt_kill_locs - wt_kill_temps wt_kill_at_move: linearty. - -Lemma wt_kill_op: - forall op eqs, wt_eqs eqs -> wt_eqs (kill_op op eqs). -Proof. - intros; destruct op; simpl; apply wt_kill_locs; auto. -Qed. - -Hint Resolve wt_kill_op: linearty. - -Lemma wt_transf_code: - forall f c eqs, wt_code f c -> wt_eqs eqs -> - wt_code (transf_function f) (transf_code eqs c nil). -Proof. - induction c; intros; simpl. - red; simpl; tauto. - assert (WI: wt_instr f a) by auto with coqlib. - assert (WC: wt_code f c) by (red; auto with coqlib). - clear H. - inv WI; rewrite ? transf_code_eq; auto 10 with linearty. - destruct (is_incoming s) eqn:?. auto with linearty. - destruct (contains_equation s r eqs). auto with linearty. - destruct (find_reg_containing s eqs) as [r'|] eqn:?; auto with linearty. - assert (mreg_type r' = mreg_type r). - exploit H0. eapply find_reg_containing_sound; eauto. simpl. congruence. - rewrite ! transf_code_eq. - destruct (safe_move_insertion c); auto 10 with linearty. -Qed. - -Lemma program_typing_preserved: - forall p, wt_program p -> wt_program (transf_program p). -Proof. - intros. red; intros. exploit transform_program_function; eauto. - intros [f0 [A B]]. subst f. exploit H; eauto. intros WTFD. - inv WTFD; simpl; constructor. red; simpl. - apply wt_transf_code; auto with linearty. -Qed. diff --git a/backend/RTLgen.v b/backend/RTLgen.v index 007191a..f5e34e4 100644 --- a/backend/RTLgen.v +++ b/backend/RTLgen.v @@ -362,18 +362,6 @@ Fixpoint alloc_regs (map: mapping) (al: exprlist) ret (r :: rl) end. -(** A variant of [alloc_regs] for two-address instructions: - reuse the result register as destination for the first argument. *) - -Definition alloc_regs_2addr (map: mapping) (al: exprlist) (rd: reg) - : mon (list reg) := - match al with - | Enil => - ret nil - | Econs a bl => - do rl <- alloc_regs map bl; ret (rd :: rl) - end. - (** [alloc_optreg] is used for function calls. If a destination is specified for the call, it is returned. Otherwise, a fresh register is returned. *) @@ -406,9 +394,7 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node) | Evar v => do r <- find_var map v; add_move r rd nd | Eop op al => - do rl <- if two_address_op op - then alloc_regs_2addr map al rd - else alloc_regs map al; + do rl <- alloc_regs map al; do no <- add_instr (Iop op rl rd nd); transl_exprlist map al rl no | Eload chunk addr al => @@ -427,6 +413,14 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node) transl_expr map b r nc | Eletvar n => do r <- find_letvar map n; add_move r rd nd + | Ebuiltin ef al => + do rl <- alloc_regs map al; + do no <- add_instr (Ibuiltin ef rl rd nd); + transl_exprlist map al rl no + | Eexternal id sg al => + do rl <- alloc_regs map al; + do no <- add_instr (Icall sg (inr id) rl rd nd); + transl_exprlist map al rl no end (** Translation of a list of expressions. The expressions are evaluated @@ -495,16 +489,6 @@ Fixpoint transl_switch (r: reg) (nexits: list node) (t: comptree) add_instr (Iop op (r :: nil) rt n3) end. -(** Detect a two-address operator at the top of an expression. *) - -Fixpoint expr_is_2addr_op (e: expr) : bool := - match e with - | Eop op _ => two_address_op op - | Econdition cond el e2 e3 => expr_is_2addr_op e2 || expr_is_2addr_op e3 - | Elet e1 e2 => expr_is_2addr_op e2 - | _ => false - end. - (** Translation of statements. [transl_stmt map s nd nexits nret rret] enriches the current CFG with the RTL instructions necessary to execute the CminorSel statement [s], and returns the node of the first @@ -524,12 +508,7 @@ Fixpoint transl_stmt (map: mapping) (s: stmt) (nd: node) ret nd | Sassign v b => do r <- find_var map v; - if expr_is_2addr_op b then - do rd <- new_reg; - do n1 <- add_move rd r nd; - transl_expr map b rd n1 - else - transl_expr map b r nd + transl_expr map b r nd | Sstore chunk addr al b => do rl <- alloc_regs map al; do r <- alloc_reg map b; diff --git a/backend/RTLgenaux.ml b/backend/RTLgenaux.ml index 363bc2b..150de5a 100644 --- a/backend/RTLgenaux.ml +++ b/backend/RTLgenaux.ml @@ -34,6 +34,8 @@ let rec size_expr = function 1 + size_exprs args + max (size_expr e1) (size_expr e2) | Elet(e1, e2) -> size_expr e1 + size_expr e2 | Eletvar n -> 0 + | Ebuiltin(ef, el) -> 2 + size_exprs el + | Eexternal(id, sg, el) -> 5 + size_exprs el and size_exprs = function | Enil -> 0 diff --git a/backend/RTLgenproof.v b/backend/RTLgenproof.v index 690611c..c3cae28 100644 --- a/backend/RTLgenproof.v +++ b/backend/RTLgenproof.v @@ -485,19 +485,18 @@ Section CORRECTNESS_EXPR. Variable sp: val. Variable e: env. -Variable m tm: mem. -Hypothesis mem_extends: Mem.extends m tm. +Variable m: mem. (** The proof of semantic preservation for the translation of expressions is a simulation argument based on diagrams of the following form: << I /\ P - e, le, m, a ------------- State cs code sp ns rs m + e, le, m, a ------------- State cs code sp ns rs tm || | || |* || | \/ v - e, le, m', v ------------ State cs code sp nd rs' m' + e, le, m, v ------------ State cs code sp nd rs' tm' I /\ Q >> where [tr_expr code map pr a ns nd rd] is assumed to hold. @@ -521,27 +520,31 @@ Hypothesis mem_extends: Mem.extends m tm. Definition transl_expr_prop (le: letenv) (a: expr) (v: val) : Prop := - forall cs f map pr ns nd rd rs dst + forall tm cs f map pr ns nd rd rs dst (MWF: map_wf map) (TE: tr_expr f.(fn_code) map pr a ns nd rd dst) - (ME: match_env map e le rs), - exists rs', - star step tge (State cs f sp ns rs tm) E0 (State cs f sp nd rs' tm) + (ME: match_env map e le rs) + (EXT: Mem.extends m tm), + exists rs', exists tm', + star step tge (State cs f sp ns rs tm) E0 (State cs f sp nd rs' tm') /\ match_env map (set_optvar dst v e) le rs' /\ Val.lessdef v rs'#rd - /\ (forall r, In r pr -> rs'#r = rs#r). + /\ (forall r, In r pr -> rs'#r = rs#r) + /\ Mem.extends m tm'. Definition transl_exprlist_prop (le: letenv) (al: exprlist) (vl: list val) : Prop := - forall cs f map pr ns nd rl rs + forall tm cs f map pr ns nd rl rs (MWF: map_wf map) (TE: tr_exprlist f.(fn_code) map pr al ns nd rl) - (ME: match_env map e le rs), - exists rs', - star step tge (State cs f sp ns rs tm) E0 (State cs f sp nd rs' tm) + (ME: match_env map e le rs) + (EXT: Mem.extends m tm), + exists rs', exists tm', + star step tge (State cs f sp ns rs tm) E0 (State cs f sp nd rs' tm') /\ match_env map e le rs' /\ Val.lessdef_list vl rs'##rl - /\ (forall r, In r pr -> rs'#r = rs#r). + /\ (forall r, In r pr -> rs'#r = rs#r) + /\ Mem.extends m tm'. (** The correctness of the translation is a huge induction over the Cminor evaluation derivation for the source program. To keep @@ -559,21 +562,23 @@ Proof. intros; red; intros. inv TE. exploit match_env_find_var; eauto. intro EQ. exploit tr_move_correct; eauto. intros [rs' [A [B C]]]. - exists rs'; split. eauto. + exists rs'; exists tm; split. eauto. destruct H2 as [[D E] | [D E]]. (* optimized case *) subst r dst. simpl. assert (forall r, rs'#r = rs#r). intros. destruct (Reg.eq r rd). subst r. auto. auto. split. eapply match_env_invariant; eauto. - split. congruence. auto. + split. congruence. + split; auto. (* general case *) split. apply match_env_invariant with (rs#rd <- (rs#r)). apply match_env_update_dest; auto. intros. rewrite Regmap.gsspec. destruct (peq r0 rd). congruence. auto. split. congruence. - intros. apply C. intuition congruence. + split. intros. apply C. intuition congruence. + auto. Qed. Lemma transl_expr_Eop_correct: @@ -586,9 +591,9 @@ Lemma transl_expr_Eop_correct: Proof. intros; red; intros. inv TE. (* normal case *) - exploit H0; eauto. intros [rs1 [EX1 [ME1 [RR1 RO1]]]]. + exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RR1 [RO1 EXT1]]]]]]. edestruct eval_operation_lessdef as [v' []]; eauto. - exists (rs1#rd <- v'). + exists (rs1#rd <- v'); exists tm1. (* Exec *) split. eapply star_right. eexact EX1. eapply exec_Iop; eauto. @@ -599,7 +604,9 @@ Proof. (* Result reg *) split. rewrite Regmap.gss. auto. (* Other regs *) - intros. rewrite Regmap.gso. auto. intuition congruence. + split. intros. rewrite Regmap.gso. auto. intuition congruence. +(* Mem *) + auto. Qed. Lemma transl_expr_Eload_correct: @@ -612,10 +619,10 @@ Lemma transl_expr_Eload_correct: transl_expr_prop le (Eload chunk addr args) v. Proof. intros; red; intros. inv TE. - exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]]. edestruct eval_addressing_lessdef as [vaddr' []]; eauto. edestruct Mem.loadv_extends as [v' []]; eauto. - exists (rs1#rd <- v'). + exists (rs1#rd <- v'); exists tm1. (* Exec *) split. eapply star_right. eexact EX1. eapply exec_Iload. eauto. instantiate (1 := vaddr'). rewrite <- H3. @@ -626,7 +633,9 @@ Proof. (* Result *) split. rewrite Regmap.gss. auto. (* Other regs *) - intros. rewrite Regmap.gso. auto. intuition congruence. + split. intros. rewrite Regmap.gso. auto. intuition congruence. +(* Mem *) + auto. Qed. Lemma transl_expr_Econdition_correct: @@ -640,11 +649,11 @@ Lemma transl_expr_Econdition_correct: transl_expr_prop le (Econdition cond al ifso ifnot) v. Proof. intros; red; intros; inv TE. inv H14. - exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]]. assert (tr_expr f.(fn_code) map pr (if vcond then ifso else ifnot) (if vcond then ntrue else nfalse) nd rd dst). destruct vcond; auto. - exploit H3; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]]. - exists rs2. + exploit H3; eauto. intros [rs2 [tm2 [EX2 [ME2 [RES2 [OTHER2 EXT2]]]]]]. + exists rs2; exists tm2. (* Exec *) split. eapply star_trans. eexact EX1. eapply star_left. eapply exec_Icond. eauto. eapply eval_condition_lessdef; eauto. reflexivity. @@ -654,7 +663,9 @@ Proof. (* Result value *) split. assumption. (* Other regs *) - intros. transitivity (rs1#r); auto. + split. intros. transitivity (rs1#r); auto. +(* Mem *) + auto. Qed. Lemma transl_expr_Elet_correct: @@ -666,12 +677,12 @@ Lemma transl_expr_Elet_correct: transl_expr_prop le (Elet a1 a2) v2. Proof. intros; red; intros; inv TE. - exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]]. assert (map_wf (add_letvar map r)). eapply add_letvar_wf; eauto. exploit H2; eauto. eapply match_env_bind_letvar; eauto. - intros [rs2 [EX2 [ME3 [RES2 OTHER2]]]]. - exists rs2. + intros [rs2 [tm2 [EX2 [ME3 [RES2 [OTHER2 EXT2]]]]]]. + exists rs2; exists tm2. (* Exec *) split. eapply star_trans. eexact EX1. eexact EX2. auto. (* Match-env *) @@ -679,7 +690,9 @@ Proof. (* Result *) split. assumption. (* Other regs *) - intros. transitivity (rs1#r0); auto. + split. intros. transitivity (rs1#r0); auto. +(* Mem *) + auto. Qed. Lemma transl_expr_Eletvar_correct: @@ -689,7 +702,7 @@ Lemma transl_expr_Eletvar_correct: Proof. intros; red; intros; inv TE. exploit tr_move_correct; eauto. intros [rs1 [EX1 [RES1 OTHER1]]]. - exists rs1. + exists rs1; exists tm. (* Exec *) split. eexact EX1. (* Match-env *) @@ -706,10 +719,73 @@ Proof. (* Result *) split. rewrite RES1. eapply match_env_find_letvar; eauto. (* Other regs *) - intros. + split. intros. destruct H2 as [[A B] | [A B]]. destruct (Reg.eq r0 rd); subst; auto. apply OTHER1. intuition congruence. +(* Mem *) + auto. +Qed. + +Lemma transl_expr_Ebuiltin_correct: + forall le ef al vl v, + eval_exprlist ge sp e m le al vl -> + transl_exprlist_prop le al vl -> + external_call ef ge vl m E0 v m -> + transl_expr_prop le (Ebuiltin ef al) v. +Proof. + intros; red; intros. inv TE. + exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RR1 [RO1 EXT1]]]]]]. + exploit external_call_mem_extends; eauto. + intros [v' [tm2 [A [B [C [D E]]]]]]. + exists (rs1#rd <- v'); exists tm2. +(* Exec *) + split. eapply star_right. eexact EX1. + eapply exec_Ibuiltin; eauto. + eapply external_call_symbols_preserved; eauto. exact symbols_preserved. exact varinfo_preserved. + reflexivity. +(* Match-env *) + split. eauto with rtlg. +(* Result reg *) + split. rewrite Regmap.gss. auto. +(* Other regs *) + split. intros. rewrite Regmap.gso. auto. intuition congruence. +(* Mem *) + auto. +Qed. + +Lemma transl_expr_Eexternal_correct: + forall le id sg al b ef vl v, + Genv.find_symbol ge id = Some b -> + Genv.find_funct_ptr ge b = Some (External ef) -> + ef_sig ef = sg -> + eval_exprlist ge sp e m le al vl -> + transl_exprlist_prop le al vl -> + external_call ef ge vl m E0 v m -> + transl_expr_prop le (Eexternal id sg al) v. +Proof. + intros; red; intros. inv TE. + exploit H3; eauto. intros [rs1 [tm1 [EX1 [ME1 [RR1 [RO1 EXT1]]]]]]. + exploit external_call_mem_extends; eauto. + intros [v' [tm2 [A [B [C [D E]]]]]]. + exploit function_ptr_translated; eauto. simpl. intros [tf [P Q]]. inv Q. + exists (rs1#rd <- v'); exists tm2. +(* Exec *) + split. eapply star_trans. eexact EX1. + eapply star_left. eapply exec_Icall; eauto. + simpl. rewrite symbols_preserved. rewrite H. eauto. auto. + eapply star_left. eapply exec_function_external. + eapply external_call_symbols_preserved; eauto. exact symbols_preserved. exact varinfo_preserved. + apply star_one. apply exec_return. + reflexivity. reflexivity. reflexivity. +(* Match-env *) + split. eauto with rtlg. +(* Result reg *) + split. rewrite Regmap.gss. auto. +(* Other regs *) + split. intros. rewrite Regmap.gso. auto. intuition congruence. +(* Mem *) + auto. Qed. Lemma transl_exprlist_Enil_correct: @@ -717,7 +793,7 @@ Lemma transl_exprlist_Enil_correct: transl_exprlist_prop le Enil nil. Proof. intros; red; intros; inv TE. - exists rs. + exists rs; exists tm. split. apply star_refl. split. assumption. split. constructor. @@ -734,9 +810,9 @@ Lemma transl_exprlist_Econs_correct: transl_exprlist_prop le (Econs a1 al) (v1 :: vl). Proof. intros; red; intros; inv TE. - exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. - exploit H2; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]]. - exists rs2. + exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]]. + exploit H2; eauto. intros [rs2 [tm2 [EX2 [ME2 [RES2 [OTHER2 EXT2]]]]]]. + exists rs2; exists tm2. (* Exec *) split. eapply star_trans. eexact EX1. eexact EX2. auto. (* Match-env *) @@ -746,9 +822,11 @@ Proof. simpl; tauto. auto. (* Other regs *) - intros. transitivity (rs1#r). + split. intros. transitivity (rs1#r). apply OTHER2; auto. simpl; tauto. apply OTHER1; auto. +(* Mem *) + auto. Qed. Theorem transl_expr_correct: @@ -765,6 +843,8 @@ Proof transl_expr_Econdition_correct transl_expr_Elet_correct transl_expr_Eletvar_correct + transl_expr_Ebuiltin_correct + transl_expr_Eexternal_correct transl_exprlist_Enil_correct transl_exprlist_Econs_correct). @@ -782,6 +862,8 @@ Proof transl_expr_Econdition_correct transl_expr_Elet_correct transl_expr_Eletvar_correct + transl_expr_Ebuiltin_correct + transl_expr_Eexternal_correct transl_exprlist_Enil_correct transl_exprlist_Econs_correct). @@ -1019,37 +1101,25 @@ Proof. (* assign *) inv TS. - (* optimized translation (not 2 addr) *) exploit transl_expr_correct; eauto. - intros [rs' [A [B [C D]]]]. + intros [rs' [tm' [A [B [C [D E]]]]]]. econstructor; split. right; split. eauto. Lt_state. econstructor; eauto. constructor. - (* alternate translation (2 addr) *) - exploit transl_expr_correct; eauto. - intros [rs' [A [B [C D]]]]. - exploit tr_move_correct; eauto. - intros [rs'' [P [Q R]]]. - econstructor; split. - right; split. eapply star_trans. eexact A. eexact P. traceEq. Lt_state. - econstructor; eauto. constructor. - simpl in B. apply match_env_invariant with (rs'#r <- (rs'#rd)). - apply match_env_update_var; auto. - intros. rewrite Regmap.gsspec. destruct (peq r0 r). congruence. auto. (* store *) inv TS. exploit transl_exprlist_correct; eauto. - intros [rs' [A [B [C D]]]]. + intros [rs' [tm' [A [B [C [D E]]]]]]. exploit transl_expr_correct; eauto. - intros [rs'' [E [F [G J]]]]. + intros [rs'' [tm'' [F [G [J [K L]]]]]]. assert (Val.lessdef_list vl rs''##rl). replace (rs'' ## rl) with (rs' ## rl). auto. - apply list_map_exten. intros. apply J. auto. + apply list_map_exten. intros. apply K. auto. edestruct eval_addressing_lessdef as [vaddr' []]; eauto. - edestruct Mem.storev_extends as [tm' []]; eauto. + edestruct Mem.storev_extends as [tm''' []]; eauto. econstructor; split. - left; eapply plus_right. eapply star_trans. eexact A. eexact E. reflexivity. + left; eapply plus_right. eapply star_trans. eexact A. eexact F. reflexivity. eapply exec_Istore with (a := vaddr'). eauto. rewrite <- H4. apply eval_addressing_preserved. exact symbols_preserved. eauto. traceEq. @@ -1059,9 +1129,9 @@ Proof. inv TS; inv H. (* indirect *) exploit transl_expr_correct; eauto. - intros [rs' [A [B [C D]]]]. + intros [rs' [tm' [A [B [C [D X]]]]]]. exploit transl_exprlist_correct; eauto. - intros [rs'' [E [F [G J]]]]. + intros [rs'' [tm'' [E [F [G [J Y]]]]]]. exploit functions_translated; eauto. intros [tf' [P Q]]. econstructor; split. left; eapply plus_right. eapply star_trans. eexact A. eexact E. reflexivity. @@ -1071,7 +1141,7 @@ Proof. constructor; auto. econstructor; eauto. (* direct *) exploit transl_exprlist_correct; eauto. - intros [rs'' [E [F [G J]]]]. + intros [rs'' [tm'' [E [F [G [J Y]]]]]]. exploit functions_translated; eauto. intros [tf' [P Q]]. econstructor; split. left; eapply plus_right. eexact E. @@ -1085,13 +1155,13 @@ Proof. inv TS; inv H. (* indirect *) exploit transl_expr_correct; eauto. - intros [rs' [A [B [C D]]]]. + intros [rs' [tm' [A [B [C [D X]]]]]]. exploit transl_exprlist_correct; eauto. - intros [rs'' [E [F [G J]]]]. + intros [rs'' [tm'' [E [F [G [J Y]]]]]]. exploit functions_translated; eauto. intros [tf' [P Q]]. exploit match_stacks_call_cont; eauto. intros [U V]. assert (fn_stacksize tf = fn_stackspace f). inv TF; auto. - edestruct Mem.free_parallel_extends as [tm' []]; eauto. + edestruct Mem.free_parallel_extends as [tm''' []]; eauto. econstructor; split. left; eapply plus_right. eapply star_trans. eexact A. eexact E. reflexivity. eapply exec_Itailcall; eauto. simpl. rewrite J. destruct C. eauto. discriminate P. simpl; auto. @@ -1101,11 +1171,11 @@ Proof. constructor; auto. (* direct *) exploit transl_exprlist_correct; eauto. - intros [rs'' [E [F [G J]]]]. + intros [rs'' [tm'' [E [F [G [J Y]]]]]]. exploit functions_translated; eauto. intros [tf' [P Q]]. exploit match_stacks_call_cont; eauto. intros [U V]. assert (fn_stacksize tf = fn_stackspace f). inv TF; auto. - edestruct Mem.free_parallel_extends as [tm' []]; eauto. + edestruct Mem.free_parallel_extends as [tm''' []]; eauto. econstructor; split. left; eapply plus_right. eexact E. eapply exec_Itailcall; eauto. simpl. rewrite symbols_preserved. rewrite H5. @@ -1118,8 +1188,8 @@ Proof. (* builtin *) inv TS. exploit transl_exprlist_correct; eauto. - intros [rs' [E [F [G J]]]]. - edestruct external_call_mem_extends as [tv [tm' [A [B [C D]]]]]; eauto. + intros [rs' [tm' [E [F [G [J K]]]]]]. + edestruct external_call_mem_extends as [tv [tm'' [A [B [C D]]]]]; eauto. econstructor; split. left. eapply plus_right. eexact E. eapply exec_Ibuiltin. eauto. @@ -1137,7 +1207,7 @@ Proof. (* ifthenelse *) inv TS. inv H13. - exploit transl_exprlist_correct; eauto. intros [rs' [A [B [C D]]]]. + exploit transl_exprlist_correct; eauto. intros [rs' [tm' [A [B [C [D E]]]]]]. econstructor; split. left. eapply plus_right. eexact A. eapply exec_Icond; eauto. eapply eval_condition_lessdef; eauto. traceEq. @@ -1179,7 +1249,7 @@ Proof. inv TS. exploit validate_switch_correct; eauto. intro CTM. exploit transl_expr_correct; eauto. - intros [rs' [A [B [C D]]]]. + intros [rs' [tm' [A [B [C [D X]]]]]]. exploit transl_switch_correct; eauto. inv C. auto. intros [nd [rs'' [E [F G]]]]. econstructor; split. @@ -1200,10 +1270,10 @@ Proof. (* return some *) inv TS. exploit transl_expr_correct; eauto. - intros [rs' [A [B [C D]]]]. + intros [rs' [tm' [A [B [C [D E]]]]]]. exploit match_stacks_call_cont; eauto. intros [U V]. inversion TF. - edestruct Mem.free_parallel_extends as [tm' []]; eauto. + edestruct Mem.free_parallel_extends as [tm'' []]; eauto. econstructor; split. left; eapply plus_right. eexact A. eapply exec_Ireturn; eauto. rewrite H4; eauto. traceEq. diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v index c50c702..d8d5dc8 100644 --- a/backend/RTLgenspec.v +++ b/backend/RTLgenspec.v @@ -500,32 +500,6 @@ Proof. right; eauto with rtlg. Qed. -Lemma alloc_regs_2addr_valid: - forall al rd s1 s2 map rl i, - map_valid map s1 -> - reg_valid rd s1 -> - alloc_regs_2addr map al rd s1 = OK rl s2 i -> - regs_valid rl s2. -Proof. - unfold alloc_regs_2addr; intros. - destruct al; monadInv H1. - apply regs_valid_nil. - apply regs_valid_cons. eauto with rtlg. eauto with rtlg. -Qed. -Hint Resolve alloc_regs_2addr_valid: rtlg. - -Lemma alloc_regs_2addr_fresh_or_in_map: - forall map al rd s rl s' i, - map_valid map s -> - alloc_regs_2addr map al rd s = OK rl s' i -> - forall r, In r rl -> r = rd \/ reg_in_map map r \/ reg_fresh r s. -Proof. - unfold alloc_regs_2addr; intros. - destruct al; monadInv H0. - elim H1. - simpl in H1; destruct H1. auto. right. eapply alloc_regs_fresh_or_in_map; eauto. -Qed. - (** A register is an adequate target for holding the value of an expression if - either the register is associated with a Cminor let-bound variable @@ -628,24 +602,8 @@ Proof. apply regs_valid_cons; eauto with rtlg. Qed. -Lemma alloc_regs_2addr_target_ok: - forall map al rd pr s1 rl s2 i, - map_valid map s1 -> - regs_valid pr s1 -> - reg_valid rd s1 -> - ~(reg_in_map map rd) -> ~In rd pr -> - alloc_regs_2addr map al rd s1 = OK rl s2 i -> - target_regs_ok map pr al rl. -Proof. - unfold alloc_regs_2addr; intros. destruct al; monadInv H4. - constructor. - constructor. constructor; auto. - eapply alloc_regs_target_ok; eauto. - apply regs_valid_cons; auto. -Qed. - Hint Resolve new_reg_target_ok alloc_reg_target_ok - alloc_regs_target_ok alloc_regs_2addr_target_ok: rtlg. + alloc_regs_target_ok: rtlg. (** The following predicate is a variant of [target_reg_ok] used to characterize registers that are adequate for holding the return @@ -760,6 +718,17 @@ Inductive tr_expr (c: code): ((rd = r /\ dst = None) \/ (reg_map_ok map rd dst /\ ~In rd pr)) -> tr_move c ns r nd rd -> tr_expr c map pr (Eletvar n) ns nd rd dst + | tr_Ebuiltin: forall map pr ef al ns nd rd dst n1 rl, + tr_exprlist c map pr al ns n1 rl -> + c!n1 = Some (Ibuiltin ef rl rd nd) -> + reg_map_ok map rd dst -> ~In rd pr -> + tr_expr c map pr (Ebuiltin ef al) ns nd rd dst + | tr_Eexternal: forall map pr id sg al ns nd rd dst n1 rl, + tr_exprlist c map pr al ns n1 rl -> + c!n1 = Some (Icall sg (inr _ id) rl rd nd) -> + reg_map_ok map rd dst -> ~In rd pr -> + tr_expr c map pr (Eexternal id sg al) ns nd rd dst + (** [tr_condition c map pr cond al ns ntrue nfalse rd] holds if the graph [c], starting at node [ns], contains instructions that compute the truth @@ -834,13 +803,8 @@ Inductive tr_stmt (c: code) (map: mapping): | tr_Sskip: forall ns nexits ngoto nret rret, tr_stmt c map Sskip ns ns nexits ngoto nret rret | tr_Sassign: forall id a ns nd nexits ngoto nret rret r, - map.(map_vars)!id = Some r -> expr_is_2addr_op a = false -> - tr_expr c map nil a ns nd r (Some id) -> - tr_stmt c map (Sassign id a) ns nd nexits ngoto nret rret - | tr_Sassign_2: forall id a ns n1 nd nexits ngoto nret rret rd r, map.(map_vars)!id = Some r -> - tr_expr c map nil a ns n1 rd None -> - tr_move c n1 rd nd r -> + tr_expr c map nil a ns nd r (Some id) -> tr_stmt c map (Sassign id a) ns nd nexits ngoto nret rret | tr_Sstore: forall chunk addr al b ns nd nexits ngoto nret rret rd n1 rl n2, tr_exprlist c map nil al ns n1 rl -> @@ -1008,9 +972,7 @@ Proof. inv OK. left; split; congruence. right; eauto with rtlg. eapply add_move_charact; eauto. (* Eop *) - inv OK. destruct (two_address_op o). - econstructor; eauto with rtlg. - eapply transl_exprlist_charact; eauto with rtlg. + inv OK. econstructor; eauto with rtlg. eapply transl_exprlist_charact; eauto with rtlg. (* Eload *) @@ -1045,6 +1007,14 @@ Proof. inv OK. left; split; congruence. right; eauto with rtlg. eapply add_move_charact; eauto. monadInv EQ1. + (* Ebuiltin *) + inv OK. + econstructor; eauto with rtlg. + eapply transl_exprlist_charact; eauto with rtlg. + (* Eexternal *) + inv OK. + econstructor; eauto with rtlg. + eapply transl_exprlist_charact; eauto with rtlg. (* Lists *) @@ -1070,25 +1040,21 @@ Lemma transl_expr_assign_charact: forall id a map rd nd s ns s' INCR (TR: transl_expr map a rd nd s = OK ns s' INCR) (WF: map_valid map s) - (OK: reg_map_ok map rd (Some id)) - (NOT2ADDR: expr_is_2addr_op a = false), + (OK: reg_map_ok map rd (Some id)), tr_expr s'.(st_code) map nil a ns nd rd (Some id). Proof. -Opaque two_address_op. induction a; intros; monadInv TR; saturateTrans. (* Evar *) generalize EQ; unfold find_var. caseEq (map_vars map)!i; intros; inv EQ1. econstructor; eauto. eapply add_move_charact; eauto. (* Eop *) - simpl in NOT2ADDR. rewrite NOT2ADDR in EQ. econstructor; eauto with rtlg. eapply transl_exprlist_charact; eauto with rtlg. (* Eload *) econstructor; eauto with rtlg. eapply transl_exprlist_charact; eauto with rtlg. (* Econdition *) - simpl in NOT2ADDR. destruct (orb_false_elim _ _ NOT2ADDR). econstructor; eauto with rtlg. econstructor; eauto with rtlg. eapply transl_exprlist_charact; eauto with rtlg. apply tr_expr_incr with s2; auto. @@ -1096,7 +1062,6 @@ Opaque two_address_op. apply tr_expr_incr with s1; auto. eapply IHa2; eauto 2 with rtlg. (* Elet *) - simpl in NOT2ADDR. econstructor. eapply new_reg_not_in_map; eauto with rtlg. eapply transl_expr_charact; eauto 3 with rtlg. apply tr_expr_incr with s1; auto. @@ -1109,6 +1074,12 @@ Opaque two_address_op. econstructor; eauto with rtlg. eapply add_move_charact; eauto. monadInv EQ1. + (* Ebuiltin *) + econstructor; eauto with rtlg. + eapply transl_exprlist_charact; eauto with rtlg. + (* Eexternal *) + econstructor; eauto with rtlg. + eapply transl_exprlist_charact; eauto with rtlg. Qed. Lemma alloc_optreg_map_ok: @@ -1141,7 +1112,6 @@ Proof. generalize tr_expr_incr tr_condition_incr tr_exprlist_incr; intros I1 I2 I3. pose (AT := fun pc i => instr_at_incr s1 s2 pc i EXT). induction 1; try (econstructor; eauto; fail). - eapply tr_Sassign_2; eauto. eapply tr_move_incr; eauto. econstructor; eauto. eapply tr_switch_incr; eauto. Qed. @@ -1210,10 +1180,6 @@ Proof. constructor. (* Sassign *) revert EQ. unfold find_var. case_eq (map_vars map)!i; intros; monadInv EQ. - remember (expr_is_2addr_op e) as is2a. destruct is2a. - monadInv EQ0. eapply tr_Sassign_2; eauto. - eapply transl_expr_charact; eauto with rtlg. - apply tr_move_incr with s1; auto. eapply add_move_charact; eauto. eapply tr_Sassign; eauto. eapply transl_expr_assign_charact; eauto with rtlg. constructor. auto. diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v index a1f9518..7ed7344 100644 --- a/backend/RTLtyping.v +++ b/backend/RTLtyping.v @@ -22,6 +22,7 @@ Require Import Registers. Require Import Globalenvs. Require Import Values. Require Import Integers. +Require Import Memory. Require Import Events. Require Import RTL. Require Import Conventions. @@ -109,7 +110,6 @@ Inductive wt_instr : instruction -> Prop := forall ef args res s, List.map env args = (ef_sig ef).(sig_args) -> env res = proj_sig_res (ef_sig ef) -> - arity_ok (ef_sig ef).(sig_args) = true \/ ef_reloads ef = false -> valid_successor s -> wt_instr (Ibuiltin ef args res s) | wt_Icond: @@ -282,10 +282,7 @@ Definition type_instr (e: typenv) (i: instruction) : res typenv := let sig := ef_sig ef in do x <- check_successor s; do e1 <- type_regs e args sig.(sig_args); - do e2 <- type_reg e1 res (proj_sig_res sig); - if (negb (ef_reloads ef)) || arity_ok sig.(sig_args) - then OK e2 - else Error(msg "cannot reload builtin") + type_reg e1 res (proj_sig_res sig) | Icond cond args s1 s2 => do x1 <- check_successor s1; do x2 <- check_successor s2; @@ -518,9 +515,6 @@ Proof. destruct (opt_typ_eq (sig_res s) (sig_res (fn_sig f))); try discriminate. destruct (tailcall_is_possible s) eqn:TCIP; inv EQ2. eauto with ty. -- (* builtin *) - destruct (negb (ef_reloads e0) || arity_ok (sig_args (ef_sig e0))) eqn:E; inv EQ3. - eauto with ty. - (* jumptable *) destruct (zle (list_length_z l * 4) Int.max_unsigned); inv EQ2. eauto with ty. @@ -572,11 +566,9 @@ Proof. eapply type_regs_sound; eauto with ty. apply tailcall_is_possible_correct; auto. - (* builtin *) - destruct (negb (ef_reloads e0) || arity_ok (sig_args (ef_sig e0))) eqn:E; inv EQ3. constructor. eapply type_regs_sound; eauto with ty. eapply type_reg_sound; eauto. - destruct (ef_reloads e0); auto. eauto with ty. - (* cond *) constructor. @@ -833,10 +825,7 @@ Proof. exploit type_reg_complete. eexact B. eauto. intros [e2 [C D]]. exists e2; split; auto. rewrite check_successor_complete by auto; simpl. - rewrite A; simpl; rewrite C; simpl. - destruct H2; rewrite H2. - rewrite orb_true_r. auto. - auto. + rewrite A; simpl; rewrite C; auto. - (* cond *) exploit type_regs_complete. eauto. eauto. intros [e1 [A B]]. exists e1; split; auto. @@ -982,6 +971,34 @@ Proof. apply wt_regset_assign; auto. Qed. +Lemma wt_exec_Iop: + forall (ge: genv) env f sp op args res s rs m v, + wt_instr env f (Iop op args res s) -> + eval_operation ge sp op rs##args m = Some v -> + wt_regset env rs -> + wt_regset env (rs#res <- v). +Proof. + intros. inv H. + simpl in H0. inv H0. apply wt_regset_assign; auto. rewrite <- H4; apply H1. + apply wt_regset_assign; auto. + replace (env res) with (snd (type_of_operation op)). + eapply type_of_operation_sound; eauto. + rewrite <- H7; auto. +Qed. + +Lemma wt_exec_Iload: + forall env f chunk addr args dst s m a v rs, + wt_instr env f (Iload chunk addr args dst s) -> + Mem.loadv chunk m a = Some v -> + wt_regset env rs -> + wt_regset env (rs#dst <- v). +Proof. + intros. destruct a; simpl in H0; try discriminate. + apply wt_regset_assign; auto. + inv H. rewrite H8. + eapply Mem.load_type; eauto. +Qed. + Inductive wt_stackframes: list stackframe -> option typ -> Prop := | wt_stackframes_nil: wt_stackframes nil (Some Tint) diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml new file mode 100644 index 0000000..fe981e3 --- /dev/null +++ b/backend/Regalloc.ml @@ -0,0 +1,986 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(* Register allocation by coloring of an interference graph *) + +(* The algorithm in a nutshell: + + - Split live ranges + - Convert from RTL to XTL + - Eliminate dead code + - Repeat: + . Construct interference graph + . Color interference graph using IRC algorithm + . Check for variables that were spilled and must be in registers + . If none, convert to LTL and exit. + . If some, insert spill and reload instructions and try again + End Repeat +*) + +open Format +open Clflags +open Camlcoq +open Datatypes +open Coqlib +open Maps +open AST +open Memdata +open Kildall +open Registers +open Op +open Machregs +open Locations +open Conventions1 +open Conventions +open IRC +open XTL + +(* Detection of 2-address operations *) + +let is_two_address op args = + if two_address_op op then + match args with + | [] -> assert false + | arg1 :: argl -> Some(arg1, argl) + else None + +(* For tracing *) + +let destination_alloctrace : string option ref = ref None +let pp = ref std_formatter + +let init_trace () = + if !option_dalloctrace && !pp == std_formatter then begin + match !destination_alloctrace with + | None -> () (* should not happen *) + | Some f -> pp := formatter_of_out_channel (open_out f) + end + + +(**************** Initial conversion from RTL to XTL **************) + +let vreg tyenv r = V(r, tyenv r) + +let vregs tyenv rl = List.map (vreg tyenv) rl + +let rec expand_regs tyenv = function + | [] -> [] + | r :: rl -> + match tyenv r with + | Tlong -> V(r, Tint) :: V(twin_reg r, Tint) :: expand_regs tyenv rl + | ty -> V(r, ty) :: expand_regs tyenv rl + +let constrain_reg v c = + match c with + | None -> v + | Some mr -> L(R mr) + +let rec constrain_regs vl cl = + match vl, cl with + | [], _ -> [] + | v1 :: vl', [] -> vl + | v1 :: vl', Some mr1 :: cl' -> L(R mr1) :: constrain_regs vl' cl' + | v1 :: vl', None :: cl' -> v1 :: constrain_regs vl' cl' + +let move v1 v2 k = + if v1 = v2 then k else Xmove(v1, v2) :: k + +let rec movelist vl1 vl2 k = + match vl1, vl2 with + | [], [] -> k + | v1 :: vl1, v2 :: vl2 -> move v1 v2 (movelist vl1 vl2 k) + | _, _ -> assert false + +let xparmove srcs dsts k = + assert (List.length srcs = List.length dsts); + match srcs, dsts with + | [], [] -> k + | [src], [dst] -> Xmove(src, dst) :: k + | _, _ -> Xparmove(srcs, dsts, new_temp Tint, new_temp Tfloat) :: k + +(* Return the XTL basic block corresponding to the given RTL instruction. + Move and parallel move instructions are introduced to honor calling + conventions and register constraints on some operations. + 64-bit integer variables are split in two 32-bit halves. *) + +let block_of_RTL_instr funsig tyenv = function + | RTL.Inop s -> + [Xbranch s] + | RTL.Iop(Omove, [arg], res, s) -> + if tyenv arg = Tlong then + [Xmove(V(arg, Tint), V(res, Tint)); + Xmove(V(twin_reg arg, Tint), V(twin_reg res, Tint)); + Xbranch s] + else + [Xmove(vreg tyenv arg, vreg tyenv res); Xbranch s] + | RTL.Iop(Omakelong, [arg1; arg2], res, s) -> + [Xmove(V(arg1, Tint), V(res, Tint)); + Xmove(V(arg2, Tint), V(twin_reg res, Tint)); + Xbranch s] + | RTL.Iop(Olowlong, [arg], res, s) -> + [Xmove(V(twin_reg arg, Tint), V(res, Tint)); Xbranch s] + | RTL.Iop(Ohighlong, [arg], res, s) -> + [Xmove(V(arg, Tint), V(res, Tint)); Xbranch s] + | RTL.Iop(op, args, res, s) -> + let (cargs, cres) = mregs_for_operation op in + let args1 = vregs tyenv args and res1 = vreg tyenv res in + let args2 = constrain_regs args1 cargs and res2 = constrain_reg res1 cres in + let (args3, res3) = + match is_two_address op args2 with + | None -> + (args2, res2) + | Some(arg, args2') -> + if arg = res2 || not (List.mem res2 args2') then + (args2, res2) + else + let t = new_temp (tyenv res) in (t :: args2', t) in + movelist args1 args3 (Xop(op, args3, res3) :: move res3 res1 [Xbranch s]) + | RTL.Iload(chunk, addr, args, dst, s) -> + if chunk = Mint64 then begin + match offset_addressing addr (coqint_of_camlint 4l) with + | None -> assert false + | Some addr' -> + [Xload(Mint32, addr, vregs tyenv args, + V((if big_endian then dst else twin_reg dst), Tint)); + Xload(Mint32, addr', vregs tyenv args, + V((if big_endian then twin_reg dst else dst), Tint)); + Xbranch s] + end else + [Xload(chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s] + | RTL.Istore(chunk, addr, args, src, s) -> + if chunk = Mint64 then begin + match offset_addressing addr (coqint_of_camlint 4l) with + | None -> assert false + | Some addr' -> + [Xstore(Mint32, addr, vregs tyenv args, + V((if big_endian then src else twin_reg src), Tint)); + Xstore(Mint32, addr', vregs tyenv args, + V((if big_endian then twin_reg src else src), Tint)); + Xbranch s] + end else + [Xstore(chunk, addr, vregs tyenv args, vreg tyenv src); Xbranch s] + | RTL.Icall(sg, ros, args, res, s) -> + let args' = vlocs (loc_arguments sg) + and res' = vmregs (loc_result sg) in + xparmove (expand_regs tyenv args) args' + (Xcall(sg, sum_left_map (vreg tyenv) ros, args', res') :: + xparmove res' (expand_regs tyenv [res]) + [Xbranch s]) + | RTL.Itailcall(sg, ros, args) -> + let args' = vlocs (loc_arguments sg) in + xparmove (expand_regs tyenv args) args' + [Xtailcall(sg, sum_left_map (vreg tyenv) ros, args')] + | RTL.Ibuiltin(ef, args, res, s) -> + let (cargs, cres) = mregs_for_builtin ef in + let args1 = expand_regs tyenv args and res1 = expand_regs tyenv [res] in + let args2 = constrain_regs args1 cargs and res2 = constrain_regs res1 cres in + movelist args1 args2 + (Xbuiltin(ef, args2, res2) :: movelist res2 res1 [Xbranch s]) + | RTL.Icond(cond, args, s1, s2) -> + [Xcond(cond, vregs tyenv args, s1, s2)] + | RTL.Ijumptable(arg, tbl) -> + [Xjumptable(vreg tyenv arg, tbl)] + | RTL.Ireturn None -> + [Xreturn []] + | RTL.Ireturn (Some arg) -> + let args' = vmregs (loc_result funsig) in + xparmove (expand_regs tyenv [arg]) args' [Xreturn args'] + +(* One above the [pc] nodes of the given RTL function *) + +let next_pc f = + PTree.fold + (fun npc pc i -> if P.lt pc npc then npc else P.succ pc) + f.RTL.fn_code P.one + +(* Translate an RTL function to an XTL function *) + +let function_of_RTL_function f tyenv = + let xc = PTree.map1 (block_of_RTL_instr f.RTL.fn_sig tyenv) f.RTL.fn_code in + (* Add moves for function parameters *) + let pc_entrypoint = next_pc f in + let b_entrypoint = + xparmove (vlocs (loc_parameters f.RTL.fn_sig)) + (expand_regs tyenv f.RTL.fn_params) + [Xbranch f.RTL.fn_entrypoint] in + { fn_sig = f.RTL.fn_sig; + fn_stacksize = f.RTL.fn_stacksize; + fn_entrypoint = pc_entrypoint; + fn_code = PTree.set pc_entrypoint b_entrypoint xc } + + +(***************** Liveness analysis *****************) + +let vset_removelist vl after = List.fold_right VSet.remove vl after +let vset_addlist vl after = List.fold_right VSet.add vl after +let vset_addros vos after = + match vos with Coq_inl v -> VSet.add v after | Coq_inr id -> after + +let live_before instr after = + match instr with + | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) -> + if VSet.mem dst after + then VSet.add src (VSet.remove dst after) + else after + | Xparmove(srcs, dsts, itmp, ftmp) -> + vset_addlist srcs (vset_removelist dsts after) + | Xop(op, args, res) -> + if VSet.mem res after + then vset_addlist args (VSet.remove res after) + else after + | Xload(chunk, addr, args, dst) -> + if VSet.mem dst after + then vset_addlist args (VSet.remove dst after) + else after + | Xstore(chunk, addr, args, src) -> + vset_addlist args (VSet.add src after) + | Xcall(sg, ros, args, res) -> + vset_addlist args (vset_addros ros (vset_removelist res after)) + | Xtailcall(sg, ros, args) -> + vset_addlist args (vset_addros ros VSet.empty) + | Xbuiltin(ef, args, res) -> + vset_addlist args (vset_removelist res after) + | Xbranch s -> + after + | Xcond(cond, args, s1, s2) -> + List.fold_right VSet.add args after + | Xjumptable(arg, tbl) -> + VSet.add arg after + | Xreturn args -> + vset_addlist args VSet.empty + +let rec live_before_block blk after = + match blk with + | [] -> after + | instr :: blk -> live_before instr (live_before_block blk after) + +let transfer_live f pc after = + match PTree.get pc f.fn_code with + | None -> VSet.empty + | Some blk -> live_before_block blk after + +module VSetLat = struct + type t = VSet.t + let beq = VSet.equal + let bot = VSet.empty + let lub = VSet.union +end + +module Liveness_Solver = Backward_Dataflow_Solver(VSetLat)(NodeSetBackward) + +let liveness_analysis f = + match Liveness_Solver.fixpoint (successors f) (transfer_live f) [] with + | None -> assert false + | Some lv -> lv + +(* Pair the instructions of a block with their live-before sets *) + +let pair_block_live blk after = + let rec pair_rec accu after = function + | [] -> accu + | instr :: blk -> + let before = live_before instr after in + pair_rec ((instr, before) :: accu) before blk in + pair_rec [] after (List.rev blk) + + +(**************** Dead code elimination **********************) + +(* Eliminate pure instructions whose results are not used later. *) + +let rec dce_parmove srcs dsts after = + match srcs, dsts with + | [], [] -> [], [] + | src1 :: srcs, dst1 :: dsts -> + let (srcs', dsts') = dce_parmove srcs dsts after in + if VSet.mem dst1 after + then (src1 :: srcs', dst1 :: dsts') + else (srcs', dsts') + | _, _ -> assert false + +let dce_instr instr after k = + match instr with + | Xmove(src, dst) -> + if VSet.mem dst after + then instr :: k + else k + | Xparmove(srcs, dsts, itmp, ftmp) -> + let (srcs', dsts') = dce_parmove srcs dsts after in + Xparmove(srcs', dsts', itmp, ftmp) :: k + | Xop(op, args, res) -> + if VSet.mem res after + then instr :: k + else k + | Xload(chunk, addr, args, dst) -> + if VSet.mem dst after + then instr :: k + else k + | _ -> + instr :: k + +let rec dce_block blk after = + match blk with + | [] -> (after, []) + | instr :: blk' -> + let (after', tblk') = dce_block blk' after in + (live_before instr after', dce_instr instr after' tblk') + +let dead_code_elimination f liveness = + { f with fn_code = + PTree.map (fun pc blk -> snd(dce_block blk (PMap.get pc liveness))) + f.fn_code } + + +(*********************** Spill costs ****************************) + +(* Estimate spill costs and count other statistics for every variable. + Variables that must not be spilled are given infinite costs. *) + +let spill_costs f = + + let costs = ref PTree.empty in + + let get_stats r = + match PTree.get r !costs with + | Some st -> st + | None -> + let st = {cost = 0; usedefs = 0} in + costs := PTree.set r st !costs; + st in + + let charge amount uses v = + match v with + | L l -> () + | V(r, ty) -> + let st = get_stats r in + let c1 = st.cost + amount in + let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in + st.cost <- c2; + st.usedefs <- st.usedefs + uses in + + let charge_list amount uses vl = + List.iter (charge amount uses) vl in + + let charge_ros amount ros = + match ros with Coq_inl v -> charge amount 1 v | Coq_inr id -> () in + + let charge_instr = function + | Xmove(src, dst) -> + charge 1 1 src; charge 1 1 dst + | Xreload(src, dst) -> + charge 1 1 src; charge max_int 1 dst + (* dest must not be spilled! *) + | Xspill(src, dst) -> + charge max_int 1 src; charge 1 1 dst + (* source must not be spilled! *) + | Xparmove(srcs, dsts, itmp, ftmp) -> + charge_list 1 1 srcs; charge_list 1 1 dsts; + charge max_int 0 itmp; charge max_int 0 ftmp + (* temps must not be spilled *) + | Xop(op, args, res) -> + charge_list 10 1 args; charge 10 1 res + | Xload(chunk, addr, args, dst) -> + charge_list 10 1 args; charge 10 1 dst + | Xstore(chunk, addr, args, src) -> + charge_list 10 1 args; charge 10 1 src + | Xcall(sg, vos, args, res) -> + charge_ros 10 vos + | Xtailcall(sg, vos, args) -> + charge_ros 10 vos + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_vstore _ | EF_vstore_global _ | EF_memcpy _ -> + (* result is not used but should not be spilled *) + charge_list 10 1 args; charge_list max_int 0 res + | EF_annot _ -> + (* arguments are not actually used, so don't charge; + result is never used but should not be spilled *) + charge_list max_int 0 res + | EF_annot_val _ -> + (* like a move *) + charge_list 1 1 args; charge_list 1 1 res + | _ -> + charge_list 10 1 args; charge_list 10 1 res + end + | Xbranch _ -> () + | Xcond(cond, args, _, _) -> + charge_list 10 1 args + | Xjumptable(arg, _) -> + charge 10 1 arg + | Xreturn optarg -> + () in + + let charge_block blk = List.iter charge_instr blk in + + PTree.fold + (fun () pc blk -> charge_block blk) + f.fn_code (); + if !option_dalloctrace then begin + fprintf !pp "------------------ Unspillable variables --------------@ @."; + fprintf !pp "@[<hov 1>"; + PTree.fold + (fun () r st -> + if st.cost = max_int then fprintf !pp "@ x%ld" (P.to_int32 r)) + !costs (); + fprintf !pp "@]@ @." + end; + (* Result is cost function: pseudoreg -> stats *) + get_stats + + +(********* Construction and coloring of the interference graph **************) + +let add_interfs_def g res live = + VSet.iter (fun v -> if v <> res then IRC.add_interf g v res) live + +let add_interfs_move g src dst live = + VSet.iter (fun v -> if v <> src && v <> dst then IRC.add_interf g v dst) live + +let add_interfs_destroyed g live mregs = + List.iter + (fun mr -> VSet.iter (IRC.add_interf g (L (R mr))) live) + mregs + +let add_interfs_live g live v = + VSet.iter (fun v' -> IRC.add_interf g v v') live + +let add_interfs_list g v vl = + List.iter (IRC.add_interf g v) vl + +let rec add_interfs_pairwise g = function + | [] -> () + | v1 :: vl -> add_interfs_list g v1 vl; add_interfs_pairwise g vl + +let add_interfs_instr g instr live = + match instr with + | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) -> + IRC.add_pref g src dst; + add_interfs_move g src dst live + | Xparmove(srcs, dsts, itmp, ftmp) -> + List.iter2 (IRC.add_pref g) srcs dsts; + (* Interferences with live across *) + let across = vset_removelist dsts live in + List.iter (add_interfs_live g across) dsts; + add_interfs_live g across itmp; add_interfs_live g across ftmp; + (* All destinations must be pairwise different *) + add_interfs_pairwise g dsts; + (* The temporaries must be different from sources and dests *) + add_interfs_list g itmp srcs; add_interfs_list g itmp dsts; + add_interfs_list g ftmp srcs; add_interfs_list g ftmp dsts; + (* Take into account destroyed reg when accessing Incoming param *) + if List.exists (function (L(S(Incoming, _, _))) -> true | _ -> false) srcs + then add_interfs_list g (vmreg temp_for_parent_frame) dsts + | Xop(op, args, res) -> + begin match is_two_address op args with + | None -> + add_interfs_def g res live; + | Some(arg1, argl) -> + (* Treat as "res := arg1; res := op(res, argl)" *) + add_interfs_def g res live; + IRC.add_pref g arg1 res; + add_interfs_move g arg1 res + (vset_addlist (res :: argl) (VSet.remove res live)) + end; + add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op); + | Xload(chunk, addr, args, dst) -> + add_interfs_def g dst live; + add_interfs_destroyed g (VSet.remove dst live) + (destroyed_by_load chunk addr) + | Xstore(chunk, addr, args, src) -> + add_interfs_destroyed g live (destroyed_by_store chunk addr) + | Xcall(sg, vos, args, res) -> + add_interfs_destroyed g (vset_removelist res live) destroyed_at_call + | Xtailcall(sg, Coq_inl v, args) -> + List.iter (fun r -> IRC.add_interf g (vmreg r) v) int_callee_save_regs + | Xtailcall(sg, Coq_inr id, args) -> + () + | Xbuiltin(ef, args, res) -> + (* Interferences with live across *) + let across = vset_removelist res live in + List.iter (add_interfs_live g across) res; + (* All results must be pairwise different *) + add_interfs_pairwise g res; + add_interfs_destroyed g across (destroyed_by_builtin ef); + begin match ef, args, res with + | EF_annot_val _, [arg], [res] -> IRC.add_pref g arg res (* like a move *) + | _ -> () + end + | Xbranch s -> + () + | Xcond(cond, args, s1, s2) -> + add_interfs_destroyed g live (destroyed_by_cond cond) + | Xjumptable(arg, tbl) -> + add_interfs_destroyed g live destroyed_by_jumptable + | Xreturn optarg -> + () + +let rec add_interfs_block g blk live = + match blk with + | [] -> live + | instr :: blk' -> + let live' = add_interfs_block g blk' live in + add_interfs_instr g instr live'; + live_before instr live' + +let find_coloring f liveness = + (*type_function f; (* for debugging *)*) + let g = IRC.init (spill_costs f) in + PTree.fold + (fun () pc blk -> ignore (add_interfs_block g blk (PMap.get pc liveness))) + f.fn_code (); + IRC.coloring g + + +(*********** Determination of variables that need spill code insertion *****) + +let is_reg alloc v = + match alloc v with R _ -> true | S _ -> false + +let add_tospill alloc v ts = + match alloc v with R _ -> ts | S _ -> VSet.add v ts + +let addlist_tospill alloc vl ts = + List.fold_right (add_tospill alloc) vl ts + +let addros_tospill alloc ros ts = + match ros with Coq_inl v -> add_tospill alloc v ts | Coq_inr s -> ts + +let tospill_instr alloc instr ts = + match instr with + | Xmove(src, dst) -> + if is_reg alloc src || is_reg alloc dst || alloc src = alloc dst + then ts + else VSet.add src (VSet.add dst ts) + | Xreload(src, dst) -> + assert (is_reg alloc dst); + ts + | Xspill(src, dst) -> + assert (is_reg alloc src); + ts + | Xparmove(srcs, dsts, itmp, ftmp) -> + assert (is_reg alloc itmp && is_reg alloc ftmp); + ts + | Xop(op, args, res) -> + addlist_tospill alloc args (add_tospill alloc res ts) + | Xload(chunk, addr, args, dst) -> + addlist_tospill alloc args (add_tospill alloc dst ts) + | Xstore(chunk, addr, args, src) -> + addlist_tospill alloc args (add_tospill alloc src ts) + | Xcall(sg, vos, args, res) -> + addros_tospill alloc vos ts + | Xtailcall(sg, vos, args) -> + addros_tospill alloc vos ts + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> ts + | _ -> addlist_tospill alloc args (addlist_tospill alloc res ts) + end + | Xbranch s -> + ts + | Xcond(cond, args, s1, s2) -> + addlist_tospill alloc args ts + | Xjumptable(arg, tbl) -> + add_tospill alloc arg ts + | Xreturn optarg -> + ts + +let rec tospill_block alloc blk ts = + match blk with + | [] -> ts + | instr :: blk' -> tospill_block alloc blk' (tospill_instr alloc instr ts) + +let tospill_function f alloc = + PTree.fold + (fun ts pc blk -> tospill_block alloc blk ts) + f.fn_code VSet.empty + + +(********************* Spilling ***********************) + +(* We follow a semi-naive spilling strategy. By default, we spill at + every definition of a variable that could not be allocated a register, + and we reload before every use. However, we also maintain a list of + equations of the form [spilled-var = temp] that keep track of + variables that were recently spilled or reloaded. Based on these + equations, we can avoid reloading a spilled variable if its value + is still available in a temporary register. *) + +let rec find_reg_containing v = function + | [] -> None + | (var, temp, date) :: eqs -> + if var = v then Some temp else find_reg_containing v eqs + +let add v t eqs = (v, t, 0) :: eqs + +let kill x eqs = + List.filter (fun (v, t, date) -> v <> x && t <> x) eqs + +let reload_var tospill eqs v = + if not (VSet.mem v tospill) then + (v, [], eqs) + else + match find_reg_containing v eqs with + | Some t -> + (*printf "Reusing %a for %a@ @." PrintXTL.var t PrintXTL.var v;*) + (t, [], eqs) + | None -> + let t = new_temp (typeof v) in (t, [Xreload(v, t)], add v t eqs) + +let rec reload_vars tospill eqs vl = + match vl with + | [] -> ([], [], eqs) + | v1 :: vs -> + let (t1, c1, eqs1) = reload_var tospill eqs v1 in + let (ts, cs, eqs2) = reload_vars tospill eqs1 vs in + (t1 :: ts, c1 @ cs, eqs2) + +let save_var tospill eqs v = + if not (VSet.mem v tospill) then + (v, [], kill v eqs) + else begin + let t = new_temp (typeof v) in + (t, [Xspill(t, v)], add v t (kill v eqs)) + end + +let rec save_vars tospill eqs vl = + match vl with + | [] -> ([], [], eqs) + | v1 :: vs -> + let (t1, c1, eqs1) = save_var tospill eqs v1 in + let (ts, cs, eqs2) = save_vars tospill eqs1 vs in + (t1 :: ts, c1 @ cs, eqs2) + +(* Trimming equations when we have too many or when they are too old. + The goal is to limit the live range of unspillable temporaries. + By setting [max_age] to zero, we can effectively deactivate + the reuse strategy and fall back to a naive "reload at every use" + strategy. *) + +let max_age = ref 3 +let max_num_eqs = ref 3 + +let rec trim count eqs = + if count <= 0 then [] else + match eqs with + | [] -> [] + | (v, t, date) :: eqs' -> + if date <= !max_age + then (v, t, date + 1) :: trim (count - 1) eqs' + else [] + +(* Insertion of spill and reload instructions. *) + +let spill_instr tospill eqs instr = + let eqs = trim !max_num_eqs eqs in + match instr with + | Xmove(src, dst) -> + if VSet.mem src tospill && VSet.mem dst tospill then begin + let (src', c1, eqs1) = reload_var tospill eqs src in + (c1 @ [Xspill(src', dst)], add dst src' (kill dst eqs1)) + end else begin + ([instr], kill dst eqs) + end + | Xreload(src, dst) -> + assert false + | Xspill(src, dst) -> + assert false + | Xparmove(srcs, dsts, itmp, ftmp) -> + ([instr], List.fold_right kill dsts eqs) + | Xop(op, args, res) -> + begin match is_two_address op args with + | None -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (res', c2, eqs2) = save_var tospill eqs1 res in + (c1 @ Xop(op, args', res') :: c2, eqs2) + | Some(arg1, argl) -> + begin match VSet.mem res tospill, VSet.mem arg1 tospill with + | false, false -> + let (argl', c1, eqs1) = reload_vars tospill eqs argl in + (c1 @ [Xop(op, arg1 :: argl', res)], kill res eqs1) + | true, false -> + let tmp = new_temp (typeof res) in + let (argl', c1, eqs1) = reload_vars tospill eqs argl in + (c1 @ [Xmove(arg1, tmp); Xop(op, tmp :: argl', tmp); Xspill(tmp, res)], + add res tmp (kill res eqs1)) + | false, true -> + let eqs1 = add arg1 res (kill res eqs) in + let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in + (Xreload(arg1, res) :: c1 @ [Xop(op, res :: argl', res)], + kill res eqs2) + | true, true -> + let tmp = new_temp (typeof res) in + let eqs1 = add arg1 tmp eqs in + let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in + (Xreload(arg1, tmp) :: c1 @ [Xop(op, tmp :: argl', tmp); Xspill(tmp, res)], + add res tmp (kill tmp (kill res eqs2))) + end + end + | Xload(chunk, addr, args, dst) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (dst', c2, eqs2) = save_var tospill eqs1 dst in + (c1 @ Xload(chunk, addr, args', dst') :: c2, eqs2) + | Xstore(chunk, addr, args, src) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (src', c2, eqs2) = reload_var tospill eqs1 src in + (c1 @ c2 @ [Xstore(chunk, addr, args', src')], eqs2) + | Xcall(sg, Coq_inl v, args, res) -> + let (v', c1, eqs1) = reload_var tospill eqs v in + (c1 @ [Xcall(sg, Coq_inl v', args, res)], []) + | Xcall(sg, Coq_inr id, args, res) -> + ([instr], []) + | Xtailcall(sg, Coq_inl v, args) -> + let (v', c1, eqs1) = reload_var tospill eqs v in + (c1 @ [Xtailcall(sg, Coq_inl v', args)], []) + | Xtailcall(sg, Coq_inr id, args) -> + ([instr], []) + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> + ([instr], eqs) + | _ -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (res', c2, eqs2) = save_vars tospill eqs1 res in + (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2) + end + | Xbranch s -> + ([instr], eqs) + | Xcond(cond, args, s1, s2) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + (c1 @ [Xcond(cond, args', s1, s2)], eqs1) + | Xjumptable(arg, tbl) -> + let (arg', c1, eqs1) = reload_var tospill eqs arg in + (c1 @ [Xjumptable(arg', tbl)], eqs1) + | Xreturn optarg -> + ([instr], []) + +let rec spill_block tospill pc blk eqs = + match blk with + | [] -> ([], eqs) + | instr :: blk' -> + let (c1, eqs1) = spill_instr tospill eqs instr in + let (c2, eqs2) = spill_block tospill pc blk' eqs1 in + (c1 @ c2, eqs2) + +(* +let spill_block tospill pc blk eqs = + printf "@[<hov 2>spill_block: at %ld: " (camlint_of_positive pc); + List.iter (fun (x,y,d) -> printf "@ %a=%a" PrintXTL.var x PrintXTL.var y) eqs; + printf "@]@."; + spill_block tospill pc blk eqs +*) + +let spill_function f tospill round = + max_num_eqs := 3; + max_age := (if round <= 10 then 3 else if round <= 20 then 1 else 0); + transform_basic_blocks (spill_block tospill) [] f + + +(***************** Generation of LTL from XTL ***********************) + +(** Apply a register allocation to an XTL function, producing an LTL function. + Raise [Bad_LTL] if some pseudoregisters were mapped to stack locations + while machine registers were expected, or in other words if spilling + and reloading code must be inserted. *) + +exception Bad_LTL + +let mreg_of alloc v = match alloc v with R mr -> mr | S _ -> raise Bad_LTL + +let mregs_of alloc vl = List.map (mreg_of alloc) vl + +let mros_of alloc vos = sum_left_map (mreg_of alloc) vos + +let make_move src dst k = + match src, dst with + | R rsrc, R rdst -> + if rsrc = rdst then k else LTL.Lop(Omove, [rsrc], rdst) :: k + | R rsrc, S(sl, ofs, ty) -> + LTL.Lsetstack(rsrc, sl, ofs, ty) :: k + | S(sl, ofs, ty), R rdst -> + LTL.Lgetstack(sl, ofs, ty, rdst) :: k + | S _, S _ -> + if src = dst then k else raise Bad_LTL + +type parmove_status = To_move | Being_moved | Moved + +let make_parmove srcs dsts itmp ftmp k = + let src = Array.of_list srcs + and dst = Array.of_list dsts in + let n = Array.length src in + assert (Array.length dst = n); + let status = Array.make n To_move in + let temp_for = + function Tint -> itmp | Tfloat -> ftmp | Tlong -> assert false in + let code = ref [] in + let add_move s d = + match s, d with + | R rs, R rd -> + code := LTL.Lop(Omove, [rs], rd) :: !code + | R rs, S(sl, ofs, ty) -> + code := LTL.Lsetstack(rs, sl, ofs, ty) :: !code + | S(sl, ofs, ty), R rd -> + code := LTL.Lgetstack(sl, ofs, ty, rd) :: !code + | S(sls, ofss, tys), S(sld, ofsd, tyd) -> + let tmp = temp_for tys in + (* code will be reversed at the end *) + code := LTL.Lsetstack(tmp, sld, ofsd, tyd) :: + LTL.Lgetstack(sls, ofss, tys, tmp) :: !code + in + let rec move_one i = + if src.(i) <> dst.(i) then begin + status.(i) <- Being_moved; + for j = 0 to n - 1 do + if src.(j) = dst.(i) then + match status.(j) with + | To_move -> + move_one j + | Being_moved -> + let tmp = R (temp_for (Loc.coq_type src.(j))) in + add_move src.(j) tmp; + src.(j) <- tmp + | Moved -> + () + done; + add_move src.(i) dst.(i); + status.(i) <- Moved + end in + for i = 0 to n - 1 do + if status.(i) = To_move then move_one i + done; + List.rev_append !code k + +let transl_instr alloc instr k = + match instr with + | Xmove(src, dst) | Xreload(src, dst) | Xspill(src, dst) -> + make_move (alloc src) (alloc dst) k + | Xparmove(srcs, dsts, itmp, ftmp) -> + make_parmove (List.map alloc srcs) (List.map alloc dsts) + (mreg_of alloc itmp) (mreg_of alloc ftmp) k + | Xop(op, args, res) -> + let rargs = mregs_of alloc args + and rres = mreg_of alloc res in + begin match is_two_address op rargs with + | None -> + LTL.Lop(op, rargs, rres) :: k + | Some(rarg1, rargl) -> + if rarg1 = rres then + LTL.Lop(op, rargs, rres) :: k + else + LTL.Lop(Omove, [rarg1], rres) :: + LTL.Lop(op, rres :: rargl, rres) :: k + end + | Xload(chunk, addr, args, dst) -> + LTL.Lload(chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k + | Xstore(chunk, addr, args, src) -> + LTL.Lstore(chunk, addr, mregs_of alloc args, mreg_of alloc src) :: k + | Xcall(sg, vos, args, res) -> + LTL.Lcall(sg, mros_of alloc vos) :: k + | Xtailcall(sg, vos, args) -> + LTL.Ltailcall(sg, mros_of alloc vos) :: [] + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> + LTL.Lannot(ef, List.map alloc args) :: k + | _ -> + LTL.Lbuiltin(ef, mregs_of alloc args, mregs_of alloc res) :: k + end + | Xbranch s -> + LTL.Lbranch s :: [] + | Xcond(cond, args, s1, s2) -> + LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: [] + | Xjumptable(arg, tbl) -> + LTL.Ljumptable(mreg_of alloc arg, tbl) :: [] + | Xreturn optarg -> + LTL.Lreturn :: [] + +let rec transl_block alloc blk = + match blk with + | [] -> [] + | instr :: blk' -> transl_instr alloc instr (transl_block alloc blk') + +let transl_function fn alloc = + { LTL.fn_sig = fn.fn_sig; + LTL.fn_stacksize = fn.fn_stacksize; + LTL.fn_entrypoint = fn.fn_entrypoint; + LTL.fn_code = PTree.map1 (transl_block alloc) fn.fn_code + } + + +(******************* All together *********************) + +exception Timeout + +let rec first_round f liveness = + let alloc = find_coloring f liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- After initial register allocation@ @."; + PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f + end; + let ts = tospill_function f alloc in + if VSet.is_empty ts then success f alloc else more_rounds f ts 1 + +and more_rounds f ts count = + if count >= 40 then raise Timeout; + let f' = spill_function f ts count in + let liveness = liveness_analysis f' in + let alloc = find_coloring f' liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- After register allocation (round %d)@ @." count; + PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f' + end; + let ts' = tospill_function f' alloc in + if VSet.is_empty ts' + then success f' alloc + else begin + if !option_dalloctrace then begin + fprintf !pp "--- Remain to be spilled:@ @."; + VSet.iter (fun v -> fprintf !pp "%a " PrintXTL.var v) ts'; + fprintf !pp "@ @." + end; + more_rounds f (VSet.union ts ts') (count + 1) + end + +and success f alloc = + let f' = transl_function f alloc in + if !option_dalloctrace then begin + fprintf !pp "-------------- Candidate allocation@ @."; + PrintLTL.print_function !pp P.one f' + end; + f' + +open Errors + +let regalloc f = + init_trace(); + reset_temps(); + let f1 = Splitting.rename_function f in + match RTLtyping.type_function f1 with + | Error msg -> + Error(MSG (coqstring_of_camlstring "RTL code after splitting is ill-typed:") :: msg) + | OK tyenv -> + let f2 = function_of_RTL_function f1 tyenv in + let liveness = liveness_analysis f2 in + let f3 = dead_code_elimination f2 liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- Initial XTL@ @."; + PrintXTL.print_function !pp f3 + end; + try + OK(first_round f3 liveness) + with + | Timeout -> + Error(msg (coqstring_of_camlstring "Spilling fails to converge")) + | Type_error_at pc -> + Error [MSG(coqstring_of_camlstring "Ill-typed XTL code at PC "); + POS pc] + | Bad_LTL -> + Error(msg (coqstring_of_camlstring "Bad LTL after spilling")) diff --git a/backend/Reload.v b/backend/Reload.v deleted file mode 100644 index be844b3..0000000 --- a/backend/Reload.v +++ /dev/null @@ -1,274 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Reloading, spilling, and explication of calling conventions. *) - -Require Import Coqlib. -Require Import AST. -Require Import Integers. -Require Import Op. -Require Import Locations. -Require Import LTLin. -Require Import Conventions. -Require Import Parallelmove. -Require Import Linear. - -Open Local Scope error_monad_scope. - -(** * Spilling and reloading *) - -(** Operations in the Linear language, like those of the target processor, - operate only over machine registers, but not over stack slots. - Consider the LTLin instruction -<< - r1 <- Lop(Oadd, r1 :: r2 :: nil) ->> - and assume that [r1] and [r2] are assigned to stack locations [S s1] - and [S s2], respectively. The translated LTL code must load these - stack locations into temporary integer registers (this is called - ``reloading''), perform the [Oadd] operation over these temporaries, - leave the result in a temporary, then store the temporary back to - stack location [S s1] (this is called ``spilling''). In other term, - the generated Linear code has the following shape: -<< - IT1 <- Lgetstack s1; - IT2 <- Lgetstack s2; - IT1 <- Lop(Oadd, IT1 :: IT2 :: nil); - Lsetstack s1 IT1; ->> - This section provides functions that assist in choosing appropriate - temporaries and inserting the required spilling and reloading - operations. *) - -(** ** Allocation of temporary registers for reloading and spilling. *) - -(** [reg_for l] returns a machine register appropriate for working - over the location [l]: either the machine register [m] if [l = R m], - or a temporary register of [l]'s type if [l] is a stack slot. *) - -Definition reg_for (l: loc) : mreg := - match l with - | R r => r - | S s => match slot_type s with Tint => IT1 | Tfloat => FT1 end - end. - -(** [regs_for ll] is similar, for a list of locations [ll]. - We ensure that distinct temporaries are used for - different elements of [ll]. - The result is correct only if [enough_temporaries ll = true] - (see below). *) - -Fixpoint regs_for_rec (locs: list loc) (itmps ftmps: list mreg) - {struct locs} : list mreg := - match locs with - | nil => nil - | R r :: ls => r :: regs_for_rec ls itmps ftmps - | S s :: ls => - match slot_type s with - | Tint => - match itmps with - | nil => nil - | it1 :: its => it1 :: regs_for_rec ls its ftmps - end - | Tfloat => - match ftmps with - | nil => nil - | ft1 :: fts => ft1 :: regs_for_rec ls itmps fts - end - end - end. - -Definition regs_for (locs: list loc) := - regs_for_rec locs int_temporaries float_temporaries. - -(** Detect the situations where not enough temporaries are available - for reloading. *) - -Fixpoint enough_temporaries_rec (locs: list loc) (itmps ftmps: list mreg) - {struct locs} : bool := - match locs with - | nil => true - | R r :: ls => enough_temporaries_rec ls itmps ftmps - | S s :: ls => - match slot_type s with - | Tint => - match itmps with - | nil => false - | it1 :: its => enough_temporaries_rec ls its ftmps - end - | Tfloat => - match ftmps with - | nil => false - | ft1 :: fts => enough_temporaries_rec ls itmps fts - end - end - end. - -Definition enough_temporaries (locs: list loc) := - enough_temporaries_rec locs int_temporaries float_temporaries. - -(** ** Insertion of Linear reloads, stores and moves *) - -(** [add_spill src dst k] prepends to [k] the instructions needed - to assign location [dst] the value of machine register [mreg]. *) - -Definition add_spill (src: mreg) (dst: loc) (k: code) := - match dst with - | R rd => if mreg_eq src rd then k else Lop Omove (src :: nil) rd :: k - | S sl => Lsetstack src sl :: k - end. - -(** [add_reload src dst k] prepends to [k] the instructions needed - to assign machine register [mreg] the value of the location [src]. *) - -Definition add_reload (src: loc) (dst: mreg) (k: code) := - match src with - | R rs => if mreg_eq rs dst then k else Lop Omove (rs :: nil) dst :: k - | S sl => Lgetstack sl dst :: k - end. - -(** [add_reloads] is similar for a list of locations (as sources) - and a list of machine registers (as destinations). *) - -Fixpoint add_reloads (srcs: list loc) (dsts: list mreg) (k: code) - {struct srcs} : code := - match srcs, dsts with - | s1 :: sl, t1 :: tl => add_reload s1 t1 (add_reloads sl tl k) - | _, _ => k - end. - -(** [add_move src dst k] prepends to [k] the instructions that copy - the value of location [src] into location [dst]. *) - -Definition add_move (src dst: loc) (k: code) := - if Loc.eq src dst then k else - match src, dst with - | R rs, _ => - add_spill rs dst k - | _, R rd => - add_reload src rd k - | S ss, S sd => - let tmp := - match slot_type ss with Tint => IT1 | Tfloat => FT1 end in - add_reload src tmp (add_spill tmp dst k) - end. - -(** [parallel_move srcs dsts k] is similar, but for a list of - source locations and a list of destination locations of the same - length. This is a parallel move, meaning that some of the - destinations can also occur as sources. *) - -Definition parallel_move (srcs dsts: list loc) (k: code) : code := - List.fold_right - (fun p k => add_move (fst p) (snd p) k) - k (parmove srcs dsts). - -(** * Code transformation *) - -(** We insert appropriate reload, spill and parallel move operations - around each of the instructions of the source code. *) - -Definition transf_instr - (f: LTLin.function) (instr: LTLin.instruction) (k: code) : code := - match instr with - | LTLin.Lop op args res => - match is_move_operation op args with - | Some src => - add_move src res k - | None => - let rargs := regs_for args in - let rres := reg_for res in - add_reloads args rargs (Lop op rargs rres :: add_spill rres res k) - end - | LTLin.Lload chunk addr args dst => - let rargs := regs_for args in - let rdst := reg_for dst in - add_reloads args rargs - (Lload chunk addr rargs rdst :: add_spill rdst dst k) - | LTLin.Lstore chunk addr args src => - if enough_temporaries (src :: args) then - match regs_for (src :: args) with - | nil => k (* never happens *) - | rsrc :: rargs => - add_reloads (src :: args) (rsrc :: rargs) - (Lstore chunk addr rargs rsrc :: k) - end - else - let rargs := regs_for args in - let rsrc := reg_for src in - add_reloads args rargs - (Lop (op_for_binary_addressing addr) rargs IT2 :: - add_reload src rsrc - (Lstore chunk (Aindexed Int.zero) (IT2 :: nil) rsrc :: k)) - | LTLin.Lcall sig los args res => - let largs := loc_arguments sig in - let rres := loc_result sig in - match los with - | inl fn => - parallel_move args largs - (add_reload fn (reg_for fn) - (Lcall sig (inl _ (reg_for fn)) :: add_spill rres res k)) - | inr id => - parallel_move args largs - (Lcall sig (inr _ id) :: add_spill rres res k) - end - | LTLin.Ltailcall sig los args => - let largs := loc_arguments sig in - match los with - | inl fn => - parallel_move args largs - (add_reload fn IT1 - (Ltailcall sig (inl _ IT1) :: k)) - | inr id => - parallel_move args largs - (Ltailcall sig (inr _ id) :: k) - end - | LTLin.Lbuiltin ef args dst => - if ef_reloads ef then - (let rargs := regs_for args in - let rdst := reg_for dst in - add_reloads args rargs - (Lbuiltin ef rargs rdst :: add_spill rdst dst k)) - else - Lannot ef args :: k - | LTLin.Llabel lbl => - Llabel lbl :: k - | LTLin.Lgoto lbl => - Lgoto lbl :: k - | LTLin.Lcond cond args lbl => - let rargs := regs_for args in - add_reloads args rargs (Lcond cond rargs lbl :: k) - | LTLin.Ljumptable arg tbl => - let rarg := reg_for arg in - add_reload arg rarg (Ljumptable rarg tbl :: k) - | LTLin.Lreturn None => - Lreturn :: k - | LTLin.Lreturn (Some loc) => - add_reload loc (loc_result (LTLin.fn_sig f)) (Lreturn :: k) - end. - -Definition transf_code (f: LTLin.function) (c: LTLin.code) : code := - list_fold_right (transf_instr f) c nil. - -Definition transf_function (f: LTLin.function) : function := - mkfunction - (LTLin.fn_sig f) - (LTLin.fn_stacksize f) - (parallel_move (loc_parameters (LTLin.fn_sig f)) (LTLin.fn_params f) - (transf_code f (LTLin.fn_code f))). - -Definition transf_fundef (fd: LTLin.fundef) : Linear.fundef := - transf_fundef transf_function fd. - -Definition transf_program (p: LTLin.program) : Linear.program := - transform_program transf_fundef p. - diff --git a/backend/Reloadproof.v b/backend/Reloadproof.v deleted file mode 100644 index fe6e475..0000000 --- a/backend/Reloadproof.v +++ /dev/null @@ -1,1487 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Correctness proof for the [Reload] pass. *) - -Require Import Coqlib. -Require Import AST. -Require Import Integers. -Require Import Values. -Require Import Memory. -Require Import Events. -Require Import Globalenvs. -Require Import Smallstep. -Require Import Op. -Require Import Locations. -Require Import Conventions. -Require Import RTLtyping. -Require Import LTLin. -Require Import LTLintyping. -Require Import Linear. -Require Import Parallelmove. -Require Import Reload. - -(** * Exploitation of the typing hypothesis *) - -Remark arity_ok_rec_incr_1: - forall tys it itmps ftmps, - arity_ok_rec tys itmps ftmps = true -> - arity_ok_rec tys (it :: itmps) ftmps = true. -Proof. - induction tys; intros until ftmps; simpl. - tauto. - destruct a. - destruct itmps. congruence. auto. - destruct ftmps. congruence. auto. -Qed. - -Remark arity_ok_rec_incr_2: - forall tys ft itmps ftmps, - arity_ok_rec tys itmps ftmps = true -> - arity_ok_rec tys itmps (ft :: ftmps) = true. -Proof. - induction tys; intros until ftmps; simpl. - tauto. - destruct a. - destruct itmps. congruence. auto. - destruct ftmps. congruence. auto. -Qed. - -Remark arity_ok_rec_decr: - forall tys ty itmps ftmps, - arity_ok_rec (ty :: tys) itmps ftmps = true -> - arity_ok_rec tys itmps ftmps = true. -Proof. - intros until ftmps. simpl. destruct ty. - destruct itmps. congruence. intros. apply arity_ok_rec_incr_1; auto. - destruct ftmps. congruence. intros. apply arity_ok_rec_incr_2; auto. -Qed. - -Lemma arity_ok_enough_rec: - forall locs itmps ftmps, - arity_ok_rec (List.map Loc.type locs) itmps ftmps = true -> - enough_temporaries_rec locs itmps ftmps = true. -Proof. - induction locs; intros until ftmps. - simpl. auto. - simpl enough_temporaries_rec. simpl map. - destruct a. intros. apply IHlocs. eapply arity_ok_rec_decr; eauto. - simpl. destruct (slot_type s). - destruct itmps; auto. - destruct ftmps; auto. -Qed. - -Lemma arity_ok_enough: - forall locs, - arity_ok (List.map Loc.type locs) = true -> - enough_temporaries locs = true. -Proof. - unfold arity_ok, enough_temporaries. intros. - apply arity_ok_enough_rec; auto. -Qed. - -Lemma enough_temporaries_op_args: - forall (op: operation) (args: list loc) (res: loc), - (List.map Loc.type args, Loc.type res) = type_of_operation op -> - enough_temporaries args = true. -Proof. - intros. apply arity_ok_enough. - replace (map Loc.type args) with (fst (type_of_operation op)). - destruct op; try (destruct c); try (destruct a); compute; reflexivity. - rewrite <- H. auto. -Qed. - -Lemma enough_temporaries_addr: - forall (addr: addressing) (args: list loc), - List.map Loc.type args = type_of_addressing addr -> - enough_temporaries args = true. -Proof. - intros. apply arity_ok_enough. rewrite H. - destruct addr; compute; reflexivity. -Qed. - -Lemma enough_temporaries_cond: - forall (cond: condition) (args: list loc), - List.map Loc.type args = type_of_condition cond -> - enough_temporaries args = true. -Proof. - intros. apply arity_ok_enough. rewrite H. - destruct cond; compute; reflexivity. -Qed. - -Lemma arity_ok_rec_length: - forall tys itmps ftmps, - (length tys <= length itmps)%nat -> - (length tys <= length ftmps)%nat -> - arity_ok_rec tys itmps ftmps = true. -Proof. - induction tys; intros until ftmps; simpl. - auto. - intros. destruct a. - destruct itmps; simpl in H. omegaContradiction. apply IHtys; omega. - destruct ftmps; simpl in H0. omegaContradiction. apply IHtys; omega. -Qed. - -Lemma enough_temporaries_length: - forall args, - (length args <= 2)%nat -> enough_temporaries args = true. -Proof. - intros. apply arity_ok_enough. unfold arity_ok. - apply arity_ok_rec_length. - rewrite list_length_map. simpl. omega. - rewrite list_length_map. simpl. omega. -Qed. - -Lemma not_enough_temporaries_length: - forall src args, - enough_temporaries (src :: args) = false -> - (length args >= 2)%nat. -Proof. - intros. - assert (length (src :: args) <= 2 \/ length (src :: args) > 2)%nat by omega. - destruct H0. - exploit enough_temporaries_length. eauto. congruence. - simpl in H0. omega. -Qed. - -Lemma not_enough_temporaries_addr: - forall (ge: genv) sp addr src args ls v m, - enough_temporaries (src :: args) = false -> - eval_addressing ge sp addr (List.map ls args) = Some v -> - eval_operation ge sp (op_for_binary_addressing addr) (List.map ls args) m = Some v. -Proof. - intros. - apply eval_op_for_binary_addressing; auto. - rewrite list_length_map. eapply not_enough_temporaries_length; eauto. -Qed. - -(** Some additional properties of [reg_for] and [regs_for]. *) - -Lemma regs_for_cons: - forall src args, - exists rsrc, exists rargs, regs_for (src :: args) = rsrc :: rargs. -Proof. - intros. unfold regs_for. simpl. - destruct src. econstructor; econstructor; reflexivity. - destruct (slot_type s); econstructor; econstructor; reflexivity. -Qed. - -Lemma reg_for_not_IT2: - forall l, loc_acceptable l -> reg_for l <> IT2. -Proof. - intros. destruct l; simpl. - red; intros; subst m. simpl in H. intuition congruence. - destruct (slot_type s); congruence. -Qed. - -(** * Correctness of the Linear constructors *) - -(** This section proves theorems that establish the correctness of the - Linear constructor functions such as [add_move]. The theorems are of - the general form ``the generated Linear instructions execute and - modify the location set in the expected way: the result location(s) - contain the expected values; other, non-temporary locations keep - their values''. *) - -Section LINEAR_CONSTRUCTORS. - -Variable ge: genv. -Variable stk: list stackframe. -Variable f: function. -Variable sp: val. - -Lemma reg_for_spec: - forall l, - R(reg_for l) = l \/ In (R (reg_for l)) temporaries. -Proof. - intros. unfold reg_for. destruct l. tauto. - case (slot_type s); simpl; tauto. -Qed. - -Lemma reg_for_diff: - forall l l', - Loc.diff l l' -> Loc.notin l' temporaries -> - Loc.diff (R (reg_for l)) l'. -Proof. - intros. destruct (reg_for_spec l). - rewrite H1; auto. - apply Loc.diff_sym. eapply Loc.in_notin_diff; eauto. -Qed. - -Lemma add_reload_correct: - forall src dst k rs m, - exists rs', - star step ge (State stk f sp (add_reload src dst k) rs m) - E0 (State stk f sp k rs' m) /\ - rs' (R dst) = rs src /\ - forall l, - Loc.diff (R dst) l -> - loc_acceptable src \/ Loc.diff (R IT1) l -> - Loc.notin l destroyed_at_move -> - rs' l = rs l. -Proof. - intros. unfold add_reload. destruct src. - destruct (mreg_eq m0 dst). - subst dst. exists rs. split. apply star_refl. tauto. - econstructor. - split. apply star_one; apply exec_Lop. simpl; reflexivity. - unfold undef_op. split. apply Locmap.gss. - intros. rewrite Locmap.gso; auto; apply Locmap.guo; auto. - econstructor. - split. apply star_one; apply exec_Lgetstack. - split. apply Locmap.gss. - intros. rewrite Locmap.gso; auto. - destruct s; unfold undef_getstack; unfold loc_acceptable in H0; auto. - apply Locmap.gso. tauto. -Qed. - -Lemma add_reload_correct_2: - forall src k rs m, - loc_acceptable src -> - exists rs', - star step ge (State stk f sp (add_reload src (reg_for src) k) rs m) - E0 (State stk f sp k rs' m) /\ - rs' (R (reg_for src)) = rs src /\ - (forall l, Loc.notin l temporaries -> rs' l = rs l) /\ - rs' (R IT2) = rs (R IT2). -Proof. - intros. unfold reg_for, add_reload; destruct src. - rewrite dec_eq_true. exists rs; split. constructor. auto. - set (t := match slot_type s with - | Tint => IT1 - | Tfloat => FT1 - end). - exists (Locmap.set (R t) (rs (S s)) (undef_getstack s rs)). - split. apply star_one; apply exec_Lgetstack. - split. apply Locmap.gss. - split. intros. rewrite Locmap.gso; auto. - destruct s; unfold undef_getstack; unfold loc_acceptable in H; auto. - apply Locmap.gso. tauto. - apply Loc.diff_sym. simpl in H0; unfold t; destruct (slot_type s); tauto. - rewrite Locmap.gso. unfold undef_getstack. - destruct s; simpl in H; reflexivity || contradiction. - unfold t; destruct (slot_type s); red; congruence. -Qed. - -Lemma add_spill_correct: - forall src dst k rs m, - exists rs', - star step ge (State stk f sp (add_spill src dst k) rs m) - E0 (State stk f sp k rs' m) /\ - rs' dst = rs (R src) /\ - forall l, Loc.diff dst l -> Loc.notin l destroyed_at_move -> rs' l = rs l. -Proof. - intros. unfold add_spill. destruct dst. - destruct (mreg_eq src m0). - subst src. exists rs. split. apply star_refl. tauto. - econstructor. - split. apply star_one. apply exec_Lop. simpl; reflexivity. - split. apply Locmap.gss. - intros. rewrite Locmap.gso; auto; unfold undef_op; apply Locmap.guo; auto. - econstructor. - split. apply star_one. apply exec_Lsetstack. - split. apply Locmap.gss. - intros. rewrite Locmap.gso; auto; unfold undef_setstack; apply Locmap.guo; auto. -Qed. - -Remark notin_destroyed_move_1: - forall r, ~In r destroyed_at_move_regs -> Loc.notin (R r) destroyed_at_move. -Proof. - intros. simpl in *. intuition congruence. -Qed. - -Remark notin_destroyed_move_2: - forall s, Loc.notin (S s) destroyed_at_move. -Proof. - intros. simpl in *. destruct s; auto. -Qed. - -Lemma add_reloads_correct_rec: - forall srcs itmps ftmps k rs m, - locs_acceptable srcs -> - enough_temporaries_rec srcs itmps ftmps = true -> - (forall r, In (R r) srcs -> In r itmps -> False) -> - (forall r, In (R r) srcs -> In r ftmps -> False) -> - (forall r, In (R r) srcs -> ~In r destroyed_at_move_regs) -> - list_disjoint itmps ftmps -> - list_norepet itmps -> - list_norepet ftmps -> - list_disjoint itmps destroyed_at_move_regs -> - list_disjoint ftmps destroyed_at_move_regs -> - exists rs', - star step ge - (State stk f sp (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m) - E0 (State stk f sp k rs' m) /\ - reglist rs' (regs_for_rec srcs itmps ftmps) = map rs srcs /\ - (forall r, ~(In r itmps) -> ~(In r ftmps) -> ~(In r destroyed_at_move_regs) -> rs' (R r) = rs (R r)) /\ - (forall s, rs' (S s) = rs (S s)). -Proof. -Opaque destroyed_at_move_regs. - induction srcs; simpl; intros. - (* base case *) - exists rs. split. apply star_refl. tauto. - (* inductive case *) - simpl in H0. - assert (ACC1: loc_acceptable a) by (auto with coqlib). - assert (ACC2: locs_acceptable srcs) by (red; auto with coqlib). - destruct a as [r | s]. - (* a is a register *) - simpl add_reload. rewrite dec_eq_true. - exploit IHsrcs; eauto. - intros [rs' [EX [RES [OTH1 OTH2]]]]. - exists rs'. split. eauto. - split. simpl. decEq. - apply OTH1. red; intros; eauto. red; intros; eauto. auto. - auto. - auto. - (* a is a stack slot *) - destruct (slot_type s). - (* ... of integer type *) - destruct itmps as [ | it1 itmps ]. discriminate. inv H5. - destruct (add_reload_correct (S s) it1 (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m) - as [rs1 [A [B C]]]. - exploit IHsrcs; eauto with coqlib. - eapply list_disjoint_cons_left; eauto. - eapply list_disjoint_cons_left; eauto. - intros [rs' [P [Q [T U]]]]. - exists rs'. split. eapply star_trans; eauto. - split. simpl. decEq. rewrite <- B. apply T. - auto. - eapply list_disjoint_notin. eexact H4. eauto with coqlib. - eapply list_disjoint_notin. eapply H7. auto with coqlib. - rewrite Q. apply list_map_exten. intros. symmetry. apply C. - simpl. destruct x; auto. red; intro; subst m0. apply H1 with it1; auto with coqlib. - auto. - destruct x. apply notin_destroyed_move_1. auto. apply notin_destroyed_move_2. - split. simpl. intros. transitivity (rs1 (R r)). - apply T; tauto. apply C. simpl. tauto. auto. - apply notin_destroyed_move_1; auto. - intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. auto. apply notin_destroyed_move_2. - (* ... of float type *) - destruct ftmps as [ | ft1 ftmps ]. discriminate. inv H6. - destruct (add_reload_correct (S s) ft1 (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m) - as [rs1 [A [B C]]]. - exploit IHsrcs; eauto with coqlib. - eapply list_disjoint_cons_right; eauto. - eapply list_disjoint_cons_left; eauto. - intros [rs' [P [Q [T U]]]]. - exists rs'. split. eapply star_trans; eauto. - split. simpl. decEq. rewrite <- B. apply T. - eapply list_disjoint_notin; eauto. apply list_disjoint_sym. apply H4. auto with coqlib. - auto. - eapply list_disjoint_notin. eexact H8. auto with coqlib. - rewrite Q. apply list_map_exten. intros. symmetry. apply C. - simpl. destruct x; auto. red; intro; subst m0. apply H2 with ft1; auto with coqlib. auto. - destruct x. apply notin_destroyed_move_1. auto. apply notin_destroyed_move_2. - split. simpl. intros. transitivity (rs1 (R r)). - apply T; tauto. apply C. simpl. tauto. auto. - apply notin_destroyed_move_1; auto. - intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. auto. apply notin_destroyed_move_2; auto. -Qed. - -Lemma add_reloads_correct: - forall srcs k rs m, - enough_temporaries srcs = true -> - locs_acceptable srcs -> - exists rs', - star step ge (State stk f sp (add_reloads srcs (regs_for srcs) k) rs m) - E0 (State stk f sp k rs' m) /\ - reglist rs' (regs_for srcs) = List.map rs srcs /\ - forall l, Loc.notin l temporaries -> rs' l = rs l. -Proof. -Transparent destroyed_at_move_regs. - intros. - unfold enough_temporaries in H. - exploit add_reloads_correct_rec. eauto. eauto. - intros. generalize (H0 _ H1). unfold loc_acceptable. generalize H2. - simpl. intuition congruence. - intros. generalize (H0 _ H1). unfold loc_acceptable. generalize H2. - simpl. intuition congruence. - intros. generalize (H0 _ H1). unfold loc_acceptable. - simpl. intuition congruence. - red; simpl; intros. intuition congruence. - unfold int_temporaries. NoRepet. - unfold float_temporaries. NoRepet. - red; simpl; intros. intuition congruence. - red; simpl; intros. intuition congruence. - intros [rs' [EX [RES [OTH1 OTH2]]]]. - exists rs'. split. eexact EX. - split. exact RES. - intros. destruct l. generalize (Loc.notin_not_in _ _ H1); simpl; intro. - apply OTH1; simpl; intuition congruence. - apply OTH2. -Qed. - -Lemma add_move_correct: - forall src dst k rs m, - exists rs', - star step ge (State stk f sp (add_move src dst k) rs m) - E0 (State stk f sp k rs' m) /\ - rs' dst = rs src /\ - forall l, - Loc.diff l dst -> Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> Loc.notin l destroyed_at_move -> - rs' l = rs l. -Proof. - intros; unfold add_move. - destruct (Loc.eq src dst). - subst dst. exists rs. split. apply star_refl. tauto. - destruct src. - (* src is a register *) - generalize (add_spill_correct m0 dst k rs m); intros [rs' [EX [RES OTH]]]. - exists rs'; intuition. apply OTH. apply Loc.diff_sym; auto. auto. - destruct dst. - (* src is a stack slot, dst a register *) - generalize (add_reload_correct (S s) m0 k rs m); intros [rs' [EX [RES OTH]]]. - exists rs'; intuition. apply OTH. apply Loc.diff_sym; auto. right; apply Loc.diff_sym; auto. auto. - (* src and dst are stack slots *) - set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end). - generalize (add_reload_correct (S s) tmp (add_spill tmp (S s0) k) rs m); - intros [rs1 [EX1 [RES1 OTH1]]]. - generalize (add_spill_correct tmp (S s0) k rs1 m); - intros [rs2 [EX2 [RES2 OTH2]]]. - exists rs2. split. - eapply star_trans; eauto. traceEq. - split. congruence. - intros. rewrite OTH2. apply OTH1. - apply Loc.diff_sym. unfold tmp; case (slot_type s); auto. - right. apply Loc.diff_sym; auto. auto. - apply Loc.diff_sym; auto. auto. -Qed. - -Lemma effect_move_sequence: - forall k moves rs m, - let k' := List.fold_right (fun p k => add_move (fst p) (snd p) k) k moves in - exists rs', - star step ge (State stk f sp k' rs m) - E0 (State stk f sp k rs' m) /\ - effect_seqmove moves rs rs'. -Proof. - induction moves; intros until m; simpl. - exists rs; split. constructor. constructor. - destruct a as [src dst]; simpl. - set (k1 := fold_right - (fun (p : loc * loc) (k : code) => add_move (fst p) (snd p) k) - k moves) in *. - destruct (add_move_correct src dst k1 rs m) as [rs1 [A [B C]]]. - destruct (IHmoves rs1 m) as [rs' [D E]]. - exists rs'; split. - eapply star_trans; eauto. - econstructor; eauto. red. tauto. -Qed. - -Lemma parallel_move_correct: - forall srcs dsts k rs m, - List.length srcs = List.length dsts -> - Loc.no_overlap srcs dsts -> - Loc.norepet dsts -> - Loc.disjoint srcs temporaries -> - Loc.disjoint dsts temporaries -> - exists rs', - star step ge (State stk f sp (parallel_move srcs dsts k) rs m) - E0 (State stk f sp k rs' m) /\ - List.map rs' dsts = List.map rs srcs /\ - forall l, Loc.notin l dsts -> Loc.notin l temporaries -> rs' l = rs l. -Proof. - intros. - generalize (effect_move_sequence k (parmove srcs dsts) rs m). - intros [rs' [EXEC EFFECT]]. - exists rs'. split. exact EXEC. - apply effect_parmove; auto. -Qed. - -Lemma parallel_move_arguments_correct: - forall args sg k rs m, - List.map Loc.type args = sg.(sig_args) -> - locs_acceptable args -> - exists rs', - star step ge (State stk f sp (parallel_move args (loc_arguments sg) k) rs m) - E0 (State stk f sp k rs' m) /\ - List.map rs' (loc_arguments sg) = List.map rs args /\ - forall l, Loc.notin l (loc_arguments sg) -> Loc.notin l temporaries -> rs' l = rs l. -Proof. - intros. apply parallel_move_correct. - transitivity (length sg.(sig_args)). - rewrite <- H. symmetry; apply list_length_map. - symmetry. apply loc_arguments_length. - apply no_overlap_arguments; auto. - apply loc_arguments_norepet. - apply locs_acceptable_disj_temporaries; auto. - apply loc_arguments_not_temporaries. -Qed. - -Lemma parallel_move_parameters_correct: - forall params sg k rs m, - List.map Loc.type params = sg.(sig_args) -> - locs_acceptable params -> - Loc.norepet params -> - exists rs', - star step ge (State stk f sp (parallel_move (loc_parameters sg) params k) rs m) - E0 (State stk f sp k rs' m) /\ - List.map rs' params = List.map rs (loc_parameters sg) /\ - forall l, Loc.notin l params -> Loc.notin l temporaries -> rs' l = rs l. -Proof. - intros. apply parallel_move_correct. - transitivity (length sg.(sig_args)). - apply loc_parameters_length. - rewrite <- H. apply list_length_map. - apply no_overlap_parameters; auto. - auto. apply loc_parameters_not_temporaries. - apply locs_acceptable_disj_temporaries; auto. -Qed. - -End LINEAR_CONSTRUCTORS. - -(** * Agreement between values of locations *) - -(** The predicate [agree] states that two location maps - give compatible values to all acceptable locations, - that is, non-temporary registers and [Local] stack slots. - The notion of compatibility used is the [Val.lessdef] ordering, - which enables a [Vundef] value in the original program to be refined - into any value in the transformed program. - - A typical situation where this refinement of values occurs is at - function entry point. In LTLin, all registers except those - belonging to the function parameters are set to [Vundef]. In - Linear, these registers have whatever value they had in the caller - function. This difference is harmless: if the original LTLin code - does not get stuck, we know that it does not use any of these - [Vundef] values. *) - -Definition agree (rs1 rs2: locset) : Prop := - forall l, loc_acceptable l -> Val.lessdef (rs1 l) (rs2 l). - -Lemma agree_loc: - forall rs1 rs2 l, - agree rs1 rs2 -> loc_acceptable l -> Val.lessdef (rs1 l) (rs2 l). -Proof. - auto. -Qed. - -Lemma agree_locs: - forall rs1 rs2 ll, - agree rs1 rs2 -> locs_acceptable ll -> - Val.lessdef_list (map rs1 ll) (map rs2 ll). -Proof. - induction ll; simpl; intros. - constructor. - constructor. apply H. apply H0; auto with coqlib. - apply IHll; auto. red; intros. apply H0; auto with coqlib. -Qed. - -Lemma agree_exten: - forall rs ls1 ls2, - agree rs ls1 -> - (forall l, Loc.notin l temporaries -> ls2 l = ls1 l) -> - agree rs ls2. -Proof. - intros; red; intros. rewrite H0. auto. - apply temporaries_not_acceptable; auto. -Qed. - -Remark undef_temps_others: - forall rs l, - Loc.notin l temporaries -> LTL.undef_temps rs l = rs l. -Proof. - intros. apply Locmap.guo; auto. -Qed. - -Remark undef_op_others: - forall op rs l, - Loc.notin l temporaries -> undef_op op rs l = rs l. -Proof. - intros. generalize (undef_temps_others rs l H); intro. - unfold undef_op; destruct op; auto; apply Locmap.guo; simpl in *; tauto. -Qed. - -Lemma agree_undef_temps: - forall rs1 rs2, - agree rs1 rs2 -> agree (LTL.undef_temps rs1) rs2. -Proof. - intros; red; intros. rewrite undef_temps_others; auto. - apply Conventions.temporaries_not_acceptable. auto. -Qed. - -Lemma agree_undef_temps2: - forall rs1 rs2, - agree rs1 rs2 -> agree (LTL.undef_temps rs1) (LTL.undef_temps rs2). -Proof. - intros. apply agree_exten with rs2. apply agree_undef_temps; auto. - intros. apply undef_temps_others; auto. -Qed. - -Lemma agree_set: - forall rs1 rs2 rs2' l v, - loc_acceptable l -> - Val.lessdef v (rs2' l) -> - (forall l', Loc.diff l l' -> Loc.notin l' temporaries -> rs2' l' = rs2 l') -> - agree rs1 rs2 -> agree (Locmap.set l v rs1) rs2'. -Proof. - intros; red; intros. - destruct (Loc.eq l l0). - subst l0. rewrite Locmap.gss. auto. - assert (Loc.diff l l0). eapply loc_acceptable_noteq_diff; eauto. - rewrite Locmap.gso; auto. rewrite H1. auto. auto. - apply temporaries_not_acceptable; auto. -Qed. - -Lemma agree_set2: - forall rs1 rs2 rs2' l v, - loc_acceptable l -> - Val.lessdef v (rs2' l) -> - (forall l', Loc.diff l l' -> Loc.notin l' temporaries -> rs2' l' = rs2 l') -> - agree rs1 rs2 -> agree (Locmap.set l v (LTL.undef_temps rs1)) rs2'. -Proof. - intros. eapply agree_set; eauto. apply agree_undef_temps; auto. -Qed. - -Lemma agree_find_funct: - forall (ge: Linear.genv) rs ls r f, - Genv.find_funct ge (rs r) = Some f -> - agree rs ls -> - loc_acceptable r -> - Genv.find_funct ge (ls r) = Some f. -Proof. - intros. - assert (Val.lessdef (rs r) (ls r)). eapply agree_loc; eauto. - exploit Genv.find_funct_inv; eauto. intros [b EQ]. rewrite EQ in H2. - inv H2. rewrite <- EQ. auto. -Qed. - -Lemma agree_postcall_1: - forall rs ls, - agree rs ls -> - agree (LTL.postcall_locs rs) ls. -Proof. - intros; red; intros. unfold LTL.postcall_locs. - destruct l; auto. - destruct (In_dec Loc.eq (R m) temporaries). constructor. - destruct (In_dec Loc.eq (R m) destroyed_at_call). constructor. - auto. -Qed. - -Lemma agree_postcall_2: - forall rs ls ls', - agree (LTL.postcall_locs rs) ls -> - (forall l, - loc_acceptable l -> ~In l destroyed_at_call -> ~In l temporaries -> - ls' l = ls l) -> - agree (LTL.postcall_locs rs) ls'. -Proof. - intros; red; intros. generalize (H l H1). unfold LTL.postcall_locs. - destruct l. - destruct (In_dec Loc.eq (R m) temporaries). intro; constructor. - destruct (In_dec Loc.eq (R m) destroyed_at_call). intro; constructor. - intro. rewrite H0; auto. - intro. rewrite H0; auto. - simpl. intuition congruence. - simpl. intuition congruence. -Qed. - -Lemma agree_postcall_call: - forall rs ls ls' sig, - agree rs ls -> - (forall l, - Loc.notin l (loc_arguments sig) -> Loc.notin l temporaries -> - ls' l = ls l) -> - agree (LTL.postcall_locs rs) ls'. -Proof. - intros. apply agree_postcall_2 with ls. apply agree_postcall_1; auto. - intros. apply H0. - apply arguments_not_preserved; auto. - destruct l; simpl. simpl in H2. intuition congruence. - destruct s; tauto. - apply temporaries_not_acceptable; auto. -Qed. - -Lemma agree_init_locs: - forall ls dsts vl, - locs_acceptable dsts -> - Loc.norepet dsts -> - Val.lessdef_list vl (map ls dsts) -> - agree (LTL.init_locs vl dsts) ls. -Proof. - induction dsts; intros; simpl. - red; intros. unfold Locmap.init. constructor. - simpl in H1. inv H1. inv H0. - apply agree_set with ls. apply H; auto with coqlib. auto. auto. - apply IHdsts; auto. red; intros; apply H; auto with coqlib. -Qed. - -Lemma call_regs_parameters: - forall ls sig, - map (call_regs ls) (loc_parameters sig) = map ls (loc_arguments sig). -Proof. - intros. unfold loc_parameters. rewrite list_map_compose. - apply list_map_exten; intros. - unfold call_regs, parameter_of_argument. - generalize (loc_arguments_acceptable _ _ H). - unfold loc_argument_acceptable. - destruct x. - intros. destruct (in_dec Loc.eq (R m) temporaries). contradiction. auto. - destruct s; intros; try contradiction. auto. -Qed. - -Lemma return_regs_preserve: - forall ls1 ls2 l, - ~ In l temporaries -> - ~ In l destroyed_at_call -> - return_regs ls1 ls2 l = ls1 l. -Proof. - intros. unfold return_regs. destruct l; auto. - destruct (In_dec Loc.eq (R m) temporaries). contradiction. - destruct (In_dec Loc.eq (R m) destroyed_at_call). contradiction. - auto. -Qed. - -Lemma return_regs_arguments: - forall ls1 ls2 sig, - tailcall_possible sig -> - map (return_regs ls1 ls2) (loc_arguments sig) = map ls2 (loc_arguments sig). -Proof. - intros. apply list_map_exten; intros. - unfold return_regs. generalize (H x H0). destruct x; intros. - destruct (In_dec Loc.eq (R m) temporaries). auto. - destruct (In_dec Loc.eq (R m) destroyed_at_call). auto. - elim n0. eapply arguments_caller_save; eauto. - contradiction. -Qed. - -Lemma return_regs_result: - forall ls1 ls2 sig, - return_regs ls1 ls2 (R (loc_result sig)) = ls2 (R (loc_result sig)). -Proof. - intros. unfold return_regs. - destruct (In_dec Loc.eq (R (loc_result sig)) temporaries). auto. - destruct (In_dec Loc.eq (R (loc_result sig)) destroyed_at_call). auto. - generalize (loc_result_caller_save sig). tauto. -Qed. - -(** * Preservation of labels and gotos *) - -Lemma find_label_add_spill: - forall lbl src dst k, - find_label lbl (add_spill src dst k) = find_label lbl k. -Proof. - intros. destruct dst; simpl; auto. - destruct (mreg_eq src m); auto. -Qed. - -Lemma find_label_add_reload: - forall lbl src dst k, - find_label lbl (add_reload src dst k) = find_label lbl k. -Proof. - intros. destruct src; simpl; auto. - destruct (mreg_eq m dst); auto. -Qed. - -Lemma find_label_add_reloads: - forall lbl srcs dsts k, - find_label lbl (add_reloads srcs dsts k) = find_label lbl k. -Proof. - induction srcs; intros; simpl. auto. - destruct dsts; auto. rewrite find_label_add_reload. auto. -Qed. - -Lemma find_label_add_move: - forall lbl src dst k, - find_label lbl (add_move src dst k) = find_label lbl k. -Proof. - intros; unfold add_move. - destruct (Loc.eq src dst); auto. - destruct src. apply find_label_add_spill. - destruct dst. apply find_label_add_reload. - rewrite find_label_add_reload. apply find_label_add_spill. -Qed. - -Lemma find_label_parallel_move: - forall lbl srcs dsts k, - find_label lbl (parallel_move srcs dsts k) = find_label lbl k. -Proof. - intros. unfold parallel_move. generalize (parmove srcs dsts). - induction m; simpl. auto. - rewrite find_label_add_move. auto. -Qed. - -Hint Rewrite find_label_add_spill find_label_add_reload - find_label_add_reloads find_label_add_move - find_label_parallel_move: labels. - -Opaque reg_for. - -Ltac FL := simpl; autorewrite with labels; auto. - -Lemma find_label_transf_instr: - forall lbl sg instr k, - find_label lbl (transf_instr sg instr k) = - if LTLin.is_label lbl instr then Some k else find_label lbl k. -Proof. - intros. destruct instr; FL. - destruct (is_move_operation o l); FL; FL. - FL. - destruct (enough_temporaries (l0 :: l)). - destruct (regs_for (l0 :: l)); FL. - FL. FL. - destruct s0; FL; FL; FL. - destruct s0; FL; FL; FL. - destruct (ef_reloads e). FL. FL. FL. - destruct o; FL. -Qed. - -Lemma transf_code_cons: - forall f i c, transf_code f (i :: c) = transf_instr f i (transf_code f c). -Proof. - unfold transf_code; intros. rewrite list_fold_right_eq; auto. -Qed. - -Lemma find_label_transf_code: - forall sg lbl c, - find_label lbl (transf_code sg c) = - option_map (transf_code sg) (LTLin.find_label lbl c). -Proof. - induction c; simpl. - auto. - rewrite transf_code_cons. - rewrite find_label_transf_instr. - destruct (LTLin.is_label lbl a); auto. -Qed. - -Lemma find_label_transf_function: - forall lbl f c, - LTLin.find_label lbl (LTLin.fn_code f) = Some c -> - find_label lbl (Linear.fn_code (transf_function f)) = - Some (transf_code f c). -Proof. - intros. destruct f; simpl in *. FL. - rewrite find_label_transf_code. rewrite H; auto. -Qed. - -(** * Semantic preservation *) - -Section PRESERVATION. - -Variable prog: LTLin.program. -Let tprog := transf_program prog. -Hypothesis WT_PROG: LTLintyping.wt_program prog. - -Let ge := Genv.globalenv prog. -Let tge := Genv.globalenv tprog. - -Lemma functions_translated: - forall v f, - Genv.find_funct ge v = Some f -> - Genv.find_funct tge v = Some (transf_fundef f). -Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog). - -Lemma function_ptr_translated: - forall v f, - Genv.find_funct_ptr ge v = Some f -> - Genv.find_funct_ptr tge v = Some (transf_fundef f). -Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog). - -Lemma symbols_preserved: - forall id, - Genv.find_symbol tge id = Genv.find_symbol ge id. -Proof (@Genv.find_symbol_transf _ _ _ transf_fundef prog). - -Lemma varinfo_preserved: - forall b, Genv.find_var_info tge b = Genv.find_var_info ge b. -Proof (@Genv.find_var_info_transf _ _ _ transf_fundef prog). - -Lemma sig_preserved: - forall f, funsig (transf_fundef f) = LTLin.funsig f. -Proof. - destruct f; reflexivity. -Qed. - -Lemma find_function_wt: - forall ros rs f, - LTLin.find_function ge ros rs = Some f -> wt_fundef f. -Proof. - intros until f. destruct ros; simpl. - intro. eapply Genv.find_funct_prop with (p := prog); eauto. - caseEq (Genv.find_symbol ge i); intros. - eapply Genv.find_funct_ptr_prop with (p := prog); eauto. - congruence. -Qed. - -(** The [match_state] predicate relates states in the original LTLin - program and the transformed Linear program. The main property - it enforces are: -- Agreement between the values of locations in the two programs, - according to the [agree] or [agree_arguments] predicates. -- Agreement between the memory states of the two programs, - according to the [Mem.lessdef] predicate. -- Lists of LTLin instructions appearing in the source state - are always suffixes of the code for the corresponding functions. -- Well-typedness of the source code, which ensures that - only acceptable locations are accessed by this code. -- Agreement over return types during calls: the return type of a function - is always equal to the return type of the signature of the corresponding - call. This invariant is necessary since the conventional location - used for passing return values depend on the return type. This invariant - is enforced through the third parameter of the [match_stackframes] - predicate, which is the signature of the called function. -*) - -Inductive match_stackframes: - list LTLin.stackframe -> list Linear.stackframe -> signature -> Prop := - | match_stackframes_nil: - forall sig, - sig.(sig_res) = Some Tint -> - match_stackframes nil nil sig - | match_stackframes_cons: - forall res f sp c rs s s' c' ls sig, - match_stackframes s s' (LTLin.fn_sig f) -> - c' = add_spill (loc_result sig) res (transf_code f c) -> - agree (LTL.postcall_locs rs) ls -> - loc_acceptable res -> - wt_function f -> - is_tail c (LTLin.fn_code f) -> - match_stackframes - (LTLin.Stackframe res f sp (LTL.postcall_locs rs) c :: s) - (Linear.Stackframe (transf_function f) sp ls c' :: s') - sig. - -Inductive match_states: LTLin.state -> Linear.state -> Prop := - | match_states_intro: - forall s f sp c rs m s' ls tm - (STACKS: match_stackframes s s' (LTLin.fn_sig f)) - (AG: agree rs ls) - (WT: wt_function f) - (TL: is_tail c (LTLin.fn_code f)) - (MMD: Mem.extends m tm), - match_states (LTLin.State s f sp c rs m) - (Linear.State s' (transf_function f) sp (transf_code f c) ls tm) - | match_states_call: - forall s f args m s' ls tm - (STACKS: match_stackframes s s' (LTLin.funsig f)) - (AG: Val.lessdef_list args (map ls (loc_arguments (LTLin.funsig f)))) - (PRES: forall l, ~In l temporaries -> ~In l destroyed_at_call -> - ls l = parent_locset s' l) - (WT: wt_fundef f) - (MMD: Mem.extends m tm), - match_states (LTLin.Callstate s f args m) - (Linear.Callstate s' (transf_fundef f) ls tm) - | match_states_return: - forall s res m s' ls tm sig - (STACKS: match_stackframes s s' sig) - (AG: Val.lessdef res (ls (R (loc_result sig)))) - (PRES: forall l, ~In l temporaries -> ~In l destroyed_at_call -> - ls l = parent_locset s' l) - (MMD: Mem.extends m tm), - match_states (LTLin.Returnstate s res m) - (Linear.Returnstate s' ls tm). - -Lemma match_stackframes_change_sig: - forall s s' sig1 sig2, - match_stackframes s s' sig1 -> - sig2.(sig_res) = sig1.(sig_res) -> - match_stackframes s s' sig2. -Proof. - intros. inv H. constructor. congruence. - econstructor; eauto. unfold loc_result. rewrite H0. auto. -Qed. - -Ltac ExploitWT := - match goal with - | [ WT: wt_function _, TL: is_tail _ _ |- _ ] => - generalize (wt_instrs _ WT _ (is_tail_in TL)); intro WTI - end. - -(** The proof of semantic preservation is a simulation argument - based on diagrams of the following form: -<< - st1 --------------- st2 - | | - t| *|t - | | - v v - st1'--------------- st2' ->> - It is possible for the transformed code to take no transition, - remaining in the same state; for instance, if the source transition - corresponds to a move instruction that was eliminated. - To ensure that this cannot occur infinitely often in a row, - we use the following [measure] function that just counts the - remaining number of instructions in the source code sequence. *) - -Definition measure (st: LTLin.state) : nat := - match st with - | LTLin.State s f sp c ls m => List.length c - | LTLin.Callstate s f ls m => 0%nat - | LTLin.Returnstate s ls m => 0%nat - end. - -Theorem transf_step_correct: - forall s1 t s2, LTLin.step ge s1 t s2 -> - forall s1' (MS: match_states s1 s1'), - (exists s2', plus Linear.step tge s1' t s2' /\ match_states s2 s2') - \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat. -Proof. - Opaque regs_for. Opaque add_reloads. - induction 1; intros; try inv MS; try rewrite transf_code_cons; simpl. - - (* Lop *) - ExploitWT. inv WTI. - (* move *) - simpl. - destruct (add_move_correct tge s' (transf_function f) sp r1 res (transf_code f b) ls tm) - as [ls2 [A [B C]]]. - inv A. - right. split. omega. split. auto. - rewrite H1. rewrite H1. econstructor; eauto with coqlib. - apply agree_set2 with ls2; auto. - rewrite B. simpl in H; inversion H. auto. - left; econstructor; split. eapply plus_left; eauto. - econstructor; eauto with coqlib. - apply agree_set2 with ls; auto. - rewrite B. simpl in H; inversion H. auto. - intros. apply C. apply Loc.diff_sym; auto. - simpl in H7; tauto. simpl in H7; tauto. simpl in *; tauto. - (* other ops *) - assert (is_move_operation op args = None). - caseEq (is_move_operation op args); intros. - destruct (is_move_operation_correct _ _ H0). congruence. - auto. - rewrite H0. - exploit add_reloads_correct. - eapply enough_temporaries_op_args; eauto. auto. - intros [ls2 [A [B C]]]. instantiate (1 := ls) in B. - assert (exists tv, eval_operation tge sp op (reglist ls2 (regs_for args)) tm = Some tv - /\ Val.lessdef v tv). - apply eval_operation_lessdef with (map rs args) m; auto. - rewrite B. eapply agree_locs; eauto. - rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. - destruct H1 as [tv [P Q]]. - exploit add_spill_correct. - intros [ls3 [D [E F]]]. - left; econstructor; split. - eapply star_plus_trans. eexact A. - eapply plus_left. eapply exec_Lop with (v := tv); eauto. - eexact D. eauto. traceEq. - econstructor; eauto with coqlib. - apply agree_set2 with ls; auto. - rewrite E. rewrite Locmap.gss. auto. - intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_op_others; auto. - apply reg_for_diff; auto. simpl in *; tauto. - - (* Lload *) - ExploitWT; inv WTI. - exploit add_reloads_correct. - eapply enough_temporaries_addr; eauto. auto. - intros [ls2 [A [B C]]]. - assert (exists ta, eval_addressing tge sp addr (reglist ls2 (regs_for args)) = Some ta - /\ Val.lessdef a ta). - apply eval_addressing_lessdef with (map rs args). - rewrite B. eapply agree_locs; eauto. - rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. - destruct H1 as [ta [P Q]]. - exploit Mem.loadv_extends; eauto. intros [tv [R S]]. - exploit add_spill_correct. - intros [ls3 [D [E F]]]. - left; econstructor; split. - eapply star_plus_trans. eauto. - eapply plus_left. eapply exec_Lload; eauto. - eauto. auto. traceEq. - econstructor; eauto with coqlib. - apply agree_set2 with ls; auto. - rewrite E. rewrite Locmap.gss. auto. - intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_temps_others; auto. - apply reg_for_diff; auto. simpl in *; tauto. - - (* Lstore *) - ExploitWT; inv WTI. - caseEq (enough_temporaries (src :: args)); intro ENOUGH. - destruct (regs_for_cons src args) as [rsrc [rargs EQ]]. rewrite EQ. - exploit add_reloads_correct. - eauto. red; simpl; intros. destruct H1. congruence. auto. - intros [ls2 [A [B C]]]. rewrite EQ in A. rewrite EQ in B. - injection B. intros D E. - simpl in B. - assert (exists ta, eval_addressing tge sp addr (reglist ls2 rargs) = Some ta - /\ Val.lessdef a ta). - apply eval_addressing_lessdef with (map rs args). - rewrite D. eapply agree_locs; eauto. - rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. - destruct H1 as [ta [P Q]]. - assert (X: Val.lessdef (rs src) (ls2 (R rsrc))). - rewrite E. eapply agree_loc; eauto. - exploit Mem.storev_extends. eexact MMD. eauto. eexact Q. eexact X. - intros [tm2 [Y Z]]. - left; econstructor; split. - eapply plus_right. eauto. - eapply exec_Lstore with (a := ta); eauto. - traceEq. - econstructor; eauto with coqlib. - apply agree_undef_temps2. apply agree_exten with ls; auto. - (* not enough temporaries *) - destruct (add_reloads_correct tge s' (transf_function f) sp args - (Lop (op_for_binary_addressing addr) (regs_for args) IT2 - :: add_reload src (reg_for src) - (Lstore chunk (Aindexed Int.zero) (IT2 :: nil) (reg_for src) - :: transf_code f b)) ls tm) - as [ls2 [A [B C]]]. - eapply enough_temporaries_addr; eauto. auto. - assert (exists ta, eval_addressing tge sp addr (reglist ls2 (regs_for args)) = Some ta - /\ Val.lessdef a ta). - apply eval_addressing_lessdef with (map rs args). - rewrite B. eapply agree_locs; eauto. - rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. - destruct H1 as [ta [P Q]]. - set (ls3 := Locmap.set (R IT2) ta (undef_op (op_for_binary_addressing addr) ls2)). - destruct (add_reload_correct_2 tge s' (transf_function f) sp src - (Lstore chunk (Aindexed Int.zero) (IT2 :: nil) (reg_for src) - :: transf_code f b) - ls3 tm H8) - as [ls4 [D [E [F G]]]]. - assert (NT: Loc.notin src temporaries) by (apply temporaries_not_acceptable; auto). - assert (X: Val.lessdef (rs src) (ls4 (R (reg_for src)))). - rewrite E. unfold ls3. rewrite Locmap.gso. - rewrite undef_op_others; auto. rewrite C; auto. - apply Loc.diff_sym. simpl in NT; tauto. - exploit Mem.storev_extends. eexact MMD. eauto. eexact Q. eexact X. - intros [tm2 [Y Z]]. - left; econstructor; split. - eapply star_plus_trans. eauto. - eapply plus_left. eapply exec_Lop with (v := ta). - rewrite B. eapply not_enough_temporaries_addr; eauto. - rewrite <- B; auto. - eapply star_right. eauto. - eapply exec_Lstore with (a := ta); eauto. - simpl reglist. rewrite G. unfold ls3. rewrite Locmap.gss. simpl. - destruct ta; simpl in Y; try discriminate. simpl; rewrite Int.add_zero; auto. - reflexivity. reflexivity. traceEq. - econstructor; eauto with coqlib. - apply agree_undef_temps2. apply agree_exten with ls; auto. - intros. rewrite F; auto. unfold ls3. rewrite Locmap.gso. - rewrite undef_op_others; auto. - apply Loc.diff_sym. simpl in H1; tauto. - - (* Lcall *) - ExploitWT. inversion WTI. subst ros0 args0 res0. rewrite <- H0. - assert (WTF': wt_fundef f'). eapply find_function_wt; eauto. - destruct ros as [fn | id]. - (* indirect call *) - red in H6. destruct H6 as [OK1 [OK2 OK3]]. - rewrite <- H0 in H4. rewrite <- H0 in OK3. - destruct (parallel_move_arguments_correct tge s' (transf_function f) sp - args sig - (add_reload fn (reg_for fn) - (Lcall sig (inl ident (reg_for fn)) - :: add_spill (loc_result sig) res (transf_code f b))) - ls tm H4 H7) - as [ls2 [A [B C]]]. - destruct (add_reload_correct_2 tge s' (transf_function f) sp fn - (Lcall sig (inl ident (reg_for fn)) - :: add_spill (loc_result sig) res (transf_code f b)) - ls2 tm OK2) - as [ls3 [D [E [F G]]]]. - assert (ARGS: Val.lessdef_list (map rs args) - (map ls3 (loc_arguments sig))). - replace (map ls3 (loc_arguments sig)) with (map ls2 (loc_arguments sig)). - rewrite B. apply agree_locs; auto. - apply list_map_exten; intros. apply F. - apply Loc.disjoint_notin with (loc_arguments sig). - apply loc_arguments_not_temporaries. auto. - left; econstructor; split. - eapply star_plus_trans. eexact A. eapply plus_right. eexact D. - eapply exec_Lcall; eauto. - simpl. rewrite E. rewrite C. eapply agree_find_funct; eauto. - apply functions_translated. eauto. - apply loc_acceptable_notin_notin; auto. - apply temporaries_not_acceptable; auto. - rewrite H0; symmetry; apply sig_preserved. - eauto. traceEq. - econstructor; eauto. - econstructor; eauto with coqlib. - rewrite H0. auto. - apply agree_postcall_call with ls sig; auto. - intros. rewrite F; auto. congruence. - (* direct call *) - rewrite <- H0 in H4. - destruct (parallel_move_arguments_correct tge s' (transf_function f) sp - args sig - (Lcall sig (inr mreg id) - :: add_spill (loc_result sig) res (transf_code f b)) - ls tm H4 H7) - as [ls3 [D [E F]]]. - assert (ARGS: Val.lessdef_list (map rs args) (map ls3 (loc_arguments sig))). - rewrite E. apply agree_locs; auto. - left; econstructor; split. - eapply plus_right. eauto. - eapply exec_Lcall; eauto. - simpl. rewrite symbols_preserved. - generalize H; simpl. destruct (Genv.find_symbol ge id). - apply function_ptr_translated; auto. congruence. - rewrite H0. symmetry; apply sig_preserved. - traceEq. - econstructor; eauto. - econstructor; eauto with coqlib. rewrite H0; auto. - apply agree_postcall_call with ls sig; auto. - congruence. - - (* Ltailcall *) - ExploitWT. inversion WTI. subst ros0 args0. - assert (WTF': wt_fundef f'). eapply find_function_wt; eauto. - rewrite <- H0. - exploit Mem.free_parallel_extends; eauto. intros [tm' [FREE MMD']]. - destruct ros as [fn | id]. - (* indirect call *) - red in H5. destruct H5 as [OK1 [OK2 OK3]]. - rewrite <- H0 in H4. rewrite <- H0 in OK3. - destruct (parallel_move_arguments_correct tge s' (transf_function f) (Vptr stk Int.zero) - args sig - (add_reload fn IT1 - (Ltailcall sig (inl ident IT1) :: transf_code f b)) - ls tm H4 H6) - as [ls2 [A [B C]]]. - destruct (add_reload_correct tge s' (transf_function f) (Vptr stk Int.zero) fn IT1 - (Ltailcall sig (inl ident IT1) :: transf_code f b) - ls2 tm) - as [ls3 [D [E F]]]. - assert (ARGS: Val.lessdef_list (map rs args) - (map ls3 (loc_arguments sig))). - replace (map ls3 (loc_arguments sig)) with (map ls2 (loc_arguments sig)). - rewrite B. apply agree_locs; auto. - apply list_map_exten; intros. - exploit Loc.disjoint_notin. apply loc_arguments_not_temporaries. eauto. simpl; intros. - apply F. - apply Loc.diff_sym; tauto. - auto. - simpl; tauto. - left; econstructor; split. - eapply star_plus_trans. eexact A. eapply plus_right. eexact D. - eapply exec_Ltailcall; eauto. - simpl. rewrite E. rewrite C. eapply agree_find_funct; eauto. - apply functions_translated. eauto. - apply loc_acceptable_notin_notin; auto. - apply temporaries_not_acceptable; auto. - rewrite H0; symmetry; apply sig_preserved. - eauto. traceEq. - econstructor; eauto. - eapply match_stackframes_change_sig; eauto. - rewrite return_regs_arguments; auto. congruence. - exact (return_regs_preserve (parent_locset s') ls3). - (* direct call *) - rewrite <- H0 in H4. - destruct (parallel_move_arguments_correct tge s' (transf_function f) (Vptr stk Int.zero) - args sig - (Ltailcall sig (inr mreg id) :: transf_code f b) - ls tm H4 H6) - as [ls3 [D [E F]]]. - assert (ARGS: Val.lessdef_list (map rs args) - (map ls3 (loc_arguments sig))). - rewrite E. apply agree_locs. apply agree_exten with ls; auto. auto. - left; econstructor; split. - eapply plus_right. eauto. - eapply exec_Ltailcall; eauto. - simpl. rewrite symbols_preserved. - generalize H; simpl. destruct (Genv.find_symbol ge id). - apply function_ptr_translated; auto. congruence. - rewrite H0. symmetry; apply sig_preserved. - traceEq. - econstructor; eauto. - eapply match_stackframes_change_sig; eauto. - rewrite return_regs_arguments; auto. congruence. - exact (return_regs_preserve (parent_locset s') ls3). - - (* Lbuiltin *) - ExploitWT; inv WTI. - case_eq (ef_reloads ef); intro RELOADS. - exploit add_reloads_correct. - instantiate (1 := args). apply arity_ok_enough. rewrite H3. destruct H5. auto. congruence. auto. - intros [ls2 [A [B C]]]. - exploit external_call_mem_extends; eauto. - apply agree_locs; eauto. - intros [v' [tm' [P [Q [R S]]]]]. - exploit add_spill_correct. - intros [ls3 [D [E F]]]. - left; econstructor; split. - eapply star_plus_trans. eauto. - eapply plus_left. eapply exec_Lbuiltin. rewrite B. - eapply external_call_symbols_preserved; eauto. - exact symbols_preserved. exact varinfo_preserved. - eauto. reflexivity. traceEq. - econstructor; eauto with coqlib. - apply agree_set with ls; auto. - rewrite E. rewrite Locmap.gss. auto. - intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_temps_others; auto. - apply reg_for_diff; auto. simpl in *; tauto. - (* no reload *) - exploit external_call_mem_extends; eauto. - apply agree_locs; eauto. - intros [v' [tm' [P [Q [R S]]]]]. - assert (EQ: v = Vundef). - destruct ef; simpl in RELOADS; try congruence. simpl in H; inv H. auto. - subst v. - left; econstructor; split. - apply plus_one. eapply exec_Lannot. - eapply external_call_symbols_preserved; eauto. - exact symbols_preserved. exact varinfo_preserved. - econstructor; eauto with coqlib. - apply agree_set with ls; auto. - - (* Llabel *) - left; econstructor; split. - apply plus_one. apply exec_Llabel. - econstructor; eauto with coqlib. - - (* Lgoto *) - left; econstructor; split. - apply plus_one. apply exec_Lgoto. apply find_label_transf_function; eauto. - econstructor; eauto. - eapply LTLin.find_label_is_tail; eauto. - - (* Lcond true *) - ExploitWT; inv WTI. - exploit add_reloads_correct. - eapply enough_temporaries_cond; eauto. auto. - intros [ls2 [A [B C]]]. - left; econstructor; split. - eapply plus_right. eauto. eapply exec_Lcond_true; eauto. - rewrite B. apply eval_condition_lessdef with (map rs args) m; auto. - eapply agree_locs; eauto. - apply find_label_transf_function; eauto. - traceEq. - econstructor; eauto. - apply agree_undef_temps2. apply agree_exten with ls; auto. - eapply LTLin.find_label_is_tail; eauto. - - (* Lcond false *) - ExploitWT; inv WTI. - exploit add_reloads_correct. - eapply enough_temporaries_cond; eauto. auto. - intros [ls2 [A [B C]]]. - left; econstructor; split. - eapply plus_right. eauto. eapply exec_Lcond_false; eauto. - rewrite B. apply eval_condition_lessdef with (map rs args) m; auto. - eapply agree_locs; eauto. - traceEq. - econstructor; eauto with coqlib. - apply agree_undef_temps2. apply agree_exten with ls; auto. - - (* Ljumptable *) - ExploitWT; inv WTI. - exploit add_reload_correct_2; eauto. - intros [ls2 [A [B [C D]]]]. - left; econstructor; split. - eapply plus_right. eauto. eapply exec_Ljumptable; eauto. - assert (Val.lessdef (rs arg) (ls arg)). apply AG. auto. - rewrite H in H2. inv H2. congruence. - apply find_label_transf_function; eauto. - traceEq. - econstructor; eauto with coqlib. - apply agree_undef_temps2. apply agree_exten with ls; auto. - eapply LTLin.find_label_is_tail; eauto. - - (* Lreturn *) - ExploitWT; inv WTI. - exploit Mem.free_parallel_extends; eauto. intros [tm' [FREE MMD']]. - destruct or; simpl. - (* with an argument *) - exploit add_reload_correct. - intros [ls2 [A [B C]]]. - left; econstructor; split. - eapply plus_right. eauto. eapply exec_Lreturn; eauto. - traceEq. - econstructor; eauto. - rewrite return_regs_result. rewrite B. apply agree_loc; auto. - apply return_regs_preserve. - (* without an argument *) - left; econstructor; split. - apply plus_one. eapply exec_Lreturn; eauto. - econstructor; eauto. - apply return_regs_preserve. - - (* internal function *) - simpl in WT. inversion_clear WT. inversion H0. simpl in AG. - exploit Mem.alloc_extends. eauto. eauto. - instantiate (1 := 0); omega. instantiate (1 := LTLin.fn_stacksize f); omega. - intros [tm' [ALLOC MMD']]. - destruct (parallel_move_parameters_correct tge s' (transf_function f) - (Vptr stk Int.zero) (LTLin.fn_params f) (LTLin.fn_sig f) - (transf_code f (LTLin.fn_code f)) (call_regs ls) tm' - wt_params wt_acceptable wt_norepet) - as [ls2 [A [B C]]]. - assert (AG2: agree (LTL.init_locs args (fn_params f)) ls2). - apply agree_init_locs; auto. - rewrite B. rewrite call_regs_parameters. auto. - left; econstructor; split. - eapply plus_left. - eapply exec_function_internal; eauto. - simpl. eauto. traceEq. - econstructor; eauto with coqlib. - - (* external function *) - exploit external_call_mem_extends; eauto. - intros [res' [tm' [A [B [C D]]]]]. - left; econstructor; split. - apply plus_one. eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. - exact symbols_preserved. exact varinfo_preserved. - econstructor; eauto. - simpl. rewrite Locmap.gss. auto. - intros. rewrite Locmap.gso. auto. - simpl. destruct l; auto. red; intro. elim H1. subst m0. - generalize (loc_result_caller_save (ef_sig ef)). tauto. - - (* return *) - inv STACKS. - exploit add_spill_correct. intros [ls2 [A [B C]]]. - left; econstructor; split. - eapply plus_left. eapply exec_return; eauto. - eauto. traceEq. - econstructor; eauto. - apply agree_set with ls; auto. - rewrite B. auto. - intros. apply C; auto. simpl in *; tauto. - unfold parent_locset in PRES. - apply agree_postcall_2 with ls0; auto. -Qed. - -Lemma transf_initial_states: - forall st1, LTLin.initial_state prog st1 -> - exists st2, Linear.initial_state tprog st2 /\ match_states st1 st2. -Proof. - intros. inversion H. - econstructor; split. - econstructor. - apply Genv.init_mem_transf; eauto. - rewrite symbols_preserved. eauto. - apply function_ptr_translated; eauto. - rewrite sig_preserved. auto. - econstructor; eauto. constructor. rewrite H3; auto. - rewrite H3. simpl. constructor. - eapply Genv.find_funct_ptr_prop; eauto. - apply Mem.extends_refl. -Qed. - -Lemma transf_final_states: - forall st1 st2 r, - match_states st1 st2 -> LTLin.final_state st1 r -> Linear.final_state st2 r. -Proof. - intros. inv H0. inv H. inv STACKS. econstructor. - unfold loc_result in AG. rewrite H in AG. inv AG. auto. -Qed. - -Theorem transf_program_correct: - forward_simulation (LTLin.semantics prog) (Linear.semantics tprog). -Proof. - eapply forward_simulation_star. - eexact symbols_preserved. - eexact transf_initial_states. - eexact transf_final_states. - eexact transf_step_correct. -Qed. - -End PRESERVATION. diff --git a/backend/Reloadtyping.v b/backend/Reloadtyping.v deleted file mode 100644 index 70d79bc..0000000 --- a/backend/Reloadtyping.v +++ /dev/null @@ -1,353 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Proof of type preservation for Reload and of - correctness of computation of stack bounds for Linear. *) - -Require Import Coqlib. -Require Import AST. -Require Import Op. -Require Import Locations. -Require Import LTLin. -Require Import LTLintyping. -Require Import Linear. -Require Import Lineartyping. -Require Import Conventions. -Require Import Parallelmove. -Require Import Reload. -Require Import Reloadproof. - -(** * Typing Linear constructors *) - -(** We show that the Linear constructor functions defined in [Reload] - generate well-typed instruction sequences, - given sufficient typing and well-formedness hypotheses over the locations involved. *) - -Hint Constructors wt_instr: reloadty. - -Remark wt_code_cons: - forall f i c, wt_instr f i -> wt_code f c -> wt_code f (i :: c). -Proof. - intros; red; simpl; intros. elim H1; intro. - subst i; auto. auto. -Qed. - -Hint Resolve wt_code_cons: reloadty. - -Definition loc_valid (f: function) (l: loc) := - match l with R _ => True | S s => slot_valid f s end. - -Lemma loc_acceptable_valid: - forall f l, loc_acceptable l -> loc_valid f l. -Proof. - destruct l; simpl; intro. auto. - destruct s; simpl. omega. tauto. tauto. -Qed. - -Definition loc_writable (l: loc) := - match l with R _ => True | S s => slot_writable s end. - -Lemma loc_acceptable_writable: - forall l, loc_acceptable l -> loc_writable l. -Proof. - destruct l; simpl; intro. auto. - destruct s; simpl; tauto. -Qed. - -Hint Resolve loc_acceptable_valid loc_acceptable_writable: reloadty. - -Definition locs_valid (f: function) (ll: list loc) := - forall l, In l ll -> loc_valid f l. -Definition locs_writable (ll: list loc) := - forall l, In l ll -> loc_writable l. - -Lemma locs_acceptable_valid: - forall f ll, locs_acceptable ll -> locs_valid f ll. -Proof. - unfold locs_acceptable, locs_valid. auto with reloadty. -Qed. - -Lemma locs_acceptable_writable: - forall ll, locs_acceptable ll -> locs_writable ll. -Proof. - unfold locs_acceptable, locs_writable. auto with reloadty. -Qed. - -Hint Resolve locs_acceptable_valid locs_acceptable_writable: reloadty. - -Lemma wt_add_reload: - forall f src dst k, - loc_valid f src -> Loc.type src = mreg_type dst -> - wt_code f k -> wt_code f (add_reload src dst k). -Proof. - intros; unfold add_reload. - destruct src; eauto with reloadty. - destruct (mreg_eq m dst); eauto with reloadty. -Qed. - -Hint Resolve wt_add_reload: reloadty. - -Lemma wt_add_reloads: - forall f srcs dsts k, - locs_valid f srcs -> map Loc.type srcs = map mreg_type dsts -> - wt_code f k -> wt_code f (add_reloads srcs dsts k). -Proof. - induction srcs; destruct dsts; simpl; intros; try congruence. - auto. inv H0. apply wt_add_reload; auto with coqlib reloadty. - apply IHsrcs; auto. red; intros; auto with coqlib. -Qed. - -Hint Resolve wt_add_reloads: reloadty. - -Lemma wt_add_spill: - forall f src dst k, - loc_valid f dst -> loc_writable dst -> Loc.type dst = mreg_type src -> - wt_code f k -> wt_code f (add_spill src dst k). -Proof. - intros; unfold add_spill. - destruct dst; eauto with reloadty. - destruct (mreg_eq src m); eauto with reloadty. -Qed. - -Hint Resolve wt_add_spill: reloadty. - -Lemma wt_add_move: - forall f src dst k, - loc_valid f src -> loc_valid f dst -> loc_writable dst -> - Loc.type dst = Loc.type src -> - wt_code f k -> wt_code f (add_move src dst k). -Proof. - intros. unfold add_move. - destruct (Loc.eq src dst); auto. - destruct src; auto with reloadty. - destruct dst; auto with reloadty. - set (tmp := match slot_type s with - | Tint => IT1 - | Tfloat => FT1 - end). - assert (mreg_type tmp = Loc.type (S s)). - simpl. destruct (slot_type s); reflexivity. - apply wt_add_reload; auto with reloadty. - apply wt_add_spill; auto. congruence. -Qed. - -Hint Resolve wt_add_move: reloadty. - -Lemma wt_add_moves: - forall f b moves, - (forall s d, In (s, d) moves -> - loc_valid f s /\ loc_valid f d /\ loc_writable d /\ Loc.type s = Loc.type d) -> - wt_code f b -> - wt_code f - (List.fold_right (fun p k => add_move (fst p) (snd p) k) b moves). -Proof. - induction moves; simpl; intros. - auto. - destruct a as [s d]. simpl. - destruct (H s d) as [A [B [C D]]]. auto. - auto with reloadty. -Qed. - -Theorem wt_parallel_move: - forall f srcs dsts b, - List.map Loc.type srcs = List.map Loc.type dsts -> - locs_valid f srcs -> locs_valid f dsts -> locs_writable dsts -> - wt_code f b -> wt_code f (parallel_move srcs dsts b). -Proof. - intros. unfold parallel_move. apply wt_add_moves; auto. - intros. - elim (parmove_prop_2 _ _ _ _ H4); intros A B. - split. destruct A as [C|[C|C]]; auto; subst s; exact I. - split. destruct B as [C|[C|C]]; auto; subst d; exact I. - split. destruct B as [C|[C|C]]; auto; subst d; exact I. - eapply parmove_prop_3; eauto. -Qed. -Hint Resolve wt_parallel_move: reloadty. - -Lemma wt_reg_for: - forall l, mreg_type (reg_for l) = Loc.type l. -Proof. - intros. destruct l; simpl. auto. - case (slot_type s); reflexivity. -Qed. -Hint Resolve wt_reg_for: reloadty. - -Lemma wt_regs_for_rec: - forall locs itmps ftmps, - enough_temporaries_rec locs itmps ftmps = true -> - (forall r, In r itmps -> mreg_type r = Tint) -> - (forall r, In r ftmps -> mreg_type r = Tfloat) -> - List.map mreg_type (regs_for_rec locs itmps ftmps) = List.map Loc.type locs. -Proof. - induction locs; intros. - simpl. auto. - simpl in H. simpl. - destruct a. - simpl. decEq. eauto. - caseEq (slot_type s); intro SLOTTYPE; rewrite SLOTTYPE in H. - destruct itmps. discriminate. - simpl. decEq. - rewrite SLOTTYPE. auto with coqlib. - apply IHlocs; auto with coqlib. - destruct ftmps. discriminate. simpl. decEq. - rewrite SLOTTYPE. auto with coqlib. - apply IHlocs; auto with coqlib. -Qed. - -Lemma wt_regs_for: - forall locs, - enough_temporaries locs = true -> - List.map mreg_type (regs_for locs) = List.map Loc.type locs. -Proof. - intros. unfold regs_for. apply wt_regs_for_rec. - auto. - simpl; intros; intuition; subst r; reflexivity. - simpl; intros; intuition; subst r; reflexivity. -Qed. -Hint Resolve wt_regs_for: reloadty. - -Hint Resolve enough_temporaries_op_args enough_temporaries_cond enough_temporaries_addr: reloadty. - -Hint Extern 4 (_ = _) => congruence : reloadty. - -Lemma wt_transf_instr: - forall f instr k, - LTLintyping.wt_instr (LTLin.fn_sig f) instr -> - wt_code (transf_function f) k -> - wt_code (transf_function f) (transf_instr f instr k). -Proof. - Opaque reg_for regs_for. - intros. inv H; simpl; auto with reloadty. - caseEq (is_move_operation op args); intros. - destruct (is_move_operation_correct _ _ H). congruence. - assert (map mreg_type (regs_for args) = map Loc.type args). - eauto with reloadty. - assert (mreg_type (reg_for res) = Loc.type res). eauto with reloadty. - auto with reloadty. - - assert (map mreg_type (regs_for args) = map Loc.type args). - eauto with reloadty. - assert (mreg_type (reg_for dst) = Loc.type dst). eauto with reloadty. - auto with reloadty. - - caseEq (enough_temporaries (src :: args)); intro ENOUGH. - caseEq (regs_for (src :: args)); intros. auto. - assert (map mreg_type (regs_for (src :: args)) = map Loc.type (src :: args)). - apply wt_regs_for. auto. - rewrite H in H5. injection H5; intros. - auto with reloadty. - apply wt_add_reloads; auto with reloadty. - symmetry. apply wt_regs_for. eauto with reloadty. - assert (op_for_binary_addressing addr <> Omove). - unfold op_for_binary_addressing; destruct addr; congruence. - assert (type_of_operation (op_for_binary_addressing addr) = (type_of_addressing addr, Tint)). - apply type_op_for_binary_addressing. - rewrite <- H1. rewrite list_length_map. - eapply not_enough_temporaries_length; eauto. - apply wt_code_cons. - constructor; auto. rewrite H5. decEq. rewrite <- H1. eauto with reloadty. - apply wt_add_reload; auto with reloadty. - apply wt_code_cons; auto. constructor. reflexivity. - rewrite <- H2. eauto with reloadty. - - assert (locs_valid (transf_function f) (loc_arguments sig)). - red; intros. generalize (loc_arguments_acceptable sig l H). - destruct l; simpl; auto. destruct s; simpl; intuition. - assert (locs_writable (loc_arguments sig)). - red; intros. generalize (loc_arguments_acceptable sig l H6). - destruct l; simpl; auto. destruct s; simpl; intuition. - assert (map Loc.type args = map Loc.type (loc_arguments sig)). - rewrite loc_arguments_type; auto. - assert (Loc.type res = mreg_type (loc_result sig)). - rewrite H2. unfold loc_result. unfold proj_sig_res. - destruct (sig_res sig); auto. destruct t; auto. - destruct ros. - destruct H3 as [A [B C]]. - apply wt_parallel_move; eauto with reloadty. - apply wt_add_reload; eauto with reloadty. - apply wt_code_cons; eauto with reloadty. - constructor. rewrite <- A. auto with reloadty. - auto with reloadty. - - assert (locs_valid (transf_function f) (loc_arguments sig)). - red; intros. generalize (loc_arguments_acceptable sig l H). - destruct l; simpl; auto. destruct s; simpl; intuition. - assert (locs_writable (loc_arguments sig)). - red; intros. generalize (loc_arguments_acceptable sig l H6). - destruct l; simpl; auto. destruct s; simpl; intuition. - assert (map Loc.type args = map Loc.type (loc_arguments sig)). - rewrite loc_arguments_type; auto. - destruct ros. destruct H2 as [A [B C]]. auto 10 with reloadty. - auto 10 with reloadty. - destruct (ef_reloads ef) eqn:?. - assert (arity_ok (sig_args (ef_sig ef)) = true) by intuition congruence. - assert (map mreg_type (regs_for args) = map Loc.type args). - apply wt_regs_for. apply arity_ok_enough. congruence. - assert (mreg_type (reg_for res) = Loc.type res). eauto with reloadty. - auto 10 with reloadty. - auto with reloadty. - - assert (map mreg_type (regs_for args) = map Loc.type args). - eauto with reloadty. - auto with reloadty. - - assert (mreg_type (reg_for arg) = Loc.type arg). - eauto with reloadty. - auto with reloadty. - - destruct optres; simpl in *; auto with reloadty. - apply wt_add_reload; auto with reloadty. - unfold loc_result. rewrite <- H1. - destruct (Loc.type l); reflexivity. -Qed. - -Lemma wt_transf_code: - forall f c, - LTLintyping.wt_code (LTLin.fn_sig f) c -> - Lineartyping.wt_code (transf_function f) (transf_code f c). -Proof. - induction c; simpl; intros. - red; simpl; tauto. - rewrite transf_code_cons. - apply wt_transf_instr; auto with coqlib. - apply IHc. red; auto with coqlib. -Qed. - -Lemma wt_transf_fundef: - forall fd, - LTLintyping.wt_fundef fd -> - Lineartyping.wt_fundef (transf_fundef fd). -Proof. - intros. destruct fd; simpl. - inv H. inv H1. constructor. unfold wt_function. simpl. - apply wt_parallel_move; auto with reloadty. - rewrite loc_parameters_type. auto. - red; intros. - destruct (list_in_map_inv _ _ _ H) as [r [A B]]. - generalize (loc_arguments_acceptable _ _ B). - destruct r; intro. - rewrite A. simpl. auto. - red in H0. destruct s; try tauto. - simpl in A. subst l. simpl. auto. - apply wt_transf_code; auto. - constructor. -Qed. - -Lemma program_typing_preserved: - forall p, - LTLintyping.wt_program p -> - Lineartyping.wt_program (transf_program p). -Proof. - intros; red; intros. - destruct (transform_program_function _ _ _ _ H0) as [f0 [A B]]. - subst f; apply wt_transf_fundef. eauto. -Qed. diff --git a/backend/SelectLong.vp b/backend/SelectLong.vp new file mode 100644 index 0000000..8eb32c3 --- /dev/null +++ b/backend/SelectLong.vp @@ -0,0 +1,368 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Instruction selection for 64-bit integer operations *) + +Require Import Coqlib. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Op. +Require Import CminorSel. +Require Import SelectOp. + +Open Local Scope cminorsel_scope. + +(** Some operations on 64-bit integers are transformed into calls to + runtime library functions. The following record type collects + the names of these functions. *) + +Record helper_functions : Type := mk_helper_functions { + i64_dtos: ident; (**r float -> signed long *) + i64_dtou: ident; (**r float -> unsigned long *) + i64_stod: ident; (**r signed long -> float *) + i64_utod: ident; (**r unsigned long -> float *) + i64_neg: ident; (**r opposite *) + i64_add: ident; (**r addition *) + i64_sub: ident; (**r subtraction *) + i64_mul: ident; (**r multiplication 32x32->64 *) + i64_sdiv: ident; (**r signed division *) + i64_udiv: ident; (**r unsigned division *) + i64_smod: ident; (**r signed remainder *) + i64_umod: ident; (**r unsigned remainder *) + i64_shl: ident; (**r shift left *) + i64_shr: ident; (**r shift right unsigned *) + i64_sar: ident; (**r shift right signed *) + i64_scmp: ident; (**r signed comparison *) + i64_ucmp: ident (**r unsigned comparison *) +}. + +Definition sig_l_l := mksignature (Tlong :: nil) (Some Tlong). +Definition sig_l_f := mksignature (Tlong :: nil) (Some Tfloat). +Definition sig_f_l := mksignature (Tfloat :: nil) (Some Tlong). +Definition sig_ll_l := mksignature (Tlong :: Tlong :: nil) (Some Tlong). +Definition sig_li_l := mksignature (Tlong :: Tint :: nil) (Some Tlong). +Definition sig_ll_i := mksignature (Tlong :: Tlong :: nil) (Some Tint). +Definition sig_ii_l := mksignature (Tint :: Tint :: nil) (Some Tlong). + +Section SELECT. + +Variable hf: helper_functions. + +Definition makelong (h l: expr): expr := + Eop Omakelong (h ::: l ::: Enil). + +Nondetfunction splitlong (e: expr) (f: expr -> expr -> expr) := + match e with + | Eop Omakelong (h ::: l ::: Enil) => f h l + | _ => Elet e (f (Eop Ohighlong (Eletvar O ::: Enil)) (Eop Olowlong (Eletvar O ::: Enil))) + end. + +Nondetfunction splitlong2 (e1 e2: expr) (f: expr -> expr -> expr -> expr -> expr) := + match e1, e2 with + | Eop Omakelong (h1 ::: l1 ::: Enil), Eop Omakelong (h2 ::: l2 ::: Enil) => + f h1 l1 h2 l2 + | Eop Omakelong (h1 ::: l1 ::: Enil), t2 => + Elet t2 (f (lift h1) (lift l1) + (Eop Ohighlong (Eletvar O ::: Enil)) (Eop Olowlong (Eletvar O ::: Enil))) + | t1, Eop Omakelong (h2 ::: l2 ::: Enil) => + Elet t1 (f (Eop Ohighlong (Eletvar O ::: Enil)) (Eop Olowlong (Eletvar O ::: Enil)) + (lift h2) (lift l2)) + | _, _ => + Elet e1 (Elet (lift e2) + (f (Eop Ohighlong (Eletvar 1 ::: Enil)) (Eop Olowlong (Eletvar 1 ::: Enil)) + (Eop Ohighlong (Eletvar O ::: Enil)) (Eop Olowlong (Eletvar O ::: Enil)))) + end. + +Nondetfunction lowlong (e: expr) := + match e with + | Eop Omakelong (e1 ::: e2 ::: Enil) => e2 + | _ => Eop Olowlong (e ::: Enil) + end. + +Nondetfunction highlong (e: expr) := + match e with + | Eop Omakelong (e1 ::: e2 ::: Enil) => e1 + | _ => Eop Ohighlong (e ::: Enil) + end. + +Definition longconst (n: int64) : expr := + makelong (Eop (Ointconst (Int64.hiword n)) Enil) + (Eop (Ointconst (Int64.loword n)) Enil). + +Nondetfunction is_longconst (e: expr) := + match e with + | Eop Omakelong (Eop (Ointconst h) Enil ::: Eop (Ointconst l) Enil ::: Enil) => + Some(Int64.ofwords h l) + | _ => + None + end. + +Definition is_longconst_zero (e: expr) := + match is_longconst e with + | Some n => Int64.eq n Int64.zero + | None => false + end. + +Definition intoflong (e: expr) := lowlong e. + +Definition longofint (e: expr) := + Elet e (makelong (shrimm (Eletvar O) (Int.repr 31)) (Eletvar O)). + +Definition longofintu (e: expr) := + makelong (Eop (Ointconst Int.zero) Enil) e. + +Definition negl (e: expr) := + match is_longconst e with + | Some n => longconst (Int64.neg n) + | None => Ebuiltin (EF_builtin hf.(i64_neg) sig_l_l) (e ::: Enil) + end. + +Definition notl (e: expr) := + splitlong e (fun h l => makelong (notint h) (notint l)). + +Definition longoffloat (arg: expr) := + Eexternal hf.(i64_dtos) sig_f_l (arg ::: Enil). +Definition longuoffloat (arg: expr) := + Eexternal hf.(i64_dtou) sig_f_l (arg ::: Enil). +Definition floatoflong (arg: expr) := + Eexternal hf.(i64_stod) sig_l_f (arg ::: Enil). +Definition floatoflongu (arg: expr) := + Eexternal hf.(i64_utod) sig_l_f (arg ::: Enil). + +Definition andl (e1 e2: expr) := + splitlong2 e1 e2 (fun h1 l1 h2 l2 => makelong (and h1 h2) (and l1 l2)). + +Definition orl (e1 e2: expr) := + splitlong2 e1 e2 (fun h1 l1 h2 l2 => makelong (or h1 h2) (or l1 l2)). + +Definition xorl (e1 e2: expr) := + splitlong2 e1 e2 (fun h1 l1 h2 l2 => makelong (xor h1 h2) (xor l1 l2)). + +Definition shllimm (e1: expr) (n: int) := + if Int.eq n Int.zero then e1 else + if Int.ltu n Int.iwordsize then + splitlong e1 (fun h l => + makelong (or (shlimm h n) (shruimm l (Int.sub Int.iwordsize n))) + (shlimm l n)) + else if Int.ltu n Int64.iwordsize' then + makelong (shlimm (lowlong e1) (Int.sub n Int.iwordsize)) + (Eop (Ointconst Int.zero) Enil) + else + Eexternal hf.(i64_shl) sig_li_l (e1 ::: Eop (Ointconst n) Enil ::: Enil). + +Definition shrluimm (e1: expr) (n: int) := + if Int.eq n Int.zero then e1 else + if Int.ltu n Int.iwordsize then + splitlong e1 (fun h l => + makelong (shruimm h n) + (or (shruimm l n) (shlimm h (Int.sub Int.iwordsize n)))) + else if Int.ltu n Int64.iwordsize' then + makelong (Eop (Ointconst Int.zero) Enil) + (shruimm (highlong e1) (Int.sub n Int.iwordsize)) + else + Eexternal hf.(i64_shr) sig_li_l (e1 ::: Eop (Ointconst n) Enil ::: Enil). + +Definition shrlimm (e1: expr) (n: int) := + if Int.eq n Int.zero then e1 else + if Int.ltu n Int.iwordsize then + splitlong e1 (fun h l => + makelong (shrimm h n) + (or (shruimm l n) (shlimm h (Int.sub Int.iwordsize n)))) + else if Int.ltu n Int64.iwordsize' then + Elet (highlong e1) + (makelong (shrimm (Eletvar 0) (Int.repr 31)) + (shrimm (Eletvar 0) (Int.sub n Int.iwordsize))) + else + Eexternal hf.(i64_sar) sig_li_l (e1 ::: Eop (Ointconst n) Enil ::: Enil). + +Definition is_intconst (e: expr) := + match e with + | Eop (Ointconst n) Enil => Some n + | _ => None + end. + +Definition shll (e1 e2: expr) := + match is_intconst e2 with + | Some n => shllimm e1 n + | None => Eexternal hf.(i64_shl) sig_li_l (e1 ::: e2 ::: Enil) + end. + +Definition shrlu (e1 e2: expr) := + match is_intconst e2 with + | Some n => shrluimm e1 n + | None => Eexternal hf.(i64_shr) sig_li_l (e1 ::: e2 ::: Enil) + end. + +Definition shrl (e1 e2: expr) := + match is_intconst e2 with + | Some n => shrlimm e1 n + | None => Eexternal hf.(i64_sar) sig_li_l (e1 ::: e2 ::: Enil) + end. + +Definition addl (e1 e2: expr) := + let default := Ebuiltin (EF_builtin hf.(i64_add) sig_ll_l) (e1 ::: e2 ::: Enil) in + match is_longconst e1, is_longconst e2 with + | Some n1, Some n2 => longconst (Int64.add n1 n2) + | Some n1, _ => if Int64.eq n1 Int64.zero then e2 else default + | _, Some n2 => if Int64.eq n2 Int64.zero then e1 else default + | _, _ => default + end. + +Definition subl (e1 e2: expr) := + let default := Ebuiltin (EF_builtin hf.(i64_sub) sig_ll_l) (e1 ::: e2 ::: Enil) in + match is_longconst e1, is_longconst e2 with + | Some n1, Some n2 => longconst (Int64.sub n1 n2) + | Some n1, _ => if Int64.eq n1 Int64.zero then negl e2 else default + | _, Some n2 => if Int64.eq n2 Int64.zero then e1 else default + | _, _ => default + end. + +Definition mull_base (e1 e2: expr) := + splitlong2 e1 e2 (fun h1 l1 h2 l2 => + Elet (Ebuiltin (EF_builtin hf.(i64_mul) sig_ii_l) (l1 ::: l2 ::: Enil)) + (makelong + (add (add (Eop Ohighlong (Eletvar O ::: Enil)) + (mul (lift l1) (lift h2))) + (mul (lift h1) (lift l2))) + (Eop Olowlong (Eletvar O ::: Enil)))). + +Definition mullimm (e: expr) (n: int64) := + if Int64.eq n Int64.zero then longconst Int64.zero else + if Int64.eq n Int64.one then e else + match Int64.is_power2 n with + | Some l => shllimm e (Int.repr (Int64.unsigned l)) + | None => mull_base e (longconst n) + end. + +Definition mull (e1 e2: expr) := + match is_longconst e1, is_longconst e2 with + | Some n1, Some n2 => longconst (Int64.mul n1 n2) + | Some n1, _ => mullimm e2 n1 + | _, Some n2 => mullimm e1 n2 + | _, _ => mull_base e1 e2 + end. + +Definition binop_long (id: ident) (sem: int64 -> int64 -> int64) (e1 e2: expr) := + match is_longconst e1, is_longconst e2 with + | Some n1, Some n2 => longconst (sem n1 n2) + | _, _ => Eexternal id sig_ll_l (e1 ::: e2 ::: Enil) + end. + +Definition divl := binop_long hf.(i64_sdiv) Int64.divs. +Definition modl := binop_long hf.(i64_smod) Int64.mods. + +Definition divlu (e1 e2: expr) := + let default := Eexternal hf.(i64_udiv) sig_ll_l (e1 ::: e2 ::: Enil) in + match is_longconst e1, is_longconst e2 with + | Some n1, Some n2 => longconst (Int64.divu n1 n2) + | _, Some n2 => + match Int64.is_power2 n2 with + | Some l => shrluimm e1 (Int.repr (Int64.unsigned l)) + | None => default + end + | _, _ => default + end. + +Definition modlu (e1 e2: expr) := + let default := Eexternal hf.(i64_umod) sig_ll_l (e1 ::: e2 ::: Enil) in + match is_longconst e1, is_longconst e2 with + | Some n1, Some n2 => longconst (Int64.modu n1 n2) + | _, Some n2 => + match Int64.is_power2 n2 with + | Some l => andl e1 (longconst (Int64.sub n2 Int64.one)) + | None => default + end + | _, _ => default + end. + +Definition cmpl_eq_zero (e: expr) := + splitlong e (fun h l => comp Ceq (or h l) (Eop (Ointconst Int.zero) Enil)). + +Definition cmpl_ne_zero (e: expr) := + splitlong e (fun h l => comp Cne (or h l) (Eop (Ointconst Int.zero) Enil)). + +Definition cmplu (c: comparison) (e1 e2: expr) := + match c with + | Ceq => + if is_longconst_zero e2 + then cmpl_eq_zero e1 + else cmpl_eq_zero (xorl e1 e2) + | Cne => + if is_longconst_zero e2 + then cmpl_ne_zero e1 + else cmpl_ne_zero (xorl e1 e2) + | _ => + comp c (Eexternal hf.(i64_ucmp) sig_ll_i (e1 ::: e2 ::: Enil)) + (Eop (Ointconst Int.zero) Enil) + end. + +Definition cmpl (c: comparison) (e1 e2: expr) := + let default := + comp c (Eexternal hf.(i64_scmp) sig_ll_i (e1 ::: e2 ::: Enil)) + (Eop (Ointconst Int.zero) Enil) in + match c with + | Ceq => + if is_longconst_zero e2 + then cmpl_eq_zero e1 + else cmpl_eq_zero (xorl e1 e2) + | Cne => + if is_longconst_zero e2 + then cmpl_ne_zero e1 + else cmpl_ne_zero (xorl e1 e2) + | Clt => + if is_longconst_zero e2 + then comp Clt (highlong e1) (Eop (Ointconst Int.zero) Enil) + else default + | Cge => + if is_longconst_zero e2 + then comp Cge (highlong e1) (Eop (Ointconst Int.zero) Enil) + else default + | _ => + default + end. + +End SELECT. + +(** Setting up the helper functions *) + +Require Import Errors. + +Local Open Scope string_scope. +Local Open Scope error_monad_scope. + +Parameter get_helper: Cminor.genv -> String.string -> signature -> res ident. +Parameter get_builtin: String.string -> signature -> res ident. + +Definition get_helpers (ge: Cminor.genv): res helper_functions := + do i64_dtos <- get_helper ge "__i64_dtos" sig_f_l ; + do i64_dtou <- get_helper ge "__i64_dtou" sig_f_l ; + do i64_stod <- get_helper ge "__i64_stod" sig_l_f ; + do i64_utod <- get_helper ge "__i64_utod" sig_l_f ; + do i64_neg <- get_builtin "__builtin_negl" sig_l_l ; + do i64_add <- get_builtin "__builtin_addl" sig_ll_l ; + do i64_sub <- get_builtin "__builtin_subl" sig_ll_l ; + do i64_mul <- get_builtin "__builtin_mull" sig_ll_l ; + do i64_sdiv <- get_helper ge "__i64_sdiv" sig_ll_l ; + do i64_udiv <- get_helper ge "__i64_udiv" sig_ll_l ; + do i64_smod <- get_helper ge "__i64_smod" sig_ll_l ; + do i64_umod <- get_helper ge "__i64_umod" sig_ll_l ; + do i64_shl <- get_helper ge "__i64_shl" sig_li_l ; + do i64_shr <- get_helper ge "__i64_shr" sig_li_l ; + do i64_sar <- get_helper ge "__i64_sar" sig_li_l ; + do i64_scmp <- get_helper ge "__i64_scmp" sig_ll_i ; + do i64_ucmp <- get_helper ge "__i64_ucmp" sig_ll_i ; + OK (mk_helper_functions + i64_dtos i64_dtou i64_stod i64_utod + i64_neg i64_add i64_sub i64_mul i64_sdiv i64_udiv i64_smod i64_umod + i64_shl i64_shr i64_sar + i64_scmp i64_ucmp). diff --git a/backend/SelectLongproof.v b/backend/SelectLongproof.v new file mode 100644 index 0000000..aca05bf --- /dev/null +++ b/backend/SelectLongproof.v @@ -0,0 +1,1063 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Correctness of instruction selection for 64-bit integer operations *) + +Require Import Coqlib. +Require Import AST. +Require Import Errors. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Memory. +Require Import Globalenvs. +Require Import Events. +Require Import Cminor. +Require Import Op. +Require Import CminorSel. +Require Import SelectOp. +Require Import SelectOpproof. +Require Import SelectLong. + +Open Local Scope cminorsel_scope. + +(** * Axiomatization of the helper functions *) + +Section HELPERS. + +Context {F V: Type} (ge: Genv.t (AST.fundef F) V). + +Definition helper_implements (id: ident) (sg: signature) (vargs: list val) (vres: val) : Prop := + exists b, exists ef, + Genv.find_symbol ge id = Some b + /\ Genv.find_funct_ptr ge b = Some (External ef) + /\ ef_sig ef = sg + /\ forall m, external_call ef ge vargs m E0 vres m. + +Definition builtin_implements (id: ident) (sg: signature) (vargs: list val) (vres: val) : Prop := + forall m, external_call (EF_builtin id sg) ge vargs m E0 vres m. + +Definition i64_helpers_correct (hf: helper_functions) : Prop := + (forall x z, Val.longoffloat x = Some z -> helper_implements hf.(i64_dtos) sig_f_l (x::nil) z) + /\(forall x z, Val.longuoffloat x = Some z -> helper_implements hf.(i64_dtou) sig_f_l (x::nil) z) + /\(forall x z, Val.floatoflong x = Some z -> helper_implements hf.(i64_stod) sig_l_f (x::nil) z) + /\(forall x z, Val.floatoflongu x = Some z -> helper_implements hf.(i64_utod) sig_l_f (x::nil) z) + /\(forall x, builtin_implements hf.(i64_neg) sig_l_l (x::nil) (Val.negl x)) + /\(forall x y, builtin_implements hf.(i64_add) sig_ll_l (x::y::nil) (Val.addl x y)) + /\(forall x y, builtin_implements hf.(i64_sub) sig_ll_l (x::y::nil) (Val.subl x y)) + /\(forall x y, builtin_implements hf.(i64_mul) sig_ii_l (x::y::nil) (Val.mull' x y)) + /\(forall x y z, Val.divls x y = Some z -> helper_implements hf.(i64_sdiv) sig_ll_l (x::y::nil) z) + /\(forall x y z, Val.divlu x y = Some z -> helper_implements hf.(i64_udiv) sig_ll_l (x::y::nil) z) + /\(forall x y z, Val.modls x y = Some z -> helper_implements hf.(i64_smod) sig_ll_l (x::y::nil) z) + /\(forall x y z, Val.modlu x y = Some z -> helper_implements hf.(i64_umod) sig_ll_l (x::y::nil) z) + /\(forall x y, helper_implements hf.(i64_shl) sig_li_l (x::y::nil) (Val.shll x y)) + /\(forall x y, helper_implements hf.(i64_shr) sig_li_l (x::y::nil) (Val.shrlu x y)) + /\(forall x y, helper_implements hf.(i64_sar) sig_li_l (x::y::nil) (Val.shrl x y)) + /\(forall c x y, exists z, helper_implements hf.(i64_scmp) sig_ll_i (x::y::nil) z + /\ Val.cmpl c x y = Val.cmp c z Vzero) + /\(forall c x y, exists z, helper_implements hf.(i64_ucmp) sig_ll_i (x::y::nil) z + /\ Val.cmplu c x y = Val.cmp c z Vzero). + +End HELPERS. + +(** * Correctness of the instruction selection functions for 64-bit operators *) + +Section CMCONSTR. + +Variable hf: helper_functions. +Variable ge: genv. +Hypothesis HELPERS: i64_helpers_correct ge hf. +Variable sp: val. +Variable e: env. +Variable m: mem. + +Ltac UseHelper := + red in HELPERS; + repeat (eauto; match goal with | [ H: _ /\ _ |- _ ] => destruct H end). + +Lemma eval_helper: + forall le id sg args vargs vres, + eval_exprlist ge sp e m le args vargs -> + helper_implements ge id sg vargs vres -> + eval_expr ge sp e m le (Eexternal id sg args) vres. +Proof. + intros. destruct H0 as (b & ef & A & B & C & D). econstructor; eauto. +Qed. + +Corollary eval_helper_1: + forall le id sg arg1 varg1 vres, + eval_expr ge sp e m le arg1 varg1 -> + helper_implements ge id sg (varg1::nil) vres -> + eval_expr ge sp e m le (Eexternal id sg (arg1 ::: Enil)) vres. +Proof. + intros. eapply eval_helper; eauto. constructor; auto. constructor. +Qed. + +Corollary eval_helper_2: + forall le id sg arg1 arg2 varg1 varg2 vres, + eval_expr ge sp e m le arg1 varg1 -> + eval_expr ge sp e m le arg2 varg2 -> + helper_implements ge id sg (varg1::varg2::nil) vres -> + eval_expr ge sp e m le (Eexternal id sg (arg1 ::: arg2 ::: Enil)) vres. +Proof. + intros. eapply eval_helper; eauto. constructor; auto. constructor; auto. constructor. +Qed. + +Remark eval_builtin_1: + forall le id sg arg1 varg1 vres, + eval_expr ge sp e m le arg1 varg1 -> + builtin_implements ge id sg (varg1::nil) vres -> + eval_expr ge sp e m le (Ebuiltin (EF_builtin id sg) (arg1 ::: Enil)) vres. +Proof. + intros. econstructor. econstructor. eauto. constructor. apply H0. +Qed. + +Remark eval_builtin_2: + forall le id sg arg1 arg2 varg1 varg2 vres, + eval_expr ge sp e m le arg1 varg1 -> + eval_expr ge sp e m le arg2 varg2 -> + builtin_implements ge id sg (varg1::varg2::nil) vres -> + eval_expr ge sp e m le (Ebuiltin (EF_builtin id sg) (arg1 ::: arg2 ::: Enil)) vres. +Proof. + intros. econstructor. constructor; eauto. constructor; eauto. constructor. apply H1. +Qed. + +Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop := + forall le a x, + eval_expr ge sp e m le a x -> + exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v. + +Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop := + forall le a x b y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v. + +Ltac EvalOp := + eauto; + match goal with + | [ |- eval_exprlist _ _ _ _ _ Enil _ ] => constructor + | [ |- eval_exprlist _ _ _ _ _ (_:::_) _ ] => econstructor; EvalOp + | [ |- eval_expr _ _ _ _ _ (Eletvar _) _ ] => constructor; simpl; eauto + | [ |- eval_expr _ _ _ _ _ (Elet _ _) _ ] => econstructor; EvalOp + | [ |- eval_expr _ _ _ _ _ (lift _) _ ] => apply eval_lift; EvalOp + | [ |- eval_expr _ _ _ _ _ _ _ ] => eapply eval_Eop; [EvalOp | simpl; eauto] + | _ => idtac + end. + +Lemma eval_splitlong: + forall le a f v sem, + (forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (f a b) v /\ + (forall p q, x = Vint p -> y = Vint q -> v = sem (Vlong (Int64.ofwords p q)))) -> + match v with Vlong _ => True | _ => sem v = Vundef end -> + eval_expr ge sp e m le a v -> + exists v', eval_expr ge sp e m le (splitlong a f) v' /\ Val.lessdef (sem v) v'. +Proof. + intros until sem; intros EXEC UNDEF. + unfold splitlong. case (splitlong_match a); intros. +- InvEval. subst v. + exploit EXEC. eexact H2. eexact H3. intros [v' [A B]]. + exists v'; split. auto. + destruct v1; simpl in *; try (rewrite UNDEF; auto). + destruct v0; simpl in *; try (rewrite UNDEF; auto). + erewrite B; eauto. +- exploit (EXEC (v :: le) (Eop Ohighlong (Eletvar 0 ::: Enil)) (Eop Olowlong (Eletvar 0 ::: Enil))). + EvalOp. EvalOp. + intros [v' [A B]]. + exists v'; split. econstructor; eauto. + destruct v; try (rewrite UNDEF; auto). erewrite B; simpl; eauto. rewrite Int64.ofwords_recompose. auto. +Qed. + +Lemma eval_splitlong2: + forall le a b f va vb sem, + (forall le a1 a2 b1 b2 x1 x2 y1 y2, + eval_expr ge sp e m le a1 x1 -> + eval_expr ge sp e m le a2 x2 -> + eval_expr ge sp e m le b1 y1 -> + eval_expr ge sp e m le b2 y2 -> + exists v, + eval_expr ge sp e m le (f a1 a2 b1 b2) v /\ + (forall p1 p2 q1 q2, + x1 = Vint p1 -> x2 = Vint p2 -> y1 = Vint q1 -> y2 = Vint q2 -> + v = sem (Vlong (Int64.ofwords p1 p2)) (Vlong (Int64.ofwords q1 q2)))) -> + match va, vb with Vlong _, Vlong _ => True | _, _ => sem va vb = Vundef end -> + eval_expr ge sp e m le a va -> + eval_expr ge sp e m le b vb -> + exists v, eval_expr ge sp e m le (splitlong2 a b f) v /\ Val.lessdef (sem va vb) v. +Proof. + intros until sem; intros EXEC UNDEF. + unfold splitlong2. case (splitlong2_match a); intros. +- InvEval. subst va vb. + exploit (EXEC le h1 l1 h2 l2); eauto. intros [v [A B]]. + exists v; split; auto. + destruct v1; simpl in *; try (rewrite UNDEF; auto). + destruct v0; try (rewrite UNDEF; auto). + destruct v2; simpl in *; try (rewrite UNDEF; auto). + destruct v3; try (rewrite UNDEF; auto). + erewrite B; eauto. +- InvEval. subst va. + exploit (EXEC (vb :: le) (lift h1) (lift l1) + (Eop Ohighlong (Eletvar 0 ::: Enil)) (Eop Olowlong (Eletvar 0 ::: Enil))). + EvalOp. EvalOp. EvalOp. EvalOp. + intros [v [A B]]. + exists v; split. + econstructor; eauto. + destruct v1; simpl in *; try (rewrite UNDEF; auto). + destruct v0; try (rewrite UNDEF; auto). + destruct vb; try (rewrite UNDEF; auto). + erewrite B; simpl; eauto. rewrite Int64.ofwords_recompose. auto. +- InvEval. subst vb. + exploit (EXEC (va :: le) + (Eop Ohighlong (Eletvar 0 ::: Enil)) (Eop Olowlong (Eletvar 0 ::: Enil)) + (lift h2) (lift l2)). + EvalOp. EvalOp. EvalOp. EvalOp. + intros [v [A B]]. + exists v; split. + econstructor; eauto. + destruct va; try (rewrite UNDEF; auto). + destruct v1; simpl in *; try (rewrite UNDEF; auto). + destruct v0; try (rewrite UNDEF; auto). + erewrite B; simpl; eauto. rewrite Int64.ofwords_recompose. auto. +- exploit (EXEC (vb :: va :: le) + (Eop Ohighlong (Eletvar 1 ::: Enil)) (Eop Olowlong (Eletvar 1 ::: Enil)) + (Eop Ohighlong (Eletvar 0 ::: Enil)) (Eop Olowlong (Eletvar 0 ::: Enil))). + EvalOp. EvalOp. EvalOp. EvalOp. + intros [v [A B]]. + exists v; split. EvalOp. + destruct va; try (rewrite UNDEF; auto); destruct vb; try (rewrite UNDEF; auto). + erewrite B; simpl; eauto. rewrite ! Int64.ofwords_recompose; auto. +Qed. + +Lemma is_longconst_sound: + forall le a x n, + is_longconst a = Some n -> + eval_expr ge sp e m le a x -> + x = Vlong n. +Proof. + unfold is_longconst; intros until n; intros LC. + destruct (is_longconst_match a); intros. + inv LC. InvEval. simpl in H5. inv H5. auto. + discriminate. +Qed. + +Lemma is_longconst_zero_sound: + forall le a x, + is_longconst_zero a = true -> + eval_expr ge sp e m le a x -> + x = Vlong Int64.zero. +Proof. + unfold is_longconst_zero; intros. + destruct (is_longconst a) as [n|] eqn:E; try discriminate. + revert H. predSpec Int64.eq Int64.eq_spec n Int64.zero. + intros. subst. eapply is_longconst_sound; eauto. + congruence. +Qed. + +Lemma eval_lowlong: unary_constructor_sound lowlong Val.loword. +Proof. + unfold lowlong; red. intros until x. destruct (lowlong_match a); intros. + InvEval. subst x. exists v0; split; auto. + destruct v1; simpl; auto. destruct v0; simpl; auto. + rewrite Int64.lo_ofwords. auto. + exists (Val.loword x); split; auto. EvalOp. +Qed. + +Lemma eval_highlong: unary_constructor_sound highlong Val.hiword. +Proof. + unfold highlong; red. intros until x. destruct (highlong_match a); intros. + InvEval. subst x. exists v1; split; auto. + destruct v1; simpl; auto. destruct v0; simpl; auto. + rewrite Int64.hi_ofwords. auto. + exists (Val.hiword x); split; auto. EvalOp. +Qed. + +Lemma eval_longconst: + forall le n, eval_expr ge sp e m le (longconst n) (Vlong n). +Proof. + intros. EvalOp. rewrite Int64.ofwords_recompose; auto. +Qed. + +Theorem eval_intoflong: unary_constructor_sound intoflong Val.loword. +Proof eval_lowlong. + +Theorem eval_longofintu: unary_constructor_sound longofintu Val.longofintu. +Proof. + red; intros. unfold longofintu. econstructor; split. EvalOp. + unfold Val.longofintu. destruct x; auto. + replace (Int64.repr (Int.unsigned i)) with (Int64.ofwords Int.zero i); auto. + apply Int64.same_bits_eq; intros. + rewrite Int64.testbit_repr by auto. + rewrite Int64.bits_ofwords by auto. + fold (Int.testbit i i0). + destruct (zlt i0 Int.zwordsize). + auto. + rewrite Int.bits_zero. rewrite Int.bits_above by omega. auto. +Qed. + +Theorem eval_longofint: unary_constructor_sound longofint Val.longofint. +Proof. + red; intros. unfold longofint. + exploit (eval_shrimm ge sp e m (Int.repr 31) (x :: le) (Eletvar 0)). EvalOp. + intros [v1 [A B]]. + econstructor; split. EvalOp. + destruct x; simpl; auto. + simpl in B. inv B. simpl. + replace (Int64.repr (Int.signed i)) + with (Int64.ofwords (Int.shr i (Int.repr 31)) i); auto. + apply Int64.same_bits_eq; intros. + rewrite Int64.testbit_repr by auto. + rewrite Int64.bits_ofwords by auto. + rewrite Int.bits_signed by omega. + destruct (zlt i0 Int.zwordsize). + auto. + assert (Int64.zwordsize = 2 * Int.zwordsize) by reflexivity. + rewrite Int.bits_shr by omega. + change (Int.unsigned (Int.repr 31)) with (Int.zwordsize - 1). + f_equal. destruct (zlt (i0 - Int.zwordsize + (Int.zwordsize - 1)) Int.zwordsize); omega. +Qed. + +Theorem eval_negl: unary_constructor_sound (negl hf) Val.negl. +Proof. + unfold negl; red; intros. destruct (is_longconst a) eqn:E. + econstructor; split. apply eval_longconst. + exploit is_longconst_sound; eauto. intros EQ; subst x. simpl. auto. + econstructor; split. eapply eval_builtin_1; eauto. UseHelper. auto. +Qed. + +Theorem eval_notl: unary_constructor_sound notl Val.notl. +Proof. + red; intros. unfold notl. apply eval_splitlong; auto. + intros. + exploit eval_notint. eexact H0. intros [va [A B]]. + exploit eval_notint. eexact H1. intros [vb [C D]]. + exists (Val.longofwords va vb); split. EvalOp. + intros; subst. simpl in *. inv B; inv D. + simpl. unfold Int.not. rewrite <- Int64.decompose_xor. auto. + destruct x; auto. +Qed. + +Theorem eval_longoffloat: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.longoffloat x = Some y -> + exists v, eval_expr ge sp e m le (longoffloat hf a) v /\ Val.lessdef y v. +Proof. + intros; unfold longoffloat. econstructor; split. + eapply eval_helper_1; eauto. UseHelper. + auto. +Qed. + +Theorem eval_longuoffloat: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.longuoffloat x = Some y -> + exists v, eval_expr ge sp e m le (longuoffloat hf a) v /\ Val.lessdef y v. +Proof. + intros; unfold longuoffloat. econstructor; split. + eapply eval_helper_1; eauto. UseHelper. + auto. +Qed. + +Theorem eval_floatoflong: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.floatoflong x = Some y -> + exists v, eval_expr ge sp e m le (floatoflong hf a) v /\ Val.lessdef y v. +Proof. + intros; unfold floatoflong. econstructor; split. + eapply eval_helper_1; eauto. UseHelper. + auto. +Qed. + +Theorem eval_floatoflongu: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.floatoflongu x = Some y -> + exists v, eval_expr ge sp e m le (floatoflongu hf a) v /\ Val.lessdef y v. +Proof. + intros; unfold floatoflongu. econstructor; split. + eapply eval_helper_1; eauto. UseHelper. + auto. +Qed. + +Theorem eval_andl: binary_constructor_sound andl Val.andl. +Proof. + red; intros. unfold andl. apply eval_splitlong2; auto. + intros. + exploit eval_and. eexact H1. eexact H3. intros [va [A B]]. + exploit eval_and. eexact H2. eexact H4. intros [vb [C D]]. + exists (Val.longofwords va vb); split. EvalOp. + intros; subst. simpl in B; inv B. simpl in D; inv D. + simpl. f_equal. rewrite Int64.decompose_and. auto. + destruct x; auto. destruct y; auto. +Qed. + +Theorem eval_orl: binary_constructor_sound orl Val.orl. +Proof. + red; intros. unfold orl. apply eval_splitlong2; auto. + intros. + exploit eval_or. eexact H1. eexact H3. intros [va [A B]]. + exploit eval_or. eexact H2. eexact H4. intros [vb [C D]]. + exists (Val.longofwords va vb); split. EvalOp. + intros; subst. simpl in B; inv B. simpl in D; inv D. + simpl. f_equal. rewrite Int64.decompose_or. auto. + destruct x; auto. destruct y; auto. +Qed. + +Theorem eval_xorl: binary_constructor_sound xorl Val.xorl. +Proof. + red; intros. unfold xorl. apply eval_splitlong2; auto. + intros. + exploit eval_xor. eexact H1. eexact H3. intros [va [A B]]. + exploit eval_xor. eexact H2. eexact H4. intros [vb [C D]]. + exists (Val.longofwords va vb); split. EvalOp. + intros; subst. simpl in B; inv B. simpl in D; inv D. + simpl. f_equal. rewrite Int64.decompose_xor. auto. + destruct x; auto. destruct y; auto. +Qed. + +Lemma is_intconst_sound: + forall le a x n, + is_intconst a = Some n -> + eval_expr ge sp e m le a x -> + x = Vint n. +Proof. + unfold is_intconst; intros until n; intros LC. + destruct a; try discriminate. destruct o; try discriminate. destruct e0; try discriminate. + inv LC. intros. InvEval. auto. +Qed. + +Remark eval_shift_imm: + forall (P: expr -> Prop) n a0 a1 a2 a3, + (n = Int.zero -> P a0) -> + (0 <= Int.unsigned n < Int.zwordsize -> + Int.ltu n Int.iwordsize = true -> + Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize = true -> + Int.ltu n Int64.iwordsize' = true -> + P a1) -> + (Int.zwordsize <= Int.unsigned n < Int64.zwordsize -> + Int.ltu (Int.sub n Int.iwordsize) Int.iwordsize = true -> + P a2) -> + P a3 -> + P (if Int.eq n Int.zero then a0 + else if Int.ltu n Int.iwordsize then a1 + else if Int.ltu n Int64.iwordsize' then a2 + else a3). +Proof. + intros until a3; intros A0 A1 A2 A3. + predSpec Int.eq Int.eq_spec n Int.zero. + apply A0; auto. + assert (NZ: Int.unsigned n <> 0). + { red; intros. elim H. rewrite <- (Int.repr_unsigned n). rewrite H0. auto. } + destruct (Int.ltu n Int.iwordsize) eqn:LT. + exploit Int.ltu_iwordsize_inv; eauto. intros RANGE. + assert (0 <= Int.zwordsize - Int.unsigned n < Int.zwordsize) by omega. + apply A1. auto. auto. + unfold Int.ltu, Int.sub. rewrite Int.unsigned_repr_wordsize. + rewrite Int.unsigned_repr. rewrite zlt_true; auto. omega. + generalize Int.wordsize_max_unsigned; omega. + unfold Int.ltu. rewrite zlt_true; auto. + change (Int.unsigned Int64.iwordsize') with 64. + change Int.zwordsize with 32 in RANGE. omega. + destruct (Int.ltu n Int64.iwordsize') eqn:LT'. + exploit Int.ltu_inv; eauto. + change (Int.unsigned Int64.iwordsize') with (Int.zwordsize * 2). + intros RANGE. + assert (Int.zwordsize <= Int.unsigned n). + unfold Int.ltu in LT. rewrite Int.unsigned_repr_wordsize in LT. + destruct (zlt (Int.unsigned n) Int.zwordsize). discriminate. omega. + apply A2. tauto. unfold Int.ltu, Int.sub. rewrite Int.unsigned_repr_wordsize. + rewrite Int.unsigned_repr. rewrite zlt_true; auto. omega. + generalize Int.wordsize_max_unsigned; omega. + auto. +Qed. + +Lemma eval_shllimm: + forall n, + unary_constructor_sound (fun e => shllimm hf e n) (fun v => Val.shll v (Vint n)). +Proof. + unfold shllimm; red; intros. + apply eval_shift_imm; intros. + + (* n = 0 *) + subst n. exists x; split; auto. destruct x; simpl; auto. + change (Int64.shl' i Int.zero) with (Int64.shl i Int64.zero). + rewrite Int64.shl_zero. auto. + + (* 0 < n < 32 *) + apply eval_splitlong with (sem := fun x => Val.shll x (Vint n)); auto. + intros. + exploit eval_shlimm. eexact H4. instantiate (1 := n). intros [v1 [A1 B1]]. + exploit eval_shlimm. eexact H5. instantiate (1 := n). intros [v2 [A2 B2]]. + exploit eval_shruimm. eexact H5. instantiate (1 := Int.sub Int.iwordsize n). intros [v3 [A3 B3]]. + exploit eval_or. eexact A1. eexact A3. intros [v4 [A4 B4]]. + econstructor; split. EvalOp. + intros. subst. simpl in *. rewrite H1 in *. rewrite H2 in *. rewrite H3. + inv B1; inv B2; inv B3. simpl in B4. inv B4. + simpl. rewrite Int64.decompose_shl_1; auto. + destruct x; auto. + + (* 32 <= n < 64 *) + exploit eval_lowlong. eexact H. intros [v1 [A1 B1]]. + exploit eval_shlimm. eexact A1. instantiate (1 := Int.sub n Int.iwordsize). intros [v2 [A2 B2]]. + econstructor; split. EvalOp. + destruct x; simpl; auto. + destruct (Int.ltu n Int64.iwordsize'); auto. + simpl in B1; inv B1. simpl in B2. rewrite H1 in B2. inv B2. + simpl. erewrite <- Int64.decompose_shl_2. instantiate (1 := Int64.hiword i). + rewrite Int64.ofwords_recompose. auto. auto. + + (* n >= 64 *) + econstructor; split. eapply eval_helper_2; eauto. EvalOp. UseHelper. auto. +Qed. + +Theorem eval_shll: binary_constructor_sound (shll hf) Val.shll. +Proof. + unfold shll; red; intros. + destruct (is_intconst b) as [n|] eqn:IC. +- (* Immediate *) + exploit is_intconst_sound; eauto. intros EQ; subst y; clear H0. + eapply eval_shllimm; eauto. +- (* General case *) + econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto. +Qed. + +Lemma eval_shrluimm: + forall n, + unary_constructor_sound (fun e => shrluimm hf e n) (fun v => Val.shrlu v (Vint n)). +Proof. + unfold shrluimm; red; intros. apply eval_shift_imm; intros. + + (* n = 0 *) + subst n. exists x; split; auto. destruct x; simpl; auto. + change (Int64.shru' i Int.zero) with (Int64.shru i Int64.zero). + rewrite Int64.shru_zero. auto. + + (* 0 < n < 32 *) + apply eval_splitlong with (sem := fun x => Val.shrlu x (Vint n)); auto. + intros. + exploit eval_shruimm. eexact H5. instantiate (1 := n). intros [v1 [A1 B1]]. + exploit eval_shruimm. eexact H4. instantiate (1 := n). intros [v2 [A2 B2]]. + exploit eval_shlimm. eexact H4. instantiate (1 := Int.sub Int.iwordsize n). intros [v3 [A3 B3]]. + exploit eval_or. eexact A1. eexact A3. intros [v4 [A4 B4]]. + econstructor; split. EvalOp. + intros. subst. simpl in *. rewrite H1 in *. rewrite H2 in *. rewrite H3. + inv B1; inv B2; inv B3. simpl in B4. inv B4. + simpl. rewrite Int64.decompose_shru_1; auto. + destruct x; auto. + + (* 32 <= n < 64 *) + exploit eval_highlong. eexact H. intros [v1 [A1 B1]]. + exploit eval_shruimm. eexact A1. instantiate (1 := Int.sub n Int.iwordsize). intros [v2 [A2 B2]]. + econstructor; split. EvalOp. + destruct x; simpl; auto. + destruct (Int.ltu n Int64.iwordsize'); auto. + simpl in B1; inv B1. simpl in B2. rewrite H1 in B2. inv B2. + simpl. erewrite <- Int64.decompose_shru_2. instantiate (1 := Int64.loword i). + rewrite Int64.ofwords_recompose. auto. auto. + + (* n >= 64 *) + econstructor; split. eapply eval_helper_2; eauto. EvalOp. UseHelper. auto. +Qed. + +Theorem eval_shrlu: binary_constructor_sound (shrlu hf) Val.shrlu. +Proof. + unfold shrlu; red; intros. + destruct (is_intconst b) as [n|] eqn:IC. +- (* Immediate *) + exploit is_intconst_sound; eauto. intros EQ; subst y; clear H0. + eapply eval_shrluimm; eauto. +- (* General case *) + econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto. +Qed. + +Lemma eval_shrlimm: + forall n, + unary_constructor_sound (fun e => shrlimm hf e n) (fun v => Val.shrl v (Vint n)). +Proof. + unfold shrlimm; red; intros. apply eval_shift_imm; intros. + + (* n = 0 *) + subst n. exists x; split; auto. destruct x; simpl; auto. + change (Int64.shr' i Int.zero) with (Int64.shr i Int64.zero). + rewrite Int64.shr_zero. auto. + + (* 0 < n < 32 *) + apply eval_splitlong with (sem := fun x => Val.shrl x (Vint n)); auto. + intros. + exploit eval_shruimm. eexact H5. instantiate (1 := n). intros [v1 [A1 B1]]. + exploit eval_shrimm. eexact H4. instantiate (1 := n). intros [v2 [A2 B2]]. + exploit eval_shlimm. eexact H4. instantiate (1 := Int.sub Int.iwordsize n). intros [v3 [A3 B3]]. + exploit eval_or. eexact A1. eexact A3. intros [v4 [A4 B4]]. + econstructor; split. EvalOp. + intros. subst. simpl in *. rewrite H1 in *. rewrite H2 in *. rewrite H3. + inv B1; inv B2; inv B3. simpl in B4. inv B4. + simpl. rewrite Int64.decompose_shr_1; auto. + destruct x; auto. + + (* 32 <= n < 64 *) + exploit eval_highlong. eexact H. intros [v1 [A1 B1]]. + assert (eval_expr ge sp e m (v1 :: le) (Eletvar 0) v1) by EvalOp. + exploit eval_shrimm. eexact H2. instantiate (1 := Int.sub n Int.iwordsize). intros [v2 [A2 B2]]. + exploit eval_shrimm. eexact H2. instantiate (1 := Int.repr 31). intros [v3 [A3 B3]]. + econstructor; split. EvalOp. + destruct x; simpl; auto. + destruct (Int.ltu n Int64.iwordsize'); auto. + simpl in B1; inv B1. simpl in B2. rewrite H1 in B2. inv B2. + simpl in B3. inv B3. + change (Int.ltu (Int.repr 31) Int.iwordsize) with true. simpl. + erewrite <- Int64.decompose_shr_2. instantiate (1 := Int64.loword i). + rewrite Int64.ofwords_recompose. auto. auto. + + (* n >= 64 *) + econstructor; split. eapply eval_helper_2; eauto. EvalOp. UseHelper. auto. +Qed. + +Theorem eval_shrl: binary_constructor_sound (shrl hf) Val.shrl. +Proof. + unfold shrl; red; intros. + destruct (is_intconst b) as [n|] eqn:IC. +- (* Immediate *) + exploit is_intconst_sound; eauto. intros EQ; subst y; clear H0. + eapply eval_shrlimm; eauto. +- (* General case *) + econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto. +Qed. + +Theorem eval_addl: binary_constructor_sound (addl hf) Val.addl. +Proof. + unfold addl; red; intros. + set (default := Ebuiltin (EF_builtin (i64_add hf) sig_ll_l) (a ::: b ::: Enil)). + assert (DEFAULT: + exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.addl x y) v). + { + econstructor; split. eapply eval_builtin_2; eauto. UseHelper. auto. + } + destruct (is_longconst a) as [p|] eqn:LC1; + destruct (is_longconst b) as [q|] eqn:LC2. +- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. + exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + econstructor; split. apply eval_longconst. simpl; auto. +- predSpec Int64.eq Int64.eq_spec p Int64.zero; auto. + subst p. exploit (is_longconst_sound le a); eauto. intros EQ; subst x. + exists y; split; auto. simpl. destruct y; auto. rewrite Int64.add_zero_l; auto. +- predSpec Int64.eq Int64.eq_spec q Int64.zero; auto. + subst q. exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + exists x; split; auto. destruct x; simpl; auto. rewrite Int64.add_zero; auto. +- auto. +Qed. + +Theorem eval_subl: binary_constructor_sound (subl hf) Val.subl. +Proof. + unfold subl; red; intros. + set (default := Ebuiltin (EF_builtin (i64_sub hf) sig_ll_l) (a ::: b ::: Enil)). + assert (DEFAULT: + exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.subl x y) v). + { + econstructor; split. eapply eval_builtin_2; eauto. UseHelper. auto. + } + destruct (is_longconst a) as [p|] eqn:LC1; + destruct (is_longconst b) as [q|] eqn:LC2. +- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. + exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + econstructor; split. apply eval_longconst. simpl; auto. +- predSpec Int64.eq Int64.eq_spec p Int64.zero; auto. + replace (Val.subl x y) with (Val.negl y). eapply eval_negl; eauto. + subst p. exploit (is_longconst_sound le a); eauto. intros EQ; subst x. + destruct y; simpl; auto. +- predSpec Int64.eq Int64.eq_spec q Int64.zero; auto. + subst q. exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + exists x; split; auto. destruct x; simpl; auto. rewrite Int64.sub_zero_l; auto. +- auto. +Qed. + +Lemma eval_mull_base: binary_constructor_sound (mull_base hf) Val.mull. +Proof. + unfold mull_base; red; intros. apply eval_splitlong2; auto. +- intros. + set (p := Val.mull' x2 y2). set (le1 := p :: le0). + assert (E1: eval_expr ge sp e m le1 (Eop Olowlong (Eletvar O ::: Enil)) (Val.loword p)) by EvalOp. + assert (E2: eval_expr ge sp e m le1 (Eop Ohighlong (Eletvar O ::: Enil)) (Val.hiword p)) by EvalOp. + exploit eval_mul. apply eval_lift. eexact H2. apply eval_lift. eexact H3. + instantiate (1 := p). fold le1. intros [v3 [E3 L3]]. + exploit eval_mul. apply eval_lift. eexact H1. apply eval_lift. eexact H4. + instantiate (1 := p). fold le1. intros [v4 [E4 L4]]. + exploit eval_add. eexact E2. eexact E3. intros [v5 [E5 L5]]. + exploit eval_add. eexact E5. eexact E4. intros [v6 [E6 L6]]. + exists (Val.longofwords v6 (Val.loword p)); split. + EvalOp. eapply eval_builtin_2; eauto. UseHelper. + intros. unfold le1, p in *; subst; simpl in *. + inv L3. inv L4. inv L5. simpl in L6. inv L6. + simpl. f_equal. symmetry. apply Int64.decompose_mul. +- destruct x; auto; destruct y; auto. +Qed. + +Lemma eval_mullimm: + forall n, unary_constructor_sound (fun a => mullimm hf a n) (fun v => Val.mull v (Vlong n)). +Proof. + unfold mullimm; red; intros. + predSpec Int64.eq Int64.eq_spec n Int64.zero. + subst n. econstructor; split. apply eval_longconst. + destruct x; simpl; auto. rewrite Int64.mul_zero. auto. + predSpec Int64.eq Int64.eq_spec n Int64.one. + subst n. exists x; split; auto. + destruct x; simpl; auto. rewrite Int64.mul_one. auto. + destruct (Int64.is_power2 n) as [l|] eqn:P2. + exploit eval_shllimm. eauto. instantiate (1 := Int.repr (Int64.unsigned l)). + intros [v [A B]]. + exists v; split; auto. + destruct x; simpl; auto. + erewrite Int64.mul_pow2 by eauto. + assert (EQ: Int.unsigned (Int.repr (Int64.unsigned l)) = Int64.unsigned l). + { apply Int.unsigned_repr. + exploit Int64.is_power2_rng; eauto. + assert (Int64.zwordsize < Int.max_unsigned) by (compute; auto). + omega. + } + simpl in B. + replace (Int.ltu (Int.repr (Int64.unsigned l)) Int64.iwordsize') + with (Int64.ltu l Int64.iwordsize) in B. + erewrite Int64.is_power2_range in B by eauto. + unfold Int64.shl' in B. rewrite EQ in B. auto. + unfold Int64.ltu, Int.ltu. rewrite EQ. auto. + apply eval_mull_base; auto. apply eval_longconst. +Qed. + +Theorem eval_mull: binary_constructor_sound (mull hf) Val.mull. +Proof. + unfold mull; red; intros. + destruct (is_longconst a) as [p|] eqn:LC1; + destruct (is_longconst b) as [q|] eqn:LC2. +- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. + exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + econstructor; split. apply eval_longconst. simpl; auto. +- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. + replace (Val.mull (Vlong p) y) with (Val.mull y (Vlong p)) in *. + eapply eval_mullimm; eauto. + destruct y; simpl; auto. rewrite Int64.mul_commut; auto. +- exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + eapply eval_mullimm; eauto. +- apply eval_mull_base; auto. +Qed. + +Lemma eval_binop_long: + forall id sem le a b x y z, + (forall p q, x = Vlong p -> y = Vlong q -> z = Vlong (sem p q)) -> + helper_implements ge id sig_ll_l (x::y::nil) z -> + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (binop_long id sem a b) v /\ Val.lessdef z v. +Proof. + intros. unfold binop_long. + destruct (is_longconst a) as [p|] eqn:LC1. + destruct (is_longconst b) as [q|] eqn:LC2. + exploit is_longconst_sound. eexact LC1. eauto. intros EQ; subst x. + exploit is_longconst_sound. eexact LC2. eauto. intros EQ; subst y. + econstructor; split. EvalOp. erewrite H by eauto. rewrite Int64.ofwords_recompose. auto. + econstructor; split. eapply eval_helper_2; eauto. auto. + econstructor; split. eapply eval_helper_2; eauto. auto. +Qed. + +Theorem eval_divl: + forall le a b x y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.divls x y = Some z -> + exists v, eval_expr ge sp e m le (divl hf a b) v /\ Val.lessdef z v. +Proof. + intros. eapply eval_binop_long; eauto. + intros; subst; simpl in H1. + destruct (Int64.eq q Int64.zero + || Int64.eq p (Int64.repr Int64.min_signed) && Int64.eq q Int64.mone); inv H1. + auto. + UseHelper. +Qed. + +Theorem eval_modl: + forall le a b x y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.modls x y = Some z -> + exists v, eval_expr ge sp e m le (modl hf a b) v /\ Val.lessdef z v. +Proof. + intros. eapply eval_binop_long; eauto. + intros; subst; simpl in H1. + destruct (Int64.eq q Int64.zero + || Int64.eq p (Int64.repr Int64.min_signed) && Int64.eq q Int64.mone); inv H1. + auto. + UseHelper. +Qed. + +Theorem eval_divlu: + forall le a b x y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.divlu x y = Some z -> + exists v, eval_expr ge sp e m le (divlu hf a b) v /\ Val.lessdef z v. +Proof. + intros. unfold divlu. + set (default := Eexternal (i64_udiv hf) sig_ll_l (a ::: b ::: Enil)). + assert (DEFAULT: + exists v, eval_expr ge sp e m le default v /\ Val.lessdef z v). + { + econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto. + } + destruct (is_longconst a) as [p|] eqn:LC1; + destruct (is_longconst b) as [q|] eqn:LC2. +- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. + exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + econstructor; split. apply eval_longconst. + simpl in H1. destruct (Int64.eq q Int64.zero); inv H1. auto. +- auto. +- destruct (Int64.is_power2 q) as [l|] eqn:P2; auto. + exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + replace z with (Val.shrlu x (Vint (Int.repr (Int64.unsigned l)))). + apply eval_shrluimm. auto. + destruct x; simpl in H1; try discriminate. + destruct (Int64.eq q Int64.zero); inv H1. + simpl. + assert (EQ: Int.unsigned (Int.repr (Int64.unsigned l)) = Int64.unsigned l). + { apply Int.unsigned_repr. + exploit Int64.is_power2_rng; eauto. + assert (Int64.zwordsize < Int.max_unsigned) by (compute; auto). + omega. + } + replace (Int.ltu (Int.repr (Int64.unsigned l)) Int64.iwordsize') + with (Int64.ltu l Int64.iwordsize). + erewrite Int64.is_power2_range by eauto. + erewrite Int64.divu_pow2 by eauto. + unfold Int64.shru', Int64.shru. rewrite EQ. auto. + unfold Int64.ltu, Int.ltu. rewrite EQ. auto. +- auto. +Qed. + +Theorem eval_modlu: + forall le a b x y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.modlu x y = Some z -> + exists v, eval_expr ge sp e m le (modlu hf a b) v /\ Val.lessdef z v. +Proof. + intros. unfold modlu. + set (default := Eexternal (i64_umod hf) sig_ll_l (a ::: b ::: Enil)). + assert (DEFAULT: + exists v, eval_expr ge sp e m le default v /\ Val.lessdef z v). + { + econstructor; split. eapply eval_helper_2; eauto. UseHelper. auto. + } + destruct (is_longconst a) as [p|] eqn:LC1; + destruct (is_longconst b) as [q|] eqn:LC2. +- exploit (is_longconst_sound le a); eauto. intros EQ; subst x. + exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + econstructor; split. apply eval_longconst. + simpl in H1. destruct (Int64.eq q Int64.zero); inv H1. auto. +- auto. +- destruct (Int64.is_power2 q) as [l|] eqn:P2; auto. + exploit (is_longconst_sound le b); eauto. intros EQ; subst y. + replace z with (Val.andl x (Vlong (Int64.sub q Int64.one))). + apply eval_andl. auto. apply eval_longconst. + destruct x; simpl in H1; try discriminate. + destruct (Int64.eq q Int64.zero); inv H1. + simpl. + erewrite Int64.modu_and by eauto. auto. +- auto. +Qed. + +Remark decompose_cmpl_eq_zero: + forall h l, + Int64.eq (Int64.ofwords h l) Int64.zero = Int.eq (Int.or h l) Int.zero. +Proof. + intros. + assert (Int64.zwordsize = Int.zwordsize * 2) by reflexivity. + predSpec Int64.eq Int64.eq_spec (Int64.ofwords h l) Int64.zero. + replace (Int.or h l) with Int.zero. rewrite Int.eq_true. auto. + apply Int.same_bits_eq; intros. + rewrite Int.bits_zero. rewrite Int.bits_or by auto. + symmetry. apply orb_false_intro. + transitivity (Int64.testbit (Int64.ofwords h l) (i + Int.zwordsize)). + rewrite Int64.bits_ofwords by omega. rewrite zlt_false by omega. f_equal; omega. + rewrite H0. apply Int64.bits_zero. + transitivity (Int64.testbit (Int64.ofwords h l) i). + rewrite Int64.bits_ofwords by omega. rewrite zlt_true by omega. auto. + rewrite H0. apply Int64.bits_zero. + symmetry. apply Int.eq_false. red; intros; elim H0. + apply Int64.same_bits_eq; intros. + rewrite Int64.bits_zero. rewrite Int64.bits_ofwords by auto. + destruct (zlt i Int.zwordsize). + assert (Int.testbit (Int.or h l) i = false) by (rewrite H1; apply Int.bits_zero). + rewrite Int.bits_or in H3 by omega. exploit orb_false_elim; eauto. tauto. + assert (Int.testbit (Int.or h l) (i - Int.zwordsize) = false) by (rewrite H1; apply Int.bits_zero). + rewrite Int.bits_or in H3 by omega. exploit orb_false_elim; eauto. tauto. +Qed. + +Lemma eval_cmpl_eq_zero: + unary_constructor_sound cmpl_eq_zero (fun v => Val.cmpl Ceq v (Vlong Int64.zero)). +Proof. + red; intros. unfold cmpl_eq_zero. + apply eval_splitlong with (sem := fun x => Val.cmpl Ceq x (Vlong Int64.zero)); auto. +- intros. + exploit eval_or. eexact H0. eexact H1. intros [v1 [A1 B1]]. + exploit eval_comp. eexact A1. instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp. + instantiate (1 := Ceq). intros [v2 [A2 B2]]. + exists v2; split. auto. intros; subst. + simpl in B1; inv B1. unfold Val.cmp in B2; simpl in B2. + unfold Val.cmpl; simpl. rewrite decompose_cmpl_eq_zero. + destruct (Int.eq (Int.or p q)); inv B2; auto. +- destruct x; auto. +Qed. + +Lemma eval_cmpl_ne_zero: + unary_constructor_sound cmpl_ne_zero (fun v => Val.cmpl Cne v (Vlong Int64.zero)). +Proof. + red; intros. unfold cmpl_eq_zero. + apply eval_splitlong with (sem := fun x => Val.cmpl Cne x (Vlong Int64.zero)); auto. +- intros. + exploit eval_or. eexact H0. eexact H1. intros [v1 [A1 B1]]. + exploit eval_comp. eexact A1. instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp. + instantiate (1 := Cne). intros [v2 [A2 B2]]. + exists v2; split. auto. intros; subst. + simpl in B1; inv B1. unfold Val.cmp in B2; simpl in B2. + unfold Val.cmpl; simpl. rewrite decompose_cmpl_eq_zero. + destruct (Int.eq (Int.or p q)); inv B2; auto. +- destruct x; auto. +Qed. + +Remark int64_eq_xor: + forall p q, Int64.eq p q = Int64.eq (Int64.xor p q) Int64.zero. +Proof. + intros. + predSpec Int64.eq Int64.eq_spec p q. + subst q. rewrite Int64.xor_idem. rewrite Int64.eq_true. auto. + predSpec Int64.eq Int64.eq_spec (Int64.xor p q) Int64.zero. + elim H. apply Int64.xor_zero_equal; auto. + auto. +Qed. + +Theorem eval_cmplu: forall c, binary_constructor_sound (cmplu hf c) (Val.cmplu c). +Proof. + intros; red; intros. unfold cmplu. + set (default := comp c (Eexternal (i64_ucmp hf) sig_ll_i (a ::: b ::: Enil)) + (Eop (Ointconst Int.zero) Enil)). + assert (DEFAULT: + exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.cmplu c x y) v). + { + assert (HELP: exists z, helper_implements ge hf.(i64_ucmp) sig_ll_i (x::y::nil) z + /\ Val.cmplu c x y = Val.cmp c z Vzero) + by UseHelper. + destruct HELP as [z [A B]]. + exploit eval_comp. eapply eval_helper_2. eexact H. eexact H0. eauto. + instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp. + instantiate (1 := c). fold Vzero. intros [v [C D]]. + econstructor; split; eauto. congruence. + } + destruct c; auto. +- (* Ceq *) + destruct (is_longconst_zero b) eqn:LC. ++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0. + apply eval_cmpl_eq_zero; auto. ++ exploit eval_xorl. eexact H. eexact H0. intros [v [A B]]. + exploit eval_cmpl_eq_zero. eexact A. intros [v' [C D]]. + exists v'; split; auto. + eapply Val.lessdef_trans; [idtac|eexact D]. + destruct x; auto. destruct y; auto. simpl in B; inv B. + unfold Val.cmplu, Val.cmpl; simpl. rewrite int64_eq_xor; auto. +- (* Cne *) + destruct (is_longconst_zero b) eqn:LC. ++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0. + apply eval_cmpl_ne_zero; auto. ++ exploit eval_xorl. eexact H. eexact H0. intros [v [A B]]. + exploit eval_cmpl_ne_zero. eexact A. intros [v' [C D]]. + exists v'; split; auto. + eapply Val.lessdef_trans; [idtac|eexact D]. + destruct x; auto. destruct y; auto. simpl in B; inv B. + unfold Val.cmplu, Val.cmpl; simpl. rewrite int64_eq_xor; auto. +Qed. + +Remark decompose_cmpl_lt_zero: + forall h l, + Int64.lt (Int64.ofwords h l) Int64.zero = Int.lt h Int.zero. +Proof. + intros. + generalize (Int64.shru_lt_zero (Int64.ofwords h l)). + change (Int64.shru (Int64.ofwords h l) (Int64.repr (Int64.zwordsize - 1))) + with (Int64.shru' (Int64.ofwords h l) (Int.repr 63)). + rewrite Int64.decompose_shru_2. + change (Int.sub (Int.repr 63) Int.iwordsize) + with (Int.repr (Int.zwordsize - 1)). + rewrite Int.shru_lt_zero. + destruct (Int64.lt (Int64.ofwords h l) Int64.zero); destruct (Int.lt h Int.zero); auto; intros. + elim Int64.one_not_zero. auto. + elim Int64.one_not_zero. auto. + vm_compute. intuition congruence. +Qed. + +Theorem eval_cmpl: forall c, binary_constructor_sound (cmpl hf c) (Val.cmpl c). +Proof. + intros; red; intros. unfold cmpl. + set (default := comp c (Eexternal (i64_scmp hf) sig_ll_i (a ::: b ::: Enil)) + (Eop (Ointconst Int.zero) Enil)). + assert (DEFAULT: + exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.cmpl c x y) v). + { + assert (HELP: exists z, helper_implements ge hf.(i64_scmp) sig_ll_i (x::y::nil) z + /\ Val.cmpl c x y = Val.cmp c z Vzero) + by UseHelper. + destruct HELP as [z [A B]]. + exploit eval_comp. eapply eval_helper_2. eexact H. eexact H0. eauto. + instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp. + instantiate (1 := c). fold Vzero. intros [v [C D]]. + econstructor; split; eauto. congruence. + } + destruct c; auto. +- (* Ceq *) + destruct (is_longconst_zero b) eqn:LC. ++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0. + apply eval_cmpl_eq_zero; auto. ++ exploit eval_xorl. eexact H. eexact H0. intros [v [A B]]. + exploit eval_cmpl_eq_zero. eexact A. intros [v' [C D]]. + exists v'; split; auto. + eapply Val.lessdef_trans; [idtac|eexact D]. + destruct x; auto. destruct y; auto. simpl in B; inv B. + unfold Val.cmpl; simpl. rewrite int64_eq_xor; auto. +- (* Cne *) + destruct (is_longconst_zero b) eqn:LC. ++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0. + apply eval_cmpl_ne_zero; auto. ++ exploit eval_xorl. eexact H. eexact H0. intros [v [A B]]. + exploit eval_cmpl_ne_zero. eexact A. intros [v' [C D]]. + exists v'; split; auto. + eapply Val.lessdef_trans; [idtac|eexact D]. + destruct x; auto. destruct y; auto. simpl in B; inv B. + unfold Val.cmpl; simpl. rewrite int64_eq_xor; auto. +- (* Clt *) + destruct (is_longconst_zero b) eqn:LC. ++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0. + exploit eval_highlong. eexact H. intros [v1 [A1 B1]]. + exploit eval_comp. eexact A1. + instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp. + instantiate (1 := Clt). intros [v2 [A2 B2]]. + econstructor; split. eauto. + destruct x; simpl in *; auto. inv B1. + unfold Val.cmp, Val.cmp_bool, Val.of_optbool, Int.cmp in B2. + unfold Val.cmpl, Val.cmpl_bool, Val.of_optbool, Int64.cmp. + rewrite <- (Int64.ofwords_recompose i). rewrite decompose_cmpl_lt_zero. + auto. ++ apply DEFAULT. +- (* Cge *) + destruct (is_longconst_zero b) eqn:LC. ++ exploit is_longconst_zero_sound; eauto. intros EQ; subst; clear H0. + exploit eval_highlong. eexact H. intros [v1 [A1 B1]]. + exploit eval_comp. eexact A1. + instantiate (2 := Eop (Ointconst Int.zero) Enil). EvalOp. + instantiate (1 := Cge). intros [v2 [A2 B2]]. + econstructor; split. eauto. + destruct x; simpl in *; auto. inv B1. + unfold Val.cmp, Val.cmp_bool, Val.of_optbool, Int.cmp in B2. + unfold Val.cmpl, Val.cmpl_bool, Val.of_optbool, Int64.cmp. + rewrite <- (Int64.ofwords_recompose i). rewrite decompose_cmpl_lt_zero. + auto. ++ apply DEFAULT. +Qed. + +End CMCONSTR. + diff --git a/backend/Selection.v b/backend/Selection.v index 29bdabc..7964feb 100644 --- a/backend/Selection.v +++ b/backend/Selection.v @@ -24,12 +24,14 @@ Require Import Coqlib. Require Import AST. +Require Import Errors. Require Import Integers. Require Import Globalenvs. Require Cminor. Require Import Op. Require Import CminorSel. Require Import SelectOp. +Require Import SelectLong. Open Local Scope cminorsel_scope. @@ -55,12 +57,17 @@ Definition store (chunk: memory_chunk) (e1 e2: expr) := (** Instruction selection for operator applications. Most of the work is done by the processor-specific smart constructors defined - in module [SelectOp]. *) + in modules [SelectOp] and [SelectLong]. *) + +Section SELECTION. + +Variable hf: helper_functions. Definition sel_constant (cst: Cminor.constant) : expr := match cst with | Cminor.Ointconst n => Eop (Ointconst n) Enil | Cminor.Ofloatconst f => Eop (Ofloatconst f) Enil + | Cminor.Olongconst n => longconst n | Cminor.Oaddrsymbol id ofs => addrsymbol id ofs | Cminor.Oaddrstack ofs => addrstack ofs end. @@ -80,6 +87,15 @@ Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr := | Cminor.Ointuoffloat => intuoffloat arg | Cminor.Ofloatofint => floatofint arg | Cminor.Ofloatofintu => floatofintu arg + | Cminor.Onegl => negl hf arg + | Cminor.Onotl => notl arg + | Cminor.Ointoflong => intoflong arg + | Cminor.Olongofint => longofint arg + | Cminor.Olongofintu => longofintu arg + | Cminor.Olongoffloat => longoffloat hf arg + | Cminor.Olonguoffloat => longuoffloat hf arg + | Cminor.Ofloatoflong => floatoflong hf arg + | Cminor.Ofloatoflongu => floatoflongu hf arg end. Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr := @@ -101,9 +117,24 @@ Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr := | Cminor.Osubf => subf arg1 arg2 | Cminor.Omulf => mulf arg1 arg2 | Cminor.Odivf => divf arg1 arg2 + | Cminor.Oaddl => addl hf arg1 arg2 + | Cminor.Osubl => subl hf arg1 arg2 + | Cminor.Omull => mull hf arg1 arg2 + | Cminor.Odivl => divl hf arg1 arg2 + | Cminor.Odivlu => divlu hf arg1 arg2 + | Cminor.Omodl => modl hf arg1 arg2 + | Cminor.Omodlu => modlu hf arg1 arg2 + | Cminor.Oandl => andl arg1 arg2 + | Cminor.Oorl => orl arg1 arg2 + | Cminor.Oxorl => xorl arg1 arg2 + | Cminor.Oshll => shll hf arg1 arg2 + | Cminor.Oshrl => shrl hf arg1 arg2 + | Cminor.Oshrlu => shrlu hf arg1 arg2 | Cminor.Ocmp c => comp c arg1 arg2 | Cminor.Ocmpu c => compu c arg1 arg2 | Cminor.Ocmpf c => compf c arg1 arg2 + | Cminor.Ocmpl c => cmpl hf c arg1 arg2 + | Cminor.Ocmplu c => cmplu hf c arg1 arg2 end. (** Conversion from Cminor expression to Cminorsel expressions *) @@ -186,22 +217,26 @@ Fixpoint sel_stmt (ge: Cminor.genv) (s: Cminor.stmt) : stmt := | Cminor.Sgoto lbl => Sgoto lbl end. -(** Conversion of functions and programs. *) +End SELECTION. + +(** Conversion of functions. *) -Definition sel_function (ge: Cminor.genv) (f: Cminor.function) : function := +Definition sel_function (hf: helper_functions) (ge: Cminor.genv) (f: Cminor.function) : function := mkfunction f.(Cminor.fn_sig) f.(Cminor.fn_params) f.(Cminor.fn_vars) f.(Cminor.fn_stackspace) - (sel_stmt ge f.(Cminor.fn_body)). + (sel_stmt hf ge f.(Cminor.fn_body)). -Definition sel_fundef (ge: Cminor.genv) (f: Cminor.fundef) : fundef := - transf_fundef (sel_function ge) f. +Definition sel_fundef (hf: helper_functions) (ge: Cminor.genv) (f: Cminor.fundef) : fundef := + transf_fundef (sel_function hf ge) f. -Definition sel_program (p: Cminor.program) : program := - let ge := Genv.globalenv p in - transform_program (sel_fundef ge) p. +(** Conversion of programs. *) +Local Open Scope error_monad_scope. +Definition sel_program (p: Cminor.program) : res program := + let ge := Genv.globalenv p in + do hf <- get_helpers ge; OK (transform_program (sel_fundef hf ge) p). diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v index 0269438..525a29d 100644 --- a/backend/Selectionproof.v +++ b/backend/Selectionproof.v @@ -15,6 +15,7 @@ Require Import Coqlib. Require Import Maps. Require Import AST. +Require Import Errors. Require Import Integers. Require Import Values. Require Import Memory. @@ -25,27 +26,114 @@ Require Import Cminor. Require Import Op. Require Import CminorSel. Require Import SelectOp. +Require Import SelectLong. Require Import Selection. Require Import SelectOpproof. +Require Import SelectLongproof. Open Local Scope cminorsel_scope. + (** * Correctness of the instruction selection functions for expressions *) +Section PRESERVATION. + +Variable prog: Cminor.program. +Let ge := Genv.globalenv prog. +Variable hf: helper_functions. +Let tprog := transform_program (sel_fundef hf ge) prog. +Let tge := Genv.globalenv tprog. +Hypothesis HELPERS: i64_helpers_correct tge hf. + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + intros; unfold ge, tge, tprog. apply Genv.find_symbol_transf. +Qed. + +Lemma function_ptr_translated: + forall (b: block) (f: Cminor.fundef), + Genv.find_funct_ptr ge b = Some f -> + Genv.find_funct_ptr tge b = Some (sel_fundef hf ge f). +Proof. + intros. + exact (Genv.find_funct_ptr_transf (sel_fundef hf ge) _ _ H). +Qed. + +Lemma functions_translated: + forall (v v': val) (f: Cminor.fundef), + Genv.find_funct ge v = Some f -> + Val.lessdef v v' -> + Genv.find_funct tge v' = Some (sel_fundef hf ge f). +Proof. + intros. inv H0. + exact (Genv.find_funct_transf (sel_fundef hf ge) _ _ H). + simpl in H. discriminate. +Qed. + +Lemma sig_function_translated: + forall f, + funsig (sel_fundef hf ge f) = Cminor.funsig f. +Proof. + intros. destruct f; reflexivity. +Qed. + +Lemma varinfo_preserved: + forall b, Genv.find_var_info tge b = Genv.find_var_info ge b. +Proof. + intros; unfold ge, tge, tprog, sel_program. + apply Genv.find_var_info_transf. +Qed. + +Lemma helper_implements_preserved: + forall id sg vargs vres, + helper_implements ge id sg vargs vres -> + helper_implements tge id sg vargs vres. +Proof. + intros. destruct H as (b & ef & A & B & C & D). + exploit function_ptr_translated; eauto. simpl. intros. + exists b; exists ef. + split. rewrite symbols_preserved. auto. + split. auto. + split. auto. + intros. eapply external_call_symbols_preserved; eauto. + exact symbols_preserved. exact varinfo_preserved. +Qed. + +Lemma builtin_implements_preserved: + forall id sg vargs vres, + builtin_implements ge id sg vargs vres -> + builtin_implements tge id sg vargs vres. +Proof. + unfold builtin_implements; intros. + eapply external_call_symbols_preserved; eauto. + exact symbols_preserved. exact varinfo_preserved. +Qed. + +Lemma helpers_correct_preserved: + forall h, i64_helpers_correct ge h -> i64_helpers_correct tge h. +Proof. + unfold i64_helpers_correct; intros. + repeat (match goal with [ H: _ /\ _ |- _ /\ _ ] => destruct H; split end); + intros; try (eapply helper_implements_preserved; eauto); + try (eapply builtin_implements_preserved; eauto). + exploit H14; eauto. intros [z [A B]]. exists z; split; eauto. eapply helper_implements_preserved; eauto. + exploit H15; eauto. intros [z [A B]]. exists z; split; eauto. eapply helper_implements_preserved; eauto. +Qed. + Section CMCONSTR. -Variable ge: genv. Variable sp: val. Variable e: env. Variable m: mem. Lemma eval_condition_of_expr: forall le a v b, - eval_expr ge sp e m le a v -> + eval_expr tge sp e m le a v -> Val.bool_of_val v b -> match condition_of_expr a with (cond, args) => exists vl, - eval_exprlist ge sp e m le args vl /\ + eval_exprlist tge sp e m le args vl /\ eval_condition cond vl m = Some b end. Proof. @@ -60,9 +148,9 @@ Qed. Lemma eval_load: forall le a v chunk v', - eval_expr ge sp e m le a v -> + eval_expr tge sp e m le a v -> Mem.loadv chunk m v = Some v' -> - eval_expr ge sp e m le (load chunk a) v'. + eval_expr tge sp e m le (load chunk a) v'. Proof. intros. generalize H0; destruct v; simpl; intro; try discriminate. unfold load. @@ -73,11 +161,11 @@ Qed. Lemma eval_store: forall chunk a1 a2 v1 v2 f k m', - eval_expr ge sp e m nil a1 v1 -> - eval_expr ge sp e m nil a2 v2 -> + eval_expr tge sp e m nil a1 v1 -> + eval_expr tge sp e m nil a2 v2 -> Mem.storev chunk m v1 v2 = Some m' -> - step ge (State f (store chunk a1 a2) k sp e m) - E0 (State f Sskip k sp e m'). + step tge (State f (store chunk a1 a2) k sp e m) + E0 (State f Sskip k sp e m'). Proof. intros. generalize H1; destruct v1; simpl; intro; try discriminate. unfold store. @@ -90,9 +178,9 @@ Qed. Lemma eval_sel_unop: forall le op a1 v1 v, - eval_expr ge sp e m le a1 v1 -> + eval_expr tge sp e m le a1 v1 -> eval_unop op v1 = Some v -> - exists v', eval_expr ge sp e m le (sel_unop op a1) v' /\ Val.lessdef v v'. + exists v', eval_expr tge sp e m le (sel_unop hf op a1) v' /\ Val.lessdef v v'. Proof. destruct op; simpl; intros; FuncInv; try subst v. apply eval_cast8unsigned; auto. @@ -108,14 +196,23 @@ Proof. eapply eval_intuoffloat; eauto. eapply eval_floatofint; eauto. eapply eval_floatofintu; eauto. + eapply eval_negl; eauto. + eapply eval_notl; eauto. + eapply eval_intoflong; eauto. + eapply eval_longofint; eauto. + eapply eval_longofintu; eauto. + eapply eval_longoffloat; eauto. + eapply eval_longuoffloat; eauto. + eapply eval_floatoflong; eauto. + eapply eval_floatoflongu; eauto. Qed. Lemma eval_sel_binop: forall le op a1 a2 v1 v2 v, - eval_expr ge sp e m le a1 v1 -> - eval_expr ge sp e m le a2 v2 -> + eval_expr tge sp e m le a1 v1 -> + eval_expr tge sp e m le a2 v2 -> eval_binop op v1 v2 m = Some v -> - exists v', eval_expr ge sp e m le (sel_binop op a1 a2) v' /\ Val.lessdef v v'. + exists v', eval_expr tge sp e m le (sel_binop hf op a1 a2) v' /\ Val.lessdef v v'. Proof. destruct op; simpl; intros; FuncInv; try subst v. apply eval_add; auto. @@ -135,9 +232,24 @@ Proof. apply eval_subf; auto. apply eval_mulf; auto. apply eval_divf; auto. + eapply eval_addl; eauto. + eapply eval_subl; eauto. + eapply eval_mull; eauto. + eapply eval_divl; eauto. + eapply eval_divlu; eauto. + eapply eval_modl; eauto. + eapply eval_modlu; eauto. + eapply eval_andl; eauto. + eapply eval_orl; eauto. + eapply eval_xorl; eauto. + eapply eval_shll; eauto. + eapply eval_shrl; eauto. + eapply eval_shrlu; eauto. apply eval_comp; auto. apply eval_compu; auto. apply eval_compf; auto. + apply eval_cmpl; auto. + apply eval_cmplu; auto. Qed. End CMCONSTR. @@ -156,7 +268,7 @@ Proof. Qed. Lemma classify_call_correct: - forall ge sp e m a v fd, + forall sp e m a v fd, Cminor.eval_expr ge sp e m a v -> Genv.find_funct ge v = Some fd -> match classify_call ge a with @@ -215,57 +327,6 @@ Qed. (** * Semantic preservation for instruction selection. *) -Section PRESERVATION. - -Variable prog: Cminor.program. -Let tprog := sel_program prog. -Let ge := Genv.globalenv prog. -Let tge := Genv.globalenv tprog. - -(** Relationship between the global environments for the original - Cminor program and the generated CminorSel program. *) - -Lemma symbols_preserved: - forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. -Proof. - intros; unfold ge, tge, tprog, sel_program. - apply Genv.find_symbol_transf. -Qed. - -Lemma function_ptr_translated: - forall (b: block) (f: Cminor.fundef), - Genv.find_funct_ptr ge b = Some f -> - Genv.find_funct_ptr tge b = Some (sel_fundef ge f). -Proof. - intros. - exact (Genv.find_funct_ptr_transf (sel_fundef ge) _ _ H). -Qed. - -Lemma functions_translated: - forall (v v': val) (f: Cminor.fundef), - Genv.find_funct ge v = Some f -> - Val.lessdef v v' -> - Genv.find_funct tge v' = Some (sel_fundef ge f). -Proof. - intros. inv H0. - exact (Genv.find_funct_transf (sel_fundef ge) _ _ H). - simpl in H. discriminate. -Qed. - -Lemma sig_function_translated: - forall f, - funsig (sel_fundef ge f) = Cminor.funsig f. -Proof. - intros. destruct f; reflexivity. -Qed. - -Lemma varinfo_preserved: - forall b, Genv.find_var_info tge b = Genv.find_var_info ge b. -Proof. - intros; unfold ge, tge, tprog, sel_program. - apply Genv.find_var_info_transf. -Qed. - (** Relationship between the local environments. *) Definition env_lessdef (e1 e2: env) : Prop := @@ -305,7 +366,7 @@ Lemma sel_expr_correct: Cminor.eval_expr ge sp e m a v -> forall e' le m', env_lessdef e e' -> Mem.extends m m' -> - exists v', eval_expr tge sp e' m' le (sel_expr a) v' /\ Val.lessdef v v'. + exists v', eval_expr tge sp e' m' le (sel_expr hf a) v' /\ Val.lessdef v v'. Proof. induction 1; intros; simpl. (* Evar *) @@ -314,6 +375,9 @@ Proof. destruct cst; simpl in *; inv H. exists (Vint i); split; auto. econstructor. constructor. auto. exists (Vfloat f); split; auto. econstructor. constructor. auto. + exists (Val.longofwords (Vint (Int64.hiword i)) (Vint (Int64.loword i))); split. + eapply eval_Eop. constructor. EvalOp. simpl; eauto. constructor. EvalOp. simpl; eauto. constructor. auto. + simpl. rewrite Int64.ofwords_recompose. auto. rewrite <- symbols_preserved. fold (symbol_address tge i i0). apply eval_addrsymbol. apply eval_addrstack. (* Eunop *) @@ -338,7 +402,7 @@ Lemma sel_exprlist_correct: Cminor.eval_exprlist ge sp e m a v -> forall e' le m', env_lessdef e e' -> Mem.extends m m' -> - exists v', eval_exprlist tge sp e' m' le (sel_exprlist a) v' /\ Val.lessdef_list v v'. + exists v', eval_exprlist tge sp e' m' le (sel_exprlist hf a) v' /\ Val.lessdef_list v v'. Proof. induction 1; intros; simpl. exists (@nil val); split; auto. constructor. @@ -354,30 +418,30 @@ Inductive match_cont: Cminor.cont -> CminorSel.cont -> Prop := match_cont Cminor.Kstop Kstop | match_cont_seq: forall s k k', match_cont k k' -> - match_cont (Cminor.Kseq s k) (Kseq (sel_stmt ge s) k') + match_cont (Cminor.Kseq s k) (Kseq (sel_stmt hf ge s) k') | match_cont_block: forall k k', match_cont k k' -> match_cont (Cminor.Kblock k) (Kblock k') | match_cont_call: forall id f sp e k e' k', match_cont k k' -> env_lessdef e e' -> - match_cont (Cminor.Kcall id f sp e k) (Kcall id (sel_function ge f) sp e' k'). + match_cont (Cminor.Kcall id f sp e k) (Kcall id (sel_function hf ge f) sp e' k'). Inductive match_states: Cminor.state -> CminorSel.state -> Prop := | match_state: forall f s k s' k' sp e m e' m', - s' = sel_stmt ge s -> + s' = sel_stmt hf ge s -> match_cont k k' -> env_lessdef e e' -> Mem.extends m m' -> match_states (Cminor.State f s k sp e m) - (State (sel_function ge f) s' k' sp e' m') + (State (sel_function hf ge f) s' k' sp e' m') | match_callstate: forall f args args' k k' m m', match_cont k k' -> Val.lessdef_list args args' -> Mem.extends m m' -> match_states (Cminor.Callstate f args k m) - (Callstate (sel_fundef ge f) args' k' m') + (Callstate (sel_fundef hf ge f) args' k' m') | match_returnstate: forall v v' k k' m m', match_cont k k' -> Val.lessdef v v' -> @@ -393,7 +457,7 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop := eval_exprlist tge sp e' m' nil al args' -> match_states (Cminor.Callstate (External ef) args (Cminor.Kcall optid f sp e k) m) - (State (sel_function ge f) (Sbuiltin optid ef al) k' sp e' m') + (State (sel_function hf ge f) (Sbuiltin optid ef al) k' sp e' m') | match_builtin_2: forall v v' optid f sp e k m e' m' k', match_cont k k' -> Val.lessdef v v' -> @@ -401,7 +465,7 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop := Mem.extends m m' -> match_states (Cminor.Returnstate v (Cminor.Kcall optid f sp e k) m) - (State (sel_function ge f) Sskip k' sp (set_optvar optid v' e') m'). + (State (sel_function hf ge f) Sskip k' sp (set_optvar optid v' e') m'). Remark call_cont_commut: forall k k', match_cont k k' -> match_cont (Cminor.call_cont k) (call_cont k'). @@ -412,15 +476,15 @@ Qed. Remark find_label_commut: forall lbl s k k', match_cont k k' -> - match Cminor.find_label lbl s k, find_label lbl (sel_stmt ge s) k' with + match Cminor.find_label lbl s k, find_label lbl (sel_stmt hf ge s) k' with | None, None => True - | Some(s1, k1), Some(s1', k1') => s1' = sel_stmt ge s1 /\ match_cont k1 k1' + | Some(s1, k1), Some(s1', k1') => s1' = sel_stmt hf ge s1 /\ match_cont k1 k1' | _, _ => False end. Proof. induction s; intros; simpl; auto. (* store *) - unfold store. destruct (addressing m (sel_expr e)); simpl; auto. + unfold store. destruct (addressing m (sel_expr hf e)); simpl; auto. (* call *) destruct (classify_call ge e); simpl; auto. (* tailcall *) @@ -428,13 +492,13 @@ Proof. (* seq *) exploit (IHs1 (Cminor.Kseq s2 k)). constructor; eauto. destruct (Cminor.find_label lbl s1 (Cminor.Kseq s2 k)) as [[sx kx] | ]; - destruct (find_label lbl (sel_stmt ge s1) (Kseq (sel_stmt ge s2) k')) as [[sy ky] | ]; + destruct (find_label lbl (sel_stmt hf ge s1) (Kseq (sel_stmt hf ge s2) k')) as [[sy ky] | ]; intuition. apply IHs2; auto. (* ifthenelse *) - destruct (condition_of_expr (sel_expr e)) as [cond args]. simpl. + destruct (condition_of_expr (sel_expr hf e)) as [cond args]. simpl. exploit (IHs1 k); eauto. destruct (Cminor.find_label lbl s1 k) as [[sx kx] | ]; - destruct (find_label lbl (sel_stmt ge s1) k') as [[sy ky] | ]; + destruct (find_label lbl (sel_stmt hf ge s1) k') as [[sy ky] | ]; intuition. apply IHs2; auto. (* loop *) apply IHs. constructor; auto. @@ -531,9 +595,9 @@ Proof. exploit sel_expr_correct; eauto. intros [v' [A B]]. assert (Val.bool_of_val v' b). inv B. auto. inv H0. exploit eval_condition_of_expr; eauto. - destruct (condition_of_expr (sel_expr a)) as [cond args]. + destruct (condition_of_expr (sel_expr hf a)) as [cond args]. intros [vl' [C D]]. - left; exists (State (sel_function ge f) (if b then sel_stmt ge s1 else sel_stmt ge s2) k' sp e' m'); split. + left; exists (State (sel_function hf ge f) (if b then sel_stmt hf ge s1 else sel_stmt hf ge s2) k' sp e' m'); split. econstructor; eauto. constructor; auto. destruct b; auto. (* Sloop *) @@ -566,7 +630,7 @@ Proof. exploit (find_label_commut lbl (Cminor.fn_body f) (Cminor.call_cont k)). apply call_cont_commut; eauto. rewrite H. - destruct (find_label lbl (sel_stmt ge (Cminor.fn_body f)) (call_cont k'0)) + destruct (find_label lbl (sel_stmt hf ge (Cminor.fn_body f)) (call_cont k'0)) as [[s'' k'']|] eqn:?; intros; try contradiction. destruct H0. left; econstructor; split. @@ -623,14 +687,22 @@ Proof. intros. inv H0. inv H. inv H3. inv H5. constructor. Qed. +End PRESERVATION. + +Axiom get_helpers_correct: + forall ge hf, get_helpers ge = OK hf -> i64_helpers_correct ge hf. + Theorem transf_program_correct: + forall prog tprog, + sel_program prog = OK tprog -> forward_simulation (Cminor.semantics prog) (CminorSel.semantics tprog). Proof. + intros. unfold sel_program in H. + destruct (get_helpers (Genv.globalenv prog)) as [hf|] eqn:E; simpl in H; try discriminate. + inv H. eapply forward_simulation_opt. - eexact symbols_preserved. - eexact sel_initial_states. - eexact sel_final_states. - eexact sel_step_correct. + apply symbols_preserved. + apply sel_initial_states. + apply sel_final_states. + apply sel_step_correct. apply helpers_correct_preserved. apply get_helpers_correct. auto. Qed. - -End PRESERVATION. diff --git a/backend/Splitting.ml b/backend/Splitting.ml new file mode 100644 index 0000000..60f6818 --- /dev/null +++ b/backend/Splitting.ml @@ -0,0 +1,184 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(* Live range splitting over RTL *) + +open Camlcoq +open Datatypes +open Coqlib +open Maps +open AST +open Kildall +open Registers +open RTL + +(* We represent live ranges by the following unification variables. + Live range inference manipulates only variable live ranges. + Code rewriting assigns fresh RTL registers to live ranges. *) + +type live_range = { source: reg; mutable kind: live_range_kind } + +and live_range_kind = + | Link of live_range + | Var + | Reg of reg + +let new_range r = { source = r; kind = Var } + +let rec repr lr = + match lr.kind with + | Link lr' -> let lr'' = repr lr' in lr.kind <- Link lr''; lr'' + | _ -> lr + +let same_range lr1 lr2 = + lr1 == lr2 || (* quick test for speed *) + repr lr1 == repr lr2 (* the real test *) + +let unify lr1 lr2 = + let lr1 = repr lr1 and lr2 = repr lr2 in + if lr1 != lr2 then begin + match lr1.kind, lr2.kind with + | Var, _ -> lr1.kind <- Link lr2 + | _, Var -> lr2.kind <- Link lr1 + | _, _ -> assert false + end + +let reg_for lr = + let lr = repr lr in + match lr.kind with + | Link _ -> assert false + | Reg r -> r + | Var -> let r = XTL.new_reg() in lr.kind <- Reg r; r + +(* Live range inference is a variant on liveness analysis. + At each PC and for each register, liveness analysis determines + whether the reg is live or not. Live range inference associates + a live range to the reg if it is live, and no live range if it + is dead. *) + +module LRMap = struct + + type t = live_range PTree.t (* live register -> live range *) + + let beq m1 m2 = PTree.beq same_range m1 m2 + + let bot : t = PTree.empty + + let lub_opt_range olr1 olr2 = + match olr1, olr2 with + | Some lr1, Some lr2 -> unify lr1 lr2; olr1 + | Some _, None -> olr1 + | None, _ -> olr2 + + let lub m1 m2 = + PTree.combine lub_opt_range m1 m2 + +end + +module Solver = Backward_Dataflow_Solver(LRMap)(NodeSetBackward) + +(* A cache of live ranges associated to (pc, used reg) pairs. *) + +let live_range_cache = + (Hashtbl.create 123 : (int32 * int32, live_range) Hashtbl.t) + +let live_range_for pc r = + let pc' = P.to_int32 pc + and r' = P.to_int32 r in + try + Hashtbl.find live_range_cache (pc',r') + with Not_found -> + let lr = new_range r in + Hashtbl.add live_range_cache (pc', r') lr; + lr + +(* The transfer function *) + +let reg_live pc r map = + match PTree.get r map with + | Some lr -> map (* already live *) + | None -> PTree.set r (live_range_for pc r) map (* becomes live *) + +let reg_list_live pc rl map = List.fold_right (reg_live pc) rl map + +let reg_dead r map = + PTree.remove r map + +let transfer f pc after = + match PTree.get pc f.fn_code with + | None -> + LRMap.bot + | Some i -> + let across = + match instr_defs i with + | None -> after + | Some r -> reg_dead r after in + reg_list_live pc (instr_uses i) across + +(* The live range analysis *) + +let analysis f = Solver.fixpoint (successors f) (transfer f) [] + +(* Produce renamed registers for each instruction. *) + +let ren_reg map r = + match PTree.get r map with + | Some lr -> reg_for lr + | None -> XTL.new_reg() + +let ren_regs map rl = + List.map (ren_reg map) rl + +let ren_ros map ros = + sum_left_map (ren_reg map) ros + +(* Rename in an instruction *) + +let ren_instr f maps pc i = + let after = PMap.get pc maps in + let before = transfer f pc after in + match i with + | Inop s -> Inop s + | Iop(op, args, res, s) -> + Iop(op, ren_regs before args, ren_reg after res, s) + | Iload(chunk, addr, args, dst, s) -> + Iload(chunk, addr, ren_regs before args, ren_reg after dst, s) + | Istore(chunk, addr, args, src, s) -> + Istore(chunk, addr, ren_regs before args, ren_reg before src, s) + | Icall(sg, ros, args, res, s) -> + Icall(sg, ren_ros before ros, ren_regs before args, ren_reg after res, s) + | Itailcall(sg, ros, args) -> + Itailcall(sg, ren_ros before ros, ren_regs before args) + | Ibuiltin(ef, args, res, s) -> + Ibuiltin(ef, ren_regs before args, ren_reg after res, s) + | Icond(cond, args, s1, s2) -> + Icond(cond, ren_regs before args, s1, s2) + | Ijumptable(arg, tbl) -> + Ijumptable(ren_reg before arg, tbl) + | Ireturn optarg -> + Ireturn(option_map (ren_reg before) optarg) + +(* Rename live ranges in a function *) + +let rename_function f = + Hashtbl.clear live_range_cache; + let maps = + match analysis f with + | None -> assert false + | Some maps -> maps in + let before_entrypoint = + transfer f f.fn_entrypoint (PMap.get f.fn_entrypoint maps) in + { fn_sig = f.fn_sig; + fn_params = ren_regs before_entrypoint f.fn_params; + fn_stacksize = f.fn_stacksize; + fn_code = PTree.map (ren_instr f maps) f.fn_code; + fn_entrypoint = f.fn_entrypoint } diff --git a/backend/Stacking.v b/backend/Stacking.v index 03e882e..f7c16d1 100644 --- a/backend/Stacking.v +++ b/backend/Stacking.v @@ -23,6 +23,7 @@ Require Import Bounds. Require Import Mach. Require Import Conventions. Require Import Stacklayout. +Require Import Lineartyping. (** * Layout of activation records *) @@ -44,8 +45,7 @@ Definition offset_of_index (fe: frame_env) (idx: frame_index) := match idx with | FI_link => fe.(fe_ofs_link) | FI_retaddr => fe.(fe_ofs_retaddr) - | FI_local x Tint => fe.(fe_ofs_int_local) + 4 * x - | FI_local x Tfloat => fe.(fe_ofs_float_local) + 8 * x + | FI_local x ty => fe.(fe_ofs_local) + 4 * x | FI_arg x ty => fe_ofs_arg + 4 * x | FI_saved_int x => fe.(fe_ofs_int_callee_save) + 4 * x | FI_saved_float x => fe.(fe_ofs_float_callee_save) + 8 * x @@ -133,8 +133,8 @@ Definition transl_addr (fe: frame_env) (addr: addressing) := Definition transl_annot_param (fe: frame_env) (l: loc) : annot_param := match l with | R r => APreg r - | S (Local ofs ty) => APstack (chunk_of_type ty) (offset_of_index fe (FI_local ofs ty)) - | S _ => APstack Mint32 (-1) (**r never happens *) + | S Local ofs ty => APstack (chunk_of_type ty) (offset_of_index fe (FI_local ofs ty)) + | S _ _ _ => APstack Mint32 (-1) (**r never happens *) end. @@ -150,22 +150,22 @@ Definition transl_annot_param (fe: frame_env) (l: loc) : annot_param := Definition transl_instr (fe: frame_env) (i: Linear.instruction) (k: Mach.code) : Mach.code := match i with - | Lgetstack s r => - match s with - | Local ofs ty => + | Lgetstack sl ofs ty r => + match sl with + | Local => Mgetstack (Int.repr (offset_of_index fe (FI_local ofs ty))) ty r :: k - | Incoming ofs ty => + | Incoming => Mgetparam (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty r :: k - | Outgoing ofs ty => + | Outgoing => Mgetstack (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty r :: k end - | Lsetstack r s => - match s with - | Local ofs ty => + | Lsetstack r sl ofs ty => + match sl with + | Local => Msetstack r (Int.repr (offset_of_index fe (FI_local ofs ty))) ty :: k - | Incoming ofs ty => + | Incoming => k (* should not happen *) - | Outgoing ofs ty => + | Outgoing => Msetstack r (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty :: k end | Lop op args res => @@ -216,7 +216,9 @@ Open Local Scope string_scope. Definition transf_function (f: Linear.function) : res Mach.function := let fe := make_env (function_bounds f) in - if zlt Int.max_unsigned fe.(fe_size) then + if negb (wt_function f) then + Error (msg "Ill-formed Linear code") + else if zlt Int.max_unsigned fe.(fe_size) then Error (msg "Too many spilled variables, stack size exceeded") else OK (Mach.mkfunction diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v index a73f0aa..1808402 100644 --- a/backend/Stackingproof.v +++ b/backend/Stackingproof.v @@ -25,7 +25,7 @@ Require Import Events. Require Import Globalenvs. Require Import Smallstep. Require Import Locations. -Require LTL. +Require Import LTL. Require Import Linear. Require Import Lineartyping. Require Import Mach. @@ -83,17 +83,28 @@ Lemma unfold_transf_function: (Int.repr fe.(fe_ofs_retaddr)). Proof. generalize TRANSF_F. unfold transf_function. + destruct (wt_function f); simpl negb. destruct (zlt Int.max_unsigned (fe_size (make_env (function_bounds f)))). intros; discriminate. intros. unfold fe. unfold b. congruence. + intros; discriminate. +Qed. + +Lemma transf_function_well_typed: + wt_function f = true. +Proof. + generalize TRANSF_F. unfold transf_function. + destruct (wt_function f); simpl negb. auto. intros; discriminate. Qed. Lemma size_no_overflow: fe.(fe_size) <= Int.max_unsigned. Proof. generalize TRANSF_F. unfold transf_function. + destruct (wt_function f); simpl negb. destruct (zlt Int.max_unsigned (fe_size (make_env (function_bounds f)))). intros; discriminate. intros. unfold fe. unfold b. omega. + intros; discriminate. Qed. Remark bound_stack_data_stacksize: @@ -109,9 +120,8 @@ Definition index_valid (idx: frame_index) := match idx with | FI_link => True | FI_retaddr => True - | FI_local x Tint => 0 <= x < b.(bound_int_local) - | FI_local x Tfloat => 0 <= x < b.(bound_float_local) - | FI_arg x ty => 0 <= x /\ x + typesize ty <= b.(bound_outgoing) + | FI_local x ty => ty <> Tlong /\ 0 <= x /\ x + typesize ty <= b.(bound_local) + | FI_arg x ty => ty <> Tlong /\ 0 <= x /\ x + typesize ty <= b.(bound_outgoing) | FI_saved_int x => 0 <= x < b.(bound_int_callee_save) | FI_saved_float x => 0 <= x < b.(bound_float_callee_save) end. @@ -134,7 +144,7 @@ Definition index_diff (idx1 idx2: frame_index) : Prop := | FI_link, FI_link => False | FI_retaddr, FI_retaddr => False | FI_local x1 ty1, FI_local x2 ty2 => - x1 <> x2 \/ ty1 <> ty2 + x1 + typesize ty1 <= x2 \/ x2 + typesize ty2 <= x1 | FI_arg x1 ty1, FI_arg x2 ty2 => x1 + typesize ty1 <= x2 \/ x2 + typesize ty2 <= x1 | FI_saved_int x1, FI_saved_int x2 => x1 <> x2 @@ -150,8 +160,7 @@ Proof. Qed. Ltac AddPosProps := - generalize (bound_int_local_pos b); intro; - generalize (bound_float_local_pos b); intro; + generalize (bound_local_pos b); intro; generalize (bound_int_callee_save_pos b); intro; generalize (bound_float_callee_save_pos b); intro; generalize (bound_outgoing_pos b); intro; @@ -166,6 +175,12 @@ Qed. Opaque function_bounds. +Ltac InvIndexValid := + match goal with + | [ H: ?ty <> Tlong /\ _ |- _ ] => + destruct H; generalize (typesize_pos ty) (typesize_typesize ty); intros + end. + Lemma offset_of_index_disj: forall idx1 idx2, index_valid idx1 -> index_valid idx2 -> @@ -177,12 +192,11 @@ Proof. generalize (frame_env_separated b). intuition. fold fe in H. AddPosProps. destruct idx1; destruct idx2; - try (destruct t); try (destruct t0); - unfold offset_of_index, type_of_index, AST.typesize; - simpl in V1; simpl in V2; simpl in DIFF; - try omega. - assert (z <> z0). intuition auto. omega. - assert (z <> z0). intuition auto. omega. + simpl in V1; simpl in V2; repeat InvIndexValid; simpl in DIFF; + unfold offset_of_index, type_of_index; + change (AST.typesize Tint) with 4; + change (AST.typesize Tfloat) with 8; + omega. Qed. Lemma offset_of_index_disj_stack_data_1: @@ -194,9 +208,11 @@ Proof. intros idx V. generalize (frame_env_separated b). intuition. fold fe in H. AddPosProps. - destruct idx; try (destruct t); - unfold offset_of_index, type_of_index, AST.typesize; - simpl in V; + destruct idx; + simpl in V; repeat InvIndexValid; + unfold offset_of_index, type_of_index; + change (AST.typesize Tint) with 4; + change (AST.typesize Tfloat) with 8; omega. Qed. @@ -250,18 +266,22 @@ Qed. Lemma index_local_valid: forall ofs ty, - slot_within_bounds f b (Local ofs ty) -> + slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true -> index_valid (FI_local ofs ty). Proof. - unfold slot_within_bounds, index_valid. auto. + unfold slot_within_bounds, slot_valid, index_valid; intros. + InvBooleans. + split. destruct ty; congruence. auto. Qed. Lemma index_arg_valid: forall ofs ty, - slot_within_bounds f b (Outgoing ofs ty) -> + slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true -> index_valid (FI_arg ofs ty). Proof. - unfold slot_within_bounds, index_valid. auto. + unfold slot_within_bounds, slot_valid, index_valid; intros. + InvBooleans. + split. destruct ty; congruence. auto. Qed. Lemma index_saved_int_valid: @@ -300,9 +320,10 @@ Proof. intros idx V. generalize (frame_env_separated b). intros [A B]. fold fe in A. fold fe in B. AddPosProps. - destruct idx; try (destruct t); - unfold offset_of_index, type_of_index, AST.typesize; - simpl in V; + destruct idx; simpl in V; repeat InvIndexValid; + unfold offset_of_index, type_of_index; + change (AST.typesize Tint) with 4; + change (AST.typesize Tfloat) with 8; omega. Qed. @@ -459,7 +480,9 @@ Proof. apply offset_of_index_perm; auto. replace (align_chunk (chunk_of_type (type_of_index idx))) with 4. apply offset_of_index_aligned; auto. - destruct (type_of_index idx); auto. + assert (type_of_index idx <> Tlong). + destruct idx; simpl in *; tauto || congruence. + destruct (type_of_index idx); reflexivity || congruence. exists m'; auto. Qed. @@ -539,7 +562,10 @@ Proof. apply Mem.range_perm_implies with Freeable; auto with mem. apply offset_of_index_perm; auto. replace (align_chunk (chunk_of_type (type_of_index idx))) with 4. - apply offset_of_index_aligned. destruct (type_of_index idx); auto. + apply offset_of_index_aligned. + assert (type_of_index idx <> Tlong). + destruct idx; simpl in *; tauto || congruence. + destruct (type_of_index idx); reflexivity || congruence. intros [v C]. exists v; split; auto. constructor; auto. Qed. @@ -570,19 +596,19 @@ Record agree_frame (j: meminj) (ls ls0: locset) at the corresponding offsets. *) agree_locals: forall ofs ty, - slot_within_bounds f b (Local ofs ty) -> - index_contains_inj j m' sp' (FI_local ofs ty) (ls (S (Local ofs ty))); + slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true -> + index_contains_inj j m' sp' (FI_local ofs ty) (ls (S Local ofs ty)); agree_outgoing: forall ofs ty, - slot_within_bounds f b (Outgoing ofs ty) -> - index_contains_inj j m' sp' (FI_arg ofs ty) (ls (S (Outgoing ofs ty))); + slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true -> + index_contains_inj j m' sp' (FI_arg ofs ty) (ls (S Outgoing ofs ty)); (** Incoming stack slots have the same value as the corresponding Outgoing stack slots in the caller *) agree_incoming: forall ofs ty, - In (S (Incoming ofs ty)) (loc_parameters f.(Linear.fn_sig)) -> - ls (S (Incoming ofs ty)) = ls0 (S (Outgoing ofs ty)); + In (S Incoming ofs ty) (loc_parameters f.(Linear.fn_sig)) -> + ls (S Incoming ofs ty) = ls0 (S Outgoing ofs ty); (** The back link and return address slots of the Mach frame contain the [parent] and [retaddr] values, respectively. *) @@ -640,8 +666,8 @@ Hint Resolve agree_unused_reg agree_locals agree_outgoing agree_incoming Definition agree_callee_save (ls ls0: locset) : Prop := forall l, match l with - | R r => In r int_callee_save_regs \/ In r float_callee_save_regs - | S s => True + | R r => ~In r destroyed_at_call + | S _ _ _ => True end -> ls l = ls0 l. @@ -680,6 +706,18 @@ Proof. rewrite Locmap.gso; auto. red. auto. Qed. +Lemma agree_regs_set_regs: + forall j rl vl vl' ls rs, + agree_regs j ls rs -> + val_list_inject j vl vl' -> + agree_regs j (Locmap.setlist (map R rl) vl ls) (set_regs rl vl' rs). +Proof. + induction rl; simpl; intros. + auto. + inv H0. auto. + apply IHrl; auto. apply agree_regs_set_reg; auto. +Qed. + Lemma agree_regs_exten: forall j ls rs ls' rs', agree_regs j ls rs -> @@ -692,52 +730,24 @@ Proof. rewrite A; rewrite B; auto. Qed. -Lemma agree_regs_undef_list: +Lemma agree_regs_undef_regs: forall j rl ls rs, agree_regs j ls rs -> - agree_regs j (Locmap.undef (List.map R rl) ls) (undef_regs rl rs). + agree_regs j (LTL.undef_regs rl ls) (Mach.undef_regs rl rs). Proof. induction rl; simpl; intros. - auto. - apply IHrl. apply agree_regs_set_reg; auto. -Qed. - -Lemma agree_regs_undef_temps: - forall j ls rs, - agree_regs j ls rs -> - agree_regs j (LTL.undef_temps ls) (undef_temps rs). -Proof. - unfold LTL.undef_temps, undef_temps, temporaries. - intros; apply agree_regs_undef_list; auto. -Qed. - -Lemma agree_regs_undef_setstack: - forall j ls rs, - agree_regs j ls rs -> - agree_regs j (Linear.undef_setstack ls) (undef_setstack rs). -Proof. - intros. unfold Linear.undef_setstack, undef_setstack, destroyed_at_move. - apply agree_regs_undef_list; auto. -Qed. - -Lemma agree_regs_undef_op: - forall op j ls rs, - agree_regs j ls rs -> - agree_regs j (Linear.undef_op op ls) (undef_op (transl_op fe op) rs). -Proof. - intros. generalize (agree_regs_undef_temps _ _ _ H); intro A. -Opaque destroyed_at_move_regs. - destruct op; auto; simpl; apply agree_regs_undef_setstack; auto. + auto. + apply agree_regs_set_reg; auto. Qed. (** Preservation under assignment of stack slot *) Lemma agree_regs_set_slot: - forall j ls rs ss v, + forall j ls rs sl ofs ty v, agree_regs j ls rs -> - agree_regs j (Locmap.set (S ss) v ls) rs. + agree_regs j (Locmap.set (S sl ofs ty) v ls) rs. Proof. - intros; red; intros. rewrite Locmap.gso; auto. red. destruct ss; auto. + intros; red; intros. rewrite Locmap.gso; auto. red. auto. Qed. (** Preservation by increasing memory injections *) @@ -754,16 +764,10 @@ Qed. Lemma agree_regs_call_regs: forall j ls rs, agree_regs j ls rs -> - agree_regs j (call_regs ls) (undef_temps rs). + agree_regs j (call_regs ls) rs. Proof. - intros. - assert (agree_regs j (LTL.undef_temps ls) (undef_temps rs)). - apply agree_regs_undef_temps; auto. - unfold call_regs; intros; red; intros. - destruct (in_dec Loc.eq (R r) temporaries). - auto. - generalize (H0 r). unfold LTL.undef_temps. rewrite Locmap.guo. auto. - apply Loc.reg_notin; auto. + intros. + unfold call_regs; intros; red; intros; auto. Qed. (** ** Properties of [agree_frame] *) @@ -782,62 +786,49 @@ Proof. apply wt_setloc; auto. Qed. -Remark temporary_within_bounds: - forall r, In (R r) temporaries -> mreg_within_bounds b r. +Lemma agree_frame_set_regs: + forall j ls0 m sp m' sp' parent ra rl vl ls, + agree_frame j ls ls0 m sp m' sp' parent ra -> + (forall r, In r rl -> mreg_within_bounds b r) -> + Val.has_type_list vl (map Loc.type (map R rl)) -> + agree_frame j (Locmap.setlist (map R rl) vl ls) ls0 m sp m' sp' parent ra. Proof. - intros; red. destruct (mreg_type r). - destruct (zlt (index_int_callee_save r) 0). - generalize (bound_int_callee_save_pos b). omega. - exploit int_callee_save_not_destroyed. - left. eauto with coqlib. apply index_int_callee_save_pos2; auto. - contradiction. - destruct (zlt (index_float_callee_save r) 0). - generalize (bound_float_callee_save_pos b). omega. - exploit float_callee_save_not_destroyed. - left. eauto with coqlib. apply index_float_callee_save_pos2; auto. - contradiction. + induction rl; destruct vl; simpl; intros; intuition. + apply IHrl; auto. + eapply agree_frame_set_reg; eauto. Qed. -Lemma agree_frame_undef_locs: +Lemma agree_frame_undef_regs: forall j ls0 m sp m' sp' parent ra regs ls, agree_frame j ls ls0 m sp m' sp' parent ra -> - incl (List.map R regs) temporaries -> - agree_frame j (Locmap.undef (List.map R regs) ls) ls0 m sp m' sp' parent ra. + (forall r, In r regs -> mreg_within_bounds b r) -> + agree_frame j (LTL.undef_regs regs ls) ls0 m sp m' sp' parent ra. Proof. induction regs; simpl; intros. auto. - apply IHregs; eauto with coqlib. - apply agree_frame_set_reg; auto. - apply temporary_within_bounds; eauto with coqlib. - red; auto. -Qed. - -Lemma agree_frame_undef_temps: - forall j ls ls0 m sp m' sp' parent ra, - agree_frame j ls ls0 m sp m' sp' parent ra -> - agree_frame j (LTL.undef_temps ls) ls0 m sp m' sp' parent ra. -Proof. - intros. unfold temporaries. apply agree_frame_undef_locs; auto. apply incl_refl. + apply agree_frame_set_reg; auto. red; auto. Qed. -Lemma agree_frame_undef_setstack: - forall j ls ls0 m sp m' sp' parent ra, - agree_frame j ls ls0 m sp m' sp' parent ra -> - agree_frame j (Linear.undef_setstack ls) ls0 m sp m' sp' parent ra. +Lemma caller_save_reg_within_bounds: + forall r, + In r destroyed_at_call -> mreg_within_bounds b r. Proof. - intros. unfold Linear.undef_setstack, destroyed_at_move. - apply agree_frame_undef_locs; auto. - red; simpl; tauto. + intros. red. + destruct (zlt (index_int_callee_save r) 0). + destruct (zlt (index_float_callee_save r) 0). + generalize (bound_int_callee_save_pos b) (bound_float_callee_save_pos b); omega. + exfalso. eapply float_callee_save_not_destroyed; eauto. eapply index_float_callee_save_pos2; eauto. + exfalso. eapply int_callee_save_not_destroyed; eauto. eapply index_int_callee_save_pos2; eauto. Qed. -Lemma agree_frame_undef_op: - forall j ls ls0 m sp m' sp' parent ra op, +Lemma agree_frame_undef_locs: + forall j ls0 m sp m' sp' parent ra regs ls, agree_frame j ls ls0 m sp m' sp' parent ra -> - agree_frame j (Linear.undef_op op ls) ls0 m sp m' sp' parent ra. + incl regs destroyed_at_call -> + agree_frame j (LTL.undef_regs regs ls) ls0 m sp m' sp' parent ra. Proof. - intros. - exploit agree_frame_undef_temps; eauto. - destruct op; simpl; auto; intros; apply agree_frame_undef_setstack; auto. + intros. eapply agree_frame_undef_regs; eauto. + intros. apply caller_save_reg_within_bounds. auto. Qed. (** Preservation by assignment to local slot *) @@ -845,31 +836,35 @@ Qed. Lemma agree_frame_set_local: forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'', agree_frame j ls ls0 m sp m' sp' parent retaddr -> - slot_within_bounds f b (Local ofs ty) -> + slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true -> val_inject j v v' -> Val.has_type v ty -> Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_local ofs ty)) v' = Some m'' -> - agree_frame j (Locmap.set (S (Local ofs ty)) v ls) ls0 m sp m'' sp' parent retaddr. + agree_frame j (Locmap.set (S Local ofs ty) v ls) ls0 m sp m'' sp' parent retaddr. Proof. intros. inv H. - change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H3. + change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H4. constructor; auto; intros. (* local *) - unfold Locmap.set. simpl. destruct (Loc.eq (S (Local ofs ty)) (S (Local ofs0 ty0))). - inv e. eapply gss_index_contains_inj; eauto. - eapply gso_index_contains_inj. eauto. simpl; auto. eauto with stacking. - simpl. destruct (zeq ofs ofs0); auto. destruct (typ_eq ty ty0); auto. congruence. + unfold Locmap.set. + destruct (Loc.eq (S Local ofs ty) (S Local ofs0 ty0)). + inv e. eapply gss_index_contains_inj; eauto with stacking. + destruct (Loc.diff_dec (S Local ofs ty) (S Local ofs0 ty0)). + eapply gso_index_contains_inj. eauto. eauto with stacking. eauto. + simpl. simpl in d. intuition. + apply index_contains_inj_undef. auto with stacking. + red; intros. eapply Mem.perm_store_1; eauto. (* outgoing *) rewrite Locmap.gso. eapply gso_index_contains_inj; eauto with stacking. - simpl; auto. red; auto. + red; auto. red; left; congruence. (* parent *) - eapply gso_index_contains; eauto. red; auto. + eapply gso_index_contains; eauto with stacking. red; auto. (* retaddr *) - eapply gso_index_contains; eauto. red; auto. + eapply gso_index_contains; eauto with stacking. red; auto. (* int callee save *) - eapply gso_index_contains_inj; eauto. simpl; auto. + eapply gso_index_contains_inj; eauto with stacking. simpl; auto. (* float callee save *) - eapply gso_index_contains_inj; eauto. simpl; auto. + eapply gso_index_contains_inj; eauto with stacking. simpl; auto. (* valid *) eauto with mem. (* perm *) @@ -883,25 +878,26 @@ Qed. Lemma agree_frame_set_outgoing: forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'', agree_frame j ls ls0 m sp m' sp' parent retaddr -> - slot_within_bounds f b (Outgoing ofs ty) -> + slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true -> val_inject j v v' -> Val.has_type v ty -> Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_arg ofs ty)) v' = Some m'' -> - agree_frame j (Locmap.set (S (Outgoing ofs ty)) v ls) ls0 m sp m'' sp' parent retaddr. + agree_frame j (Locmap.set (S Outgoing ofs ty) v ls) ls0 m sp m'' sp' parent retaddr. Proof. intros. inv H. - change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H3. + change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H4. constructor; auto; intros. (* local *) - rewrite Locmap.gso. eapply gso_index_contains_inj; eauto. simpl; auto. red; auto. + rewrite Locmap.gso. eapply gso_index_contains_inj; eauto with stacking. red; auto. + red; left; congruence. (* outgoing *) - unfold Locmap.set. simpl. destruct (Loc.eq (S (Outgoing ofs ty)) (S (Outgoing ofs0 ty0))). - inv e. eapply gss_index_contains_inj; eauto. - case_eq (Loc.overlap_aux ty ofs ofs0 || Loc.overlap_aux ty0 ofs0 ofs); intros. - apply index_contains_inj_undef. auto. + unfold Locmap.set. destruct (Loc.eq (S Outgoing ofs ty) (S Outgoing ofs0 ty0)). + inv e. eapply gss_index_contains_inj; eauto with stacking. + destruct (Loc.diff_dec (S Outgoing ofs ty) (S Outgoing ofs0 ty0)). + eapply gso_index_contains_inj; eauto with stacking. + red. red in d. intuition. + apply index_contains_inj_undef. auto with stacking. red; intros. eapply Mem.perm_store_1; eauto. - eapply gso_index_contains_inj; eauto. - red. eapply Loc.overlap_aux_false_1; eauto. (* parent *) eapply gso_index_contains; eauto with stacking. red; auto. (* retaddr *) @@ -1038,18 +1034,6 @@ Qed. (** Preservation at return points (when [ls] is changed but not [ls0]). *) -Remark mreg_not_within_bounds_callee_save: - forall r, - ~mreg_within_bounds b r -> In r int_callee_save_regs \/ In r float_callee_save_regs. -Proof. - intro r; unfold mreg_within_bounds. - destruct (mreg_type r); intro. - left. apply index_int_callee_save_pos2. - generalize (bound_int_callee_save_pos b). omega. - right. apply index_float_callee_save_pos2. - generalize (bound_float_callee_save_pos b). omega. -Qed. - Lemma agree_frame_return: forall j ls ls0 m sp m' sp' parent retaddr ls', agree_frame j ls ls0 m sp m' sp' parent retaddr -> @@ -1058,7 +1042,7 @@ Lemma agree_frame_return: agree_frame j ls' ls0 m sp m' sp' parent retaddr. Proof. intros. red in H0. inv H; constructor; auto; intros. - rewrite H0; auto. apply mreg_not_within_bounds_callee_save; auto. + rewrite H0; auto. red; intros; elim H. apply caller_save_reg_within_bounds; auto. rewrite H0; auto. rewrite H0; auto. rewrite H0; auto. @@ -1073,13 +1057,12 @@ Lemma agree_frame_tailcall: agree_frame j ls ls0' m sp m' sp' parent retaddr. Proof. intros. red in H0. inv H; constructor; auto; intros. - rewrite <- H0; auto. apply mreg_not_within_bounds_callee_save; auto. - rewrite <- H0; auto. - rewrite <- H0; auto. + rewrite <- H0; auto. red; intros; elim H. apply caller_save_reg_within_bounds; auto. rewrite <- H0; auto. + rewrite <- H0. auto. red; intros. eapply int_callee_save_not_destroyed; eauto. + rewrite <- H0. auto. red; intros. eapply float_callee_save_not_destroyed; eauto. Qed. - (** Properties of [agree_callee_save]. *) Lemma agree_callee_save_return_regs: @@ -1088,21 +1071,107 @@ Lemma agree_callee_save_return_regs: Proof. intros; red; intros. unfold return_regs. destruct l; auto. - generalize (int_callee_save_not_destroyed m); intro. - generalize (float_callee_save_not_destroyed m); intro. - destruct (In_dec Loc.eq (R m) temporaries). tauto. - destruct (In_dec Loc.eq (R m) destroyed_at_call). tauto. - auto. + rewrite pred_dec_false; auto. Qed. Lemma agree_callee_save_set_result: - forall ls1 ls2 v sg, + forall sg vl ls1 ls2, agree_callee_save ls1 ls2 -> - agree_callee_save (Locmap.set (R (loc_result sg)) v ls1) ls2. + agree_callee_save (Locmap.setlist (map R (loc_result sg)) vl ls1) ls2. +Proof. + intros sg. generalize (loc_result_caller_save sg). + generalize (loc_result sg). +Opaque destroyed_at_call. + induction l; simpl; intros. + auto. + destruct vl; auto. + apply IHl; auto. + red; intros. rewrite Locmap.gso. apply H0; auto. + destruct l0; simpl; auto. +Qed. + +(** Properties of destroyed registers. *) + +Lemma check_mreg_list_incl: + forall l1 l2, + forallb (fun r => In_dec mreg_eq r l2) l1 = true -> + incl l1 l2. Proof. - intros; red; intros. rewrite <- H; auto. - apply Locmap.gso. destruct l; simpl; auto. - red; intro. subst m. elim (loc_result_not_callee_save _ H0). + intros; red; intros. + rewrite forallb_forall in H. eapply proj_sumbool_true. eauto. +Qed. + +Remark destroyed_by_op_caller_save: + forall op, incl (destroyed_by_op op) destroyed_at_call. +Proof. + destruct op; apply check_mreg_list_incl; compute; auto. +Qed. + +Remark destroyed_by_load_caller_save: + forall chunk addr, incl (destroyed_by_load chunk addr) destroyed_at_call. +Proof. + intros. destruct chunk; apply check_mreg_list_incl; compute; auto. +Qed. + +Remark destroyed_by_store_caller_save: + forall chunk addr, incl (destroyed_by_store chunk addr) destroyed_at_call. +Proof. + intros. destruct chunk; apply check_mreg_list_incl; compute; auto. +Qed. + +Remark destroyed_by_cond_caller_save: + forall cond, incl (destroyed_by_cond cond) destroyed_at_call. +Proof. + destruct cond; apply check_mreg_list_incl; compute; auto. +Qed. + +Remark destroyed_by_jumptable_caller_save: + incl destroyed_by_jumptable destroyed_at_call. +Proof. + apply check_mreg_list_incl; compute; auto. +Qed. + +Remark destroyed_at_function_entry_caller_save: + incl destroyed_at_function_entry destroyed_at_call. +Proof. + apply check_mreg_list_incl; compute; auto. +Qed. + +Remark destroyed_by_move_at_function_entry: + incl (destroyed_by_op Omove) destroyed_at_function_entry. +Proof. + apply check_mreg_list_incl; compute; auto. +Qed. + +Remark temp_for_parent_frame_caller_save: + In temp_for_parent_frame destroyed_at_call. +Proof. + Transparent temp_for_parent_frame. + Transparent destroyed_at_call. + unfold temp_for_parent_frame; simpl; tauto. +Qed. + +Hint Resolve destroyed_by_op_caller_save destroyed_by_load_caller_save + destroyed_by_store_caller_save + destroyed_by_cond_caller_save destroyed_by_jumptable_caller_save + destroyed_at_function_entry_caller_save: stacking. + +Remark transl_destroyed_by_op: + forall op e, destroyed_by_op (transl_op e op) = destroyed_by_op op. +Proof. + intros; destruct op; reflexivity. +Qed. + +Remark transl_destroyed_by_load: + forall chunk addr e, destroyed_by_load chunk (transl_addr e addr) = destroyed_by_load chunk addr. +Proof. + intros; destruct chunk; reflexivity. +Qed. + +Remark transl_destroyed_by_store: + forall chunk addr e, destroyed_by_store chunk (transl_addr e addr) = destroyed_by_store chunk addr. +Proof. + intros; destruct chunk; reflexivity. Qed. (** * Correctness of saving and restoring of callee-save registers *) @@ -1157,7 +1226,7 @@ Hypothesis csregs_typ: forall r, In r csregs -> mreg_type r = ty. Hypothesis ls_temp_undef: - forall r, In r destroyed_at_move_regs -> ls (R r) = Vundef. + forall r, In r (destroyed_by_op Omove) -> ls (R r) = Vundef. Hypothesis wt_ls: wt_locset ls. @@ -1200,18 +1269,18 @@ Proof. (* a store takes place *) exploit store_index_succeeds. apply (mkindex_valid a); auto with coqlib. eauto. instantiate (1 := rs a). intros [m1 ST]. - exploit (IHl k (undef_setstack rs) m1). auto with coqlib. auto. + exploit (IHl k (undef_regs (destroyed_by_op Omove) rs) m1). auto with coqlib. auto. red; eauto with mem. apply agree_regs_exten with ls rs. auto. - intros. destruct (In_dec mreg_eq r destroyed_at_move_regs). + intros. destruct (In_dec mreg_eq r (destroyed_by_op Omove)). left. apply ls_temp_undef; auto. - right; split. auto. unfold undef_setstack, undef_move. apply undef_regs_other; auto. + right; split. auto. apply undef_regs_other; auto. intros [rs' [m' [A [B [C [D [E F]]]]]]]. exists rs'; exists m'. split. eapply star_left; eauto. econstructor. rewrite <- (mkindex_typ (number a)). apply store_stack_succeeds; auto with coqlib. - traceEq. + auto. traceEq. split; intros. simpl in H3. destruct (mreg_eq a r). subst r. assert (index_contains_inj j m1 sp (mkindex (number a)) (ls (R a))). @@ -1240,9 +1309,33 @@ Qed. End SAVE_CALLEE_SAVE. +Remark LTL_undef_regs_same: + forall r rl ls, In r rl -> LTL.undef_regs rl ls (R r) = Vundef. +Proof. + induction rl; simpl; intros. contradiction. + unfold Locmap.set. destruct (Loc.eq (R a) (R r)). auto. + destruct (Loc.diff_dec (R a) (R r)); auto. + apply IHrl. intuition congruence. +Qed. + +Remark LTL_undef_regs_others: + forall r rl ls, ~In r rl -> LTL.undef_regs rl ls (R r) = ls (R r). +Proof. + induction rl; simpl; intros. auto. + rewrite Locmap.gso. apply IHrl. intuition. red. intuition. +Qed. + +Remark LTL_undef_regs_slot: + forall sl ofs ty rl ls, LTL.undef_regs rl ls (S sl ofs ty) = ls (S sl ofs ty). +Proof. + induction rl; simpl; intros. auto. + rewrite Locmap.gso. apply IHrl. red; auto. +Qed. + Lemma save_callee_save_correct: - forall j ls rs sp cs fb k m, - agree_regs j (call_regs ls) rs -> wt_locset (call_regs ls) -> + forall j ls0 rs sp cs fb k m, + let ls := LTL.undef_regs destroyed_at_function_entry ls0 in + agree_regs j ls rs -> wt_locset ls -> frame_perm_freeable m sp -> exists rs', exists m', star step tge @@ -1250,10 +1343,10 @@ Lemma save_callee_save_correct: E0 (State cs fb (Vptr sp Int.zero) k rs' m') /\ (forall r, In r int_callee_save_regs -> index_int_callee_save r < b.(bound_int_callee_save) -> - index_contains_inj j m' sp (FI_saved_int (index_int_callee_save r)) (call_regs ls (R r))) + index_contains_inj j m' sp (FI_saved_int (index_int_callee_save r)) (ls (R r))) /\ (forall r, In r float_callee_save_regs -> index_float_callee_save r < b.(bound_float_callee_save) -> - index_contains_inj j m' sp (FI_saved_float (index_float_callee_save r)) (call_regs ls (R r))) + index_contains_inj j m' sp (FI_saved_float (index_float_callee_save r)) (ls (R r))) /\ (forall idx v, index_valid idx -> match idx with FI_saved_int _ => False | FI_saved_float _ => False | _ => True end -> @@ -1261,18 +1354,16 @@ Lemma save_callee_save_correct: index_contains m' sp idx v) /\ stores_in_frame sp m m' /\ frame_perm_freeable m' sp - /\ agree_regs j (call_regs ls) rs'. + /\ agree_regs j ls rs'. Proof. intros. - assert (ls_temp_undef: forall r, In r destroyed_at_move_regs -> call_regs ls (R r) = Vundef). - intros; unfold call_regs. apply pred_dec_true. -Transparent destroyed_at_move_regs. - simpl in *; intuition congruence. + assert (UNDEF: forall r, In r (destroyed_by_op Omove) -> ls (R r) = Vundef). + intros. unfold ls. apply LTL_undef_regs_same. apply destroyed_by_move_at_function_entry; auto. exploit (save_callee_save_regs_correct fe_num_int_callee_save index_int_callee_save FI_saved_int Tint - j cs fb sp int_callee_save_regs (call_regs ls)). + j cs fb sp int_callee_save_regs ls). intros. apply index_int_callee_save_inj; auto. intros. simpl. split. apply Zge_le. apply index_int_callee_save_pos; auto. assumption. auto. @@ -1290,7 +1381,7 @@ Transparent destroyed_at_move_regs. fe_num_float_callee_save index_float_callee_save FI_saved_float Tfloat - j cs fb sp float_callee_save_regs (call_regs ls)). + j cs fb sp float_callee_save_regs ls). intros. apply index_float_callee_save_inj; auto. intros. simpl. split. apply Zge_le. apply index_float_callee_save_pos; auto. assumption. simpl; auto. @@ -1366,10 +1457,12 @@ Qed. saving of the used callee-save registers). *) Lemma function_prologue_correct: - forall j ls ls0 rs m1 m1' m2 sp parent ra cs fb k, + forall j ls ls0 ls1 rs rs1 m1 m1' m2 sp parent ra cs fb k, agree_regs j ls rs -> agree_callee_save ls ls0 -> wt_locset ls -> + ls1 = LTL.undef_regs destroyed_at_function_entry (LTL.call_regs ls) -> + rs1 = undef_regs destroyed_at_function_entry rs -> Mem.inject j m1 m1' -> Mem.alloc m1 0 f.(Linear.fn_stacksize) = (m2, sp) -> Val.has_type parent Tint -> Val.has_type ra Tint -> @@ -1378,16 +1471,16 @@ Lemma function_prologue_correct: /\ store_stack m2' (Vptr sp' Int.zero) Tint tf.(fn_link_ofs) parent = Some m3' /\ store_stack m3' (Vptr sp' Int.zero) Tint tf.(fn_retaddr_ofs) ra = Some m4' /\ star step tge - (State cs fb (Vptr sp' Int.zero) (save_callee_save fe k) (undef_temps rs) m4') + (State cs fb (Vptr sp' Int.zero) (save_callee_save fe k) rs1 m4') E0 (State cs fb (Vptr sp' Int.zero) k rs' m5') - /\ agree_regs j' (call_regs ls) rs' - /\ agree_frame j' (call_regs ls) ls0 m2 sp m5' sp' parent ra + /\ agree_regs j' ls1 rs' + /\ agree_frame j' ls1 ls0 m2 sp m5' sp' parent ra /\ inject_incr j j' /\ inject_separated j j' m1 m1' /\ Mem.inject j' m2 m5' /\ stores_in_frame sp' m2' m5'. Proof. - intros until k; intros AGREGS AGCS WTREGS INJ1 ALLOC TYPAR TYRA. + intros until k; intros AGREGS AGCS WTREGS LS1 RS1 INJ1 ALLOC TYPAR TYRA. rewrite unfold_transf_function. unfold fn_stacksize, fn_link_ofs, fn_retaddr_ofs. (* Allocation step *) @@ -1424,9 +1517,12 @@ Proof. assert (PERM4: frame_perm_freeable m4' sp'). red; intros. eauto with mem. exploit save_callee_save_correct. + instantiate (1 := rs1). instantiate (1 := call_regs ls). instantiate (1 := j'). + subst rs1. apply agree_regs_undef_regs. apply agree_regs_call_regs. eapply agree_regs_inject_incr; eauto. - apply wt_call_regs. auto. + apply wt_undef_regs. apply wt_call_regs. auto. eexact PERM4. + rewrite <- LS1. intros [rs' [m5' [STEPS [ICS [FCS [OTHERS [STORES [PERM5 AGREGS']]]]]]]]. (* stores in frames *) assert (SIF: stores_in_frame sp' m2' m5'). @@ -1460,15 +1556,20 @@ Proof. (* agree frame *) split. constructor; intros. (* unused regs *) - unfold call_regs. destruct (in_dec Loc.eq (R r) temporaries). - elim H. apply temporary_within_bounds; auto. - apply AGCS. apply mreg_not_within_bounds_callee_save; auto. + assert (~In r destroyed_at_call). + red; intros; elim H; apply caller_save_reg_within_bounds; auto. + rewrite LS1. rewrite LTL_undef_regs_others. unfold call_regs. + apply AGCS; auto. red; intros; elim H0. + apply destroyed_at_function_entry_caller_save; auto. (* locals *) - simpl. apply index_contains_inj_undef; auto. + rewrite LS1. rewrite LTL_undef_regs_slot. unfold call_regs. + apply index_contains_inj_undef; auto with stacking. (* outgoing *) - simpl. apply index_contains_inj_undef; auto. + rewrite LS1. rewrite LTL_undef_regs_slot. unfold call_regs. + apply index_contains_inj_undef; auto with stacking. (* incoming *) - unfold call_regs. apply AGCS. auto. + rewrite LS1. rewrite LTL_undef_regs_slot. unfold call_regs. + apply AGCS; auto. (* parent *) apply OTHERS; auto. red; auto. eapply gso_index_contains; eauto. red; auto. @@ -1478,17 +1579,17 @@ Proof. apply OTHERS; auto. red; auto. eapply gss_index_contains; eauto. red; auto. (* int callee save *) - rewrite <- AGCS. replace (ls (R r)) with (call_regs ls (R r)). - apply ICS; auto. - unfold call_regs. apply pred_dec_false. - red; intros; exploit int_callee_save_not_destroyed; eauto. - auto. + assert (~In r destroyed_at_call). + red; intros. eapply int_callee_save_not_destroyed; eauto. + exploit ICS; eauto. rewrite LS1. rewrite LTL_undef_regs_others. unfold call_regs. + rewrite AGCS; auto. + red; intros; elim H1. apply destroyed_at_function_entry_caller_save; auto. (* float callee save *) - rewrite <- AGCS. replace (ls (R r)) with (call_regs ls (R r)). - apply FCS; auto. - unfold call_regs. apply pred_dec_false. - red; intros; exploit float_callee_save_not_destroyed; eauto. - auto. + assert (~In r destroyed_at_call). + red; intros. eapply float_callee_save_not_destroyed; eauto. + exploit FCS; eauto. rewrite LS1. rewrite LTL_undef_regs_others. unfold call_regs. + rewrite AGCS; auto. + red; intros; elim H1. apply destroyed_at_function_entry_caller_save; auto. (* inj *) auto. (* inj_unique *) @@ -1502,7 +1603,7 @@ Proof. (* perms *) auto. (* wt *) - apply wt_call_regs; auto. + rewrite LS1. apply wt_undef_regs. apply wt_call_regs; auto. (* incr *) split. auto. (* separated *) @@ -1625,7 +1726,12 @@ Proof. Tint int_callee_save_regs j cs fb sp' ls0 m'); auto. - intros. unfold mreg_within_bounds. rewrite (int_callee_save_type r H1). tauto. + intros. unfold mreg_within_bounds. split; intros. + split; auto. destruct (zlt (index_float_callee_save r) 0). + generalize (bound_float_callee_save_pos b). omega. + eelim int_float_callee_save_disjoint. eauto. + eapply index_float_callee_save_pos2. eauto. auto. + destruct H2; auto. eapply agree_saved_int; eauto. apply incl_refl. apply int_callee_save_norepet. @@ -1638,7 +1744,12 @@ Proof. Tfloat float_callee_save_regs j cs fb sp' ls0 m'); auto. - intros. unfold mreg_within_bounds. rewrite (float_callee_save_type r H1). tauto. + intros. unfold mreg_within_bounds. split; intros. + split; auto. destruct (zlt (index_int_callee_save r) 0). + generalize (bound_int_callee_save_pos b). omega. + eelim int_float_callee_save_disjoint. + eapply index_int_callee_save_pos2. eauto. eauto. auto. + destruct H2; auto. eapply agree_saved_float; eauto. apply incl_refl. apply float_callee_save_norepet. @@ -1672,7 +1783,6 @@ Lemma function_epilogue_correct: E0 (State cs fb (Vptr sp' Int.zero) k rs1 m') /\ agree_regs j (return_regs ls0 ls) rs1 /\ agree_callee_save (return_regs ls0 ls) ls0 - /\ rs1 IT1 = rs IT1 /\ Mem.inject j m1 m1'. Proof. intros. @@ -1707,16 +1817,12 @@ Proof. eapply index_contains_load_stack with (idx := FI_retaddr); eauto with stacking. split. auto. split. eexact A. - split. red;intros. unfold return_regs. + split. red; intros. unfold return_regs. generalize (register_classification r) (int_callee_save_not_destroyed r) (float_callee_save_not_destroyed r); intros. - destruct (in_dec Loc.eq (R r) temporaries). + destruct (in_dec mreg_eq r destroyed_at_call). rewrite C; auto. - destruct (in_dec Loc.eq (R r) destroyed_at_call). - rewrite C; auto. - intuition. + apply B. intuition. split. apply agree_callee_save_return_regs. - split. apply C. apply int_callee_save_not_destroyed. left; simpl; auto. - apply float_callee_save_not_destroyed. left; simpl; auto. auto. Qed. @@ -1741,15 +1847,15 @@ Inductive match_stacks (j: meminj) (m m': mem): match_stacks j m m' nil nil sg bound bound' | match_stacks_cons: forall f sp ls c cs fb sp' ra c' cs' sg bound bound' trf (TAIL: is_tail c (Linear.fn_code f)) - (WTF: wt_function f) + (WTF: wt_function f = true) (FINDF: Genv.find_funct_ptr tge fb = Some (Internal trf)) (TRF: transf_function f = OK trf) (TRC: transl_code (make_env (function_bounds f)) c = c') (TY_RA: Val.has_type ra Tint) (FRM: agree_frame f j ls (parent_locset cs) m sp m' sp' (parent_sp cs') (parent_ra cs')) (ARGS: forall ofs ty, - In (S (Outgoing ofs ty)) (loc_arguments sg) -> - slot_within_bounds f (function_bounds f) (Outgoing ofs ty)) + In (S Outgoing ofs ty) (loc_arguments sg) -> + slot_within_bounds (function_bounds f) Outgoing ofs ty) (STK: match_stacks j m m' cs cs' (Linear.fn_sig f) sp sp') (BELOW: sp < bound) (BELOW': sp' < bound'), @@ -1903,8 +2009,9 @@ Lemma match_stacks_change_sig: tailcall_possible sg1 -> match_stacks j m m' cs cs' sg1 bound bound'. Proof. - induction 1; intros. econstructor; eauto. econstructor; eauto. - intros. elim (H0 _ H1). + induction 1; intros. + econstructor; eauto. + econstructor; eauto. intros. elim (H0 _ H1). Qed. (** [match_stacks] implies [match_globalenvs], which implies [meminj_preserves_globals]. *) @@ -2182,18 +2289,6 @@ Proof. rewrite symbols_preserved. auto. Qed. -Hypothesis wt_prog: wt_program prog. - -Lemma find_function_well_typed: - forall ros ls f, - Linear.find_function ge ros ls = Some f -> wt_fundef f. -Proof. - intros until f; destruct ros; simpl; unfold ge. - intro. eapply Genv.find_funct_prop; eauto. - destruct (Genv.find_symbol (Genv.globalenv prog) i); try congruence. - intro. eapply Genv.find_funct_ptr_prop; eauto. -Qed. - (** Preservation of the arguments to an external call. *) Section EXTERNAL_ARGUMENTS. @@ -2218,15 +2313,21 @@ Proof. intros. assert (loc_argument_acceptable l). apply loc_arguments_acceptable with sg; auto. destruct l; red in H0. - exists (rs m0); split. constructor. auto. - destruct s; try contradiction. - inv MS. + exists (rs r); split. constructor. auto. + destruct sl; try contradiction. + inv MS. elim (H4 _ H). - unfold parent_sp. + unfold parent_sp. + assert (slot_valid f Outgoing pos ty = true). + exploit loc_arguments_acceptable; eauto. intros [A B]. + unfold slot_valid. unfold proj_sumbool. rewrite zle_true. + destruct ty; reflexivity || congruence. omega. + assert (slot_within_bounds (function_bounds f) Outgoing pos ty). + eauto. exploit agree_outgoing; eauto. intros [v [A B]]. exists v; split. constructor. - eapply index_contains_load_stack with (idx := FI_arg z t); eauto. + eapply index_contains_load_stack with (idx := FI_arg pos ty); eauto. red in AGCS. rewrite AGCS; auto. Qed. @@ -2273,37 +2374,34 @@ Hypothesis AGF: agree_frame f j ls ls0 m sp m' sp' parent retaddr. Lemma transl_annot_param_correct: forall l, - loc_acceptable l -> - match l with S s => slot_within_bounds f b s | _ => True end -> + loc_valid f l = true -> + match l with S sl ofs ty => slot_within_bounds b sl ofs ty | _ => True end -> exists v, annot_arg rs m' (Vptr sp' Int.zero) (transl_annot_param fe l) v /\ val_inject j (ls l) v. Proof. - intros. destruct l; red in H. + intros. destruct l; simpl in H. (* reg *) - exists (rs m0); split. constructor. auto. + exists (rs r); split. constructor. auto. (* stack *) - destruct s; try contradiction. + destruct sl; try discriminate. exploit agree_locals; eauto. intros [v [A B]]. inv A. exists v; split. constructor. rewrite Zplus_0_l. auto. auto. Qed. Lemma transl_annot_params_correct: forall ll, - locs_acceptable ll -> - (forall s, In (S s) ll -> slot_within_bounds f b s) -> + forallb (loc_valid f) ll = true -> + (forall sl ofs ty, In (S sl ofs ty) ll -> slot_within_bounds b sl ofs ty) -> exists vl, annot_arguments rs m' (Vptr sp' Int.zero) (map (transl_annot_param fe) ll) vl /\ val_list_inject j (map ls ll) vl. Proof. - induction ll; intros. + induction ll; simpl; intros. exists (@nil val); split; constructor. - exploit (transl_annot_param_correct a). - apply H; auto with coqlib. - destruct a; auto with coqlib. + InvBooleans. + exploit (transl_annot_param_correct a). auto. destruct a; auto. intros [v1 [A B]]. - exploit IHll. - red; intros; apply H; auto with coqlib. - intros; apply H0; auto with coqlib. + exploit IHll. auto. auto. intros [vl [C D]]. exists (v1 :: vl); split; constructor; auto. Qed. @@ -2339,7 +2437,7 @@ Inductive match_states: Linear.state -> Mach.state -> Prop := (STACKS: match_stacks j m m' cs cs' f.(Linear.fn_sig) sp sp') (TRANSL: transf_function f = OK tf) (FIND: Genv.find_funct_ptr tge fb = Some (Internal tf)) - (WTF: wt_function f) + (WTF: wt_function f = true) (AGREGS: agree_regs j ls rs) (AGFRAME: agree_frame f j ls (parent_locset cs) m sp m' sp' (parent_sp cs') (parent_ra cs')) (TAIL: is_tail c (Linear.fn_code f)), @@ -2351,7 +2449,6 @@ Inductive match_states: Linear.state -> Mach.state -> Prop := (STACKS: match_stacks j m m' cs cs' (Linear.funsig f) (Mem.nextblock m) (Mem.nextblock m')) (TRANSL: transf_fundef f = OK tf) (FIND: Genv.find_funct_ptr tge fb = Some tf) - (WTF: wt_fundef f) (WTLS: wt_locset ls) (AGREGS: agree_regs j ls rs) (AGLOCS: agree_callee_save ls (parent_locset cs)), @@ -2372,16 +2469,21 @@ Theorem transf_step_correct: forall s1' (MS: match_states s1 s1'), exists s2', plus step tge s1' t s2' /\ match_states s2 s2'. Proof. + assert (USEWTF: forall f i c, + wt_function f = true -> is_tail (i :: c) (Linear.fn_code f) -> + wt_instr f i = true). + intros. unfold wt_function, wt_code in H. rewrite forallb_forall in H. + apply H. eapply is_tail_in; eauto. induction 1; intros; try inv MS; try rewrite transl_code_eq; - try (generalize (WTF _ (is_tail_in TAIL)); intro WTI); - try (generalize (function_is_within_bounds f WTF _ (is_tail_in TAIL)); + try (generalize (USEWTF _ _ _ WTF TAIL); intro WTI; simpl in WTI; InvBooleans); + try (generalize (function_is_within_bounds f _ (is_tail_in TAIL)); intro BOUND; simpl in BOUND); unfold transl_instr. (* Lgetstack *) - inv WTI. destruct BOUND. unfold undef_getstack; destruct sl. + destruct BOUND. unfold destroyed_by_getstack; destruct sl. (* Lgetstack, local *) exploit agree_locals; eauto. intros [v [A B]]. econstructor; split. @@ -2389,26 +2491,33 @@ Proof. eapply index_contains_load_stack; eauto. econstructor; eauto with coqlib. apply agree_regs_set_reg; auto. - apply agree_frame_set_reg; auto. simpl; rewrite <- H1. eapply agree_wt_ls; eauto. + apply agree_frame_set_reg; auto. simpl. rewrite <- H. eapply agree_wt_ls; eauto. (* Lgetstack, incoming *) - red in H2. exploit incoming_slot_in_parameters; eauto. intros IN_ARGS. - inv STACKS. elim (H6 _ IN_ARGS). + unfold slot_valid in H0. InvBooleans. + exploit incoming_slot_in_parameters; eauto. intros IN_ARGS. + inversion STACKS; clear STACKS. + elim (H7 _ IN_ARGS). + subst bound bound' s cs'. exploit agree_outgoing. eexact FRM. eapply ARGS; eauto. + exploit loc_arguments_acceptable; eauto. intros [A B]. + unfold slot_valid, proj_sumbool. rewrite zle_true. + destruct ty; reflexivity || congruence. omega. intros [v [A B]]. econstructor; split. apply plus_one. eapply exec_Mgetparam; eauto. rewrite (unfold_transf_function _ _ TRANSL). unfold fn_link_ofs. eapply index_contains_load_stack with (idx := FI_link). eapply TRANSL. eapply agree_link; eauto. simpl parent_sp. - change (offset_of_index (make_env (function_bounds f)) (FI_arg z t)) - with (offset_of_index (make_env (function_bounds f0)) (FI_arg z t)). - eapply index_contains_load_stack with (idx := FI_arg z t). eauto. eauto. + change (offset_of_index (make_env (function_bounds f)) (FI_arg ofs ty)) + with (offset_of_index (make_env (function_bounds f0)) (FI_arg ofs ty)). + eapply index_contains_load_stack with (idx := FI_arg ofs ty). eauto. eauto. exploit agree_incoming; eauto. intros EQ; simpl in EQ. econstructor; eauto with coqlib. econstructor; eauto. apply agree_regs_set_reg. apply agree_regs_set_reg. auto. auto. congruence. eapply agree_frame_set_reg; eauto. eapply agree_frame_set_reg; eauto. - apply temporary_within_bounds. simpl; auto. - simpl; auto. simpl; rewrite <- H1. eapply agree_wt_ls; eauto. + apply caller_save_reg_within_bounds. + apply temp_for_parent_frame_caller_save. + subst ty. simpl. eapply agree_wt_ls; eauto. (* Lgetstack, outgoing *) exploit agree_outgoing; eauto. intros [v [A B]]. econstructor; split. @@ -2416,14 +2525,13 @@ Proof. eapply index_contains_load_stack; eauto. econstructor; eauto with coqlib. apply agree_regs_set_reg; auto. - apply agree_frame_set_reg; auto. simpl; rewrite <- H1; eapply agree_wt_ls; eauto. + apply agree_frame_set_reg; auto. simpl; rewrite <- H; eapply agree_wt_ls; eauto. (* Lsetstack *) - inv WTI. set (idx := match sl with - | Local ofs ty => FI_local ofs ty - | Incoming ofs ty => FI_link (*dummy*) - | Outgoing ofs ty => FI_arg ofs ty + | Local => FI_local ofs ty + | Incoming => FI_link (*dummy*) + | Outgoing => FI_arg ofs ty end). assert (index_valid f idx). unfold idx; destruct sl. @@ -2431,13 +2539,13 @@ Proof. red; auto. apply index_arg_valid; auto. exploit store_index_succeeds; eauto. eapply agree_perm; eauto. - instantiate (1 := rs0 r). intros [m1' STORE]. + instantiate (1 := rs0 src). intros [m1' STORE]. econstructor; split. - apply plus_one. destruct sl; simpl in H3. - econstructor. eapply store_stack_succeeds with (idx := idx); eauto. - contradiction. - econstructor. eapply store_stack_succeeds with (idx := idx); eauto. - econstructor; eauto with coqlib. + apply plus_one. destruct sl; simpl in H0. + econstructor. eapply store_stack_succeeds with (idx := idx); eauto. eauto. + discriminate. + econstructor. eapply store_stack_succeeds with (idx := idx); eauto. auto. + econstructor. eapply Mem.store_outside_inject; eauto. intros. exploit agree_inj_unique; eauto. intros [EQ1 EQ2]; subst b' delta. rewrite size_type_chunk in H5. @@ -2446,20 +2554,30 @@ Proof. omega. apply match_stacks_change_mach_mem with m'; auto. eauto with mem. eauto with mem. intros. rewrite <- H4; eapply Mem.load_store_other; eauto. left; unfold block; omega. - apply agree_regs_set_slot. apply agree_regs_undef_setstack; auto. + eauto. eauto. auto. + apply agree_regs_set_slot. apply agree_regs_undef_regs; auto. destruct sl. - eapply agree_frame_set_local. eapply agree_frame_undef_setstack; eauto. auto. auto. - simpl in H1; rewrite H1; eapply agree_wt_ls; eauto. auto. - simpl in H3; contradiction. - eapply agree_frame_set_outgoing. apply agree_frame_undef_setstack; eauto. auto. auto. - simpl in H1; rewrite H1; eapply agree_wt_ls; eauto. auto. + eapply agree_frame_set_local. eapply agree_frame_undef_locs; eauto. + apply destroyed_by_op_caller_save. auto. auto. apply AGREGS. + rewrite H; eapply agree_wt_ls; eauto. + assumption. + simpl in H0; discriminate. + eapply agree_frame_set_outgoing. eapply agree_frame_undef_locs; eauto. + apply destroyed_by_op_caller_save. auto. auto. apply AGREGS. + rewrite H; eapply agree_wt_ls; eauto. + assumption. + eauto with coqlib. (* Lop *) assert (Val.has_type v (mreg_type res)). - inv WTI. simpl in H. inv H. rewrite <- H1. eapply agree_wt_ls; eauto. + destruct (is_move_operation op args) as [arg|] eqn:?. + exploit is_move_operation_correct; eauto. intros [EQ1 EQ2]; subst op args. + InvBooleans. simpl in H. inv H. rewrite <- H0. eapply agree_wt_ls; eauto. replace (mreg_type res) with (snd (type_of_operation op)). - eapply type_of_operation_sound; eauto. - rewrite <- H4; auto. + eapply type_of_operation_sound; eauto. + red; intros. subst op. simpl in Heqo. + destruct args; simpl in H. discriminate. destruct args. discriminate. simpl in H. discriminate. + destruct (type_of_operation op) as [targs tres]. InvBooleans. auto. assert (exists v', eval_operation ge (Vptr sp' Int.zero) (transl_op (make_env (function_bounds f)) op) rs0##args m' = Some v' /\ val_inject j v v'). @@ -2468,12 +2586,14 @@ Proof. eapply agree_inj; eauto. eapply agree_reglist; eauto. destruct H1 as [v' [A B]]. econstructor; split. - apply plus_one. constructor. + apply plus_one. econstructor. instantiate (1 := v'). rewrite <- A. apply eval_operation_preserved. - exact symbols_preserved. + exact symbols_preserved. eauto. econstructor; eauto with coqlib. - apply agree_regs_set_reg; auto. apply agree_regs_undef_op; auto. - apply agree_frame_set_reg; auto. apply agree_frame_undef_op; auto. + apply agree_regs_set_reg; auto. + rewrite transl_destroyed_by_op. apply agree_regs_undef_regs; auto. + apply agree_frame_set_reg; auto. apply agree_frame_undef_locs; auto. + apply destroyed_by_op_caller_save. (* Lload *) assert (exists a', @@ -2482,17 +2602,17 @@ Proof. eapply eval_addressing_inject; eauto. eapply match_stacks_preserves_globals; eauto. eapply agree_inj; eauto. eapply agree_reglist; eauto. - destruct H1 as [a' [A B]]. + destruct H2 as [a' [A B]]. exploit Mem.loadv_inject; eauto. intros [v' [C D]]. econstructor; split. apply plus_one. econstructor. instantiate (1 := a'). rewrite <- A. apply eval_addressing_preserved. exact symbols_preserved. - eexact C. + eexact C. eauto. econstructor; eauto with coqlib. - apply agree_regs_set_reg; auto. apply agree_regs_undef_temps; auto. - apply agree_frame_set_reg; auto. apply agree_frame_undef_temps; auto. - simpl. inv WTI. rewrite H6. - inv B; simpl in H0; try discriminate. eapply Mem.load_type; eauto. + apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto. + apply agree_frame_set_reg. apply agree_frame_undef_locs; auto. + apply destroyed_by_load_caller_save. auto. + simpl. rewrite H1. destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto. (* Lstore *) assert (exists a', @@ -2506,12 +2626,14 @@ Proof. econstructor; split. apply plus_one. econstructor. instantiate (1 := a'). rewrite <- A. apply eval_addressing_preserved. exact symbols_preserved. - eexact C. + eexact C. eauto. econstructor; eauto with coqlib. eapply match_stacks_parallel_stores. eexact MINJ. eexact B. eauto. eauto. auto. - apply agree_regs_undef_temps; auto. - apply agree_frame_undef_temps; auto. + rewrite transl_destroyed_by_store. + apply agree_regs_undef_regs; auto. + apply agree_frame_undef_locs; auto. eapply agree_frame_parallel_stores; eauto. + apply destroyed_by_store_caller_save. (* Lcall *) exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]]. @@ -2524,84 +2646,80 @@ Proof. econstructor; eauto. econstructor; eauto with coqlib. simpl; auto. - intros; red. split. - generalize (loc_arguments_acceptable _ _ H0). simpl. omega. + intros; red. apply Zle_trans with (size_arguments (Linear.funsig f')); auto. apply loc_arguments_bounded; auto. eapply agree_valid_linear; eauto. eapply agree_valid_mach; eauto. - eapply find_function_well_typed; eauto. eapply agree_wt_ls; eauto. simpl; red; auto. (* Ltailcall *) - exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]]. exploit function_epilogue_correct; eauto. - intros [rs1 [m1' [P [Q [R [S [T [U [V W]]]]]]]]]. + intros [rs1 [m1' [P [Q [R [S [T [U V]]]]]]]]. + exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]]. econstructor; split. - eapply plus_right. eexact S. econstructor; eauto. - replace (find_function_ptr tge ros rs1) - with (find_function_ptr tge ros rs0). eauto. - destruct ros; simpl; auto. inv WTI. rewrite V; auto. - traceEq. + eapply plus_right. eexact S. econstructor; eauto. traceEq. econstructor; eauto. - inv WTI. apply match_stacks_change_sig with (Linear.fn_sig f); auto. + apply match_stacks_change_sig with (Linear.fn_sig f); auto. apply match_stacks_change_bounds with stk sp'. apply match_stacks_change_linear_mem with m. apply match_stacks_change_mach_mem with m'0. auto. eauto with mem. intros. eapply Mem.perm_free_1; eauto. left; unfold block; omega. - intros. rewrite <- H2. eapply Mem.load_free; eauto. left; unfold block; omega. + intros. rewrite <- H3. eapply Mem.load_free; eauto. left; unfold block; omega. eauto with mem. intros. eapply Mem.perm_free_3; eauto. apply Zlt_le_weak. change (Mem.valid_block m' stk). eapply Mem.valid_block_free_1; eauto. eapply agree_valid_linear; eauto. apply Zlt_le_weak. change (Mem.valid_block m1' sp'). eapply Mem.valid_block_free_1; eauto. eapply agree_valid_mach; eauto. - eapply find_function_well_typed; eauto. + apply zero_size_arguments_tailcall_possible; auto. apply wt_return_regs; auto. eapply match_stacks_wt_locset; eauto. eapply agree_wt_ls; eauto. (* Lbuiltin *) - exploit external_call_mem_inject; eauto. + exploit external_call_mem_inject'; eauto. eapply match_stacks_preserves_globals; eauto. eapply agree_reglist; eauto. intros [j' [res' [m1' [A [B [C [D [E [F G]]]]]]]]]. econstructor; split. apply plus_one. econstructor; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto with coqlib. + inversion H; inversion A; subst. eapply match_stack_change_extcall; eauto. apply Zlt_le_weak. change (Mem.valid_block m sp0). eapply agree_valid_linear; eauto. apply Zlt_le_weak. change (Mem.valid_block m'0 sp'). eapply agree_valid_mach; eauto. - apply agree_regs_set_reg; auto. apply agree_regs_undef_temps; auto. eapply agree_regs_inject_incr; eauto. - apply agree_frame_set_reg; auto. apply agree_frame_undef_temps; auto. + apply agree_regs_set_regs; auto. apply agree_regs_undef_regs; auto. eapply agree_regs_inject_incr; eauto. + apply agree_frame_set_regs; auto. apply agree_frame_undef_regs; auto. eapply agree_frame_inject_incr; eauto. apply agree_frame_extcall_invariant with m m'0; auto. - eapply external_call_valid_block; eauto. - intros. eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto. - eapply external_call_valid_block; eauto. + eapply external_call_valid_block'; eauto. + intros. inv H; eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto. + eapply external_call_valid_block'; eauto. eapply agree_valid_mach; eauto. - inv WTI. simpl; rewrite H4. eapply external_call_well_typed; eauto. + simpl. rewrite list_map_compose. + change (fun x => Loc.type (R x)) with mreg_type. + rewrite H0. eapply external_call_well_typed'; eauto. (* Lannot *) - inv WTI. exploit transl_annot_params_correct; eauto. intros [vargs' [P Q]]. - exploit external_call_mem_inject; eauto. + exploit external_call_mem_inject'; eauto. eapply match_stacks_preserves_globals; eauto. intros [j' [res' [m1' [A [B [C [D [E [F G]]]]]]]]]. econstructor; split. apply plus_one. econstructor; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto with coqlib. - eapply match_stack_change_extcall; eauto. + inv H; inv A. eapply match_stack_change_extcall; eauto. apply Zlt_le_weak. change (Mem.valid_block m sp0). eapply agree_valid_linear; eauto. apply Zlt_le_weak. change (Mem.valid_block m'0 sp'). eapply agree_valid_mach; eauto. eapply agree_regs_inject_incr; eauto. eapply agree_frame_inject_incr; eauto. apply agree_frame_extcall_invariant with m m'0; auto. - eapply external_call_valid_block; eauto. - intros. eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto. - eapply external_call_valid_block; eauto. + eapply external_call_valid_block'; eauto. + intros. inv H; eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto. + eapply external_call_valid_block'; eauto. eapply agree_valid_mach; eauto. (* Llabel *) @@ -2620,19 +2738,20 @@ Proof. econstructor; split. apply plus_one. eapply exec_Mcond_true; eauto. eapply eval_condition_inject; eauto. eapply agree_reglist; eauto. - eapply transl_find_label; eauto. - econstructor; eauto with coqlib. - apply agree_regs_undef_temps; auto. - apply agree_frame_undef_temps; auto. + eapply transl_find_label; eauto. + econstructor. eauto. eauto. eauto. eauto. eauto. + apply agree_regs_undef_regs; auto. + apply agree_frame_undef_locs; auto. apply destroyed_by_cond_caller_save. eapply find_label_tail; eauto. (* Lcond, false *) econstructor; split. apply plus_one. eapply exec_Mcond_false; eauto. eapply eval_condition_inject; eauto. eapply agree_reglist; eauto. - econstructor; eauto with coqlib. - apply agree_regs_undef_temps; auto. - apply agree_frame_undef_temps; auto. + econstructor. eauto. eauto. eauto. eauto. eauto. + apply agree_regs_undef_regs; auto. + apply agree_frame_undef_locs; auto. apply destroyed_by_cond_caller_save. + eauto with coqlib. (* Ljumptable *) assert (rs0 arg = Vint n). @@ -2640,14 +2759,14 @@ Proof. econstructor; split. apply plus_one; eapply exec_Mjumptable; eauto. apply transl_find_label; eauto. - econstructor; eauto. - apply agree_regs_undef_temps; auto. - apply agree_frame_undef_temps; auto. + econstructor. eauto. eauto. eauto. eauto. eauto. + apply agree_regs_undef_regs; auto. + apply agree_frame_undef_locs; auto. apply destroyed_by_jumptable_caller_save. eapply find_label_tail; eauto. (* Lreturn *) exploit function_epilogue_correct; eauto. - intros [rs1 [m1' [P [Q [R [S [T [U [V W]]]]]]]]]. + intros [rs1 [m1' [P [Q [R [S [T [U V]]]]]]]]. econstructor; split. eapply plus_right. eexact S. econstructor; eauto. traceEq. @@ -2667,7 +2786,6 @@ Proof. revert TRANSL. unfold transf_fundef, transf_partial_fundef. caseEq (transf_function f); simpl; try congruence. intros tfn TRANSL EQ. inversion EQ; clear EQ; subst tf. - inversion WTF as [|f' WTFN]. subst f'. exploit function_prologue_correct; eauto. eapply match_stacks_type_sp; eauto. eapply match_stacks_type_retaddr; eauto. @@ -2689,30 +2807,28 @@ Proof. intros. eapply stores_in_frame_perm; eauto with mem. intros. rewrite <- H1. transitivity (Mem.load chunk m2' b ofs). eapply stores_in_frame_contents; eauto. eapply Mem.load_alloc_unchanged; eauto. red. congruence. + eapply transf_function_well_typed; eauto. auto with coqlib. (* external function *) simpl in TRANSL. inversion TRANSL; subst tf. - inversion WTF. subst ef0. exploit transl_external_arguments; eauto. intros [vl [ARGS VINJ]]. - exploit external_call_mem_inject; eauto. + exploit external_call_mem_inject'; eauto. eapply match_stacks_preserves_globals; eauto. intros [j' [res' [m1' [A [B [C [D [E [F G]]]]]]]]]. econstructor; split. apply plus_one. eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto. apply match_stacks_change_bounds with (Mem.nextblock m) (Mem.nextblock m'0). - eapply match_stack_change_extcall; eauto. omega. omega. - exploit external_call_valid_block. eexact H. + inv H0; inv A. eapply match_stack_change_extcall; eauto. omega. omega. + exploit external_call_valid_block'. eexact H0. instantiate (1 := (Mem.nextblock m - 1)). red; omega. unfold Mem.valid_block; omega. - exploit external_call_valid_block. eexact A. + exploit external_call_valid_block'. eexact A. instantiate (1 := (Mem.nextblock m'0 - 1)). red; omega. unfold Mem.valid_block; omega. - apply wt_setloc; auto. simpl. rewrite loc_result_type. - change (Val.has_type res (proj_sig_res (ef_sig ef))). - eapply external_call_well_typed; eauto. - apply agree_regs_set_reg; auto. apply agree_regs_inject_incr with j; auto. + inv H0. apply wt_setlist_result. eapply external_call_well_typed; eauto. auto. + apply agree_regs_set_regs; auto. apply agree_regs_inject_incr with j; auto. apply agree_callee_save_set_result; auto. (* return *) @@ -2745,8 +2861,6 @@ Proof. intros. change (Mem.valid_block m0 b0). eapply Genv.find_funct_ptr_not_fresh; eauto. intros. change (Mem.valid_block m0 b0). eapply Genv.find_var_info_not_fresh; eauto. rewrite H3. red; intros. contradiction. - eapply Genv.find_funct_ptr_prop. eexact wt_prog. - fold ge0; eauto. apply wt_init. unfold Locmap.init. red; intros; auto. unfold parent_locset. red; auto. @@ -2757,9 +2871,8 @@ Lemma transf_final_states: match_states st1 st2 -> Linear.final_state st1 r -> Mach.final_state st2 r. Proof. intros. inv H0. inv H. inv STACKS. - constructor. - set (rres := loc_result {| sig_args := nil; sig_res := Some Tint |}) in *. - generalize (AGREGS rres). rewrite H1. intros IJ; inv IJ. auto. + generalize (AGREGS r0). rewrite H2. intros A; inv A. + econstructor; eauto. Qed. Theorem transf_program_correct: diff --git a/backend/Tunneling.v b/backend/Tunneling.v index 18414a8..bdc8117 100644 --- a/backend/Tunneling.v +++ b/backend/Tunneling.v @@ -64,9 +64,9 @@ Require Import LTL. Module U := UnionFind.UF(PTree). -Definition record_goto (uf: U.t) (pc: node) (i: instruction) : U.t := - match i with - | Lnop s => U.union uf pc s +Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t := + match b with + | Lbranch s :: _ => U.union uf pc s | _ => uf end. @@ -77,37 +77,23 @@ Definition record_gotos (f: LTL.function) : U.t := successor [s] of every instruction by the canonical representative of its equivalence class in the union-find data structure. *) -Definition tunnel_instr (uf: U.t) (b: instruction) : instruction := - match b with - | Lnop s => - Lnop (U.repr uf s) - | Lop op args res s => - Lop op args res (U.repr uf s) - | Lload chunk addr args dst s => - Lload chunk addr args dst (U.repr uf s) - | Lstore chunk addr args src s => - Lstore chunk addr args src (U.repr uf s) - | Lcall sig ros args res s => - Lcall sig ros args res (U.repr uf s) - | Ltailcall sig ros args => - Ltailcall sig ros args - | Lbuiltin ef args res s => - Lbuiltin ef args res (U.repr uf s) - | Lcond cond args s1 s2 => - Lcond cond args (U.repr uf s1) (U.repr uf s2) - | Ljumptable arg tbl => - Ljumptable arg (List.map (U.repr uf) tbl) - | Lreturn or => - Lreturn or +Definition tunnel_instr (uf: U.t) (i: instruction) : instruction := + match i with + | Lbranch s => Lbranch (U.repr uf s) + | Lcond cond args s1 s2 => Lcond cond args (U.repr uf s1) (U.repr uf s2) + | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl) + | _ => i end. +Definition tunnel_block (uf: U.t) (b: bblock) : bblock := + List.map (tunnel_instr uf) b. + Definition tunnel_function (f: LTL.function) : LTL.function := let uf := record_gotos f in mkfunction (fn_sig f) - (fn_params f) (fn_stacksize f) - (PTree.map (fun pc b => tunnel_instr uf b) (fn_code f)) + (PTree.map1 (tunnel_block uf) (fn_code f)) (U.repr uf (fn_entrypoint f)). Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef := diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v index d589260..d02cb2e 100644 --- a/backend/Tunnelingproof.v +++ b/backend/Tunnelingproof.v @@ -36,16 +36,16 @@ Definition measure_edge (u: U.t) (pc s: node) (f: node -> nat) : node -> nat := else if peq (U.repr u x) pc then (f x + f s + 1)%nat else f x. -Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (i: instruction) : U.t * (node -> nat) := - match i with - | Lnop s => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f) +Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (b: bblock) : U.t * (node -> nat) := + match b with + | Lbranch s :: b' => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f) | _ => uf end. Definition branch_map_correct (c: code) (uf: U.t * (node -> nat)): Prop := forall pc, match c!pc with - | Some(Lnop s) => + | Some(Lbranch s :: b) => U.repr (fst uf) pc = pc \/ (U.repr (fst uf) pc = U.repr (fst uf) s /\ snd uf s < snd uf pc)%nat | _ => U.repr (fst uf) pc = pc @@ -65,21 +65,21 @@ Proof. red; intros; simpl. rewrite PTree.gempty. apply U.repr_empty. (* inductive case *) - intros m uf pc i; intros. destruct uf as [u f]. + intros m uf pc bb; intros. destruct uf as [u f]. assert (PC: U.repr u pc = pc). generalize (H1 pc). rewrite H. auto. - assert (record_goto' (u, f) pc i = (u, f) - \/ exists s, i = Lnop s /\ record_goto' (u, f) pc i = (U.union u pc s, measure_edge u pc s f)). - unfold record_goto'; simpl. destruct i; auto. right. exists n; auto. - destruct H2 as [B | [s [EQ B]]]. + assert (record_goto' (u, f) pc bb = (u, f) + \/ exists s, exists bb', bb = Lbranch s :: bb' /\ record_goto' (u, f) pc bb = (U.union u pc s, measure_edge u pc s f)). + unfold record_goto'; simpl. destruct bb; auto. destruct i; auto. right. exists s; exists bb; auto. + destruct H2 as [B | [s [bb' [EQ B]]]]. (* u and f are unchanged *) rewrite B. red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. - destruct i; auto. + destruct bb; auto. destruct i; auto. apply H1. -(* i is Lnop s, u becomes union u pc s, f becomes measure_edge u pc s f *) +(* b is Lbranch s, u becomes union u pc s, f becomes measure_edge u pc s f *) rewrite B. red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. rewrite EQ. @@ -91,11 +91,11 @@ Proof. (* An old instruction *) assert (U.repr u pc' = pc' -> U.repr (U.union u pc s) pc' = pc'). intro. rewrite <- H2 at 2. apply U.repr_union_1. congruence. - generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct i0; auto. + generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct b; auto. destruct i; auto. intros [P | [P Q]]. left; auto. right. split. apply U.sameclass_union_2. auto. unfold measure_edge. destruct (peq (U.repr u s) pc). auto. - rewrite P. destruct (peq (U.repr u n0) pc). omega. auto. + rewrite P. destruct (peq (U.repr u s0) pc). omega. auto. Qed. Definition record_gotos' (f: function) := @@ -110,7 +110,7 @@ Proof. induction l; intros; simpl. auto. unfold record_goto' at 2. unfold record_goto at 2. - destruct (snd a); apply IHl. + destruct (snd a). apply IHl. destruct i; apply IHl. Qed. Definition branch_target (f: function) (pc: node) : node := @@ -122,7 +122,7 @@ Definition count_gotos (f: function) (pc: node) : nat := Theorem record_gotos_correct: forall f pc, match f.(fn_code)!pc with - | Some(Lnop s) => + | Some(Lbranch s :: b) => branch_target f pc = pc \/ (branch_target f pc = branch_target f s /\ count_gotos f s < count_gotos f pc)%nat | _ => branch_target f pc = pc @@ -204,15 +204,18 @@ Qed. the values of locations and the memory states are identical in the original and transformed codes. *) +Definition tunneled_block (f: function) (b: bblock) := + tunnel_block (record_gotos f) b. + Definition tunneled_code (f: function) := - PTree.map (fun pc b => tunnel_instr (record_gotos f) b) (fn_code f). + PTree.map1 (tunneled_block f) (fn_code f). Inductive match_stackframes: stackframe -> stackframe -> Prop := | match_stackframes_intro: - forall res f sp ls0 pc, + forall f sp ls0 bb, match_stackframes - (Stackframe res f sp ls0 pc) - (Stackframe res (tunnel_function f) sp ls0 (branch_target f pc)). + (Stackframe f sp ls0 bb) + (Stackframe (tunnel_function f) sp ls0 (tunneled_block f bb)). Inductive match_states: state -> state -> Prop := | match_states_intro: @@ -220,6 +223,16 @@ Inductive match_states: state -> state -> Prop := list_forall2 match_stackframes s ts -> match_states (State s f sp pc ls m) (State ts (tunnel_function f) sp (branch_target f pc) ls m) + | match_states_block: + forall s f sp bb ls m ts, + list_forall2 match_stackframes s ts -> + match_states (Block s f sp bb ls m) + (Block ts (tunnel_function f) sp (tunneled_block f bb) ls m) + | match_states_interm: + forall s f sp pc bb ls m ts, + list_forall2 match_stackframes s ts -> + match_states (Block s f sp (Lbranch pc :: bb) ls m) + (State ts (tunnel_function f) sp (branch_target f pc) ls m) | match_states_call: forall s f ls m ts, list_forall2 match_stackframes s ts -> @@ -238,17 +251,19 @@ Inductive match_states: state -> state -> Prop := Definition measure (st: state) : nat := match st with - | State s f sp pc ls m => count_gotos f pc + | State s f sp pc ls m => (count_gotos f pc * 2)%nat + | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat + | Block s f sp bb ls m => 0%nat | Callstate s f ls m => 0%nat | Returnstate s ls m => 0%nat end. -Lemma tunnel_function_lookup: - forall f pc i, - f.(fn_code)!pc = Some i -> - (tunnel_function f).(fn_code)!pc = Some (tunnel_instr (record_gotos f) i). +Lemma match_parent_locset: + forall s ts, + list_forall2 match_stackframes s ts -> + parent_locset ts = parent_locset s. Proof. - intros. simpl. rewrite PTree.gmap. rewrite H. auto. + induction 1; simpl. auto. inv H; auto. Qed. Lemma tunnel_step_correct: @@ -258,98 +273,113 @@ Lemma tunnel_step_correct: \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. Proof. induction 1; intros; try inv MS. - (* Lnop *) - generalize (record_gotos_correct f pc); rewrite H. - intros [A | [B C]]. - left; econstructor; split. - eapply exec_Lnop. rewrite A. - rewrite (tunnel_function_lookup _ _ _ H); simpl; auto. - econstructor; eauto. - right. split. simpl. auto. split. auto. - rewrite B. econstructor; eauto. + + (* entering a block *) + assert (DEFAULT: branch_target f pc = pc -> + (exists st2' : state, + step tge (State ts (tunnel_function f) sp (branch_target f pc) rs m) E0 st2' + /\ match_states (Block s f sp bb rs m) st2')). + intros. rewrite H0. econstructor; split. + econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto. + econstructor; eauto. + + generalize (record_gotos_correct f pc). rewrite H. + destruct bb; auto. destruct i; auto. + intros [A | [B C]]. auto. + right. split. simpl. omega. + split. auto. + rewrite B. econstructor; eauto. + (* Lop *) - generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A. - left; econstructor; split. + left; simpl; econstructor; split. eapply exec_Lop with (v := v); eauto. - rewrite (tunnel_function_lookup _ _ _ H); simpl; auto. - rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved. + rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. econstructor; eauto. (* Lload *) - generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A. - left; econstructor; split. + left; simpl; econstructor; split. eapply exec_Lload with (a := a). - rewrite (tunnel_function_lookup _ _ _ H); simpl; auto. - rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. - eauto. + rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. + eauto. eauto. + econstructor; eauto. + (* Lgetstack *) + left; simpl; econstructor; split. + econstructor; eauto. + econstructor; eauto. + (* Lsetstack *) + left; simpl; econstructor; split. + econstructor; eauto. econstructor; eauto. (* Lstore *) - generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A. - left; econstructor; split. + left; simpl; econstructor; split. eapply exec_Lstore with (a := a). - rewrite (tunnel_function_lookup _ _ _ H); simpl; auto. - rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. - eauto. + rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. + eauto. eauto. econstructor; eauto. (* Lcall *) - generalize (record_gotos_correct f pc); rewrite H; intro A. - left; econstructor; split. - eapply exec_Lcall with (f' := tunnel_fundef f'); eauto. - rewrite A. rewrite (tunnel_function_lookup _ _ _ H); simpl. - rewrite sig_preserved. auto. + left; simpl; econstructor; split. + eapply exec_Lcall with (fd := tunnel_fundef fd); eauto. apply find_function_translated; auto. + rewrite sig_preserved. auto. econstructor; eauto. constructor; auto. constructor; auto. (* Ltailcall *) - generalize (record_gotos_correct f pc); rewrite H; intro A. - left; econstructor; split. - eapply exec_Ltailcall with (f' := tunnel_fundef f'); eauto. - rewrite A. rewrite (tunnel_function_lookup _ _ _ H); simpl. - rewrite sig_preserved. auto. + left; simpl; econstructor; split. + eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto. + erewrite match_parent_locset; eauto. apply find_function_translated; auto. + apply sig_preserved. + erewrite <- match_parent_locset; eauto. econstructor; eauto. (* Lbuiltin *) - generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A. - left; econstructor; split. + left; simpl; econstructor; split. eapply exec_Lbuiltin; eauto. - rewrite (tunnel_function_lookup _ _ _ H); simpl; auto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto. - (* cond *) - generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A. - left; econstructor; split. + (* Lannot *) + left; simpl; econstructor; split. + eapply exec_Lannot; eauto. + eapply external_call_symbols_preserved'; eauto. + exact symbols_preserved. exact varinfo_preserved. + econstructor; eauto. + + (* Lbranch (preserved) *) + left; simpl; econstructor; split. + eapply exec_Lbranch; eauto. + fold (branch_target f pc). econstructor; eauto. + (* Lbranch (eliminated) *) + right; split. simpl. omega. split. auto. constructor; auto. + + (* Lcond *) + left; simpl; econstructor; split. eapply exec_Lcond; eauto. - rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto. destruct b; econstructor; eauto. - (* jumptable *) - generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A. - left; econstructor; split. + (* Ljumptable *) + left; simpl; econstructor; split. eapply exec_Ljumptable. - rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto. - eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H1. reflexivity. + eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto. econstructor; eauto. - (* return *) - generalize (record_gotos_correct f pc); rewrite H; intro A; rewrite A. - left; econstructor; split. + (* Lreturn *) + left; simpl; econstructor; split. eapply exec_Lreturn; eauto. - rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto. - simpl. constructor; auto. + erewrite <- match_parent_locset; eauto. + constructor; auto. (* internal function *) - simpl. left; econstructor; split. + left; simpl; econstructor; split. eapply exec_function_internal; eauto. simpl. econstructor; eauto. (* external function *) - simpl. left; econstructor; split. + left; simpl; econstructor; split. eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. simpl. econstructor; eauto. (* return *) inv H3. inv H1. left; econstructor; split. eapply exec_return; eauto. - constructor. auto. + constructor; auto. Qed. Lemma transf_initial_states: @@ -357,7 +387,7 @@ Lemma transf_initial_states: exists st2, initial_state tprog st2 /\ match_states st1 st2. Proof. intros. inversion H. - exists (Callstate nil (tunnel_fundef f) nil m0); split. + exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split. econstructor; eauto. apply Genv.init_mem_transf; auto. change (prog_main tprog) with (prog_main prog). @@ -371,7 +401,7 @@ Lemma transf_final_states: forall st1 st2 r, match_states st1 st2 -> final_state st1 r -> final_state st2 r. Proof. - intros. inv H0. inv H. inv H4. constructor. + intros. inv H0. inv H. inv H6. econstructor; eauto. Qed. Theorem transf_program_correct: diff --git a/backend/Tunnelingtyping.v b/backend/Tunnelingtyping.v deleted file mode 100644 index dfc36b6..0000000 --- a/backend/Tunnelingtyping.v +++ /dev/null @@ -1,103 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Type preservation for the Tunneling pass *) - -Require Import Coqlib. -Require Import Maps. -Require Import AST. -Require Import LTL. -Require Import LTLtyping. -Require Import Tunneling. -Require Import Tunnelingproof. - -(** Tunneling preserves typing. *) - -Lemma branch_target_valid_1: - forall f pc, wt_function f -> - valid_successor f pc -> - valid_successor f (branch_target f pc). -Proof. - intros. - assert (forall n p, - (count_gotos f p < n)%nat -> - valid_successor f p -> - valid_successor f (branch_target f p)). - induction n; intros. - omegaContradiction. - elim H2; intros i EQ. - generalize (record_gotos_correct f p). rewrite EQ. - destruct i; try congruence. - intros [A | [B C]]. congruence. - generalize (wt_instrs _ H _ _ EQ); intro WTI; inv WTI. - rewrite B. apply IHn. omega. auto. - - apply H1 with (Datatypes.S (count_gotos f pc)); auto. -Qed. - -Lemma tunnel_valid_successor: - forall f pc, - valid_successor f pc -> valid_successor (tunnel_function f) pc. -Proof. - intros. destruct H as [i AT]. - unfold valid_successor; simpl. rewrite PTree.gmap. rewrite AT. - simpl. exists (tunnel_instr (record_gotos f) i); auto. -Qed. - -Lemma branch_target_valid: - forall f pc, - wt_function f -> - valid_successor f pc -> - valid_successor (tunnel_function f) (branch_target f pc). -Proof. - intros. apply tunnel_valid_successor. apply branch_target_valid_1; auto. -Qed. - -Lemma wt_tunnel_instr: - forall f i, - wt_function f -> - wt_instr f i -> wt_instr (tunnel_function f) (tunnel_instr (record_gotos f) i). -Proof. - intros; inv H0; simpl; econstructor; eauto; - try (eapply branch_target_valid; eauto). - intros. exploit list_in_map_inv; eauto. intros [x [A B]]. subst lbl. - eapply branch_target_valid; eauto. - rewrite list_length_z_map. auto. -Qed. - -Lemma wt_tunnel_function: - forall f, wt_function f -> wt_function (tunnel_function f). -Proof. - intros. inversion H. constructor; simpl; auto. - intros until instr. rewrite PTree.gmap. unfold option_map. - caseEq (fn_code f)!pc. intros b0 AT EQ. inv EQ. - apply wt_tunnel_instr; eauto. - congruence. - eapply branch_target_valid; eauto. -Qed. - -Lemma wt_tunnel_fundef: - forall f, wt_fundef f -> wt_fundef (tunnel_fundef f). -Proof. - intros. inversion H; simpl. constructor; auto. - constructor. apply wt_tunnel_function; auto. -Qed. - -Lemma program_typing_preserved: - forall (p: LTL.program), - wt_program p -> wt_program (tunnel_program p). -Proof. - intros; red; intros. - generalize (transform_program_function tunnel_fundef p i f H0). - intros [f0 [IN TRANSF]]. - subst f. apply wt_tunnel_fundef. eauto. -Qed. diff --git a/backend/XTL.ml b/backend/XTL.ml new file mode 100644 index 0000000..93cab36 --- /dev/null +++ b/backend/XTL.ml @@ -0,0 +1,213 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** The XTL intermediate language for register allocation *) + +open Datatypes +open Camlcoq +open Maps +open AST +open Registers +open Op +open Locations + +type var = V of reg * typ | L of loc + +type node = P.t + +type instruction = + | Xmove of var * var + | Xreload of var * var + | Xspill of var * var + | Xparmove of var list * var list * var * var + | Xop of operation * var list * var + | Xload of memory_chunk * addressing * var list * var + | Xstore of memory_chunk * addressing * var list * var + | Xcall of signature * (var, ident) sum * var list * var list + | Xtailcall of signature * (var, ident) sum * var list + | Xbuiltin of external_function * var list * var list + | Xbranch of node + | Xcond of condition * var list * node * node + | Xjumptable of var * node list + | Xreturn of var list + +type block = instruction list + (* terminated by one of Xbranch, Xcond, Xjumptable, Xtailcall or Xreturn *) + +type code = block PTree.t + (* mapping node -> block *) + +type xfunction = { + fn_sig: signature; + fn_stacksize: Z.t; + fn_code: code; + fn_entrypoint: node +} + +(* Type of a variable *) + +let typeof = function V(_, ty) -> ty | L l -> Loc.coq_type l + +(* Constructors for type [var] *) + +let vloc l = L l +let vlocs ll = List.map vloc ll +let vmreg mr = L(R mr) +let vmregs mrl = List.map vmreg mrl + +(* Sets of variables *) + +module VSet = Set.Make(struct type t = var let compare = compare end) + +(*** Generation of fresh registers and fresh register variables *) + +let next_temp = ref P.one + +let twin_table : (int32, P.t) Hashtbl.t = Hashtbl.create 27 + +let reset_temps () = + next_temp := P.one; Hashtbl.clear twin_table + +let new_reg() = + let r = !next_temp in next_temp := P.succ !next_temp; r + +let new_temp ty = V (new_reg(), ty) + +let twin_reg r = + let r = P.to_int32 r in + try + Hashtbl.find twin_table r + with Not_found -> + let t = new_reg() in Hashtbl.add twin_table r t; t + +(*** Successors (for dataflow analysis) *) + +let rec successors_block = function + | Xbranch s :: _ -> [s] + | Xtailcall(sg, vos, args) :: _ -> [] + | Xcond(cond, args, s1, s2) :: _ -> [s1; s2] + | Xjumptable(arg, tbl) :: _ -> tbl + | Xreturn _:: _ -> [] + | instr :: blk -> successors_block blk + | [] -> assert false + +let successors fn = + PTree.map1 successors_block fn.fn_code + +(**** Type checking for XTL *) + +exception Type_error + +exception Type_error_at of node + +let set_var_type v ty = + if typeof v <> ty then raise Type_error + +let rec set_vars_type vl tyl = + match vl, tyl with + | [], [] -> () + | v1 :: vl, ty1 :: tyl -> set_var_type v1 ty1; set_vars_type vl tyl + | _, _ -> raise Type_error + +let unify_var_type v1 v2 = + if typeof v1 <> typeof v2 then raise Type_error + +let type_instr = function + | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) -> + unify_var_type src dst + | Xparmove(srcs, dsts, itmp, ftmp) -> + List.iter2 unify_var_type srcs dsts; + set_var_type itmp Tint; + set_var_type ftmp Tfloat + | Xop(op, args, res) -> + let (targs, tres) = type_of_operation op in + set_vars_type args targs; + set_var_type res tres + | Xload(chunk, addr, args, dst) -> + set_vars_type args (type_of_addressing addr); + set_var_type dst (type_of_chunk chunk) + | Xstore(chunk, addr, args, src) -> + set_vars_type args (type_of_addressing addr); + set_var_type src (type_of_chunk chunk) + | Xcall(sg, Coq_inl v, args, res) -> + set_var_type v Tint + | Xcall(sg, Coq_inr id, args, res) -> + () + | Xtailcall(sg, Coq_inl v, args) -> + set_var_type v Tint + | Xtailcall(sg, Coq_inr id, args) -> + () + | Xbuiltin(ef, args, res) -> + let sg = ef_sig ef in + set_vars_type args sg.sig_args; + set_vars_type res (Events.proj_sig_res' sg) + | Xbranch s -> + () + | Xcond(cond, args, s1, s2) -> + set_vars_type args (type_of_condition cond) + | Xjumptable(arg, tbl) -> + set_var_type arg Tint + | Xreturn args -> + () + +let type_block blk = + List.iter type_instr blk + +let type_function f = + PTree.fold + (fun () pc blk -> + try + type_block blk + with Type_error -> + raise (Type_error_at pc)) + f.fn_code () + +(*** A generic framework for transforming extended basic blocks *) + +(* Determine instructions that start an extended basic block. + These are instructions that have >= 2 predecessors. *) + +let basic_blocks_map f = (* return mapping pc -> number of predecessors *) + let add_successor map s = + PMap.set s (1 + PMap.get s map) map in + let add_successors_block map pc blk = + List.fold_left add_successor map (successors_block blk) in + PTree.fold add_successors_block f.fn_code + (PMap.set f.fn_entrypoint 2 (PMap.init 0)) + +let transform_basic_blocks + (transf: node -> block -> 'state -> block * 'state) + (top: 'state) + f = + let bbmap = basic_blocks_map f in + let rec transform_block st newcode pc bb = + assert (PTree.get pc newcode = None); + let (bb', st') = transf pc bb st in + (* Record new code after transformation *) + let newcode' = PTree.set pc bb' newcode in + (* Propagate outgoing state to all successors *) + List.fold_left (transform_successor st') newcode' (successors_block bb) + and transform_successor st newcode pc = + if PMap.get pc bbmap <> 1 then newcode else begin + match PTree.get pc f.fn_code with + | None -> newcode + | Some bb -> transform_block st newcode pc bb + end in + (* Iterate over all extended basic block heads *) + let newcode = + PTree.fold + (fun newcode pc bb -> + if PMap.get pc bbmap >= 2 + then transform_block top newcode pc bb + else newcode) + f.fn_code PTree.empty + in {f with fn_code = newcode} diff --git a/backend/XTL.mli b/backend/XTL.mli new file mode 100644 index 0000000..f2c2715 --- /dev/null +++ b/backend/XTL.mli @@ -0,0 +1,100 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** The XTL intermediate language for register allocation *) + +open Datatypes +open Camlcoq +open Maps +open AST +open Registers +open Machregs +open Locations +open Op + +type var = V of reg * typ | L of loc + +type node = P.t + +type instruction = + | Xmove of var * var + | Xreload of var * var + | Xspill of var * var + | Xparmove of var list * var list * var * var + | Xop of operation * var list * var + | Xload of memory_chunk * addressing * var list * var + | Xstore of memory_chunk * addressing * var list * var + | Xcall of signature * (var, ident) sum * var list * var list + | Xtailcall of signature * (var, ident) sum * var list + | Xbuiltin of external_function * var list * var list + | Xbranch of node + | Xcond of condition * var list * node * node + | Xjumptable of var * node list + | Xreturn of var list + +type block = instruction list + (* terminated by one of Xbranch, Xcond, Xjumptable, Xtailcall or Xreturn *) + +type code = block PTree.t + (* mapping node -> block *) + +type xfunction = { + fn_sig: signature; + fn_stacksize: Z.t; + fn_code: code; + fn_entrypoint: node +} + +(* Type of a variable *) + +val typeof: var -> typ + +(* Constructors for type [var] *) + +val vloc: loc -> var +val vlocs: loc list -> var list +val vmreg: mreg -> var +val vmregs: mreg list -> var list + +(* Sets of variables *) + +module VSet: Set.S with type elt = var + +(* Generation of fresh registers and fresh register variables *) + +val reset_temps: unit -> unit +val new_reg: unit -> reg +val new_temp: typ -> var +val twin_reg: reg -> reg + +(* Type checking *) + +val type_function: xfunction -> unit +exception Type_error_at of node + +(* Successors for dataflow analysis *) + +val successors_block: block -> node list +val successors: xfunction -> node list PTree.t + +(* A generic framework for transforming extended basic blocks *) + +val transform_basic_blocks: + (node -> block -> 'state -> block * 'state) -> + 'state -> + xfunction -> xfunction +(* First arg is the transfer function + (node, block, state before) -> (transformed block, state after). + Second arg is "top" state, to be used as initial state for + extended basic block heads. *) + + diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml index 3bf459f..0ffbd66 100644 --- a/cfrontend/C2C.ml +++ b/cfrontend/C2C.ml @@ -160,10 +160,12 @@ let register_stub_function name tres targs = let rec letters_of_type = function | Tnil -> [] | Tcons(Tfloat _, tl) -> "f" :: letters_of_type tl + | Tcons(Tlong _, tl) -> "l" :: letters_of_type tl | Tcons(_, tl) -> "i" :: letters_of_type tl in let rec types_of_types = function | Tnil -> Tnil | Tcons(Tfloat _, tl) -> Tcons(Tfloat(F64, noattr), types_of_types tl) + | Tcons(Tlong _, tl) -> Tcons(Tlong(Signed, noattr), types_of_types tl) | Tcons(_, tl) -> Tcons(Tpointer(Tvoid, noattr), types_of_types tl) in let stub_name = name ^ "$" ^ String.concat "" (letters_of_type targs) in @@ -211,6 +213,7 @@ let mergeTypAttr ty a2 = | Tvoid -> ty | Tint(sz, sg, a1) -> Tint(sz, sg, mergeAttr a1 a2) | Tfloat(sz, a1) -> Tfloat(sz, mergeAttr a1 a2) + | Tlong(sg, a1) -> Tlong(sg, mergeAttr a1 a2) | Tpointer(ty', a1) -> Tpointer(ty', mergeAttr a1 a2) | Tarray(ty', sz, a1) -> Tarray(ty', sz, mergeAttr a1 a2) | Tfunction(targs, tres) -> ty @@ -233,8 +236,7 @@ let convertIkind = function | C.ILong -> (Signed, I32) | C.IULong -> (Unsigned, I32) (* Special-cased in convertTyp below *) - | C.ILongLong -> unsupported "'long long' type"; (Signed, I32) - | C.IULongLong -> unsupported "'unsigned long long' type"; (Unsigned, I32) + | C.ILongLong | C.IULongLong -> assert false let convertFkind = function | C.FFloat -> F32 @@ -243,14 +245,6 @@ let convertFkind = function if not !Clflags.option_flongdouble then unsupported "'long double' type"; F64 -let int64_struct a = - let ty = Tint(I32,Unsigned,noattr) in - Tstruct(intern_string "struct __int64", - (if Memdataaux.big_endian - then Fcons(intern_string "hi", ty, Fcons(intern_string "lo", ty, Fnil)) - else Fcons(intern_string "lo", ty, Fcons(intern_string "hi", ty, Fnil))), - a) - (** A cache for structs and unions already converted *) let compositeCache : (C.ident, coq_type) Hashtbl.t = Hashtbl.create 77 @@ -260,8 +254,10 @@ let convertTyp env t = let rec convertTyp seen t = match Cutil.unroll env t with | C.TVoid a -> Tvoid - | C.TInt((C.ILongLong|C.IULongLong), a) when !Clflags.option_flonglong -> - int64_struct (convertAttr a) + | C.TInt(C.ILongLong, a) -> + Tlong(Signed, convertAttr a) + | C.TInt(C.IULongLong, a) -> + Tlong(Unsigned, convertAttr a) | C.TInt(ik, a) -> let (sg, sz) = convertIkind ik in Tint(sz, sg, convertAttr a) | C.TFloat(fk, a) -> @@ -360,14 +356,8 @@ let string_of_type ty = Format.pp_print_flush fb (); Buffer.contents b -let first_class_value env ty = - match Cutil.unroll env ty with - | C.TInt((C.ILongLong|C.IULongLong), _) -> false - | _ -> true - let supported_return_type env ty = match Cutil.unroll env ty with - | C.TInt((C.ILongLong|C.IULongLong), _) -> false | C.TStruct _ | C.TUnion _ -> false | _ -> true @@ -419,10 +409,6 @@ let convertFloat f kind = let ezero = Eval(Vint(coqint_of_camlint 0l), type_int32s) -let check_assignop msg env e = - if not (first_class_value env e.etyp) then - unsupported (msg ^ " on a l-value of type " ^ string_of_type e.etyp) - let rec convertExpr env e = let ty = convertTyp env e.etyp in match e.edesc with @@ -430,13 +416,11 @@ let rec convertExpr env e = | C.EUnop((C.Oderef|C.Odot _|C.Oarrow _), _) | C.EBinop(C.Oindex, _, _, _) -> let l = convertLvalue env e in - if not (first_class_value env e.etyp) then - unsupported ("r-value of type " ^ string_of_type e.etyp); Evalof(l, ty) + | C.EConst(C.CInt(i, (ILongLong|IULongLong), _)) -> + Eval(Vlong(coqint_of_camlint64 i), ty) | C.EConst(C.CInt(i, k, _)) -> - if k = C.ILongLong || k = C.IULongLong then - unsupported "'long long' integer literal"; Eval(Vint(convertInt i), ty) | C.EConst(C.CFloat(f, k)) -> if k = C.FLongDouble && not !Clflags.option_flongdouble then @@ -465,21 +449,17 @@ let rec convertExpr env e = | C.EUnop(C.Oaddrof, e1) -> Eaddrof(convertLvalue env e1, ty) | C.EUnop(C.Opreincr, e1) -> - check_assignop "pre-increment" env e1; coq_Epreincr Incr (convertLvalue env e1) ty | C.EUnop(C.Opredecr, e1) -> - check_assignop "pre-decrement" env e1; coq_Epreincr Decr (convertLvalue env e1) ty | C.EUnop(C.Opostincr, e1) -> - check_assignop "post-increment" env e1; Epostincr(Incr, convertLvalue env e1, ty) | C.EUnop(C.Opostdecr, e1) -> - check_assignop "post-decrement" env e1; Epostincr(Decr, convertLvalue env e1, ty) | C.EBinop((C.Oadd|C.Osub|C.Omul|C.Odiv|C.Omod|C.Oand|C.Oor|C.Oxor| C.Oshl|C.Oshr|C.Oeq|C.One|C.Olt|C.Ogt|C.Ole|C.Oge) as op, - e1, e2, _) -> + e1, e2, tyres) -> let op' = match op with | C.Oadd -> Oadd @@ -503,7 +483,6 @@ let rec convertExpr env e = | C.EBinop(C.Oassign, e1, e2, _) -> let e1' = convertLvalue env e1 in let e2' = convertExpr env e2 in - check_assignop "assignment" env e1; Eassign(e1', e2', ty) | C.EBinop((C.Oadd_assign|C.Osub_assign|C.Omul_assign|C.Odiv_assign| C.Omod_assign|C.Oand_assign|C.Oor_assign|C.Oxor_assign| @@ -525,7 +504,6 @@ let rec convertExpr env e = | _ -> assert false in let e1' = convertLvalue env e1 in let e2' = convertExpr env e2 in - check_assignop "assignment-operation" env e1; Eassignop(op', e1', e2', tyres, ty) | C.EBinop(C.Ocomma, e1, e2, _) -> Ecomma(convertExpr env e1, convertExpr env e2, ty) @@ -537,8 +515,6 @@ let rec convertExpr env e = | C.EConditional(e1, e2, e3) -> Econdition(convertExpr env e1, convertExpr env e2, convertExpr env e3, ty) | C.ECast(ty1, e1) -> - if not (first_class_value env ty1) then - unsupported ("cast to type " ^ string_of_type ty1); Ecast(convertExpr env e1, convertTyp env ty1) | C.ECall({edesc = C.EVar {name = "__builtin_annot"}}, args) -> diff --git a/cfrontend/CPragmas.ml b/cfrontend/CPragmas.ml index 7bf80bb..5ae147e 100644 --- a/cfrontend/CPragmas.ml +++ b/cfrontend/CPragmas.ml @@ -49,8 +49,7 @@ let process_reserve_register_pragma name = C2C.error "unknown register in `reserve_register' pragma" | Some r -> if Machregsaux.can_reserve_register r then - Coloringaux.reserved_registers := - r :: !Coloringaux.reserved_registers + IRC.reserved_registers := r :: !IRC.reserved_registers else C2C.error "cannot reserve this register (not a callee-save)" diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v index ebc27ad..a19a0e2 100644 --- a/cfrontend/Cexec.v +++ b/cfrontend/Cexec.v @@ -104,6 +104,7 @@ Definition eventval_of_val (v: val) (t: typ) : option eventval := match v, t with | Vint i, AST.Tint => Some (EVint i) | Vfloat f, AST.Tfloat => Some (EVfloat f) + | Vlong n, AST.Tlong => Some (EVlong n) | Vptr b ofs, AST.Tint => do id <- Genv.invert_symbol ge b; Some (EVptr_global id ofs) | _, _ => None end. @@ -122,6 +123,7 @@ Definition val_of_eventval (ev: eventval) (t: typ) : option val := match ev, t with | EVint i, AST.Tint => Some (Vint i) | EVfloat f, AST.Tfloat => Some (Vfloat f) + | EVlong n, AST.Tlong => Some (Vlong n) | EVptr_global id ofs, AST.Tint => do b <- Genv.find_symbol ge id; Some (Vptr b ofs) | _, _ => None end. @@ -132,6 +134,7 @@ Proof. intros. destruct v; destruct t; simpl in H; inv H. constructor. constructor. + constructor. destruct (Genv.invert_symbol ge b) as [id|] eqn:?; inv H1. constructor. apply Genv.invert_find_symbol; auto. Qed. @@ -166,6 +169,7 @@ Proof. intros. destruct ev; destruct t; simpl in H; inv H. constructor. constructor. + constructor. destruct (Genv.find_symbol ge i) as [b|] eqn:?; inv H1. constructor. auto. Qed. diff --git a/cfrontend/Clight.v b/cfrontend/Clight.v index e9ec7cc..3bd06df 100644 --- a/cfrontend/Clight.v +++ b/cfrontend/Clight.v @@ -48,6 +48,7 @@ Require Import Cop. Inductive expr : Type := | Econst_int: int -> type -> expr (**r integer literal *) | Econst_float: float -> type -> expr (**r float literal *) + | Econst_long: int64 -> type -> expr (**r long integer literal *) | Evar: ident -> type -> expr (**r variable *) | Etempvar: ident -> type -> expr (**r temporary variable *) | Ederef: expr -> type -> expr (**r pointer dereference (unary [*]) *) @@ -68,6 +69,7 @@ Definition typeof (e: expr) : type := match e with | Econst_int _ ty => ty | Econst_float _ ty => ty + | Econst_long _ ty => ty | Evar _ ty => ty | Etempvar _ ty => ty | Ederef _ ty => ty @@ -347,6 +349,8 @@ Inductive eval_expr: expr -> val -> Prop := eval_expr (Econst_int i ty) (Vint i) | eval_Econst_float: forall f ty, eval_expr (Econst_float f ty) (Vfloat f) + | eval_Econst_long: forall i ty, + eval_expr (Econst_long i ty) (Vlong i) | eval_Etempvar: forall id ty v, le!id = Some v -> eval_expr (Etempvar id ty) v diff --git a/cfrontend/Cminorgen.v b/cfrontend/Cminorgen.v index af85971..419ffff 100644 --- a/cfrontend/Cminorgen.v +++ b/cfrontend/Cminorgen.v @@ -171,6 +171,7 @@ Definition of_chunk (chunk: memory_chunk) := | Mint16signed => Int16s | Mint16unsigned => Int16u | Mint32 => Any + | Mint64 => Any | Mfloat32 => Float32 | Mfloat64 => Any | Mfloat64al32 => Any @@ -239,6 +240,8 @@ Definition transl_constant (cst: Csharpminor.constant): (constant * approx) := (Ointconst n, Approx.of_int n) | Csharpminor.Ofloatconst n => (Ofloatconst n, Approx.of_float n) + | Csharpminor.Olongconst n => + (Olongconst n, Any) end. (** Translation of expressions. Return both a Cminor expression and diff --git a/cfrontend/Cminorgenproof.v b/cfrontend/Cminorgenproof.v index a61808a..9d3d255 100644 --- a/cfrontend/Cminorgenproof.v +++ b/cfrontend/Cminorgenproof.v @@ -1495,6 +1495,8 @@ Ltac TrivialExists := exists (Vint x); split; [eauto with evalexpr | constructor] | [ |- exists y, _ /\ val_inject _ (Vfloat ?x) _ ] => exists (Vfloat x); split; [eauto with evalexpr | constructor] + | [ |- exists y, _ /\ val_inject _ (Vlong ?x) _ ] => + exists (Vlong x); split; [eauto with evalexpr | constructor] | _ => idtac end. @@ -1522,6 +1524,15 @@ Proof. inv H0; simpl in H; inv H. simpl. destruct (Float.intuoffloat f0); simpl in *; inv H1. TrivialExists. inv H0; simpl in H; inv H. simpl. TrivialExists. inv H0; simpl in H; inv H. simpl. TrivialExists. + inv H; inv H0; simpl; TrivialExists. + inv H; inv H0; simpl; TrivialExists. + inv H; inv H0; simpl; TrivialExists. + inv H; inv H0; simpl; TrivialExists. + inv H; inv H0; simpl; TrivialExists. + inv H0; simpl in H; inv H. simpl. destruct (Float.longoffloat f0); simpl in *; inv H1. TrivialExists. + inv H0; simpl in H; inv H. simpl. destruct (Float.longuoffloat f0); simpl in *; inv H1. TrivialExists. + inv H0; simpl in H; inv H. simpl. TrivialExists. + inv H0; simpl in H; inv H. simpl. TrivialExists. Qed. (** Compatibility of [eval_binop] with respect to [val_inject]. *) @@ -1566,46 +1577,43 @@ Proof. inv H; inv H0; inv H1; TrivialExists. inv H; inv H0; inv H1; TrivialExists. inv H; inv H0; inv H1; TrivialExists. + inv H; inv H0; inv H1; TrivialExists. + inv H; inv H0; inv H1; TrivialExists. + inv H; inv H0; inv H1; TrivialExists. + inv H0; try discriminate; inv H1; try discriminate. simpl in *. + destruct (Int64.eq i0 Int64.zero + || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H; TrivialExists. + inv H0; try discriminate; inv H1; try discriminate. simpl in *. + destruct (Int64.eq i0 Int64.zero); inv H. TrivialExists. + inv H0; try discriminate; inv H1; try discriminate. simpl in *. + destruct (Int64.eq i0 Int64.zero + || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H; TrivialExists. + inv H0; try discriminate; inv H1; try discriminate. simpl in *. + destruct (Int64.eq i0 Int64.zero); inv H. TrivialExists. + inv H; inv H0; inv H1; TrivialExists. + inv H; inv H0; inv H1; TrivialExists. + inv H; inv H0; inv H1; TrivialExists. + inv H; inv H0; inv H1; TrivialExists. simpl. destruct (Int.ltu i0 Int64.iwordsize'); auto. + inv H; inv H0; inv H1; TrivialExists. simpl. destruct (Int.ltu i0 Int64.iwordsize'); auto. + inv H; inv H0; inv H1; TrivialExists. simpl. destruct (Int.ltu i0 Int64.iwordsize'); auto. inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool. (* cmpu *) - inv H; inv H0; inv H1; TrivialExists. - apply val_inject_val_of_optbool. - apply val_inject_val_of_optbool. - apply val_inject_val_of_optbool. -Opaque Int.add. - unfold Val.cmpu. simpl. - destruct (zeq b1 b0); subst. - (* same blocks *) - rewrite H0 in H. inv H. rewrite zeq_true. - fold (Mem.weak_valid_pointer m b0 (Int.unsigned ofs1)). - fold (Mem.weak_valid_pointer m b0 (Int.unsigned ofs0)). - fold (Mem.weak_valid_pointer tm b2 (Int.unsigned (Int.add ofs1 (Int.repr delta)))). - fold (Mem.weak_valid_pointer tm b2 (Int.unsigned (Int.add ofs0 (Int.repr delta)))). - destruct (Mem.weak_valid_pointer m b0 (Int.unsigned ofs1)) eqn:?; auto. - destruct (Mem.weak_valid_pointer m b0 (Int.unsigned ofs0)) eqn:?; auto. - rewrite (Mem.weak_valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb) by eauto. - rewrite (Mem.weak_valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb0) by eauto. - rewrite Int.translate_cmpu - by eauto using Mem.weak_valid_pointer_inject_no_overflow. - apply val_inject_val_of_optbool. - (* different source blocks *) - destruct (Mem.valid_pointer m b1 (Int.unsigned ofs1)) eqn:?; auto. - destruct (Mem.valid_pointer m b0 (Int.unsigned ofs0)) eqn:?; auto. - destruct (zeq b2 b3). - fold (Mem.weak_valid_pointer tm b2 (Int.unsigned (Int.add ofs1 (Int.repr delta)))). - fold (Mem.weak_valid_pointer tm b3 (Int.unsigned (Int.add ofs0 (Int.repr delta0)))). - rewrite Mem.valid_pointer_implies - by (eapply (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb); eauto). - rewrite Mem.valid_pointer_implies - by (eapply (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb0); eauto). - exploit Mem.different_pointers_inject; eauto. intros [A|A]; [congruence |]. - destruct c; simpl; auto. - rewrite Int.eq_false. constructor. congruence. - rewrite Int.eq_false. constructor. congruence. - rewrite (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb) by eauto. - rewrite (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ H2 Heqb0) by eauto. - apply val_inject_val_of_optbool. - (* cmpf *) + inv H. econstructor; split; eauto. + unfold Val.cmpu. + destruct (Val.cmpu_bool (Mem.valid_pointer m) c v1 v2) as [b|] eqn:E. + replace (Val.cmpu_bool (Mem.valid_pointer tm) c tv1 tv2) with (Some b). + destruct b; simpl; constructor. + symmetry. eapply val_cmpu_bool_inject; eauto. + intros; eapply Mem.valid_pointer_inject_val; eauto. + intros; eapply Mem.weak_valid_pointer_inject_val; eauto. + intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto. + intros; eapply Mem.different_pointers_inject; eauto. + simpl; auto. +(* cmpf *) + inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool. +(* cmpl *) + inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool. +(* cmplu *) inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool. Qed. @@ -1843,6 +1851,8 @@ Proof. repeat rewrite Int.zero_ext_and; auto. omega. omega. (* int32 *) exists va; auto. + (* int64 *) + exists va; auto. (* float32 *) exploit eval_uncast_float32; eauto. intros [v' [A B]]. exists v'; split; auto. @@ -2036,6 +2046,7 @@ Proof. destruct cst; simpl; intros; inv H. exists (Vint i); intuition. apply approx_of_int_sound. exists (Vfloat f0); intuition. apply approx_of_float_sound. + exists (Vlong i); intuition. Qed. Lemma transl_expr_correct: diff --git a/cfrontend/Cop.v b/cfrontend/Cop.v index af7aaa8..c6e07b7 100644 --- a/cfrontend/Cop.v +++ b/cfrontend/Cop.v @@ -76,23 +76,35 @@ Inductive classify_cast_cases : Type := | cast_case_f2f (sz2:floatsize) (**r float -> float *) | cast_case_i2f (si1:signedness) (sz2:floatsize) (**r int -> float *) | cast_case_f2i (sz2:intsize) (si2:signedness) (**r float -> int *) + | cast_case_l2l (**r long -> long *) + | cast_case_i2l (si1: signedness) (**r int -> long *) + | cast_case_l2i (sz2: intsize) (si2: signedness) (**r long -> int *) + | cast_case_l2f (si1: signedness) (sz2: floatsize) (**r long -> float *) + | cast_case_f2l (si2: signedness) (**r float -> long *) | cast_case_f2bool (**r float -> bool *) + | cast_case_l2bool (**r long -> bool *) | cast_case_p2bool (**r pointer -> bool *) | cast_case_struct (id1: ident) (fld1: fieldlist) (id2: ident) (fld2: fieldlist) (**r struct -> struct *) | cast_case_union (id1: ident) (fld1: fieldlist) (id2: ident) (fld2: fieldlist) (**r union -> union *) | cast_case_void (**r any -> void *) | cast_case_default. -Function classify_cast (tfrom tto: type) : classify_cast_cases := +Definition classify_cast (tfrom tto: type) : classify_cast_cases := match tto, tfrom with - | Tint I32 si2 _, (Tint _ _ _ | Tpointer _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_neutral + | Tint I32 si2 _, (Tint _ _ _ | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_neutral | Tint IBool _ _, Tfloat _ _ => cast_case_f2bool - | Tint IBool _ _, (Tpointer _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_p2bool + | Tint IBool _ _, (Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_p2bool | Tint sz2 si2 _, Tint sz1 si1 _ => cast_case_i2i sz2 si2 | Tint sz2 si2 _, Tfloat sz1 _ => cast_case_f2i sz2 si2 | Tfloat sz2 _, Tfloat sz1 _ => cast_case_f2f sz2 | Tfloat sz2 _, Tint sz1 si1 _ => cast_case_i2f si1 sz2 - | Tpointer _ _, (Tint _ _ _ | Tpointer _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_neutral + | (Tpointer _ _ | Tcomp_ptr _ _), (Tint _ _ _ | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _) => cast_case_neutral + | Tlong _ _, Tlong _ _ => cast_case_l2l + | Tlong _ _, Tint sz1 si1 _ => cast_case_i2l si1 + | Tint IBool _ _, Tlong _ _ => cast_case_l2bool + | Tint sz2 si2 _, Tlong _ _ => cast_case_l2i sz2 si2 + | Tlong si2 _, Tfloat sz1 _ => cast_case_f2l si2 + | Tfloat sz2 _, Tlong si1 _ => cast_case_l2f si1 sz2 | Tstruct id2 fld2 _, Tstruct id1 fld1 _ => cast_case_struct id1 fld1 id2 fld2 | Tunion id2 fld2 _, Tunion id1 fld1 _ => cast_case_union id1 fld1 id2 fld2 | Tvoid, _ => cast_case_void @@ -131,7 +143,25 @@ Definition cast_float_float (sz: floatsize) (f: float) : float := | F64 => f end. -Function sem_cast (v: val) (t1 t2: type) : option val := +Definition cast_int_long (si: signedness) (i: int) : int64 := + match si with + | Signed => Int64.repr (Int.signed i) + | Unsigned => Int64.repr (Int.unsigned i) + end. + +Definition cast_long_float (si : signedness) (i: int64) : float := + match si with + | Signed => Float.floatoflong i + | Unsigned => Float.floatoflongu i + end. + +Definition cast_float_long (si : signedness) (f: float) : option int64 := + match si with + | Signed => Float.longoffloat f + | Unsigned => Float.longuoffloat f + end. + +Definition sem_cast (v: val) (t1 t2: type) : option val := match classify_cast t1 t2 with | cast_case_neutral => match v with @@ -174,10 +204,53 @@ Function sem_cast (v: val) (t1 t2: type) : option val := | Vptr _ _ => Some (Vint Int.one) | _ => None end + | cast_case_l2l => + match v with + | Vlong n => Some (Vlong n) + | _ => None + end + | cast_case_i2l si => + match v with + | Vint n => Some(Vlong (cast_int_long si n)) + | _ => None + end + | cast_case_l2i sz si => + match v with + | Vlong n => Some(Vint (cast_int_int sz si (Int.repr (Int64.unsigned n)))) + | _ => None + end + | cast_case_l2bool => + match v with + | Vlong n => + Some(Vint(if Int64.eq n Int64.zero then Int.zero else Int.one)) + | _ => None + end + | cast_case_l2f si1 sz2 => + match v with + | Vlong i => Some (Vfloat (cast_float_float sz2 (cast_long_float si1 i))) + | _ => None + end + | cast_case_f2l si2 => + match v with + | Vfloat f => + match cast_float_long si2 f with + | Some i => Some (Vlong i) + | None => None + end + | _ => None + end | cast_case_struct id1 fld1 id2 fld2 => - if ident_eq id1 id2 && fieldlist_eq fld1 fld2 then Some v else None + match v with + | Vptr b ofs => + if ident_eq id1 id2 && fieldlist_eq fld1 fld2 then Some v else None + | _ => None + end | cast_case_union id1 fld1 id2 fld2 => - if ident_eq id1 id2 && fieldlist_eq fld1 fld2 then Some v else None + match v with + | Vptr b ofs => + if ident_eq id1 id2 && fieldlist_eq fld1 fld2 then Some v else None + | _ => None + end | cast_case_void => Some v | cast_case_default => @@ -192,13 +265,15 @@ Inductive classify_bool_cases : Type := | bool_case_i (**r integer *) | bool_case_f (**r float *) | bool_case_p (**r pointer *) + | bool_case_l (**r long *) | bool_default. Definition classify_bool (ty: type) : classify_bool_cases := match typeconv ty with | Tint _ _ _ => bool_case_i - | Tpointer _ _ => bool_case_p + | Tpointer _ _ | Tcomp_ptr _ _ => bool_case_p | Tfloat _ _ => bool_case_f + | Tlong _ _ => bool_case_l | _ => bool_default end. @@ -207,7 +282,7 @@ Definition classify_bool (ty: type) : classify_bool_cases := considered as true. The integer zero (which also represents the null pointer) and the float 0.0 are false. *) -Function bool_val (v: val) (t: type) : option bool := +Definition bool_val (v: val) (t: type) : option bool := match classify_bool t with | bool_case_i => match v with @@ -225,6 +300,11 @@ Function bool_val (v: val) (t: type) : option bool := | Vptr b ofs => Some true | _ => None end + | bool_case_l => + match v with + | Vlong n => Some (negb (Int64.eq n Int64.zero)) + | _ => None + end | bool_default => None end. @@ -239,25 +319,29 @@ Proof. assert (A: classify_bool t = match t with | Tint _ _ _ => bool_case_i - | Tpointer _ _ | Tarray _ _ _ | Tfunction _ _ => bool_case_p + | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _ => bool_case_p | Tfloat _ _ => bool_case_f + | Tlong _ _ => bool_case_l | _ => bool_default end). - unfold classify_bool; destruct t; simpl; auto. destruct i; auto. destruct s; auto. - + { + unfold classify_bool; destruct t; simpl; auto. destruct i; auto. + } unfold bool_val. rewrite A. unfold sem_cast. destruct t; simpl; auto; destruct v; auto. - destruct (Int.eq i0 Int.zero); auto. + destruct (Int.eq i0 Int.zero); auto. + destruct (Int64.eq i Int64.zero); auto. destruct (Float.cmp Ceq f0 Float.zero); auto. destruct (Int.eq i Int.zero); auto. destruct (Int.eq i Int.zero); auto. destruct (Int.eq i Int.zero); auto. + destruct (Int.eq i0 Int.zero); auto. Qed. (** ** Unary operators *) (** *** Boolean negation *) -Function sem_notbool (v: val) (ty: type) : option val := +Definition sem_notbool (v: val) (ty: type) : option val := match classify_bool ty with | bool_case_i => match v with @@ -275,6 +359,11 @@ Function sem_notbool (v: val) (ty: type) : option val := | Vptr _ _ => Some Vfalse | _ => None end + | bool_case_l => + match v with + | Vlong n => Some (Val.of_bool (Int64.eq n Int64.zero)) + | _ => None + end | bool_default => None end. @@ -294,6 +383,7 @@ Qed. Inductive classify_neg_cases : Type := | neg_case_i(s: signedness) (**r int *) | neg_case_f (**r float *) + | neg_case_l(s: signedness) (**r long *) | neg_default. Definition classify_neg (ty: type) : classify_neg_cases := @@ -301,10 +391,11 @@ Definition classify_neg (ty: type) : classify_neg_cases := | Tint I32 Unsigned _ => neg_case_i Unsigned | Tint _ _ _ => neg_case_i Signed | Tfloat _ _ => neg_case_f + | Tlong si _ => neg_case_l si | _ => neg_default end. -Function sem_neg (v: val) (ty: type) : option val := +Definition sem_neg (v: val) (ty: type) : option val := match classify_neg ty with | neg_case_i sg => match v with @@ -316,6 +407,11 @@ Function sem_neg (v: val) (ty: type) : option val := | Vfloat f => Some (Vfloat (Float.neg f)) | _ => None end + | neg_case_l sg => + match v with + | Vlong n => Some (Vlong (Int64.neg n)) + | _ => None + end | neg_default => None end. @@ -323,20 +419,27 @@ Function sem_neg (v: val) (ty: type) : option val := Inductive classify_notint_cases : Type := | notint_case_i(s: signedness) (**r int *) + | notint_case_l(s: signedness) (**r long *) | notint_default. Definition classify_notint (ty: type) : classify_notint_cases := match ty with | Tint I32 Unsigned _ => notint_case_i Unsigned | Tint _ _ _ => notint_case_i Signed + | Tlong si _ => notint_case_l si | _ => notint_default end. -Function sem_notint (v: val) (ty: type): option val := +Definition sem_notint (v: val) (ty: type): option val := match classify_notint ty with | notint_case_i sg => match v with - | Vint n => Some (Vint (Int.xor n Int.mone)) + | Vint n => Some (Vint (Int.not n)) + | _ => None + end + | notint_case_l sg => + match v with + | Vlong n => Some (Vlong (Int64.not n)) | _ => None end | notint_default => None @@ -351,59 +454,119 @@ Function sem_notint (v: val) (ty: type): option val := (e.g. division, modulus, comparisons), the unsigned operation is selected if at least one of the arguments is of type "unsigned int32", otherwise the signed operation is performed. +- Likewise if both arguments are of long integer type. - If both arguments are of float type, a float operation is performed. We choose to perform all float arithmetic in double precision, even if both arguments are single-precision floats. -- If one argument has integer type and the other has float type, +- If one argument has integer/long type and the other has float type, we convert the integer argument to float, then perform the float operation. +- If one argument has long integer type and the other integer type, + we convert the integer argument to long, then perform the long operation *) -(** *** Addition *) - -Inductive classify_add_cases : Type := - | add_case_ii(s: signedness) (**r int, int *) - | add_case_ff (**r float, float *) - | add_case_if(s: signedness) (**r int, float *) - | add_case_fi(s: signedness) (**r float, int *) - | add_case_pi(ty: type)(a: attr) (**r pointer, int *) - | add_case_ip(ty: type)(a: attr) (**r int, pointer *) - | add_default. - -Definition classify_add (ty1: type) (ty2: type) := +Inductive binarith_cases: Type := + | bin_case_ii(s: signedness) (**r int, int *) + | bin_case_ff (**r float, float *) + | bin_case_if(s: signedness) (**r int, float *) + | bin_case_fi(s: signedness) (**r float, int *) + | bin_case_ll(s: signedness) (**r long, long *) + | bin_case_il(s1 s2: signedness) (**r int, long *) + | bin_case_li(s1 s2: signedness) (**r long, int *) + | bin_case_lf(s: signedness) (**r long, float *) + | bin_case_fl(s: signedness) (**r float, long *) + | bin_default. + +Definition classify_binarith (ty1: type) (ty2: type) : binarith_cases := match typeconv ty1, typeconv ty2 with - | Tint I32 Unsigned _, Tint _ _ _ => add_case_ii Unsigned - | Tint _ _ _, Tint I32 Unsigned _ => add_case_ii Unsigned - | Tint _ _ _, Tint _ _ _ => add_case_ii Signed - | Tfloat _ _, Tfloat _ _ => add_case_ff - | Tint _ sg _, Tfloat _ _ => add_case_if sg - | Tfloat _ _, Tint _ sg _ => add_case_fi sg - | Tpointer ty a, Tint _ _ _ => add_case_pi ty a - | Tint _ _ _, Tpointer ty a => add_case_ip ty a - | _, _ => add_default + | Tint I32 Unsigned _, Tint _ _ _ => bin_case_ii Unsigned + | Tint _ _ _, Tint I32 Unsigned _ => bin_case_ii Unsigned + | Tint _ _ _, Tint _ _ _ => bin_case_ii Signed + | Tfloat _ _, Tfloat _ _ => bin_case_ff + | Tint _ sg _, Tfloat _ _ => bin_case_if sg + | Tfloat _ _, Tint _ sg _ => bin_case_fi sg + | Tlong Signed _, Tlong Signed _ => bin_case_ll Signed + | Tlong _ _, Tlong _ _ => bin_case_ll Unsigned + | Tint _ s1 _, Tlong s2 _ => bin_case_il s1 s2 + | Tlong s1 _, Tint _ s2 _ => bin_case_li s1 s2 + | Tlong sg _, Tfloat _ _ => bin_case_lf sg + | Tfloat _ _, Tlong sg _ => bin_case_fl sg + | _, _ => bin_default end. -Function sem_add (v1:val) (t1:type) (v2: val) (t2:type) : option val := - match classify_add t1 t2 with - | add_case_ii sg => (**r integer addition *) +Definition sem_binarith + (sem_int: signedness -> int -> int -> option val) + (sem_long: signedness -> int64 -> int64 -> option val) + (sem_float: float -> float -> option val) + (v1: val) (t1: type) (v2: val) (t2: type) : option val := + match classify_binarith t1 t2 with + | bin_case_ii sg => + match v1, v2 with + | Vint n1, Vint n2 => sem_int sg n1 n2 + | _, _ => None + end + | bin_case_ff => + match v1, v2 with + | Vfloat n1, Vfloat n2 => sem_float n1 n2 + | _, _ => None + end + | bin_case_if sg => + match v1, v2 with + | Vint n1, Vfloat n2 => sem_float (cast_int_float sg n1) n2 + | _, _ => None + end + | bin_case_fi sg => + match v1, v2 with + | Vfloat n1, Vint n2 => sem_float n1 (cast_int_float sg n2) + | _, _ => None + end + | bin_case_ll sg => + match v1, v2 with + | Vlong n1, Vlong n2 => sem_long sg n1 n2 + | _, _ => None + end + | bin_case_il sg1 sg2 => match v1, v2 with - | Vint n1, Vint n2 => Some (Vint (Int.add n1 n2)) + | Vint n1, Vlong n2 => sem_long sg2 (cast_int_long sg1 n1) n2 | _, _ => None end - | add_case_ff => (**r float addition *) + | bin_case_li sg1 sg2 => match v1, v2 with - | Vfloat n1, Vfloat n2 => Some (Vfloat (Float.add n1 n2)) + | Vlong n1, Vint n2 => sem_long sg1 n1 (cast_int_long sg2 n2) | _, _ => None end - | add_case_if sg => (**r int plus float *) + | bin_case_lf sg => match v1, v2 with - | Vint n1, Vfloat n2 => Some (Vfloat (Float.add (cast_int_float sg n1) n2)) + | Vlong n1, Vfloat n2 => sem_float (cast_long_float sg n1) n2 | _, _ => None end - | add_case_fi sg => (**r float plus int *) + | bin_case_fl sg => match v1, v2 with - | Vfloat n1, Vint n2 => Some (Vfloat (Float.add n1 (cast_int_float sg n2))) + | Vfloat n1, Vlong n2 => sem_float n1 (cast_long_float sg n2) | _, _ => None end + | bin_default => None + end. + +(** *** Addition *) + +Inductive classify_add_cases : Type := + | add_case_pi(ty: type)(a: attr) (**r pointer, int *) + | add_case_ip(ty: type)(a: attr) (**r int, pointer *) + | add_case_pl(ty: type)(a: attr) (**r pointer, long *) + | add_case_lp(ty: type)(a: attr) (**r long, pointer *) + | add_default. (**r numerical type, numerical type *) + +Definition classify_add (ty1: type) (ty2: type) := + match typeconv ty1, typeconv ty2 with + | Tpointer ty a, Tint _ _ _ => add_case_pi ty a + | Tint _ _ _, Tpointer ty a => add_case_ip ty a + | Tpointer ty a, Tlong _ _ => add_case_pl ty a + | Tlong _ _, Tpointer ty a => add_case_lp ty a + | _, _ => add_default + end. + +Definition sem_add (v1:val) (t1:type) (v2: val) (t2:type) : option val := + match classify_add t1 t2 with | add_case_pi ty _ => (**r pointer plus integer *) match v1,v2 with | Vptr b1 ofs1, Vint n2 => @@ -416,59 +579,57 @@ Function sem_add (v1:val) (t1:type) (v2: val) (t2:type) : option val := Some (Vptr b2 (Int.add ofs2 (Int.mul (Int.repr (sizeof ty)) n1))) | _, _ => None end - | add_default => None -end. + | add_case_pl ty _ => (**r pointer plus long *) + match v1,v2 with + | Vptr b1 ofs1, Vlong n2 => + let n2 := Int.repr (Int64.unsigned n2) in + Some (Vptr b1 (Int.add ofs1 (Int.mul (Int.repr (sizeof ty)) n2))) + | _, _ => None + end + | add_case_lp ty _ => (**r long plus pointer *) + match v1,v2 with + | Vlong n1, Vptr b2 ofs2 => + let n1 := Int.repr (Int64.unsigned n1) in + Some (Vptr b2 (Int.add ofs2 (Int.mul (Int.repr (sizeof ty)) n1))) + | _, _ => None + end + | add_default => + sem_binarith + (fun sg n1 n2 => Some(Vint(Int.add n1 n2))) + (fun sg n1 n2 => Some(Vlong(Int64.add n1 n2))) + (fun n1 n2 => Some(Vfloat(Float.add n1 n2))) + v1 t1 v2 t2 + end. (** *** Subtraction *) Inductive classify_sub_cases : Type := - | sub_case_ii(s: signedness) (**r int , int *) - | sub_case_ff (**r float , float *) - | sub_case_if(s: signedness) (**r int, float *) - | sub_case_fi(s: signedness) (**r float, int *) - | sub_case_pi(ty: type) (**r pointer, int *) + | sub_case_pi(ty: type)(a: attr) (**r pointer, int *) | sub_case_pp(ty: type) (**r pointer, pointer *) - | sub_default. + | sub_case_pl(ty: type)(a: attr) (**r pointer, long *) + | sub_default. (**r numerical type, numerical type *) Definition classify_sub (ty1: type) (ty2: type) := match typeconv ty1, typeconv ty2 with - | Tint I32 Unsigned _, Tint _ _ _ => sub_case_ii Unsigned - | Tint _ _ _, Tint I32 Unsigned _ => sub_case_ii Unsigned - | Tint _ _ _, Tint _ _ _ => sub_case_ii Signed - | Tfloat _ _ , Tfloat _ _ => sub_case_ff - | Tint _ sg _, Tfloat _ _ => sub_case_if sg - | Tfloat _ _, Tint _ sg _ => sub_case_fi sg - | Tpointer ty _, Tint _ _ _ => sub_case_pi ty + | Tpointer ty a, Tint _ _ _ => sub_case_pi ty a | Tpointer ty _ , Tpointer _ _ => sub_case_pp ty - | _ ,_ => sub_default + | Tpointer ty a, Tlong _ _ => sub_case_pl ty a + | _, _ => sub_default end. -Function sem_sub (v1:val) (t1:type) (v2: val) (t2:type) : option val := +Definition sem_sub (v1:val) (t1:type) (v2: val) (t2:type) : option val := match classify_sub t1 t2 with - | sub_case_ii sg => (**r integer subtraction *) - match v1,v2 with - | Vint n1, Vint n2 => Some (Vint (Int.sub n1 n2)) - | _, _ => None - end - | sub_case_ff => (**r float subtraction *) + | sub_case_pi ty attr => (**r pointer minus integer *) match v1,v2 with - | Vfloat f1, Vfloat f2 => Some (Vfloat(Float.sub f1 f2)) + | Vptr b1 ofs1, Vint n2 => + Some (Vptr b1 (Int.sub ofs1 (Int.mul (Int.repr (sizeof ty)) n2))) | _, _ => None end - | sub_case_if sg => (**r int minus float *) - match v1, v2 with - | Vint n1, Vfloat n2 => Some (Vfloat (Float.sub (cast_int_float sg n1) n2)) - | _, _ => None - end - | sub_case_fi sg => (**r float minus int *) - match v1, v2 with - | Vfloat n1, Vint n2 => Some (Vfloat (Float.sub n1 (cast_int_float sg n2))) - | _, _ => None - end - | sub_case_pi ty => (**r pointer minus integer *) + | sub_case_pl ty attr => (**r pointer minus long *) match v1,v2 with - | Vptr b1 ofs1, Vint n2 => - Some (Vptr b1 (Int.sub ofs1 (Int.mul (Int.repr (sizeof ty)) n2))) + | Vptr b1 ofs1, Vlong n2 => + let n2 := Int.repr (Int64.unsigned n2) in + Some (Vptr b1 (Int.sub ofs1 (Int.mul (Int.repr (sizeof ty)) n2))) | _, _ => None end | sub_case_pp ty => (**r pointer minus pointer *) @@ -480,255 +641,187 @@ Function sem_sub (v1:val) (t1:type) (v2: val) (t2:type) : option val := else None | _, _ => None end - | sub_default => None + | sub_default => + sem_binarith + (fun sg n1 n2 => Some(Vint(Int.sub n1 n2))) + (fun sg n1 n2 => Some(Vlong(Int64.sub n1 n2))) + (fun n1 n2 => Some(Vfloat(Float.sub n1 n2))) + v1 t1 v2 t2 end. -(** *** Multiplication *) - -Inductive classify_mul_cases : Type:= - | mul_case_ii(s: signedness) (**r int , int *) - | mul_case_ff (**r float , float *) - | mul_case_if(s: signedness) (**r int, float *) - | mul_case_fi(s: signedness) (**r float, int *) - | mul_default. - -Definition classify_mul (ty1: type) (ty2: type) := - match typeconv ty1, typeconv ty2 with - | Tint I32 Unsigned _, Tint _ _ _ => mul_case_ii Unsigned - | Tint _ _ _, Tint I32 Unsigned _ => mul_case_ii Unsigned - | Tint _ _ _, Tint _ _ _ => mul_case_ii Signed - | Tfloat _ _ , Tfloat _ _ => mul_case_ff - | Tint _ sg _, Tfloat _ _ => mul_case_if sg - | Tfloat _ _, Tint _ sg _ => mul_case_fi sg - | _,_ => mul_default -end. - -Function sem_mul (v1:val) (t1:type) (v2: val) (t2:type) : option val := - match classify_mul t1 t2 with - | mul_case_ii sg => - match v1,v2 with - | Vint n1, Vint n2 => Some (Vint (Int.mul n1 n2)) - | _, _ => None - end - | mul_case_ff => - match v1,v2 with - | Vfloat f1, Vfloat f2 => Some (Vfloat (Float.mul f1 f2)) - | _, _ => None - end - | mul_case_if sg => - match v1, v2 with - | Vint n1, Vfloat n2 => Some (Vfloat (Float.mul (cast_int_float sg n1) n2)) - | _, _ => None - end - | mul_case_fi sg => - match v1, v2 with - | Vfloat n1, Vint n2 => Some (Vfloat (Float.mul n1 (cast_int_float sg n2))) - | _, _ => None - end - | mul_default => - None -end. - -(** *** Division *) - -Inductive classify_div_cases : Type:= - | div_case_ii(s: signedness) (**r int , int *) - | div_case_ff (**r float , float *) - | div_case_if(s: signedness) (**r int, float *) - | div_case_fi(s: signedness) (**r float, int *) - | div_default. - -Definition classify_div (ty1: type) (ty2: type) := - match typeconv ty1, typeconv ty2 with - | Tint I32 Unsigned _, Tint _ _ _ => div_case_ii Unsigned - | Tint _ _ _, Tint I32 Unsigned _ => div_case_ii Unsigned - | Tint _ _ _, Tint _ _ _ => div_case_ii Signed - | Tfloat _ _ , Tfloat _ _ => div_case_ff - | Tint _ sg _, Tfloat _ _ => div_case_if sg - | Tfloat _ _, Tint _ sg _ => div_case_fi sg - | _,_ => div_default -end. - -Function sem_div (v1:val) (t1:type) (v2: val) (t2:type) : option val := - match classify_div t1 t2 with - | div_case_ii Unsigned => - match v1,v2 with - | Vint n1, Vint n2 => - if Int.eq n2 Int.zero then None else Some (Vint (Int.divu n1 n2)) - | _,_ => None - end - | div_case_ii Signed => - match v1,v2 with - | Vint n1, Vint n2 => +(** *** Multiplication, division, modulus *) + +Definition sem_mul (v1:val) (t1:type) (v2: val) (t2:type) : option val := + sem_binarith + (fun sg n1 n2 => Some(Vint(Int.mul n1 n2))) + (fun sg n1 n2 => Some(Vlong(Int64.mul n1 n2))) + (fun n1 n2 => Some(Vfloat(Float.mul n1 n2))) + v1 t1 v2 t2. + +Definition sem_div (v1:val) (t1:type) (v2: val) (t2:type) : option val := + sem_binarith + (fun sg n1 n2 => + match sg with + | Signed => if Int.eq n2 Int.zero || Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone - then None else Some (Vint(Int.divs n1 n2)) - | _,_ => None - end - | div_case_ff => - match v1,v2 with - | Vfloat f1, Vfloat f2 => Some (Vfloat(Float.div f1 f2)) - | _, _ => None - end - | div_case_if sg => - match v1, v2 with - | Vint n1, Vfloat n2 => Some (Vfloat (Float.div (cast_int_float sg n1) n2)) - | _, _ => None - end - | div_case_fi sg => - match v1, v2 with - | Vfloat n1, Vint n2 => Some (Vfloat (Float.div n1 (cast_int_float sg n2))) - | _, _ => None - end - | div_default => - None -end. - -(** *** Integer-only binary operations: modulus, bitwise "and", "or", and "xor" *) - -Inductive classify_binint_cases : Type:= - | binint_case_ii(s: signedness) (**r int , int *) - | binint_default. - -Definition classify_binint (ty1: type) (ty2: type) := - match typeconv ty1, typeconv ty2 with - | Tint I32 Unsigned _, Tint _ _ _ => binint_case_ii Unsigned - | Tint _ _ _, Tint I32 Unsigned _ => binint_case_ii Unsigned - | Tint _ _ _, Tint _ _ _ => binint_case_ii Signed - | _,_ => binint_default -end. - -Function sem_mod (v1:val) (t1:type) (v2: val) (t2:type) : option val := - match classify_binint t1 t2 with - | binint_case_ii Unsigned => - match v1, v2 with - | Vint n1, Vint n2 => - if Int.eq n2 Int.zero then None else Some (Vint (Int.modu n1 n2)) - | _, _ => None - end - | binint_case_ii Signed => - match v1,v2 with - | Vint n1, Vint n2 => + then None else Some(Vint(Int.divs n1 n2)) + | Unsigned => + if Int.eq n2 Int.zero + then None else Some(Vint(Int.divu n1 n2)) + end) + (fun sg n1 n2 => + match sg with + | Signed => + if Int64.eq n2 Int64.zero + || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone + then None else Some(Vlong(Int64.divs n1 n2)) + | Unsigned => + if Int64.eq n2 Int64.zero + then None else Some(Vlong(Int64.divu n1 n2)) + end) + (fun n1 n2 => Some(Vfloat(Float.div n1 n2))) + v1 t1 v2 t2. + +Definition sem_mod (v1:val) (t1:type) (v2: val) (t2:type) : option val := + sem_binarith + (fun sg n1 n2 => + match sg with + | Signed => if Int.eq n2 Int.zero || Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone - then None else Some (Vint (Int.mods n1 n2)) - | _, _ => None - end - | binint_default => - None - end. - -Function sem_and (v1:val) (t1:type) (v2: val) (t2:type) : option val := - match classify_binint t1 t2 with - | binint_case_ii sg => - match v1, v2 with - | Vint n1, Vint n2 => Some (Vint(Int.and n1 n2)) - | _, _ => None - end - | binint_default => None - end. - -Function sem_or (v1:val) (t1:type) (v2: val) (t2:type) : option val := - match classify_binint t1 t2 with - | binint_case_ii sg => - match v1, v2 with - | Vint n1, Vint n2 => Some (Vint(Int.or n1 n2)) - | _, _ => None - end - | binint_default => None - end. - -Function sem_xor (v1:val) (t1:type) (v2: val) (t2:type) : option val := - match classify_binint t1 t2 with - | binint_case_ii sg => - match v1, v2 with - | Vint n1, Vint n2 => Some (Vint(Int.xor n1 n2)) - | _, _ => None - end - | binint_default => None - end. + then None else Some(Vint(Int.mods n1 n2)) + | Unsigned => + if Int.eq n2 Int.zero + then None else Some(Vint(Int.modu n1 n2)) + end) + (fun sg n1 n2 => + match sg with + | Signed => + if Int64.eq n2 Int64.zero + || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone + then None else Some(Vlong(Int64.mods n1 n2)) + | Unsigned => + if Int64.eq n2 Int64.zero + then None else Some(Vlong(Int64.modu n1 n2)) + end) + (fun n1 n2 => None) + v1 t1 v2 t2. + +Definition sem_and (v1:val) (t1:type) (v2: val) (t2:type) : option val := + sem_binarith + (fun sg n1 n2 => Some(Vint(Int.and n1 n2))) + (fun sg n1 n2 => Some(Vlong(Int64.and n1 n2))) + (fun n1 n2 => None) + v1 t1 v2 t2. + +Definition sem_or (v1:val) (t1:type) (v2: val) (t2:type) : option val := + sem_binarith + (fun sg n1 n2 => Some(Vint(Int.or n1 n2))) + (fun sg n1 n2 => Some(Vlong(Int64.or n1 n2))) + (fun n1 n2 => None) + v1 t1 v2 t2. + +Definition sem_xor (v1:val) (t1:type) (v2: val) (t2:type) : option val := + sem_binarith + (fun sg n1 n2 => Some(Vint(Int.xor n1 n2))) + (fun sg n1 n2 => Some(Vlong(Int64.xor n1 n2))) + (fun n1 n2 => None) + v1 t1 v2 t2. (** *** Shifts *) +(** Shifts do not perform the usual binary conversions. Instead, + each argument is converted independently, and the signedness + of the result is always that of the first argument. *) + Inductive classify_shift_cases : Type:= - | shift_case_ii(s: signedness) (**r int , int *) + | shift_case_ii(s: signedness) (**r int , int *) + | shift_case_ll(s: signedness) (**r long, long *) + | shift_case_il(s: signedness) (**r int, long *) + | shift_case_li(s: signedness) (**r long, int *) | shift_default. Definition classify_shift (ty1: type) (ty2: type) := match typeconv ty1, typeconv ty2 with | Tint I32 Unsigned _, Tint _ _ _ => shift_case_ii Unsigned | Tint _ _ _, Tint _ _ _ => shift_case_ii Signed + | Tint I32 Unsigned _, Tlong _ _ => shift_case_il Unsigned + | Tint _ _ _, Tlong _ _ => shift_case_il Signed + | Tlong s _, Tint _ _ _ => shift_case_li s + | Tlong s _, Tlong _ _ => shift_case_ll s | _,_ => shift_default -end. + end. -Function sem_shl (v1:val) (t1:type) (v2: val) (t2:type) : option val := +Definition sem_shift + (sem_int: signedness -> int -> int -> int) + (sem_long: signedness -> int64 -> int64 -> int64) + (v1: val) (t1: type) (v2: val) (t2: type) : option val := match classify_shift t1 t2 with | shift_case_ii sg => match v1, v2 with - | Vint n1, Vint n2 => - if Int.ltu n2 Int.iwordsize then Some (Vint(Int.shl n1 n2)) else None + | Vint n1, Vint n2 => + if Int.ltu n2 Int.iwordsize + then Some(Vint(sem_int sg n1 n2)) else None + | _, _ => None + end + | shift_case_il sg => + match v1, v2 with + | Vint n1, Vlong n2 => + if Int64.ltu n2 (Int64.repr 32) + then Some(Vint(sem_int sg n1 (Int64.loword n2))) else None + | _, _ => None + end + | shift_case_li sg => + match v1, v2 with + | Vlong n1, Vint n2 => + if Int.ltu n2 Int64.iwordsize' + then Some(Vlong(sem_long sg n1 (Int64.repr (Int.unsigned n2)))) else None + | _, _ => None + end + | shift_case_ll sg => + match v1, v2 with + | Vlong n1, Vlong n2 => + if Int64.ltu n2 Int64.iwordsize + then Some(Vlong(sem_long sg n1 n2)) else None | _, _ => None end | shift_default => None end. -Function sem_shr (v1: val) (t1: type) (v2: val) (t2: type): option val := - match classify_shift t1 t2 with - | shift_case_ii Unsigned => - match v1,v2 with - | Vint n1, Vint n2 => - if Int.ltu n2 Int.iwordsize then Some (Vint (Int.shru n1 n2)) else None - | _,_ => None - end - | shift_case_ii Signed => - match v1,v2 with - | Vint n1, Vint n2 => - if Int.ltu n2 Int.iwordsize then Some (Vint (Int.shr n1 n2)) else None - | _, _ => None - end - | shift_default => - None - end. +Definition sem_shl (v1:val) (t1:type) (v2: val) (t2:type) : option val := + sem_shift + (fun sg n1 n2 => Int.shl n1 n2) + (fun sg n1 n2 => Int64.shl n1 n2) + v1 t1 v2 t2. + +Definition sem_shr (v1:val) (t1:type) (v2: val) (t2:type) : option val := + sem_shift + (fun sg n1 n2 => match sg with Signed => Int.shr n1 n2 | Unsigned => Int.shru n1 n2 end) + (fun sg n1 n2 => match sg with Signed => Int64.shr n1 n2 | Unsigned => Int64.shru n1 n2 end) + v1 t1 v2 t2. (** *** Comparisons *) -Inductive classify_cmp_cases : Type:= - | cmp_case_ii(s: signedness) (**r int, int *) - | cmp_case_pp (**r pointer, pointer *) - | cmp_case_ff (**r float , float *) - | cmp_case_if(s: signedness) (**r int, float *) - | cmp_case_fi(s: signedness) (**r float, int *) - | cmp_default. +Inductive classify_cmp_cases : Type := + | cmp_case_pp (**r pointer, pointer *) + | cmp_default. (**r numerical, numerical *) Definition classify_cmp (ty1: type) (ty2: type) := match typeconv ty1, typeconv ty2 with - | Tint I32 Unsigned _ , Tint _ _ _ => cmp_case_ii Unsigned - | Tint _ _ _ , Tint I32 Unsigned _ => cmp_case_ii Unsigned - | Tint _ _ _ , Tint _ _ _ => cmp_case_ii Signed - | Tfloat _ _ , Tfloat _ _ => cmp_case_ff - | Tint _ sg _, Tfloat _ _ => cmp_case_if sg - | Tfloat _ _, Tint _ sg _ => cmp_case_fi sg | Tpointer _ _ , Tpointer _ _ => cmp_case_pp | Tpointer _ _ , Tint _ _ _ => cmp_case_pp | Tint _ _ _, Tpointer _ _ => cmp_case_pp - | _ , _ => cmp_default + | _, _ => cmp_default end. -Function sem_cmp (c:comparison) +Definition sem_cmp (c:comparison) (v1: val) (t1: type) (v2: val) (t2: type) (m: mem): option val := match classify_cmp t1 t2 with - | cmp_case_ii Signed => - match v1,v2 with - | Vint n1, Vint n2 => Some (Val.of_bool (Int.cmp c n1 n2)) - | _, _ => None - end - | cmp_case_ii Unsigned => - match v1,v2 with - | Vint n1, Vint n2 => Some (Val.of_bool (Int.cmpu c n1 n2)) - | _, _ => None - end | cmp_case_pp => + option_map Val.of_bool (Val.cmpu_bool (Mem.valid_pointer m) c v1 v2) +(* match v1,v2 with | Vint n1, Vint n2 => Some (Val.of_bool (Int.cmpu c n1 n2)) | Vptr b1 ofs1, Vptr b2 ofs2 => @@ -752,22 +845,16 @@ Function sem_cmp (c:comparison) else None | _, _ => None end - | cmp_case_ff => - match v1,v2 with - | Vfloat f1, Vfloat f2 => Some (Val.of_bool (Float.cmp c f1 f2)) - | _, _ => None - end - | cmp_case_if sg => - match v1, v2 with - | Vint n1, Vfloat n2 => Some (Val.of_bool (Float.cmp c (cast_int_float sg n1) n2)) - | _, _ => None - end - | cmp_case_fi sg => - match v1, v2 with - | Vfloat n1, Vint n2 => Some (Val.of_bool (Float.cmp c n1 (cast_int_float sg n2))) - | _, _ => None - end - | cmp_default => None +*) + | cmp_default => + sem_binarith + (fun sg n1 n2 => + Some(Val.of_bool(match sg with Signed => Int.cmp c n1 n2 | Unsigned => Int.cmpu c n1 n2 end))) + (fun sg n1 n2 => + Some(Val.of_bool(match sg with Signed => Int64.cmp c n1 n2 | Unsigned => Int64.cmpu c n1 n2 end))) + (fun n1 n2 => + Some(Val.of_bool(Float.cmp c n1 n2))) + v1 t1 v2 t2 end. (** ** Function applications *) @@ -821,3 +908,249 @@ Definition sem_incrdecr (id: incr_or_decr) (v: val) (ty: type) := | Incr => sem_add v ty (Vint Int.one) type_int32s | Decr => sem_sub v ty (Vint Int.one) type_int32s end. + +(** * Compatibility with extensions and injections *) + +Section GENERIC_INJECTION. + +Variable f: meminj. +Variables m m': mem. + +Hypothesis valid_pointer_inj: + forall b1 ofs b2 delta, + f b1 = Some(b2, delta) -> + Mem.valid_pointer m b1 (Int.unsigned ofs) = true -> + Mem.valid_pointer m' b2 (Int.unsigned (Int.add ofs (Int.repr delta))) = true. + +Hypothesis weak_valid_pointer_inj: + forall b1 ofs b2 delta, + f b1 = Some(b2, delta) -> + Mem.weak_valid_pointer m b1 (Int.unsigned ofs) = true -> + Mem.weak_valid_pointer m' b2 (Int.unsigned (Int.add ofs (Int.repr delta))) = true. + +Hypothesis weak_valid_pointer_no_overflow: + forall b1 ofs b2 delta, + f b1 = Some(b2, delta) -> + Mem.weak_valid_pointer m b1 (Int.unsigned ofs) = true -> + 0 <= Int.unsigned ofs + Int.unsigned (Int.repr delta) <= Int.max_unsigned. + +Hypothesis valid_different_pointers_inj: + forall b1 ofs1 b2 ofs2 b1' delta1 b2' delta2, + b1 <> b2 -> + Mem.valid_pointer m b1 (Int.unsigned ofs1) = true -> + Mem.valid_pointer m b2 (Int.unsigned ofs2) = true -> + f b1 = Some (b1', delta1) -> + f b2 = Some (b2', delta2) -> + b1' <> b2' \/ + Int.unsigned (Int.add ofs1 (Int.repr delta1)) <> Int.unsigned (Int.add ofs2 (Int.repr delta2)). + +Remark val_inject_vtrue: forall f, val_inject f Vtrue Vtrue. +Proof. unfold Vtrue; auto. Qed. + +Remark val_inject_vfalse: forall f, val_inject f Vfalse Vfalse. +Proof. unfold Vfalse; auto. Qed. + +Remark val_inject_of_bool: forall f b, val_inject f (Val.of_bool b) (Val.of_bool b). +Proof. intros. unfold Val.of_bool. destruct b; [apply val_inject_vtrue|apply val_inject_vfalse]. +Qed. + +Hint Resolve val_inject_vtrue val_inject_vfalse val_inject_of_bool. + +Ltac TrivialInject := + match goal with + | |- exists v', Some ?v = Some v' /\ _ => exists v; split; auto + | _ => idtac + end. + +Lemma sem_unary_operation_inject: + forall op v1 ty v tv1, + sem_unary_operation op v1 ty = Some v -> + val_inject f v1 tv1 -> + exists tv, sem_unary_operation op tv1 ty = Some tv /\ val_inject f v tv. +Proof. + unfold sem_unary_operation; intros. destruct op. + (* notbool *) + unfold sem_notbool in *; destruct (classify_bool ty); inv H0; inv H; TrivialInject. + (* notint *) + unfold sem_notint in *; destruct (classify_notint ty); inv H0; inv H; TrivialInject. + (* neg *) + unfold sem_neg in *; destruct (classify_neg ty); inv H0; inv H; TrivialInject. +Qed. + +Definition optval_self_injects (ov: option val) : Prop := + match ov with + | Some (Vptr b ofs) => False + | _ => True + end. + +Remark sem_binarith_inject: + forall sem_int sem_long sem_float v1 t1 v2 t2 v v1' v2', + sem_binarith sem_int sem_long sem_float v1 t1 v2 t2 = Some v -> + val_inject f v1 v1' -> val_inject f v2 v2' -> + (forall sg n1 n2, optval_self_injects (sem_int sg n1 n2)) -> + (forall sg n1 n2, optval_self_injects (sem_long sg n1 n2)) -> + (forall n1 n2, optval_self_injects (sem_float n1 n2)) -> + exists v', sem_binarith sem_int sem_long sem_float v1' t1 v2' t2 = Some v' /\ val_inject f v v'. +Proof. + intros. + assert (SELF: forall ov v, ov = Some v -> optval_self_injects ov -> val_inject f v v). + { + intros. subst ov; simpl in H6. destruct v0; contradiction || constructor. + } + exists v. + unfold sem_binarith in *; destruct (classify_binarith t1 t2); inv H0; inv H1; discriminate || eauto. +Qed. + +Remark sem_shift_inject: + forall sem_int sem_long v1 t1 v2 t2 v v1' v2', + sem_shift sem_int sem_long v1 t1 v2 t2 = Some v -> + val_inject f v1 v1' -> val_inject f v2 v2' -> + exists v', sem_shift sem_int sem_long v1' t1 v2' t2 = Some v' /\ val_inject f v v'. +Proof. + intros. exists v. + unfold sem_shift in *; destruct (classify_shift t1 t2); inv H0; inv H1; try discriminate. + destruct (Int.ltu i0 Int.iwordsize); inv H; auto. + destruct (Int64.ltu i0 Int64.iwordsize); inv H; auto. + destruct (Int64.ltu i0 (Int64.repr 32)); inv H; auto. + destruct (Int.ltu i0 Int64.iwordsize'); inv H; auto. +Qed. + +Remark sem_cmp_inj: + forall cmp v1 tv1 ty1 v2 tv2 ty2 v, + sem_cmp cmp v1 ty1 v2 ty2 m = Some v -> + val_inject f v1 tv1 -> + val_inject f v2 tv2 -> + exists tv, sem_cmp cmp tv1 ty1 tv2 ty2 m' = Some tv /\ val_inject f v tv. +Proof. + intros. + unfold sem_cmp in *; destruct (classify_cmp ty1 ty2). +- (* pointer - pointer *) + destruct (Val.cmpu_bool (Mem.valid_pointer m) cmp v1 v2) as [b|] eqn:E; simpl in H; inv H. + replace (Val.cmpu_bool (Mem.valid_pointer m') cmp tv1 tv2) with (Some b). + simpl. TrivialInject. + symmetry. eapply val_cmpu_bool_inject; eauto. +- (* numerical - numerical *) + assert (SELF: forall b, optval_self_injects (Some (Val.of_bool b))). + { + destruct b; exact I. + } + eapply sem_binarith_inject; eauto. +Qed. + +Lemma sem_binary_operation_inj: + forall op v1 ty1 v2 ty2 v tv1 tv2, + sem_binary_operation op v1 ty1 v2 ty2 m = Some v -> + val_inject f v1 tv1 -> val_inject f v2 tv2 -> + exists tv, sem_binary_operation op tv1 ty1 tv2 ty2 m' = Some tv /\ val_inject f v tv. +Proof. + unfold sem_binary_operation; intros; destruct op. +- (* add *) + unfold sem_add in *; destruct (classify_add ty1 ty2). + + inv H0; inv H1; inv H. TrivialInject. + econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + + inv H0; inv H1; inv H. TrivialInject. + econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + + inv H0; inv H1; inv H. TrivialInject. + econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + + inv H0; inv H1; inv H. TrivialInject. + econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + + eapply sem_binarith_inject; eauto; intros; exact I. +- (* sub *) + unfold sem_sub in *; destruct (classify_sub ty1 ty2). + + inv H0; inv H1; inv H. TrivialInject. + econstructor. eauto. rewrite Int.sub_add_l. auto. + + inv H0; inv H1; inv H. TrivialInject. + destruct (zeq b1 b0); try discriminate. subst b1. + rewrite H0 in H2; inv H2. rewrite zeq_true. + destruct (Int.eq (Int.repr (sizeof ty)) Int.zero); inv H3. + rewrite Int.sub_shifted. TrivialInject. + + inv H0; inv H1; inv H. TrivialInject. + econstructor. eauto. rewrite Int.sub_add_l. auto. + + eapply sem_binarith_inject; eauto; intros; exact I. +- (* mul *) + eapply sem_binarith_inject; eauto; intros; exact I. +- (* div *) + eapply sem_binarith_inject; eauto; intros. + destruct sg. + destruct (Int.eq n2 Int.zero + || Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone); exact I. + destruct (Int.eq n2 Int.zero); exact I. + destruct sg. + destruct (Int64.eq n2 Int64.zero + || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); exact I. + destruct (Int64.eq n2 Int64.zero); exact I. + exact I. +- (* mod *) + eapply sem_binarith_inject; eauto; intros. + destruct sg. + destruct (Int.eq n2 Int.zero + || Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone); exact I. + destruct (Int.eq n2 Int.zero); exact I. + destruct sg. + destruct (Int64.eq n2 Int64.zero + || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); exact I. + destruct (Int64.eq n2 Int64.zero); exact I. + exact I. +- (* and *) + eapply sem_binarith_inject; eauto; intros; exact I. +- (* or *) + eapply sem_binarith_inject; eauto; intros; exact I. +- (* xor *) + eapply sem_binarith_inject; eauto; intros; exact I. +- (* shl *) + eapply sem_shift_inject; eauto. +- (* shr *) + eapply sem_shift_inject; eauto. + (* comparisons *) +- eapply sem_cmp_inj; eauto. +- eapply sem_cmp_inj; eauto. +- eapply sem_cmp_inj; eauto. +- eapply sem_cmp_inj; eauto. +- eapply sem_cmp_inj; eauto. +- eapply sem_cmp_inj; eauto. +Qed. + +Lemma sem_cast_inject: + forall v1 ty1 ty v tv1, + sem_cast v1 ty1 ty = Some v -> + val_inject f v1 tv1 -> + exists tv, sem_cast tv1 ty1 ty = Some tv /\ val_inject f v tv. +Proof. + unfold sem_cast; intros; destruct (classify_cast ty1 ty); + inv H0; inv H; TrivialInject. +- econstructor; eauto. +- destruct (cast_float_int si2 f0); inv H1; TrivialInject. +- destruct (cast_float_long si2 f0); inv H1; TrivialInject. +- destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; TrivialInject. econstructor; eauto. +- destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; TrivialInject. econstructor; eauto. +- econstructor; eauto. +Qed. + +Lemma bool_val_inject: + forall v ty b tv, + bool_val v ty = Some b -> + val_inject f v tv -> + bool_val tv ty = Some b. +Proof. + unfold bool_val; intros. + destruct (classify_bool ty); inv H0; congruence. +Qed. + +End GENERIC_INJECTION. + +Lemma sem_binary_operation_inject: + forall f m m' op v1 ty1 v2 ty2 v tv1 tv2, + sem_binary_operation op v1 ty1 v2 ty2 m = Some v -> + val_inject f v1 tv1 -> val_inject f v2 tv2 -> + Mem.inject f m m' -> + exists tv, sem_binary_operation op tv1 ty1 tv2 ty2 m' = Some tv /\ val_inject f v tv. +Proof. + intros. eapply sem_binary_operation_inj; eauto. + intros; eapply Mem.valid_pointer_inject_val; eauto. + intros; eapply Mem.weak_valid_pointer_inject_val; eauto. + intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto. + intros; eapply Mem.different_pointers_inject; eauto. +Qed. + + + diff --git a/cfrontend/Csharpminor.v b/cfrontend/Csharpminor.v index d0bd9f4..718daa1 100644 --- a/cfrontend/Csharpminor.v +++ b/cfrontend/Csharpminor.v @@ -33,7 +33,8 @@ Require Import Smallstep. Inductive constant : Type := | Ointconst: int -> constant (**r integer constant *) - | Ofloatconst: float -> constant. (**r floating-point constant *) + | Ofloatconst: float -> constant (**r floating-point constant *) + | Olongconst: int64 -> constant. (**r long integer constant *) Definition unary_operation : Type := Cminor.unary_operation. Definition binary_operation : Type := Cminor.binary_operation. @@ -238,6 +239,7 @@ Definition eval_constant (cst: constant) : option val := match cst with | Ointconst n => Some (Vint n) | Ofloatconst n => Some (Vfloat n) + | Olongconst n => Some (Vlong n) end. Definition eval_unop := Cminor.eval_unop. diff --git a/cfrontend/Cshmgen.v b/cfrontend/Cshmgen.v index 51f89da..2a27852 100644 --- a/cfrontend/Cshmgen.v +++ b/cfrontend/Cshmgen.v @@ -44,7 +44,7 @@ Open Local Scope error_monad_scope. - The C types of the arguments of the operation. These types are used to insert the necessary numeric conversions and to resolve operation overloading. - Most of these functions return an [option expr], with [None] + Most of these functions return a [res expr], with [Error] denoting a case where the operation is not defined at the given types. *) @@ -52,6 +52,8 @@ Definition make_intconst (n: int) := Econst (Ointconst n). Definition make_floatconst (f: float) := Econst (Ofloatconst f). +Definition make_longconst (f: int64) := Econst (Olongconst f). + Definition make_floatofint (e: expr) (sg: signedness) := match sg with | Signed => Eunop Ofloatofint e @@ -64,6 +66,24 @@ Definition make_intoffloat (e: expr) (sg: signedness) := | Unsigned => Eunop Ointuoffloat e end. +Definition make_longofint (e: expr) (sg: signedness) := + match sg with + | Signed => Eunop Olongofint e + | Unsigned => Eunop Olongofintu e + end. + +Definition make_floatoflong (e: expr) (sg: signedness) := + match sg with + | Signed => Eunop Ofloatoflong e + | Unsigned => Eunop Ofloatoflongu e + end. + +Definition make_longoffloat (e: expr) (sg: signedness) := + match sg with + | Signed => Eunop Olongoffloat e + | Unsigned => Eunop Olonguoffloat e + end. + (** [make_boolean e ty] returns a Csharpminor expression that evaluates to the boolean value of [e]. *) @@ -80,6 +100,7 @@ Definition make_boolean (e: expr) (ty: type) := | bool_case_i => make_cmp_ne_zero e | bool_case_f => Ebinop (Ocmpf Cne) e (make_floatconst Float.zero) | bool_case_p => Ebinop (Ocmpu Cne) e (make_intconst Int.zero) + | bool_case_l => Ebinop (Ocmpl Cne) e (make_longconst Int64.zero) | bool_default => e (**r should not happen *) end. @@ -88,6 +109,7 @@ Definition make_notbool (e: expr) (ty: type) := | bool_case_i => OK (Ebinop (Ocmp Ceq) e (make_intconst Int.zero)) | bool_case_f => OK (Ebinop (Ocmpf Ceq) e (make_floatconst Float.zero)) | bool_case_p => OK (Ebinop (Ocmpu Ceq) e (make_intconst Int.zero)) + | bool_case_l => OK (Ebinop (Ocmpl Ceq) e (make_longconst Int64.zero)) | _ => Error (msg "Cshmgen.make_notbool") end. @@ -95,96 +117,144 @@ Definition make_neg (e: expr) (ty: type) := match classify_neg ty with | neg_case_i _ => OK (Eunop Onegint e) | neg_case_f => OK (Eunop Onegf e) + | neg_case_l _ => OK (Eunop Onegl e) | _ => Error (msg "Cshmgen.make_neg") end. Definition make_notint (e: expr) (ty: type) := - Eunop Onotint e. + match classify_notint ty with + | notint_case_i _ => OK (Eunop Onotint e) + | notint_case_l _ => OK (Eunop Onotl e) + | _ => Error (msg "Cshmgen.make_notint") + end. + +Definition make_binarith (iop iopu fop lop lopu: binary_operation) + (e1: expr) (ty1: type) (e2: expr) (ty2: type) := + match classify_binarith ty1 ty2 with + | bin_case_ii Signed => OK (Ebinop iop e1 e2) + | bin_case_ii Unsigned => OK (Ebinop iopu e1 e2) + | bin_case_ff => OK (Ebinop fop e1 e2) + | bin_case_if sg => OK (Ebinop fop (make_floatofint e1 sg) e2) + | bin_case_fi sg => OK (Ebinop fop e1 (make_floatofint e2 sg)) + | bin_case_ll Signed => OK (Ebinop lop e1 e2) + | bin_case_ll Unsigned => OK (Ebinop lopu e1 e2) + | bin_case_il sg1 Signed => OK (Ebinop lop (make_longofint e1 sg1) e2) + | bin_case_il sg1 Unsigned => OK (Ebinop lopu (make_longofint e1 sg1) e2) + | bin_case_li Signed sg2 => OK (Ebinop lop e1 (make_longofint e2 sg2)) + | bin_case_li Unsigned sg2 => OK (Ebinop lopu e1 (make_longofint e2 sg2)) + | bin_case_lf sg => OK (Ebinop fop (make_floatoflong e1 sg) e2) + | bin_case_fl sg => OK (Ebinop fop e1 (make_floatoflong e2 sg)) + | bin_default => Error (msg "Cshmgen.make_binarith") + end. Definition make_add (e1: expr) (ty1: type) (e2: expr) (ty2: type) := match classify_add ty1 ty2 with - | add_case_ii _ => OK (Ebinop Oadd e1 e2) - | add_case_ff => OK (Ebinop Oaddf e1 e2) - | add_case_if sg => OK (Ebinop Oaddf (make_floatofint e1 sg) e2) - | add_case_fi sg => OK (Ebinop Oaddf e1 (make_floatofint e2 sg)) | add_case_pi ty _ => let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in OK (Ebinop Oadd e1 (Ebinop Omul n e2)) | add_case_ip ty _ => let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in OK (Ebinop Oadd e2 (Ebinop Omul n e1)) - | add_default => Error (msg "Cshmgen.make_add") + | add_case_pl ty _ => + let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in + OK (Ebinop Oadd e1 (Ebinop Omul n (Eunop Ointoflong e2))) + | add_case_lp ty _ => + let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in + OK (Ebinop Oadd e2 (Ebinop Omul n (Eunop Ointoflong e1))) + | add_default => + make_binarith Oadd Oadd Oaddf Oaddl Oaddl e1 ty1 e2 ty2 end. Definition make_sub (e1: expr) (ty1: type) (e2: expr) (ty2: type) := match classify_sub ty1 ty2 with - | sub_case_ii _ => OK (Ebinop Osub e1 e2) - | sub_case_ff => OK (Ebinop Osubf e1 e2) - | sub_case_if sg => OK (Ebinop Osubf (make_floatofint e1 sg) e2) - | sub_case_fi sg => OK (Ebinop Osubf e1 (make_floatofint e2 sg)) - | sub_case_pi ty => + | sub_case_pi ty _ => let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in OK (Ebinop Osub e1 (Ebinop Omul n e2)) | sub_case_pp ty => let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in OK (Ebinop Odivu (Ebinop Osub e1 e2) n) - | sub_default => Error (msg "Cshmgen.make_sub") + | sub_case_pl ty _ => + let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in + OK (Ebinop Osub e1 (Ebinop Omul n (Eunop Ointoflong e2))) + | sub_default => + make_binarith Osub Osub Osubf Osubl Osubl e1 ty1 e2 ty2 end. Definition make_mul (e1: expr) (ty1: type) (e2: expr) (ty2: type) := - match classify_mul ty1 ty2 with - | mul_case_ii _ => OK (Ebinop Omul e1 e2) - | mul_case_ff => OK (Ebinop Omulf e1 e2) - | mul_case_if sg => OK (Ebinop Omulf (make_floatofint e1 sg) e2) - | mul_case_fi sg => OK (Ebinop Omulf e1 (make_floatofint e2 sg)) - | mul_default => Error (msg "Cshmgen.make_mul") - end. + make_binarith Omul Omul Omulf Omull Omull e1 ty1 e2 ty2. Definition make_div (e1: expr) (ty1: type) (e2: expr) (ty2: type) := - match classify_div ty1 ty2 with - | div_case_ii Unsigned => OK (Ebinop Odivu e1 e2) - | div_case_ii Signed => OK (Ebinop Odiv e1 e2) - | div_case_ff => OK (Ebinop Odivf e1 e2) - | div_case_if sg => OK (Ebinop Odivf (make_floatofint e1 sg) e2) - | div_case_fi sg => OK (Ebinop Odivf e1 (make_floatofint e2 sg)) - | div_default => Error (msg "Cshmgen.make_div") + make_binarith Odiv Odivu Odivf Odivl Odivlu e1 ty1 e2 ty2. + +Definition make_binarith_int (iop iopu lop lopu: binary_operation) + (e1: expr) (ty1: type) (e2: expr) (ty2: type) := + match classify_binarith ty1 ty2 with + | bin_case_ii Signed => OK (Ebinop iop e1 e2) + | bin_case_ii Unsigned => OK (Ebinop iopu e1 e2) + | bin_case_ll Signed => OK (Ebinop lop e1 e2) + | bin_case_ll Unsigned => OK (Ebinop lopu e1 e2) + | bin_case_il sg1 Signed => OK (Ebinop lop (make_longofint e1 sg1) e2) + | bin_case_il sg1 Unsigned => OK (Ebinop lopu (make_longofint e1 sg1) e2) + | bin_case_li Signed sg2 => OK (Ebinop lop e1 (make_longofint e2 sg2)) + | bin_case_li Unsigned sg2 => OK (Ebinop lopu e1 (make_longofint e2 sg2)) + | _ => Error (msg "Cshmgen.make_binarith_int") end. Definition make_mod (e1: expr) (ty1: type) (e2: expr) (ty2: type) := - match classify_binint ty1 ty2 with - | binint_case_ii Unsigned => OK (Ebinop Omodu e1 e2) - | binint_case_ii Signed => OK (Ebinop Omod e1 e2) - | mod_default => Error (msg "Cshmgen.make_mod") - end. + make_binarith_int Omod Omodu Omodl Omodlu e1 ty1 e2 ty2. Definition make_and (e1: expr) (ty1: type) (e2: expr) (ty2: type) := - OK(Ebinop Oand e1 e2). + make_binarith_int Oand Oand Oandl Oandl e1 ty1 e2 ty2. Definition make_or (e1: expr) (ty1: type) (e2: expr) (ty2: type) := - OK(Ebinop Oor e1 e2). + make_binarith_int Oor Oor Oorl Oorl e1 ty1 e2 ty2. Definition make_xor (e1: expr) (ty1: type) (e2: expr) (ty2: type) := - OK(Ebinop Oxor e1 e2). + make_binarith_int Oxor Oxor Oxorl Oxorl e1 ty1 e2 ty2. + +(* +Definition make_shift (iop iopu lop lopu: binary_operation) + (e1: expr) (ty1: type) (e2: expr) (ty2: type) := + match classify_shift ty1 ty2 with + | shift_case_ii Signed => OK (Ebinop iop e1 e2) + | shift_case_ii Unsigned => OK (Ebinop iopu e1 e2) + | shift_case_li Signed => OK (Ebinop lop e1 e2) + | shift_case_li Unsigned => OK (Ebinop lopu e1 e2) + | shift_case_il Signed => OK (Ebinop iop e1 (Eunop Ointoflong e2)) + | shift_case_il Unsigned => OK (Ebinop iopu e1 (Eunop Ointoflong e2)) + | shift_case_ll Signed => OK (Ebinop lop e1 (Eunop Ointoflong e2)) + | shift_case_ll Unsigned => OK (Ebinop lopu e1 (Eunop Ointoflong e2)) + | shift_default => Error (msg "Cshmgen.make_shift") + end. +*) Definition make_shl (e1: expr) (ty1: type) (e2: expr) (ty2: type) := - OK(Ebinop Oshl e1 e2). + match classify_shift ty1 ty2 with + | shift_case_ii _ => OK (Ebinop Oshl e1 e2) + | shift_case_li _ => OK (Ebinop Oshll e1 e2) + | shift_case_il _ => OK (Ebinop Oshl e1 (Eunop Ointoflong e2)) + | shift_case_ll _ => OK (Ebinop Oshll e1 (Eunop Ointoflong e2)) + | shift_default => Error (msg "Cshmgen.make_shl") + end. Definition make_shr (e1: expr) (ty1: type) (e2: expr) (ty2: type) := match classify_shift ty1 ty2 with - | shift_case_ii Unsigned => OK (Ebinop Oshru e1 e2) | shift_case_ii Signed => OK (Ebinop Oshr e1 e2) - | shr_default => Error (msg "Cshmgen.make_shr") + | shift_case_ii Unsigned => OK (Ebinop Oshru e1 e2) + | shift_case_li Signed => OK (Ebinop Oshrl e1 e2) + | shift_case_li Unsigned => OK (Ebinop Oshrlu e1 e2) + | shift_case_il Signed => OK (Ebinop Oshr e1 (Eunop Ointoflong e2)) + | shift_case_il Unsigned => OK (Ebinop Oshru e1 (Eunop Ointoflong e2)) + | shift_case_ll Signed => OK (Ebinop Oshrl e1 (Eunop Ointoflong e2)) + | shift_case_ll Unsigned => OK (Ebinop Oshrlu e1 (Eunop Ointoflong e2)) + | shift_default => Error (msg "Cshmgen.make_shr") end. Definition make_cmp (c: comparison) (e1: expr) (ty1: type) (e2: expr) (ty2: type) := match classify_cmp ty1 ty2 with - | cmp_case_ii Signed => OK (Ebinop (Ocmp c) e1 e2) - | cmp_case_ii Unsigned => OK (Ebinop (Ocmpu c) e1 e2) | cmp_case_pp => OK (Ebinop (Ocmpu c) e1 e2) - | cmp_case_ff => OK (Ebinop (Ocmpf c) e1 e2) - | cmp_case_if sg => OK (Ebinop (Ocmpf c) (make_floatofint e1 sg) e2) - | cmp_case_fi sg => OK (Ebinop (Ocmpf c) e1 (make_floatofint e2 sg)) - | cmp_default => Error (msg "Cshmgen.make_cmp") + | cmp_default => + make_binarith (Ocmp c) (Ocmpu c) (Ocmpf c) (Ocmpl c) (Ocmplu c) e1 ty1 e2 ty2 end. (** [make_cast from to e] applies to [e] the numeric conversions needed @@ -208,17 +278,23 @@ Definition make_cast_float (e: expr) (sz: floatsize) := Definition make_cast (from to: type) (e: expr) := match classify_cast from to with - | cast_case_neutral => e - | cast_case_i2i sz2 si2 => make_cast_int e sz2 si2 - | cast_case_f2f sz2 => make_cast_float e sz2 - | cast_case_i2f si1 sz2 => make_cast_float (make_floatofint e si1) sz2 - | cast_case_f2i sz2 si2 => make_cast_int (make_intoffloat e si2) sz2 si2 - | cast_case_f2bool => Ebinop (Ocmpf Cne) e (make_floatconst Float.zero) - | cast_case_p2bool => Ebinop (Ocmpu Cne) e (make_intconst Int.zero) - | cast_case_struct id1 fld1 id2 fld2 => e - | cast_case_union id1 fld1 id2 fld2 => e - | cast_case_void => e - | cast_case_default => e + | cast_case_neutral => OK e + | cast_case_i2i sz2 si2 => OK (make_cast_int e sz2 si2) + | cast_case_f2f sz2 => OK (make_cast_float e sz2) + | cast_case_i2f si1 sz2 => OK (make_cast_float (make_floatofint e si1) sz2) + | cast_case_f2i sz2 si2 => OK (make_cast_int (make_intoffloat e si2) sz2 si2) + | cast_case_l2l => OK e + | cast_case_i2l si1 => OK (make_longofint e si1) + | cast_case_l2i sz2 si2 => OK (make_cast_int (Eunop Ointoflong e) sz2 si2) + | cast_case_l2f si1 sz2 => OK (make_cast_float (make_floatoflong e si1) sz2) + | cast_case_f2l si2 => OK (make_longoffloat e si2) + | cast_case_f2bool => OK (Ebinop (Ocmpf Cne) e (make_floatconst Float.zero)) + | cast_case_l2bool => OK (Ebinop (Ocmpl Cne) e (make_longconst Int64.zero)) + | cast_case_p2bool => OK (Ebinop (Ocmpu Cne) e (make_intconst Int.zero)) + | cast_case_struct id1 fld1 id2 fld2 => OK e + | cast_case_union id1 fld1 id2 fld2 => OK e + | cast_case_void => OK e + | cast_case_default => Error (msg "Cshmgen.make_cast") end. (** [make_load addr ty_res] loads a value of type [ty_res] from @@ -259,7 +335,7 @@ Definition make_store (addr: expr) (ty: type) (rhs: expr) := Definition transl_unop (op: Cop.unary_operation) (a: expr) (ta: type) : res expr := match op with | Cop.Onotbool => make_notbool a ta - | Cop.Onotint => OK(make_notint a ta) + | Cop.Onotint => make_notint a ta | Cop.Oneg => make_neg a ta end. @@ -297,6 +373,8 @@ Fixpoint transl_expr (a: Clight.expr) {struct a} : res expr := OK(make_intconst n) | Clight.Econst_float n _ => OK(make_floatconst n) + | Clight.Econst_long n _ => + OK(make_longconst n) | Clight.Evar id ty => make_load (Eaddrof id) ty | Clight.Etempvar id ty => @@ -315,7 +393,7 @@ Fixpoint transl_expr (a: Clight.expr) {struct a} : res expr := transl_binop op tb (typeof b) tc (typeof c) | Clight.Ecast b ty => do tb <- transl_expr b; - OK (make_cast (typeof b) ty tb) + make_cast (typeof b) ty tb | Clight.Efield b i ty => match typeof b with | Tstruct _ fld _ => @@ -369,8 +447,9 @@ Fixpoint transl_arglist (al: list Clight.expr) (tyl: typelist) | nil, Tnil => OK nil | a1 :: a2, Tcons ty1 ty2 => do ta1 <- transl_expr a1; - do ta2 <- transl_arglist a2 ty2; - OK (make_cast (typeof a1) ty1 ta1 :: ta2) + do ta1' <- make_cast (typeof a1) ty1 ta1; + do ta2 <- transl_arglist a2 ty2; + OK (ta1' :: ta2) | _, _ => Error(msg "Cshmgen.transl_arglist: arity mismatch") end. @@ -408,7 +487,8 @@ Fixpoint transl_statement (tyret: type) (nbrk ncnt: nat) | Clight.Sassign b c => do tb <- transl_lvalue b; do tc <- transl_expr c; - make_store tb (typeof b) (make_cast (typeof c) (typeof b) tc) + do tc' <- make_cast (typeof c) (typeof b) tc; + make_store tb (typeof b) tc' | Clight.Sset x b => do tb <- transl_expr b; OK(Sset x tb) @@ -442,7 +522,8 @@ Fixpoint transl_statement (tyret: type) (nbrk ncnt: nat) OK (Sexit ncnt) | Clight.Sreturn (Some e) => do te <- transl_expr e; - OK (Sreturn (Some (make_cast (typeof e) tyret te))) + do te' <- make_cast (typeof e) tyret te; + OK (Sreturn (Some te')) | Clight.Sreturn None => OK (Sreturn None) | Clight.Sswitch a sl => @@ -486,9 +567,6 @@ Definition transl_function (f: Clight.function) : res function := (map fst (Clight.fn_temps f)) tbody). -Definition list_typ_eq: forall (l1 l2: list typ), {l1=l2} + {l1<>l2} - := list_eq_dec typ_eq. - Definition transl_fundef (f: Clight.fundef) : res fundef := match f with | Clight.Internal g => diff --git a/cfrontend/Cshmgenproof.v b/cfrontend/Cshmgenproof.v index ad5ada6..7c24d0d 100644 --- a/cfrontend/Cshmgenproof.v +++ b/cfrontend/Cshmgenproof.v @@ -67,157 +67,10 @@ Proof. rewrite H0; reflexivity. Qed. -(* -Lemma var_kind_by_value: - forall ty chunk, - access_mode ty = By_value chunk -> - var_kind_of_type ty = OK(Vscalar chunk). -Proof. - intros ty chunk; destruct ty; simpl; try congruence. - destruct i; try congruence; destruct s; congruence. - destruct f; congruence. -Qed. - -Lemma var_kind_by_reference: - forall ty vk, - access_mode ty = By_reference \/ access_mode ty = By_copy -> - var_kind_of_type ty = OK vk -> - vk = Varray (Ctypes.sizeof ty) (Ctypes.alignof ty). -Proof. - intros ty vk; destruct ty; simpl; try intuition congruence. - destruct i; try congruence; destruct s; intuition congruence. - destruct f; intuition congruence. -Qed. - -Lemma sizeof_var_kind_of_type: - forall ty vk, - var_kind_of_type ty = OK vk -> - Csharpminor.sizeof vk = Ctypes.sizeof ty. -Proof. - intros ty vk. - assert (sizeof (Varray (Ctypes.sizeof ty) (Ctypes.alignof ty)) = Ctypes.sizeof ty). - simpl. rewrite Zmax_spec. apply zlt_false. - generalize (Ctypes.sizeof_pos ty). omega. - destruct ty; try (destruct i; try destruct s); try (destruct f); - simpl; intro EQ; inversion EQ; subst vk; auto. -Qed. -*) -(**** -Remark cast_int_int_normalized: - forall sz si a chunk n, - access_mode (Tint sz si a) = By_value chunk -> - val_normalized (Vint (cast_int_int sz si n)) chunk. -Proof. - unfold access_mode, cast_int_int, val_normalized; intros. destruct sz. - destruct si; inv H; simpl. - rewrite Int.sign_ext_idem; auto. compute; auto. - rewrite Int.zero_ext_idem; auto. compute; auto. - destruct si; inv H; simpl. - rewrite Int.sign_ext_idem; auto. compute; auto. - rewrite Int.zero_ext_idem; auto. compute; auto. - inv H. auto. - inv H. destruct (Int.eq n Int.zero); auto. -Qed. - -Remark cast_float_float_normalized: - forall sz a chunk n, - access_mode (Tfloat sz a) = By_value chunk -> - val_normalized (Vfloat (cast_float_float sz n)) chunk. -Proof. - unfold access_mode, cast_float_float, val_normalized; intros. - destruct sz; inv H; simpl. - rewrite Float.singleoffloat_idem. auto. - auto. -Qed. - -Remark cast_neutral_normalized: - forall ty1 ty2 chunk, - classify_cast ty1 ty2 = cast_case_neutral -> - access_mode ty2 = By_value chunk -> - chunk = Mint32. -Proof. - intros. functional inversion H; subst; simpl in H0; congruence. -Qed. - -Remark cast_to_bool_normalized: - forall ty1 ty2 chunk, - classify_cast ty1 ty2 = cast_case_f2bool \/ - classify_cast ty1 ty2 = cast_case_p2bool -> - access_mode ty2 = By_value chunk -> - chunk = Mint8unsigned. -Proof. - intros. destruct ty2; simpl in *; try discriminate. - destruct i; destruct ty1; intuition congruence. - destruct ty1; intuition discriminate. - destruct ty1; intuition discriminate. -Qed. - -Lemma cast_result_normalized: - forall chunk v1 ty1 ty2 v2, - sem_cast v1 ty1 ty2 = Some v2 -> - access_mode ty2 = By_value chunk -> - val_normalized v2 chunk. -Proof. - intros. functional inversion H; subst. - rewrite (cast_neutral_normalized ty1 ty2 chunk); auto. red; auto. - rewrite (cast_neutral_normalized ty1 ty2 chunk); auto. red; auto. - functional inversion H2; subst. eapply cast_int_int_normalized; eauto. - functional inversion H2; subst. eapply cast_float_float_normalized; eauto. - functional inversion H2; subst. eapply cast_float_float_normalized; eauto. - functional inversion H2; subst. eapply cast_int_int_normalized; eauto. - rewrite (cast_to_bool_normalized ty1 ty2 chunk); auto. red; auto. - rewrite (cast_to_bool_normalized ty1 ty2 chunk); auto. red; auto. - rewrite (cast_to_bool_normalized ty1 ty2 chunk); auto. destruct (Int.eq i Int.zero); red; auto. - rewrite (cast_to_bool_normalized ty1 ty2 chunk); auto. red; auto. - functional inversion H2; subst. simpl in H0. congruence. - functional inversion H2; subst. simpl in H0. congruence. - functional inversion H5; subst. simpl in H0. congruence. -Qed. - -Definition val_casted (v: val) (ty: type) : Prop := - exists v0, exists ty0, sem_cast v0 ty0 ty = Some v. - -Lemma val_casted_normalized: - forall v ty chunk, - val_casted v ty -> access_mode ty = By_value chunk -> val_normalized v chunk. -Proof. - intros. destruct H as [v0 [ty0 CAST]]. eapply cast_result_normalized; eauto. -Qed. - -Fixpoint val_casted_list (vl: list val) (tyl: typelist) {struct vl}: Prop := - match vl, tyl with - | nil, Tnil => True - | v1 :: vl', Tcons ty1 tyl' => val_casted v1 ty1 /\ val_casted_list vl' tyl' - | _, _ => False - end. - -Lemma eval_exprlist_casted: - forall ge e le m al tyl vl, - Clight.eval_exprlist ge e le m al tyl vl -> - val_casted_list vl tyl. -Proof. - induction 1; simpl. - auto. - split. exists v1; exists (typeof a); auto. eauto. -Qed. - -*******) - (** * Properties of the translation functions *) (** Transformation of expressions and statements. *) -(* -Lemma is_variable_correct: - forall a id, - is_variable a = Some id -> - a = Clight.Evar id (typeof a). -Proof. - intros until id. unfold is_variable; destruct a; intros; try discriminate. - simpl. congruence. -Qed. -*) - Lemma transl_expr_lvalue: forall ge e le m a loc ofs ta, Clight.eval_lvalue ge e le m a loc ofs -> @@ -283,6 +136,13 @@ Proof. intros. unfold make_floatconst. econstructor. reflexivity. Qed. +Lemma make_longconst_correct: + forall n e le m, + eval_expr ge e le m (make_longconst n) (Vlong n). +Proof. + intros. unfold make_floatconst. econstructor. reflexivity. +Qed. + Lemma make_floatofint_correct: forall a n sg e le m, eval_expr ge e le m a (Vint n) -> @@ -302,8 +162,38 @@ Proof. destruct sg; econstructor; eauto; simpl; rewrite H0; auto. Qed. -Hint Resolve make_intconst_correct make_floatconst_correct +Lemma make_longofint_correct: + forall a n sg e le m, + eval_expr ge e le m a (Vint n) -> + eval_expr ge e le m (make_longofint a sg) (Vlong(cast_int_long sg n)). +Proof. + intros. unfold make_longofint, cast_int_long. + destruct sg; econstructor; eauto. +Qed. + +Lemma make_floatoflong_correct: + forall a n sg e le m, + eval_expr ge e le m a (Vlong n) -> + eval_expr ge e le m (make_floatoflong a sg) (Vfloat(cast_long_float sg n)). +Proof. + intros. unfold make_floatoflong, cast_int_long. + destruct sg; econstructor; eauto. +Qed. + +Lemma make_longoffloat_correct: + forall e le m a sg f i, + eval_expr ge e le m a (Vfloat f) -> + cast_float_long sg f = Some i -> + eval_expr ge e le m (make_longoffloat a sg) (Vlong i). +Proof. + unfold cast_float_long, make_longoffloat; intros. + destruct sg; econstructor; eauto; simpl; rewrite H0; auto. +Qed. + +Hint Resolve make_intconst_correct make_floatconst_correct make_longconst_correct make_floatofint_correct make_intoffloat_correct + make_longofint_correct + make_floatoflong_correct make_longoffloat_correct eval_Eunop eval_Ebinop: cshm. Hint Extern 2 (@eq trace _ _) => traceEq: cshm. @@ -357,38 +247,34 @@ Qed. Hint Resolve make_cast_int_correct make_cast_float_correct: cshm. Lemma make_cast_correct: - forall e le m a v ty1 ty2 v', + forall e le m a b v ty1 ty2 v', + make_cast ty1 ty2 a = OK b -> eval_expr ge e le m a v -> sem_cast v ty1 ty2 = Some v' -> - eval_expr ge e le m (make_cast ty1 ty2 a) v'. + eval_expr ge e le m b v'. Proof. - intros. unfold make_cast. functional inversion H0; subst. - (* neutral *) - rewrite H2; auto. - rewrite H2; auto. - (* int -> int *) - rewrite H2. auto with cshm. - (* float -> float *) - rewrite H2. auto with cshm. - (* int -> float *) - rewrite H2. auto with cshm. + intros. unfold make_cast, sem_cast in *; + destruct (classify_cast ty1 ty2); inv H; destruct v; inv H1; eauto with cshm. (* float -> int *) - rewrite H2. eauto with cshm. + destruct (cast_float_int si2 f) as [i|] eqn:E; inv H2. eauto with cshm. + (* float -> long *) + destruct (cast_float_long si2 f) as [i|] eqn:E; inv H2. eauto with cshm. (* float -> bool *) - rewrite H2. econstructor; eauto with cshm. - simpl. unfold Val.cmpf, Val.cmpf_bool. rewrite Float.cmp_ne_eq. rewrite H7; auto. - rewrite H2. econstructor; eauto with cshm. - simpl. unfold Val.cmpf, Val.cmpf_bool. rewrite Float.cmp_ne_eq. rewrite H7; auto. - (* pointer -> bool *) - rewrite H2. econstructor; eauto with cshm. - simpl. unfold Val.cmpu, Val.cmpu_bool, Int.cmpu. destruct (Int.eq i Int.zero); reflexivity. - rewrite H2. econstructor; eauto with cshm. - (* struct -> struct *) - rewrite H2. auto. - (* union -> union *) - rewrite H2. auto. - (* any -> void *) - rewrite H5. auto. + econstructor; eauto with cshm. + simpl. unfold Val.cmpf, Val.cmpf_bool. rewrite Float.cmp_ne_eq. + destruct (Float.cmp Ceq f Float.zero); auto. + (* long -> bool *) + econstructor; eauto with cshm. + simpl. unfold Val.cmpl, Val.cmpl_bool, Int64.cmp. + destruct (Int64.eq i Int64.zero); auto. + (* int -> bool *) + econstructor; eauto with cshm. + simpl. unfold Val.cmpu, Val.cmpu_bool, Int.cmpu. + destruct (Int.eq i Int.zero); auto. + (* struct *) + destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; auto. + (* union *) + destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; auto. Qed. Lemma make_boolean_correct: @@ -413,6 +299,10 @@ Proof. unfold Val.cmpu, Val.cmpu_bool. simpl. destruct (Int.eq i Int.zero); simpl; constructor. exists Vtrue; split. econstructor; eauto with cshm. constructor. +(* long *) + econstructor; split. econstructor; eauto with cshm. simpl. eauto. + unfold Val.cmpl, Val.cmpl_bool. simpl. + destruct (Int64.eq i Int64.zero); simpl; constructor. Qed. Lemma make_neg_correct: @@ -422,10 +312,8 @@ Lemma make_neg_correct: eval_expr ge e le m a va -> eval_expr ge e le m c v. Proof. - intros until m; intro SEM. unfold make_neg. - functional inversion SEM; intros. - rewrite H0 in H4. inv H4. eapply eval_Eunop; eauto with cshm. - rewrite H0 in H4. inv H4. eauto with cshm. + unfold sem_neg, make_neg; intros until m; intros SEM MAKE EV1; + destruct (classify_neg tya); inv MAKE; destruct va; inv SEM; eauto with cshm. Qed. Lemma make_notbool_correct: @@ -435,20 +323,19 @@ Lemma make_notbool_correct: eval_expr ge e le m a va -> eval_expr ge e le m c v. Proof. - intros until m; intro SEM. unfold make_notbool. - functional inversion SEM; intros; rewrite H0 in H4; inversion H4; simpl; - eauto with cshm. + unfold sem_notbool, make_notbool; intros until m; intros SEM MAKE EV1; + destruct (classify_bool tya); inv MAKE; destruct va; inv SEM; eauto with cshm. Qed. Lemma make_notint_correct: forall a tya c va v e le m, sem_notint va tya = Some v -> - make_notint a tya = c -> + make_notint a tya = OK c -> eval_expr ge e le m a va -> eval_expr ge e le m c v. Proof. - intros until m; intro SEM. unfold make_notint. - functional inversion SEM; intros. subst. eauto with cshm. + unfold sem_notint, make_notint; intros until m; intros SEM MAKE EV1; + destruct (classify_notint tya); inv MAKE; destruct va; inv SEM; eauto with cshm. Qed. Definition binary_constructor_correct @@ -461,99 +348,206 @@ Definition binary_constructor_correct eval_expr ge e le m b vb -> eval_expr ge e le m c v. +Section MAKE_BIN. + +Variable sem_int: signedness -> int -> int -> option val. +Variable sem_long: signedness -> int64 -> int64 -> option val. +Variable sem_float: float -> float -> option val. +Variables iop iopu fop lop lopu: binary_operation. + +Hypothesis iop_ok: + forall x y m, eval_binop iop (Vint x) (Vint y) m = sem_int Signed x y. +Hypothesis iopu_ok: + forall x y m, eval_binop iopu (Vint x) (Vint y) m = sem_int Unsigned x y. +Hypothesis lop_ok: + forall x y m, eval_binop lop (Vlong x) (Vlong y) m = sem_long Signed x y. +Hypothesis lopu_ok: + forall x y m, eval_binop lopu (Vlong x) (Vlong y) m = sem_long Unsigned x y. +Hypothesis fop_ok: + forall x y m, eval_binop fop (Vfloat x) (Vfloat y) m = sem_float x y. + +Lemma make_binarith_correct: + binary_constructor_correct + (make_binarith iop iopu fop lop lopu) + (sem_binarith sem_int sem_long sem_float). +Proof. + red; unfold make_binarith, sem_binarith; + intros until m; intros SEM MAKE EV1 EV2; + destruct (classify_binarith tya tyb); inv MAKE; + destruct va; try discriminate; destruct vb; try discriminate. +- destruct s; inv H0; econstructor; eauto with cshm. + rewrite iop_ok; auto. rewrite iopu_ok; auto. +- erewrite <- fop_ok in SEM; eauto with cshm. +- erewrite <- fop_ok in SEM; eauto with cshm. +- erewrite <- fop_ok in SEM; eauto with cshm. +- destruct s; inv H0; econstructor; eauto with cshm. + rewrite lop_ok; auto. rewrite lopu_ok; auto. +- destruct s2; inv H0; econstructor; eauto with cshm. + rewrite lop_ok; auto. rewrite lopu_ok; auto. +- destruct s1; inv H0; econstructor; eauto with cshm. + rewrite lop_ok; auto. rewrite lopu_ok; auto. +- erewrite <- fop_ok in SEM; eauto with cshm. +- erewrite <- fop_ok in SEM; eauto with cshm. +Qed. + +Lemma make_binarith_int_correct: + binary_constructor_correct + (make_binarith_int iop iopu lop lopu) + (sem_binarith sem_int sem_long (fun x y => None)). +Proof. + red; unfold make_binarith_int, sem_binarith; + intros until m; intros SEM MAKE EV1 EV2; + destruct (classify_binarith tya tyb); inv MAKE; + destruct va; try discriminate; destruct vb; try discriminate. +- destruct s; inv H0; econstructor; eauto with cshm. + rewrite iop_ok; auto. rewrite iopu_ok; auto. +- destruct s; inv H0; econstructor; eauto with cshm. + rewrite lop_ok; auto. rewrite lopu_ok; auto. +- destruct s2; inv H0; econstructor; eauto with cshm. + rewrite lop_ok; auto. rewrite lopu_ok; auto. +- destruct s1; inv H0; econstructor; eauto with cshm. + rewrite lop_ok; auto. rewrite lopu_ok; auto. +Qed. + +End MAKE_BIN. + +Hint Extern 2 (@eq (option val) _ _) => (simpl; reflexivity) : cshm. + Lemma make_add_correct: binary_constructor_correct make_add sem_add. Proof. - red; intros until m. intro SEM. unfold make_add. - functional inversion SEM; rewrite H0; intros; - inversion H7; eauto with cshm. - eapply eval_Ebinop. eauto. - eapply eval_Ebinop. eauto with cshm. eauto. - simpl. reflexivity. reflexivity. - eapply eval_Ebinop. eauto. - eapply eval_Ebinop. eauto with cshm. eauto. - simpl. reflexivity. simpl. reflexivity. + red; unfold make_add, sem_add; + intros until m; intros SEM MAKE EV1 EV2; + destruct (classify_add tya tyb); inv MAKE. +- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm. +- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm. +- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm. +- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm. +- eapply make_binarith_correct; eauto; intros; auto. Qed. Lemma make_sub_correct: binary_constructor_correct make_sub sem_sub. Proof. - red; intros until m. intro SEM. unfold make_sub. - functional inversion SEM; rewrite H0; intros; - inversion H7; eauto with cshm. - eapply eval_Ebinop. eauto. - eapply eval_Ebinop. eauto with cshm. eauto. - simpl. reflexivity. reflexivity. - inversion H9. eapply eval_Ebinop. - eapply eval_Ebinop; eauto. - simpl. unfold eq_block; rewrite H3. reflexivity. - eauto with cshm. simpl. rewrite H8. reflexivity. + red; unfold make_sub, sem_sub; + intros until m; intros SEM MAKE EV1 EV2; + destruct (classify_sub tya tyb); inv MAKE. +- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm. +- destruct va; try discriminate; destruct vb; inv SEM. + destruct (zeq b0 b1); try discriminate. destruct (Int.eq (Int.repr (sizeof ty)) Int.zero) eqn:E; inv H0. + econstructor; eauto with cshm. rewrite zeq_true. simpl. rewrite E; auto. +- destruct va; try discriminate; destruct vb; inv SEM; eauto with cshm. +- eapply make_binarith_correct; eauto; intros; auto. Qed. Lemma make_mul_correct: binary_constructor_correct make_mul sem_mul. Proof. - red; intros until m. intro SEM. unfold make_mul. - functional inversion SEM; rewrite H0; intros; - inversion H7; eauto with cshm. + apply make_binarith_correct; intros; auto. Qed. Lemma make_div_correct: binary_constructor_correct make_div sem_div. Proof. - red; intros until m. intro SEM. unfold make_div. - functional inversion SEM; rewrite H0; intros. - inversion H8. eapply eval_Ebinop; eauto with cshm. - simpl. rewrite H7; auto. - inversion H8. eapply eval_Ebinop; eauto with cshm. - simpl. rewrite H7; auto. - inversion H7; eauto with cshm. - inversion H7; eauto with cshm. - inversion H7; eauto with cshm. + apply make_binarith_correct; intros; auto. Qed. Lemma make_mod_correct: binary_constructor_correct make_mod sem_mod. - red; intros until m. intro SEM. unfold make_mod. - functional inversion SEM; rewrite H0; intros. - inversion H8. eapply eval_Ebinop; eauto with cshm. - simpl. rewrite H7; auto. - inversion H8. eapply eval_Ebinop; eauto with cshm. - simpl. rewrite H7; auto. +Proof. + apply make_binarith_int_correct; intros; auto. Qed. Lemma make_and_correct: binary_constructor_correct make_and sem_and. Proof. - red; intros until m. intro SEM. unfold make_and. - functional inversion SEM. intros. inversion H7. - eauto with cshm. + apply make_binarith_int_correct; intros; auto. Qed. Lemma make_or_correct: binary_constructor_correct make_or sem_or. Proof. - red; intros until m. intro SEM. unfold make_or. - functional inversion SEM. intros. inversion H7. - eauto with cshm. + apply make_binarith_int_correct; intros; auto. Qed. Lemma make_xor_correct: binary_constructor_correct make_xor sem_xor. Proof. - red; intros until m. intro SEM. unfold make_xor. - functional inversion SEM. intros. inversion H7. - eauto with cshm. + apply make_binarith_int_correct; intros; auto. +Qed. + +Ltac comput val := + let x := fresh in set val as x in *; vm_compute in x; subst x. + +Remark small_shift_amount_1: + forall i, + Int64.ltu i Int64.iwordsize = true -> + Int.ltu (Int64.loword i) Int64.iwordsize' = true + /\ Int64.unsigned i = Int.unsigned (Int64.loword i). +Proof. + intros. apply Int64.ltu_inv in H. comput (Int64.unsigned Int64.iwordsize). + assert (Int64.unsigned i = Int.unsigned (Int64.loword i)). + { + unfold Int64.loword. rewrite Int.unsigned_repr; auto. + comput Int.max_unsigned; omega. + } + split; auto. unfold Int.ltu. apply zlt_true. rewrite <- H0. tauto. +Qed. + +Remark small_shift_amount_2: + forall i, + Int64.ltu i (Int64.repr 32) = true -> + Int.ltu (Int64.loword i) Int.iwordsize = true. +Proof. + intros. apply Int64.ltu_inv in H. comput (Int64.unsigned (Int64.repr 32)). + assert (Int64.unsigned i = Int.unsigned (Int64.loword i)). + { + unfold Int64.loword. rewrite Int.unsigned_repr; auto. + comput Int.max_unsigned; omega. + } + unfold Int.ltu. apply zlt_true. rewrite <- H0. tauto. +Qed. + +Lemma small_shift_amount_3: + forall i, + Int.ltu i Int64.iwordsize' = true -> + Int64.unsigned (Int64.repr (Int.unsigned i)) = Int.unsigned i. +Proof. + intros. apply Int.ltu_inv in H. comput (Int.unsigned Int64.iwordsize'). + apply Int64.unsigned_repr. comput Int64.max_unsigned; omega. Qed. Lemma make_shl_correct: binary_constructor_correct make_shl sem_shl. Proof. - red; intros until m. intro SEM. unfold make_shl. - functional inversion SEM. intros. inversion H8. - eapply eval_Ebinop; eauto with cshm. - simpl. rewrite H7. auto. + red; unfold make_shl, sem_shl, sem_shift; + intros until m; intros SEM MAKE EV1 EV2; + destruct (classify_shift tya tyb); inv MAKE; + destruct va; try discriminate; destruct vb; try discriminate. +- destruct (Int.ltu i0 Int.iwordsize) eqn:E; inv SEM. + econstructor; eauto. simpl; rewrite E; auto. +- destruct (Int64.ltu i0 Int64.iwordsize) eqn:E; inv SEM. + exploit small_shift_amount_1; eauto. intros [A B]. + econstructor; eauto with cshm. simpl. rewrite A. + f_equal; f_equal. unfold Int64.shl', Int64.shl. rewrite B; auto. +- destruct (Int64.ltu i0 (Int64.repr 32)) eqn:E; inv SEM. + econstructor; eauto with cshm. simpl. rewrite small_shift_amount_2; auto. +- destruct (Int.ltu i0 Int64.iwordsize') eqn:E; inv SEM. + econstructor; eauto with cshm. simpl. rewrite E. + unfold Int64.shl', Int64.shl. rewrite small_shift_amount_3; auto. Qed. Lemma make_shr_correct: binary_constructor_correct make_shr sem_shr. Proof. - red; intros until m. intro SEM. unfold make_shr. - functional inversion SEM; intros; rewrite H0 in H8; inversion H8. - eapply eval_Ebinop; eauto with cshm. - simpl; rewrite H7; auto. - eapply eval_Ebinop; eauto with cshm. - simpl; rewrite H7; auto. + red; unfold make_shr, sem_shr, sem_shift; + intros until m; intros SEM MAKE EV1 EV2; + destruct (classify_shift tya tyb); inv MAKE; + destruct va; try discriminate; destruct vb; try discriminate. +- destruct (Int.ltu i0 Int.iwordsize) eqn:E; inv SEM. + destruct s; inv H0; econstructor; eauto; simpl; rewrite E; auto. +- destruct (Int64.ltu i0 Int64.iwordsize) eqn:E; inv SEM. + exploit small_shift_amount_1; eauto. intros [A B]. + destruct s; inv H0; econstructor; eauto with cshm; simpl; rewrite A; + f_equal; f_equal. + unfold Int64.shr', Int64.shr; rewrite B; auto. + unfold Int64.shru', Int64.shru; rewrite B; auto. +- destruct (Int64.ltu i0 (Int64.repr 32)) eqn:E; inv SEM. + destruct s; inv H0; econstructor; eauto with cshm; simpl; rewrite small_shift_amount_2; auto. +- destruct (Int.ltu i0 Int64.iwordsize') eqn:E; inv SEM. + destruct s; inv H0; econstructor; eauto with cshm; simpl; rewrite E. + unfold Int64.shr', Int64.shr; rewrite small_shift_amount_3; auto. + unfold Int64.shru', Int64.shru; rewrite small_shift_amount_3; auto. Qed. Lemma make_cmp_correct: @@ -564,35 +558,12 @@ Lemma make_cmp_correct: eval_expr ge e le m b vb -> eval_expr ge e le m c v. Proof. - intros until m. intro SEM. unfold make_cmp. - functional inversion SEM; rewrite H0; intros. - (** ii Signed *) - inversion H8; eauto with cshm. - (* ii Unsigned *) - inversion H8. eauto with cshm. - (* pp int int *) - inversion H8. eauto with cshm. - (* pp ptr ptr *) - inversion H10. eapply eval_Ebinop; eauto with cshm. - simpl. unfold Val.cmpu. simpl. unfold Mem.weak_valid_pointer in *. - rewrite H3. rewrite H9. auto. - inversion H10. eapply eval_Ebinop; eauto with cshm. - simpl. unfold Val.cmpu. simpl. rewrite H3. rewrite H9. - destruct cmp; simpl in *; inv H; auto. - (* pp ptr int *) - inversion H9. eapply eval_Ebinop; eauto with cshm. - simpl. unfold Val.cmpu. simpl. rewrite H8. - destruct cmp; simpl in *; inv H; auto. - (* pp int ptr *) - inversion H9. eapply eval_Ebinop; eauto with cshm. - simpl. unfold Val.cmpu. simpl. rewrite H8. - destruct cmp; simpl in *; inv H; auto. - (* ff *) - inversion H8. eauto with cshm. - (* if *) - inversion H8. eauto with cshm. - (* fi *) - inversion H8. eauto with cshm. + unfold sem_cmp, make_cmp; intros until m; intros SEM MAKE EV1 EV2; + destruct (classify_cmp tya tyb). +- inv MAKE. destruct (Val.cmpu_bool (Mem.valid_pointer m) cmp va vb) as [bv|] eqn:E; + simpl in SEM; inv SEM. + econstructor; eauto. simpl. unfold Val.cmpu. rewrite E. auto. +- eapply make_binarith_correct; eauto; intros; auto. Qed. Lemma transl_unop_correct: @@ -604,7 +575,7 @@ Lemma transl_unop_correct: Proof. intros. destruct op; simpl in *. eapply make_notbool_correct; eauto. - eapply make_notint_correct with (tya := tya); eauto. congruence. + eapply make_notint_correct; eauto. eapply make_neg_correct; eauto. Qed. @@ -903,6 +874,8 @@ Proof. apply make_intconst_correct. (* const float *) apply make_floatconst_correct. +(* const long *) + apply make_longconst_correct. (* temp var *) constructor; auto. (* addrof *) @@ -957,8 +930,7 @@ Proof. induction 1; intros. monadInv H. constructor. monadInv H2. constructor. - eapply make_cast_correct. eapply transl_expr_correct; eauto. auto. - eauto. + eapply make_cast_correct; eauto. eapply transl_expr_correct; eauto. auto. Qed. End EXPR. @@ -1103,8 +1075,8 @@ Proof. (* skip *) auto. (* assign *) - unfold make_store, make_memcpy in EQ2. - destruct (access_mode (typeof e)); inv EQ2; auto. + unfold make_store, make_memcpy in EQ3. + destruct (access_mode (typeof e)); inv EQ3; auto. (* set *) auto. (* call *) @@ -1196,7 +1168,7 @@ Proof. monadInv TR. assert (SAME: ts' = ts /\ tk' = tk). inversion MTR. auto. - subst ts. unfold make_store, make_memcpy in EQ2. destruct (access_mode (typeof a1)); congruence. + subst ts. unfold make_store, make_memcpy in EQ3. destruct (access_mode (typeof a1)); congruence. destruct SAME; subst ts' tk'. econstructor; split. apply plus_one. eapply make_store_correct; eauto. @@ -1327,7 +1299,7 @@ Proof. monadInv TR. inv MTR. econstructor; split. apply plus_one. constructor. - eapply make_cast_correct. eapply transl_expr_correct; eauto. eauto. + eapply make_cast_correct; eauto. eapply transl_expr_correct; eauto. eapply match_env_free_blocks; eauto. econstructor; eauto. eapply match_cont_call_cont. eauto. diff --git a/cfrontend/Ctypes.v b/cfrontend/Ctypes.v index c05f21a..bf483a0 100644 --- a/cfrontend/Ctypes.v +++ b/cfrontend/Ctypes.v @@ -26,7 +26,7 @@ Require Import Errors. union). Numeric types (integers and floats) fully specify the bit size of the type. An integer type is a pair of a signed/unsigned flag and a bit size: 8, 16, or 32 bits, or the special [IBool] size - standing for the C99 [_Bool] type. *) + standing for the C99 [_Bool] type. 64-bit integers are treated separately. *) Inductive signedness : Type := | Signed: signedness @@ -92,6 +92,7 @@ Definition noattr := {| attr_volatile := false |}. Inductive type : Type := | Tvoid: type (**r the [void] type *) | Tint: intsize -> signedness -> attr -> type (**r integer types *) + | Tlong: signedness -> attr -> type (**r 64-bit integer types *) | Tfloat: floatsize -> attr -> type (**r floating-point types *) | Tpointer: type -> attr -> type (**r pointer types ([*ty]) *) | Tarray: type -> Z -> attr -> type (**r array types ([ty[len]]) *) @@ -131,6 +132,7 @@ Definition attr_of_type (ty: type) := match ty with | Tvoid => noattr | Tint sz si a => a + | Tlong si a => a | Tfloat sz a => a | Tpointer elt a => a | Tarray elt sz a => a @@ -148,8 +150,7 @@ Definition type_bool := Tint IBool Signed noattr. Definition typeconv (ty: type) : type := match ty with - | Tint I32 Unsigned _ => ty - | Tint _ _ a => Tint I32 Signed a + | Tint (I8 | I16 | IBool) _ a => Tint I32 Signed a | Tarray t sz a => Tpointer t a | Tfunction _ _ => Tpointer ty noattr | _ => ty @@ -166,6 +167,7 @@ Fixpoint alignof (t: type) : Z := | Tint I16 _ _ => 2 | Tint I32 _ _ => 4 | Tint IBool _ _ => 1 + | Tlong _ _ => 8 | Tfloat F32 _ => 4 | Tfloat F64 _ => 8 | Tpointer _ _ => 4 @@ -218,6 +220,7 @@ Fixpoint sizeof (t: type) : Z := | Tint I16 _ _ => 2 | Tint I32 _ _ => 4 | Tint IBool _ _ => 1 + | Tlong _ _ => 8 | Tfloat F32 _ => 4 | Tfloat F64 _ => 8 | Tpointer _ _ => 4 @@ -425,6 +428,7 @@ Definition access_mode (ty: type) : mode := | Tint I16 Unsigned _ => By_value Mint16unsigned | Tint I32 _ _ => By_value Mint32 | Tint IBool _ _ => By_value Mint8unsigned + | Tlong _ _ => By_value Mint64 | Tfloat F32 _ => By_value Mfloat32 | Tfloat F64 _ => By_value Mfloat64 | Tvoid => By_nothing @@ -458,6 +462,7 @@ Fixpoint unroll_composite (ty: type) : type := match ty with | Tvoid => ty | Tint _ _ _ => ty + | Tlong _ _ => ty | Tfloat _ _ => ty | Tpointer t1 a => Tpointer (unroll_composite t1) a | Tarray t1 sz a => Tarray (unroll_composite t1) sz a @@ -526,6 +531,7 @@ Fixpoint type_of_params (params: list (ident * type)) : typelist := Definition typ_of_type (t: type) : AST.typ := match t with | Tfloat _ _ => AST.Tfloat + | Tlong _ _ => AST.Tlong | _ => AST.Tint end. @@ -533,6 +539,7 @@ Definition opttyp_of_type (t: type) : option AST.typ := match t with | Tvoid => None | Tfloat _ _ => Some AST.Tfloat + | Tlong _ _ => Some AST.Tlong | _ => Some AST.Tint end. diff --git a/cfrontend/Initializers.v b/cfrontend/Initializers.v index 7711ade..657f607 100644 --- a/cfrontend/Initializers.v +++ b/cfrontend/Initializers.v @@ -54,7 +54,7 @@ Fixpoint constval (a: expr) : res val := match a with | Eval v ty => match v with - | Vint _ | Vfloat _ => OK v + | Vint _ | Vfloat _ | Vlong _ => OK v | Vptr _ _ | Vundef => Error(msg "illegal constant") end | Evalof l ty => @@ -152,6 +152,7 @@ Definition transl_init_single (ty: type) (a: expr) : res init_data := | Vint n, Tint I32 sg _ => OK(Init_int32 n) | Vint n, Tpointer _ _ => OK(Init_int32 n) | Vint n, Tcomp_ptr _ _ => OK(Init_int32 n) + | Vlong n, Tlong _ _ => OK(Init_int64 n) | Vfloat f, Tfloat F32 _ => OK(Init_float32 f) | Vfloat f, Tfloat F64 _ => OK(Init_float64 f) | Vptr (Zpos id) ofs, Tint I32 sg _ => OK(Init_addrof id ofs) diff --git a/cfrontend/Initializersproof.v b/cfrontend/Initializersproof.v index 75b73ff..c2ca135 100644 --- a/cfrontend/Initializersproof.v +++ b/cfrontend/Initializersproof.v @@ -333,45 +333,19 @@ Qed. (** * Soundness of the compile-time evaluator *) -(** [match_val v v'] holds if the compile-time value [v] - (with symbolic pointers) matches the run-time value [v'] - (with concrete pointers). *) - -Inductive match_val: val -> val -> Prop := - | match_vint: forall n, - match_val (Vint n) (Vint n) - | match_vfloat: forall f, - match_val (Vfloat f) (Vfloat f) - | match_vptr: forall id b ofs, - Genv.find_symbol ge id = Some b -> - match_val (Vptr (Zpos id) ofs) (Vptr b ofs) - | match_vundef: - match_val Vundef Vundef. - -Lemma match_val_of_bool: - forall b, match_val (Val.of_bool b) (Val.of_bool b). -Proof. - destruct b; constructor. -Qed. - -Hint Constructors match_val: mval. -Hint Resolve match_val_of_bool: mval. - -(** The [match_val] relation commutes with the evaluation functions - from [Csem]. *) - -Lemma sem_unary_match: - forall op ty v1 v v1' v', - sem_unary_operation op v1 ty = Some v -> - sem_unary_operation op v1' ty = Some v' -> - match_val v1 v1' -> - match_val v v'. -Proof. - intros. destruct op; simpl in *. - unfold sem_notbool in *. destruct (classify_bool ty); inv H1; inv H; inv H0; auto with mval. constructor. - unfold sem_notint in *. destruct (classify_notint ty); inv H1; inv H; inv H0; auto with mval. - unfold sem_neg in *. destruct (classify_neg ty); inv H1; inv H; inv H0; auto with mval. -Qed. +(** A global environment [ge] induces a memory injection mapping + our symbolic pointers [Vptr (Zpos id) ofs] to run-time pointers + [Vptr b ofs] where [Genv.find_symbol ge id = Some b]. *) + +Definition inj (b: block) := + match b with + | Zpos id => + match Genv.find_symbol ge id with + | Some b' => Some (b', 0) + | None => None + end + | _ => None + end. Lemma mem_empty_not_valid_pointer: forall b ofs, Mem.valid_pointer Mem.empty b ofs = false. @@ -387,104 +361,18 @@ Proof. now rewrite !mem_empty_not_valid_pointer. Qed. -Lemma sem_cmp_match: - forall c v1 ty1 v2 ty2 m v v1' v2' v', - sem_cmp c v1 ty1 v2 ty2 Mem.empty = Some v -> - sem_cmp c v1' ty1 v2' ty2 m = Some v' -> - match_val v1 v1' -> match_val v2 v2' -> - match_val v v'. -Proof. -Opaque zeq. - intros. unfold sem_cmp in *. - destruct (classify_cmp ty1 ty2); try (destruct s); inv H1; inv H2; inv H; inv H0; auto with mval. -- destruct (Int.eq n Int.zero); try discriminate. - unfold Val.cmp_different_blocks in *. destruct c; inv H3; inv H2; constructor. -- destruct (Int.eq n Int.zero); try discriminate. - unfold Val.cmp_different_blocks in *. destruct c; inv H2; inv H1; constructor. -- destruct (zeq (Z.pos id) (Z.pos id0)); discriminate. -Qed. - -Lemma sem_binary_match: - forall op v1 ty1 v2 ty2 m v v1' v2' v', - sem_binary_operation op v1 ty1 v2 ty2 Mem.empty = Some v -> - sem_binary_operation op v1' ty1 v2' ty2 m = Some v' -> - match_val v1 v1' -> match_val v2 v2' -> - match_val v v'. -Proof. - intros. unfold sem_binary_operation in *; destruct op. -(* add *) - unfold sem_add in *. destruct (classify_add ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval. -(* sub *) - unfold sem_sub in *. destruct (classify_sub ty1 ty2); inv H1; inv H2; try (inv H; inv H0; auto with mval; fail). - destruct (zeq (Zpos id) (Zpos id0)); try discriminate. - assert (b0 = b) by congruence. subst b0. rewrite zeq_true in H0. - destruct (Int.eq (Int.repr (sizeof ty)) Int.zero); inv H; inv H0; auto with mval. -(* mul *) - unfold sem_mul in *. destruct (classify_mul ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval. -(* div *) - unfold sem_div in H0. functional inversion H; rewrite H4 in H0; inv H1; inv H2; inv H0. - inv H12. rewrite H11 in H2. inv H2. constructor. - inv H12. rewrite H11 in H2. inv H2. constructor. - inv H11. constructor. - inv H11. constructor. - inv H11. constructor. -(* mod *) - unfold sem_mod in H0. functional inversion H; rewrite H4 in H0; inv H1; inv H2; inv H0. - inv H12. rewrite H11 in H2. inv H2. constructor. - inv H12. rewrite H11 in H2. inv H2. constructor. -(* and *) - unfold sem_and in *. destruct (classify_binint ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval. -(* or *) - unfold sem_or in *. destruct (classify_binint ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval. -(* xor *) - unfold sem_xor in *. destruct (classify_binint ty1 ty2); inv H1; inv H2; inv H; inv H0; auto with mval. -(* shl *) - unfold sem_shl in *. destruct (classify_shift ty1 ty2); inv H1; inv H2; try discriminate. - destruct (Int.ltu n0 Int.iwordsize); inv H0; inv H; constructor. -(* shr *) - unfold sem_shr in *. destruct (classify_shift ty1 ty2); try discriminate; - destruct s; inv H1; inv H2; try discriminate. - destruct (Int.ltu n0 Int.iwordsize); inv H0; inv H; constructor. - destruct (Int.ltu n0 Int.iwordsize); inv H0; inv H; constructor. -(* comparisons *) - eapply sem_cmp_match; eauto. - eapply sem_cmp_match; eauto. - eapply sem_cmp_match; eauto. - eapply sem_cmp_match; eauto. - eapply sem_cmp_match; eauto. - eapply sem_cmp_match; eauto. -Qed. - Lemma sem_cast_match: - forall v1 ty1 ty2 v2, sem_cast v1 ty1 ty2 = Some v2 -> - forall v1' v2', match_val v1' v1 -> do_cast v1' ty1 ty2 = OK v2' -> - match_val v2' v2. + forall v1 ty1 ty2 v2 v1' v2', + sem_cast v1 ty1 ty2 = Some v2 -> + do_cast v1' ty1 ty2 = OK v2' -> + val_inject inj v1' v1 -> + val_inject inj v2' v2. Proof. - intros. unfold do_cast in H1. destruct (sem_cast v1' ty1 ty2) as [v2''|] eqn:?; inv H1. - unfold sem_cast in H; functional inversion Heqo; subst. - rewrite H2 in H. inv H0. inv H. constructor. - rewrite H2 in H. inv H0. inv H. constructor; auto. - rewrite H2 in H. inv H0. inv H. constructor. - rewrite H2 in H. inv H0. inv H. constructor. - rewrite H2 in H. inv H0. inv H. constructor. - rewrite H2 in H. inv H0. destruct (cast_float_int si2 f); inv H. inv H7. constructor. - rewrite H2 in H. inv H0. inv H. rewrite H7. constructor. - rewrite H2 in H. inv H0. inv H. rewrite H7. constructor. - rewrite H2 in H. inv H0. inv H. constructor. - rewrite H2 in H. inv H0. inv H. constructor. - rewrite H2 in H. destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H. auto. - rewrite H2 in H. destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H. auto. - rewrite H5 in H. inv H. auto. + intros. unfold do_cast in H0. destruct (sem_cast v1' ty1 ty2) as [v2''|] eqn:E; inv H0. + exploit sem_cast_inject. eexact E. eauto. + intros [v' [A B]]. congruence. Qed. -Lemma bool_val_match: - forall v v' ty, - match_val v v' -> - bool_val v ty = bool_val v' ty. -Proof. - intros. inv H; auto. -Qed. - (** Soundness of [constval] with respect to the big-step semantics *) Lemma constval_rvalue: @@ -492,13 +380,13 @@ Lemma constval_rvalue: eval_simple_rvalue empty_env m a v -> forall v', constval a = OK v' -> - match_val v' v + val_inject inj v' v with constval_lvalue: forall m a b ofs, eval_simple_lvalue empty_env m a b ofs -> forall v', constval a = OK v' -> - match_val v' (Vptr b ofs). + val_inject inj v' (Vptr b ofs). Proof. (* rvalue *) induction 1; intros vres CV; simpl in CV; try (monadInv CV). @@ -509,11 +397,18 @@ Proof. (* addrof *) eauto. (* unop *) - revert EQ0. caseEq (sem_unary_operation op x (typeof r1)); intros; inv EQ0. - eapply sem_unary_match; eauto. + destruct (sem_unary_operation op x (typeof r1)) as [v1'|] eqn:E; inv EQ0. + exploit sem_unary_operation_inject. eexact E. eauto. + intros [v' [A B]]. congruence. (* binop *) - revert EQ2. caseEq (sem_binary_operation op x (typeof r1) x0 (typeof r2) Mem.empty); intros; inv EQ2. - eapply sem_binary_match; eauto. + destruct (sem_binary_operation op x (typeof r1) x0 (typeof r2) Mem.empty) as [v1'|] eqn:E; inv EQ2. + exploit (sem_binary_operation_inj inj Mem.empty m). + intros. rewrite mem_empty_not_valid_pointer in H3; discriminate. + intros. rewrite mem_empty_not_weak_valid_pointer in H3; discriminate. + intros. rewrite mem_empty_not_weak_valid_pointer in H3; discriminate. + intros. rewrite mem_empty_not_valid_pointer in H3; discriminate. + eauto. eauto. eauto. + intros [v' [A B]]. congruence. (* cast *) eapply sem_cast_match; eauto. (* sizeof *) @@ -521,18 +416,26 @@ Proof. (* alignof *) constructor. (* seqand *) - rewrite (bool_val_match x v1 (typeof r1)) in EQ2; auto. rewrite H0 in EQ2. - monadInv EQ2. eapply sem_cast_match; eauto. eapply sem_cast_match; eauto. - rewrite (bool_val_match x v1 (typeof r1)) in EQ2; auto. rewrite H0 in EQ2. - monadInv EQ2. constructor. + destruct (bool_val x (typeof r1)) as [b|] eqn:E; inv EQ2. + exploit bool_val_inject. eexact E. eauto. intros E'. + assert (b = true) by congruence. subst b. monadInv H5. + eapply sem_cast_match; eauto. eapply sem_cast_match; eauto. + destruct (bool_val x (typeof r1)) as [b|] eqn:E; inv EQ2. + exploit bool_val_inject. eexact E. eauto. intros E'. + assert (b = false) by congruence. subst b. inv H2. auto. (* seqor *) - rewrite (bool_val_match x v1 (typeof r1)) in EQ2; auto. rewrite H0 in EQ2. - monadInv EQ2. eapply sem_cast_match; eauto. eapply sem_cast_match; eauto. - rewrite (bool_val_match x v1 (typeof r1)) in EQ2; auto. rewrite H0 in EQ2. - monadInv EQ2. constructor. + destruct (bool_val x (typeof r1)) as [b|] eqn:E; inv EQ2. + exploit bool_val_inject. eexact E. eauto. intros E'. + assert (b = false) by congruence. subst b. monadInv H5. + eapply sem_cast_match; eauto. eapply sem_cast_match; eauto. + destruct (bool_val x (typeof r1)) as [b|] eqn:E; inv EQ2. + exploit bool_val_inject. eexact E. eauto. intros E'. + assert (b = true) by congruence. subst b. inv H2. auto. (* conditional *) - rewrite (bool_val_match x v1 (typeof r1)) in EQ3; auto. - rewrite H0 in EQ3. destruct b; eapply sem_cast_match; eauto. + destruct (bool_val x (typeof r1)) as [b'|] eqn:E; inv EQ3. + exploit bool_val_inject. eexact E. eauto. intros E'. + assert (b' = b) by congruence. subst b'. + destruct b; eapply sem_cast_match; eauto. (* comma *) auto. (* paren *) @@ -543,12 +446,14 @@ Proof. (* var local *) unfold empty_env in H. rewrite PTree.gempty in H. congruence. (* var_global *) - constructor; auto. + econstructor. unfold inj. rewrite H0. eauto. auto. (* deref *) eauto. (* field struct *) rewrite H0 in CV. monadInv CV. exploit constval_rvalue; eauto. intro MV. inv MV. - simpl. replace x with delta by congruence. constructor. auto. + simpl. replace x with delta by congruence. econstructor; eauto. + rewrite ! Int.add_assoc. f_equal. apply Int.add_commut. + simpl. auto. (* field union *) rewrite H0 in CV. eauto. Qed. @@ -569,7 +474,7 @@ Theorem constval_steps: forall f r m v v' ty m', star step ge (ExprState f r Kstop empty_env m) E0 (ExprState f (Eval v' ty) Kstop empty_env m') -> constval r = OK v -> - m' = m /\ ty = typeof r /\ match_val v v'. + m' = m /\ ty = typeof r /\ val_inject inj v v'. Proof. intros. exploit eval_simple_steps; eauto. eapply constval_simple; eauto. intros [A [B C]]. intuition. eapply constval_rvalue; eauto. @@ -595,19 +500,23 @@ Proof. inv D. (* int *) destruct ty; try discriminate. - destruct i; inv EQ2. + destruct i0; inv EQ2. destruct s; simpl in H2; inv H2. rewrite <- Mem.store_signed_unsigned_8; auto. auto. destruct s; simpl in H2; inv H2. rewrite <- Mem.store_signed_unsigned_16; auto. auto. simpl in H2; inv H2. assumption. inv EQ2. simpl in H2; inv H2. assumption. + (* long *) + destruct ty; inv EQ2. simpl in H2; inv H2. assumption. (* float *) destruct ty; try discriminate. destruct f1; inv EQ2; simpl in H2; inv H2; assumption. (* pointer *) - assert (data = Init_addrof id ofs0 /\ chunk = Mint32). + unfold inj in H. destruct b1; try discriminate. + assert (data = Init_addrof p ofs1 /\ chunk = Mint32). destruct ty; inv EQ2; inv H2. destruct i; inv H5. intuition congruence. auto. - destruct H4; subst. rewrite H. assumption. + destruct H4; subst. destruct (Genv.find_symbol ge p); inv H. + rewrite Int.add_zero in H3. auto. (* undef *) discriminate. Qed. @@ -624,12 +533,15 @@ Proof. destruct ty; try discriminate. destruct i0; inv EQ2; reflexivity. inv EQ2; reflexivity. + inv EQ2; reflexivity. + destruct ty; inv EQ2; reflexivity. destruct ty; try discriminate. destruct f0; inv EQ2; reflexivity. destruct b; try discriminate. destruct ty; try discriminate. destruct i0; inv EQ2; reflexivity. inv EQ2; reflexivity. + inv EQ2; reflexivity. Qed. Notation idlsize := Genv.init_data_list_size. diff --git a/cfrontend/PrintClight.ml b/cfrontend/PrintClight.ml index d61afa3..a8b2b98 100644 --- a/cfrontend/PrintClight.ml +++ b/cfrontend/PrintClight.ml @@ -43,6 +43,7 @@ let rec precedence = function | Efield _ -> (16, LtoR) | Econst_int _ -> (16, NA) | Econst_float _ -> (16, NA) + | Econst_long _ -> (16, NA) | Eunop _ -> (15, RtoL) | Eaddrof _ -> (15, RtoL) | Ecast _ -> (14, RtoL) @@ -79,6 +80,8 @@ let rec expr p (prec, e) = fprintf p "%ld" (camlint_of_coqint n) | Econst_float(f, _) -> fprintf p "%F" (camlfloat_of_coqfloat f) + | Econst_long(n, _) -> + fprintf p "%LdLL" (camlint64_of_coqint n) | Eunop(op, a1, _) -> fprintf p "%s%a" (name_unop op) expr (prec', a1) | Eaddrof(a1, _) -> @@ -258,6 +261,7 @@ let rec collect_expr e = match e with | Econst_int _ -> () | Econst_float _ -> () + | Econst_long _ -> () | Evar _ -> () | Etempvar _ -> () | Ederef(r, _) -> collect_expr r diff --git a/cfrontend/PrintCsyntax.ml b/cfrontend/PrintCsyntax.ml index b1ee234..f91dca6 100644 --- a/cfrontend/PrintCsyntax.ml +++ b/cfrontend/PrintCsyntax.ml @@ -64,6 +64,11 @@ let name_floattype sz = | F32 -> "float" | F64 -> "double" +let name_longtype sg = + match sg with + | Signed -> "long long" + | Unsigned -> "unsigned long long" + (* Collecting the names and fields of structs and unions *) module StructUnion = Map.Make(String) @@ -91,6 +96,8 @@ let rec name_cdecl id ty = name_inttype sz sg ^ attributes a ^ name_optid id | Tfloat(sz, a) -> name_floattype sz ^ attributes a ^ name_optid id + | Tlong(sg, a) -> + name_longtype sg ^ attributes a ^ name_optid id | Tpointer(t, a) -> let id' = match t with @@ -174,6 +181,7 @@ let print_value p v = match v with | Vint n -> fprintf p "%ld" (camlint_of_coqint n) | Vfloat f -> fprintf p "%F" (camlfloat_of_coqfloat f) + | Vlong n -> fprintf p "%Ld" (camlint64_of_coqint n) | Vptr(b, ofs) -> fprintf p "<ptr%a>" !print_pointer_hook (b, ofs) | Vundef -> fprintf p "<undef>" @@ -397,6 +405,7 @@ let print_init p = function | Init_int8 n -> fprintf p "%ld,@ " (camlint_of_coqint n) | Init_int16 n -> fprintf p "%ld,@ " (camlint_of_coqint n) | Init_int32 n -> fprintf p "%ld,@ " (camlint_of_coqint n) + | Init_int64 n -> fprintf p "%Ld,@ " (camlint64_of_coqint n) | Init_float32 n -> fprintf p "%F,@ " (camlfloat_of_coqfloat n) | Init_float64 n -> fprintf p "%F,@ " (camlfloat_of_coqfloat n) | Init_space n -> fprintf p "/* skip %ld, */@ " (camlint_of_coqint n) @@ -444,6 +453,7 @@ let rec collect_type = function | Tvoid -> () | Tint _ -> () | Tfloat _ -> () + | Tlong _ -> () | Tpointer(t, _) -> collect_type t | Tarray(t, _, _) -> collect_type t | Tfunction(args, res) -> collect_type_list args; collect_type res diff --git a/cfrontend/SimplExpr.v b/cfrontend/SimplExpr.v index 2dbd97e..98dad93 100644 --- a/cfrontend/SimplExpr.v +++ b/cfrontend/SimplExpr.v @@ -108,6 +108,7 @@ Function eval_simpl_expr (a: expr) : option val := match a with | Econst_int n _ => Some(Vint n) | Econst_float n _ => Some(Vfloat n) + | Econst_long n _ => Some(Vlong n) | Ecast b ty => match eval_simpl_expr b with | None => None @@ -222,6 +223,8 @@ Fixpoint transl_expr (dst: destination) (a: Csyntax.expr) : mon (list statement ret (finish dst nil (Econst_int n ty)) | Csyntax.Eval (Vfloat n) ty => ret (finish dst nil (Econst_float n ty)) + | Csyntax.Eval (Vlong n) ty => + ret (finish dst nil (Econst_long n ty)) | Csyntax.Eval _ ty => error (msg "SimplExpr.transl_expr: Eval") | Csyntax.Esizeof ty' ty => diff --git a/cfrontend/SimplExprspec.v b/cfrontend/SimplExprspec.v index b31738b..1e7a84c 100644 --- a/cfrontend/SimplExprspec.v +++ b/cfrontend/SimplExprspec.v @@ -768,6 +768,10 @@ Opaque makeif. constructor. auto. intros; constructor. constructor. constructor. auto. intros; constructor. + intros. destruct dst; simpl in *; inv H2. + constructor. auto. intros; constructor. + constructor. + constructor. auto. intros; constructor. (* var *) monadInv H; econstructor; split; auto with gensym. UseFinish. constructor. (* field *) diff --git a/cfrontend/SimplLocals.v b/cfrontend/SimplLocals.v index 18eeea2..edcd5fe 100644 --- a/cfrontend/SimplLocals.v +++ b/cfrontend/SimplLocals.v @@ -46,6 +46,7 @@ Definition make_cast (a: expr) (tto: type) : expr := | cast_case_neutral => a | cast_case_i2i I32 _ => a | cast_case_f2f F64 => a + | cast_case_l2l => a | cast_case_struct _ _ _ _ => a | cast_case_union _ _ _ _ => a | cast_case_void => a @@ -58,6 +59,7 @@ Fixpoint simpl_expr (cenv: compilenv) (a: expr) : expr := match a with | Econst_int _ _ => a | Econst_float _ _ => a + | Econst_long _ _ => a | Evar id ty => if VSet.mem id cenv then Etempvar id ty else Evar id ty | Etempvar id ty => Etempvar id ty | Ederef a1 ty => Ederef (simpl_expr cenv a1) ty @@ -156,6 +158,7 @@ Fixpoint addr_taken_expr (a: expr): VSet.t := match a with | Econst_int _ _ => VSet.empty | Econst_float _ _ => VSet.empty + | Econst_long _ _ => VSet.empty | Evar id ty => VSet.empty | Etempvar id ty => VSet.empty | Ederef a1 ty => addr_taken_expr a1 @@ -224,8 +227,7 @@ Definition add_lifted (cenv: compilenv) (vars1 vars2: list (ident * type)) := Definition transf_function (f: function) : res function := let cenv := cenv_for f in - do x <- assertion (list_disjoint_dec ident_eq (var_names f.(fn_params)) - (var_names f.(fn_temps))); + assertion (list_disjoint_dec ident_eq (var_names f.(fn_params)) (var_names f.(fn_temps))); do body' <- simpl_stmt cenv f.(fn_body); OK {| fn_return := f.(fn_return); fn_params := f.(fn_params); diff --git a/cfrontend/SimplLocalsproof.v b/cfrontend/SimplLocalsproof.v index e3aa4e2..1dcf630 100644 --- a/cfrontend/SimplLocalsproof.v +++ b/cfrontend/SimplLocalsproof.v @@ -202,16 +202,22 @@ Inductive val_casted: val -> type -> Prop := | val_casted_float: forall sz attr n, cast_float_float sz n = n -> val_casted (Vfloat n) (Tfloat sz attr) + | val_casted_long: forall si attr n, + val_casted (Vlong n) (Tlong si attr) | val_casted_ptr_ptr: forall b ofs ty attr, val_casted (Vptr b ofs) (Tpointer ty attr) | val_casted_int_ptr: forall n ty attr, val_casted (Vint n) (Tpointer ty attr) | val_casted_ptr_int: forall b ofs si attr, val_casted (Vptr b ofs) (Tint I32 si attr) - | val_casted_struct: forall id fld attr v, - val_casted v (Tstruct id fld attr) - | val_casted_union: forall id fld attr v, - val_casted v (Tunion id fld attr) + | val_casted_ptr_cptr: forall b ofs id attr, + val_casted (Vptr b ofs) (Tcomp_ptr id attr) + | val_casted_int_cptr: forall n id attr, + val_casted (Vint n) (Tcomp_ptr id attr) + | val_casted_struct: forall id fld attr b ofs, + val_casted (Vptr b ofs) (Tstruct id fld attr) + | val_casted_union: forall id fld attr b ofs, + val_casted (Vptr b ofs) (Tunion id fld attr) | val_casted_void: forall v, val_casted v Tvoid. @@ -240,12 +246,15 @@ Proof. constructor. (* int *) destruct i; destruct ty; simpl in H; try discriminate; destruct v; inv H. - constructor. apply (cast_int_int_idem I8 s). + constructor. apply (cast_int_int_idem I8 s). + constructor. apply (cast_int_int_idem I8 s). destruct (cast_float_int s f0); inv H1. constructor. apply (cast_int_int_idem I8 s). - constructor. apply (cast_int_int_idem I16 s). + constructor. apply (cast_int_int_idem I16 s). + constructor. apply (cast_int_int_idem I16 s). destruct (cast_float_int s f0); inv H1. constructor. apply (cast_int_int_idem I16 s). constructor. auto. - constructor. + constructor. + constructor. auto. destruct (cast_float_int s f0); inv H1. constructor. auto. constructor. auto. constructor. @@ -253,7 +262,10 @@ Proof. constructor. constructor; auto. constructor; auto. + constructor; auto. + constructor; auto. constructor. simpl. destruct (Int.eq i0 Int.zero); auto. + constructor. simpl. destruct (Int64.eq i Int64.zero); auto. constructor. simpl. destruct (Float.cmp Ceq f0 Float.zero); auto. constructor. simpl. destruct (Int.eq i Int.zero); auto. constructor; auto. @@ -261,20 +273,31 @@ Proof. constructor; auto. constructor. simpl. destruct (Int.eq i Int.zero); auto. constructor; auto. + constructor. simpl. destruct (Int.eq i0 Int.zero); auto. + constructor; auto. +(* long *) + destruct ty; try discriminate. + destruct v; inv H. constructor. + destruct v; inv H. constructor. + destruct v; try discriminate. destruct (cast_float_long s f0); inv H. constructor. (* float *) destruct ty; simpl in H; try discriminate; destruct v; inv H. constructor. apply cast_float_float_idem. constructor. apply cast_float_float_idem. + constructor. apply cast_float_float_idem. (* pointer *) destruct ty; simpl in H; try discriminate; destruct v; inv H; try constructor. (* impossible cases *) discriminate. discriminate. -(* structs,unions *) - constructor. - constructor. -(* impossible cases *) - discriminate. +(* structs *) + destruct ty; try discriminate; destruct v; try discriminate. + destruct (ident_eq i0 i && fieldlist_eq f0 f); inv H; constructor. +(* unions *) + destruct ty; try discriminate; destruct v; try discriminate. + destruct (ident_eq i0 i && fieldlist_eq f0 f); inv H; constructor. +(* comp_ptr *) + destruct ty; simpl in H; try discriminate; destruct v; inv H; constructor. Qed. Lemma val_casted_load_result: @@ -293,6 +316,9 @@ Proof. inv H0; auto. inv H0; auto. inv H0; auto. + inv H0; auto. + discriminate. + discriminate. discriminate. discriminate. discriminate. @@ -301,24 +327,18 @@ Qed. Lemma cast_val_casted: forall v ty, val_casted v ty -> sem_cast v ty ty = Some v. Proof. - intros. inversion H; clear H; subst v ty; unfold sem_cast; simpl. + intros. inversion H; clear H; subst v ty; unfold sem_cast; simpl; auto. destruct sz; congruence. congruence. - auto. - auto. - auto. unfold proj_sumbool; repeat rewrite dec_eq_true; auto. unfold proj_sumbool; repeat rewrite dec_eq_true; auto. - auto. Qed. Lemma val_casted_inject: forall f v v' ty, val_inject f v v' -> val_casted v ty -> val_casted v' ty. Proof. - intros. inv H. - auto. - auto. + intros. inv H; auto. inv H0; constructor. inv H0; constructor. Qed. @@ -356,7 +376,10 @@ Proof. destruct v1; inv H0; auto. destruct sz2; auto. destruct v1; inv H0; auto. destruct sz2; auto. destruct v1; inv H0; auto. + destruct v1; inv H0; auto. + destruct v1; try discriminate. destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H0; auto. + destruct v1; try discriminate. destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H0; auto. inv H0; auto. Qed. @@ -1324,176 +1347,6 @@ Proof. exists (Vptr loc' ofs'); split; auto. eapply deref_loc_copy; eauto. Qed. -Remark val_inject_vtrue: forall f, val_inject f Vtrue Vtrue. -Proof. unfold Vtrue; auto. Qed. - -Remark val_inject_vfalse: forall f, val_inject f Vfalse Vfalse. -Proof. unfold Vfalse; auto. Qed. - -Remark val_inject_of_bool: forall f b, val_inject f (Val.of_bool b) (Val.of_bool b). -Proof. intros. unfold Val.of_bool. destruct b; [apply val_inject_vtrue|apply val_inject_vfalse]. -Qed. - -Hint Resolve val_inject_vtrue val_inject_vfalse val_inject_of_bool. - -Ltac TrivialInject := - match goal with - | |- exists v', Some ?v = Some v' /\ _ => exists v; split; auto -(* - | |- exists v', _ /\ val_inject _ (Vint ?n) _ => exists (Vint n); split; auto - | |- exists v', _ /\ val_inject _ (Vfloat ?n) _ => exists (Vfloat n); split; auto - | |- exists v', _ /\ val_inject _ Vtrue _ => exists Vtrue; split; auto - | |- exists v', _ /\ val_inject _ Vfalse _ => exists Vfalse; split; auto - | |- exists v', _ /\ val_inject _ (Val.of_bool ?b) _ => exists (Val.of_bool b); split; auto -*) - | _ => idtac - end. - -Lemma sem_unary_operation_inject: - forall op v1 ty v tv1, - sem_unary_operation op v1 ty = Some v -> - val_inject f v1 tv1 -> - exists tv, sem_unary_operation op tv1 ty = Some tv /\ val_inject f v tv. -Proof. - unfold sem_unary_operation; intros. destruct op. - (* notbool *) - unfold sem_notbool in *; destruct (classify_bool ty); inv H0; inv H; TrivialInject. - (* notint *) - unfold sem_notint in *; destruct (classify_notint ty); inv H0; inv H; TrivialInject. - (* neg *) - unfold sem_neg in *; destruct (classify_neg ty); inv H0; inv H; TrivialInject. -Qed. - -Lemma sem_cmp_inject: - forall cmp v1 tv1 ty1 v2 tv2 ty2 v, - sem_cmp cmp v1 ty1 v2 ty2 m = Some v -> - val_inject f v1 tv1 -> - val_inject f v2 tv2 -> - exists tv, sem_cmp cmp tv1 ty1 tv2 ty2 tm = Some tv /\ val_inject f v tv. -Proof. - unfold sem_cmp; intros. - assert (MM: option_map Val.of_bool (Val.cmp_different_blocks cmp) = Some v -> - exists tv, option_map Val.of_bool (Val.cmp_different_blocks cmp) = Some tv /\ val_inject f v tv). - intros. exists v; split; auto. - destruct cmp; simpl in H2; inv H2; auto. - - destruct (classify_cmp ty1 ty2); try destruct s; inv H0; try discriminate; inv H1; inv H; TrivialInject. - destruct (Int.eq i Int.zero); try discriminate; auto. - destruct (Int.eq i Int.zero); try discriminate; auto. - - destruct (zeq b1 b0); subst. - rewrite H0 in H2. inv H2. rewrite zeq_true. - destruct (Mem.weak_valid_pointer m b0 (Int.unsigned ofs1)) eqn:?; try discriminate. - destruct (Mem.weak_valid_pointer m b0 (Int.unsigned ofs0)) eqn:?; try discriminate. - simpl H3. - rewrite (Mem.weak_valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb) by eauto. - rewrite (Mem.weak_valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb0) by eauto. - simpl. replace (Int.cmpu cmp (Int.add ofs1 (Int.repr delta)) - (Int.add ofs0 (Int.repr delta))) - with (Int.cmpu cmp ofs1 ofs0). - inv H3; TrivialInject. - symmetry. apply Int.translate_cmpu. - eapply Mem.weak_valid_pointer_inject_no_overflow; eauto. - eapply Mem.weak_valid_pointer_inject_no_overflow; eauto. - destruct (Mem.valid_pointer m b1 (Int.unsigned ofs1)) eqn:?; try discriminate. - destruct (Mem.valid_pointer m b0 (Int.unsigned ofs0)) eqn:?; try discriminate. - destruct (zeq b2 b3); subst. - rewrite Mem.valid_pointer_implies - by (eapply (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb); eauto). - rewrite Mem.valid_pointer_implies - by (eapply (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb0); eauto). - simpl. - exploit Mem.different_pointers_inject; eauto. intros [[]|A]. easy. - destruct cmp; simpl in H3; inv H3. - simpl. unfold Int.eq. rewrite zeq_false; auto. - simpl. unfold Int.eq. rewrite zeq_false; auto. - rewrite (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb) by eauto. - rewrite (Mem.valid_pointer_inject_val _ _ _ _ _ _ _ MEMINJ Heqb0) by eauto. - simpl in H3 |- *. auto. -Qed. - -Lemma sem_binary_operation_inject: - forall op v1 ty1 v2 ty2 v tv1 tv2, - sem_binary_operation op v1 ty1 v2 ty2 m = Some v -> - val_inject f v1 tv1 -> val_inject f v2 tv2 -> - exists tv, sem_binary_operation op tv1 ty1 tv2 ty2 tm = Some tv /\ val_inject f v tv. -Proof. - unfold sem_binary_operation; intros. destruct op. -(* add *) - unfold sem_add in *; destruct (classify_add ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject. - econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. - econstructor. eauto. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. -(* sub *) - unfold sem_sub in *; destruct (classify_sub ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject. - econstructor. eauto. rewrite Int.sub_add_l. auto. - destruct (zeq b1 b0); try discriminate. subst b1. rewrite H0 in H2; inv H2. - rewrite zeq_true. destruct (Int.eq (Int.repr (sizeof ty)) Int.zero); inv H3. - rewrite Int.sub_shifted. TrivialInject. -(* mul *) - unfold sem_mul in *; destruct (classify_mul ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject. -(* div *) - unfold sem_div in *; destruct (classify_div ty1 ty2); try destruct s; inv H0; try discriminate; inv H1; inv H; TrivialInject. - destruct ( Int.eq i0 Int.zero - || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H1; TrivialInject. - destruct (Int.eq i0 Int.zero); inv H1; TrivialInject. -(* mod *) - unfold sem_mod in *; destruct (classify_binint ty1 ty2); try destruct s; inv H0; try discriminate; inv H1; inv H; TrivialInject. - destruct ( Int.eq i0 Int.zero - || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H1; TrivialInject. - destruct (Int.eq i0 Int.zero); inv H1; TrivialInject. -(* and *) - unfold sem_and in *; destruct (classify_binint ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject. -(* or *) - unfold sem_or in *; destruct (classify_binint ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject. -(* xor *) - unfold sem_xor in *; destruct (classify_binint ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject. -(* shl *) - unfold sem_shl in *; destruct (classify_shift ty1 ty2); inv H0; try discriminate; inv H1; inv H; TrivialInject. - destruct (Int.ltu i0 Int.iwordsize); inv H1; TrivialInject. -(* shr *) - unfold sem_shr in *; destruct (classify_shift ty1 ty2); try destruct s; inv H0; try discriminate; inv H1; inv H; TrivialInject. - destruct (Int.ltu i0 Int.iwordsize); inv H1; TrivialInject. - destruct (Int.ltu i0 Int.iwordsize); inv H1; TrivialInject. -(* comparisons *) - eapply sem_cmp_inject; eauto. - eapply sem_cmp_inject; eauto. - eapply sem_cmp_inject; eauto. - eapply sem_cmp_inject; eauto. - eapply sem_cmp_inject; eauto. - eapply sem_cmp_inject; eauto. -Qed. - -Lemma sem_cast_inject: - forall v1 ty1 ty v tv1, - sem_cast v1 ty1 ty = Some v -> - val_inject f v1 tv1 -> - exists tv, sem_cast tv1 ty1 ty = Some tv /\ val_inject f v tv. -Proof. - unfold sem_cast; intros. - destruct (classify_cast ty1 ty); try discriminate. - inv H0; inv H; TrivialInject. econstructor; eauto. - inv H0; inv H; TrivialInject. - inv H0; inv H; TrivialInject. - inv H0; inv H; TrivialInject. - inv H0; try discriminate. destruct (cast_float_int si2 f0); inv H. TrivialInject. - inv H0; inv H. TrivialInject. - inv H0; inv H. TrivialInject. - TrivialInject. - destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H. TrivialInject. - destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H. TrivialInject. - inv H; TrivialInject. -Qed. - -Lemma bool_val_inject: - forall v ty b tv, - bool_val v ty = Some b -> - val_inject f v tv -> - bool_val tv ty = Some b. -Proof. - unfold bool_val; intros. - destruct (classify_bool ty); inv H0; congruence. -Qed. - Lemma eval_simpl_expr: forall a v, eval_expr ge e le m a v -> @@ -1512,6 +1365,7 @@ Proof. (* const *) exists (Vint i); split; auto. constructor. exists (Vfloat f0); split; auto. constructor. + exists (Vlong i); split; auto. constructor. (* tempvar *) exploit me_temps; eauto. intros [[tv [A B]] C]. exists tv; split; auto. constructor; auto. @@ -2184,7 +2038,7 @@ Proof. (* goto *) generalize TRF; intros TRF'. monadInv TRF'. - exploit (simpl_find_label j (cenv_for f) m lo tlo lbl (fn_body f) (call_cont k) x0 (call_cont tk)). + exploit (simpl_find_label j (cenv_for f) m lo tlo lbl (fn_body f) (call_cont k) x (call_cont tk)). eauto. eapply match_cont_call_cont. eauto. apply compat_cenv_for. rewrite H. intros [ts' [tk' [A [B [C D]]]]]. @@ -2197,12 +2051,10 @@ Proof. generalize EQ; intro EQ'; monadInv EQ'. assert (list_norepet (var_names (fn_params f ++ fn_vars f))). unfold var_names. rewrite map_app. auto. - assert (list_disjoint (var_names (fn_params f)) (var_names (fn_temps f))). - monadInv EQ0. auto. exploit match_envs_alloc_variables; eauto. instantiate (1 := cenv_for_gen (addr_taken_stmt f.(fn_body)) (fn_params f ++ fn_vars f)). - intros. eapply cenv_for_gen_by_value; eauto. rewrite VSF.mem_iff. eexact H5. - intros. eapply cenv_for_gen_domain. rewrite VSF.mem_iff. eexact H4. + intros. eapply cenv_for_gen_by_value; eauto. rewrite VSF.mem_iff. eexact H4. + intros. eapply cenv_for_gen_domain. rewrite VSF.mem_iff. eexact H3. intros [j' [te [tm0 [A [B [C [D [E F]]]]]]]]. exploit store_params_correct. eauto. @@ -2212,7 +2064,7 @@ Proof. eexact B. eexact C. intros. apply (create_undef_temps_lifted id f). auto. intros. destruct (create_undef_temps (fn_temps f))!id as [v|] eqn:?; auto. - exploit create_undef_temps_inv; eauto. intros [P Q]. elim (H3 id id); auto. + exploit create_undef_temps_inv; eauto. intros [P Q]. elim (l id id); auto. intros [tel [tm1 [P [Q [R [S T]]]]]]. change (cenv_for_gen (addr_taken_stmt (fn_body f)) (fn_params f ++ fn_vars f)) with (cenv_for f) in *. diff --git a/checklink/Asm_printers.ml b/checklink/Asm_printers.ml index aeb4b3f..00b6e2e 100644 --- a/checklink/Asm_printers.ml +++ b/checklink/Asm_printers.ml @@ -221,6 +221,7 @@ let string_of_init_data = function | Init_int8(i) -> "Init_int8(" ^ string_of_int (z_int_lax i) ^ ")" | Init_int16(i) -> "Init_int16(" ^ string_of_int (z_int_lax i) ^ ")" | Init_int32(i) -> "Init_int32(" ^ string_of_int32i (z_int32 i) ^ ")" +| Init_int64(i) -> "Init_int64(" ^ string_of_int64i (z_int64 i) ^ ")" | Init_float32(f) -> "Init_float32(" ^ string_of_ffloat f ^ ")" | Init_float64(f) -> "Init_float64(" ^ string_of_ffloat f ^ ")" | Init_space(z) -> "Init_space(" ^ string_of_int (z_int z) ^ ")" diff --git a/checklink/Check.ml b/checklink/Check.ml index 5821953..73a7310 100644 --- a/checklink/Check.ml +++ b/checklink/Check.ml @@ -2696,6 +2696,14 @@ let compare_data (l: init_data list) (bs: bitstring) (sfw: s_framework) else ERR("Wrong float64") | { _ } -> error ) + | Init_int64(i) -> ( + bitmatch bs with + | { j : 64 : int; bs : -1 : bitstring } -> + if z_int64 i = j + then compare_data_aux l bs (s + 8) sfw + else ERR("Wrong int64") + | { _ } -> error + ) | Init_space(z) -> ( let space_size = z_int z in bitmatch bs with diff --git a/checklink/Library.ml b/checklink/Library.ml index acea80a..69a0d6e 100644 --- a/checklink/Library.ml +++ b/checklink/Library.ml @@ -110,6 +110,8 @@ let z_int z = Safe32.to_int (z_int32 z) let z_int_lax z = Safe32.to_int (z_int32_lax z) +let z_int64 = Camlcoq.Z.to_int64 + (* Some more printers *) let string_of_ffloat f = string_of_float (camlfloat_of_coqfloat f) @@ -144,6 +146,7 @@ let string_of_int32 = Printf.sprintf "0x%08lx" let string_of_int64 = Printf.sprintf "0x%08Lx" (* To print counts/indices *) let string_of_int32i = Int32.to_string +let string_of_int64i = Int64.to_string let string_of_positive p = string_of_int32i (positive_int32 p) diff --git a/common/AST.v b/common/AST.v index 8595b95..1f7f7d3 100644 --- a/common/AST.v +++ b/common/AST.v @@ -17,6 +17,7 @@ the abstract syntax trees of many of the intermediate languages. *) Require Import Coqlib. +Require String. Require Import Errors. Require Import Integers. Require Import Floats. @@ -32,16 +33,19 @@ Definition ident := positive. Definition ident_eq := peq. -(** The intermediate languages are weakly typed, using only two types: - [Tint] for integers and pointers, and [Tfloat] for floating-point - numbers. *) +Parameter ident_of_string : String.string -> ident. + +(** The intermediate languages are weakly typed, using only three + types: [Tint] for 32-bit integers and pointers, [Tfloat] for 64-bit + floating-point numbers, [Tlong] for 64-bit integers. *) Inductive typ : Type := - | Tint : typ - | Tfloat : typ. + | Tint + | Tfloat + | Tlong. Definition typesize (ty: typ) : Z := - match ty with Tint => 4 | Tfloat => 8 end. + match ty with Tint => 4 | Tfloat => 8 | Tlong => 8 end. Lemma typesize_pos: forall ty, typesize ty > 0. Proof. destruct ty; simpl; omega. Qed. @@ -54,6 +58,9 @@ Lemma opt_typ_eq: forall (t1 t2: option typ), {t1=t2} + {t1<>t2}. Proof. decide equality. apply typ_eq. Defined. Global Opaque opt_typ_eq. +Definition list_typ_eq: forall (l1 l2: list typ), {l1=l2} + {l1<>l2} + := list_eq_dec typ_eq. + (** Additionally, function definitions and function calls are annotated by function signatures indicating the number and types of arguments, as well as the type of the returned value if any. These signatures @@ -76,14 +83,15 @@ Definition proj_sig_res (s: signature) : typ := chunk of memory being accessed. *) Inductive memory_chunk : Type := - | Mint8signed : memory_chunk (**r 8-bit signed integer *) - | Mint8unsigned : memory_chunk (**r 8-bit unsigned integer *) - | Mint16signed : memory_chunk (**r 16-bit signed integer *) - | Mint16unsigned : memory_chunk (**r 16-bit unsigned integer *) - | Mint32 : memory_chunk (**r 32-bit integer, or pointer *) - | Mfloat32 : memory_chunk (**r 32-bit single-precision float *) - | Mfloat64 : memory_chunk (**r 64-bit double-precision float *) - | Mfloat64al32 : memory_chunk. (**r 64-bit double-precision float, 4-aligned *) + | Mint8signed (**r 8-bit signed integer *) + | Mint8unsigned (**r 8-bit unsigned integer *) + | Mint16signed (**r 16-bit signed integer *) + | Mint16unsigned (**r 16-bit unsigned integer *) + | Mint32 (**r 32-bit integer, or pointer *) + | Mint64 (**r 64-bit integer *) + | Mfloat32 (**r 32-bit single-precision float *) + | Mfloat64 (**r 64-bit double-precision float *) + | Mfloat64al32. (**r 64-bit double-precision float, 4-aligned *) (** The type (integer/pointer or float) of a chunk. *) @@ -94,6 +102,7 @@ Definition type_of_chunk (c: memory_chunk) : typ := | Mint16signed => Tint | Mint16unsigned => Tint | Mint32 => Tint + | Mint64 => Tlong | Mfloat32 => Tfloat | Mfloat64 => Tfloat | Mfloat64al32 => Tfloat @@ -105,6 +114,7 @@ Inductive init_data: Type := | Init_int8: int -> init_data | Init_int16: int -> init_data | Init_int32: int -> init_data + | Init_int64: int64 -> init_data | Init_float32: float -> init_data | Init_float64: float -> init_data | Init_space: Z -> init_data @@ -549,4 +559,3 @@ Definition transf_partial_fundef (fd: fundef A): res (fundef B) := end. End TRANSF_PARTIAL_FUNDEF. - diff --git a/common/Errors.v b/common/Errors.v index 6b863a0..78e1199 100644 --- a/common/Errors.v +++ b/common/Errors.v @@ -96,30 +96,11 @@ Qed. (** Assertions *) -Definition assertion (b: bool) : res unit := - if b then OK tt else Error(msg "Assertion failed"). +Definition assertion_failed {A: Type} : res A := Error(msg "Assertion failed"). -Remark assertion_inversion: - forall b x, assertion b = OK x -> b = true. -Proof. - unfold assertion; intros. destruct b; inv H; auto. -Qed. - -Remark assertion_inversion_1: - forall (P Q: Prop) (a: {P}+{Q}) x, - assertion (proj_sumbool a) = OK x -> P. -Proof. - intros. exploit assertion_inversion; eauto. - unfold proj_sumbool. destruct a. auto. congruence. -Qed. - -Remark assertion_inversion_2: - forall (P Q: Prop) (a: {P}+{Q}) x, - assertion (negb(proj_sumbool a)) = OK x -> Q. -Proof. - intros. exploit assertion_inversion; eauto. - unfold proj_sumbool. destruct a; simpl. congruence. auto. -Qed. +Notation "'assertion' A ; B" := (if A then B else assertion_failed) + (at level 200, A at level 100, B at level 200) + : error_monad_scope. (** This is the familiar monadic map iterator. *) @@ -180,26 +161,19 @@ Ltac monadInv1 H := destruct (bind2_inversion F G H) as [x1 [x2 [EQ1 EQ2]]]; clear H; try (monadInv1 EQ2))))) - | (assertion (negb (proj_sumbool ?a)) = OK ?X) => - let A := fresh "A" in (generalize (assertion_inversion_2 _ H); intro A); - clear H - | (assertion (proj_sumbool ?a) = OK ?X) => - let A := fresh "A" in (generalize (assertion_inversion_1 _ H); intro A); - clear H - | (assertion ?b = OK ?X) => - let A := fresh "A" in (generalize (assertion_inversion _ H); intro A); - clear H + | (match ?X with left _ => _ | right _ => assertion_failed end = OK _) => + destruct X; [try (monadInv1 H) | discriminate] + | (match (negb ?X) with true => _ | false => assertion_failed end = OK _) => + destruct X as [] eqn:?; [discriminate | try (monadInv1 H)] + | (match ?X with true => _ | false => assertion_failed end = OK _) => + destruct X as [] eqn:?; [try (monadInv1 H) | discriminate] | (mmap ?F ?L = OK ?M) => generalize (mmap_inversion F L H); intro end. Ltac monadInv H := + monadInv1 H || match type of H with - | (OK _ = OK _) => monadInv1 H - | (Error _ = OK _) => monadInv1 H - | (bind ?F ?G = OK ?X) => monadInv1 H - | (bind2 ?F ?G = OK ?X) => monadInv1 H - | (assertion _ = OK _) => monadInv1 H | (?F _ _ _ _ _ _ _ _ = OK _) => ((progress simpl in H) || unfold F in H); monadInv1 H | (?F _ _ _ _ _ _ _ = OK _) => diff --git a/common/Events.v b/common/Events.v index e310bfe..f342799 100644 --- a/common/Events.v +++ b/common/Events.v @@ -55,6 +55,7 @@ Require Import Globalenvs. Inductive eventval: Type := | EVint: int -> eventval + | EVlong: int64 -> eventval | EVfloat: float -> eventval | EVptr_global: ident -> int -> eventval. @@ -267,6 +268,8 @@ Variable ge: Genv.t F V. Inductive eventval_match: eventval -> typ -> val -> Prop := | ev_match_int: forall i, eventval_match (EVint i) Tint (Vint i) + | ev_match_long: forall i, + eventval_match (EVlong i) Tlong (Vlong i) | ev_match_float: forall f, eventval_match (EVfloat f) Tfloat (Vfloat f) | ev_match_ptr: forall id b ofs, @@ -327,7 +330,7 @@ Lemma eventval_match_inject: forall ev ty v1 v2, eventval_match ev ty v1 -> val_inject f v1 v2 -> eventval_match ev ty v2. Proof. - intros. inv H; inv H0. constructor. constructor. + intros. inv H; inv H0; try constructor. destruct glob_pres as [A [B C]]. exploit A; eauto. intro EQ; rewrite H3 in EQ; inv EQ. rewrite Int.add_zero. econstructor; eauto. @@ -337,7 +340,7 @@ Lemma eventval_match_inject_2: forall ev ty v, eventval_match ev ty v -> val_inject f v v. Proof. - induction 1. constructor. constructor. + induction 1; auto. destruct glob_pres as [A [B C]]. exploit A; eauto. intro EQ. econstructor; eauto. rewrite Int.add_zero; auto. @@ -379,6 +382,7 @@ Qed. Definition eventval_valid (ev: eventval) : Prop := match ev with | EVint _ => True + | EVlong _ => True | EVfloat _ => True | EVptr_global id ofs => exists b, Genv.find_symbol ge id = Some b end. @@ -386,6 +390,7 @@ Definition eventval_valid (ev: eventval) : Prop := Definition eventval_type (ev: eventval) : typ := match ev with | EVint _ => Tint + | EVlong _ => Tlong | EVfloat _ => Tfloat | EVptr_global id ofs => Tint end. @@ -396,6 +401,7 @@ Lemma eventval_valid_match: Proof. intros. subst ty. destruct ev; simpl in *. exists (Vint i); constructor. + exists (Vlong i); constructor. exists (Vfloat f0); constructor. destruct H as [b A]. exists (Vptr b i0); constructor; auto. Qed. @@ -1693,3 +1699,190 @@ Proof. intros. exploit external_call_determ. eexact H. eexact H0. intuition. Qed. +(** Late in the back-end, calling conventions for external calls change: + arguments and results of type [Tlong] are passed as two integers. + We now wrap [external_call] to adapt to this convention. *) + +Fixpoint decode_longs (tyl: list typ) (vl: list val) : list val := + match tyl with + | nil => nil + | Tlong :: tys => + match vl with + | v1 :: v2 :: vs => Val.longofwords v1 v2 :: decode_longs tys vs + | _ => nil + end + | ty :: tys => + match vl with + | v1 :: vs => v1 :: decode_longs tys vs + | _ => nil + end + end. + +Definition encode_long (oty: option typ) (v: val) : list val := + match oty with + | Some Tlong => Val.hiword v :: Val.loword v :: nil + | _ => v :: nil + end. + +Definition proj_sig_res' (s: signature) : list typ := + match s.(sig_res) with + | Some Tlong => Tint :: Tint :: nil + | Some ty => ty :: nil + | None => Tint :: nil + end. + +Inductive external_call' + (ef: external_function) (F V: Type) (ge: Genv.t F V) + (vargs: list val) (m1: mem) (t: trace) (vres: list val) (m2: mem) : Prop := + external_call'_intro: forall v, + external_call ef ge (decode_longs (sig_args (ef_sig ef)) vargs) m1 t v m2 -> + vres = encode_long (sig_res (ef_sig ef)) v -> + external_call' ef ge vargs m1 t vres m2. + +Lemma decode_longs_lessdef: + forall tyl vl1 vl2, Val.lessdef_list vl1 vl2 -> Val.lessdef_list (decode_longs tyl vl1) (decode_longs tyl vl2). +Proof. + induction tyl; simpl; intros. + auto. + destruct a; inv H; auto. inv H1; auto. constructor; auto. apply Val.longofwords_lessdef; auto. +Qed. + +Lemma decode_longs_inject: + forall f tyl vl1 vl2, val_list_inject f vl1 vl2 -> val_list_inject f (decode_longs tyl vl1) (decode_longs tyl vl2). +Proof. + induction tyl; simpl; intros. + auto. + destruct a; inv H; auto. inv H1; auto. constructor; auto. apply val_longofwords_inject; auto. +Qed. + +Lemma encode_long_lessdef: + forall oty v1 v2, Val.lessdef v1 v2 -> Val.lessdef_list (encode_long oty v1) (encode_long oty v2). +Proof. + intros. destruct oty as [[]|]; simpl; auto. + constructor. apply Val.hiword_lessdef; auto. constructor. apply Val.loword_lessdef; auto. auto. +Qed. + +Lemma encode_long_inject: + forall f oty v1 v2, val_inject f v1 v2 -> val_list_inject f (encode_long oty v1) (encode_long oty v2). +Proof. + intros. destruct oty as [[]|]; simpl; auto. + constructor. apply val_hiword_inject; auto. constructor. apply val_loword_inject; auto. auto. +Qed. + +Lemma encode_long_has_type: + forall v sg, + Val.has_type v (proj_sig_res sg) -> + Val.has_type_list (encode_long (sig_res sg) v) (proj_sig_res' sg). +Proof. + unfold proj_sig_res, proj_sig_res', encode_long; intros. + destruct (sig_res sg) as [[] | ]; simpl; auto. + destruct v; simpl; auto. +Qed. + +Lemma external_call_well_typed': + forall ef (F V : Type) (ge : Genv.t F V) vargs m1 t vres m2, + external_call' ef ge vargs m1 t vres m2 -> + Val.has_type_list vres (proj_sig_res' (ef_sig ef)). +Proof. + intros. inv H. apply encode_long_has_type. + eapply external_call_well_typed; eauto. +Qed. + +Lemma external_call_symbols_preserved': + forall ef F1 F2 V (ge1: Genv.t F1 V) (ge2: Genv.t F2 V) vargs m1 t vres m2, + external_call' ef ge1 vargs m1 t vres m2 -> + (forall id, Genv.find_symbol ge2 id = Genv.find_symbol ge1 id) -> + (forall b, Genv.find_var_info ge2 b = Genv.find_var_info ge1 b) -> + external_call' ef ge2 vargs m1 t vres m2. +Proof. + intros. inv H. exists v; auto. eapply external_call_symbols_preserved; eauto. +Qed. + +Lemma external_call_valid_block': + forall ef (F V : Type) (ge : Genv.t F V) vargs m1 t vres m2 b, + external_call' ef ge vargs m1 t vres m2 -> + Mem.valid_block m1 b -> Mem.valid_block m2 b. +Proof. + intros. inv H. eapply external_call_valid_block; eauto. +Qed. + +Lemma external_call_mem_extends': + forall ef (F V : Type) (ge : Genv.t F V) vargs m1 t vres m2 m1' vargs', + external_call' ef ge vargs m1 t vres m2 -> + Mem.extends m1 m1' -> + Val.lessdef_list vargs vargs' -> + exists vres' m2', + external_call' ef ge vargs' m1' t vres' m2' + /\ Val.lessdef_list vres vres' + /\ Mem.extends m2 m2' + /\ mem_unchanged_on (loc_out_of_bounds m1) m1' m2'. +Proof. + intros. inv H. + exploit external_call_mem_extends; eauto. + eapply decode_longs_lessdef; eauto. + intros (v' & m2' & A & B & C & D). + exists (encode_long (sig_res (ef_sig ef)) v'); exists m2'; intuition. + econstructor; eauto. + eapply encode_long_lessdef; eauto. +Qed. + +Lemma external_call_mem_inject': + forall ef F V (ge: Genv.t F V) vargs m1 t vres m2 f m1' vargs', + meminj_preserves_globals ge f -> + external_call' ef ge vargs m1 t vres m2 -> + Mem.inject f m1 m1' -> + val_list_inject f vargs vargs' -> + exists f' vres' m2', + external_call' ef ge vargs' m1' t vres' m2' + /\ val_list_inject f' vres vres' + /\ Mem.inject f' m2 m2' + /\ mem_unchanged_on (loc_unmapped f) m1 m2 + /\ mem_unchanged_on (loc_out_of_reach f m1) m1' m2' + /\ inject_incr f f' + /\ inject_separated f f' m1 m1'. +Proof. + intros. inv H0. + exploit external_call_mem_inject; eauto. + eapply decode_longs_inject; eauto. + intros (f' & v' & m2' & A & B & C & D & E & P & Q). + exists f'; exists (encode_long (sig_res (ef_sig ef)) v'); exists m2'; intuition. + econstructor; eauto. + apply encode_long_inject; auto. +Qed. + +Lemma external_call_determ': + forall ef (F V : Type) (ge : Genv.t F V) vargs m t1 vres1 m1 t2 vres2 m2, + external_call' ef ge vargs m t1 vres1 m1 -> + external_call' ef ge vargs m t2 vres2 m2 -> + match_traces ge t1 t2 /\ (t1 = t2 -> vres1 = vres2 /\ m1 = m2). +Proof. + intros. inv H; inv H0. exploit external_call_determ. eexact H1. eexact H. + intros [A B]. split. auto. intros. destruct B as [C D]; auto. subst. auto. +Qed. + +Lemma external_call_match_traces': + forall ef (F V : Type) (ge : Genv.t F V) vargs m t1 vres1 m1 t2 vres2 m2, + external_call' ef ge vargs m t1 vres1 m1 -> + external_call' ef ge vargs m t2 vres2 m2 -> + match_traces ge t1 t2. +Proof. + intros. inv H; inv H0. eapply external_call_match_traces; eauto. +Qed. + +Lemma external_call_deterministic': + forall ef (F V : Type) (ge : Genv.t F V) vargs m t vres1 m1 vres2 m2, + external_call' ef ge vargs m t vres1 m1 -> + external_call' ef ge vargs m t vres2 m2 -> + vres1 = vres2 /\ m1 = m2. +Proof. + intros. inv H; inv H0. + exploit external_call_deterministic. eexact H1. eexact H. intros [A B]. + split; congruence. +Qed. + + + + + + + diff --git a/common/Globalenvs.v b/common/Globalenvs.v index 3d9f499..a082819 100644 --- a/common/Globalenvs.v +++ b/common/Globalenvs.v @@ -491,6 +491,7 @@ Definition init_data_size (i: init_data) : Z := | Init_int8 _ => 1 | Init_int16 _ => 2 | Init_int32 _ => 4 + | Init_int64 _ => 8 | Init_float32 _ => 4 | Init_float64 _ => 8 | Init_addrof _ _ => 4 @@ -508,6 +509,7 @@ Definition store_init_data (m: mem) (b: block) (p: Z) (id: init_data) : option m | Init_int8 n => Mem.store Mint8unsigned m b p (Vint n) | Init_int16 n => Mem.store Mint16unsigned m b p (Vint n) | Init_int32 n => Mem.store Mint32 m b p (Vint n) + | Init_int64 n => Mem.store Mint64 m b p (Vlong n) | Init_float32 n => Mem.store Mfloat32 m b p (Vfloat n) | Init_float64 n => Mem.store Mfloat64 m b p (Vfloat n) | Init_addrof symb ofs => @@ -761,6 +763,9 @@ Fixpoint load_store_init_data (m: mem) (b: block) (p: Z) (il: list init_data) {s | Init_int32 n :: il' => Mem.load Mint32 m b p = Some(Vint n) /\ load_store_init_data m b (p + 4) il' + | Init_int64 n :: il' => + Mem.load Mint64 m b p = Some(Vlong n) + /\ load_store_init_data m b (p + 8) il' | Init_float32 n :: il' => Mem.load Mfloat32 m b p = Some(Vfloat(Float.singleoffloat n)) /\ load_store_init_data m b (p + 4) il' @@ -795,6 +800,7 @@ Proof. eapply (A Mint8unsigned (Vint i)); eauto. eapply (A Mint16unsigned (Vint i)); eauto. eapply (A Mint32 (Vint i)); eauto. + eapply (A Mint64 (Vlong i)); eauto. eapply (A Mfloat32 (Vfloat f)); eauto. eapply (A Mfloat64 (Vfloat f)); eauto. destruct (find_symbol ge i); try congruence. exists b0; split; auto. diff --git a/common/Memdata.v b/common/Memdata.v index 3de5f39..c62ba99 100644 --- a/common/Memdata.v +++ b/common/Memdata.v @@ -35,6 +35,7 @@ Definition size_chunk (chunk: memory_chunk) : Z := | Mint16signed => 2 | Mint16unsigned => 2 | Mint32 => 4 + | Mint64 => 8 | Mfloat32 => 4 | Mfloat64 => 8 | Mfloat64al32 => 8 @@ -82,6 +83,7 @@ Definition align_chunk (chunk: memory_chunk) : Z := | Mint16signed => 2 | Mint16unsigned => 2 | Mint32 => 4 + | Mint64 => 8 | Mfloat32 => 4 | Mfloat64 => 8 | Mfloat64al32 => 4 @@ -96,7 +98,7 @@ Qed. Lemma align_size_chunk_divides: forall chunk, (align_chunk chunk | size_chunk chunk). Proof. - intros. destruct chunk; simpl; try apply Zdivide_refl. exists 2; auto. + intros. destruct chunk; simpl; try apply Zdivide_refl; exists 2; auto. Qed. Lemma align_le_divides: @@ -340,6 +342,7 @@ Definition encode_val (chunk: memory_chunk) (v: val) : list memval := | Vint n, (Mint16signed | Mint16unsigned) => inj_bytes (encode_int 2%nat (Int.unsigned n)) | Vint n, Mint32 => inj_bytes (encode_int 4%nat (Int.unsigned n)) | Vptr b ofs, Mint32 => inj_pointer 4%nat b ofs + | Vlong n, Mint64 => inj_bytes (encode_int 8%nat (Int64.unsigned n)) | Vfloat n, Mfloat32 => inj_bytes (encode_int 4%nat (Int.unsigned (Float.bits_of_single n))) | Vfloat n, (Mfloat64 | Mfloat64al32) => inj_bytes (encode_int 8%nat (Int64.unsigned (Float.bits_of_double n))) | _, _ => list_repeat (size_chunk_nat chunk) Undef @@ -354,6 +357,7 @@ Definition decode_val (chunk: memory_chunk) (vl: list memval) : val := | Mint16signed => Vint(Int.sign_ext 16 (Int.repr (decode_int bl))) | Mint16unsigned => Vint(Int.zero_ext 16 (Int.repr (decode_int bl))) | Mint32 => Vint(Int.repr(decode_int bl)) + | Mint64 => Vlong(Int64.repr(decode_int bl)) | Mfloat32 => Vfloat(Float.single_of_bits (Int.repr (decode_int bl))) | Mfloat64 | Mfloat64al32 => Vfloat(Float.double_of_bits (Int64.repr (decode_int bl))) end @@ -394,14 +398,19 @@ Definition decode_encode_val (v1: val) (chunk1 chunk2: memory_chunk) (v2: val) : | Vint n, Mint16unsigned, Mint16unsigned => v2 = Vint(Int.zero_ext 16 n) | Vint n, Mint32, Mint32 => v2 = Vint n | Vint n, Mint32, Mfloat32 => v2 = Vfloat(Float.single_of_bits n) - | Vint n, (Mfloat32 | Mfloat64 | Mfloat64al32), _ => v2 = Vundef + | Vint n, (Mint64 | Mfloat32 | Mfloat64 | Mfloat64al32), _ => v2 = Vundef | Vint n, _, _ => True (**r nothing meaningful to say about v2 *) | Vptr b ofs, Mint32, Mint32 => v2 = Vptr b ofs | Vptr b ofs, _, _ => v2 = Vundef + | Vlong n, Mint64, Mint64 => v2 = Vlong n + | Vlong n, Mint64, (Mfloat64 | Mfloat64al32) => v2 = Vfloat(Float.double_of_bits n) + | Vlong n, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mfloat32|Mfloat64|Mfloat64al32), _ => v2 = Vundef + | Vlong n, _, _ => True (**r nothing meaningful to say about v2 *) | Vfloat f, Mfloat32, Mfloat32 => v2 = Vfloat(Float.singleoffloat f) | Vfloat f, Mfloat32, Mint32 => v2 = Vint(Float.bits_of_single f) | Vfloat f, (Mfloat64 | Mfloat64al32), (Mfloat64 | Mfloat64al32) => v2 = Vfloat f - | Vfloat f, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32), _ => v2 = Vundef + | Vfloat f, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mint64), _ => v2 = Vundef + | Vfloat f, (Mfloat64 | Mfloat64al32), Mint64 => v2 = Vlong(Float.bits_of_double f) | Vfloat f, _, _ => True (* nothing interesting to say about v2 *) end. @@ -419,7 +428,6 @@ Opaque inj_pointer. intros. destruct v; destruct chunk1; simpl; try (apply decode_val_undef); destruct chunk2; unfold decode_val; auto; try (rewrite proj_inj_bytes). - (* int-int *) rewrite decode_encode_int_1. decEq. apply Int.sign_ext_zero_ext. omega. rewrite decode_encode_int_1. decEq. apply Int.zero_ext_idem. omega. rewrite decode_encode_int_1. decEq. apply Int.sign_ext_zero_ext. omega. @@ -430,10 +438,15 @@ Opaque inj_pointer. rewrite decode_encode_int_2. decEq. apply Int.zero_ext_idem. omega. rewrite decode_encode_int_4. auto. rewrite decode_encode_int_4. auto. + rewrite decode_encode_int_8. auto. + rewrite decode_encode_int_8. auto. + rewrite decode_encode_int_8. auto. rewrite decode_encode_int_4. auto. - rewrite decode_encode_int_4. decEq. apply Float.single_of_bits_of_single. + rewrite decode_encode_int_4. decEq. apply Float.single_of_bits_of_single. + rewrite decode_encode_int_8. auto. rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double. rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double. + rewrite decode_encode_int_8. auto. rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double. rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double. change (proj_bytes (inj_pointer 4 b i)) with (@None (list byte)). simpl. @@ -815,6 +828,7 @@ Proof. intros. inv H; simpl. destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self. destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self. + destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self. destruct chunk; try (apply repeat_Undef_inject_self). repeat econstructor; eauto. replace (size_chunk_nat chunk) with (length (encode_val chunk v2)). @@ -845,3 +859,143 @@ Proof. constructor. Qed. +(** * Breaking 64-bit memory accesses into two 32-bit accesses *) + +Lemma int_of_bytes_append: + forall l2 l1, + int_of_bytes (l1 ++ l2) = int_of_bytes l1 + int_of_bytes l2 * two_p (Z_of_nat (length l1) * 8). +Proof. + induction l1; simpl int_of_bytes; intros. + simpl. ring. + simpl length. rewrite inj_S. + replace (Z.succ (Z.of_nat (length l1)) * 8) with (Z_of_nat (length l1) * 8 + 8) by omega. + rewrite two_p_is_exp. change (two_p 8) with 256. rewrite IHl1. ring. + omega. omega. +Qed. + +Lemma int_of_bytes_range: + forall l, 0 <= int_of_bytes l < two_p (Z_of_nat (length l) * 8). +Proof. + induction l; intros. + simpl. omega. + simpl length. rewrite inj_S. + replace (Z.succ (Z.of_nat (length l)) * 8) with (Z.of_nat (length l) * 8 + 8) by omega. + rewrite two_p_is_exp. change (two_p 8) with 256. + simpl int_of_bytes. generalize (Byte.unsigned_range a). + change Byte.modulus with 256. omega. + omega. omega. +Qed. + +Lemma length_proj_bytes: + forall l b, proj_bytes l = Some b -> length b = length l. +Proof. + induction l; simpl; intros. + inv H; auto. + destruct a; try discriminate. + destruct (proj_bytes l) eqn:E; inv H. + simpl. f_equal. auto. +Qed. + +Lemma proj_bytes_append: + forall l2 l1, + proj_bytes (l1 ++ l2) = + match proj_bytes l1, proj_bytes l2 with + | Some b1, Some b2 => Some (b1 ++ b2) + | _, _ => None + end. +Proof. + induction l1; simpl. + destruct (proj_bytes l2); auto. + destruct a; auto. rewrite IHl1. + destruct (proj_bytes l1); auto. destruct (proj_bytes l2); auto. +Qed. + +Lemma decode_val_int64: + forall l1 l2, + length l1 = 4%nat -> length l2 = 4%nat -> + decode_val Mint64 (l1 ++ l2) = + Val.longofwords (decode_val Mint32 (if big_endian then l1 else l2)) + (decode_val Mint32 (if big_endian then l2 else l1)). +Proof. + intros. unfold decode_val. + assert (PP: forall vl, match proj_pointer vl with Vundef => True | Vptr _ _ => True | _ => False end). + intros. unfold proj_pointer. destruct vl; auto. destruct m; auto. + destruct (check_pointer 4 b i (Pointer b i n :: vl)); auto. + assert (PP1: forall vl v2, Val.longofwords (proj_pointer vl) v2 = Vundef). + intros. generalize (PP vl). destruct (proj_pointer vl); reflexivity || contradiction. + assert (PP2: forall v1 vl, Val.longofwords v1 (proj_pointer vl) = Vundef). + intros. destruct v1; simpl; auto. + generalize (PP vl). destruct (proj_pointer vl); reflexivity || contradiction. + rewrite proj_bytes_append. + destruct (proj_bytes l1) as [b1|] eqn:B1; destruct (proj_bytes l2) as [b2|] eqn:B2. +- exploit length_proj_bytes. eexact B1. rewrite H; intro L1. + exploit length_proj_bytes. eexact B2. rewrite H0; intro L2. + assert (UR: forall l, length l = 4%nat -> Int.unsigned (Int.repr (int_of_bytes l)) = int_of_bytes l). + intros. apply Int.unsigned_repr. + generalize (int_of_bytes_range l). rewrite H1. + change (two_p (Z.of_nat 4 * 8)) with (Int.max_unsigned + 1). + omega. + unfold decode_int, rev_if_be. destruct big_endian; rewrite B1; rewrite B2. + + rewrite <- (rev_length b1) in L1. + rewrite <- (rev_length b2) in L2. + rewrite rev_app_distr. + set (b1' := rev b1) in *; set (b2' := rev b2) in *. + unfold Val.longofwords. f_equal. rewrite Int64.ofwords_add. f_equal. + rewrite !UR by auto. rewrite int_of_bytes_append. + rewrite L2. change (Z.of_nat 4 * 8) with 32. ring. + + unfold Val.longofwords. f_equal. rewrite Int64.ofwords_add. f_equal. + rewrite !UR by auto. rewrite int_of_bytes_append. + rewrite L1. change (Z.of_nat 4 * 8) with 32. ring. +- destruct big_endian; rewrite B1; rewrite B2; auto. +- destruct big_endian; rewrite B1; rewrite B2; auto. +- destruct big_endian; rewrite B1; rewrite B2; auto. +Qed. + +Lemma bytes_of_int_append: + forall n2 x2 n1 x1, + 0 <= x1 < two_p (Z_of_nat n1 * 8) -> + bytes_of_int (n1 + n2) (x1 + x2 * two_p (Z_of_nat n1 * 8)) = + bytes_of_int n1 x1 ++ bytes_of_int n2 x2. +Proof. + induction n1; intros. +- simpl in *. f_equal. omega. +- assert (E: two_p (Z.of_nat (S n1) * 8) = two_p (Z.of_nat n1 * 8) * 256). + { + rewrite inj_S. change 256 with (two_p 8). rewrite <- two_p_is_exp. + f_equal. omega. omega. omega. + } + rewrite E in *. simpl. f_equal. + apply Byte.eqm_samerepr. exists (x2 * two_p (Z.of_nat n1 * 8)). + change Byte.modulus with 256. ring. + rewrite Zmult_assoc. rewrite Z_div_plus. apply IHn1. + apply Zdiv_interval_1. omega. apply two_p_gt_ZERO; omega. omega. + assumption. omega. +Qed. + +Lemma bytes_of_int64: + forall i, + bytes_of_int 8 (Int64.unsigned i) = + bytes_of_int 4 (Int.unsigned (Int64.loword i)) ++ bytes_of_int 4 (Int.unsigned (Int64.hiword i)). +Proof. + intros. transitivity (bytes_of_int (4 + 4) (Int64.unsigned (Int64.ofwords (Int64.hiword i) (Int64.loword i)))). + f_equal. f_equal. rewrite Int64.ofwords_recompose. auto. + rewrite Int64.ofwords_add'. + change 32 with (Z_of_nat 4 * 8). + rewrite Zplus_comm. apply bytes_of_int_append. apply Int.unsigned_range. +Qed. + +Lemma encode_val_int64: + forall v, + encode_val Mint64 v = + encode_val Mint32 (if big_endian then Val.hiword v else Val.loword v) + ++ encode_val Mint32 (if big_endian then Val.loword v else Val.hiword v). +Proof. + intros. destruct v; destruct big_endian eqn:BI; try reflexivity; + unfold Val.loword, Val.hiword, encode_val. + unfold inj_bytes. rewrite <- map_app. f_equal. + unfold encode_int, rev_if_be. rewrite BI. rewrite <- rev_app_distr. f_equal. + apply bytes_of_int64. + unfold inj_bytes. rewrite <- map_app. f_equal. + unfold encode_int, rev_if_be. rewrite BI. + apply bytes_of_int64. +Qed. diff --git a/common/Memory.v b/common/Memory.v index 12a0f45..54d50f7 100644 --- a/common/Memory.v +++ b/common/Memory.v @@ -862,6 +862,60 @@ Proof. rewrite inj_S. omega. Qed. +Theorem load_int64_split: + forall m b ofs v, + load Mint64 m b ofs = Some v -> + exists v1 v2, + load Mint32 m b ofs = Some (if big_endian then v1 else v2) + /\ load Mint32 m b (ofs + 4) = Some (if big_endian then v2 else v1) + /\ v = Val.longofwords v1 v2. +Proof. + intros. + exploit load_valid_access; eauto. intros [A B]. simpl in *. + exploit load_loadbytes. eexact H. simpl. intros [bytes [LB EQ]]. + change 8 with (4 + 4) in LB. + exploit loadbytes_split. eexact LB. omega. omega. + intros (bytes1 & bytes2 & LB1 & LB2 & APP). + change 4 with (size_chunk Mint32) in LB1. + exploit loadbytes_load. eexact LB1. + simpl. apply Zdivides_trans with 8; auto. exists 2; auto. + intros L1. + change 4 with (size_chunk Mint32) in LB2. + exploit loadbytes_load. eexact LB2. + simpl. apply Zdivide_plus_r. apply Zdivides_trans with 8; auto. exists 2; auto. exists 1; auto. + intros L2. + exists (decode_val Mint32 (if big_endian then bytes1 else bytes2)); + exists (decode_val Mint32 (if big_endian then bytes2 else bytes1)). + split. destruct big_endian; auto. + split. destruct big_endian; auto. + rewrite EQ. rewrite APP. apply decode_val_int64. + erewrite loadbytes_length; eauto. reflexivity. + erewrite loadbytes_length; eauto. reflexivity. +Qed. + +Theorem loadv_int64_split: + forall m a v, + loadv Mint64 m a = Some v -> + exists v1 v2, + loadv Mint32 m a = Some (if big_endian then v1 else v2) + /\ loadv Mint32 m (Val.add a (Vint (Int.repr 4))) = Some (if big_endian then v2 else v1) + /\ v = Val.longofwords v1 v2. +Proof. + intros. destruct a; simpl in H; try discriminate. + exploit load_int64_split; eauto. intros (v1 & v2 & L1 & L2 & EQ). + assert (NV: Int.unsigned (Int.add i (Int.repr 4)) = Int.unsigned i + 4). + rewrite Int.add_unsigned. apply Int.unsigned_repr. + exploit load_valid_access. eexact H. intros [P Q]. simpl in Q. + exploit (Zdivide_interval (Int.unsigned i) Int.modulus 8). + omega. apply Int.unsigned_range. auto. exists (two_p (32-3)); reflexivity. + unfold Int.max_unsigned. omega. + exists v1; exists v2. +Opaque Int.repr. + split. auto. + split. simpl. rewrite NV. auto. + auto. +Qed. + (** ** Properties related to [store] *) Theorem valid_access_store: @@ -1581,6 +1635,44 @@ Proof. exists m1; split; auto. Qed. +Theorem store_int64_split: + forall m b ofs v m', + store Mint64 m b ofs v = Some m' -> + exists m1, + store Mint32 m b ofs (if big_endian then Val.hiword v else Val.loword v) = Some m1 + /\ store Mint32 m1 b (ofs + 4) (if big_endian then Val.loword v else Val.hiword v) = Some m'. +Proof. + intros. + exploit store_valid_access_3; eauto. intros [A B]. simpl in *. + exploit store_storebytes. eexact H. intros SB. + rewrite encode_val_int64 in SB. + exploit storebytes_split. eexact SB. intros [m1 [SB1 SB2]]. + rewrite encode_val_length in SB2. simpl in SB2. + exists m1; split. + apply storebytes_store. exact SB1. + simpl. apply Zdivides_trans with 8; auto. exists 2; auto. + apply storebytes_store. exact SB2. + simpl. apply Zdivide_plus_r. apply Zdivides_trans with 8; auto. exists 2; auto. exists 1; auto. +Qed. + +Theorem storev_int64_split: + forall m a v m', + storev Mint64 m a v = Some m' -> + exists m1, + storev Mint32 m a (if big_endian then Val.hiword v else Val.loword v) = Some m1 + /\ storev Mint32 m1 (Val.add a (Vint (Int.repr 4))) (if big_endian then Val.loword v else Val.hiword v) = Some m'. +Proof. + intros. destruct a; simpl in H; try discriminate. + exploit store_int64_split; eauto. intros [m1 [A B]]. + exists m1; split. + exact A. + unfold storev, Val.add. rewrite Int.add_unsigned. rewrite Int.unsigned_repr. exact B. + exploit store_valid_access_3. eexact H. intros [P Q]. simpl in Q. + exploit (Zdivide_interval (Int.unsigned i) Int.modulus 8). + omega. apply Int.unsigned_range. auto. exists (two_p (32-3)); reflexivity. + change (Int.unsigned (Int.repr 4)) with 4. unfold Int.max_unsigned. omega. +Qed. + (** ** Properties related to [alloc]. *) Section ALLOC. diff --git a/common/PrintAST.ml b/common/PrintAST.ml index 7f2ed3f..c18b09d 100644 --- a/common/PrintAST.ml +++ b/common/PrintAST.ml @@ -16,7 +16,7 @@ open Printf open Camlcoq open AST -let name_of_type = function Tint -> "int" | Tfloat -> "float" +let name_of_type = function Tint -> "int" | Tfloat -> "float" | Tlong -> "long" let name_of_chunk = function | Mint8signed -> "int8signed" @@ -24,6 +24,7 @@ let name_of_chunk = function | Mint16signed -> "int16signed" | Mint16unsigned -> "int16unsigned" | Mint32 -> "int32" + | Mint64 -> "int64" | Mfloat32 -> "float32" | Mfloat64 -> "float64" | Mfloat64al32 -> "float64al32" diff --git a/common/Values.v b/common/Values.v index f629628..f917e0b 100644 --- a/common/Values.v +++ b/common/Values.v @@ -36,6 +36,7 @@ Definition eq_block := zeq. Inductive val: Type := | Vundef: val | Vint: int -> val + | Vlong: int64 -> val | Vfloat: float -> val | Vptr: block -> int -> val. @@ -58,6 +59,7 @@ Definition has_type (v: val) (t: typ) : Prop := match v, t with | Vundef, _ => True | Vint _, Tint => True + | Vlong _, Tlong => True | Vfloat _, Tfloat => True | Vptr _ _, Tint => True | _, _ => False @@ -70,6 +72,12 @@ Fixpoint has_type_list (vl: list val) (tl: list typ) {struct vl} : Prop := | _, _ => False end. +Definition has_opttype (v: val) (ot: option typ) : Prop := + match ot with + | None => v = Vundef + | Some t => has_type v t + end. + (** Truth values. Pointers and non-zero integers are treated as [True]. The integer 0 (also used to represent the null pointer) is [False]. [Vundef] and floats are neither true nor false. *) @@ -127,12 +135,6 @@ Definition floatofintu (v: val) : option val := | _ => None end. -Definition floatofwords (v1 v2: val) : val := - match v1, v2 with - | Vint n1, Vint n2 => Vfloat (Float.from_words n1 n2) - | _, _ => Vundef - end. - Definition negint (v: val) : val := match v with | Vint n => Vint (Int.neg n) @@ -344,6 +346,183 @@ Definition divf (v1 v2: val): val := | _, _ => Vundef end. +Definition floatofwords (v1 v2: val) : val := + match v1, v2 with + | Vint n1, Vint n2 => Vfloat (Float.from_words n1 n2) + | _, _ => Vundef + end. + +(** Operations on 64-bit integers *) + +Definition longofwords (v1 v2: val) : val := + match v1, v2 with + | Vint n1, Vint n2 => Vlong (Int64.ofwords n1 n2) + | _, _ => Vundef + end. + +Definition loword (v: val) : val := + match v with + | Vlong n => Vint (Int64.loword n) + | _ => Vundef + end. + +Definition hiword (v: val) : val := + match v with + | Vlong n => Vint (Int64.hiword n) + | _ => Vundef + end. + +Definition negl (v: val) : val := + match v with + | Vlong n => Vlong (Int64.neg n) + | _ => Vundef + end. + +Definition notl (v: val) : val := + match v with + | Vlong n => Vlong (Int64.not n) + | _ => Vundef + end. + +Definition longofint (v: val) : val := + match v with + | Vint n => Vlong (Int64.repr (Int.signed n)) + | _ => Vundef + end. + +Definition longofintu (v: val) : val := + match v with + | Vint n => Vlong (Int64.repr (Int.unsigned n)) + | _ => Vundef + end. + +Definition longoffloat (v: val) : option val := + match v with + | Vfloat f => option_map Vlong (Float.longoffloat f) + | _ => None + end. + +Definition longuoffloat (v: val) : option val := + match v with + | Vfloat f => option_map Vlong (Float.longuoffloat f) + | _ => None + end. + +Definition floatoflong (v: val) : option val := + match v with + | Vlong n => Some (Vfloat (Float.floatoflong n)) + | _ => None + end. + +Definition floatoflongu (v: val) : option val := + match v with + | Vlong n => Some (Vfloat (Float.floatoflongu n)) + | _ => None + end. + +Definition addl (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vlong n2 => Vlong(Int64.add n1 n2) + | _, _ => Vundef + end. + +Definition subl (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vlong n2 => Vlong(Int64.sub n1 n2) + | _, _ => Vundef + end. + +Definition mull (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vlong n2 => Vlong(Int64.mul n1 n2) + | _, _ => Vundef + end. + +Definition mull' (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => Vlong(Int64.mul' n1 n2) + | _, _ => Vundef + end. + +Definition divls (v1 v2: val): option val := + match v1, v2 with + | Vlong n1, Vlong n2 => + if Int64.eq n2 Int64.zero + || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone + then None + else Some(Vlong(Int64.divs n1 n2)) + | _, _ => None + end. + +Definition modls (v1 v2: val): option val := + match v1, v2 with + | Vlong n1, Vlong n2 => + if Int64.eq n2 Int64.zero + || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone + then None + else Some(Vlong(Int64.mods n1 n2)) + | _, _ => None + end. + +Definition divlu (v1 v2: val): option val := + match v1, v2 with + | Vlong n1, Vlong n2 => + if Int64.eq n2 Int64.zero then None else Some(Vlong(Int64.divu n1 n2)) + | _, _ => None + end. + +Definition modlu (v1 v2: val): option val := + match v1, v2 with + | Vlong n1, Vlong n2 => + if Int64.eq n2 Int64.zero then None else Some(Vlong(Int64.modu n1 n2)) + | _, _ => None + end. + +Definition andl (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vlong n2 => Vlong(Int64.and n1 n2) + | _, _ => Vundef + end. + +Definition orl (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vlong n2 => Vlong(Int64.or n1 n2) + | _, _ => Vundef + end. + +Definition xorl (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vlong n2 => Vlong(Int64.xor n1 n2) + | _, _ => Vundef + end. + +Definition shll (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vint n2 => + if Int.ltu n2 Int64.iwordsize' + then Vlong(Int64.shl' n1 n2) + else Vundef + | _, _ => Vundef + end. + +Definition shrl (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vint n2 => + if Int.ltu n2 Int64.iwordsize' + then Vlong(Int64.shr' n1 n2) + else Vundef + | _, _ => Vundef + end. + +Definition shrlu (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vint n2 => + if Int.ltu n2 Int64.iwordsize' + then Vlong(Int64.shru' n1 n2) + else Vundef + | _, _ => Vundef + end. + (** Comparisons *) Section COMPARISONS. @@ -392,6 +571,18 @@ Definition cmpf_bool (c: comparison) (v1 v2: val): option bool := | _, _ => None end. +Definition cmpl_bool (c: comparison) (v1 v2: val): option bool := + match v1, v2 with + | Vlong n1, Vlong n2 => Some (Int64.cmp c n1 n2) + | _, _ => None + end. + +Definition cmplu_bool (c: comparison) (v1 v2: val): option bool := + match v1, v2 with + | Vlong n1, Vlong n2 => Some (Int64.cmpu c n1 n2) + | _, _ => None + end. + Definition of_optbool (ob: option bool): val := match ob with Some true => Vtrue | Some false => Vfalse | None => Vundef end. @@ -404,6 +595,12 @@ Definition cmpu (c: comparison) (v1 v2: val): val := Definition cmpf (c: comparison) (v1 v2: val): val := of_optbool (cmpf_bool c v1 v2). +Definition cmpl (c: comparison) (v1 v2: val): val := + of_optbool (cmpl_bool c v1 v2). + +Definition cmplu (c: comparison) (v1 v2: val): val := + of_optbool (cmplu_bool c v1 v2). + End COMPARISONS. (** [load_result] is used in the memory model (library [Mem]) @@ -423,6 +620,7 @@ Definition load_result (chunk: memory_chunk) (v: val) := | Mint16unsigned, Vint n => Vint (Int.zero_ext 16 n) | Mint32, Vint n => Vint n | Mint32, Vptr b ofs => Vptr b ofs + | Mint64, Vlong n => Vlong n | Mfloat32, Vfloat f => Vfloat(Float.singleoffloat f) | (Mfloat64 | Mfloat64al32), Vfloat f => Vfloat f | _, _ => Vundef @@ -981,6 +1179,12 @@ Inductive lessdef: val -> val -> Prop := | lessdef_refl: forall v, lessdef v v | lessdef_undef: forall v, lessdef Vundef v. +Lemma lessdef_same: + forall v1 v2, v1 = v2 -> lessdef v1 v2. +Proof. + intros. subst v2. constructor. +Qed. + Lemma lessdef_trans: forall v1 v2 v3, lessdef v1 v2 -> lessdef v2 v3 -> lessdef v1 v3. Proof. @@ -1071,6 +1275,25 @@ Proof. intros. destruct ob; simpl; auto. rewrite (H b); auto. Qed. +Lemma longofwords_lessdef: + forall v1 v2 v1' v2', + lessdef v1 v1' -> lessdef v2 v2' -> lessdef (longofwords v1 v2) (longofwords v1' v2'). +Proof. + intros. unfold longofwords. inv H; auto. inv H0; auto. destruct v1'; auto. +Qed. + +Lemma loword_lessdef: + forall v v', lessdef v v' -> lessdef (loword v) (loword v'). +Proof. + intros. inv H; auto. +Qed. + +Lemma hiword_lessdef: + forall v v', lessdef v v' -> lessdef (hiword v) (hiword v'). +Proof. + intros. inv H; auto. +Qed. + End Val. (** * Values and memory injections *) @@ -1093,6 +1316,8 @@ Definition meminj : Type := block -> option (block * Z). Inductive val_inject (mi: meminj): val -> val -> Prop := | val_inject_int: forall i, val_inject mi (Vint i) (Vint i) + | val_inject_long: + forall i, val_inject mi (Vlong i) (Vlong i) | val_inject_float: forall f, val_inject mi (Vfloat f) (Vfloat f) | val_inject_ptr: @@ -1103,8 +1328,7 @@ Inductive val_inject (mi: meminj): val -> val -> Prop := | val_inject_undef: forall v, val_inject mi Vundef v. -Hint Resolve val_inject_int val_inject_float val_inject_ptr - val_inject_undef. +Hint Constructors val_inject. Inductive val_list_inject (mi: meminj): list val -> list val-> Prop:= | val_nil_inject : @@ -1225,6 +1449,25 @@ Proof. now erewrite !valid_ptr_inj by eauto. Qed. +Lemma val_longofwords_inject: + forall v1 v2 v1' v2', + val_inject f v1 v1' -> val_inject f v2 v2' -> val_inject f (Val.longofwords v1 v2) (Val.longofwords v1' v2'). +Proof. + intros. unfold Val.longofwords. inv H; auto. inv H0; auto. +Qed. + +Lemma val_loword_inject: + forall v v', val_inject f v v' -> val_inject f (Val.loword v) (Val.loword v'). +Proof. + intros. unfold Val.loword; inv H; auto. +Qed. + +Lemma val_hiword_inject: + forall v v', val_inject f v v' -> val_inject f (Val.hiword v) (Val.hiword v'). +Proof. + intros. unfold Val.hiword; inv H; auto. +Qed. + End VAL_INJ_OPS. (** Monotone evolution of a memory injection. *) @@ -1288,9 +1531,8 @@ Lemma val_inject_id: val_inject inject_id v1 v2 <-> Val.lessdef v1 v2. Proof. intros; split; intros. - inv H. constructor. constructor. + inv H; auto. unfold inject_id in H0. inv H0. rewrite Int.add_zero. constructor. - constructor. inv H. destruct v2; econstructor. unfold inject_id; reflexivity. rewrite Int.add_zero; auto. constructor. Qed. @@ -61,7 +61,7 @@ done # Per-target configuration cchecklink=false -has_runtime_lib=false +has_runtime_lib=true case "$target" in powerpc-linux|ppc-linux|powerpc-eabi|ppc-eabi) @@ -218,8 +218,8 @@ CLINKER=gcc # Math library. Set to empty under MacOS X LIBMATH=-lm -# Obsolete; do not change -HAS_RUNTIME_LIB=false +# Do not change +HAS_RUNTIME_LIB=true EOF fi diff --git a/driver/Clflags.ml b/driver/Clflags.ml index a433c59..619be1e 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -37,6 +37,7 @@ let option_dinlining = ref false let option_dconstprop = ref false let option_dcse = ref false let option_dalloc = ref false +let option_dalloctrace = ref false let option_dmach = ref false let option_dasm = ref false let option_sdump = ref false diff --git a/driver/Compiler.v b/driver/Compiler.v index ea277ac..5d9e1a7 100644 --- a/driver/Compiler.v +++ b/driver/Compiler.v @@ -28,7 +28,6 @@ Require Cminor. Require CminorSel. Require RTL. Require LTL. -Require LTLin. Require Linear. Require Mach. Require Asm. @@ -49,16 +48,9 @@ Require Allocation. Require Tunneling. Require Linearize. Require CleanupLabels. -Require Reload. -Require RRE. Require Stacking. Require Asmgen. -(** Type systems. *) -Require RTLtyping. -Require LTLtyping. -Require LTLintyping. -Require Lineartyping. -(** Proofs of semantic preservation and typing preservation. *) +(** Proofs of semantic preservation. *) Require SimplExprproof. Require SimplLocalsproof. Require Cshmgenproof. @@ -71,17 +63,9 @@ Require Renumberproof. Require Constpropproof. Require CSEproof. Require Allocproof. -Require Alloctyping. Require Tunnelingproof. -Require Tunnelingtyping. Require Linearizeproof. -Require Linearizetyping. Require CleanupLabelsproof. -Require CleanupLabelstyping. -Require Reloadproof. -Require Reloadtyping. -Require RREproof. -Require RREtyping. Require Stackingproof. Require Asmgenproof. @@ -93,7 +77,7 @@ Parameter print_RTL_tailcall: RTL.program -> unit. Parameter print_RTL_inline: RTL.program -> unit. Parameter print_RTL_constprop: RTL.program -> unit. Parameter print_RTL_cse: RTL.program -> unit. -Parameter print_LTLin: LTLin.program -> unit. +Parameter print_LTL: LTL.program -> unit. Parameter print_Mach: Mach.program -> unit. Open Local Scope string_scope. @@ -137,12 +121,10 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program := @@@ CSE.transf_program @@ print print_RTL_cse @@@ Allocation.transf_program + @@ print print_LTL @@ Tunneling.tunnel_program @@@ Linearize.transf_program @@ CleanupLabels.transf_program - @@ print print_LTLin - @@ Reload.transf_program - @@ RRE.transf_program @@@ Stacking.transf_program @@ print print_Mach @@@ Asmgen.transf_program. @@ -150,7 +132,7 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program := Definition transf_cminor_program (p: Cminor.program) : res Asm.program := OK p @@ print print_Cminor - @@ Selection.sel_program + @@@ Selection.sel_program @@@ RTLgen.transl_program @@@ transf_rtl_program. @@ -223,20 +205,7 @@ Proof. set (p5 := Tunneling.tunnel_program p4) in *. destruct (Linearize.transf_program p5) as [p6|] eqn:?; simpl in H; try discriminate. set (p7 := CleanupLabels.transf_program p6) in *. - set (p8 := Reload.transf_program p7) in *. - set (p9 := RRE.transf_program p8) in *. - destruct (Stacking.transf_program p9) as [p10|] eqn:?; simpl in H; try discriminate. - - assert(TY1: LTLtyping.wt_program p5). - eapply Tunnelingtyping.program_typing_preserved. - eapply Alloctyping.program_typing_preserved; eauto. - assert(TY2: LTLintyping.wt_program p7). - eapply CleanupLabelstyping.program_typing_preserved. - eapply Linearizetyping.program_typing_preserved; eauto. - assert(TY3: Lineartyping.wt_program p9). - eapply RREtyping.program_typing_preserved. - eapply Reloadtyping.program_typing_preserved; eauto. - + destruct (Stacking.transf_program p7) as [p8|] eqn:?; simpl in H; try discriminate. eapply compose_forward_simulation. apply Tailcallproof.transf_program_correct. eapply compose_forward_simulation. apply Inliningproof.transf_program_correct. eassumption. eapply compose_forward_simulation. apply Renumberproof.transf_program_correct. @@ -245,12 +214,10 @@ Proof. eapply compose_forward_simulation. apply CSEproof.transf_program_correct. eassumption. eapply compose_forward_simulation. apply Allocproof.transf_program_correct. eassumption. eapply compose_forward_simulation. apply Tunnelingproof.transf_program_correct. - eapply compose_forward_simulation. apply Linearizeproof.transf_program_correct. eassumption. eauto. + eapply compose_forward_simulation. apply Linearizeproof.transf_program_correct. eassumption. eapply compose_forward_simulation. apply CleanupLabelsproof.transf_program_correct. - eapply compose_forward_simulation. apply Reloadproof.transf_program_correct. eauto. - eapply compose_forward_simulation. apply RREproof.transf_program_correct. eauto. eapply compose_forward_simulation. apply Stackingproof.transf_program_correct. - eexact Asmgenproof.return_address_exists. eassumption. eauto. + eexact Asmgenproof.return_address_exists. eassumption. apply Asmgenproof.transf_program_correct; eauto. split. auto. apply forward_to_backward_simulation. auto. @@ -269,9 +236,9 @@ Proof. unfold transf_cminor_program in H. repeat rewrite compose_print_identity in H. simpl in H. - set (p1 := Selection.sel_program p) in *. + destruct (Selection.sel_program p) as [p1|] eqn:?; simpl in H; try discriminate. destruct (RTLgen.transl_program p1) as [p2|] eqn:?; simpl in H; try discriminate. - eapply compose_forward_simulation. apply Selectionproof.transf_program_correct. + eapply compose_forward_simulation. apply Selectionproof.transf_program_correct. eauto. eapply compose_forward_simulation. apply RTLgenproof.transf_program_correct. eassumption. exact (fst (transf_rtl_program_correct _ _ H)). diff --git a/driver/Driver.ml b/driver/Driver.ml index 3a5981e..eb0e004 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -89,6 +89,7 @@ let parse_c_file sourcename ifile = (* Simplification options *) let simplifs = "b" (* blocks: mandatory *) +(* ^ (if !option_flonglong then "l" else "") *) ^ (if !option_fstruct_return then "s" else "") ^ (if !option_fbitfields then "f" else "") ^ (if !option_fpacked_structs then "p" else "") @@ -148,7 +149,8 @@ let compile_c_ast sourcename csyntax ofile = set_dest PrintRTL.destination_inlining option_dinlining ".inlining.rtl"; set_dest PrintRTL.destination_constprop option_dconstprop ".constprop.rtl"; set_dest PrintRTL.destination_cse option_dcse ".cse.rtl"; - set_dest PrintLTLin.destination option_dalloc ".alloc.ltl"; + set_dest Regalloc.destination_alloctrace option_dalloctrace ".alloctrace"; + set_dest PrintLTL.destination option_dalloc ".alloc.ltl"; set_dest PrintMach.destination option_dmach ".mach"; (* Convert to Asm *) let asm = @@ -451,6 +453,7 @@ let cmdline_actions = "-dconstprop$", Set option_dconstprop; "-dcse$", Set option_dcse; "-dalloc$", Set option_dalloc; + "-dalloctrace$", Set option_dalloctrace; "-dmach$", Set option_dmach; "-dasm$", Set option_dasm; "-sdump$", Set option_sdump; diff --git a/driver/Interp.ml b/driver/Interp.ml index b7971ed..4f50514 100644 --- a/driver/Interp.ml +++ b/driver/Interp.ml @@ -49,6 +49,7 @@ let print_id_ofs p (id, ofs) = let print_eventval p = function | EVint n -> fprintf p "%ld" (camlint_of_coqint n) | EVfloat f -> fprintf p "%F" (camlfloat_of_coqfloat f) + | EVlong n -> fprintf p "%Ld" (camlint64_of_coqint n) | EVptr_global(id, ofs) -> fprintf p "&%a" print_id_ofs (id, ofs) let print_eventval_list p = function diff --git a/exportclight/Clightdefs.v b/exportclight/Clightdefs.v index 4c3c942..1cb93d5 100644 --- a/exportclight/Clightdefs.v +++ b/exportclight/Clightdefs.v @@ -32,6 +32,8 @@ Definition tushort := Tint I16 Unsigned noattr. Definition tint := Tint I32 Signed noattr. Definition tuint := Tint I32 Unsigned noattr. Definition tbool := Tint IBool Unsigned noattr. +Definition tlong := Tlong Signed noattr. +Definition tulong := Tlong Unsigned noattr. Definition tfloat := Tfloat F32 noattr. Definition tdouble := Tfloat F64 noattr. Definition tptr (t: type) := Tpointer t noattr. @@ -43,6 +45,7 @@ Definition tvolatile (ty: type) := match ty with | Tvoid => Tvoid | Tint sz si a => Tint sz si volatile_attr + | Tlong si a => Tlong si volatile_attr | Tfloat sz a => Tfloat sz volatile_attr | Tpointer elt a => Tpointer elt volatile_attr | Tarray elt sz a => Tarray elt sz volatile_attr diff --git a/exportclight/ExportClight.ml b/exportclight/ExportClight.ml index ef00512..04f170d 100644 --- a/exportclight/ExportClight.ml +++ b/exportclight/ExportClight.ml @@ -92,6 +92,12 @@ let coqfloat p n = then fprintf p "(Float.double_of_bits (Int64.repr %Ld))" n else fprintf p "(Float.double_of_bits (Int64.repr (%Ld)))" n +let coqint64 p n = + let n = camlint64_of_coqint n in + if n >= 0L + then fprintf p "(Int64.repr %Ld)" n + else fprintf p "(Int64.repr (%Ld))" n + (* Types *) let use_struct_names = ref true @@ -114,6 +120,11 @@ and rtyp p = function | I32, Signed -> "tint" | I32, Unsigned -> "tuint" | IBool, _ -> "tbool") + | Tlong(sg, _) -> + fprintf p "%s" ( + match sg with + | Signed -> "tlong" + | Unsigned -> "tulong") | Tfloat(sz, _) -> fprintf p "%s" ( match sz with @@ -151,7 +162,8 @@ and fieldlist p = function (* External functions *) let asttype p t = - fprintf p "%s" (match t with AST.Tint -> "AST.Tint" | AST.Tfloat -> "AST.Tfloat") + fprintf p "%s" + (match t with AST.Tint -> "AST.Tint" | AST.Tfloat -> "AST.Tfloat" | AST.Tlong -> "AST.Tlong") let name_of_chunk = function | Mint8signed -> "Mint8signed" @@ -159,6 +171,7 @@ let name_of_chunk = function | Mint16signed -> "Mint16signed" | Mint16unsigned -> "Mint16unsigned" | Mint32 -> "Mint32" + | Mint64 -> "Mint64" | Mfloat32 -> "Mfloat32" | Mfloat64 -> "Mfloat64" | Mfloat64al32 -> "Mfloat64al32" @@ -240,6 +253,8 @@ let rec expr p = function fprintf p "(Econst_int %a %a)" coqint n typ t | Econst_float(n, t) -> fprintf p "(Econst_float %a %a)" coqfloat n typ t + | Econst_long(n, t) -> + fprintf p "(Econst_long %a %a)" coqint64 n typ t | Eunop(op, a1, t) -> fprintf p "@[<hov 2>(Eunop %s@ %a@ %a)@]" (name_unop op) expr a1 typ t @@ -316,6 +331,7 @@ let init_data p = function | Init_int8 n -> fprintf p "Init_int8 %a" coqint n | Init_int16 n -> fprintf p "Init_int16 %a" coqint n | Init_int32 n -> fprintf p "Init_int32 %a" coqint n + | Init_int64 n -> fprintf p "Init_int64 %a" coqint64 n | Init_float32 n -> fprintf p "Init_float32 %a" coqfloat n | Init_float64 n -> fprintf p "Init_float64 %a" coqfloat n | Init_space n -> fprintf p "Init_space %ld" (Z.to_int32 n) @@ -359,6 +375,7 @@ let register_struct_union ty = let rec collect_type = function | Tvoid -> () | Tint _ -> () + | Tlong _ -> () | Tfloat _ -> () | Tpointer(t, _) -> collect_type t | Tarray(t, _, _) -> collect_type t @@ -382,6 +399,7 @@ let rec collect_expr e = match e with | Econst_int _ -> () | Econst_float _ -> () + | Econst_long _ -> () | Evar _ -> () | Etempvar _ -> () | Ederef(r, _) -> collect_expr r diff --git a/extraction/extraction.v b/extraction/extraction.v index 1397416..804ccef 100644 --- a/extraction/extraction.v +++ b/extraction/extraction.v @@ -11,13 +11,14 @@ (* *********************************************************************) Require Wfsimpl. +Require AST. Require Iteration. Require Floats. +Require SelectLong. Require RTLgen. Require Inlining. Require ConstpropOp. Require Constprop. -Require Coloring. Require Allocation. Require Compiler. @@ -28,6 +29,10 @@ Require Import ExtrOcamlString. (* Wfsimpl *) Extraction Inline Wfsimpl.Fix Wfsimpl.Fixm. +(* AST *) +Extract Constant AST.ident_of_string => + "fun s -> Camlcoq.intern_string (Camlcoq.camlstring_of_coqstring s)". + (* Memdata *) Extract Constant Memdata.big_endian => "Memdataaux.big_endian". @@ -44,6 +49,15 @@ Extract Constant Iteration.GenIter.iterate => match f a with Coq_inl b -> Some b | Coq_inr a' -> iter f a' in iter". +(* Selection *) + +Extract Constant SelectLong.get_helper => + "fun ge s sg -> + Errors.OK (Camlcoq.intern_string (Camlcoq.camlstring_of_coqstring s))". +Extract Constant SelectLong.get_builtin => + "fun s sg -> + Errors.OK (Camlcoq.intern_string (Camlcoq.camlstring_of_coqstring s))". + (* RTLgen *) Extract Constant RTLgen.compile_switch => "RTLgenaux.compile_switch". Extract Constant RTLgen.more_likely => "RTLgenaux.more_likely". @@ -59,8 +73,15 @@ Extract Constant ConstpropOp.propagate_float_constants => Extract Constant Constprop.generate_float_constants => "fun _ -> !Clflags.option_ffloatconstprop >= 2". -(* Coloring *) -Extract Constant Coloring.graph_coloring => "Coloringaux.graph_coloring". +(* Allocation *) +Extract Constant Allocation.eq_operation => "(=)". +Extract Constant Allocation.eq_addressing => "(=)". +Extract Constant Allocation.eq_opt_addressing => "(=)". +Extract Constant Allocation.eq_condition => "(=)". +Extract Constant Allocation.eq_chunk => "(=)". +Extract Constant Allocation.eq_external_function => "(=)". +Extract Constant Allocation.eq_signature => "(=)". +Extract Constant Allocation.regalloc => "Regalloc.regalloc". (* Linearize *) Extract Constant Linearize.enumerate_aux => "Linearizeaux.enumerate_aux". @@ -77,7 +98,7 @@ Extract Constant Compiler.print_RTL_tailcall => "PrintRTL.print_tailcall". Extract Constant Compiler.print_RTL_inline => "PrintRTL.print_inlining". Extract Constant Compiler.print_RTL_constprop => "PrintRTL.print_constprop". Extract Constant Compiler.print_RTL_cse => "PrintRTL.print_cse". -Extract Constant Compiler.print_LTLin => "PrintLTLin.print_if". +Extract Constant Compiler.print_LTL => "PrintLTL.print_if". Extract Constant Compiler.print_Mach => "PrintMach.print_if". Extract Constant Compiler.print => "fun (f: 'a -> unit) (x: 'a) -> f x; x". (*Extraction Inline Compiler.apply_total Compiler.apply_partial.*) @@ -106,5 +127,10 @@ Separate Extraction Compiler.transf_c_program Compiler.transf_cminor_program Cexec.do_initial_state Cexec.do_step Cexec.at_final_state Initializers.transl_init Initializers.constval - Csyntax.Eindex Csyntax.Epreincr. + Csyntax.Eindex Csyntax.Epreincr + Conventions1.dummy_int_reg Conventions1.dummy_float_reg + RTL.instr_defs RTL.instr_uses + Machregs.mregs_for_operation Machregs.mregs_for_builtin + Machregs.two_address_op. + @@ -163,6 +163,7 @@ Inductive instruction: Type := | Pshr_ri (rd: ireg) (n: int) | Psar_rcl (rd: ireg) | Psar_ri (rd: ireg) (n: int) + | Pshld_ri (rd: ireg) (r1: ireg) (n: int) | Pror_ri (rd: ireg) (n: int) | Pcmp_rr (r1 r2: ireg) | Pcmp_ri (r1: ireg) (n: int) @@ -193,7 +194,7 @@ Inductive instruction: Type := | Plabel(l: label) | Pallocframe(sz: Z)(ofs_ra ofs_link: int) | Pfreeframe(sz: Z)(ofs_ra ofs_link: int) - | Pbuiltin(ef: external_function)(args: list preg)(res: preg) + | Pbuiltin(ef: external_function)(args: list preg)(res: list preg) | Pannot(ef: external_function)(args: list annot_param) with annot_param : Type := @@ -232,6 +233,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset := | r :: l' => undef_regs l' (rs#r <- Vundef) end. +(** Assigning multiple registers *) + +Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset := + match rl, vl with + | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1) + | _, _ => rs + end. + Section RELSEM. (** Looking up instructions in a code sequence by position. *) @@ -433,9 +442,10 @@ Definition exec_load (chunk: memory_chunk) (m: mem) end. Definition exec_store (chunk: memory_chunk) (m: mem) - (a: addrmode) (rs: regset) (r1: preg) := + (a: addrmode) (rs: regset) (r1: preg) + (destroyed: list preg) := match Mem.storev chunk m (eval_addrmode a rs) (rs r1) with - | Some m' => Next (nextinstr_nf (if preg_eq r1 ST0 then rs#ST0 <- Vundef else rs)) m' + | Some m' => Next (nextinstr_nf (undef_regs destroyed rs)) m' | None => Stuck end. @@ -470,7 +480,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome | Pmov_rm rd a => exec_load Mint32 m a rs rd | Pmov_mr a r1 => - exec_store Mint32 m a rs r1 + exec_store Mint32 m a rs r1 nil | Pmovd_fr rd r1 => Next (nextinstr (rs#rd <- (rs r1))) m | Pmovd_rf rd r1 => @@ -482,7 +492,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome | Pmovsd_fm rd a => exec_load Mfloat64al32 m a rs rd | Pmovsd_mf a r1 => - exec_store Mfloat64al32 m a rs r1 + exec_store Mfloat64al32 m a rs r1 nil | Pfld_f r1 => Next (nextinstr (rs#ST0 <- (rs r1))) m | Pfld_m a => @@ -490,14 +500,14 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome | Pfstp_f rd => Next (nextinstr (rs#rd <- (rs ST0) #ST0 <- Vundef)) m | Pfstp_m a => - exec_store Mfloat64al32 m a rs ST0 + exec_store Mfloat64al32 m a rs ST0 (ST0 :: nil) | Pxchg_rr r1 r2 => Next (nextinstr (rs#r1 <- (rs r2) #r2 <- (rs r1))) m (** Moves with conversion *) | Pmovb_mr a r1 => - exec_store Mint8unsigned m a rs r1 + exec_store Mint8unsigned m a rs r1 nil | Pmovw_mr a r1 => - exec_store Mint16unsigned m a rs r1 + exec_store Mint16unsigned m a rs r1 nil | Pmovzb_rr rd r1 => Next (nextinstr (rs#rd <- (Val.zero_ext 8 rs#r1))) m | Pmovzb_rm rd a => @@ -519,7 +529,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome | Pcvtsd2ss_ff rd r1 => Next (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m | Pcvtsd2ss_mf a r1 => - exec_store Mfloat32 m a rs r1 + exec_store Mfloat32 m a rs r1 (FR XMM7 :: nil) | Pcvttsd2si_rf rd r1 => Next (nextinstr (rs#rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m | Pcvtsi2sd_fr rd r1 => @@ -573,6 +583,10 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome Next (nextinstr_nf (rs#rd <- (Val.shr rs#rd rs#ECX))) m | Psar_ri rd n => Next (nextinstr_nf (rs#rd <- (Val.shr rs#rd (Vint n)))) m + | Pshld_ri rd r1 n => + Next (nextinstr_nf + (rs#rd <- (Val.or (Val.shl rs#rd (Vint n)) + (Val.shru rs#r1 (Vint (Int.sub Int.iwordsize n)))))) m | Pror_ri rd n => Next (nextinstr_nf (rs#rd <- (Val.ror rs#rd (Vint n)))) m | Pcmp_rr r1 r2 => @@ -590,7 +604,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome | None => Next (nextinstr (rs#rd <- Vundef)) m end | Psetcc c rd => - Next (nextinstr (rs#ECX <- Vundef #rd <- (Val.of_optbool (eval_testcond c rs)))) m + Next (nextinstr (rs#rd <- (Val.of_optbool (eval_testcond c rs)))) m (** Arithmetic operations over floats *) | Paddd_ff rd r1 => Next (nextinstr (rs#rd <- (Val.addf rs#rd rs#r1))) m @@ -632,7 +646,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome | Vint n => match list_nth_z tbl (Int.unsigned n) with | None => Stuck - | Some lbl => goto_label c lbl (rs #ECX <- Vundef #EDX <- Vundef) m + | Some lbl => goto_label c lbl rs m end | _ => Stuck end @@ -686,6 +700,8 @@ Definition preg_of (r: mreg) : preg := match r with | AX => IR EAX | BX => IR EBX + | CX => IR ECX + | DX => IR EDX | SI => IR ESI | DI => IR EDI | BP => IR EBP @@ -695,10 +711,8 @@ Definition preg_of (r: mreg) : preg := | X3 => FR XMM3 | X4 => FR XMM4 | X5 => FR XMM5 - | IT1 => IR EDX - | IT2 => IR ECX - | FT1 => FR XMM6 - | FT2 => FR XMM7 + | X6 => FR XMM6 + | X7 => FR XMM7 | FP0 => ST0 end. @@ -706,24 +720,24 @@ Definition preg_of (r: mreg) : preg := We exploit the calling conventions from module [Conventions], except that we use machine registers instead of locations. *) +Definition chunk_of_type (ty: typ) := + match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end. + Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop := | extcall_arg_reg: forall r, extcall_arg rs m (R r) (rs (preg_of r)) - | extcall_arg_int_stack: forall ofs bofs v, + | extcall_arg_stack: forall ofs ty bofs v, bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv Mint32 m (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v -> - extcall_arg rs m (S (Outgoing ofs Tint)) v - | extcall_arg_float_stack: forall ofs bofs v, - bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv Mfloat64al32 m (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v -> - extcall_arg rs m (S (Outgoing ofs Tfloat)) v. + Mem.loadv (chunk_of_type ty) m + (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v -> + extcall_arg rs m (S Outgoing ofs ty) v. Definition extcall_arguments (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop := list_forall2 (extcall_arg rs m) (loc_arguments sg) args. -Definition loc_external_result (sg: signature) : preg := - preg_of (loc_result sg). +Definition loc_external_result (sg: signature) : list preg := + map preg_of (loc_result sg). (** Extract the values of the arguments of an annotation. *) @@ -753,33 +767,31 @@ Inductive step: state -> trace -> state -> Prop := exec_instr c i rs m = Next rs' m' -> step (State rs m) E0 (State rs' m') | exec_step_builtin: - forall b ofs c ef args res rs m t v m', + forall b ofs c ef args res rs m t vl rs' m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal c) -> find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) -> - external_call ef ge (map rs args) m t v m' -> - step (State rs m) t - (State (nextinstr_nf(rs #EDX <- Vundef #ECX <- Vundef - #XMM6 <- Vundef #XMM7 <- Vundef - #ST0 <- Vundef - #res <- v)) m') + external_call' ef ge (map rs args) m t vl m' -> + rs' = nextinstr_nf + (set_regs res vl + (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) -> + step (State rs m) t (State rs' m') | exec_step_annot: forall b ofs c ef args rs m vargs t v m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal c) -> find_instr (Int.unsigned ofs) c = Some (Pannot ef args) -> annot_arguments rs m args vargs -> - external_call ef ge vargs m t v m' -> + external_call' ef ge vargs m t v m' -> step (State rs m) t (State (nextinstr rs) m') | exec_step_external: forall b ef args res rs m t rs' m', rs PC = Vptr b Int.zero -> Genv.find_funct_ptr ge b = Some (External ef) -> - external_call ef ge args m t res m' -> extcall_arguments rs m (ef_sig ef) args -> - rs' = (rs#(loc_external_result (ef_sig ef)) <- res - #PC <- (rs RA)) -> + external_call' ef ge args m t res m' -> + rs' = (set_regs (loc_external_result (ef_sig ef)) res rs) #PC <- (rs RA) -> step (State rs m) t (State rs' m'). End RELSEM. @@ -847,21 +859,21 @@ Ltac Equalities := discriminate. discriminate. inv H11. - exploit external_call_determ. eexact H4. eexact H11. intros [A B]. + exploit external_call_determ'. eexact H4. eexact H9. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. inv H12. assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0. - exploit external_call_determ. eexact H5. eexact H13. intros [A B]. + exploit external_call_determ'. eexact H5. eexact H13. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0. - exploit external_call_determ. eexact H3. eexact H8. intros [A B]. + exploit external_call_determ'. eexact H4. eexact H9. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. (* trace length *) red; intros; inv H; simpl. omega. - eapply external_call_trace_length; eauto. - eapply external_call_trace_length; eauto. - eapply external_call_trace_length; eauto. + inv H3. eapply external_call_trace_length; eauto. + inv H4. eapply external_call_trace_length; eauto. + inv H3. eapply external_call_trace_length; eauto. (* initial states *) inv H; inv H0. f_equal. congruence. (* final no step *) @@ -882,17 +894,3 @@ Definition data_preg (r: preg) : bool := | RA => false end. -Definition nontemp_preg (r: preg) : bool := - match r with - | PC => false - | IR ECX => false - | IR EDX => false - | IR _ => true - | FR XMM6 => false - | FR XMM7 => false - | FR _ => true - | ST0 => false - | CR _ => false - | RA => false - end. - diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v index 6e3ccf8..78f7d6e 100644 --- a/ia32/Asmgen.v +++ b/ia32/Asmgen.v @@ -17,6 +17,7 @@ Require Import Errors. Require Import AST. Require Import Integers. Require Import Floats. +Require Import Memdata. Require Import Op. Require Import Locations. Require Import Mach. @@ -54,59 +55,12 @@ Definition mk_mov (rd rs: preg) (k: code) : res code := | _, _ => Error(msg "Asmgen.mk_mov") end. -Definition mk_shift (shift_instr: ireg -> instruction) - (r1 r2: ireg) (k: code) : res code := - if ireg_eq r2 ECX then - OK (shift_instr r1 :: k) - else if ireg_eq r1 ECX then - OK (Pxchg_rr r2 ECX :: shift_instr r2 :: Pxchg_rr r2 ECX :: k) - else - OK (Pmov_rr ECX r2 :: shift_instr r1 :: k). - -Definition mk_mov2 (src1 dst1 src2 dst2: ireg) (k: code) : code := - if ireg_eq src1 dst1 then - Pmov_rr dst2 src2 :: k - else if ireg_eq src2 dst2 then - Pmov_rr dst1 src1 :: k - else if ireg_eq src2 dst1 then - if ireg_eq src1 dst2 then - Pxchg_rr src1 src2 :: k - else - Pmov_rr dst2 src2 :: Pmov_rr dst1 src1 :: k - else - Pmov_rr dst1 src1 :: Pmov_rr dst2 src2 :: k. - -Definition mk_div (div_instr: ireg -> instruction) - (r1 r2: ireg) (k: code) : res code := - if ireg_eq r1 EAX then - if ireg_eq r2 EDX then - OK (Pmov_rr ECX EDX :: div_instr ECX :: k) - else - OK (div_instr r2 :: k) - else - OK (Pmovd_fr XMM7 EAX :: - mk_mov2 r1 EAX r2 ECX - (div_instr ECX :: Pmov_rr r1 EAX :: Pmovd_rf EAX XMM7 :: k)). - -Definition mk_mod (div_instr: ireg -> instruction) - (r1 r2: ireg) (k: code) : res code := - if ireg_eq r1 EAX then - if ireg_eq r2 EDX then - OK (Pmov_rr ECX EDX :: div_instr ECX :: Pmov_rr EAX EDX :: k) - else - OK (div_instr r2 :: Pmov_rr EAX EDX :: k) - else - OK (Pmovd_fr XMM7 EAX :: - mk_mov2 r1 EAX r2 ECX - (div_instr ECX :: Pmov_rr r1 EDX :: Pmovd_rf EAX XMM7 :: k)). - -Definition mk_shrximm (r: ireg) (n: int) (k: code) : res code := - let tmp := if ireg_eq r ECX then EDX else ECX in +Definition mk_shrximm (n: int) (k: code) : res code := let p := Int.sub (Int.shl Int.one n) Int.one in - OK (Ptest_rr r r :: - Plea tmp (Addrmode (Some r) None (inl _ p)) :: - Pcmov Cond_l r tmp :: - Psar_ri r n :: k). + OK (Ptest_rr EAX EAX :: + Plea ECX (Addrmode (Some EAX) None (inl _ p)) :: + Pcmov Cond_l EAX ECX :: + Psar_ri EAX n :: k). Definition low_ireg (r: ireg) : bool := match r with @@ -118,7 +72,7 @@ Definition mk_intconv (mk: ireg -> ireg -> instruction) (rd rs: ireg) (k: code) if low_ireg rs then OK (mk rd rs :: k) else - OK (Pmov_rr EDX rs :: mk rd EDX :: k). + OK (Pmov_rr EAX rs :: mk rd EAX :: k). Definition addressing_mentions (addr: addrmode) (r: ireg) : bool := match addr with Addrmode base displ const => @@ -130,11 +84,11 @@ Definition mk_smallstore (sto: addrmode -> ireg ->instruction) (addr: addrmode) (rs: ireg) (k: code) := if low_ireg rs then OK (sto addr rs :: k) - else if addressing_mentions addr ECX then - OK (Plea ECX addr :: Pmov_rr EDX rs :: - sto (Addrmode (Some ECX) None (inl _ Int.zero)) EDX :: k) + else if addressing_mentions addr EAX then + OK (Plea ECX addr :: Pmov_rr EAX rs :: + sto (Addrmode (Some ECX) None (inl _ Int.zero)) EAX :: k) else - OK (Pmov_rr ECX rs :: sto addr ECX :: k). + OK (Pmov_rr EAX rs :: sto addr EAX :: k). (** Accessing slots in the stack frame. *) @@ -149,6 +103,8 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) := | ST0 => OK (Pfld_m (Addrmode (Some base) None (inl _ ofs)) :: k) | _ => Error (msg "Asmgen.loadind") end + | Tlong => + Error (msg "Asmgen.loadind") end. Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := @@ -162,6 +118,8 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := | ST0 => OK (Pfstp_m (Addrmode (Some base) None (inl _ ofs)) :: k) | _ => Error (msg "Asmgen.loadind") end + | Tlong => + Error (msg "Asmgen.storeind") end. (** Translation of addressing modes *) @@ -284,19 +242,25 @@ Definition testcond_for_condition (cond: condition) : extcond := (** Acting upon extended conditions. *) -Definition mk_setcc (cond: extcond) (rd: ireg) (k: code) := +Definition mk_setcc_base (cond: extcond) (rd: ireg) (k: code) := match cond with - | Cond_base c => Psetcc c rd :: k + | Cond_base c => + Psetcc c rd :: k | Cond_and c1 c2 => - if ireg_eq rd EDX - then Psetcc c1 EDX :: Psetcc c2 ECX :: Pand_rr EDX ECX :: k - else Psetcc c1 EDX :: Psetcc c2 rd :: Pand_rr rd EDX :: k + if ireg_eq rd EAX + then Psetcc c1 EAX :: Psetcc c2 ECX :: Pand_rr EAX ECX :: k + else Psetcc c1 EAX :: Psetcc c2 rd :: Pand_rr rd EAX :: k | Cond_or c1 c2 => - if ireg_eq rd EDX - then Psetcc c1 EDX :: Psetcc c2 ECX :: Por_rr EDX ECX :: k - else Psetcc c1 EDX :: Psetcc c2 rd :: Por_rr rd EDX :: k + if ireg_eq rd EAX + then Psetcc c1 EAX :: Psetcc c2 ECX :: Por_rr EAX ECX :: k + else Psetcc c1 EAX :: Psetcc c2 rd :: Por_rr rd EAX :: k end. +Definition mk_setcc (cond: extcond) (rd: ireg) (k: code) := + if low_ireg rd + then mk_setcc_base cond rd k + else mk_setcc_base cond EAX (Pmov_rr rd EAX :: k). + Definition mk_jcc (cond: extcond) (lbl: label) (k: code) := match cond with | Cond_base c => Pjcc c lbl :: k @@ -330,91 +294,106 @@ Definition transl_op | Ocast16unsigned, a1 :: nil => do r1 <- ireg_of a1; do r <- ireg_of res; mk_intconv Pmovzw_rr r r1 k | Oneg, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Pneg r :: k) | Osub, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; do r2 <- ireg_of a2; OK (Psub_rr r r2 :: k) | Omul, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pimul_rr r r2 :: k) | Omulimm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Pimul_ri r n :: k) | Odiv, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); - do r <- ireg_of res; do r2 <- ireg_of a2; mk_div Pidiv r r2 k + assertion (mreg_eq a1 AX); + assertion (mreg_eq a2 CX); + assertion (mreg_eq res AX); + OK(Pidiv ECX :: k) | Odivu, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); - do r <- ireg_of res; do r2 <- ireg_of a2; mk_div Pdiv r r2 k + assertion (mreg_eq a1 AX); + assertion (mreg_eq a2 CX); + assertion (mreg_eq res AX); + OK(Pdiv ECX :: k) | Omod, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); - do r <- ireg_of res; do r2 <- ireg_of a2; mk_mod Pidiv r r2 k + assertion (mreg_eq a1 AX); + assertion (mreg_eq a2 CX); + assertion (mreg_eq res DX); + OK(Pidiv ECX :: k) | Omodu, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); - do r <- ireg_of res; do r2 <- ireg_of a2; mk_mod Pdiv r r2 k + assertion (mreg_eq a1 AX); + assertion (mreg_eq a2 CX); + assertion (mreg_eq res DX); + OK(Pdiv ECX :: k) | Oand, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pand_rr r r2 :: k) | Oandimm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Pand_ri r n :: k) | Oor, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; do r2 <- ireg_of a2; OK (Por_rr r r2 :: k) | Oorimm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Por_ri r n :: k) | Oxor, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pxor_rr r r2 :: k) | Oxorimm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Pxor_ri r n :: k) | Oshl, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); - do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Psal_rcl r r2 k + assertion (mreg_eq a1 res); + assertion (mreg_eq a2 CX); + do r <- ireg_of res; OK (Psal_rcl r :: k) | Oshlimm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Psal_ri r n :: k) | Oshr, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); - do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Psar_rcl r r2 k + assertion (mreg_eq a1 res); + assertion (mreg_eq a2 CX); + do r <- ireg_of res; OK (Psar_rcl r :: k) | Oshrimm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Psar_ri r n :: k) | Oshru, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); - do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Pshr_rcl r r2 k + assertion (mreg_eq a1 res); + assertion (mreg_eq a2 CX); + do r <- ireg_of res; OK (Pshr_rcl r :: k) | Oshruimm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Pshr_ri r n :: k) | Oshrximm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); - do r <- ireg_of res; mk_shrximm r n k + assertion (mreg_eq a1 AX); + assertion (mreg_eq res AX); + mk_shrximm n k | Ororimm n, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Pror_ri r n :: k) + | Oshldimm n, a1 :: a2 :: nil => + assertion (mreg_eq a1 res); + do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pshld_ri r r2 n :: k) | Olea addr, _ => do am <- transl_addressing addr args; do r <- ireg_of res; OK (Plea r am :: k) | Onegf, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- freg_of res; OK (Pnegd r :: k) | Oabsf, a1 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- freg_of res; OK (Pabsd r :: k) | Oaddf, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- freg_of res; do r2 <- freg_of a2; OK (Paddd_ff r r2 :: k) | Osubf, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- freg_of res; do r2 <- freg_of a2; OK (Psubd_ff r r2 :: k) | Omulf, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- freg_of res; do r2 <- freg_of a2; OK (Pmuld_ff r r2 :: k) | Odivf, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivd_ff r r2 :: k) | Osingleoffloat, a1 :: nil => do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtsd2ss_ff r r1 :: k) @@ -450,6 +429,8 @@ Definition transl_load (chunk: memory_chunk) do r <- freg_of dest; OK(Pcvtss2sd_fm r am :: k) | Mfloat64 | Mfloat64al32 => do r <- freg_of dest; OK(Pmovsd_fm r am :: k) + | Mint64 => + Error (msg "Asmgen.transl_load") end. Definition transl_store (chunk: memory_chunk) @@ -467,6 +448,8 @@ Definition transl_store (chunk: memory_chunk) do r <- freg_of src; OK(Pcvtsd2ss_mf am r :: k) | Mfloat64 | Mfloat64al32 => do r <- freg_of src; OK(Pmovsd_mf am r :: k) + | Mint64 => + Error (msg "Asmgen.transl_store") end. (** Translation of arguments to annotations *) @@ -491,7 +474,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) loadind EDX ofs ty dst k else (do k1 <- loadind EDX ofs ty dst k; - loadind ESP f.(fn_link_ofs) Tint IT1 k1) + loadind ESP f.(fn_link_ofs) Tint DX k1) | Mop op args res => transl_op op args res k | Mload chunk addr args dst => @@ -521,7 +504,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) OK (Pfreeframe f.(fn_stacksize) f.(fn_retaddr_ofs) f.(fn_link_ofs) :: Pret :: k) | Mbuiltin ef args res => - OK (Pbuiltin ef (List.map preg_of args) (preg_of res) :: k) + OK (Pbuiltin ef (List.map preg_of args) (List.map preg_of res) :: k) | Mannot ef args => OK (Pannot ef (map transl_annot_param args) :: k) end. @@ -531,7 +514,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool := match i with | Msetstack src ofs ty => before - | Mgetparam ofs ty dst => negb (mreg_eq dst IT1) + | Mgetparam ofs ty dst => negb (mreg_eq dst DX) | _ => false end. diff --git a/ia32/Asmgenproof.v b/ia32/Asmgenproof.v index e43392a..ca0fd18 100644 --- a/ia32/Asmgenproof.v +++ b/ia32/Asmgenproof.v @@ -138,49 +138,8 @@ Proof. Qed. Hint Resolve mk_mov_label: labels. -Remark mk_shift_label: - forall f r1 r2 k c, mk_shift f r1 r2 k = OK c -> - (forall r, nolabel (f r)) -> - tail_nolabel k c. -Proof. - unfold mk_shift; intros. TailNoLabel. -Qed. -Hint Resolve mk_shift_label: labels. - -Remark mk_mov2_label: - forall r1 r2 r3 r4 k, - tail_nolabel k (mk_mov2 r1 r2 r3 r4 k). -Proof. - intros; unfold mk_mov2. - destruct (ireg_eq r1 r2); TailNoLabel. - destruct (ireg_eq r3 r4); TailNoLabel. - destruct (ireg_eq r3 r2); TailNoLabel. - destruct (ireg_eq r1 r4); TailNoLabel. -Qed. -Hint Resolve mk_mov2_label: labels. - -Remark mk_div_label: - forall f r1 r2 k c, mk_div f r1 r2 k = OK c -> - (forall r, nolabel (f r)) -> - tail_nolabel k c. -Proof. - unfold mk_div; intros. TailNoLabel. - eapply tail_nolabel_trans; TailNoLabel. -Qed. -Hint Resolve mk_div_label: labels. - -Remark mk_mod_label: - forall f r1 r2 k c, mk_mod f r1 r2 k = OK c -> - (forall r, nolabel (f r)) -> - tail_nolabel k c. -Proof. - unfold mk_mod; intros. TailNoLabel. - eapply tail_nolabel_trans; TailNoLabel. -Qed. -Hint Resolve mk_mod_label: labels. - Remark mk_shrximm_label: - forall r n k c, mk_shrximm r n k = OK c -> tail_nolabel k c. + forall n k c, mk_shrximm n k = OK c -> tail_nolabel k c. Proof. intros. monadInv H; TailNoLabel. Qed. @@ -212,6 +171,7 @@ Proof. unfold loadind; intros. destruct ty. TailNoLabel. destruct (preg_of dst); TailNoLabel. + discriminate. Qed. Remark storeind_label: @@ -222,13 +182,23 @@ Proof. unfold storeind; intros. destruct ty. TailNoLabel. destruct (preg_of src); TailNoLabel. + discriminate. +Qed. + +Remark mk_setcc_base_label: + forall xc rd k, + tail_nolabel k (mk_setcc_base xc rd k). +Proof. + intros. destruct xc; simpl; destruct (ireg_eq rd EAX); TailNoLabel. Qed. Remark mk_setcc_label: forall xc rd k, tail_nolabel k (mk_setcc xc rd k). Proof. - intros. destruct xc; simpl; destruct (ireg_eq rd EDX); TailNoLabel. + intros. unfold mk_setcc. destruct (low_ireg rd). + apply mk_setcc_base_label. + eapply tail_nolabel_trans. apply mk_setcc_base_label. TailNoLabel. Qed. Remark mk_jcc_label: @@ -534,7 +504,7 @@ Proof. rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR. exploit storeind_correct; eauto. intros [rs' [P Q]]. exists rs'; split. eauto. - split. unfold undef_setstack. eapply agree_undef_move; eauto. + split. eapply agree_undef_regs; eauto. simpl; intros. rewrite Q; auto with asmgen. - (* Mgetparam *) @@ -547,9 +517,9 @@ Proof. intros [v' [C D]]. Opaque loadind. left; eapply exec_straight_steps; eauto; intros. - assert (DIFF: negb (mreg_eq dst IT1) = true -> IR EDX <> preg_of dst). - intros. change (IR EDX) with (preg_of IT1). red; intros. - unfold proj_sumbool in H1. destruct (mreg_eq dst IT1); try discriminate. + assert (DIFF: negb (mreg_eq dst DX) = true -> IR EDX <> preg_of dst). + intros. change (IR EDX) with (preg_of DX). red; intros. + unfold proj_sumbool in H1. destruct (mreg_eq dst DX); try discriminate. elim n. eapply preg_of_injective; eauto. destruct ep; simpl in TR. (* EDX contains parent *) @@ -577,10 +547,7 @@ Opaque loadind. exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. assert (S: Val.lessdef v (rs2 (preg_of res))) by (eapply Val.lessdef_trans; eauto). exists rs2; split. eauto. - split. - unfold undef_op. - destruct op; try (eapply agree_set_undef_mreg; eauto). - eapply agree_set_undef_move_mreg; eauto. + split. eapply agree_set_undef_mreg; eauto. simpl; congruence. - (* Mload *) @@ -606,7 +573,7 @@ Opaque loadind. intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]]. exists rs2; split. eauto. - split. eapply agree_exten_temps; eauto. + split. eapply agree_undef_regs; eauto. simpl; congruence. - (* Mcall *) @@ -700,22 +667,26 @@ Opaque loadind. inv AT. monadInv H3. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H2); intro NOOV. - exploit external_call_mem_extends; eauto. eapply preg_vals; eauto. + exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto. intros [vres' [m2' [A [B [C D]]]]]. left. econstructor; split. apply plus_one. eapply exec_step_builtin. eauto. eauto. eapply find_instr_tail; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. + eauto. econstructor; eauto. instantiate (2 := tf); instantiate (1 := x). unfold nextinstr_nf, nextinstr. rewrite Pregmap.gss. - simpl undef_regs. repeat rewrite Pregmap.gso; auto with asmgen. + rewrite undef_regs_other. rewrite set_pregs_other_2. rewrite undef_regs_other_2. rewrite <- H0. simpl. econstructor; eauto. eapply code_tail_next_int; eauto. - apply agree_nextinstr_nf. eapply agree_set_undef_mreg; eauto. - rewrite Pregmap.gss. auto. - intros. Simplifs. + rewrite preg_notin_charact. intros. auto with asmgen. + rewrite preg_notin_charact. intros. auto with asmgen. + auto with asmgen. + simpl; intros. intuition congruence. + apply agree_nextinstr_nf. eapply agree_set_mregs; auto. + eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto. congruence. - (* Mannot *) @@ -723,12 +694,12 @@ Opaque loadind. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H3); intro NOOV. exploit annot_arguments_match; eauto. intros [vargs' [P Q]]. - exploit external_call_mem_extends; eauto. + exploit external_call_mem_extends'; eauto. intros [vres' [m2' [A [B [C D]]]]]. left. econstructor; split. apply plus_one. eapply exec_step_annot. eauto. eauto. eapply find_instr_tail; eauto. eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. eapply match_states_intro with (ep := false); eauto with coqlib. unfold nextinstr. rewrite Pregmap.gss. @@ -762,7 +733,7 @@ Opaque loadind. (* simple jcc *) exists (Pjcc c1 lbl); exists k; exists rs'. split. eexact A. - split. eapply agree_exten_temps; eauto. + split. eapply agree_exten; eauto. simpl. rewrite B. auto. (* jcc; jcc *) destruct (eval_testcond c1 rs') as [b1|] eqn:TC1; @@ -771,13 +742,13 @@ Opaque loadind. (* first jcc jumps *) exists (Pjcc c1 lbl); exists (Pjcc c2 lbl :: k); exists rs'. split. eexact A. - split. eapply agree_exten_temps; eauto. + split. eapply agree_exten; eauto. simpl. rewrite TC1. auto. (* second jcc jumps *) exists (Pjcc c2 lbl); exists k; exists (nextinstr rs'). split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl. rewrite TC1. auto. auto. - split. eapply agree_exten_temps; eauto. + split. eapply agree_exten; eauto. intros; Simplifs. simpl. rewrite eval_testcond_nextinstr. rewrite TC2. destruct b2; auto || discriminate. @@ -787,7 +758,7 @@ Opaque loadind. destruct (andb_prop _ _ H3). subst. exists (Pjcc2 c1 c2 lbl); exists k; exists rs'. split. eexact A. - split. eapply agree_exten_temps; eauto. + split. eapply agree_exten; eauto. simpl. rewrite TC1; rewrite TC2; auto. - (* Mcond false *) @@ -801,7 +772,7 @@ Opaque loadind. econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B. eauto. auto. - split. apply agree_nextinstr. eapply agree_exten_temps; eauto. + split. apply agree_nextinstr. eapply agree_exten; eauto. simpl; congruence. (* jcc ; jcc *) destruct (eval_testcond c1 rs') as [b1|] eqn:TC1; @@ -811,7 +782,7 @@ Opaque loadind. eapply exec_straight_trans. eexact A. eapply exec_straight_two. simpl. rewrite TC1. eauto. auto. simpl. rewrite eval_testcond_nextinstr. rewrite TC2. eauto. auto. auto. - split. apply agree_nextinstr. apply agree_nextinstr. eapply agree_exten_temps; eauto. + split. apply agree_nextinstr. apply agree_nextinstr. eapply agree_exten; eauto. simpl; congruence. (* jcc2 *) destruct (eval_testcond c1 rs') as [b1|] eqn:TC1; @@ -822,7 +793,7 @@ Opaque loadind. rewrite TC1; rewrite TC2. destruct b1. simpl in *. subst b2. auto. auto. auto. - split. apply agree_nextinstr. eapply agree_exten_temps; eauto. + split. apply agree_nextinstr. eapply agree_exten; eauto. rewrite H1; congruence. - (* Mjumptable *) @@ -830,8 +801,7 @@ Opaque loadind. inv AT. monadInv H6. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H5); intro NOOV. - exploit find_label_goto_label. eauto. eauto. instantiate (2 := rs0#ECX <- Vundef #EDX <- Vundef). - repeat (rewrite Pregmap.gso by auto with asmgen). eauto. eauto. + exploit find_label_goto_label; eauto. intros [tc' [rs' [A [B C]]]]. exploit ireg_val; eauto. rewrite H. intros LD; inv LD. left; econstructor; split. @@ -839,7 +809,8 @@ Opaque loadind. eapply find_instr_tail; eauto. simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eauto. econstructor; eauto. - eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simplifs. +Transparent destroyed_by_jumptable. + simpl. eapply agree_exten; eauto. intros. rewrite C; auto with asmgen. congruence. - (* Mreturn *) @@ -890,8 +861,9 @@ Opaque loadind. subst x. unfold fn_code. eapply code_tail_next_int. rewrite list_length_z_cons. omega. constructor. apply agree_nextinstr. eapply agree_change_sp; eauto. - apply agree_exten_temps with rs0; eauto. - intros; Simplifs. +Transparent destroyed_at_function_entry. + apply agree_undef_regs with rs0; eauto. + simpl; intros. apply Pregmap.gso; auto with asmgen. tauto. congruence. intros. Simplifs. eapply agree_sp; eauto. @@ -900,17 +872,15 @@ Opaque loadind. intros [tf [A B]]. simpl in B. inv B. exploit extcall_arguments_match; eauto. intros [args' [C D]]. - exploit external_call_mem_extends; eauto. + exploit external_call_mem_extends'; eauto. intros [res' [m2' [P [Q [R S]]]]]. left; econstructor; split. apply plus_one. eapply exec_step_external; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto. unfold loc_external_result. - eapply agree_set_mreg; eauto. - rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto. - intros; Simplifs. + apply agree_set_other; auto. apply agree_set_mregs; auto. - (* return *) inv STACKS. simpl in *. @@ -942,10 +912,9 @@ Lemma transf_final_states: forall st1 st2 r, match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r. Proof. - intros. inv H0. inv H. inv STACKS. constructor. - auto. - compute in H1. - generalize (preg_val _ _ _ AX AG). rewrite H1. intros LD; inv LD. auto. + intros. inv H0. inv H. constructor. auto. + compute in H1. inv H1. + generalize (preg_val _ _ _ AX AG). rewrite H2. intros LD; inv LD. auto. Qed. Theorem transf_program_correct: diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v index e3e62cc..303337e 100644 --- a/ia32/Asmgenproof1.v +++ b/ia32/Asmgenproof1.v @@ -37,10 +37,11 @@ Lemma agree_nextinstr_nf: agree ms sp rs -> agree ms sp (nextinstr_nf rs). Proof. intros. unfold nextinstr_nf. apply agree_nextinstr. - apply agree_undef_regs. auto. + apply agree_undef_nondata_regs. auto. intro. simpl. ElimOrEq; auto. Qed. +(* Lemma agree_undef_move: forall ms sp rs rs', agree ms sp rs -> @@ -71,6 +72,7 @@ Proof. congruence. auto. intros. rewrite Pregmap.gso; auto. Qed. +*) (** Useful properties of the PC register. *) @@ -95,13 +97,6 @@ Proof. intros. apply nextinstr_nf_inv. destruct r; auto || discriminate. Qed. -Lemma nextinstr_nf_inv2: - forall r rs, - nontemp_preg r = true -> (nextinstr_nf rs)#r = rs#r. -Proof. - intros. apply nextinstr_nf_inv1; auto with asmgen. -Qed. - Lemma nextinstr_nf_set_preg: forall rs m v, (nextinstr_nf (rs#(preg_of m) <- v))#PC = Val.add rs#PC Vone. @@ -166,180 +161,7 @@ Proof. split. Simplifs. intros; Simplifs. Qed. -(** Smart constructor for shifts *) - -Lemma mk_shift_correct: - forall sinstr ssem r1 r2 k c rs1 m, - mk_shift sinstr r1 r2 k = OK c -> - (forall r c rs m, - exec_instr ge c (sinstr r) rs m = Next (nextinstr_nf (rs#r <- (ssem rs#r rs#ECX))) m) -> - exists rs2, - exec_straight ge fn c rs1 m k rs2 m - /\ rs2#r1 = ssem rs1#r1 rs1#r2 - /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r. -Proof. - unfold mk_shift; intros. - destruct (ireg_eq r2 ECX). -(* fast case *) - monadInv H. - econstructor. split. apply exec_straight_one. apply H0. auto. - split. Simplifs. intros; Simplifs. -(* xchg case *) - destruct (ireg_eq r1 ECX); monadInv H. - econstructor. split. eapply exec_straight_three. - simpl; eauto. - apply H0. - simpl; eauto. - auto. auto. auto. - split. Simplifs. f_equal. Simplifs. - intros; Simplifs. destruct (preg_eq r r2). subst r. Simplifs. Simplifs. -(* general case *) - econstructor. split. eapply exec_straight_two. simpl; eauto. apply H0. - auto. auto. - split. Simplifs. f_equal. Simplifs. intros. Simplifs. -Qed. - -(** Parallel move 2 *) - -Lemma mk_mov2_correct: - forall src1 dst1 src2 dst2 k rs m, - dst1 <> dst2 -> - exists rs', - exec_straight ge fn (mk_mov2 src1 dst1 src2 dst2 k) rs m k rs' m - /\ rs'#dst1 = rs#src1 - /\ rs'#dst2 = rs#src2 - /\ forall r, r <> PC -> r <> dst1 -> r <> dst2 -> rs'#r = rs#r. -Proof. - intros. unfold mk_mov2. -(* single moves *) - destruct (ireg_eq src1 dst1). subst. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. - intuition Simplifs. - destruct (ireg_eq src2 dst2). subst. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. - intuition Simplifs. -(* xchg *) - destruct (ireg_eq src2 dst1). destruct (ireg_eq src1 dst2). - subst. econstructor; split. apply exec_straight_one. simpl; eauto. auto. - intuition Simplifs. -(* move 2; move 1 *) - subst. econstructor; split. eapply exec_straight_two. - simpl; eauto. simpl; eauto. auto. auto. - intuition Simplifs. -(* move 1; move 2*) - subst. econstructor; split. eapply exec_straight_two. - simpl; eauto. simpl; eauto. auto. auto. - intuition Simplifs. -Qed. - -(** Smart constructor for division *) - -Lemma mk_div_correct: - forall mkinstr dsem msem r1 r2 k c (rs1: regset) m vq vr, - mk_div mkinstr r1 r2 k = OK c -> - (forall r c rs m, - exec_instr ge c (mkinstr r) rs m = - let vn := rs#EAX in let vd := (rs#EDX <- Vundef)#r in - match dsem vn vd, msem vn vd with - | Some vq, Some vr => Next (nextinstr_nf (rs#EAX <- vq #EDX <- vr)) m - | _, _ => Stuck - end) -> - dsem rs1#r1 rs1#r2 = Some vq -> - msem rs1#r1 rs1#r2 = Some vr -> - exists rs2, - exec_straight ge fn c rs1 m k rs2 m - /\ rs2#r1 = vq - /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r. -Proof. - unfold mk_div; intros. - destruct (ireg_eq r1 EAX). destruct (ireg_eq r2 EDX); monadInv H. -(* r1=EAX r2=EDX *) - econstructor. split. eapply exec_straight_two. simpl; eauto. - rewrite H0. - change (nextinstr rs1 # ECX <- (rs1 EDX) EAX) with (rs1#EAX). - change ((nextinstr rs1 # ECX <- (rs1 EDX)) # EDX <- Vundef ECX) with (rs1#EDX). - rewrite H1. rewrite H2. eauto. auto. auto. - intuition Simplifs. -(* r1=EAX r2<>EDX *) - econstructor. split. eapply exec_straight_one. rewrite H0. - replace (rs1 # EDX <- Vundef r2) with (rs1 r2). rewrite H1; rewrite H2. eauto. - symmetry. Simplifs. auto. - intuition Simplifs. -(* r1 <> EAX *) - monadInv H. - set (rs2 := nextinstr (rs1#XMM7 <- (rs1#EAX))). - exploit (mk_mov2_correct r1 EAX r2 ECX). congruence. instantiate (1 := rs2). - intros [rs3 [A [B [C D]]]]. - econstructor; split. - apply exec_straight_step with rs2 m; auto. - eapply exec_straight_trans. eexact A. - eapply exec_straight_three. - rewrite H0. replace (rs3 EAX) with (rs1 r1). replace (rs3 # EDX <- Vundef ECX) with (rs1 r2). - rewrite H1; rewrite H2. eauto. - simpl; eauto. simpl; eauto. - auto. auto. auto. - split. Simplifs. - intros. destruct (preg_eq r EAX). subst. - Simplifs. rewrite D; auto with asmgen. - Simplifs. rewrite D; auto with asmgen. unfold rs2; Simplifs. -Qed. - -(** Smart constructor for modulus *) - -Lemma mk_mod_correct: - forall mkinstr dsem msem r1 r2 k c (rs1: regset) m vq vr, - mk_mod mkinstr r1 r2 k = OK c -> - (forall r c rs m, - exec_instr ge c (mkinstr r) rs m = - let vn := rs#EAX in let vd := (rs#EDX <- Vundef)#r in - match dsem vn vd, msem vn vd with - | Some vq, Some vr => Next (nextinstr_nf (rs#EAX <- vq #EDX <- vr)) m - | _, _ => Stuck - end) -> - dsem rs1#r1 rs1#r2 = Some vq -> - msem rs1#r1 rs1#r2 = Some vr -> - exists rs2, - exec_straight ge fn c rs1 m k rs2 m - /\ rs2#r1 = vr - /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r. -Proof. - unfold mk_mod; intros. - destruct (ireg_eq r1 EAX). destruct (ireg_eq r2 EDX); monadInv H. -(* r1=EAX r2=EDX *) - econstructor. split. eapply exec_straight_three. - simpl; eauto. - rewrite H0. - change (nextinstr rs1 # ECX <- (rs1 EDX) EAX) with (rs1#EAX). - change ((nextinstr rs1 # ECX <- (rs1 EDX)) # EDX <- Vundef ECX) with (rs1#EDX). - rewrite H1. rewrite H2. eauto. - simpl; eauto. - auto. auto. auto. - intuition Simplifs. -(* r1=EAX r2<>EDX *) - econstructor. split. eapply exec_straight_two. rewrite H0. - replace (rs1 # EDX <- Vundef r2) with (rs1 r2). rewrite H1; rewrite H2. eauto. - symmetry. Simplifs. - simpl; eauto. - auto. auto. - intuition Simplifs. -(* r1 <> EAX *) - monadInv H. - set (rs2 := nextinstr (rs1#XMM7 <- (rs1#EAX))). - exploit (mk_mov2_correct r1 EAX r2 ECX). congruence. instantiate (1 := rs2). - intros [rs3 [A [B [C D]]]]. - econstructor; split. - apply exec_straight_step with rs2 m; auto. - eapply exec_straight_trans. eexact A. - eapply exec_straight_three. - rewrite H0. replace (rs3 EAX) with (rs1 r1). replace (rs3 # EDX <- Vundef ECX) with (rs1 r2). - rewrite H1; rewrite H2. eauto. - simpl; eauto. simpl; eauto. - auto. auto. auto. - split. Simplifs. - intros. destruct (preg_eq r EAX). subst. - Simplifs. rewrite D; auto with asmgen. - Simplifs. rewrite D; auto with asmgen. unfold rs2; Simplifs. -Qed. +(** Properties of division *) Remark divs_mods_exist: forall v1 v2, @@ -368,46 +190,42 @@ Qed. (** Smart constructor for [shrx] *) Lemma mk_shrximm_correct: - forall r1 n k c (rs1: regset) v m, - mk_shrximm r1 n k = OK c -> - Val.shrx (rs1#r1) (Vint n) = Some v -> + forall n k c (rs1: regset) v m, + mk_shrximm n k = OK c -> + Val.shrx (rs1#EAX) (Vint n) = Some v -> exists rs2, exec_straight ge fn c rs1 m k rs2 m - /\ rs2#r1 = v - /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r. + /\ rs2#EAX = v + /\ forall r, data_preg r = true -> r <> EAX -> r <> ECX -> rs2#r = rs1#r. Proof. unfold mk_shrximm; intros. inv H. exploit Val.shrx_shr; eauto. intros [x [y [A [B C]]]]. inversion B; clear B; subst y; subst v; clear H0. - set (tmp := if ireg_eq r1 ECX then EDX else ECX). - assert (TMP1: tmp <> r1). unfold tmp; destruct (ireg_eq r1 ECX); congruence. - assert (TMP2: nontemp_preg tmp = false). unfold tmp; destruct (ireg_eq r1 ECX); auto. set (tnm1 := Int.sub (Int.shl Int.one n) Int.one). set (x' := Int.add x tnm1). set (rs2 := nextinstr (compare_ints (Vint x) (Vint Int.zero) rs1 m)). - set (rs3 := nextinstr (rs2#tmp <- (Vint x'))). - set (rs4 := nextinstr (if Int.lt x Int.zero then rs3#r1 <- (Vint x') else rs3)). - set (rs5 := nextinstr_nf (rs4#r1 <- (Val.shr rs4#r1 (Vint n)))). - assert (rs3#r1 = Vint x). unfold rs3. Simplifs. - assert (rs3#tmp = Vint x'). unfold rs3. Simplifs. + set (rs3 := nextinstr (rs2#ECX <- (Vint x'))). + set (rs4 := nextinstr (if Int.lt x Int.zero then rs3#EAX <- (Vint x') else rs3)). + set (rs5 := nextinstr_nf (rs4#EAX <- (Val.shr rs4#EAX (Vint n)))). + assert (rs3#EAX = Vint x). unfold rs3. Simplifs. + assert (rs3#ECX = Vint x'). unfold rs3. Simplifs. exists rs5. split. apply exec_straight_step with rs2 m. simpl. rewrite A. simpl. rewrite Int.and_idem. auto. auto. apply exec_straight_step with rs3 m. simpl. - change (rs2 r1) with (rs1 r1). rewrite A. simpl. + change (rs2 EAX) with (rs1 EAX). rewrite A. simpl. rewrite (Int.add_commut Int.zero tnm1). rewrite Int.add_zero. auto. auto. apply exec_straight_step with rs4 m. simpl. change (rs3 SOF) with (rs2 SOF). unfold rs2. rewrite nextinstr_inv; auto with asmgen. unfold compare_ints. rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. - unfold Val.cmp. simpl. unfold rs4. destruct (Int.lt x Int.zero); simpl; auto. rewrite H0; auto. + unfold Val.cmp. simpl. unfold rs4. destruct (Int.lt x Int.zero); simpl; auto. unfold rs4. destruct (Int.lt x Int.zero); simpl; auto. apply exec_straight_one. auto. auto. - split. unfold rs5. Simplifs. unfold rs4. Simplifs. - f_equal. destruct (Int.lt x Int.zero). - Simplifs. rewrite A. auto. Simplifs. congruence. - intros. unfold rs5; Simplifs. unfold rs4; Simplifs. - transitivity (rs3#r). - destruct (Int.lt x Int.zero). Simplifs. auto. - unfold rs3; Simplifs. unfold rs2; Simplifs. unfold compare_ints; Simplifs. + split. unfold rs5. Simplifs. unfold rs4. rewrite nextinstr_inv; auto with asmgen. + destruct (Int.lt x Int.zero). rewrite Pregmap.gss. rewrite A; auto. rewrite A; rewrite H; auto. + intros. unfold rs5. Simplifs. unfold rs4. Simplifs. + transitivity (rs3#r). destruct (Int.lt x Int.zero). Simplifs. auto. + unfold rs3. Simplifs. unfold rs2. Simplifs. + unfold compare_ints. Simplifs. Qed. (** Smart constructor for integer conversions *) @@ -420,14 +238,14 @@ Lemma mk_intconv_correct: exists rs2, exec_straight ge fn c rs1 m k rs2 m /\ rs2#rd = sem rs1#rs - /\ forall r, nontemp_preg r = true -> r <> rd -> rs2#r = rs1#r. + /\ forall r, data_preg r = true -> r <> rd -> r <> EAX -> rs2#r = rs1#r. Proof. unfold mk_intconv; intros. destruct (low_ireg rs); monadInv H. econstructor. split. apply exec_straight_one. rewrite H0. eauto. auto. - intuition Simplifs. + split. Simplifs. intros. Simplifs. econstructor. split. eapply exec_straight_two. - simpl. eauto. apply H0. auto. auto. - intuition Simplifs. + simpl. eauto. apply H0. auto. auto. + split. Simplifs. intros. Simplifs. Qed. (** Smart constructor for small stores *) @@ -449,10 +267,10 @@ Lemma mk_smallstore_correct: mk_smallstore sto addr r k = OK c -> Mem.storev chunk m1 (eval_addrmode ge addr rs1) (rs1 r) = Some m2 -> (forall c r addr rs m, - exec_instr ge c (sto addr r) rs m = exec_store ge chunk m addr rs r) -> + exec_instr ge c (sto addr r) rs m = exec_store ge chunk m addr rs r nil) -> exists rs2, exec_straight ge fn c rs1 m1 k rs2 m2 - /\ forall r, nontemp_preg r = true -> rs2#r = rs1#r. + /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> rs2#r = rs1#r. Proof. unfold mk_smallstore; intros. remember (low_ireg r) as low. destruct low. @@ -461,17 +279,17 @@ Proof. unfold exec_store. rewrite H0. eauto. auto. intros; Simplifs. (* high reg *) - remember (addressing_mentions addr ECX) as mentions. destruct mentions; monadInv H. -(* ECX is mentioned. *) + remember (addressing_mentions addr EAX) as mentions. destruct mentions; monadInv H. +(* EAX is mentioned. *) assert (r <> ECX). red; intros; subst r; discriminate. set (rs2 := nextinstr (rs1#ECX <- (eval_addrmode ge addr rs1))). - set (rs3 := nextinstr (rs2#EDX <- (rs1 r))). + set (rs3 := nextinstr (rs2#EAX <- (rs1 r))). econstructor; split. apply exec_straight_three with rs2 m1 rs3 m1. simpl. auto. simpl. replace (rs2 r) with (rs1 r). auto. symmetry. unfold rs2; Simplifs. rewrite H1. unfold exec_store. simpl. rewrite Int.add_zero. - change (rs3 EDX) with (rs1 r). + change (rs3 EAX) with (rs1 r). change (rs3 ECX) with (eval_addrmode ge addr rs1). replace (Val.add (eval_addrmode ge addr rs1) (Vint Int.zero)) with (eval_addrmode ge addr rs1). @@ -479,18 +297,18 @@ Proof. destruct (eval_addrmode ge addr rs1); simpl in H0; try discriminate. simpl. rewrite Int.add_zero; auto. auto. auto. auto. - intros. Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs. -(* ECX is not mentioned *) - set (rs2 := nextinstr (rs1#ECX <- (rs1 r))). + intros. destruct H3. Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs. +(* EAX is not mentioned *) + set (rs2 := nextinstr (rs1#EAX <- (rs1 r))). econstructor; split. apply exec_straight_two with rs2 m1. simpl. auto. rewrite H1. unfold exec_store. - rewrite (addressing_mentions_correct addr ECX rs2 rs1); auto. - change (rs2 ECX) with (rs1 r). rewrite H0. eauto. + rewrite (addressing_mentions_correct addr EAX rs2 rs1); auto. + change (rs2 EAX) with (rs1 r). rewrite H0. eauto. intros. unfold rs2; Simplifs. auto. auto. - intros. rewrite dec_eq_false. Simplifs. unfold rs2; Simplifs. congruence. + intros. destruct H2. simpl. Simplifs. unfold rs2; Simplifs. Qed. (** Accessing slots in the stack frame *) @@ -521,6 +339,8 @@ Proof. unfold exec_load. rewrite H1; rewrite H0; auto. unfold exec_load. rewrite H1; rewrite H0; auto. intuition Simplifs. + (* long *) + inv H. Qed. Lemma storeind_correct: @@ -549,7 +369,9 @@ Proof. intros. apply nextinstr_nf_inv1; auto. econstructor; split. apply exec_straight_one. simpl. unfold exec_store. rewrite H1; rewrite H0. eauto. auto. - intros. Simplifs. rewrite dec_eq_true. Simplifs. + intros. simpl. Simplifs. + (* long *) + inv H. Qed. (** Translation of addressing modes *) @@ -608,7 +430,7 @@ Lemma compare_ints_spec: rs'#ZF = Val.cmpu (Mem.valid_pointer m) Ceq v1 v2 /\ rs'#CF = Val.cmpu (Mem.valid_pointer m) Clt v1 v2 /\ rs'#SOF = Val.cmp Clt v1 v2 - /\ (forall r, nontemp_preg r = true -> rs'#r = rs#r). + /\ (forall r, data_preg r = true -> rs'#r = rs#r). Proof. intros. unfold rs'; unfold compare_ints. split. auto. @@ -737,7 +559,7 @@ Lemma compare_floats_spec: rs'#ZF = Val.of_bool (negb (Float.cmp Cne n1 n2)) /\ rs'#CF = Val.of_bool (negb (Float.cmp Cge n1 n2)) /\ rs'#PF = Val.of_bool (negb (Float.cmp Ceq n1 n2 || Float.cmp Clt n1 n2 || Float.cmp Cgt n1 n2)) - /\ (forall r, nontemp_preg r = true -> rs'#r = rs#r). + /\ (forall r, data_preg r = true -> rs'#r = rs#r). Proof. intros. unfold rs'; unfold compare_floats. split. auto. @@ -890,7 +712,7 @@ Lemma transl_cond_correct: | None => True | Some b => eval_extcond (testcond_for_condition cond) rs' = Some b end - /\ forall r, nontemp_preg r = true -> rs'#r = rs r. + /\ forall r, data_preg r = true -> rs'#r = rs r. Proof. unfold transl_cond; intros. destruct cond; repeat (destruct args; try discriminate); monadInv H. @@ -968,19 +790,19 @@ Proof. intros. unfold eval_testcond. repeat rewrite Pregmap.gso; auto with asmgen. Qed. -Lemma mk_setcc_correct: +Lemma mk_setcc_base_correct: forall cond rd k rs1 m, exists rs2, - exec_straight ge fn (mk_setcc cond rd k) rs1 m k rs2 m + exec_straight ge fn (mk_setcc_base cond rd k) rs1 m k rs2 m /\ rs2#rd = Val.of_optbool(eval_extcond cond rs1) - /\ forall r, nontemp_preg r = true -> r <> rd -> rs2#r = rs1#r. + /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> r <> rd -> rs2#r = rs1#r. Proof. intros. destruct cond; simpl in *. -(* base *) +- (* base *) econstructor; split. - apply exec_straight_one. simpl; eauto. auto. - intuition Simplifs. -(* or *) + apply exec_straight_one. simpl; eauto. auto. + split. Simplifs. intros; Simplifs. +- (* or *) assert (Val.of_optbool match eval_testcond c1 rs1 with | Some b1 => @@ -996,7 +818,7 @@ Proof. destruct b; auto. auto. rewrite H; clear H. - destruct (ireg_eq rd EDX). + destruct (ireg_eq rd EAX). subst rd. econstructor; split. eapply exec_straight_three. simpl; eauto. @@ -1010,9 +832,9 @@ Proof. simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto. simpl. eauto. auto. auto. auto. - split. Simplifs. rewrite Val.or_commut. f_equal; Simplifs. - intros. Simplifs. -(* and *) + split. Simplifs. rewrite Val.or_commut. decEq; Simplifs. + intros. destruct H0; Simplifs. +- (* and *) assert (Val.of_optbool match eval_testcond c1 rs1 with | Some b1 => @@ -1023,12 +845,14 @@ Proof. | None => None end = Val.and (Val.of_optbool (eval_testcond c1 rs1)) (Val.of_optbool (eval_testcond c2 rs1))). - destruct (eval_testcond c1 rs1). destruct (eval_testcond c2 rs1). - destruct b; destruct b0; auto. - destruct b; auto. - auto. + { + destruct (eval_testcond c1 rs1). destruct (eval_testcond c2 rs1). + destruct b; destruct b0; auto. + destruct b; auto. + auto. + } rewrite H; clear H. - destruct (ireg_eq rd EDX). + destruct (ireg_eq rd EAX). subst rd. econstructor; split. eapply exec_straight_three. simpl; eauto. @@ -1042,10 +866,25 @@ Proof. simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto. simpl. eauto. auto. auto. auto. - split. Simplifs. rewrite Val.and_commut. f_equal; Simplifs. - intros. Simplifs. + split. Simplifs. rewrite Val.and_commut. decEq; Simplifs. + intros. destruct H0; Simplifs. Qed. +Lemma mk_setcc_correct: + forall cond rd k rs1 m, + exists rs2, + exec_straight ge fn (mk_setcc cond rd k) rs1 m k rs2 m + /\ rs2#rd = Val.of_optbool(eval_extcond cond rs1) + /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> r <> rd -> rs2#r = rs1#r. +Proof. + intros. unfold mk_setcc. destruct (low_ireg rd). +- apply mk_setcc_base_correct. +- exploit mk_setcc_base_correct. intros [rs2 [A [B C]]]. + econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. eauto. simpl. auto. + intuition Simplifs. +Qed. + (** Translation of arithmetic operations. *) Ltac ArgsInv := @@ -1053,7 +892,8 @@ Ltac ArgsInv := | [ H: Error _ = OK _ |- _ ] => discriminate | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args; ArgsInv | [ H: bind _ _ = OK _ |- _ ] => monadInv H; ArgsInv - | [ H: assertion _ = OK _ |- _ ] => monadInv H; subst; ArgsInv + | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *; clear H; ArgsInv | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *; clear H; ArgsInv | _ => idtac @@ -1071,25 +911,22 @@ Lemma transl_op_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ Val.lessdef v rs'#(preg_of res) - /\ forall r, - match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end -> - r <> preg_of res -> rs' r = rs r. + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. Proof. +Transparent destroyed_by_op. intros until v; intros TR EV. assert (SAME: (exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of res) = v - /\ forall r, - match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end -> - r <> preg_of res -> rs' r = rs r) -> + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r) -> exists rs', exec_straight ge fn c rs m k rs' m /\ Val.lessdef v rs'#(preg_of res) - /\ forall r, - match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end -> - r <> preg_of res -> rs' r = rs r). - intros [rs' [A [B C]]]. subst v. exists rs'; auto. + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r). + { + intros [rs' [A [B C]]]. subst v. exists rs'; auto. + } destruct op; simpl in TR; ArgsInv; simpl in EV; try (inv EV); try (apply SAME; TranslOp; fail). (* move *) @@ -1109,32 +946,34 @@ Proof. apply SAME. eapply mk_intconv_correct; eauto. (* div *) apply SAME. - specialize (divs_mods_exist (rs x0) (rs x1)). rewrite H0. - destruct (Val.mods (rs x0) (rs x1)) as [vr|] eqn:?; intros; try contradiction. - eapply mk_div_correct with (dsem := Val.divs) (msem := Val.mods); eauto. + specialize (divs_mods_exist (rs EAX) (rs ECX)). rewrite H0. + destruct (Val.mods (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction. + TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto. + auto. auto. + simpl in H3. destruct H3; Simplifs. (* divu *) apply SAME. - specialize (divu_modu_exist (rs x0) (rs x1)). rewrite H0. - destruct (Val.modu (rs x0) (rs x1)) as [vr|] eqn:?; intros; try contradiction. - eapply mk_div_correct with (dsem := Val.divu) (msem := Val.modu); eauto. + specialize (divu_modu_exist (rs EAX) (rs ECX)). rewrite H0. + destruct (Val.modu (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction. + TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto. + auto. auto. + simpl in H3. destruct H3; Simplifs. (* mod *) apply SAME. - specialize (divs_mods_exist (rs x0) (rs x1)). rewrite H0. - destruct (Val.divs (rs x0) (rs x1)) as [vq|] eqn:?; intros; try contradiction. - eapply mk_mod_correct with (dsem := Val.divs) (msem := Val.mods); eauto. + specialize (divs_mods_exist (rs EAX) (rs ECX)). rewrite H0. + destruct (Val.divs (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction. + TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto. + auto. auto. + simpl in H3. destruct H3; Simplifs. (* modu *) apply SAME. - specialize (divu_modu_exist (rs x0) (rs x1)). rewrite H0. - destruct (Val.divu (rs x0) (rs x1)) as [vq|] eqn:?; intros; try contradiction. - eapply mk_mod_correct with (dsem := Val.divu) (msem := Val.modu); eauto. -(* shl *) - apply SAME. eapply mk_shift_correct; eauto. -(* shr *) - apply SAME. eapply mk_shift_correct; eauto. + specialize (divu_modu_exist (rs EAX) (rs ECX)). rewrite H0. + destruct (Val.divu (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction. + TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto. + auto. auto. + simpl in H3. destruct H3; Simplifs. (* shrximm *) apply SAME. eapply mk_shrximm_correct; eauto. -(* shru *) - apply SAME. eapply mk_shift_correct; eauto. (* lea *) exploit transl_addressing_mode_correct; eauto. intros EA. TranslOp. rewrite nextinstr_inv; auto with asmgen. rewrite Pregmap.gss; auto. @@ -1163,7 +1002,7 @@ Lemma transl_load_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dest) = v - /\ forall r, nontemp_preg r = true -> r <> preg_of dest -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> r <> preg_of dest -> rs'#r = rs#r. Proof. unfold transl_load; intros. monadInv H. exploit transl_addressing_mode_correct; eauto. intro EA. @@ -1191,7 +1030,7 @@ Lemma transl_store_correct: Mem.storev chunk m a (rs (preg_of src)) = Some m' -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, nontemp_preg r = true -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_store chunk addr) -> rs'#r = rs#r. Proof. unfold transl_store; intros. monadInv H. exploit transl_addressing_mode_correct; eauto. intro EA. @@ -1223,7 +1062,7 @@ Proof. (* float32 *) econstructor; split. apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto. - intros. Simplifs. + intros. Transparent destroyed_by_store. simpl in H2. simpl. Simplifs. (* float64 *) econstructor; split. apply exec_straight_one. simpl. unfold exec_store. erewrite Mem.storev_float64al32; eauto. auto. diff --git a/ia32/ConstpropOp.vp b/ia32/ConstpropOp.vp index e6ba98a..fea0afd 100644 --- a/ia32/ConstpropOp.vp +++ b/ia32/ConstpropOp.vp @@ -31,6 +31,7 @@ Inductive approx : Type := no compile-time information is available. *) | I: int -> approx (** A known integer value. *) | F: float -> approx (** A known floating-point value. *) + | L: int64 -> approx (** A know 64-bit integer value. *) | G: ident -> int -> approx (** The value is the address of the given global symbol plus the given integer offset. *) @@ -130,6 +131,11 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) := | Oshru, I n1 :: I n2 :: nil => if Int.ltu n2 Int.iwordsize then I(Int.shru n1 n2) else Unknown | Oshruimm n, I n1 :: nil => if Int.ltu n Int.iwordsize then I(Int.shru n1 n) else Unknown | Ororimm n, I n1 :: nil => if Int.ltu n Int.iwordsize then I(Int.ror n1 n) else Unknown + | Oshldimm n, I n1 :: I n2 :: nil => + let n' := Int.sub Int.iwordsize n in + if Int.ltu n Int.iwordsize && Int.ltu n' Int.iwordsize + then I(Int.or (Int.shl n1 n) (Int.shru n2 n')) + else Unknown | Olea mode, vl => eval_static_addressing mode vl | Onegf, F n1 :: nil => F(Float.neg n1) | Oabsf, F n1 :: nil => F(Float.abs n1) @@ -140,6 +146,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) := | Osingleoffloat, F n1 :: nil => F(Float.singleoffloat n1) | Ointoffloat, F n1 :: nil => eval_static_intoffloat n1 | Ofloatofint, I n1 :: nil => if propagate_float_constants tt then F(Float.floatofint n1) else Unknown + | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2) + | Olowlong, L n :: nil => I(Int64.loword n) + | Ohighlong, L n :: nil => I(Int64.hiword n) | Ocmp c, vl => eval_static_condition_val c vl | _, _ => Unknown end. diff --git a/ia32/ConstpropOpproof.v b/ia32/ConstpropOpproof.v index d792d8a..b6c3cdc 100644 --- a/ia32/ConstpropOpproof.v +++ b/ia32/ConstpropOpproof.v @@ -44,9 +44,10 @@ Definition val_match_approx (a: approx) (v: val) : Prop := | Unknown => True | I p => v = Vint p | F p => v = Vfloat p + | L p => v = Vlong p | G symb ofs => v = symbol_address ge symb ofs | S ofs => v = Val.add sp (Vint ofs) - | _ => False + | Novalue => False end. Inductive val_list_match_approx: list approx -> list val -> Prop := @@ -64,6 +65,8 @@ Ltac SimplVMA := simpl in H; (try subst v); SimplVMA | H: (val_match_approx (F _) ?v) |- _ => simpl in H; (try subst v); SimplVMA + | H: (val_match_approx (L _) ?v) |- _ => + simpl in H; (try subst v); SimplVMA | H: (val_match_approx (G _ _) ?v) |- _ => simpl in H; (try subst v); SimplVMA | H: (val_match_approx (S _) ?v) |- _ => @@ -156,6 +159,8 @@ Proof. destruct (Int.ltu n2 Int.iwordsize); simpl; auto. destruct (Int.ltu n Int.iwordsize); simpl; auto. destruct (Int.ltu n Int.iwordsize); simpl; auto. + destruct (Int.ltu n Int.iwordsize); + destruct (Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize); simpl; auto. eapply eval_static_addressing_correct; eauto. unfold eval_static_intoffloat. destruct (Float.intoffloat n1) eqn:?; simpl in H0; inv H0. diff --git a/ia32/Machregs.v b/ia32/Machregs.v index df96393..3b84aa5 100644 --- a/ia32/Machregs.v +++ b/ia32/Machregs.v @@ -10,34 +10,32 @@ (* *) (* *********************************************************************) +Require Import String. Require Import Coqlib. Require Import Maps. Require Import AST. +Require Import Integers. +Require Import Op. (** ** Machine registers *) (** The following type defines the machine registers that can be referenced as locations. These include: -- Integer registers that can be allocated to RTL pseudo-registers ([Rxx]). -- Floating-point registers that can be allocated to RTL pseudo-registers - ([Fxx]). -- Two integer registers, not allocatable, reserved as temporaries for - spilling and reloading ([IT1, IT2]). -- Two float registers, not allocatable, reserved as temporaries for - spilling and reloading ([FT1, FT2]). +- Integer registers that can be allocated to RTL pseudo-registers. +- Floating-point registers that can be allocated to RTL pseudo-registers. +- The special [FP0] register denoting the top of the X87 float stack. The type [mreg] does not include special-purpose or reserved machine registers such as the stack pointer and the condition codes. *) Inductive mreg: Type := (** Allocatable integer regs *) - | AX: mreg | BX: mreg | SI: mreg | DI: mreg | BP: mreg + | AX: mreg | BX: mreg | CX: mreg | DX: mreg | SI: mreg | DI: mreg | BP: mreg (** Allocatable float regs *) - | X0: mreg | X1: mreg | X2: mreg | X3: mreg | X4: mreg | X5: mreg - (** Integer temporaries *) - | IT1: mreg (* DX *) | IT2: mreg (* CX *) - (** Float temporaries *) - | FT1: mreg (* X6 *) | FT2: mreg (* X7 *) | FP0: mreg (* top of FP stack *). + | X0: mreg | X1: mreg | X2: mreg | X3: mreg + | X4: mreg | X5: mreg | X6: mreg | X7: mreg + (** Special float reg *) + | FP0: mreg (**r top of x87 FP stack *). Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}. Proof. decide equality. Defined. @@ -45,28 +43,24 @@ Global Opaque mreg_eq. Definition mreg_type (r: mreg): typ := match r with - | AX => Tint | BX => Tint | SI => Tint | DI => Tint | BP => Tint - (** Allocatable float regs *) - | X0 => Tfloat | X1 => Tfloat | X2 => Tfloat - | X3 => Tfloat | X4 => Tfloat | X5 => Tfloat - (** Integer temporaries *) - | IT1 => Tint | IT2 => Tint - (** Float temporaries *) - | FT1 => Tfloat | FT2 => Tfloat | FP0 => Tfloat + | AX => Tint | BX => Tint | CX => Tint | DX => Tint + | SI => Tint | DI => Tint | BP => Tint + | X0 => Tfloat | X1 => Tfloat | X2 => Tfloat | X3 => Tfloat + | X4 => Tfloat | X5 => Tfloat | X6 => Tfloat | X7 => Tfloat + | FP0 => Tfloat end. -Open Scope positive_scope. +Local Open Scope positive_scope. Module IndexedMreg <: INDEXED_TYPE. Definition t := mreg. Definition eq := mreg_eq. Definition index (r: mreg): positive := match r with - | AX => 1 | BX => 2 | SI => 3 | DI => 4 | BP => 5 - | X0 => 6 | X1 => 7 | X2 => 8 - | X3 => 9 | X4 => 10 | X5 => 11 - | IT1 => 12 | IT2 => 13 - | FT1 => 14 | FT2 => 15 | FP0 => 16 + | AX => 1 | BX => 2 | CX => 3 | DX => 4 | SI => 5 | DI => 6 | BP => 7 + | X0 => 8 | X1 => 9 | X2 => 10 | X3 => 11 + | X4 => 12 | X5 => 13 | X6 => 14 | X7 => 15 + | FP0 => 16 end. Lemma index_inj: forall r1 r2, index r1 = index r2 -> r1 = r2. @@ -75,3 +69,147 @@ Module IndexedMreg <: INDEXED_TYPE. Qed. End IndexedMreg. +(** ** Destroyed registers, preferred registers *) + +Definition destroyed_by_op (op: operation): list mreg := + match op with + | Omove => FP0 :: nil + | Ocast8signed | Ocast8unsigned | Ocast16signed | Ocast16unsigned => AX :: nil + | Odiv => AX :: DX :: nil + | Odivu => AX :: DX :: nil + | Omod => AX :: DX :: nil + | Omodu => AX :: DX :: nil + | Oshrximm _ => CX :: nil + | Ocmp _ => AX :: CX :: nil + | _ => nil + end. + +Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg := + nil. + +Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := + match chunk with + | Mint8signed | Mint8unsigned => AX :: CX :: nil + | Mint16signed | Mint16unsigned | Mint32 | Mint64 => nil + | Mfloat32 => X7 :: nil + | Mfloat64 | Mfloat64al32 => FP0 :: nil + end. + +Definition destroyed_by_cond (cond: condition): list mreg := + nil. + +Definition destroyed_by_jumptable: list mreg := + nil. + +Definition destroyed_by_builtin (ef: external_function): list mreg := + match ef with + | EF_memcpy sz al => + if zle sz 32 then CX :: X7 :: nil else CX :: SI :: DI :: nil + | EF_vload _ => nil + | EF_vload_global _ _ _ => nil + | EF_vstore _ => AX :: CX :: X7 :: nil + | EF_vstore_global _ _ _ => AX :: X7 :: nil + | _ => AX :: CX :: X7 :: FP0 :: nil + end. + +Definition destroyed_at_function_entry: list mreg := + DX :: FP0 :: nil. (* must include destroyed_by_op Omove *) + +Definition temp_for_parent_frame: mreg := + DX. + +Definition mregs_for_operation (op: operation): list (option mreg) * option mreg := + match op with + | Odiv => (Some AX :: Some CX :: nil, Some AX) + | Odivu => (Some AX :: Some CX :: nil, Some AX) + | Omod => (Some AX :: Some CX :: nil, Some DX) + | Omodu => (Some AX :: Some CX :: nil, Some DX) + | Oshl => (None :: Some CX :: nil, None) + | Oshr => (None :: Some CX :: nil, None) + | Oshru => (None :: Some CX :: nil, None) + | Oshrximm _ => (Some AX :: nil, Some AX) + | _ => (nil, None) + end. + +Local Open Scope string_scope. + +Definition builtin_negl := ident_of_string "__builtin_negl". +Definition builtin_addl := ident_of_string "__builtin_addl". +Definition builtin_subl := ident_of_string "__builtin_subl". +Definition builtin_mull := ident_of_string "__builtin_mull". + +Definition mregs_for_builtin (ef: external_function): list (option mreg) * list (option mreg) := + match ef with + | EF_memcpy sz al => + if zle sz 32 then (Some AX :: Some DX :: nil, nil) else (Some DI :: Some SI :: nil, nil) + | EF_builtin id sg => + if ident_eq id builtin_negl then + (Some DX :: Some AX :: nil, Some DX :: Some AX :: nil) + else if ident_eq id builtin_addl || ident_eq id builtin_subl then + (Some DX :: Some AX :: Some CX :: Some BX :: nil, Some DX :: Some AX :: nil) + else if ident_eq id builtin_mull then + (Some AX :: Some DX :: nil, Some DX :: Some AX :: nil) + else + (nil, nil) + | _ => (nil, nil) + end. + +Global Opaque + destroyed_by_op destroyed_by_load destroyed_by_store + destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin + destroyed_at_function_entry temp_for_parent_frame + mregs_for_operation mregs_for_builtin. + +(** Two-address operations. Return [true] if the first argument and + the result must be in the same location *and* are unconstrained + by [mregs_for_operation]. *) + +Definition two_address_op (op: operation) : bool := + match op with + | Omove => false + | Ointconst _ => false + | Ofloatconst _ => false + | Oindirectsymbol _ => false + | Ocast8signed => false + | Ocast8unsigned => false + | Ocast16signed => false + | Ocast16unsigned => false + | Oneg => true + | Osub => true + | Omul => true + | Omulimm _ => true + | Odiv => false + | Odivu => false + | Omod => false + | Omodu => false + | Oand => true + | Oandimm _ => true + | Oor => true + | Oorimm _ => true + | Oxor => true + | Oxorimm _ => true + | Oshl => true + | Oshlimm _ => true + | Oshr => true + | Oshrimm _ => true + | Oshrximm _ => false + | Oshru => true + | Oshruimm _ => true + | Ororimm _ => true + | Oshldimm _ => true + | Olea addr => false + | Onegf => true + | Oabsf => true + | Oaddf => true + | Osubf => true + | Omulf => true + | Odivf => true + | Osingleoffloat => false + | Ointoffloat => false + | Ofloatofint => false + | Omakelong => false + | Olowlong => false + | Ohighlong => false + | Ocmp c => false + end. + diff --git a/ia32/Machregsaux.ml b/ia32/Machregsaux.ml index 7d6df90..8403746 100644 --- a/ia32/Machregsaux.ml +++ b/ia32/Machregsaux.ml @@ -15,11 +15,11 @@ open Machregs let register_names = [ - ("AX", AX); ("BX", BX); ("SI", SI); ("DI", DI); ("BP", BP); + ("AX", AX); ("BX", BX); ("CX", CX); ("DX", DX); + ("SI", SI); ("DI", DI); ("BP", BP); ("XMM0", X0); ("XMM1", X1); ("XMM2", X2); ("XMM3", X3); - ("XMM4", X4); ("XMM5", X5); - ("DX", IT1); ("CX", IT2); - ("XMM6", FT1); ("XMM7", FT2); ("ST0", FP0) + ("XMM4", X4); ("XMM5", X5); ("XMM6", X6); ("XMM7", X7); + ("ST0", FP0) ] let name_of_register r = @@ -96,6 +96,7 @@ Inductive operation : Type := | Oshru: operation (**r [rd = r1 >> r2] (unsigned) *) | Oshruimm: int -> operation (**r [rd = r1 >> n] (unsigned) *) | Ororimm: int -> operation (**r rotate right immediate *) + | Oshldimm: int -> operation (**r [rd = r1 << n | r2 >> (32-n)] *) | Olea: addressing -> operation (**r effective address *) (*c Floating-point arithmetic: *) | Onegf: operation (**r [rd = - r1] *) @@ -108,6 +109,10 @@ Inductive operation : Type := (*c Conversions between int and float: *) | Ointoffloat: operation (**r [rd = signed_int_of_float(r1)] *) | Ofloatofint: operation (**r [rd = float_of_signed_int(r1)] *) +(*c Manipulating 64-bit integers: *) + | Omakelong: operation (**r [rd = r1 << 32 | r2] *) + | Olowlong: operation (**r [rd = low-word(r1)] *) + | Ohighlong: operation (**r [rd = high-word(r1)] *) (*c Boolean tests: *) | Ocmp: condition -> operation. (**r [rd = 1] if condition holds, [rd = 0] otherwise. *) @@ -130,6 +135,7 @@ Definition eq_operation (x y: operation): {x=y} + {x<>y}. Proof. generalize Int.eq_dec; intro. generalize Float.eq_dec; intro. + generalize Int64.eq_dec; intro. assert (forall (x y: ident), {x=y}+{x<>y}). exact peq. assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality. assert (forall (x y: condition), {x=y}+{x<>y}). decide equality. @@ -222,6 +228,8 @@ Definition eval_operation | Oshru, v1::v2::nil => Some (Val.shru v1 v2) | Oshruimm n, v1::nil => Some (Val.shru v1 (Vint n)) | Ororimm n, v1::nil => Some (Val.ror v1 (Vint n)) + | Oshldimm n, v1::v2::nil => Some (Val.or (Val.shl v1 (Vint n)) + (Val.shru v2 (Vint (Int.sub Int.iwordsize n)))) | Olea addr, _ => eval_addressing genv sp addr vl | Onegf, v1::nil => Some(Val.negf v1) | Oabsf, v1::nil => Some(Val.absf v1) @@ -232,6 +240,9 @@ Definition eval_operation | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1) | Ointoffloat, v1::nil => Val.intoffloat v1 | Ofloatofint, v1::nil => Val.floatofint v1 + | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2) + | Olowlong, v1::nil => Some(Val.loword v1) + | Ohighlong, v1::nil => Some(Val.hiword v1) | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m)) | _, _ => None end. @@ -306,6 +317,7 @@ Definition type_of_operation (op: operation) : list typ * typ := | Oshru => (Tint :: Tint :: nil, Tint) | Oshruimm _ => (Tint :: nil, Tint) | Ororimm _ => (Tint :: nil, Tint) + | Oshldimm _ => (Tint :: Tint :: nil, Tint) | Olea addr => (type_of_addressing addr, Tint) | Onegf => (Tfloat :: nil, Tfloat) | Oabsf => (Tfloat :: nil, Tfloat) @@ -316,6 +328,9 @@ Definition type_of_operation (op: operation) : list typ * typ := | Osingleoffloat => (Tfloat :: nil, Tfloat) | Ointoffloat => (Tfloat :: nil, Tint) | Ofloatofint => (Tint :: nil, Tfloat) + | Omakelong => (Tint :: Tint :: nil, Tlong) + | Olowlong => (Tlong :: nil, Tint) + | Ohighlong => (Tlong :: nil, Tint) | Ocmp c => (type_of_condition c, Tint) end. @@ -386,6 +401,8 @@ Proof with (try exact I). destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)... destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)... + destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)... + destruct v1; simpl... destruct (Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize)... eapply type_of_addressing_sound; eauto. destruct v0... destruct v0... @@ -396,6 +413,9 @@ Proof with (try exact I). destruct v0... destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2... destruct v0; simpl in H0; inv H0... + destruct v0; destruct v1... + destruct v0... + destruct v0... destruct (eval_condition c vl m); simpl... destruct b... Qed. @@ -511,79 +531,41 @@ Proof. apply eval_shift_stack_addressing. Qed. -(** Transformation of addressing modes with two operands or more - into an equivalent arithmetic operation. This is used in the [Reload] - pass when a store instruction cannot be reloaded directly because - it runs out of temporary registers. *) +(** Offset an addressing mode [addr] by a quantity [delta], so that + it designates the pointer [delta] bytes past the pointer designated + by [addr]. May be undefined, in which case [None] is returned. *) -Definition op_for_binary_addressing (addr: addressing) : operation := Olea addr. +Definition offset_addressing (addr: addressing) (delta: int) : option addressing := + match addr with + | Aindexed n => Some(Aindexed (Int.add n delta)) + | Aindexed2 n => Some(Aindexed2 (Int.add n delta)) + | Ascaled sc n => Some(Ascaled sc (Int.add n delta)) + | Aindexed2scaled sc n => Some(Aindexed2scaled sc (Int.add n delta)) + | Aglobal s n => Some(Aglobal s (Int.add n delta)) + | Abased s n => Some(Abased s (Int.add n delta)) + | Abasedscaled sc s n => Some(Abasedscaled sc s (Int.add n delta)) + | Ainstack n => Some(Ainstack (Int.add n delta)) + end. -Lemma eval_op_for_binary_addressing: - forall (F V: Type) (ge: Genv.t F V) sp addr args v m, - (length args >= 2)%nat -> +Lemma eval_offset_addressing: + forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v, + offset_addressing addr delta = Some addr' -> eval_addressing ge sp addr args = Some v -> - eval_operation ge sp (op_for_binary_addressing addr) args m = Some v. -Proof. - intros. simpl. auto. -Qed. - -Lemma type_op_for_binary_addressing: - forall addr, - (length (type_of_addressing addr) >= 2)%nat -> - type_of_operation (op_for_binary_addressing addr) = (type_of_addressing addr, Tint). + eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)). Proof. - intros. simpl. auto. + intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; inv H. + rewrite Val.add_assoc; auto. + rewrite !Val.add_assoc; auto. + rewrite !Val.add_assoc; auto. + rewrite !Val.add_assoc; auto. + unfold symbol_address. destruct (Genv.find_symbol ge i); auto. + unfold symbol_address. destruct (Genv.find_symbol ge i); auto. + rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. + unfold symbol_address. destruct (Genv.find_symbol ge i0); auto. + rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. + rewrite Val.add_assoc. auto. Qed. - -(** Two-address operations. Return [true] if the first argument and - the result must be in the same location. *) - -Definition two_address_op (op: operation) : bool := - match op with - | Omove => false - | Ointconst _ => false - | Ofloatconst _ => false - | Oindirectsymbol _ => false - | Ocast8signed => false - | Ocast8unsigned => false - | Ocast16signed => false - | Ocast16unsigned => false - | Oneg => true - | Osub => true - | Omul => true - | Omulimm _ => true - | Odiv => true - | Odivu => true - | Omod => true - | Omodu => true - | Oand => true - | Oandimm _ => true - | Oor => true - | Oorimm _ => true - | Oxor => true - | Oxorimm _ => true - | Oshl => true - | Oshlimm _ => true - | Oshr => true - | Oshrimm _ => true - | Oshrximm _ => true - | Oshru => true - | Oshruimm _ => true - | Ororimm _ => true - | Olea addr => false - | Onegf => true - | Oabsf => true - | Oaddf => true - | Osubf => true - | Omulf => true - | Odivf => true - | Osingleoffloat => false - | Ointoffloat => false - | Ofloatofint => false - | Ocmp c => false - end. - (** Operations that are so cheap to recompute that CSE should not factor them out. *) Definition is_trivial_op (op: operation) : bool := @@ -774,6 +756,8 @@ Proof. eauto 3 using val_cmpu_bool_inject, Mem.valid_pointer_implies. inv H3; inv H2; simpl in H0; inv H0; auto. inv H3; inv H2; simpl in H0; inv H0; auto. + inv H3; try discriminate; inv H5; auto. + inv H3; try discriminate; inv H5; auto. Qed. Ltac TrivialExists := @@ -842,6 +826,8 @@ Proof. inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto. inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto. inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto. + inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto. + inv H2; simpl; auto. destruct (Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize); auto. eapply eval_addressing_inj; eauto. inv H4; simpl; auto. inv H4; simpl; auto. @@ -853,6 +839,9 @@ Proof. inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2. exists (Vint i); auto. inv H4; simpl in H1; inv H1. simpl. TrivialExists. + inv H4; inv H2; simpl; auto. + inv H4; simpl; auto. + inv H4; simpl; auto. subst v1. destruct (eval_condition c vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. destruct b; simpl; constructor. diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml index 4768606..c2ea98f 100644 --- a/ia32/PrintAsm.ml +++ b/ia32/PrintAsm.ml @@ -61,7 +61,7 @@ let raw_symbol oc s = | ELF -> fprintf oc "%s" s | MacOS | Cygwin -> fprintf oc "_%s" s -let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$" +let re_variadic_stub = Str.regexp "\\(.*\\)\\$[ifl]*$" let symbol oc symb = let s = extern_atom symb in @@ -239,9 +239,9 @@ let print_annot_val oc txt args res = fprintf oc "%s annotation: " comment; PrintAnnot.print_annot_val preg oc txt args; match args, res with - | IR src :: _, IR dst -> + | [IR src], [IR dst] -> if dst <> src then fprintf oc " movl %a, %a\n" ireg src ireg dst - | FR src :: _, FR dst -> + | [FR src], [FR dst] -> if dst <> src then fprintf oc " movsd %a, %a\n" freg src freg dst | _, _ -> assert false @@ -251,98 +251,76 @@ let print_annot_val oc txt args res = memory accesses regardless of alignment. *) let print_builtin_memcpy_small oc sz al src dst = - let tmp = - if src <> ECX && dst <> ECX then ECX - else if src <> EDX && dst <> EDX then EDX - else EAX in - let need_tmp = - sz mod 4 <> 0 || not !Clflags.option_fsse in + assert (src = EDX && dst = EAX); let rec copy ofs sz = if sz >= 8 && !Clflags.option_fsse then begin - fprintf oc " movq %d(%a), %a\n" ofs ireg src freg XMM6; - fprintf oc " movq %a, %d(%a)\n" freg XMM6 ofs ireg dst; + fprintf oc " movq %d(%a), %a\n" ofs ireg src freg XMM7; + fprintf oc " movq %a, %d(%a)\n" freg XMM7 ofs ireg dst; copy (ofs + 8) (sz - 8) end else if sz >= 4 then begin - if !Clflags.option_fsse then begin - fprintf oc " movd %d(%a), %a\n" ofs ireg src freg XMM6; - fprintf oc " movd %a, %d(%a)\n" freg XMM6 ofs ireg dst - end else begin - fprintf oc " movl %d(%a), %a\n" ofs ireg src ireg tmp; - fprintf oc " movl %a, %d(%a)\n" ireg tmp ofs ireg dst - end; + fprintf oc " movl %d(%a), %a\n" ofs ireg src ireg ECX; + fprintf oc " movl %a, %d(%a)\n" ireg ECX ofs ireg dst; copy (ofs + 4) (sz - 4) end else if sz >= 2 then begin - fprintf oc " movw %d(%a), %a\n" ofs ireg src ireg16 tmp; - fprintf oc " movw %a, %d(%a)\n" ireg16 tmp ofs ireg dst; + fprintf oc " movw %d(%a), %a\n" ofs ireg src ireg16 ECX; + fprintf oc " movw %a, %d(%a)\n" ireg16 ECX ofs ireg dst; copy (ofs + 2) (sz - 2) end else if sz >= 1 then begin - fprintf oc " movb %d(%a), %a\n" ofs ireg src ireg8 tmp; - fprintf oc " movb %a, %d(%a)\n" ireg8 tmp ofs ireg dst; + fprintf oc " movb %d(%a), %a\n" ofs ireg src ireg8 ECX; + fprintf oc " movb %a, %d(%a)\n" ireg8 ECX ofs ireg dst; copy (ofs + 1) (sz - 1) end in - if need_tmp && tmp = EAX then - fprintf oc " pushl %a\n" ireg EAX; - copy 0 sz; - if need_tmp && tmp = EAX then - fprintf oc " popl %a\n" ireg EAX - -let print_mov2 oc src1 dst1 src2 dst2 = - if src1 = dst1 then - if src2 = dst2 - then () - else fprintf oc " movl %a, %a\n" ireg src2 ireg dst2 - else if src2 = dst2 then - fprintf oc " movl %a, %a\n" ireg src1 ireg dst1 - else if src2 = dst1 then - if src1 = dst2 then - fprintf oc " xchgl %a, %a\n" ireg src1 ireg src2 - else begin - fprintf oc " movl %a, %a\n" ireg src2 ireg dst2; - fprintf oc " movl %a, %a\n" ireg src1 ireg dst1 - end - else begin - fprintf oc " movl %a, %a\n" ireg src1 ireg dst1; - fprintf oc " movl %a, %a\n" ireg src2 ireg dst2 - end + copy 0 sz let print_builtin_memcpy_big oc sz al src dst = - fprintf oc " pushl %a\n" ireg ESI; - fprintf oc " pushl %a\n" ireg EDI; - print_mov2 oc src ESI dst EDI; + assert (src = ESI && dst = EDI); fprintf oc " movl $%d, %a\n" (sz / 4) ireg ECX; fprintf oc " rep movsl\n"; if sz mod 4 >= 2 then fprintf oc " movsw\n"; - if sz mod 2 >= 1 then fprintf oc " movsb\n"; - fprintf oc " popl %a\n" ireg EDI; - fprintf oc " popl %a\n" ireg ESI + if sz mod 2 >= 1 then fprintf oc " movsb\n" let print_builtin_memcpy oc sz al args = let (dst, src) = match args with [IR d; IR s] -> (d, s) | _ -> assert false in fprintf oc "%s begin builtin __builtin_memcpy_aligned, size = %d, alignment = %d\n" comment sz al; - if sz <= 64 + if sz <= 32 then print_builtin_memcpy_small oc sz al src dst else print_builtin_memcpy_big oc sz al src dst; fprintf oc "%s end builtin __builtin_memcpy_aligned\n" comment (* Handling of volatile reads and writes *) +let offset_addressing (Addrmode(base, ofs, cst)) delta = + Addrmode(base, ofs, + match cst with + | Coq_inl n -> Coq_inl(Integers.Int.add n delta) + | Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta)) + let print_builtin_vload_common oc chunk addr res = match chunk, res with - | Mint8unsigned, IR res -> + | Mint8unsigned, [IR res] -> fprintf oc " movzbl %a, %a\n" addressing addr ireg res - | Mint8signed, IR res -> + | Mint8signed, [IR res] -> fprintf oc " movsbl %a, %a\n" addressing addr ireg res - | Mint16unsigned, IR res -> + | Mint16unsigned, [IR res] -> fprintf oc " movzwl %a, %a\n" addressing addr ireg res - | Mint16signed, IR res -> + | Mint16signed, [IR res] -> fprintf oc " movswl %a, %a\n" addressing addr ireg res - | Mint32, IR res -> + | Mint32, [IR res] -> fprintf oc " movl %a, %a\n" addressing addr ireg res - | Mfloat32, FR res -> + | Mint64, [IR res1; IR res2] -> + let addr' = offset_addressing addr (coqint_of_camlint 4l) in + if not (Asmgen.addressing_mentions addr res2) then begin + fprintf oc " movl %a, %a\n" addressing addr ireg res2; + fprintf oc " movl %a, %a\n" addressing addr' ireg res1 + end else begin + fprintf oc " movl %a, %a\n" addressing addr' ireg res1; + fprintf oc " movl %a, %a\n" addressing addr ireg res2 + end + | Mfloat32, [FR res] -> fprintf oc " cvtss2sd %a, %a\n" addressing addr freg res - | (Mfloat64 | Mfloat64al32), FR res -> + | (Mfloat64 | Mfloat64al32), [FR res] -> fprintf oc " movsd %a, %a\n" addressing addr freg res | _ -> assert false @@ -366,21 +344,25 @@ let print_builtin_vload_global oc chunk id ofs args res = let print_builtin_vstore_common oc chunk addr src tmp = match chunk, src with - | (Mint8signed | Mint8unsigned), IR src -> + | (Mint8signed | Mint8unsigned), [IR src] -> if Asmgen.low_ireg src then fprintf oc " movb %a, %a\n" ireg8 src addressing addr else begin fprintf oc " movl %a, %a\n" ireg src ireg tmp; fprintf oc " movb %a, %a\n" ireg8 tmp addressing addr end - | (Mint16signed | Mint16unsigned), IR src -> + | (Mint16signed | Mint16unsigned), [IR src] -> fprintf oc " movw %a, %a\n" ireg16 src addressing addr - | Mint32, IR src -> + | Mint32, [IR src] -> fprintf oc " movl %a, %a\n" ireg src addressing addr - | Mfloat32, FR src -> + | Mint64, [IR src1; IR src2] -> + let addr' = offset_addressing addr (coqint_of_camlint 4l) in + fprintf oc " movl %a, %a\n" ireg src2 addressing addr; + fprintf oc " movl %a, %a\n" ireg src1 addressing addr' + | Mfloat32, [FR src] -> fprintf oc " cvtsd2ss %a, %%xmm7\n" freg src; fprintf oc " movss %%xmm7, %a\n" addressing addr - | (Mfloat64 | Mfloat64al32), FR src -> + | (Mfloat64 | Mfloat64al32), [FR src] -> fprintf oc " movsd %a, %a\n" freg src addressing addr | _ -> assert false @@ -388,10 +370,10 @@ let print_builtin_vstore_common oc chunk addr src tmp = let print_builtin_vstore oc chunk args = fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; begin match args with - | [IR addr; src] -> + | IR addr :: src -> print_builtin_vstore_common oc chunk (Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src - (if addr = ECX then EDX else ECX) + (if addr = EAX then ECX else EAX) | _ -> assert false end; @@ -399,13 +381,8 @@ let print_builtin_vstore oc chunk args = let print_builtin_vstore_global oc chunk id ofs args = fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; - begin match args with - | [src] -> - print_builtin_vstore_common oc chunk - (Addrmode(None, None, Coq_inr(id,ofs))) src EDX - | _ -> - assert false - end; + print_builtin_vstore_common oc chunk + (Addrmode(None, None, Coq_inr(id,ofs))) args EAX; fprintf oc "%s end builtin __builtin_volatile_write\n" comment (* Handling of compiler-inlined builtins *) @@ -414,13 +391,13 @@ let print_builtin_inline oc name args res = fprintf oc "%s begin builtin %s\n" comment name; begin match name, args, res with (* Memory accesses *) - | "__builtin_read16_reversed", [IR a1], IR res -> + | "__builtin_read16_reversed", [IR a1], [IR res] -> let tmp = if Asmgen.low_ireg res then res else ECX in fprintf oc " movzwl 0(%a), %a\n" ireg a1 ireg tmp; fprintf oc " xchg %a, %a\n" ireg8 tmp high_ireg8 tmp; if tmp <> res then fprintf oc " movl %a, %a\n" ireg tmp ireg res - | "__builtin_read32_reversed", [IR a1], IR res -> + | "__builtin_read32_reversed", [IR a1], [IR res] -> fprintf oc " movl 0(%a), %a\n" ireg a1 ireg res; fprintf oc " bswap %a\n" ireg res | "__builtin_write16_reversed", [IR a1; IR a2], _ -> @@ -436,19 +413,19 @@ let print_builtin_inline oc name args res = fprintf oc " bswap %a\n" ireg tmp; fprintf oc " movl %a, 0(%a)\n" ireg tmp ireg a1 (* Integer arithmetic *) - | "__builtin_bswap", [IR a1], IR res -> + | "__builtin_bswap", [IR a1], [IR res] -> if a1 <> res then fprintf oc " movl %a, %a\n" ireg a1 ireg res; fprintf oc " bswap %a\n" ireg res (* Float arithmetic *) - | "__builtin_fabs", [FR a1], FR res -> + | "__builtin_fabs", [FR a1], [FR res] -> need_masks := true; if a1 <> res then fprintf oc " movsd %a, %a\n" freg a1 freg res; fprintf oc " andpd %a, %a\n" raw_symbol "__absd_mask" freg res - | "__builtin_fsqrt", [FR a1], FR res -> + | "__builtin_fsqrt", [FR a1], [FR res] -> fprintf oc " sqrtsd %a, %a\n" freg a1 freg res - | "__builtin_fmax", [FR a1; FR a2], FR res -> + | "__builtin_fmax", [FR a1; FR a2], [FR res] -> if res = a1 then fprintf oc " maxsd %a, %a\n" freg a2 freg res else if res = a2 then @@ -457,7 +434,7 @@ let print_builtin_inline oc name args res = fprintf oc " movsd %a, %a\n" freg a1 freg res; fprintf oc " maxsd %a, %a\n" freg a2 freg res end - | "__builtin_fmin", [FR a1; FR a2], FR res -> + | "__builtin_fmin", [FR a1; FR a2], [FR res] -> if res = a1 then fprintf oc " minsd %a, %a\n" freg a2 freg res else if res = a2 then @@ -466,6 +443,23 @@ let print_builtin_inline oc name args res = fprintf oc " movsd %a, %a\n" freg a1 freg res; fprintf oc " minsd %a, %a\n" freg a2 freg res end + (* 64-bit integer arithmetic *) + | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> + assert (ah = EDX && al = EAX && rh = EDX && rl = EAX); + fprintf oc " negl %a\n" ireg EAX; + fprintf oc " adcl $0, %a\n" ireg EDX; + fprintf oc " negl %a\n" ireg EDX + | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); + fprintf oc " addl %a, %a\n" ireg EBX ireg EAX; + fprintf oc " adcl %a, %a\n" ireg ECX ireg EDX + | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); + fprintf oc " subl %a, %a\n" ireg EBX ireg EAX; + fprintf oc " sbbl %a, %a\n" ireg ECX ireg EDX + | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> + assert (a = EAX && b = EDX && rh = EDX && rl = EAX); + fprintf oc " mull %a\n" ireg EDX (* Catch-all *) | _ -> invalid_arg ("unrecognized builtin " ^ name) @@ -604,6 +598,8 @@ let print_instruction oc = function fprintf oc " sarl %%cl, %a\n" ireg rd | Psar_ri(rd, n) -> fprintf oc " sarl $%a, %a\n" coqint n ireg rd + | Pshld_ri(rd, r1, n) -> + fprintf oc " shldl $%a, %a, %a\n" coqint n ireg r1 ireg rd | Pror_ri(rd, n) -> fprintf oc " rorl $%a, %a\n" coqint n ireg rd | Pcmp_rr(r1, r2) -> @@ -617,8 +613,8 @@ let print_instruction oc = function | Pcmov(c, rd, r1) -> fprintf oc " cmov%s %a, %a\n" (name_of_condition c) ireg r1 ireg rd | Psetcc(c, rd) -> - fprintf oc " set%s %%cl\n" (name_of_condition c); - fprintf oc " movzbl %%cl, %a\n" ireg rd + fprintf oc " set%s %a\n" (name_of_condition c) ireg8 rd; + fprintf oc " movzbl %a, %a\n" ireg8 rd ireg rd (** Arithmetic operations over floats *) | Paddd_ff(rd, r1) -> fprintf oc " addsd %a, %a\n" freg r1 freg rd @@ -757,6 +753,8 @@ let print_init oc = function fprintf oc " .short %ld\n" (camlint_of_coqint n) | Init_int32 n -> fprintf oc " .long %ld\n" (camlint_of_coqint n) + | Init_int64 n -> + fprintf oc " .quad %Ld\n" (camlint64_of_coqint n) | Init_float32 n -> fprintf oc " .long %ld %s %.18g\n" (camlint_of_coqint (Floats.Float.bits_of_single n)) diff --git a/ia32/PrintOp.ml b/ia32/PrintOp.ml index 7ddf42f..1f417c2 100644 --- a/ia32/PrintOp.ml +++ b/ia32/PrintOp.ml @@ -90,6 +90,7 @@ let print_operation reg pp = function | Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2 | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n) | Ororimm n, [r1] -> fprintf pp "%a ror %ld" reg r1 (camlint_of_coqint n) + | Oshldimm n, [r1;r2] -> fprintf pp "(%a, %a) << %ld" reg r1 reg r2 (camlint_of_coqint n) | Olea addr, args -> print_addressing reg pp (addr, args) | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1 | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1 @@ -100,6 +101,9 @@ let print_operation reg pp = function | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1 | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1 | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1 + | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2 + | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1 + | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1 | Ocmp c, args -> print_condition reg pp (c, args) | _ -> fprintf pp "<bad operator>" diff --git a/ia32/SelectOp.vp b/ia32/SelectOp.vp index 0c30386..7f79a4f 100644 --- a/ia32/SelectOp.vp +++ b/ia32/SelectOp.vp @@ -264,14 +264,16 @@ Nondetfunction or (e1: expr) (e2: expr) := | Eop (Ointconst n1) Enil, t2 => orimm n1 t2 | t1, Eop (Ointconst n2) Enil => orimm n2 t1 | Eop (Oshlimm n1) (t1:::Enil), Eop (Oshruimm n2) (t2:::Enil) => - if Int.eq (Int.add n1 n2) Int.iwordsize - && same_expr_pure t1 t2 - then Eop (Ororimm n2) (t1:::Enil) + if Int.eq (Int.add n1 n2) Int.iwordsize then + if same_expr_pure t1 t2 + then Eop (Ororimm n2) (t1:::Enil) + else Eop (Oshldimm n1) (t1:::t2:::Enil) else Eop Oor (e1:::e2:::Enil) | Eop (Oshruimm n2) (t2:::Enil), Eop (Oshlimm n1) (t1:::Enil) => - if Int.eq (Int.add n1 n2) Int.iwordsize - && same_expr_pure t1 t2 - then Eop (Ororimm n2) (t1:::Enil) + if Int.eq (Int.add n1 n2) Int.iwordsize then + if same_expr_pure t1 t2 + then Eop (Ororimm n2) (t1:::Enil) + else Eop (Oshldimm n1) (t1:::t2:::Enil) else Eop Oor (e1:::e2:::Enil) | _, _ => Eop Oor (e1:::e2:::Enil) diff --git a/ia32/SelectOpproof.v b/ia32/SelectOpproof.v index 18deca6..1569ad6 100644 --- a/ia32/SelectOpproof.v +++ b/ia32/SelectOpproof.v @@ -26,13 +26,6 @@ Require Import SelectOp. Open Local Scope cminorsel_scope. -Section CMCONSTR. - -Variable ge: genv. -Variable sp: val. -Variable e: env. -Variable m: mem. - (** * Useful lemmas and tactics *) (** The following are trivial lemmas and custom tactics that help @@ -78,9 +71,15 @@ Ltac TrivialExists := | [ |- exists v, _ /\ Val.lessdef ?a v ] => exists a; split; [EvalOp | auto] end. - (** * Correctness of the smart constructors *) +Section CMCONSTR. + +Variable ge: genv. +Variable sp: val. +Variable e: env. +Variable m: mem. + (** We now show that the code generated by "smart constructor" functions such as [SelectOp.notint] behaves as expected. Continuing the [notint] example, we show that if the expression [e] @@ -410,6 +409,12 @@ Proof. discriminate. Qed. +Remark int_add_sub_eq: + forall x y z, Int.add x y = z -> Int.sub z x = y. +Proof. + intros. subst z. rewrite Int.sub_add_l. rewrite Int.sub_idem. apply Int.add_zero_l. +Qed. + Lemma eval_or: binary_constructor_sound or Val.or. Proof. red; intros until y; unfold or; case (or_match a b); intros. @@ -417,26 +422,29 @@ Proof. InvEval. rewrite Val.or_commut. apply eval_orimm; auto. InvEval. apply eval_orimm; auto. (* shlimm - shruimm *) - destruct (Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2) eqn:?. - destruct (andb_prop _ _ Heqb0). - generalize (Int.eq_spec (Int.add n1 n2) Int.iwordsize); rewrite H1; intros EQ. + predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize. + destruct (same_expr_pure t1 t2) eqn:?. InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst. exists (Val.ror v0 (Vint n2)); split. EvalOp. destruct v0; simpl; auto. destruct (Int.ltu n1 Int.iwordsize) eqn:?; auto. destruct (Int.ltu n2 Int.iwordsize) eqn:?; auto. simpl. rewrite <- Int.or_ror; auto. + InvEval. exists (Val.or x y); split. EvalOp. + simpl. erewrite int_add_sub_eq; eauto. rewrite H0; rewrite H; auto. auto. TrivialExists. (* shruimm - shlimm *) - destruct (Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2) eqn:?. - destruct (andb_prop _ _ Heqb0). - generalize (Int.eq_spec (Int.add n1 n2) Int.iwordsize); rewrite H1; intros EQ. + predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize. + destruct (same_expr_pure t1 t2) eqn:?. InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst. exists (Val.ror v1 (Vint n2)); split. EvalOp. destruct v1; simpl; auto. destruct (Int.ltu n2 Int.iwordsize) eqn:?; auto. destruct (Int.ltu n1 Int.iwordsize) eqn:?; auto. simpl. rewrite Int.or_commut. rewrite <- Int.or_ror; auto. + InvEval. exists (Val.or y x); split. EvalOp. + simpl. erewrite int_add_sub_eq; eauto. rewrite H0; rewrite H; auto. + rewrite Val.or_commut; auto. TrivialExists. (* default *) TrivialExists. diff --git a/ia32/standard/Conventions1.v b/ia32/standard/Conventions1.v index 49f5da9..cae20f6 100644 --- a/ia32/standard/Conventions1.v +++ b/ia32/standard/Conventions1.v @@ -21,46 +21,26 @@ Require Import Locations. (** Machine registers (type [mreg] in module [Locations]) are divided in the following groups: -- Temporaries used for spilling, reloading, and parallel move operations. -- Allocatable registers, that can be assigned to RTL pseudo-registers. - These are further divided into: --- Callee-save registers, whose value is preserved across a function call. --- Caller-save registers that can be modified during a function call. +- Callee-save registers, whose value is preserved across a function call. +- Caller-save registers that can be modified during a function call. We follow the x86-32 application binary interface (ABI) in our choice of callee- and caller-save registers. *) -Definition int_caller_save_regs := AX :: nil. +Definition int_caller_save_regs := AX :: CX :: DX :: nil. -Definition float_caller_save_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: nil. +Definition float_caller_save_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil. Definition int_callee_save_regs := BX :: SI :: DI :: BP :: nil. Definition float_callee_save_regs : list mreg := nil. -Definition destroyed_at_call_regs := - int_caller_save_regs ++ float_caller_save_regs. +Definition destroyed_at_call := + FP0 :: int_caller_save_regs ++ float_caller_save_regs. -Definition destroyed_at_call := List.map R destroyed_at_call_regs. - -Definition int_temporaries := IT1 :: IT2 :: nil. - -Definition float_temporaries := FT1 :: FT2 :: nil. - -(** [FP0] is not used for reloading, hence it is not in [float_temporaries], - however it is not allocatable, hence it is in [temporaries]. *) - -Definition temporary_regs := IT1 :: IT2 :: FT1 :: FT2 :: FP0 :: nil. - -Definition temporaries := List.map R temporary_regs. - -Definition destroyed_at_move_regs := FP0 :: nil. - -Definition destroyed_at_move := List.map R destroyed_at_move_regs. - -Definition dummy_int_reg := AX. (**r Used in [Coloring]. *) -Definition dummy_float_reg := X0. (**r Used in [Coloring]. *) +Definition dummy_int_reg := AX. (**r Used in [Regalloc]. *) +Definition dummy_float_reg := X0. (**r Used in [Regalloc]. *) (** The [index_int_callee_save] and [index_float_callee_save] associate a unique positive integer to callee-save registers. This integer is @@ -147,7 +127,7 @@ Proof. Qed. (** The following lemmas show that - (temporaries, destroyed at call, integer callee-save, float callee-save) + (destroyed at call, integer callee-save, float callee-save) is a partition of the set of machine registers. *) Lemma int_float_callee_save_disjoint: @@ -158,34 +138,26 @@ Qed. Lemma register_classification: forall r, - (In (R r) temporaries \/ In (R r) destroyed_at_call) \/ - (In r int_callee_save_regs \/ In r float_callee_save_regs). + In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs. Proof. destruct r; - try (left; left; simpl; OrEq); - try (left; right; simpl; OrEq); + try (left; simpl; OrEq); try (right; left; simpl; OrEq); try (right; right; simpl; OrEq). Qed. Lemma int_callee_save_not_destroyed: forall r, - In (R r) temporaries \/ In (R r) destroyed_at_call -> - ~(In r int_callee_save_regs). + In r destroyed_at_call -> In r int_callee_save_regs -> False. Proof. - intros; red; intros. elim H. - generalize H0. simpl; ElimOrEq; NotOrEq. - generalize H0. simpl; ElimOrEq; NotOrEq. + intros. revert H0 H. simpl. ElimOrEq; NotOrEq. Qed. Lemma float_callee_save_not_destroyed: forall r, - In (R r) temporaries \/ In (R r) destroyed_at_call -> - ~(In r float_callee_save_regs). + In r destroyed_at_call -> In r float_callee_save_regs -> False. Proof. - intros; red; intros. elim H. - generalize H0. simpl; ElimOrEq; NotOrEq. - generalize H0. simpl; ElimOrEq; NotOrEq. + intros. revert H0 H. simpl. ElimOrEq; NotOrEq. Qed. Lemma int_callee_save_type: @@ -244,13 +216,15 @@ Qed. registers [AX] or [FP0], depending on the type of the returned value. We treat a function without result as a function with one integer result. *) -Definition loc_result (s: signature) : mreg := +Definition loc_result (s: signature) : list mreg := match s.(sig_res) with - | None => AX - | Some Tint => AX - | Some Tfloat => FP0 + | None => AX :: nil + | Some Tint => AX :: nil + | Some Tfloat => FP0 :: nil + | Some Tlong => DX :: AX :: nil end. +(* (** The result location has the type stated in the signature. *) Lemma loc_result_type: @@ -263,17 +237,18 @@ Proof. destruct t; reflexivity. reflexivity. Qed. +*) -(** The result location is a caller-save register or a temporary *) +(** The result locations are caller-save registers *) Lemma loc_result_caller_save: - forall (s: signature), - In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries. + forall (s: signature) (r: mreg), + In r (loc_result s) -> In r destroyed_at_call. Proof. - intros; unfold loc_result. - destruct (sig_res s). - destruct t. left; simpl; OrEq. right; simpl; OrEq. - left; simpl; OrEq. + intros. + assert (r = AX \/ r = DX \/ r = FP0). + unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition. + destruct H0 as [A | [A | A]]; subst r; simpl; OrEq. Qed. (** ** Location of function arguments *) @@ -284,8 +259,9 @@ Fixpoint loc_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : list loc := match tyl with | nil => nil - | Tint :: tys => S (Outgoing ofs Tint) :: loc_arguments_rec tys (ofs + 1) - | Tfloat :: tys => S (Outgoing ofs Tfloat) :: loc_arguments_rec tys (ofs + 2) + | Tint :: tys => S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 1) + | Tfloat :: tys => S Outgoing ofs Tfloat :: loc_arguments_rec tys (ofs + 2) + | Tlong :: tys => S Outgoing (ofs + 1) Tint :: S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 2) end. (** [loc_arguments s] returns the list of locations where to store arguments @@ -301,27 +277,19 @@ Fixpoint size_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : Z := match tyl with | nil => ofs - | Tint :: tys => size_arguments_rec tys (ofs + 1) - | Tfloat :: tys => size_arguments_rec tys (ofs + 2) + | ty :: tys => size_arguments_rec tys (ofs + typesize ty) end. Definition size_arguments (s: signature) : Z := size_arguments_rec s.(sig_args) 0. -(** A tail-call is possible for a signature if the corresponding - arguments are all passed in registers. *) - -Definition tailcall_possible (s: signature) : Prop := - forall l, In l (loc_arguments s) -> - match l with R _ => True | S _ => False end. - -(** Argument locations are either non-temporary registers or [Outgoing] +(** Argument locations are either caller-save registers or [Outgoing] stack slots at nonnegative offsets. *) Definition loc_argument_acceptable (l: loc) : Prop := match l with - | R r => ~(In l temporaries) - | S (Outgoing ofs ty) => ofs >= 0 + | R r => In r destroyed_at_call + | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong | _ => False end. @@ -329,62 +297,36 @@ Remark loc_arguments_rec_charact: forall tyl ofs l, In l (loc_arguments_rec tyl ofs) -> match l with - | S (Outgoing ofs' ty) => ofs' >= ofs + | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong | _ => False end. Proof. induction tyl; simpl loc_arguments_rec; intros. - elim H. - destruct a; simpl in H; destruct H. - subst l. omega. - generalize (IHtyl _ _ H). destruct l; auto. destruct s; auto. omega. - subst l. omega. - generalize (IHtyl _ _ H). destruct l; auto. destruct s; auto. omega. + destruct H. + destruct a. +- destruct H. subst l. split. omega. congruence. + exploit IHtyl; eauto. + destruct l; auto. destruct sl; auto. intuition omega. +- destruct H. subst l. split. omega. congruence. + exploit IHtyl; eauto. + destruct l; auto. destruct sl; auto. intuition omega. +- destruct H. subst l; split; [omega|congruence]. + destruct H. subst l; split; [omega|congruence]. + exploit IHtyl; eauto. + destruct l; auto. destruct sl; auto. intuition omega. Qed. Lemma loc_arguments_acceptable: - forall (s: signature) (r: loc), - In r (loc_arguments s) -> loc_argument_acceptable r. + forall (s: signature) (l: loc), + In l (loc_arguments s) -> loc_argument_acceptable l. Proof. unfold loc_arguments; intros. - generalize (loc_arguments_rec_charact _ _ _ H). - destruct r; tauto. -Qed. -Hint Resolve loc_arguments_acceptable: locs. - -(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *) - -Remark loc_arguments_rec_notin_local: - forall tyl ofs ofs0 ty0, - Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl ofs). -Proof. - induction tyl; simpl; intros. - auto. - destruct a; simpl; auto. -Qed. - -Remark loc_arguments_rec_notin_outgoing: - forall tyl ofs ofs0 ty0, - ofs0 + typesize ty0 <= ofs -> - Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl ofs). -Proof. - induction tyl; simpl; intros. - auto. - destruct a. - split. simpl. omega. apply IHtyl. omega. - split. simpl. omega. apply IHtyl. omega. + exploit loc_arguments_rec_charact; eauto. + unfold loc_argument_acceptable. + destruct l; tauto. Qed. -Lemma loc_arguments_norepet: - forall (s: signature), Loc.norepet (loc_arguments s). -Proof. - intros. unfold loc_arguments. generalize (sig_args s) 0. - induction l; simpl; intros. - constructor. - destruct a; constructor. - apply loc_arguments_rec_notin_outgoing. simpl; omega. auto. - apply loc_arguments_rec_notin_outgoing. simpl; omega. auto. -Qed. +Hint Resolve loc_arguments_acceptable: locs. (** The offsets of [Outgoing] arguments are below [size_arguments s]. *) @@ -393,9 +335,8 @@ Remark size_arguments_rec_above: Proof. induction tyl; simpl; intros. omega. - destruct a. - apply Zle_trans with (ofs0 + 1); auto; omega. - apply Zle_trans with (ofs0 + 2); auto; omega. + apply Zle_trans with (ofs0 + typesize a); auto. + generalize (typesize_pos a); omega. Qed. Lemma size_arguments_above: @@ -407,56 +348,20 @@ Qed. Lemma loc_arguments_bounded: forall (s: signature) (ofs: Z) (ty: typ), - In (S (Outgoing ofs ty)) (loc_arguments s) -> + In (S Outgoing ofs ty) (loc_arguments s) -> ofs + typesize ty <= size_arguments s. Proof. intros until ty. unfold loc_arguments, size_arguments. generalize (sig_args s) 0. induction l; simpl; intros. - elim H. - destruct a; simpl in H; destruct H. - inv H. apply size_arguments_rec_above. - auto. - inv H. apply size_arguments_rec_above. + destruct H. + destruct a. + destruct H. inv H. apply size_arguments_rec_above. auto. + destruct H. inv H. apply size_arguments_rec_above. auto. + destruct H. inv H. + simpl typesize. replace (z + 1 + 1) with (z + 2) by omega. apply size_arguments_rec_above. + destruct H. inv H. + simpl typesize. apply Zle_trans with (ofs + 2). omega. apply size_arguments_rec_above. auto. Qed. -(** Temporary registers do not overlap with argument locations. *) - -Lemma loc_arguments_not_temporaries: - forall sig, Loc.disjoint (loc_arguments sig) temporaries. -Proof. - intros; red; intros x1 x2 H. - generalize (loc_arguments_rec_charact _ _ _ H). - destruct x1. tauto. destruct s; intuition. - revert H1. simpl; ElimOrEq; auto. -Qed. -Hint Resolve loc_arguments_not_temporaries: locs. - -(** Argument registers are caller-save. *) -Lemma arguments_caller_save: - forall sig r, - In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call. -Proof. - unfold loc_arguments; intros. - elim (loc_arguments_rec_charact _ _ _ H); simpl. -Qed. - -(** Argument locations agree in number with the function signature. *) - -Lemma loc_arguments_length: - forall sig, - List.length (loc_arguments sig) = List.length sig.(sig_args). -Proof. - intros. unfold loc_arguments. generalize (sig_args sig) 0. - induction l; simpl; intros. auto. destruct a; simpl; decEq; auto. -Qed. - -(** Argument locations agree in types with the function signature. *) - -Lemma loc_arguments_type: - forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args). -Proof. - intros. unfold loc_arguments. generalize (sig_args sig) 0. - induction l; simpl; intros. auto. destruct a; simpl; decEq; auto. -Qed. diff --git a/ia32/standard/Stacklayout.v b/ia32/standard/Stacklayout.v index be854fd..f9d1daf 100644 --- a/ia32/standard/Stacklayout.v +++ b/ia32/standard/Stacklayout.v @@ -19,10 +19,9 @@ Require Import Bounds. from bottom (lowest offsets) to top: - Space for outgoing arguments to function calls. - Back link to parent frame -- Local stack slots of integer type. - Saved values of integer callee-save registers used by the function. -- Local stack slots of float type. - Saved values of float callee-save registers used by the function. +- Local stack slots. - Space for the stack-allocated data declared in Cminor - Return address. @@ -36,10 +35,9 @@ Record frame_env : Type := mk_frame_env { fe_size: Z; fe_ofs_link: Z; fe_ofs_retaddr: Z; - fe_ofs_int_local: Z; + fe_ofs_local: Z; fe_ofs_int_callee_save: Z; fe_num_int_callee_save: Z; - fe_ofs_float_local: Z; fe_ofs_float_callee_save: Z; fe_num_float_callee_save: Z; fe_stack_data: Z @@ -50,17 +48,16 @@ Record frame_env : Type := mk_frame_env { Definition make_env (b: bounds) := let olink := 4 * b.(bound_outgoing) in (* back link *) - let oil := olink + 4 in (* integer locals *) - let oics := oil + 4 * b.(bound_int_local) in (* integer callee-saves *) - let oendi := oics + 4 * b.(bound_int_callee_save) in - let ofl := align oendi 8 in (* float locals *) - let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *) - let ostkdata := ofcs + 8 * b.(bound_float_callee_save) in (* stack data *) + let oics := olink + 4 in (* integer callee-saves *) + let ofcs := align (oics + 4 * b.(bound_int_callee_save)) 8 in (* float callee-saves *) + let ol := ofcs + 8 * b.(bound_float_callee_save) in (* locals *) + let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *) let oretaddr := align (ostkdata + b.(bound_stack_data)) 4 in (* return address *) let sz := oretaddr + 4 in (* total size *) mk_frame_env sz olink oretaddr - oil oics b.(bound_int_callee_save) - ofl ofcs b.(bound_float_callee_save) + ol + oics b.(bound_int_callee_save) + ofcs b.(bound_float_callee_save) ostkdata. (** Separation property *) @@ -70,25 +67,23 @@ Remark frame_env_separated: let fe := make_env b in 0 <= fe_ofs_arg /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_link) - /\ fe.(fe_ofs_link) + 4 <= fe.(fe_ofs_int_local) - /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_int_callee_save) - /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local) - /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save) - /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_stack_data) + /\ fe.(fe_ofs_link) + 4 <= fe.(fe_ofs_int_callee_save) + /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save) + /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_ofs_local) + /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_stack_data) /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_ofs_retaddr) /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_size). Proof. intros. generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)). + generalize (align_le (fe.(fe_ofs_local) + 4 * b.(bound_local)) 8 (refl_equal _)). generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 4 (refl_equal _)). unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr, - fe_ofs_int_local, fe_ofs_int_callee_save, - fe_num_int_callee_save, - fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save, + fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save, + fe_ofs_float_callee_save, fe_num_float_callee_save, fe_stack_data, fe_ofs_arg. intros. - generalize (bound_int_local_pos b); intro; - generalize (bound_float_local_pos b); intro; + generalize (bound_local_pos b); intro; generalize (bound_int_callee_save_pos b); intro; generalize (bound_float_callee_save_pos b); intro; generalize (bound_outgoing_pos b); intro; @@ -102,38 +97,34 @@ Remark frame_env_aligned: forall b, let fe := make_env b in (4 | fe.(fe_ofs_link)) - /\ (4 | fe.(fe_ofs_int_local)) /\ (4 | fe.(fe_ofs_int_callee_save)) - /\ (8 | fe.(fe_ofs_float_local)) /\ (8 | fe.(fe_ofs_float_callee_save)) - /\ (4 | fe.(fe_ofs_retaddr)) + /\ (8 | fe.(fe_ofs_local)) /\ (8 | fe.(fe_stack_data)) + /\ (4 | fe.(fe_ofs_retaddr)) /\ (4 | fe.(fe_size)). Proof. intros. unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr, - fe_ofs_int_local, fe_ofs_int_callee_save, + fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save, - fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save, + fe_ofs_float_callee_save, fe_num_float_callee_save, fe_stack_data. set (x1 := 4 * bound_outgoing b). assert (4 | x1). unfold x1; exists (bound_outgoing b); ring. set (x2 := x1 + 4). assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists 1; auto. - set (x3 := x2 + 4 * bound_int_local b). - assert (4 | x3). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring. - set (x4 := x3 + 4 * bound_int_callee_save b). - set (x5 := align x4 8). - assert (8 | x5). unfold x5. apply align_divides. omega. - set (x6 := x5 + 8 * bound_float_local b). - assert (8 | x6). unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring. - set (x7 := x6 + 8 * bound_float_callee_save b). - assert (8 | x7). - unfold x7. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring. - set (x8 := align (x7 + bound_stack_data b) 4). - assert (4 | x8). apply align_divides. omega. - set (x9 := x8 + 4). - assert (4 | x9). unfold x8; apply Zdivide_plus_r; auto. exists 1; auto. + set (x3 := x2 + 4 * bound_int_callee_save b). + set (x4 := align x3 8). + assert (8 | x4). unfold x4. apply align_divides. omega. + set (x5 := x4 + 8 * bound_float_callee_save b). + assert (8 | x5). unfold x5; apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring. + set (x6 := align (x5 + 4 * bound_local b) 8). + assert (8 | x6). unfold x6; apply align_divides; omega. + set (x7 := align (x6 + bound_stack_data b) 4). + assert (4 | x7). unfold x7; apply align_divides; omega. + set (x8 := x7 + 4). + assert (4 | x8). unfold x8; apply Zdivide_plus_r; auto. exists 1; auto. tauto. Qed. diff --git a/lib/Camlcoq.ml b/lib/Camlcoq.ml index d99e20f..e057771 100644 --- a/lib/Camlcoq.ml +++ b/lib/Camlcoq.ml @@ -252,6 +252,11 @@ let camlstring_of_coqstring (s: char list) = | c :: s -> r.[pos] <- c; fill (pos + 1) s in fill 0 s +let coqstring_of_camlstring s = + let rec cstring accu pos = + if pos < 0 then accu else cstring (s.[pos] :: accu) (pos - 1) + in cstring [] (String.length s - 1) + (* Floats *) let coqfloat_of_camlfloat f = diff --git a/lib/Coqlib.v b/lib/Coqlib.v index b936b9b..ce5d94e 100644 --- a/lib/Coqlib.v +++ b/lib/Coqlib.v @@ -559,6 +559,16 @@ Proof. Defined. Global Opaque Zdivide_dec. +Lemma Zdivide_interval: + forall a b c, + 0 < c -> 0 <= a < b -> (c | a) -> (c | b) -> 0 <= a <= b - c. +Proof. + intros. destruct H1 as [x EQ1]. destruct H2 as [y EQ2]. subst. destruct H0. + split. omega. exploit Zmult_lt_reg_r; eauto. intros. + replace (y * c - c) with ((y - 1) * c) by ring. + apply Zmult_le_compat_r; omega. +Qed. + (** Conversion from [Z] to [nat]. *) Definition nat_of_Z: Z -> nat := Z.to_nat. @@ -1250,6 +1260,17 @@ Proof. intros. unfold proj_sumbool. destruct a. auto. contradiction. Qed. +Ltac InvBooleans := + match goal with + | [ H: _ && _ = true |- _ ] => + destruct (andb_prop _ _ H); clear H; InvBooleans + | [ H: _ || _ = false |- _ ] => + destruct (orb_false_elim _ _ H); clear H; InvBooleans + | [ H: proj_sumbool ?x = true |- _ ] => + generalize (proj_sumbool_true _ H); clear H; intro; InvBooleans + | _ => idtac + end. + Section DECIDABLE_EQUALITY. Variable A: Type. diff --git a/lib/FSetAVLplus.v b/lib/FSetAVLplus.v new file mode 100644 index 0000000..5f5cc51 --- /dev/null +++ b/lib/FSetAVLplus.v @@ -0,0 +1,513 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** An extension of [FSetAVL] (finite sets as balanced binary search trees) + with extra interval-based operations, more efficient than standard + operations. *) + +Require Import FSetInterface. +Require FSetAVL. +Require Import Coqlib. + +Module Make(X: OrderedType). + +Include FSetAVL.Make(X). + +Module Raw := MSet.Raw. + +Section MEM_BETWEEN. + +(** [mem_between above below s] is [true] iff there exists an element of [s] + that belongs to the interval described by the predicates [above] and [below]. + Using the monotonicity of [above] and [below], the implementation of + [mem_between] avoids traversing the subtrees of [s] that + lie entirely outside the interval of interest. *) + +Variable above_low_bound: elt -> bool. +Variable below_high_bound: elt -> bool. + +Fixpoint raw_mem_between (m: Raw.tree) : bool := + match m with + | Raw.Leaf => false + | Raw.Node _ l x r => + if above_low_bound x + then if below_high_bound x + then true + else raw_mem_between l + else raw_mem_between r + end. + +Definition mem_between (m: t) : bool := raw_mem_between m.(MSet.this). + +Hypothesis above_monotone: + forall x1 x2, X.eq x1 x2 \/ X.lt x1 x2 -> above_low_bound x1 = true -> above_low_bound x2 = true. +Hypothesis below_monotone: + forall x1 x2, X.eq x1 x2 \/ X.lt x2 x1 -> below_high_bound x1 = true -> below_high_bound x2 = true. + +Lemma raw_mem_between_1: + forall m, + raw_mem_between m = true -> + exists x, Raw.In x m /\ above_low_bound x = true /\ below_high_bound x = true. +Proof. + induction m; simpl; intros. +- discriminate. +- destruct (above_low_bound t1) eqn: LB; [destruct (below_high_bound t1) eqn: HB | idtac]. + + (* in interval *) + exists t1; split; auto. apply Raw.IsRoot. auto. + + (* above interval *) + exploit IHm1; auto. intros [x' [A B]]. exists x'; split; auto. apply Raw.InLeft; auto. + + (* below interval *) + exploit IHm2; auto. intros [x' [A B]]. exists x'; split; auto. apply Raw.InRight; auto. +Qed. + +Lemma raw_mem_between_2: + forall x m, + Raw.bst m -> + Raw.In x m -> above_low_bound x = true -> below_high_bound x = true -> + raw_mem_between m = true. +Proof. + induction 1; simpl; intros. +- inv H. +- rewrite Raw.In_node_iff in H1. + destruct (above_low_bound x0) eqn: LB; [destruct (below_high_bound x0) eqn: HB | idtac]. + + (* in interval *) + auto. + + (* above interval *) + assert (X.eq x x0 \/ X.lt x0 x -> False). + { intros. exploit below_monotone; eauto. congruence. } + intuition. + + assert (X.eq x x0 \/ X.lt x x0 -> False). + { intros. exploit above_monotone; eauto. congruence. } + intuition. +Qed. + +Theorem mem_between_1: + forall s, + mem_between s = true -> + exists x, In x s /\ above_low_bound x = true /\ below_high_bound x = true. +Proof. + intros. apply raw_mem_between_1. auto. +Qed. + +Theorem mem_between_2: + forall x s, + In x s -> above_low_bound x = true -> below_high_bound x = true -> + mem_between s = true. +Proof. + unfold mem_between; intros. apply raw_mem_between_2 with x; auto. apply MSet.is_ok. +Qed. + +End MEM_BETWEEN. + +Section ELEMENTS_BETWEEN. + +(** [elements_between above below s] returns the set of elements of [s] + that belong to the interval [above,below]. *) + +Variable above_low_bound: elt -> bool. +Variable below_high_bound: elt -> bool. + +Fixpoint raw_elements_between (m: Raw.tree) : Raw.tree := + match m with + | Raw.Leaf => Raw.Leaf + | Raw.Node _ l x r => + if above_low_bound x then + if below_high_bound x then + Raw.join (raw_elements_between l) x (raw_elements_between r) + else + raw_elements_between l + else + raw_elements_between r + end. + +Remark In_raw_elements_between_1: + forall x m, + Raw.In x (raw_elements_between m) -> Raw.In x m. +Proof. + induction m; simpl; intros. +- inv H. +- rewrite Raw.In_node_iff. + destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H. + + rewrite Raw.join_spec in H. intuition. + + left; apply IHm1; auto. + + right; right; apply IHm2; auto. +Qed. + +Lemma raw_elements_between_ok: + forall m, Raw.bst m -> Raw.bst (raw_elements_between m). +Proof. + induction 1; simpl. +- constructor. +- destruct (above_low_bound x) eqn:LB; [destruct (below_high_bound x) eqn: RB | idtac]; simpl. + + apply Raw.join_ok; auto. + red; intros. apply l0. apply In_raw_elements_between_1; auto. + red; intros. apply g. apply In_raw_elements_between_1; auto. + + auto. + + auto. +Qed. + +Definition elements_between (s: t) : t := + @MSet.Mkt (raw_elements_between s.(MSet.this)) (raw_elements_between_ok s.(MSet.this) s.(MSet.is_ok)). + +Hypothesis above_monotone: + forall x1 x2, X.eq x1 x2 \/ X.lt x1 x2 -> above_low_bound x1 = true -> above_low_bound x2 = true. +Hypothesis below_monotone: + forall x1 x2, X.eq x1 x2 \/ X.lt x2 x1 -> below_high_bound x1 = true -> below_high_bound x2 = true. + +Remark In_raw_elements_between_2: + forall x m, + Raw.In x (raw_elements_between m) -> above_low_bound x = true /\ below_high_bound x = true. +Proof. + induction m; simpl; intros. +- inv H. +- destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H. + + rewrite Raw.join_spec in H. intuition. + apply above_monotone with t1; auto. + apply below_monotone with t1; auto. + + auto. + + auto. +Qed. + +Remark In_raw_elements_between_3: + forall x m, + Raw.bst m -> + Raw.In x m -> above_low_bound x = true -> below_high_bound x = true -> + Raw.In x (raw_elements_between m). +Proof. + induction 1; simpl; intros. +- auto. +- rewrite Raw.In_node_iff in H1. + destruct (above_low_bound x0) eqn:LB; [destruct (below_high_bound x0) eqn: RB | idtac]. + + rewrite Raw.join_spec. intuition. + + assert (X.eq x x0 \/ X.lt x0 x -> False). + { intros. exploit below_monotone; eauto. congruence. } + intuition. elim H7. apply g. auto. + + assert (X.eq x x0 \/ X.lt x x0 -> False). + { intros. exploit above_monotone; eauto. congruence. } + intuition. elim H7. apply l0. auto. +Qed. + +Theorem elements_between_iff: + forall x s, + In x (elements_between s) <-> In x s /\ above_low_bound x = true /\ below_high_bound x = true. +Proof. + intros. unfold elements_between, In; simpl. split. + intros. split. apply In_raw_elements_between_1; auto. eapply In_raw_elements_between_2; eauto. + intros [A [B C]]. apply In_raw_elements_between_3; auto. apply MSet.is_ok. +Qed. + +End ELEMENTS_BETWEEN. + +Section FOR_ALL_BETWEEN. + +(** [for_all_between pred above below s] is [true] iff all elements of [s] + in a given variation interval satisfy predicate [pred]. + The variation interval is given by two predicates [above] and [below] + which must both hold if the element is part of the interval. + Using the monotonicity of [above] and [below], the implementation of + [for_all_between] avoids traversing the subtrees of [s] that + lie entirely outside the interval of interest. *) + +Variable pred: elt -> bool. +Variable above_low_bound: elt -> bool. +Variable below_high_bound: elt -> bool. + +Fixpoint raw_for_all_between (m: Raw.tree) : bool := + match m with + | Raw.Leaf => true + | Raw.Node _ l x r => + if above_low_bound x + then if below_high_bound x + then raw_for_all_between l && pred x && raw_for_all_between r + else raw_for_all_between l + else raw_for_all_between r + end. + +Definition for_all_between (m: t) : bool := raw_for_all_between m.(MSet.this). + +Hypothesis pred_compat: + forall x1 x2, X.eq x1 x2 -> pred x1 = pred x2. +Hypothesis above_monotone: + forall x1 x2, X.eq x1 x2 \/ X.lt x1 x2 -> above_low_bound x1 = true -> above_low_bound x2 = true. +Hypothesis below_monotone: + forall x1 x2, X.eq x1 x2 \/ X.lt x2 x1 -> below_high_bound x1 = true -> below_high_bound x2 = true. + +Lemma raw_for_all_between_1: + forall x m, + Raw.bst m -> + raw_for_all_between m = true -> + Raw.In x m -> + above_low_bound x = true -> + below_high_bound x = true -> + pred x = true. +Proof. + induction 1; simpl; intros. +- inv H0. +- destruct (above_low_bound x0) eqn: LB; [destruct (below_high_bound x0) eqn: HB | idtac]. + + (* in interval *) + destruct (andb_prop _ _ H1) as [P C]. destruct (andb_prop _ _ P) as [A B]. clear H1 P. + inv H2. + * erewrite pred_compat; eauto. + * apply IHbst1; auto. + * apply IHbst2; auto. + + (* above interval *) + inv H2. + * assert (below_high_bound x0 = true) by (apply below_monotone with x; auto). + congruence. + * apply IHbst1; auto. + * assert (below_high_bound x0 = true) by (apply below_monotone with x; auto). + congruence. + + (* below interval *) + inv H2. + * assert (above_low_bound x0 = true) by (apply above_monotone with x; auto). + congruence. + * assert (above_low_bound x0 = true) by (apply above_monotone with x; auto). + congruence. + * apply IHbst2; auto. +Qed. + +Lemma raw_for_all_between_2: + forall m, + Raw.bst m -> + (forall x, Raw.In x m -> above_low_bound x = true -> below_high_bound x = true -> pred x = true) -> + raw_for_all_between m = true. +Proof. + induction 1; intros; simpl. +- auto. +- destruct (above_low_bound x) eqn: LB; [destruct (below_high_bound x) eqn: HB | idtac]. + + (* in interval *) + rewrite IHbst1. rewrite (H1 x). rewrite IHbst2. auto. + intros. apply H1; auto. apply Raw.InRight; auto. + apply Raw.IsRoot. reflexivity. auto. auto. + intros. apply H1; auto. apply Raw.InLeft; auto. + + (* above interval *) + apply IHbst1. intros. apply H1; auto. apply Raw.InLeft; auto. + + (* below interval *) + apply IHbst2. intros. apply H1; auto. apply Raw.InRight; auto. +Qed. + +Theorem for_all_between_iff: + forall s, + for_all_between s = true <-> (forall x, In x s -> above_low_bound x = true -> below_high_bound x = true -> pred x = true). +Proof. + unfold for_all_between; intros; split; intros. +- eapply raw_for_all_between_1; eauto. apply MSet.is_ok. +- apply raw_for_all_between_2; auto. apply MSet.is_ok. +Qed. + +End FOR_ALL_BETWEEN. + +Section PARTITION_BETWEEN. + +Variable above_low_bound: elt -> bool. +Variable below_high_bound: elt -> bool. + +Fixpoint raw_partition_between (m: Raw.tree) : Raw.tree * Raw.tree := + match m with + | Raw.Leaf => (Raw.Leaf, Raw.Leaf) + | Raw.Node _ l x r => + if above_low_bound x then + if below_high_bound x then + (let (l1, l2) := raw_partition_between l in + let (r1, r2) := raw_partition_between r in + (Raw.join l1 x r1, Raw.concat l2 r2)) + else + (let (l1, l2) := raw_partition_between l in + (l1, Raw.join l2 x r)) + else + (let (r1, r2) := raw_partition_between r in + (r1, Raw.join l x r2)) + end. + +Remark In_raw_partition_between_1: + forall x m, + Raw.In x (fst (raw_partition_between m)) -> Raw.In x m. +Proof. + induction m; simpl; intros. +- inv H. +- destruct (raw_partition_between m1) as [l1 l2] eqn:LEQ; simpl in *. + destruct (raw_partition_between m2) as [r1 r2] eqn:REQ; simpl in *. + destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H. + + rewrite Raw.join_spec in H. rewrite Raw.In_node_iff. intuition. + + rewrite Raw.In_node_iff. intuition. + + rewrite Raw.In_node_iff. intuition. +Qed. + +Remark In_raw_partition_between_2: + forall x m, + Raw.In x (snd (raw_partition_between m)) -> Raw.In x m. +Proof. + induction m; simpl; intros. +- inv H. +- destruct (raw_partition_between m1) as [l1 l2] eqn:LEQ; simpl in *. + destruct (raw_partition_between m2) as [r1 r2] eqn:REQ; simpl in *. + destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H. + + rewrite Raw.concat_spec in H. rewrite Raw.In_node_iff. intuition. + + rewrite Raw.join_spec in H. rewrite Raw.In_node_iff. intuition. + + rewrite Raw.join_spec in H. rewrite Raw.In_node_iff. intuition. +Qed. + +Lemma raw_partition_between_ok: + forall m, Raw.bst m -> Raw.bst (fst (raw_partition_between m)) /\ Raw.bst (snd (raw_partition_between m)). +Proof. + induction 1; simpl. +- split; constructor. +- destruct IHbst1 as [L1 L2]. destruct IHbst2 as [R1 R2]. + destruct (raw_partition_between l) as [l1 l2] eqn:LEQ; simpl in *. + destruct (raw_partition_between r) as [r1 r2] eqn:REQ; simpl in *. + destruct (above_low_bound x) eqn:LB; [destruct (below_high_bound x) eqn: RB | idtac]; simpl. + + split. + apply Raw.join_ok; auto. + red; intros. apply l0. apply In_raw_partition_between_1. rewrite LEQ; auto. + red; intros. apply g. apply In_raw_partition_between_1. rewrite REQ; auto. + apply Raw.concat_ok; auto. + intros. transitivity x. + apply l0. apply In_raw_partition_between_2. rewrite LEQ; auto. + apply g. apply In_raw_partition_between_2. rewrite REQ; auto. + + split. + auto. + apply Raw.join_ok; auto. + red; intros. apply l0. apply In_raw_partition_between_2. rewrite LEQ; auto. + + split. + auto. + apply Raw.join_ok; auto. + red; intros. apply g. apply In_raw_partition_between_2. rewrite REQ; auto. +Qed. + +Hypothesis above_monotone: + forall x1 x2, X.eq x1 x2 \/ X.lt x1 x2 -> above_low_bound x1 = true -> above_low_bound x2 = true. +Hypothesis below_monotone: + forall x1 x2, X.eq x1 x2 \/ X.lt x2 x1 -> below_high_bound x1 = true -> below_high_bound x2 = true. + +Remark In_raw_partition_between_3: + forall x m, + Raw.In x (fst (raw_partition_between m)) -> above_low_bound x = true /\ below_high_bound x = true. +Proof. + induction m; simpl; intros. +- inv H. +- destruct (raw_partition_between m1) as [l1 l2] eqn:LEQ; simpl in *. + destruct (raw_partition_between m2) as [r1 r2] eqn:REQ; simpl in *. + destruct (above_low_bound t1) eqn:LB; [destruct (below_high_bound t1) eqn: RB | idtac]; simpl in H. + + rewrite Raw.join_spec in H. intuition. + apply above_monotone with t1; auto. + apply below_monotone with t1; auto. + + auto. + + auto. +Qed. + +Remark In_raw_partition_between_4: + forall x m, + Raw.bst m -> + Raw.In x (snd (raw_partition_between m)) -> above_low_bound x = false \/ below_high_bound x = false. +Proof. + induction 1; simpl; intros. +- inv H. +- destruct (raw_partition_between l) as [l1 l2] eqn:LEQ; simpl in *. + destruct (raw_partition_between r) as [r1 r2] eqn:REQ; simpl in *. + destruct (above_low_bound x0) eqn:LB; [destruct (below_high_bound x0) eqn: RB | idtac]; simpl in H. + + simpl in H1. rewrite Raw.concat_spec in H1. intuition. + + assert (forall y, X.eq y x0 \/ X.lt x0 y -> below_high_bound y = false). + { intros. destruct (below_high_bound y) eqn:E; auto. + assert (below_high_bound x0 = true) by (apply below_monotone with y; auto). + congruence. } + simpl in H1. rewrite Raw.join_spec in H1. intuition. + + assert (forall y, X.eq y x0 \/ X.lt y x0 -> above_low_bound y = false). + { intros. destruct (above_low_bound y) eqn:E; auto. + assert (above_low_bound x0 = true) by (apply above_monotone with y; auto). + congruence. } + simpl in H1. rewrite Raw.join_spec in H1. intuition. +Qed. + +Remark In_raw_partition_between_5: + forall x m, + Raw.bst m -> + Raw.In x m -> above_low_bound x = true -> below_high_bound x = true -> + Raw.In x (fst (raw_partition_between m)). +Proof. + induction 1; simpl; intros. +- inv H. +- destruct (raw_partition_between l) as [l1 l2] eqn:LEQ; simpl in *. + destruct (raw_partition_between r) as [r1 r2] eqn:REQ; simpl in *. + destruct (above_low_bound x0) eqn:LB; [destruct (below_high_bound x0) eqn: RB | idtac]; simpl in H. + + simpl. rewrite Raw.join_spec. inv H1. + auto. + right; left; apply IHbst1; auto. + right; right; apply IHbst2; auto. + + simpl. inv H1. + assert (below_high_bound x0 = true) by (apply below_monotone with x; auto). + congruence. + auto. + assert (below_high_bound x0 = true) by (apply below_monotone with x; auto). + congruence. + + simpl. inv H1. + assert (above_low_bound x0 = true) by (apply above_monotone with x; auto). + congruence. + assert (above_low_bound x0 = true) by (apply above_monotone with x; auto). + congruence. + eauto. +Qed. + +Remark In_raw_partition_between_6: + forall x m, + Raw.bst m -> + Raw.In x m -> above_low_bound x = false \/ below_high_bound x = false -> + Raw.In x (snd (raw_partition_between m)). +Proof. + induction 1; simpl; intros. +- inv H. +- destruct (raw_partition_between l) as [l1 l2] eqn:LEQ; simpl in *. + destruct (raw_partition_between r) as [r1 r2] eqn:REQ; simpl in *. + destruct (above_low_bound x0) eqn:LB; [destruct (below_high_bound x0) eqn: RB | idtac]; simpl in H. + + simpl. rewrite Raw.concat_spec. inv H1. + assert (below_high_bound x = true) by (apply below_monotone with x0; auto; left; symmetry; auto). + assert (above_low_bound x = true) by (apply above_monotone with x0; auto; left; symmetry; auto). + destruct H2; congruence. + left; apply IHbst1; auto. + right; apply IHbst2; auto. + + simpl. rewrite Raw.join_spec. inv H1. + auto. + right; left; apply IHbst1; auto. + auto. + + simpl. rewrite Raw.join_spec. inv H1. + auto. + auto. + right; right; apply IHbst2; auto. +Qed. + +Definition partition_between (s: t) : t * t := + let p := raw_partition_between s.(MSet.this) in + (@MSet.Mkt (fst p) (proj1 (raw_partition_between_ok s.(MSet.this) s.(MSet.is_ok))), + @MSet.Mkt (snd p) (proj2 (raw_partition_between_ok s.(MSet.this) s.(MSet.is_ok)))). + +Theorem partition_between_iff_1: + forall x s, + In x (fst (partition_between s)) <-> + In x s /\ above_low_bound x = true /\ below_high_bound x = true. +Proof. + intros. unfold partition_between, In; simpl. split. + intros. split. apply In_raw_partition_between_1; auto. eapply In_raw_partition_between_3; eauto. + intros [A [B C]]. apply In_raw_partition_between_5; auto. apply MSet.is_ok. +Qed. + +Theorem partition_between_iff_2: + forall x s, + In x (snd (partition_between s)) <-> + In x s /\ (above_low_bound x = false \/ below_high_bound x = false). +Proof. + intros. unfold partition_between, In; simpl. split. + intros. split. apply In_raw_partition_between_2; auto. eapply In_raw_partition_between_4; eauto. apply MSet.is_ok. + intros [A B]. apply In_raw_partition_between_6; auto. apply MSet.is_ok. +Qed. + +End PARTITION_BETWEEN. + +End Make. diff --git a/lib/Floats.v b/lib/Floats.v index eb86027..02ff25c 100644 --- a/lib/Floats.v +++ b/lib/Floats.v @@ -121,6 +121,26 @@ Definition intuoffloat (f:float): option int := (**r conversion to unsigned 32-b | None => None end. +Definition longoffloat (f:float): option int64 := (**r conversion to signed 64-bit int *) + match Zoffloat f with + | Some n => + if Zle_bool Int64.min_signed n && Zle_bool n Int64.max_signed then + Some (Int64.repr n) + else + None + | None => None + end. + +Definition longuoffloat (f:float): option int64 := (**r conversion to unsigned 64-bit int *) + match Zoffloat f with + | Some n => + if Zle_bool 0 n && Zle_bool n Int64.max_unsigned then + Some (Int64.repr n) + else + None + | None => None + end. + (* Functions used to parse floats *) Program Definition build_from_parsed (prec:Z) (emax:Z) (prec_gt_0 :Prec_gt_0 prec) (Hmax:prec < emax) @@ -154,6 +174,11 @@ Definition floatofint (n:int): float := (**r conversion from signed 32-bit int * binary_normalize64 (Int.signed n) 0 false. Definition floatofintu (n:int): float:= (**r conversion from unsigned 32-bit int *) binary_normalize64 (Int.unsigned n) 0 false. +Definition floatoflong (n:int64): float := (**r conversion from signed 64-bit int *) + binary_normalize64 (Int64.signed n) 0 false. +Definition floatoflongu (n:int64): float:= (**r conversion from unsigned 64-bit int *) + binary_normalize64 (Int64.unsigned n) 0 false. + Definition add: float -> float -> float := b64_plus mode_NE. (**r addition *) Definition sub: float -> float -> float := b64_minus mode_NE. (**r subtraction *) Definition mul: float -> float -> float := b64_mult mode_NE. (**r multiplication *) @@ -217,24 +242,7 @@ Definition double_of_bits (b: int64): float := b64_of_bits (Int64.unsigned b). Definition bits_of_single (f: float) : int := Int.repr (bits_of_b32 (binary32offloat f)). Definition single_of_bits (b: int): float := floatofbinary32 (b32_of_bits (Int.unsigned b)). -Definition from_words (hi lo: int) : float := - double_of_bits - (Int64.or (Int64.shl (Int64.repr (Int.unsigned hi)) (Int64.repr 32)) - (Int64.repr (Int.unsigned lo))). - -Lemma from_words_eq: - forall lo hi, - from_words hi lo = - double_of_bits (Int64.repr (Int.unsigned hi * two_p 32 + Int.unsigned lo)). -Proof. - intros. unfold from_words. decEq. - rewrite Int64.shifted_or_is_add. - apply Int64.eqm_samerepr. auto with ints. - change Int64.zwordsize with 64. omega. - generalize (Int.unsigned_range lo). intros [A B]. - rewrite Int64.unsigned_repr. assumption. - assert (Int.modulus < Int64.max_unsigned). compute; auto. omega. -Qed. +Definition from_words (hi lo: int) : float := double_of_bits (Int64.ofwords hi lo). (** Below are the only properties of floating-point arithmetic that we rely on in the compiler proof. *) @@ -747,35 +755,11 @@ Definition ox4330_0000 := Int.repr 1127219200. (**r [0x4330_0000] *) Lemma split_bits_or: forall x, - split_bits 52 11 - (Int64.unsigned - (Int64.or - (Int64.shl (Int64.repr (Int.unsigned ox4330_0000)) (Int64.repr 32)) - (Int64.repr (Int.unsigned x)))) = (false, Int.unsigned x, 1075). + split_bits 52 11 (Int64.unsigned (Int64.ofwords ox4330_0000 x)) = (false, Int.unsigned x, 1075). Proof. intros. transitivity (split_bits 52 11 (join_bits 52 11 false (Int.unsigned x) 1075)). - - f_equal. - assert (Int64.unsigned (Int64.repr (Int.unsigned x)) = Int.unsigned x). - apply Int64.unsigned_repr. - generalize (Int.unsigned_range x). - compute_this (Int.modulus). - compute_this (Int64.max_unsigned). - omega. - rewrite Int64.shifted_or_is_add. - unfold join_bits. - rewrite H. - apply Int64.unsigned_repr. - generalize (Int.unsigned_range x). - compute_this ((0 + 1075) * 2 ^ 52). - compute_this (Int.modulus). - compute_this (Int64.max_unsigned). - omega. - compute_this Int64.zwordsize. omega. - rewrite H. - generalize (Int.unsigned_range x). - change (two_p 32) with Int.modulus. - omega. + - f_equal. rewrite Int64.ofwords_add'. reflexivity. - apply split_join_bits. compute; auto. generalize (Int.unsigned_range x). diff --git a/lib/Integers.v b/lib/Integers.v index af9decd..84b82bf 100644 --- a/lib/Integers.v +++ b/lib/Integers.v @@ -16,8 +16,8 @@ (** Formalizations of machine integers modulo $2^N$ #2<sup>N</sup>#. *) Require Import Eqdep_dec. +Require Import Zquot. Require Import Zwf. -Require Recdef. Require Import Coqlib. (** * Comparisons *) @@ -1061,36 +1061,7 @@ Proof. apply eqm_mult; apply eqm_sym; apply eqm_signed_unsigned. Qed. -(** ** Properties of division and Lemma Ztestbit_same_equal: - forall x y, - (forall i, 0 <= i -> Z.testbit x i = Z.testbit y i) -> - x = y. -Proof. - assert (forall x, 0 <= x -> - forall y, - (forall i, 0 <= i -> Z.testbit x i = Z.testbit y i) -> - x = y). - { - intros x0 POS0; pattern x0; apply Zdiv2_pos_ind; auto. - - intros. symmetry. apply Ztestbit_is_zero. - intros. rewrite <- H; auto. apply Z.testbit_0_l. - - intros. rewrite (Zdecomp x); rewrite (Zdecomp y). f_equal. - + exploit (H0 0). omega. rewrite !(Ztestbit_eq 0). - rewrite !zeq_true. auto. omega. omega. - + intros. apply H; intros. - exploit (H0 (Zsucc i)). omega. rewrite !(Ztestbit_eq (Z.succ i)). - rewrite !zeq_false. rewrite !Z.pred_succ. auto. - omega. omega. omega. omega. - } - intros. destruct (zle 0 x). - - apply H; auto. - - assert (-x-1 = -y-1). - { apply H. omega. intros. rewrite !Z_one_complement; auto. - rewrite H0; auto. - } - omega. -Qed. -modulus *) +(** ** Properties of division and modulus *) Lemma modu_divu_Euclid: forall x y, y <> zero -> x = add (mul (divu x y) y) (modu x y). @@ -1155,8 +1126,6 @@ Proof. apply one_not_zero. Qed. -Require Import Zquot. - Theorem divs_mone: forall x, divs x mone = neg x. Proof. @@ -3707,7 +3676,434 @@ Module Wordsize_64. Proof. unfold wordsize; congruence. Qed. End Wordsize_64. -Module Int64 := Make(Wordsize_64). +Module Int64. + +Include Make(Wordsize_64). + +(** Shifts with amount given as a 32-bit integer *) + +Definition iwordsize': Int.int := Int.repr zwordsize. + +Definition shl' (x: int) (y: Int.int): int := + repr (Z.shiftl (unsigned x) (Int.unsigned y)). +Definition shru' (x: int) (y: Int.int): int := + repr (Z.shiftr (unsigned x) (Int.unsigned y)). +Definition shr' (x: int) (y: Int.int): int := + repr (Z.shiftr (signed x) (Int.unsigned y)). + +Lemma bits_shl': + forall x y i, + 0 <= i < zwordsize -> + testbit (shl' x y) i = + if zlt i (Int.unsigned y) then false else testbit x (i - Int.unsigned y). +Proof. + intros. unfold shl'. rewrite testbit_repr; auto. + destruct (zlt i (Int.unsigned y)). + apply Z.shiftl_spec_low. auto. + apply Z.shiftl_spec_high. omega. omega. +Qed. + +Lemma bits_shru': + forall x y i, + 0 <= i < zwordsize -> + testbit (shru' x y) i = + if zlt (i + Int.unsigned y) zwordsize then testbit x (i + Int.unsigned y) else false. +Proof. + intros. unfold shru'. rewrite testbit_repr; auto. + rewrite Z.shiftr_spec. fold (testbit x (i + Int.unsigned y)). + destruct (zlt (i + Int.unsigned y) zwordsize). + auto. + apply bits_above; auto. + omega. +Qed. + +Lemma bits_shr': + forall x y i, + 0 <= i < zwordsize -> + testbit (shr' x y) i = + testbit x (if zlt (i + Int.unsigned y) zwordsize then i + Int.unsigned y else zwordsize - 1). +Proof. + intros. unfold shr'. rewrite testbit_repr; auto. + rewrite Z.shiftr_spec. apply bits_signed. + generalize (Int.unsigned_range y); omega. + omega. +Qed. + +(** Decomposing 64-bit ints as pairs of 32-bit ints *) + +Definition loword (n: int) : Int.int := Int.repr (unsigned n). + +Definition hiword (n: int) : Int.int := Int.repr (unsigned (shru n (repr Int.zwordsize))). + +Definition ofwords (hi lo: Int.int) : int := + or (shl (repr (Int.unsigned hi)) (repr Int.zwordsize)) (repr (Int.unsigned lo)). + +Lemma bits_loword: + forall n i, 0 <= i < Int.zwordsize -> Int.testbit (loword n) i = testbit n i. +Proof. + intros. unfold loword. rewrite Int.testbit_repr; auto. +Qed. + +Lemma bits_hiword: + forall n i, 0 <= i < Int.zwordsize -> Int.testbit (hiword n) i = testbit n (i + Int.zwordsize). +Proof. + intros. unfold hiword. rewrite Int.testbit_repr; auto. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + fold (testbit (shru n (repr Int.zwordsize)) i). rewrite bits_shru. + change (unsigned (repr Int.zwordsize)) with Int.zwordsize. + apply zlt_true. omega. omega. +Qed. + +Lemma bits_ofwords: + forall hi lo i, 0 <= i < zwordsize -> + testbit (ofwords hi lo) i = + if zlt i Int.zwordsize then Int.testbit lo i else Int.testbit hi (i - Int.zwordsize). +Proof. + intros. unfold ofwords. rewrite bits_or; auto. rewrite bits_shl; auto. + change (unsigned (repr Int.zwordsize)) with Int.zwordsize. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + destruct (zlt i Int.zwordsize). + rewrite testbit_repr; auto. + rewrite !testbit_repr; auto. + fold (Int.testbit lo i). rewrite Int.bits_above. apply orb_false_r. auto. + omega. +Qed. + +Lemma lo_ofwords: + forall hi lo, loword (ofwords hi lo) = lo. +Proof. + intros. apply Int.same_bits_eq; intros. + rewrite bits_loword; auto. rewrite bits_ofwords. apply zlt_true. omega. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. omega. +Qed. + +Lemma hi_ofwords: + forall hi lo, hiword (ofwords hi lo) = hi. +Proof. + intros. apply Int.same_bits_eq; intros. + rewrite bits_hiword; auto. rewrite bits_ofwords. + rewrite zlt_false. f_equal. omega. omega. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. omega. +Qed. + +Lemma ofwords_recompose: + forall n, ofwords (hiword n) (loword n) = n. +Proof. + intros. apply same_bits_eq; intros. rewrite bits_ofwords; auto. + destruct (zlt i Int.zwordsize). + apply bits_loword. omega. + rewrite bits_hiword. f_equal. omega. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. omega. +Qed. + +Lemma ofwords_add: + forall lo hi, ofwords hi lo = repr (Int.unsigned hi * two_p 32 + Int.unsigned lo). +Proof. + intros. unfold ofwords. rewrite shifted_or_is_add. + apply eqm_samerepr. apply eqm_add. apply eqm_mult. + apply eqm_sym; apply eqm_unsigned_repr. + apply eqm_refl. + apply eqm_sym; apply eqm_unsigned_repr. + change Int.zwordsize with 32; change zwordsize with 64; omega. + rewrite unsigned_repr. generalize (Int.unsigned_range lo). intros [A B]. exact B. + assert (Int.max_unsigned < max_unsigned) by (compute; auto). + generalize (Int.unsigned_range_2 lo); omega. +Qed. + +Lemma ofwords_add': + forall lo hi, unsigned (ofwords hi lo) = Int.unsigned hi * two_p 32 + Int.unsigned lo. +Proof. + intros. rewrite ofwords_add. apply unsigned_repr. + generalize (Int.unsigned_range hi) (Int.unsigned_range lo). + change (two_p 32) with Int.modulus. + change Int.modulus with 4294967296. + change max_unsigned with 18446744073709551615. + omega. +Qed. + +(** Expressing 64-bit operations in terms of 32-bit operations *) + +Lemma decompose_bitwise_binop: + forall f f64 f32 xh xl yh yl, + (forall x y i, 0 <= i < zwordsize -> testbit (f64 x y) i = f (testbit x i) (testbit y i)) -> + (forall x y i, 0 <= i < Int.zwordsize -> Int.testbit (f32 x y) i = f (Int.testbit x i) (Int.testbit y i)) -> + f64 (ofwords xh xl) (ofwords yh yl) = ofwords (f32 xh yh) (f32 xl yl). +Proof. + intros. apply Int64.same_bits_eq; intros. + rewrite H by auto. rewrite ! bits_ofwords by auto. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + destruct (zlt i Int.zwordsize); rewrite H0 by omega; auto. +Qed. + +Lemma decompose_and: + forall xh xl yh yl, + and (ofwords xh xl) (ofwords yh yl) = ofwords (Int.and xh yh) (Int.and xl yl). +Proof. + intros. apply decompose_bitwise_binop with andb. + apply bits_and. apply Int.bits_and. +Qed. + +Lemma decompose_or: + forall xh xl yh yl, + or (ofwords xh xl) (ofwords yh yl) = ofwords (Int.or xh yh) (Int.or xl yl). +Proof. + intros. apply decompose_bitwise_binop with orb. + apply bits_or. apply Int.bits_or. +Qed. + +Lemma decompose_xor: + forall xh xl yh yl, + xor (ofwords xh xl) (ofwords yh yl) = ofwords (Int.xor xh yh) (Int.xor xl yl). +Proof. + intros. apply decompose_bitwise_binop with xorb. + apply bits_xor. apply Int.bits_xor. +Qed. + +Lemma decompose_not: + forall xh xl, + not (ofwords xh xl) = ofwords (Int.not xh) (Int.not xl). +Proof. + intros. unfold not, Int.not. rewrite <- decompose_xor. f_equal. + apply (Int64.eq_spec mone (ofwords Int.mone Int.mone)). +Qed. + +Lemma decompose_shl_1: + forall xh xl y, + 0 <= Int.unsigned y < Int.zwordsize -> + shl' (ofwords xh xl) y = + ofwords (Int.or (Int.shl xh y) (Int.shru xl (Int.sub Int.iwordsize y))) + (Int.shl xl y). +Proof. + intros. + assert (Int.unsigned (Int.sub Int.iwordsize y) = Int.zwordsize - Int.unsigned y). + { unfold Int.sub. rewrite Int.unsigned_repr. auto. + rewrite Int.unsigned_repr_wordsize. generalize Int.wordsize_max_unsigned; omega. } + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + apply Int64.same_bits_eq; intros. + rewrite bits_shl' by auto. symmetry. rewrite bits_ofwords by auto. + destruct (zlt i Int.zwordsize). rewrite Int.bits_shl by omega. + destruct (zlt i (Int.unsigned y)). auto. + rewrite bits_ofwords by omega. rewrite zlt_true by omega. auto. + rewrite zlt_false by omega. rewrite bits_ofwords by omega. + rewrite Int.bits_or by omega. rewrite Int.bits_shl by omega. + rewrite Int.bits_shru by omega. rewrite H0. + destruct (zlt (i - Int.unsigned y) (Int.zwordsize)). + rewrite zlt_true by omega. rewrite zlt_true by omega. + rewrite orb_false_l. f_equal. omega. + rewrite zlt_false by omega. rewrite zlt_false by omega. + rewrite orb_false_r. f_equal. omega. +Qed. + +Lemma decompose_shl_2: + forall xh xl y, + Int.zwordsize <= Int.unsigned y < zwordsize -> + shl' (ofwords xh xl) y = + ofwords (Int.shl xl (Int.sub y Int.iwordsize)) Int.zero. +Proof. + intros. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + assert (Int.unsigned (Int.sub y Int.iwordsize) = Int.unsigned y - Int.zwordsize). + { unfold Int.sub. rewrite Int.unsigned_repr. auto. + rewrite Int.unsigned_repr_wordsize. generalize (Int.unsigned_range_2 y). omega. } + apply Int64.same_bits_eq; intros. + rewrite bits_shl' by auto. symmetry. rewrite bits_ofwords by auto. + destruct (zlt i Int.zwordsize). rewrite zlt_true by omega. apply Int.bits_zero. + rewrite Int.bits_shl by omega. + destruct (zlt i (Int.unsigned y)). + rewrite zlt_true by omega. auto. + rewrite zlt_false by omega. + rewrite bits_ofwords by omega. rewrite zlt_true by omega. f_equal. omega. +Qed. + +Lemma decompose_shru_1: + forall xh xl y, + 0 <= Int.unsigned y < Int.zwordsize -> + shru' (ofwords xh xl) y = + ofwords (Int.shru xh y) + (Int.or (Int.shru xl y) (Int.shl xh (Int.sub Int.iwordsize y))). +Proof. + intros. + assert (Int.unsigned (Int.sub Int.iwordsize y) = Int.zwordsize - Int.unsigned y). + { unfold Int.sub. rewrite Int.unsigned_repr. auto. + rewrite Int.unsigned_repr_wordsize. generalize Int.wordsize_max_unsigned; omega. } + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + apply Int64.same_bits_eq; intros. + rewrite bits_shru' by auto. symmetry. rewrite bits_ofwords by auto. + destruct (zlt i Int.zwordsize). + rewrite zlt_true by omega. + rewrite bits_ofwords by omega. + rewrite Int.bits_or by omega. rewrite Int.bits_shl by omega. + rewrite Int.bits_shru by omega. rewrite H0. + destruct (zlt (i + Int.unsigned y) (Int.zwordsize)). + rewrite zlt_true by omega. + rewrite orb_false_r. auto. + rewrite zlt_false by omega. + rewrite orb_false_l. f_equal. omega. + rewrite Int.bits_shru by omega. + destruct (zlt (i + Int.unsigned y) zwordsize). + rewrite bits_ofwords by omega. + rewrite zlt_true by omega. rewrite zlt_false by omega. f_equal. omega. + rewrite zlt_false by omega. auto. +Qed. + +Lemma decompose_shru_2: + forall xh xl y, + Int.zwordsize <= Int.unsigned y < zwordsize -> + shru' (ofwords xh xl) y = + ofwords Int.zero (Int.shru xh (Int.sub y Int.iwordsize)). +Proof. + intros. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + assert (Int.unsigned (Int.sub y Int.iwordsize) = Int.unsigned y - Int.zwordsize). + { unfold Int.sub. rewrite Int.unsigned_repr. auto. + rewrite Int.unsigned_repr_wordsize. generalize (Int.unsigned_range_2 y). omega. } + apply Int64.same_bits_eq; intros. + rewrite bits_shru' by auto. symmetry. rewrite bits_ofwords by auto. + destruct (zlt i Int.zwordsize). + rewrite Int.bits_shru by omega. rewrite H1. + destruct (zlt (i + Int.unsigned y) zwordsize). + rewrite zlt_true by omega. rewrite bits_ofwords by omega. + rewrite zlt_false by omega. f_equal; omega. + rewrite zlt_false by omega. auto. + rewrite zlt_false by omega. apply Int.bits_zero. +Qed. + +Lemma decompose_shr_1: + forall xh xl y, + 0 <= Int.unsigned y < Int.zwordsize -> + shr' (ofwords xh xl) y = + ofwords (Int.shr xh y) + (Int.or (Int.shru xl y) (Int.shl xh (Int.sub Int.iwordsize y))). +Proof. + intros. + assert (Int.unsigned (Int.sub Int.iwordsize y) = Int.zwordsize - Int.unsigned y). + { unfold Int.sub. rewrite Int.unsigned_repr. auto. + rewrite Int.unsigned_repr_wordsize. generalize Int.wordsize_max_unsigned; omega. } + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + apply Int64.same_bits_eq; intros. + rewrite bits_shr' by auto. symmetry. rewrite bits_ofwords by auto. + destruct (zlt i Int.zwordsize). + rewrite zlt_true by omega. + rewrite bits_ofwords by omega. + rewrite Int.bits_or by omega. rewrite Int.bits_shl by omega. + rewrite Int.bits_shru by omega. rewrite H0. + destruct (zlt (i + Int.unsigned y) (Int.zwordsize)). + rewrite zlt_true by omega. + rewrite orb_false_r. auto. + rewrite zlt_false by omega. + rewrite orb_false_l. f_equal. omega. + rewrite Int.bits_shr by omega. + destruct (zlt (i + Int.unsigned y) zwordsize). + rewrite bits_ofwords by omega. + rewrite zlt_true by omega. rewrite zlt_false by omega. f_equal. omega. + rewrite zlt_false by omega. rewrite bits_ofwords by omega. + rewrite zlt_false by omega. f_equal. +Qed. + +Lemma decompose_shr_2: + forall xh xl y, + Int.zwordsize <= Int.unsigned y < zwordsize -> + shr' (ofwords xh xl) y = + ofwords (Int.shr xh (Int.sub Int.iwordsize Int.one)) + (Int.shr xh (Int.sub y Int.iwordsize)). +Proof. + intros. + assert (zwordsize = 2 * Int.zwordsize) by reflexivity. + assert (Int.unsigned (Int.sub y Int.iwordsize) = Int.unsigned y - Int.zwordsize). + { unfold Int.sub. rewrite Int.unsigned_repr. auto. + rewrite Int.unsigned_repr_wordsize. generalize (Int.unsigned_range_2 y). omega. } + apply Int64.same_bits_eq; intros. + rewrite bits_shr' by auto. symmetry. rewrite bits_ofwords by auto. + destruct (zlt i Int.zwordsize). + rewrite Int.bits_shr by omega. rewrite H1. + destruct (zlt (i + Int.unsigned y) zwordsize). + rewrite zlt_true by omega. rewrite bits_ofwords by omega. + rewrite zlt_false by omega. f_equal; omega. + rewrite zlt_false by omega. rewrite bits_ofwords by omega. + rewrite zlt_false by omega. auto. + rewrite Int.bits_shr by omega. + change (Int.unsigned (Int.sub Int.iwordsize Int.one)) with (Int.zwordsize - 1). + destruct (zlt (i + Int.unsigned y) zwordsize); + rewrite bits_ofwords by omega. + symmetry. rewrite zlt_false by omega. f_equal. + destruct (zlt (i - Int.zwordsize + (Int.zwordsize - 1)) Int.zwordsize); omega. + symmetry. rewrite zlt_false by omega. f_equal. + destruct (zlt (i - Int.zwordsize + (Int.zwordsize - 1)) Int.zwordsize); omega. +Qed. + +Remark eqm_mul_2p32: + forall x y, Int.eqm x y -> eqm (x * two_p 32) (y * two_p 32). +Proof. + intros. destruct H as [k EQ]. exists k. rewrite EQ. + change Int.modulus with (two_p 32). + change modulus with (two_p 32 * two_p 32). + ring. +Qed. + +Lemma decompose_add: + forall xh xl yh yl, + add (ofwords xh xl) (ofwords yh yl) = + ofwords (Int.add (Int.add xh yh) (Int.add_carry xl yl Int.zero)) + (Int.add xl yl). +Proof. + intros. symmetry. rewrite ofwords_add. rewrite add_unsigned. + apply eqm_samerepr. + rewrite ! ofwords_add'. rewrite (Int.unsigned_add_carry xl yl). + set (cc := Int.add_carry xl yl Int.zero). + set (Xl := Int.unsigned xl); set (Xh := Int.unsigned xh); + set (Yl := Int.unsigned yl); set (Yh := Int.unsigned yh). + change Int.modulus with (two_p 32). + replace (Xh * two_p 32 + Xl + (Yh * two_p 32 + Yl)) + with ((Xh + Yh) * two_p 32 + (Xl + Yl)) by ring. + replace (Int.unsigned (Int.add (Int.add xh yh) cc) * two_p 32 + + (Xl + Yl - Int.unsigned cc * two_p 32)) + with ((Int.unsigned (Int.add (Int.add xh yh) cc) - Int.unsigned cc) * two_p 32 + + (Xl + Yl)) by ring. + apply eqm_add. 2: apply eqm_refl. apply eqm_mul_2p32. + replace (Xh + Yh) with ((Xh + Yh + Int.unsigned cc) - Int.unsigned cc) by ring. + apply Int.eqm_sub. 2: apply Int.eqm_refl. + apply Int.eqm_unsigned_repr_l. apply Int.eqm_add. 2: apply Int.eqm_refl. + apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl. +Qed. + +Definition mul' (x y: Int.int) : int := repr (Int.unsigned x * Int.unsigned y). + +Lemma decompose_mul: + forall xh xl yh yl, + mul (ofwords xh xl) (ofwords yh yl) = + ofwords (Int.add (Int.add (hiword (mul' xl yl)) (Int.mul xl yh)) (Int.mul xh yl)) + (loword (mul' xl yl)). +Proof. + intros. + set (pl := loword (mul' xl yl)); set (ph := hiword (mul' xl yl)). + assert (EQ0: unsigned (mul' xl yl) = Int.unsigned ph * two_p 32 + Int.unsigned pl). + { rewrite <- (ofwords_recompose (mul' xl yl)). apply ofwords_add'. } + symmetry. rewrite ofwords_add. unfold mul. rewrite !ofwords_add'. + set (XL := Int.unsigned xl); set (XH := Int.unsigned xh); + set (YL := Int.unsigned yl); set (YH := Int.unsigned yh). + set (PH := Int.unsigned ph) in *. set (PL := Int.unsigned pl) in *. + transitivity (repr (((PH + XL * YH) + XH * YL) * two_p 32 + PL)). + apply eqm_samerepr. apply eqm_add. 2: apply eqm_refl. + apply eqm_mul_2p32. + rewrite Int.add_unsigned. apply Int.eqm_unsigned_repr_l. apply Int.eqm_add. + rewrite Int.add_unsigned. apply Int.eqm_unsigned_repr_l. apply Int.eqm_add. + apply Int.eqm_refl. + unfold Int.mul. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl. + unfold Int.mul. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl. + transitivity (repr (unsigned (mul' xl yl) + (XL * YH + XH * YL) * two_p 32)). + rewrite EQ0. f_equal. ring. + transitivity (repr ((XL * YL + (XL * YH + XH * YL) * two_p 32))). + apply eqm_samerepr. apply eqm_add. 2: apply eqm_refl. + unfold mul'. apply eqm_unsigned_repr_l. apply eqm_refl. + transitivity (repr (0 + (XL * YL + (XL * YH + XH * YL) * two_p 32))). + rewrite Zplus_0_l; auto. + transitivity (repr (XH * YH * (two_p 32 * two_p 32) + (XL * YL + (XL * YH + XH * YL) * two_p 32))). + apply eqm_samerepr. apply eqm_add. 2: apply eqm_refl. + change (two_p 32 * two_p 32) with modulus. exists (- XH * YH). ring. + f_equal. ring. +Qed. + +End Int64. Notation int64 := Int64.int. diff --git a/lib/Lattice.v b/lib/Lattice.v index c4b55e9..3fc0d4c 100644 --- a/lib/Lattice.v +++ b/lib/Lattice.v @@ -747,3 +747,103 @@ Lemma ge_lub_right: forall x y, ge (lub x y) y. Proof. destruct x; destruct y; compute; tauto. Qed. End LBoolean. + +(** * Option semi-lattice *) + +(** This lattice adds a top element (represented by [None]) to a given + semi-lattice (whose elements are injected via [Some]). *) + +Module LOption(L: SEMILATTICE) <: SEMILATTICE_WITH_TOP. + +Definition t: Type := option L.t. + +Definition eq (x y: t) : Prop := + match x, y with + | None, None => True + | Some x1, Some y1 => L.eq x1 y1 + | _, _ => False + end. + +Lemma eq_refl: forall x, eq x x. +Proof. + unfold eq; intros; destruct x. apply L.eq_refl. auto. +Qed. + +Lemma eq_sym: forall x y, eq x y -> eq y x. +Proof. + unfold eq; intros; destruct x; destruct y; auto. apply L.eq_sym; auto. +Qed. + +Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z. +Proof. + unfold eq; intros; destruct x; destruct y; destruct z; auto. + eapply L.eq_trans; eauto. + contradiction. +Qed. + +Definition beq (x y: t) : bool := + match x, y with + | None, None => true + | Some x1, Some y1 => L.beq x1 y1 + | _, _ => false + end. + +Lemma beq_correct: forall x y, beq x y = true -> eq x y. +Proof. + unfold beq, eq; intros; destruct x; destruct y. + apply L.beq_correct; auto. + discriminate. discriminate. auto. +Qed. + +Definition ge (x y: t) : Prop := + match x, y with + | None, _ => True + | _, None => False + | Some x1, Some y1 => L.ge x1 y1 + end. + +Lemma ge_refl: forall x y, eq x y -> ge x y. +Proof. + unfold eq, ge; intros; destruct x; destruct y. + apply L.ge_refl; auto. + auto. elim H. auto. +Qed. + +Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z. +Proof. + unfold ge; intros; destruct x; destruct y; destruct z; auto. + eapply L.ge_trans; eauto. contradiction. +Qed. + +Definition bot : t := Some L.bot. + +Lemma ge_bot: forall x, ge x bot. +Proof. + unfold ge, bot; intros. destruct x; auto. apply L.ge_bot. +Qed. + +Definition lub (x y: t) : t := + match x, y with + | None, _ => None + | _, None => None + | Some x1, Some y1 => Some (L.lub x1 y1) + end. + +Lemma ge_lub_left: forall x y, ge (lub x y) x. +Proof. + unfold ge, lub; intros; destruct x; destruct y; auto. apply L.ge_lub_left. +Qed. + +Lemma ge_lub_right: forall x y, ge (lub x y) y. +Proof. + unfold ge, lub; intros; destruct x; destruct y; auto. apply L.ge_lub_right. +Qed. + +Definition top : t := None. + +Lemma ge_top: forall x, ge top x. +Proof. + unfold ge, top; intros. auto. +Qed. + +End LOption. @@ -377,23 +377,18 @@ Module PTree <: TREE. end. Lemma bempty_correct: - forall m, bempty m = true -> forall x, get x m = None. + forall m, bempty m = true <-> (forall x, get x m = None). Proof. - induction m; simpl; intros. - change (@Leaf A) with (empty A). apply gempty. - destruct o. congruence. destruct (andb_prop _ _ H). + induction m; simpl. + split; intros. apply gleaf. auto. + destruct o; split; intros. + congruence. + generalize (H xH); simpl; congruence. + destruct (andb_prop _ _ H). rewrite IHm1 in H0. rewrite IHm2 in H1. destruct x; simpl; auto. - Qed. - - Lemma bempty_complete: - forall m, (forall x, get x m = None) -> bempty m = true. - Proof. - induction m; simpl; intros. - auto. - destruct o. generalize (H xH); simpl; congruence. - rewrite IHm1. rewrite IHm2. auto. - intros; apply (H (xI x)). - intros; apply (H (xO x)). + apply andb_true_intro; split. + apply IHm1. intros; apply (H (xO x)). + apply IHm2. intros; apply (H (xI x)). Qed. Lemma beq_correct: @@ -406,38 +401,23 @@ Module PTree <: TREE. | _, _ => False end). Proof. - intros; split. - - (* beq = true -> exteq *) - revert m1 m2. induction m1; destruct m2; simpl. - intros; red; intros. change (@Leaf A) with (empty A). - repeat rewrite gempty. auto. - destruct o; intro. congruence. - red; intros. change (@Leaf A) with (empty A). rewrite gempty. - rewrite bempty_correct. auto. assumption. - destruct o; intro. congruence. - red; intros. change (@Leaf A) with (empty A). rewrite gempty. - rewrite bempty_correct. auto. assumption. - destruct o; destruct o0; simpl; intro; try congruence. - destruct (andb_prop _ _ H). destruct (andb_prop _ _ H0). - destruct x; simpl. - apply IHm1_2; auto. apply IHm1_1; auto. auto. - destruct (andb_prop _ _ H). - red; intros. destruct x; simpl. - apply IHm1_2; auto. apply IHm1_1; auto. - auto. - - (* exteq -> beq = true *) - revert m1 m2. induction m1; destruct m2; simpl; intros. - auto. - change (bempty (Node m2_1 o m2_2) = true). - apply bempty_complete. intros. generalize (H x). rewrite gleaf. - destruct (get x (Node m2_1 o m2_2)); tauto. - change (bempty (Node m1_1 o m1_2) = true). - apply bempty_complete. intros. generalize (H x). rewrite gleaf. - destruct (get x (Node m1_1 o m1_2)); tauto. - apply andb_true_intro. split. apply andb_true_intro. split. - generalize (H xH); simpl. destruct o; destruct o0; auto. - apply IHm1_1. intros. apply (H (xO x)). - apply IHm1_2. intros. apply (H (xI x)). + induction m1; intros. + - simpl. rewrite bempty_correct. split; intros. + rewrite gleaf. rewrite H. auto. + generalize (H x). rewrite gleaf. destruct (get x m2); tauto. + - destruct m2. + + unfold beq. rewrite bempty_correct. split; intros. + rewrite H. rewrite gleaf. auto. + generalize (H x). rewrite gleaf. destruct (get x (Node m1_1 o m1_2)); tauto. + + simpl. split; intros. + * destruct (andb_prop _ _ H). destruct (andb_prop _ _ H0). + rewrite IHm1_1 in H3. rewrite IHm1_2 in H1. + destruct x; simpl. apply H1. apply H3. + destruct o; destruct o0; auto || congruence. + * apply andb_true_intro. split. apply andb_true_intro. split. + generalize (H xH); simpl. destruct o; destruct o0; tauto. + apply IHm1_1. intros; apply (H (xO x)). + apply IHm1_2. intros; apply (H (xI x)). Qed. End BOOLEAN_EQUALITY. @@ -559,7 +539,7 @@ Module PTree <: TREE. Section COMBINE. - Variable A B C: Type. + Variables A B C: Type. Variable f: option A -> option B -> option C. Hypothesis f_none_none: f None None = None. @@ -646,45 +626,53 @@ Module PTree <: TREE. auto. Qed. - Fixpoint xelements (A : Type) (m : t A) (i : positive) {struct m} - : list (positive * A) := + Fixpoint xelements (A : Type) (m : t A) (i : positive) + (k: list (positive * A)) {struct m} + : list (positive * A) := match m with - | Leaf => nil + | Leaf => k | Node l None r => - (xelements l (append i (xO xH))) ++ (xelements r (append i (xI xH))) + xelements l (append i (xO xH)) (xelements r (append i (xI xH)) k) | Node l (Some x) r => - (xelements l (append i (xO xH))) - ++ ((i, x) :: xelements r (append i (xI xH))) + xelements l (append i (xO xH)) + ((i, x) :: xelements r (append i (xI xH)) k) end. - (* Note: function [xelements] above is inefficient. We should apply - deforestation to it, but that makes the proofs even harder. *) - Definition elements A (m : t A) := xelements m xH. + Definition elements (A: Type) (m : t A) := xelements m xH nil. + + Lemma xelements_incl: + forall (A: Type) (m: t A) (i : positive) k x, + In x k -> In x (xelements m i k). + Proof. + induction m; intros; simpl. + auto. + destruct o. + apply IHm1. simpl; right; auto. + auto. + Qed. Lemma xelements_correct: - forall (A: Type) (m: t A) (i j : positive) (v: A), - get i m = Some v -> In (append j i, v) (xelements m j). + forall (A: Type) (m: t A) (i j : positive) (v: A) k, + get i m = Some v -> In (append j i, v) (xelements m j k). Proof. induction m; intros. rewrite (gleaf A i) in H; congruence. destruct o; destruct i; simpl; simpl in H. - rewrite append_assoc_1; apply in_or_app; right; apply in_cons; - apply IHm2; auto. - rewrite append_assoc_0; apply in_or_app; left; apply IHm1; auto. - rewrite append_neutral_r; apply in_or_app; injection H; - intro EQ; rewrite EQ; right; apply in_eq. - rewrite append_assoc_1; apply in_or_app; right; apply IHm2; auto. - rewrite append_assoc_0; apply in_or_app; left; apply IHm1; auto. - congruence. + rewrite append_assoc_1. apply xelements_incl. right. auto. + rewrite append_assoc_0. auto. + inv H. apply xelements_incl. left. rewrite append_neutral_r; auto. + rewrite append_assoc_1. apply xelements_incl. auto. + rewrite append_assoc_0. auto. + inv H. Qed. Theorem elements_correct: forall (A: Type) (m: t A) (i: positive) (v: A), get i m = Some v -> In (i, v) (elements m). Proof. - intros A m i v H. - exact (xelements_correct m i xH H). + intros A m i v H. + exact (xelements_correct m i xH nil H). Qed. Fixpoint xget (A : Type) (i j : positive) (m : t A) {struct j} : option A := @@ -695,6 +683,13 @@ Module PTree <: TREE. | _, _ => None end. + Lemma xget_diag : + forall (A : Type) (i : positive) (m1 m2 : t A) (o : option A), + xget i i (Node m1 o m2) = o. + Proof. + induction i; intros; simpl; auto. + Qed. + Lemma xget_left : forall (A : Type) (j i : positive) (m1 m2 : t A) (o : option A) (v : A), xget i (append j (xO xH)) m1 = Some v -> xget i j (Node m1 o m2) = Some v. @@ -703,122 +698,26 @@ Module PTree <: TREE. destruct i; congruence. Qed. - Lemma xelements_ii : - forall (A: Type) (m: t A) (i j : positive) (v: A), - In (xI i, v) (xelements m (xI j)) -> In (i, v) (xelements m j). - Proof. - induction m. - simpl; auto. - intros; destruct o; simpl; simpl in H; destruct (in_app_or _ _ _ H); - apply in_or_app. - left; apply IHm1; auto. - right; destruct (in_inv H0). - injection H1; intros EQ1 EQ2; rewrite EQ1; rewrite EQ2; apply in_eq. - apply in_cons; apply IHm2; auto. - left; apply IHm1; auto. - right; apply IHm2; auto. - Qed. - - Lemma xelements_io : - forall (A: Type) (m: t A) (i j : positive) (v: A), - ~In (xI i, v) (xelements m (xO j)). - Proof. - induction m. - simpl; auto. - intros; destruct o; simpl; intro H; destruct (in_app_or _ _ _ H). - apply (IHm1 _ _ _ H0). - destruct (in_inv H0). - congruence. - apply (IHm2 _ _ _ H1). - apply (IHm1 _ _ _ H0). - apply (IHm2 _ _ _ H0). - Qed. - - Lemma xelements_oo : - forall (A: Type) (m: t A) (i j : positive) (v: A), - In (xO i, v) (xelements m (xO j)) -> In (i, v) (xelements m j). - Proof. - induction m. - simpl; auto. - intros; destruct o; simpl; simpl in H; destruct (in_app_or _ _ _ H); - apply in_or_app. - left; apply IHm1; auto. - right; destruct (in_inv H0). - injection H1; intros EQ1 EQ2; rewrite EQ1; rewrite EQ2; apply in_eq. - apply in_cons; apply IHm2; auto. - left; apply IHm1; auto. - right; apply IHm2; auto. - Qed. - - Lemma xelements_oi : - forall (A: Type) (m: t A) (i j : positive) (v: A), - ~In (xO i, v) (xelements m (xI j)). - Proof. - induction m. - simpl; auto. - intros; destruct o; simpl; intro H; destruct (in_app_or _ _ _ H). - apply (IHm1 _ _ _ H0). - destruct (in_inv H0). - congruence. - apply (IHm2 _ _ _ H1). - apply (IHm1 _ _ _ H0). - apply (IHm2 _ _ _ H0). - Qed. - - Lemma xelements_ih : - forall (A: Type) (m1 m2: t A) (o: option A) (i : positive) (v: A), - In (xI i, v) (xelements (Node m1 o m2) xH) -> In (i, v) (xelements m2 xH). - Proof. - destruct o; simpl; intros; destruct (in_app_or _ _ _ H). - absurd (In (xI i, v) (xelements m1 2)); auto; apply xelements_io; auto. - destruct (in_inv H0). - congruence. - apply xelements_ii; auto. - absurd (In (xI i, v) (xelements m1 2)); auto; apply xelements_io; auto. - apply xelements_ii; auto. - Qed. - - Lemma xelements_oh : - forall (A: Type) (m1 m2: t A) (o: option A) (i : positive) (v: A), - In (xO i, v) (xelements (Node m1 o m2) xH) -> In (i, v) (xelements m1 xH). - Proof. - destruct o; simpl; intros; destruct (in_app_or _ _ _ H). - apply xelements_oo; auto. - destruct (in_inv H0). - congruence. - absurd (In (xO i, v) (xelements m2 3)); auto; apply xelements_oi; auto. - apply xelements_oo; auto. - absurd (In (xO i, v) (xelements m2 3)); auto; apply xelements_oi; auto. - Qed. - - Lemma xelements_hi : - forall (A: Type) (m: t A) (i : positive) (v: A), - ~In (xH, v) (xelements m (xI i)). + Lemma xget_right : + forall (A : Type) (j i : positive) (m1 m2 : t A) (o : option A) (v : A), + xget i (append j (xI xH)) m2 = Some v -> xget i j (Node m1 o m2) = Some v. Proof. - induction m; intros. - simpl; auto. - destruct o; simpl; intro H; destruct (in_app_or _ _ _ H). - generalize H0; apply IHm1; auto. - destruct (in_inv H0). - congruence. - generalize H1; apply IHm2; auto. - generalize H0; apply IHm1; auto. - generalize H0; apply IHm2; auto. + induction j; intros; destruct i; simpl; simpl in H; auto; try congruence. + destruct i; congruence. Qed. - Lemma xelements_ho : - forall (A: Type) (m: t A) (i : positive) (v: A), - ~In (xH, v) (xelements m (xO i)). + Lemma xelements_complete: + forall (A: Type) (m: t A) (i j : positive) (v: A) k, + In (i, v) (xelements m j k) -> xget i j m = Some v \/ In (i, v) k. Proof. - induction m; intros. - simpl; auto. - destruct o; simpl; intro H; destruct (in_app_or _ _ _ H). - generalize H0; apply IHm1; auto. - destruct (in_inv H0). - congruence. - generalize H1; apply IHm2; auto. - generalize H0; apply IHm1; auto. - generalize H0; apply IHm2; auto. + induction m; simpl; intros. + auto. + destruct o. + edestruct IHm1; eauto. left; apply xget_left; auto. + destruct H0. inv H0. left; apply xget_diag. + edestruct IHm2; eauto. left; apply xget_right; auto. + edestruct IHm1; eauto. left; apply xget_left; auto. + edestruct IHm2; eauto. left; apply xget_right; auto. Qed. Lemma get_xget_h : @@ -827,89 +726,50 @@ Module PTree <: TREE. destruct i; simpl; auto. Qed. - Lemma xelements_complete: - forall (A: Type) (i j : positive) (m: t A) (v: A), - In (i, v) (xelements m j) -> xget i j m = Some v. - Proof. - induction i; simpl; intros; destruct j; simpl. - apply IHi; apply xelements_ii; auto. - absurd (In (xI i, v) (xelements m (xO j))); auto; apply xelements_io. - destruct m. - simpl in H; tauto. - rewrite get_xget_h. apply IHi. apply (xelements_ih _ _ _ _ _ H). - absurd (In (xO i, v) (xelements m (xI j))); auto; apply xelements_oi. - apply IHi; apply xelements_oo; auto. - destruct m. - simpl in H; tauto. - rewrite get_xget_h. apply IHi. apply (xelements_oh _ _ _ _ _ H). - absurd (In (xH, v) (xelements m (xI j))); auto; apply xelements_hi. - absurd (In (xH, v) (xelements m (xO j))); auto; apply xelements_ho. - destruct m. - simpl in H; tauto. - destruct o; simpl in H; destruct (in_app_or _ _ _ H). - absurd (In (xH, v) (xelements m1 (xO xH))); auto; apply xelements_ho. - destruct (in_inv H0). - congruence. - absurd (In (xH, v) (xelements m2 (xI xH))); auto; apply xelements_hi. - absurd (In (xH, v) (xelements m1 (xO xH))); auto; apply xelements_ho. - absurd (In (xH, v) (xelements m2 (xI xH))); auto; apply xelements_hi. - Qed. - Theorem elements_complete: forall (A: Type) (m: t A) (i: positive) (v: A), In (i, v) (elements m) -> get i m = Some v. Proof. - intros A m i v H. - unfold elements in H. - rewrite get_xget_h. - exact (xelements_complete i xH m v H). + intros A m i v H. unfold elements in H. + edestruct xelements_complete; eauto. + rewrite get_xget_h. auto. + contradiction. Qed. Lemma in_xelements: - forall (A: Type) (m: t A) (i k: positive) (v: A), - In (k, v) (xelements m i) -> - exists j, k = append i j. + forall (A: Type) (m: t A) (i k: positive) (v: A) l, + In (k, v) (xelements m i l) -> + (exists j, k = append i j) \/ In (k, v) l. Proof. induction m; simpl; intros. - tauto. - assert (k = i \/ In (k, v) (xelements m1 (append i 2)) - \/ In (k, v) (xelements m2 (append i 3))). - destruct o. - elim (in_app_or _ _ _ H); simpl; intuition. - replace k with i. tauto. congruence. - elim (in_app_or _ _ _ H); simpl; intuition. - elim H0; intro. - exists xH. rewrite append_neutral_r. auto. - elim H1; intro. - elim (IHm1 _ _ _ H2). intros k1 EQ. rewrite EQ. - rewrite <- append_assoc_0. exists (xO k1); auto. - elim (IHm2 _ _ _ H2). intros k1 EQ. rewrite EQ. - rewrite <- append_assoc_1. exists (xI k1); auto. + auto. + destruct o. + edestruct IHm1 as [[j EQ] | IN]; eauto. + rewrite <- append_assoc_0 in EQ. left; econstructor; eauto. + destruct IN. + inv H0. left; exists xH; symmetry; apply append_neutral_r. + edestruct IHm2 as [[j EQ] | IN]; eauto. + rewrite <- append_assoc_1 in EQ. left; econstructor; eauto. + edestruct IHm1 as [[j EQ] | IN]; eauto. + rewrite <- append_assoc_0 in EQ. left; econstructor; eauto. + edestruct IHm2 as [[j EQ] | IN']; eauto. + rewrite <- append_assoc_1 in EQ. left; econstructor; eauto. Qed. - Definition xkeys (A: Type) (m: t A) (i: positive) := - List.map (@fst positive A) (xelements m i). + Definition xkeys (A: Type) (m: t A) (i: positive) (l: list (positive * A)) := + List.map (@fst positive A) (xelements m i l). Lemma in_xkeys: - forall (A: Type) (m: t A) (i k: positive), - In k (xkeys m i) -> - exists j, k = append i j. + forall (A: Type) (m: t A) (i k: positive) l, + In k (xkeys m i l) -> + (exists j, k = append i j) \/ In k (List.map fst l). Proof. - unfold xkeys; intros. - elim (list_in_map_inv _ _ _ H). intros [k1 v1] [EQ IN]. - simpl in EQ; subst k1. apply in_xelements with A m v1. auto. - Qed. - - Remark list_append_cons_norepet: - forall (A: Type) (l1 l2: list A) (x: A), - list_norepet l1 -> list_norepet l2 -> list_disjoint l1 l2 -> - ~In x l1 -> ~In x l2 -> - list_norepet (l1 ++ x :: l2). - Proof. - intros. apply list_norepet_append_commut. simpl; constructor. - red; intros. elim (in_app_or _ _ _ H4); intro; tauto. - apply list_norepet_append; auto. - apply list_disjoint_sym; auto. + unfold xkeys; intros. + exploit list_in_map_inv; eauto. intros [[k1 v1] [EQ IN]]. + simpl in EQ; subst k1. + exploit in_xelements; eauto. intros [EX | IN']. + auto. + right. change k with (fst (k, v1)). apply List.in_map; auto. Qed. Lemma append_injective: @@ -922,44 +782,52 @@ Module PTree <: TREE. Qed. Lemma xelements_keys_norepet: - forall (A: Type) (m: t A) (i: positive), - list_norepet (xkeys m i). + forall (A: Type) (m: t A) (i: positive) l, + (forall k v, get k m = Some v -> ~In (append i k) (List.map fst l)) -> + list_norepet (List.map fst l) -> + list_norepet (xkeys m i l). Proof. - induction m; unfold xkeys; simpl; fold xkeys; intros. - constructor. - assert (list_disjoint (xkeys m1 (append i 2)) (xkeys m2 (append i 3))). - red; intros; red; intro. subst y. - elim (in_xkeys _ _ _ H); intros j1 EQ1. - elim (in_xkeys _ _ _ H0); intros j2 EQ2. - rewrite EQ1 in EQ2. - rewrite <- append_assoc_0 in EQ2. - rewrite <- append_assoc_1 in EQ2. - generalize (append_injective _ _ _ EQ2). congruence. - assert (forall (m: t A) j, - j = 2%positive \/ j = 3%positive -> - ~In i (xkeys m (append i j))). - intros; red; intros. - elim (in_xkeys _ _ _ H1); intros k EQ. - assert (EQ1: append i xH = append (append i j) k). - rewrite append_neutral_r. auto. - elim H0; intro; subst j; - try (rewrite <- append_assoc_0 in EQ1); - try (rewrite <- append_assoc_1 in EQ1); - generalize (append_injective _ _ _ EQ1); congruence. - destruct o; rewrite list_append_map; simpl; - change (List.map (@fst positive A) (xelements m1 (append i 2))) - with (xkeys m1 (append i 2)); - change (List.map (@fst positive A) (xelements m2 (append i 3))) - with (xkeys m2 (append i 3)). - apply list_append_cons_norepet; auto. - apply list_norepet_append; auto. + unfold xkeys; induction m; simpl; intros. + auto. + destruct o. + apply IHm1. + intros; red; intros IN. rewrite <- append_assoc_0 in IN. simpl in IN; destruct IN. + exploit (append_injective i k~0 xH). rewrite append_neutral_r. auto. + congruence. + exploit in_xkeys; eauto. intros [[j EQ] | IN]. + rewrite <- append_assoc_1 in EQ. exploit append_injective; eauto. congruence. + elim (H (xO k) v); auto. + simpl. constructor. + red; intros IN. exploit in_xkeys; eauto. intros [[j EQ] | IN']. + rewrite <- append_assoc_1 in EQ. + exploit (append_injective i j~1 xH). rewrite append_neutral_r. auto. congruence. + elim (H xH a). auto. rewrite append_neutral_r. auto. + apply IHm2; auto. intros. rewrite <- append_assoc_1. eapply H; eauto. + apply IHm1. + intros; red; intros IN. rewrite <- append_assoc_0 in IN. + exploit in_xkeys; eauto. intros [[j EQ] | IN']. + rewrite <- append_assoc_1 in EQ. exploit append_injective; eauto. congruence. + elim (H (xO k) v); auto. + apply IHm2; auto. intros. rewrite <- append_assoc_1. eapply H; eauto. Qed. Theorem elements_keys_norepet: forall (A: Type) (m: t A), list_norepet (List.map (@fst elt A) (elements m)). Proof. - intros. change (list_norepet (xkeys m 1)). apply xelements_keys_norepet. + intros. change (list_norepet (xkeys m 1 nil)). apply xelements_keys_norepet. + intros; red; intros. elim H0. constructor. + Qed. + + Remark xelements_empty: + forall (A: Type) (m: t A) i l, (forall i, get i m = None) -> xelements m i l = l. + Proof. + induction m; simpl; intros. + auto. + destruct o. generalize (H xH); simpl; congruence. + rewrite IHm1. apply IHm2. + intros. apply (H (xI i0)). + intros. apply (H (xO i0)). Qed. Theorem elements_canonical_order: @@ -971,48 +839,48 @@ Module PTree <: TREE. (elements m) (elements n). Proof. intros until R. - assert (forall m n j, + assert (forall m n j l1 l2, (forall i x, get i m = Some x -> exists y, get i n = Some y /\ R x y) -> (forall i y, get i n = Some y -> exists x, get i m = Some x /\ R x y) -> list_forall2 (fun i_x i_y => fst i_x = fst i_y /\ R (snd i_x) (snd i_y)) - (xelements m j) (xelements n j)). - induction m; induction n; intros; simpl. - constructor. - destruct o. exploit (H0 xH). simpl. reflexivity. simpl. intros [x [P Q]]. congruence. - change (@nil (positive*A)) with ((@nil (positive * A))++nil). - apply list_forall2_app. - apply IHn1. - intros. rewrite gleaf in H1. congruence. - intros. exploit (H0 (xO i)). simpl; eauto. rewrite gleaf. intros [x [P Q]]. congruence. - apply IHn2. - intros. rewrite gleaf in H1. congruence. - intros. exploit (H0 (xI i)). simpl; eauto. rewrite gleaf. intros [x [P Q]]. congruence. - destruct o. exploit (H xH). simpl. reflexivity. simpl. intros [x [P Q]]. congruence. - change (@nil (positive*B)) with (xelements (@Leaf B) (append j 2) ++ (xelements (@Leaf B) (append j 3))). - apply list_forall2_app. - apply IHm1. - intros. exploit (H (xO i)). simpl; eauto. rewrite gleaf. intros [y [P Q]]. congruence. - intros. rewrite gleaf in H1. congruence. - apply IHm2. - intros. exploit (H (xI i)). simpl; eauto. rewrite gleaf. intros [y [P Q]]. congruence. - intros. rewrite gleaf in H1. congruence. - exploit (IHm1 n1 (append j 2)). - intros. exploit (H (xO i)). simpl; eauto. simpl. auto. - intros. exploit (H0 (xO i)). simpl; eauto. simpl; auto. - intro REC1. - exploit (IHm2 n2 (append j 3)). - intros. exploit (H (xI i)). simpl; eauto. simpl. auto. - intros. exploit (H0 (xI i)). simpl; eauto. simpl; auto. - intro REC2. - destruct o; destruct o0. - apply list_forall2_app; auto. constructor; auto. - simpl; split; auto. exploit (H xH). simpl; eauto. simpl. intros [y [P Q]]. congruence. - exploit (H xH). simpl; eauto. simpl. intros [y [P Q]]; congruence. - exploit (H0 xH). simpl; eauto. simpl. intros [x [P Q]]; congruence. - apply list_forall2_app; auto. - - unfold elements; auto. + l1 l2 -> + list_forall2 + (fun i_x i_y => fst i_x = fst i_y /\ R (snd i_x) (snd i_y)) + (xelements m j l1) (xelements n j l2)). + { + induction m; simpl; intros. + rewrite xelements_empty. auto. + intros. destruct (get i n) eqn:E; auto. exploit H0; eauto. + intros [x [P Q]]. rewrite gleaf in P; congruence. + destruct o. + destruct n. exploit (H xH a); auto. simpl. intros [y [P Q]]; congruence. + exploit (H xH a); auto. intros [y [P Q]]. simpl in P. subst o. + simpl. apply IHm1. + intros i x. exact (H (xO i) x). + intros i x. exact (H0 (xO i) x). + constructor. simpl; auto. + apply IHm2. + intros i x. exact (H (xI i) x). + intros i x. exact (H0 (xI i) x). + auto. + destruct n. simpl. + rewrite ! xelements_empty. auto. + intros. destruct (get i m2) eqn:E; auto. exploit (H (xI i)); eauto. + rewrite gleaf. intros [y [P Q]]; congruence. + intros. destruct (get i m1) eqn:E; auto. exploit (H (xO i)); eauto. + rewrite gleaf. intros [y [P Q]]; congruence. + destruct o. + exploit (H0 xH); simpl; eauto. intros [y [P Q]]; congruence. + simpl. apply IHm1. + intros i x. exact (H (xO i) x). + intros i x. exact (H0 (xO i) x). + apply IHm2. + intros i x. exact (H (xI i) x). + intros i x. exact (H0 (xI i) x). + auto. + } + intros. apply H. auto. auto. constructor. Qed. Theorem elements_extensional: @@ -1045,19 +913,15 @@ Module PTree <: TREE. xfold f xH m v. Lemma xfold_xelements: - forall (A B: Type) (f: B -> positive -> A -> B) m i v, - xfold f i m v = - List.fold_left (fun a p => f a (fst p) (snd p)) - (xelements m i) - v. + forall (A B: Type) (f: B -> positive -> A -> B) m i v l, + List.fold_left (fun a p => f a (fst p) (snd p)) l (xfold f i m v) = + List.fold_left (fun a p => f a (fst p) (snd p)) (xelements m i l) v. Proof. induction m; intros. simpl. auto. - simpl. destruct o. - rewrite fold_left_app. simpl. - rewrite IHm1. apply IHm2. - rewrite fold_left_app. simpl. - rewrite IHm1. apply IHm2. + destruct o; simpl. + rewrite <- IHm1. simpl. rewrite <- IHm2. auto. + rewrite <- IHm1. rewrite <- IHm2. auto. Qed. Theorem fold_spec: @@ -1065,7 +929,7 @@ Module PTree <: TREE. fold f m v = List.fold_left (fun a p => f a (fst p) (snd p)) (elements m) v. Proof. - intros. unfold fold, elements. apply xfold_xelements. + intros. unfold fold, elements. rewrite <- xfold_xelements. auto. Qed. End PTree. diff --git a/lib/Ordered.v b/lib/Ordered.v index f52a7ef..026671a 100644 --- a/lib/Ordered.v +++ b/lib/Ordered.v @@ -48,6 +48,36 @@ Definition eq_dec : forall x y, { eq x y } + { ~ eq x y } := peq. End OrderedPositive. +(** The ordered type of integers *) + +Module OrderedZ <: OrderedType. + +Definition t := Z. +Definition eq (x y: t) := x = y. +Definition lt := Zlt. + +Lemma eq_refl : forall x : t, eq x x. +Proof (@refl_equal t). +Lemma eq_sym : forall x y : t, eq x y -> eq y x. +Proof (@sym_equal t). +Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z. +Proof (@trans_equal t). +Lemma lt_trans : forall x y z : t, lt x y -> lt y z -> lt x z. +Proof Zlt_trans. +Lemma lt_not_eq : forall x y : t, lt x y -> ~ eq x y. +Proof. unfold lt, eq, t; intros. omega. Qed. +Lemma compare : forall x y : t, Compare lt eq x y. +Proof. + intros. destruct (Z.compare x y) as [] eqn:E. + apply EQ. red. apply Z.compare_eq_iff. assumption. + apply LT. assumption. + apply GT. apply Z.compare_gt_iff. assumption. +Defined. + +Definition eq_dec : forall x y, { eq x y } + { ~ eq x y } := zeq. + +End OrderedZ. + (** The ordered type of machine integers *) Module OrderedInt <: OrderedType. diff --git a/powerpc/Asm.v b/powerpc/Asm.v index 27e801a..115d846 100644 --- a/powerpc/Asm.v +++ b/powerpc/Asm.v @@ -23,7 +23,7 @@ Require Import Events. Require Import Globalenvs. Require Import Smallstep. Require Import Locations. -Require Stacklayout. +Require Import Stacklayout. Require Import Conventions. (** * Abstract syntax *) @@ -222,7 +222,7 @@ Inductive instruction : Type := | Pxori: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate *) | Pxoris: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate high *) | Plabel: label -> instruction (**r define a code label *) - | Pbuiltin: external_function -> list preg -> preg -> instruction (**r built-in function *) + | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *) | Pannot: external_function -> list annot_param -> instruction (**r annotation statement *) with annot_param : Type := @@ -324,6 +324,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset := | r :: l' => undef_regs l' (rs#r <- Vundef) end. +(** Assigning multiple registers *) + +Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset := + match rl, vl with + | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1) + | _, _ => rs + end. + Section RELSEM. (** Looking up instructions in a code sequence by position. *) @@ -777,45 +785,45 @@ Definition preg_of (r: mreg) : preg := match r with | R3 => GPR3 | R4 => GPR4 | R5 => GPR5 | R6 => GPR6 | R7 => GPR7 | R8 => GPR8 | R9 => GPR9 | R10 => GPR10 + | R11 => GPR11 | R12 => GPR12 | R14 => GPR14 | R15 => GPR15 | R16 => GPR16 | R17 => GPR17 | R18 => GPR18 | R19 => GPR19 | R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24 | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 | R29 => GPR29 | R30 => GPR30 | R31 => GPR31 - | IT1 => GPR11 | IT2 => GPR12 + | F0 => FPR0 | F1 => FPR1 | F2 => FPR2 | F3 => FPR3 | F4 => FPR4 | F5 => FPR5 | F6 => FPR6 | F7 => FPR7 | F8 => FPR8 - | F9 => FPR9 | F10 => FPR10 | F11 => FPR11 - | F14 => FPR14 | F15 => FPR15 + | F9 => FPR9 | F10 => FPR10 | F11 => FPR11 | F12 => FPR12 + | F13 => FPR13 | F14 => FPR14 | F15 => FPR15 | F16 => FPR16 | F17 => FPR17 | F18 => FPR18 | F19 => FPR19 | F20 => FPR20 | F21 => FPR21 | F22 => FPR22 | F23 => FPR23 | F24 => FPR24 | F25 => FPR25 | F26 => FPR26 | F27 => FPR27 | F28 => FPR28 | F29 => FPR29 | F30 => FPR30 | F31 => FPR31 - | FT1 => FPR0 | FT2 => FPR12 | FT3 => FPR13 end. (** Extract the values of the arguments of an external call. We exploit the calling conventions from module [Conventions], except that we use PPC registers instead of locations. *) +Definition chunk_of_type (ty: typ) := + match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end. + Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop := | extcall_arg_reg: forall r, extcall_arg rs m (R r) (rs (preg_of r)) - | extcall_arg_int_stack: forall ofs bofs v, - bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv Mint32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v -> - extcall_arg rs m (S (Outgoing ofs Tint)) v - | extcall_arg_float_stack: forall ofs bofs v, + | extcall_arg_stack: forall ofs ty bofs v, bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv Mfloat64al32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v -> - extcall_arg rs m (S (Outgoing ofs Tfloat)) v. + Mem.loadv (chunk_of_type ty) m + (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v -> + extcall_arg rs m (S Outgoing ofs ty) v. Definition extcall_arguments (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop := list_forall2 (extcall_arg rs m) (loc_arguments sg) args. -Definition loc_external_result (sg: signature) : preg := - preg_of (loc_result sg). +Definition loc_external_result (sg: signature) : list preg := + map preg_of (loc_result sg). (** Extract the values of the arguments of an annotation. *) @@ -845,33 +853,31 @@ Inductive step: state -> trace -> state -> Prop := exec_instr c i rs m = Next rs' m' -> step (State rs m) E0 (State rs' m') | exec_step_builtin: - forall b ofs c ef args res rs m t v m', + forall b ofs c ef args res rs m t vl rs' m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal c) -> find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) -> - external_call ef ge (map rs args) m t v m' -> - step (State rs m) t - (State (nextinstr(rs #GPR11 <- Vundef #GPR12 <- Vundef - #FPR12 <- Vundef #FPR13 <- Vundef - #FPR0 <- Vundef #CTR <- Vundef - #res <- v)) m') + external_call' ef ge (map rs args) m t vl m' -> + rs' = nextinstr + (set_regs res vl + (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) -> + step (State rs m) t (State rs' m') | exec_step_annot: forall b ofs c ef args rs m vargs t v m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal c) -> find_instr (Int.unsigned ofs) c = Some (Pannot ef args) -> annot_arguments rs m args vargs -> - external_call ef ge vargs m t v m' -> + external_call' ef ge vargs m t v m' -> step (State rs m) t (State (nextinstr rs) m') | exec_step_external: forall b ef args res rs m t rs' m', rs PC = Vptr b Int.zero -> Genv.find_funct_ptr ge b = Some (External ef) -> - external_call ef ge args m t res m' -> + external_call' ef ge args m t res m' -> extcall_arguments rs m (ef_sig ef) args -> - rs' = (rs#(loc_external_result (ef_sig ef)) <- res - #PC <- (rs LR)) -> + rs' = (set_regs (loc_external_result (ef_sig ef)) res rs) #PC <- (rs RA) -> step (State rs m) t (State rs' m'). End RELSEM. @@ -939,21 +945,21 @@ Ltac Equalities := discriminate. discriminate. inv H11. - exploit external_call_determ. eexact H4. eexact H11. intros [A B]. + exploit external_call_determ'. eexact H4. eexact H9. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. inv H12. assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0. - exploit external_call_determ. eexact H5. eexact H13. intros [A B]. + exploit external_call_determ'. eexact H5. eexact H13. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0. - exploit external_call_determ. eexact H3. eexact H8. intros [A B]. + exploit external_call_determ'. eexact H3. eexact H8. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. (* trace length *) red; intros. inv H; simpl. omega. - eapply external_call_trace_length; eauto. - eapply external_call_trace_length; eauto. - eapply external_call_trace_length; eauto. + inv H3; eapply external_call_trace_length; eauto. + inv H4; eapply external_call_trace_length; eauto. + inv H2; eapply external_call_trace_length; eauto. (* initial states *) inv H; inv H0. f_equal. congruence. (* final no step *) @@ -973,13 +979,4 @@ Definition data_preg (r: preg) : bool := | _ => true end. -Definition nontemp_preg (r: preg) : bool := - match r with - | IR GPR0 => false | IR GPR11 => false | IR GPR12 => false - | FR FPR0 => false | FR FPR12 => false | FR FPR13 => false - | PC => false | LR => false | CTR => false - | CR0_0 => false | CR0_1 => false | CR0_2 => false | CR0_3 => false - | CARRY => false - | _ => true - end. diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v index 39b84e0..6a1d07e 100644 --- a/powerpc/Asmgen.v +++ b/powerpc/Asmgen.v @@ -136,6 +136,8 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) := Plfd r (Cint ofs) base :: k else loadimm GPR0 ofs (Plfdx r base GPR0 :: k)) + | Tlong => + Error (msg "Asmgen.loadind") end. Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := @@ -152,6 +154,8 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := Pstfd r (Cint ofs) base :: k else loadimm GPR0 ofs (Pstfdx r base GPR0 :: k)) + | Tlong => + Error (msg "Asmgen.storeind") end. (** Constructor for a floating-point comparison. The PowerPC has @@ -336,8 +340,8 @@ Definition transl_op OK (if symbol_is_small_data s ofs then Paddi r GPR0 (Csymbol_sda s ofs) :: k else - Paddis GPR12 GPR0 (Csymbol_high s ofs) :: - Paddi r GPR12 (Csymbol_low s ofs) :: k) + Paddis r GPR0 (Csymbol_high s ofs) :: + Paddi r r (Csymbol_low s ofs) :: k) | Oaddrstack n, nil => do r <- ireg_of res; OK (addimm r GPR1 n k) | Ocast8signed, a1 :: nil => @@ -428,7 +432,7 @@ Definition transl_op do r1 <- ireg_of a1; do r <- ireg_of res; OK (rolm r r1 amount mask k) | Oroli amount mask, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r2 <- ireg_of a2; do r <- ireg_of res; OK (Prlwimi r r2 amount mask :: k) | Onegf, a1 :: nil => @@ -467,7 +471,7 @@ Definition transl_op (** Translation of memory accesses: loads, and stores. *) Definition int_temp_for (r: mreg) := - if mreg_eq r IT2 then GPR11 else GPR12. + if mreg_eq r R12 then GPR11 else GPR12. Definition transl_memory_access (mk1: constant -> ireg -> instruction) @@ -529,6 +533,8 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing) | Mfloat64 | Mfloat64al32 => do r <- freg_of dst; transl_memory_access (Plfd r) (Plfdx r) addr args GPR12 k + | Mint64 => + Error (msg "Asmgen.transl_load") end. Definition transl_store (chunk: memory_chunk) (addr: addressing) @@ -550,6 +556,8 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing) | Mfloat64 | Mfloat64al32 => do r <- freg_of src; transl_memory_access (Pstfd r) (Pstfdx r) addr args temp k + | Mint64 => + Error (msg "Asmgen.transl_store") end. (** Translation of arguments to annotations *) @@ -574,7 +582,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) loadind GPR11 ofs ty dst k else (do k1 <- loadind GPR11 ofs ty dst k; - loadind GPR1 f.(fn_link_ofs) Tint IT1 k1) + loadind GPR1 f.(fn_link_ofs) Tint R11 k1) | Mop op args res => transl_op op args res k | Mload chunk addr args dst => @@ -598,7 +606,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbs symb :: k) | Mbuiltin ef args res => - OK (Pbuiltin ef (map preg_of args) (preg_of res) :: k) + OK (Pbuiltin ef (map preg_of args) (map preg_of res) :: k) | Mannot ef args => OK (Pannot ef (map transl_annot_param args) :: k) | Mlabel lbl => @@ -624,8 +632,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool := match i with | Msetstack src ofs ty => before - | Mgetparam ofs ty dst => negb (mreg_eq dst IT1) - | Mop Omove args res => before && negb (mreg_eq res IT1) + | Mgetparam ofs ty dst => negb (mreg_eq dst R11) + | Mop Omove args res => before && negb (mreg_eq res R11) | _ => false end. diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v index 07e66cf..37c8808 100644 --- a/powerpc/Asmgenproof.v +++ b/powerpc/Asmgenproof.v @@ -487,9 +487,9 @@ Definition measure (s: Mach.state) : nat := | Mach.Returnstate _ _ _ => 1%nat end. -Remark preg_of_not_GPR11: forall r, negb (mreg_eq r IT1) = true -> IR GPR11 <> preg_of r. +Remark preg_of_not_GPR11: forall r, negb (mreg_eq r R11) = true -> IR GPR11 <> preg_of r. Proof. - intros. change (IR GPR11) with (preg_of IT1). red; intros. + intros. change (IR GPR11) with (preg_of R11). red; intros. exploit preg_of_injective; eauto. intros; subst r; discriminate. Qed. @@ -526,7 +526,8 @@ Proof. rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR. exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]]. exists rs'; split. eauto. - split. change (undef_setstack rs) with rs. apply agree_exten with rs0; auto with asmgen. + split. change (Mach.undef_regs (destroyed_by_op Omove) rs) with rs. + apply agree_exten with rs0; auto with asmgen. simpl; intros. rewrite Q; auto with asmgen. - (* Mgetparam *) @@ -568,9 +569,11 @@ Opaque loadind. intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. left; eapply exec_straight_steps; eauto; intros. simpl in TR. exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. - exists rs2; split. eauto. split. auto. - simpl. destruct op; try congruence. destruct ep; simpl; try congruence. intros. + exists rs2; split. eauto. split. auto. + destruct op; simpl; try discriminate. intros. + destruct (andb_prop _ _ H1); clear H1. rewrite R; auto. apply preg_of_not_GPR11; auto. + change (destroyed_by_op Omove) with (@nil mreg). simpl; auto. - (* Mload *) assert (eval_addressing tge sp addr rs##args = Some a). @@ -595,7 +598,7 @@ Opaque loadind. left; eapply exec_straight_steps; eauto. intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]]. exists rs2; split. eauto. - split. eapply agree_exten_temps; eauto. intros; auto with asmgen. + split. eapply agree_undef_regs; eauto with asmgen. simpl; congruence. - (* Mcall *) @@ -741,19 +744,21 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. inv AT. monadInv H3. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H2); intro NOOV. - exploit external_call_mem_extends; eauto. eapply preg_vals; eauto. + exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto. intros [vres' [m2' [A [B [C D]]]]]. left. econstructor; split. apply plus_one. eapply exec_step_builtin. eauto. eauto. eapply find_instr_tail; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. + eauto. econstructor; eauto. - Simpl. rewrite <- H0. simpl. econstructor; eauto. + Simpl. rewrite set_pregs_other_2. rewrite undef_regs_other_2. rewrite <- H0. simpl. econstructor; eauto. eapply code_tail_next_int; eauto. - apply agree_nextinstr. eapply agree_set_undef_mreg; eauto. - rewrite Pregmap.gss. auto. - intros. Simpl. + apply preg_notin_charact; auto with asmgen. + apply preg_notin_charact; auto with asmgen. + apply agree_nextinstr. eapply agree_set_mregs; auto. + eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto. congruence. - (* Mannot *) @@ -761,12 +766,12 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H3); intro NOOV. exploit annot_arguments_match; eauto. intros [vargs' [P Q]]. - exploit external_call_mem_extends; eauto. + exploit external_call_mem_extends'; eauto. intros [vres' [m2' [A [B [C D]]]]]. left. econstructor; split. apply plus_one. eapply exec_step_annot. eauto. eauto. eapply find_instr_tail; eauto. eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. eapply match_states_intro with (ep := false); eauto with coqlib. unfold nextinstr. rewrite Pregmap.gss. @@ -798,11 +803,11 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. destruct (snd (crbit_for_cond cond)). (* Pbt, taken *) econstructor; econstructor; econstructor; split. eexact A. - split. eapply agree_exten_temps; eauto with asmgen. + split. eapply agree_exten; eauto with asmgen. simpl. rewrite B. reflexivity. (* Pbf, taken *) econstructor; econstructor; econstructor; split. eexact A. - split. eapply agree_exten_temps; eauto with asmgen. + split. eapply agree_exten; eauto with asmgen. simpl. rewrite B. reflexivity. - (* Mcond false *) @@ -815,7 +820,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. destruct (snd (crbit_for_cond cond)). apply exec_straight_one. simpl. rewrite B. reflexivity. auto. apply exec_straight_one. simpl. rewrite B. reflexivity. auto. - split. eapply agree_exten_temps; eauto with asmgen. + split. eapply agree_exten; eauto with asmgen. intros; Simpl. simpl. congruence. @@ -835,7 +840,9 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. eapply find_instr_tail; eauto. simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A. econstructor; eauto. - eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simpl. + eapply agree_undef_regs; eauto. +Local Transparent destroyed_by_jumptable. + simpl. intros. rewrite C; auto with asmgen. Simpl. congruence. - (* Mreturn *) @@ -896,6 +903,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. intros [m1' [C D]]. exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto. intros [m2' [F G]]. + simpl Mach.chunk_of_type in F. exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto. intros [m3' [P Q]]. (* Execution of function prologue *) @@ -910,7 +918,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. rewrite <- H5 at 2. apply exec_straight_three with rs2 m2' rs3 m2'. unfold exec_instr. rewrite C. fold sp. - rewrite <- (sp_val _ _ _ AG). unfold chunk_of_type in F. rewrite F. auto. + rewrite <- (sp_val _ _ _ AG). rewrite F. auto. simpl. auto. simpl. unfold store1. rewrite gpr_or_zero_not_zero. change (rs3 GPR1) with sp. change (rs3 GPR0) with (rs0 LR). simpl. @@ -928,8 +936,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. unfold rs4, rs3, rs2. apply agree_nextinstr. apply agree_set_other; auto. apply agree_set_other; auto. apply agree_nextinstr. apply agree_set_other; auto. - eapply agree_change_sp; eauto. apply agree_exten_temps with rs0; eauto. - unfold sp; congruence. + eapply agree_change_sp; eauto. unfold sp; congruence. congruence. - (* external function *) @@ -937,17 +944,15 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. intros [tf [A B]]. simpl in B. inv B. exploit extcall_arguments_match; eauto. intros [args' [C D]]. - exploit external_call_mem_extends; eauto. + exploit external_call_mem_extends'; eauto. intros [res' [m2' [P [Q [R S]]]]]. left; econstructor; split. apply plus_one. eapply exec_step_external; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto. unfold loc_external_result. - eapply agree_set_mreg; eauto. - rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto. - intros; Simpl. + apply agree_set_other; auto. apply agree_set_mregs; auto. - (* return *) inv STACKS. simpl in *. @@ -980,10 +985,9 @@ Lemma transf_final_states: forall st1 st2 r, match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r. Proof. - intros. inv H0. inv H. inv STACKS. constructor. - auto. - compute in H1. - generalize (preg_val _ _ _ R3 AG). rewrite H1. intros LD; inv LD. auto. + intros. inv H0. inv H. constructor. auto. + compute in H1. inv H1. + generalize (preg_val _ _ _ R3 AG). rewrite H2. intros LD; inv LD. auto. Qed. Theorem transf_program_correct: diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v index 0e6d3e1..cd961c9 100644 --- a/powerpc/Asmgenproof1.v +++ b/powerpc/Asmgenproof1.v @@ -546,7 +546,8 @@ Ltac ArgsInv := | [ H: Error _ = OK _ |- _ ] => discriminate | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args | [ H: bind _ _ = OK _ |- _ ] => monadInv H - | [ H: assertion _ = OK _ |- _ ] => monadInv H + | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv end); subst; repeat (match goal with @@ -810,9 +811,7 @@ Lemma transl_op_correct_aux: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of res) = v - /\ forall r, - match op with Omove => data_preg r = true | _ => nontemp_preg r = true end -> - r <> preg_of res -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. Proof. Opaque Int.eq. intros. unfold transl_op in H; destruct op; ArgsInv; simpl in H0; try (inv H0); try TranslOpSimpl. @@ -839,7 +838,7 @@ Opaque Val.add. rewrite gpr_or_zero_not_zero; eauto with asmgen. Simpl. rewrite (Val.add_commut Vzero). rewrite high_half_zero. rewrite Val.add_commut. rewrite low_high_half. auto. - intros; Simpl. + intros; Simpl. (* Oaddrstack *) destruct (addimm_correct x GPR1 i k rs m) as [rs' [EX [RES OTH]]]; eauto with asmgen. exists rs'; auto with asmgen. @@ -879,12 +878,11 @@ Opaque Val.add. intros. rewrite D; auto with asmgen. unfold rs1; Simpl. (* Oandimm *) destruct (andimm_correct x0 x i k rs m) as [rs' [A [B C]]]; eauto with asmgen. - exists rs'; auto with asmgen. (* Oorimm *) - destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]]. + destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]]. exists rs'; auto with asmgen. (* Oxorimm *) - destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]]. + destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]]. exists rs'; auto with asmgen. (* Onor *) replace (Val.notint (rs x)) @@ -898,7 +896,6 @@ Opaque Val.add. intros; Simpl. (* Orolm *) destruct (rolm_correct x0 x i i0 k rs m) as [rs' [A [B C]]]; eauto with asmgen. - exists rs'; auto with asmgen. (* Ointoffloat *) replace v with (Val.maketotal (Val.intoffloat (rs x))). TranslOpSimpl. @@ -916,19 +913,17 @@ Lemma transl_op_correct: Mem.extends m m' -> exists rs', exec_straight ge fn c rs m' k rs' m' - /\ agree (Regmap.set res v (undef_op op ms)) sp rs' - /\ forall r, op = Omove -> data_preg r = true -> r <> preg_of res -> rs'#r = rs#r. + /\ agree (Regmap.set res v (Mach.undef_regs (destroyed_by_op op) ms)) sp rs' + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. Proof. intros. exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eauto. intros [v' [A B]]. rewrite (sp_val _ _ _ H0) in A. exploit transl_op_correct_aux; eauto. intros [rs' [P [Q R]]]. rewrite <- Q in B. - exists rs'; split. eexact P. - split. unfold undef_op. destruct op; - (apply agree_set_undef_mreg with rs || apply agree_set_mreg with rs); + exists rs'; split. eexact P. + split. apply agree_set_undef_mreg with rs; auto. auto. - intros. subst op. auto. Qed. (** Translation of memory accesses *) @@ -1100,17 +1095,18 @@ Lemma transl_store_correct: Mem.storev chunk m a (rs (preg_of src)) = Some m' -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, r <> PC -> r <> GPR11 -> r <> GPR12 -> r <> FPR13 -> rs' r = rs r. + /\ forall r, r <> PC -> preg_notin r (destroyed_by_store chunk addr) -> rs' r = rs r. Proof. +Local Transparent destroyed_by_store. intros. assert (TEMP0: int_temp_for src = GPR11 \/ int_temp_for src = GPR12). - unfold int_temp_for. destruct (mreg_eq src IT2); auto. + unfold int_temp_for. destruct (mreg_eq src R12); auto. assert (TEMP1: int_temp_for src <> GPR0). destruct TEMP0; congruence. assert (TEMP2: IR (int_temp_for src) <> preg_of src). - unfold int_temp_for. destruct (mreg_eq src IT2). + unfold int_temp_for. destruct (mreg_eq src R12). subst src; simpl; congruence. - change (IR GPR12) with (preg_of IT2). red; intros; elim n. + change (IR GPR12) with (preg_of R12). red; intros; elim n. eapply preg_of_injective; eauto. assert (BASE: forall mk1 mk2 chunk', transl_memory_access mk1 mk2 addr args (int_temp_for src) k = OK c -> @@ -1123,15 +1119,15 @@ Proof. store2 chunk' (preg_of src) r1 r2 rs1 m) -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, r <> PC -> r <> GPR11 -> r <> GPR12 -> r <> FPR13 -> rs' r = rs r). + /\ forall r, r <> PC -> r <> GPR11 /\ r <> GPR12 -> rs' r = rs r). { intros. eapply transl_memory_access_correct; eauto. intros. econstructor; split. apply exec_straight_one. rewrite H4. unfold store1. rewrite H6. rewrite H7; auto with asmgen. rewrite H3. eauto. auto. - intros; Simpl. apply H7; auto. destruct TEMP0; congruence. + intros; Simpl. apply H7; auto. destruct TEMP0; destruct H9; congruence. intros. econstructor; split. apply exec_straight_one. rewrite H5. unfold store2. rewrite H6. rewrite H7; auto with asmgen. rewrite H3. eauto. auto. - intros; Simpl. apply H7; auto. destruct TEMP0; congruence. + intros; Simpl. apply H7; auto. destruct TEMP0; destruct H9; congruence. } destruct chunk; monadInv H. - (* Mint8signed *) @@ -1153,10 +1149,11 @@ Proof. eapply transl_memory_access_correct. eauto. eauto. eauto. intros. econstructor; split. apply exec_straight_one. simpl. unfold store1. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto. - intros. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence. +Local Transparent destroyed_by_store. + simpl; intros. destruct H4 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence. intros. econstructor; split. apply exec_straight_one. simpl. unfold store2. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto. - intros. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence. + simpl; intros. destruct H4 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence. - (* Mfloat64 *) apply Mem.storev_float64al32 in H1. eapply BASE; eauto; erewrite freg_of_eq by eauto; auto. - (* Mfloat64al32 *) diff --git a/powerpc/ConstpropOp.vp b/powerpc/ConstpropOp.vp index 9131a46..e7e4095 100644 --- a/powerpc/ConstpropOp.vp +++ b/powerpc/ConstpropOp.vp @@ -31,6 +31,7 @@ Inductive approx : Type := no compile-time information is available. *) | I: int -> approx (** A known integer value. *) | F: float -> approx (** A known floating-point value. *) + | L: int64 -> approx (** A know 64-bit integer value. *) | G: ident -> int -> approx (** The value is the address of the given global symbol plus the given integer offset. *) @@ -125,6 +126,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) := | Osingleoffloat, F n1 :: nil => F(Float.singleoffloat n1) | Ointoffloat, F n1 :: nil => eval_static_intoffloat n1 | Ofloatofwords, I n1 :: I n2 :: nil => if propagate_float_constants tt then F(Float.from_words n1 n2) else Unknown + | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2) + | Olowlong, L n :: nil => I(Int64.loword n) + | Ohighlong, L n :: nil => I(Int64.hiword n) | Ocmp c, vl => eval_static_condition_val c vl | _, _ => Unknown end. diff --git a/powerpc/ConstpropOpproof.v b/powerpc/ConstpropOpproof.v index 84e1e5b..9d833bf 100644 --- a/powerpc/ConstpropOpproof.v +++ b/powerpc/ConstpropOpproof.v @@ -45,6 +45,7 @@ Definition val_match_approx (a: approx) (v: val) : Prop := | Unknown => True | I p => v = Vint p | F p => v = Vfloat p + | L p => v = Vlong p | G symb ofs => v = symbol_address ge symb ofs | S ofs => v = Val.add sp (Vint ofs) | _ => False @@ -65,6 +66,8 @@ Ltac SimplVMA := simpl in H; (try subst v); SimplVMA | H: (val_match_approx (F _) ?v) |- _ => simpl in H; (try subst v); SimplVMA + | H: (val_match_approx (L _) ?v) |- _ => + simpl in H; (try subst v); SimplVMA | H: (val_match_approx (G _ _) ?v) |- _ => simpl in H; (try subst v); SimplVMA | H: (val_match_approx (S _) ?v) |- _ => diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v index 632a55d..ce66e6a 100644 --- a/powerpc/Machregs.v +++ b/powerpc/Machregs.v @@ -13,6 +13,7 @@ Require Import Coqlib. Require Import Maps. Require Import AST. +Require Import Op. (** ** Machine registers *) @@ -21,59 +22,55 @@ Require Import AST. - Integer registers that can be allocated to RTL pseudo-registers ([Rxx]). - Floating-point registers that can be allocated to RTL pseudo-registers ([Fxx]). -- Two integer registers, not allocatable, reserved as temporaries for - spilling and reloading ([IT1, IT2]). -- Two float registers, not allocatable, reserved as temporaries for - spilling and reloading ([FT1, FT2]). The type [mreg] does not include special-purpose or reserved - machine registers such as the stack pointer and the condition codes. *) + machine registers such as the stack pointer (GPR1), the small data area + pointers (GPR2, GPR13), and the condition codes. *) Inductive mreg: Type := (** Allocatable integer regs *) | R3: mreg | R4: mreg | R5: mreg | R6: mreg | R7: mreg | R8: mreg | R9: mreg | R10: mreg + | R11: mreg | R12: mreg | R14: mreg | R15: mreg | R16: mreg | R17: mreg | R18: mreg | R19: mreg | R20: mreg | R21: mreg | R22: mreg | R23: mreg | R24: mreg | R25: mreg | R26: mreg | R27: mreg | R28: mreg | R29: mreg | R30: mreg | R31: mreg (** Allocatable float regs *) + | F0: mreg | F1: mreg | F2: mreg | F3: mreg | F4: mreg | F5: mreg | F6: mreg | F7: mreg | F8: mreg - | F9: mreg | F10: mreg | F11: mreg - | F14: mreg | F15: mreg + | F9: mreg | F10: mreg | F11: mreg | F12: mreg + | F13: mreg | F14: mreg | F15: mreg | F16: mreg | F17: mreg | F18: mreg | F19: mreg | F20: mreg | F21: mreg | F22: mreg | F23: mreg | F24: mreg | F25: mreg | F26: mreg | F27: mreg - | F28: mreg | F29: mreg | F30: mreg | F31: mreg - (** Integer temporaries *) - | IT1: mreg (* R11 *) | IT2: mreg (* R12 *) - (** Float temporaries *) - | FT1: mreg (* F0 *) | FT2: mreg (* F12 *) | FT3: mreg (* F13 *). + | F28: mreg | F29: mreg | F30: mreg | F31: mreg. Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}. Proof. decide equality. Defined. +Global Opaque mreg_eq. Definition mreg_type (r: mreg): typ := match r with | R3 => Tint | R4 => Tint | R5 => Tint | R6 => Tint | R7 => Tint | R8 => Tint | R9 => Tint | R10 => Tint + | R11 => Tint | R12 => Tint | R14 => Tint | R15 => Tint | R16 => Tint | R17 => Tint | R18 => Tint | R19 => Tint | R20 => Tint | R21 => Tint | R22 => Tint | R23 => Tint | R24 => Tint | R25 => Tint | R26 => Tint | R27 => Tint | R28 => Tint | R29 => Tint | R30 => Tint | R31 => Tint + | F0 => Tfloat | F1 => Tfloat | F2 => Tfloat | F3 => Tfloat | F4 => Tfloat | F5 => Tfloat | F6 => Tfloat | F7 => Tfloat | F8 => Tfloat - | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat - | F14 => Tfloat | F15 => Tfloat + | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat | F12 => Tfloat + | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat | F16 => Tfloat | F17 => Tfloat | F18 => Tfloat | F19 => Tfloat | F20 => Tfloat | F21 => Tfloat | F22 => Tfloat | F23 => Tfloat | F24 => Tfloat | F25 => Tfloat | F26 => Tfloat | F27 => Tfloat | F28 => Tfloat | F29 => Tfloat | F30 => Tfloat | F31 => Tfloat - | IT1 => Tint | IT2 => Tint - | FT1 => Tfloat | FT2 => Tfloat | FT3 => Tfloat end. Open Scope positive_scope. @@ -85,21 +82,21 @@ Module IndexedMreg <: INDEXED_TYPE. match r with | R3 => 1 | R4 => 2 | R5 => 3 | R6 => 4 | R7 => 5 | R8 => 6 | R9 => 7 | R10 => 8 - | R14 => 9 | R15 => 10 | R16 => 11 - | R17 => 12 | R18 => 13 | R19 => 14 | R20 => 15 - | R21 => 16 | R22 => 17 | R23 => 18 | R24 => 19 - | R25 => 20 | R26 => 21 | R27 => 22 | R28 => 23 - | R29 => 24 | R30 => 25 | R31 => 26 - | F1 => 28 | F2 => 29 | F3 => 30 | F4 => 31 - | F5 => 32 | F6 => 33 | F7 => 34 | F8 => 35 - | F9 => 36 | F10 => 37 | F11 => 38 - | F14 => 39 | F15 => 40 - | F16 => 41 | F17 => 42 | F18 => 43 | F19 => 44 - | F20 => 45 | F21 => 46 | F22 => 47 | F23 => 48 - | F24 => 49 | F25 => 50 | F26 => 51 | F27 => 52 - | F28 => 53 | F29 => 54 | F30 => 55 | F31 => 56 - | IT1 => 57 | IT2 => 58 - | FT1 => 59 | FT2 => 60 | FT3 => 61 + | R11 => 9 | R12 => 10 + | R14 => 11 | R15 => 12 | R16 => 13 + | R17 => 14 | R18 => 15 | R19 => 16 | R20 => 17 + | R21 => 18 | R22 => 19 | R23 => 20 | R24 => 21 + | R25 => 22 | R26 => 23 | R27 => 24 | R28 => 25 + | R29 => 26 | R30 => 27 | R31 => 28 + | F0 => 29 + | F1 => 30 | F2 => 31 | F3 => 32 | F4 => 33 + | F5 => 34 | F6 => 35 | F7 => 36 | F8 => 37 + | F9 => 38 | F10 => 39 | F11 => 40 | F12 => 41 + | F13 => 42 | F14 => 43 | F15 => 44 + | F16 => 45 | F17 => 46 | F18 => 47 | F19 => 48 + | F20 => 49 | F21 => 50 | F22 => 51 | F23 => 52 + | F24 => 53 | F25 => 54 | F26 => 55 | F27 => 56 + | F28 => 57 | F29 => 58 | F30 => 59 | F31 => 60 end. Lemma index_inj: forall r1 r2, index r1 = index r2 -> r1 = r2. @@ -108,3 +105,68 @@ Module IndexedMreg <: INDEXED_TYPE. Qed. End IndexedMreg. +(** ** Destroyed registers, preferred registers *) + +Definition destroyed_by_op (op: operation): list mreg := + match op with + | Ofloatconst _ => R12 :: nil + | Ointoffloat => F13 :: nil + | _ => nil + end. + +Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg := + R12 :: nil. + +Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := + match chunk with + | Mfloat32 => R11 :: R12 :: F13 :: nil + | _ => R11 :: R12 :: nil + end. + +Definition destroyed_by_cond (cond: condition): list mreg := + nil. + +Definition destroyed_by_jumptable: list mreg := + R12 :: nil. + +Definition destroyed_by_builtin (ef: external_function): list mreg := + match ef with + | EF_builtin _ _ => F13 :: nil + | EF_vload _ => nil + | EF_vstore Mfloat32 => F13 :: nil + | EF_vstore _ => nil + | EF_vload_global _ _ _ => R11 :: nil + | EF_vstore_global Mint64 _ _ => R10 :: R11 :: R12 :: nil + | EF_vstore_global Mfloat32 _ _ => R11 :: R12 :: F13 :: nil + | EF_vstore_global _ _ _ => R11 :: R12 :: nil + | EF_memcpy _ _ => R11 :: R12 :: F13 :: nil + | _ => nil + end. + +Definition destroyed_at_function_entry: list mreg := + nil. + +Definition temp_for_parent_frame: mreg := + R11. + +Definition mregs_for_operation (op: operation): list (option mreg) * option mreg := + (nil, None). + +Definition mregs_for_builtin (ef: external_function): list (option mreg) * list (option mreg) := + (nil, nil). + +Global Opaque + destroyed_by_op destroyed_by_load destroyed_by_store + destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin + destroyed_at_function_entry temp_for_parent_frame + mregs_for_operation mregs_for_builtin. + +(** Two-address operations. Return [true] if the first argument and + the result must be in the same location *and* are unconstrained + by [mregs_for_operation]. There is only one: rotate-mask-insert. *) + +Definition two_address_op (op: operation) : bool := + match op with + | Oroli _ _ => true + | _ => false + end. diff --git a/powerpc/Machregsaux.ml b/powerpc/Machregsaux.ml index 713e14d..b8d7c97 100644 --- a/powerpc/Machregsaux.ml +++ b/powerpc/Machregsaux.ml @@ -17,21 +17,20 @@ open Machregs let register_names = [ ("R3", R3); ("R4", R4); ("R5", R5); ("R6", R6); ("R7", R7); ("R8", R8); ("R9", R9); ("R10", R10); + ("R11", R11); ("R12", R12); ("R14", R14); ("R15", R15); ("R16", R16); ("R17", R17); ("R18", R18); ("R19", R19); ("R20", R20); ("R21", R21); ("R22", R22); ("R23", R23); ("R24", R24); ("R25", R25); ("R26", R26); ("R27", R27); ("R28", R28); ("R29", R29); ("R30", R30); ("R31", R31); - ("F1", F1); ("F2", F2); ("F3", F3); ("F4", F4); + ("F0", F0); ("F1", F1); ("F2", F2); ("F3", F3); ("F4", F4); ("F5", F5); ("F6", F6); ("F7", F7); ("F8", F8); - ("F9", F9); ("F10", F10); ("F11", F11); - ("F14", F14); ("F15", F15); + ("F9", F9); ("F10", F10); ("F11", F11); ("F12", F12); + ("F13", F13); ("F14", F14); ("F15", F15); ("F16", F16); ("F17", F17); ("F18", F18); ("F19", F19); ("F20", F20); ("F21", F21); ("F22", F22); ("F23", F23); ("F24", F24); ("F25", F25); ("F26", F26); ("F27", F27); - ("F28", F28); ("F29", F29); ("F30", F30); ("F31", F31); - ("R11", IT1); ("R12", IT2); - ("F0", FT1); ("F12", FT2); ("F13", FT3) + ("F28", F28); ("F29", F29); ("F30", F30); ("F31", F31) ] let name_of_register r = diff --git a/powerpc/Op.v b/powerpc/Op.v index 110796b..e584726 100644 --- a/powerpc/Op.v +++ b/powerpc/Op.v @@ -97,6 +97,10 @@ Inductive operation : Type := (*c Conversions between int and float: *) | Ointoffloat: operation (**r [rd = signed_int_of_float(r1)] *) | Ofloatofwords: operation (**r [rd = float_of_words(r1,r2)] *) +(*c Manipulating 64-bit integers: *) + | Omakelong: operation (**r [rd = r1 << 32 | r2] *) + | Olowlong: operation (**r [rd = low-word(r1)] *) + | Ohighlong: operation (**r [rd = high-word(r1)] *) (*c Boolean tests: *) | Ocmp: condition -> operation. (**r [rd = 1] if condition holds, [rd = 0] otherwise. *) @@ -205,6 +209,9 @@ Definition eval_operation | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1) | Ointoffloat, v1::nil => Val.intoffloat v1 | Ofloatofwords, v1::v2::nil => Some(Val.floatofwords v1 v2) + | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2) + | Olowlong, v1::nil => Some(Val.loword v1) + | Ohighlong, v1::nil => Some(Val.hiword v1) | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m)) | _, _ => None end. @@ -225,7 +232,7 @@ Ltac FuncInv := match goal with | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ => destruct x; simpl in H; try discriminate; FuncInv - | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ => + | H: (match ?v with Vundef => _ | Vint _ => _ | Vlong _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ => destruct v; simpl in H; try discriminate; FuncInv | H: (Some _ = Some _) |- _ => injection H; intros; clear H; FuncInv @@ -292,6 +299,9 @@ Definition type_of_operation (op: operation) : list typ * typ := | Osingleoffloat => (Tfloat :: nil, Tfloat) | Ointoffloat => (Tfloat :: nil, Tint) | Ofloatofwords => (Tint :: Tint :: nil, Tfloat) + | Omakelong => (Tint :: Tint :: nil, Tlong) + | Olowlong => (Tlong :: nil, Tint) + | Ohighlong => (Tlong :: nil, Tint) | Ocmp c => (type_of_condition c, Tint) end. @@ -366,6 +376,9 @@ Proof with (try exact I). destruct v0... destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2... destruct v0; destruct v1... + destruct v0; destruct v1... + destruct v0... + destruct v0... destruct (eval_condition c vl m); simpl... destruct b... Qed. @@ -481,6 +494,33 @@ Proof. rewrite Val.add_assoc. simpl. auto. Qed. +(** Offset an addressing mode [addr] by a quantity [delta], so that + it designates the pointer [delta] bytes past the pointer designated + by [addr]. May be undefined, in which case [None] is returned. *) + +Definition offset_addressing (addr: addressing) (delta: int) : option addressing := + match addr with + | Aindexed n => Some(Aindexed (Int.add n delta)) + | Aindexed2 => None + | Aglobal s n => Some(Aglobal s (Int.add n delta)) + | Abased s n => Some(Abased s (Int.add n delta)) + | Ainstack n => Some(Ainstack (Int.add n delta)) + end. + +Lemma eval_offset_addressing: + forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v, + offset_addressing addr delta = Some addr' -> + eval_addressing ge sp addr args = Some v -> + eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)). +Proof. + intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst. + rewrite Val.add_assoc; auto. + unfold symbol_address. destruct (Genv.find_symbol ge i); auto. + unfold symbol_address. destruct (Genv.find_symbol ge i); auto. + rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. + rewrite Val.add_assoc. auto. +Qed. + (** Transformation of addressing modes with two operands or more into an equivalent arithmetic operation. This is used in the [Reload] pass when a store instruction cannot be reloaded directly because @@ -510,14 +550,6 @@ Proof. intros. destruct addr; simpl in H; reflexivity || omegaContradiction. Qed. -(** Two-address operations. There is only one: rotate-mask-insert. *) - -Definition two_address_op (op: operation) : bool := - match op with - | Oroli _ _ => true - | _ => false - end. - (** Operations that are so cheap to recompute that CSE should not factor them out. *) Definition is_trivial_op (op: operation) : bool := @@ -767,7 +799,10 @@ Proof. inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2. exists (Vint i); auto. inv H4; inv H2; simpl; auto. - subst v1. destruct (eval_condition c vl1 m1) eqn:?. + inv H4; inv H2; simpl; auto. + inv H4; simpl; auto. + inv H4; simpl; auto. + subst. destruct (eval_condition c vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. destruct b; simpl; constructor. simpl; constructor. diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml index 2d69be1..319e12c 100644 --- a/powerpc/PrintAsm.ml +++ b/powerpc/PrintAsm.ml @@ -223,8 +223,8 @@ let rolm_mask n = - annotation statements: take their arguments in registers or stack locations; generate no code; - inlined by the compiler: take their arguments in arbitrary - registers; preserve all registers except the temporaries - (GPR0, GPR11, GPR12, FPR0, FPR12, FPR13); + registers; preserve all registers except the reserved temporaries + (GPR0, GPR11, GPR12, FPR13); - inlined while printing asm code; take their arguments in locations dictated by the calling conventions; preserve callee-save regs only. *) @@ -239,11 +239,12 @@ let print_annot_val oc txt args res = fprintf oc "%s annotation: " comment; PrintAnnot.print_annot_val preg oc txt args; match args, res with - | IR src :: _, IR dst -> + | [IR src], [IR dst] -> if dst <> src then fprintf oc " mr %a, %a\n" ireg dst ireg src - | FR src :: _, FR dst -> + | [FR src], [FR dst] -> if dst <> src then fprintf oc " fmr %a, %a\n" freg dst freg src - | _, _ -> assert false + | _, _ -> + assert false (* Handling of memcpy *) @@ -256,8 +257,8 @@ let print_annot_val oc txt args res = let print_builtin_memcpy_small oc sz al src dst = let rec copy ofs sz = if sz >= 8 && al >= 4 then begin - fprintf oc " lfd %a, %d(%a)\n" freg FPR0 ofs ireg src; - fprintf oc " stfd %a, %d(%a)\n" freg FPR0 ofs ireg dst; + fprintf oc " lfd %a, %d(%a)\n" freg FPR13 ofs ireg src; + fprintf oc " stfd %a, %d(%a)\n" freg FPR13 ofs ireg dst; copy (ofs + 8) (sz - 8) end else if sz >= 4 then begin fprintf oc " lwz %a, %d(%a)\n" ireg GPR0 ofs ireg src; @@ -326,14 +327,23 @@ let print_builtin_vload_common oc chunk base offset res = fprintf oc " lfs %a, %a(%a)\n" freg res constant offset ireg base | (Mfloat64 | Mfloat64al32), FR res -> fprintf oc " lfd %a, %a(%a)\n" freg res constant offset ireg base + (* Mint64 is special-cased below *) | _ -> assert false let print_builtin_vload oc chunk args res = fprintf oc "%s begin builtin __builtin_volatile_read\n" comment; - begin match args with - | [IR addr] -> + begin match args, res with + | [IR addr], [res] when chunk <> Mint64 -> print_builtin_vload_common oc chunk addr (Cint Integers.Int.zero) res + | [IR addr], [IR res1; IR res2] when chunk = Mint64 -> + if addr <> res1 then begin + fprintf oc " lwz %a, 0(%a)\n" ireg res1 ireg addr; + fprintf oc " lwz %a, 4(%a)\n" ireg res2 ireg addr + end else begin + fprintf oc " lwz %a, 4(%a)\n" ireg res2 ireg addr; + fprintf oc " lwz %a, 0(%a)\n" ireg res1 ireg addr + end | _ -> assert false end; @@ -341,9 +351,24 @@ let print_builtin_vload oc chunk args res = let print_builtin_vload_global oc chunk id ofs args res = fprintf oc "%s begin builtin __builtin_volatile_read\n" comment; - fprintf oc " addis %a, %a, %a\n" - ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); - print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res; + begin match res with + | [res] when chunk <> Mint64 -> + fprintf oc " addis %a, %a, %a\n" + ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res + | [IR res1; IR res2] when chunk = Mint64 -> + fprintf oc " addis %a, %a, %a\n" + ireg res1 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + fprintf oc " lwz %a, %a(%a)\n" + ireg res1 constant (Csymbol_low(id, ofs)) ireg res1; + let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in + fprintf oc " addis %a, %a, %a\n" + ireg res2 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + fprintf oc " lwz %a, %a(%a)\n" + ireg res2 constant (Csymbol_low(id, ofs)) ireg res2 + | _ -> + assert false + end; fprintf oc "%s end builtin __builtin_volatile_read\n" comment let print_builtin_vstore_common oc chunk base offset src = @@ -359,14 +384,18 @@ let print_builtin_vstore_common oc chunk base offset src = fprintf oc " stfs %a, %a(%a)\n" freg FPR13 constant offset ireg base | (Mfloat64 | Mfloat64al32), FR src -> fprintf oc " stfd %a, %a(%a)\n" freg src constant offset ireg base + (* Mint64 is special-cased below *) | _ -> assert false let print_builtin_vstore oc chunk args = fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; begin match args with - | [IR addr; src] -> + | [IR addr; src] when chunk <> Mint64 -> print_builtin_vstore_common oc chunk addr (Cint Integers.Int.zero) src + | [IR addr; IR src1; IR src2] when chunk = Mint64 -> + fprintf oc " stw %a, 0(%a)\n" ireg src1 ireg addr; + fprintf oc " stw %a, 4(%a)\n" ireg src2 ireg addr | _ -> assert false end; @@ -375,11 +404,24 @@ let print_builtin_vstore oc chunk args = let print_builtin_vstore_global oc chunk id ofs args = fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; begin match args with - | [src] -> + | [src] when chunk <> Mint64 -> let tmp = if src = IR GPR11 then GPR12 else GPR11 in fprintf oc " addis %a, %a, %a\n" ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); print_builtin_vstore_common oc chunk tmp (Csymbol_low(id, ofs)) src + | [IR src1; IR src2] when chunk = Mint64 -> + let tmp = + if not (List.mem GPR12 [src1; src2]) then GPR12 else + if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in + fprintf oc " addis %a, %a, %a\n" + ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + fprintf oc " stw %a, %a(%a)\n" + ireg src1 constant (Csymbol_low(id, ofs)) ireg tmp; + let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in + fprintf oc " addis %a, %a, %a\n" + ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + fprintf oc " stw %a, %a(%a)\n" + ireg src2 constant (Csymbol_low(id, ofs)) ireg tmp | _ -> assert false end; @@ -389,47 +431,84 @@ let print_builtin_vstore_global oc chunk id ofs args = let print_builtin_inline oc name args res = fprintf oc "%s begin builtin %s\n" comment name; - (* Can use as temporaries: GPR0, GPR11, GPR12, FPR0, FPR12, FPR13 *) + (* Can use as temporaries: GPR0, FPR13 *) begin match name, args, res with (* Integer arithmetic *) - | "__builtin_mulhw", [IR a1; IR a2], IR res -> + | "__builtin_mulhw", [IR a1; IR a2], [IR res] -> fprintf oc " mulhw %a, %a, %a\n" ireg res ireg a1 ireg a2 - | "__builtin_mulhwu", [IR a1; IR a2], IR res -> + | "__builtin_mulhwu", [IR a1; IR a2], [IR res] -> fprintf oc " mulhwu %a, %a, %a\n" ireg res ireg a1 ireg a2 - | "__builtin_cntlz", [IR a1], IR res -> + | "__builtin_cntlz", [IR a1], [IR res] -> fprintf oc " cntlzw %a, %a\n" ireg res ireg a1 - | "__builtin_bswap", [IR a1], IR res -> + | "__builtin_bswap", [IR a1], [IR res] -> fprintf oc " stwu %a, -8(%a)\n" ireg a1 ireg GPR1; fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg GPR1; fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1 (* Float arithmetic *) - | "__builtin_fmadd", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fmsub", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fnmadd", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fnmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fnmsub", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fnmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fabs", [FR a1], FR res -> + | "__builtin_fabs", [FR a1], [FR res] -> fprintf oc " fabs %a, %a\n" freg res freg a1 - | "__builtin_fsqrt", [FR a1], FR res -> + | "__builtin_fsqrt", [FR a1], [FR res] -> fprintf oc " fsqrt %a, %a\n" freg res freg a1 - | "__builtin_frsqrte", [FR a1], FR res -> + | "__builtin_frsqrte", [FR a1], [FR res] -> fprintf oc " frsqrte %a, %a\n" freg res freg a1 - | "__builtin_fres", [FR a1], FR res -> + | "__builtin_fres", [FR a1], [FR res] -> fprintf oc " fres %a, %a\n" freg res freg a1 - | "__builtin_fsel", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fsel %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fcti", [FR a1], IR res -> + | "__builtin_fcti", [FR a1], [IR res] -> fprintf oc " fctiw %a, %a\n" freg FPR13 freg a1; fprintf oc " stfdu %a, -8(%a)\n" freg FPR13 ireg GPR1; fprintf oc " lwz %a, 4(%a)\n" ireg res ireg GPR1; fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1 + (* 64-bit integer arithmetic *) + | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> + if rl = ah then begin + fprintf oc " subfic %a, %a, 0\n" ireg GPR0 ireg al; + fprintf oc " subfze %a, %a\n" ireg rh ireg ah; + fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 + end else begin + fprintf oc " subfic %a, %a, 0\n" ireg rl ireg al; + fprintf oc " subfze %a, %a\n" ireg rh ireg ah + end + | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + if rl = ah || rl = bh then begin + fprintf oc " addc %a, %a, %a\n" ireg GPR0 ireg al ireg bl; + fprintf oc " adde %a, %a, %a\n" ireg rh ireg ah ireg bh; + fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 + end else begin + fprintf oc " addc %a, %a, %a\n" ireg rl ireg al ireg bl; + fprintf oc " adde %a, %a, %a\n" ireg rh ireg ah ireg bh + end + | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + if rl = ah || rl = bh then begin + fprintf oc " subfc %a, %a, %a\n" ireg GPR0 ireg bl ireg al; + fprintf oc " subfe %a, %a, %a\n" ireg rh ireg bh ireg ah; + fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 + end else begin + fprintf oc " subfc %a, %a, %a\n" ireg rl ireg bl ireg al; + fprintf oc " subfe %a, %a, %a\n" ireg rh ireg bh ireg ah + end + | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> + if rl = a || rl = b then begin + fprintf oc " mullw %a, %a, %a\n" ireg GPR0 ireg a ireg b; + fprintf oc " mulhwu %a, %a, %a\n" ireg rh ireg a ireg b; + fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 + end else begin + fprintf oc " mullw %a, %a, %a\n" ireg rl ireg a ireg b; + fprintf oc " mulhwu %a, %a, %a\n" ireg rh ireg a ireg b + end (* Memory accesses *) - | "__builtin_read16_reversed", [IR a1], IR res -> + | "__builtin_read16_reversed", [IR a1], [IR res] -> fprintf oc " lhbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1 - | "__builtin_read32_reversed", [IR a1], IR res -> + | "__builtin_read32_reversed", [IR a1], [IR res] -> fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1 | "__builtin_write16_reversed", [IR a1; IR a2], _ -> fprintf oc " sthbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1 @@ -484,9 +563,9 @@ let print_instruction oc tbl pc fallthrough = function if adj >= -0x8000l then fprintf oc " stwu %a, %ld(%a)\n" ireg GPR1 adj ireg GPR1 else begin - fprintf oc " addis %a, 0, %ld\n" ireg GPR12 (Int32.shift_right_logical adj 16); - fprintf oc " ori %a, %a, %ld\n" ireg GPR12 ireg GPR12 (Int32.logand adj 0xFFFFl); - fprintf oc " stwux %a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR12 + fprintf oc " addis %a, 0, %ld\n" ireg GPR0 (Int32.shift_right_logical adj 16); + fprintf oc " ori %a, %a, %ld\n" ireg GPR0 ireg GPR0 (Int32.logand adj 0xFFFFl); + fprintf oc " stwux %a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR0 end | Pand_(r1, r2, r3) -> fprintf oc " and. %a, %a, %a\n" ireg r1 ireg r2 ireg r3 @@ -860,7 +939,7 @@ let print_function oc name code = (* Generation of stub functions *) -let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$" +let re_variadic_stub = Str.regexp "\\(.*\\)\\$[ifl]*$" (* Stubs for EABI *) @@ -908,6 +987,11 @@ let print_init oc = function fprintf oc " .short %ld\n" (camlint_of_coqint n) | Init_int32 n -> fprintf oc " .long %ld\n" (camlint_of_coqint n) + | Init_int64 n -> + let b = camlint64_of_coqint n in + fprintf oc " .long 0x%Lx, 0x%Lx\n" + (Int64.shift_right_logical b 32) + (Int64.logand b 0xFFFFFFFFL) | Init_float32 n -> fprintf oc " .long 0x%lx %s %.18g\n" (camlint_of_coqint (Floats.Float.bits_of_single n)) diff --git a/powerpc/PrintOp.ml b/powerpc/PrintOp.ml index a2f9ff8..cac8cd2 100644 --- a/powerpc/PrintOp.ml +++ b/powerpc/PrintOp.ml @@ -96,6 +96,9 @@ let print_operation reg pp = function | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1 | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1 | Ofloatofwords, [r1;r2] -> fprintf pp "floatofwords(%a,%a)" reg r1 reg r2 + | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2 + | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1 + | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1 | Ocmp c, args -> print_condition reg pp (c, args) | _ -> fprintf pp "<bad operator>" diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp index 905a448..a011847 100644 --- a/powerpc/SelectOp.vp +++ b/powerpc/SelectOp.vp @@ -200,6 +200,8 @@ Nondetfunction mul (e1: expr) (e2: expr) := (** ** Bitwise and, or, xor *) Nondetfunction andimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else + if Int.eq n1 Int.mone then e2 else match e2 with | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.and n1 n2)) Enil @@ -240,6 +242,8 @@ Definition same_expr_pure (e1 e2: expr) := end. Nondetfunction orimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then e2 else + if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil else match e2 with | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil) @@ -268,6 +272,7 @@ Nondetfunction or (e1: expr) (e2: expr) := end. Nondetfunction xorimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then e2 else match e2 with | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.xor n1 n2)) Enil | Eop (Oxorimm n2) (t2:::Enil) => Eop (Oxorimm (Int.xor n1 n2)) (t2:::Enil) @@ -444,12 +449,18 @@ Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil). (** ** Recognition of addressing modes for load and store operations *) +Definition can_use_Aindexed2 (chunk: memory_chunk): bool := + match chunk with Mint64 => false | _ => true end. + Nondetfunction addressing (chunk: memory_chunk) (e: expr) := match e with | Eop (Oaddrsymbol s n) Enil => (Aglobal s n, Enil) | Eop (Oaddrstack n) Enil => (Ainstack n, Enil) | Eop Oadd (Eop (Oaddrsymbol s n) Enil ::: e2 ::: Enil) => (Abased s n, e2:::Enil) | Eop (Oaddimm n) (e1:::Enil) => (Aindexed n, e1:::Enil) - | Eop Oadd (e1:::e2:::Enil) => (Aindexed2, e1:::e2:::Enil) + | Eop Oadd (e1:::e2:::Enil) => + if can_use_Aindexed2 chunk + then (Aindexed2, e1:::e2:::Enil) + else (Aindexed Int.zero, Eop Oadd (e1:::e2:::Enil) ::: Enil) | _ => (Aindexed Int.zero, e:::Enil) end. diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v index 7be8858..f751314 100644 --- a/powerpc/SelectOpproof.v +++ b/powerpc/SelectOpproof.v @@ -27,13 +27,6 @@ Require Import SelectOp. Open Local Scope cminorsel_scope. -Section CMCONSTR. - -Variable ge: genv. -Variable sp: val. -Variable e: env. -Variable m: mem. - (** * Useful lemmas and tactics *) (** The following are trivial lemmas and custom tactics that help @@ -81,6 +74,13 @@ Ltac TrivialExists := (** * Correctness of the smart constructors *) +Section CMCONSTR. + +Variable ge: genv. +Variable sp: val. +Variable e: env. +Variable m: mem. + (** We now show that the code generated by "smart constructor" functions such as [SelectOp.notint] behaves as expected. Continuing the [notint] example, we show that if the expression [e] @@ -163,7 +163,7 @@ Proof. rewrite Int.add_commut. auto. unfold symbol_address. destruct (Genv.find_symbol ge s); simpl; auto. rewrite Int.add_commut; auto. rewrite Val.add_assoc. rewrite Int.add_commut. auto. - subst x. rewrite Val.add_assoc. rewrite Int.add_commut. auto. + subst. rewrite Val.add_assoc. rewrite Int.add_commut. auto. Qed. Theorem eval_add: binary_constructor_sound add Val.add. @@ -327,7 +327,15 @@ Qed. Theorem eval_andimm: forall n, unary_constructor_sound (andimm n) (fun x => Val.and x (Vint n)). Proof. - intros; red; intros until x. unfold andimm. case (andimm_match a); intros. + intros; red; intros until x. unfold andimm. + predSpec Int.eq Int.eq_spec n Int.zero. + intros. subst. exists (Vint Int.zero); split. EvalOp. + destruct x; simpl; auto. rewrite Int.and_zero; auto. + predSpec Int.eq Int.eq_spec n Int.mone. + intros. subst. exists x; split. auto. + destruct x; simpl; auto. rewrite Int.and_mone; auto. + clear H H0. + case (andimm_match a); intros. InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto. set (n' := Int.and n n2). destruct (Int.eq (Int.shru (Int.shl n' amount) amount) n' && @@ -376,8 +384,12 @@ Qed. Theorem eval_orimm: forall n, unary_constructor_sound (orimm n) (fun x => Val.or x (Vint n)). Proof. - intros; red; intros until x. - unfold orimm. destruct (orimm_match a); intros; InvEval. + intros; red; intros until x. unfold orimm. + predSpec Int.eq Int.eq_spec n Int.zero. + intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.or_zero; auto. + predSpec Int.eq Int.eq_spec n Int.mone. + intros. subst. exists (Vint Int.mone); split. EvalOp. destruct x; simpl; auto. rewrite Int.or_mone; auto. + clear H H0. destruct (orimm_match a); intros; InvEval. TrivialExists. simpl. rewrite Int.or_commut; auto. subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists. TrivialExists. @@ -433,8 +445,10 @@ Qed. Theorem eval_xorimm: forall n, unary_constructor_sound (xorimm n) (fun x => Val.xor x (Vint n)). Proof. - intros; red; intros until x. - unfold xorimm. destruct (xorimm_match a); intros; InvEval. + intros; red; intros until x. unfold xorimm. + predSpec Int.eq Int.eq_spec n Int.zero. + intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.xor_zero; auto. + clear H. destruct (xorimm_match a); intros; InvEval. TrivialExists. simpl. rewrite Int.xor_commut; auto. subst. rewrite Val.xor_assoc. simpl. rewrite Int.xor_commut. TrivialExists. TrivialExists. @@ -445,9 +459,9 @@ Proof. red; intros until y; unfold xor; case (xor_match a b); intros; InvEval. rewrite Val.xor_commut. apply eval_xorimm; auto. apply eval_xorimm; auto. - subst x. rewrite Val.xor_commut. rewrite Val.not_xor. rewrite <- Val.xor_assoc. + subst. rewrite Val.xor_commut. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. rewrite Val.xor_commut. TrivialExists. - subst y. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. TrivialExists. + subst. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. TrivialExists. TrivialExists. Qed. @@ -843,7 +857,11 @@ Proof. exists (@nil val). split. eauto with evalexpr. simpl. auto. exists (v0 :: nil). split. eauto with evalexpr. simpl. congruence. exists (v1 :: nil). split. eauto with evalexpr. simpl. congruence. + destruct (can_use_Aindexed2 chunk). exists (v1 :: v0 :: nil). split. eauto with evalexpr. simpl. congruence. + exists (Vptr b ofs :: nil). split. + constructor. EvalOp. simpl; congruence. constructor. + simpl. rewrite Int.add_zero. auto. exists (v :: nil). split. eauto with evalexpr. subst v. simpl. rewrite Int.add_zero. auto. Qed. diff --git a/powerpc/eabi/Conventions1.v b/powerpc/eabi/Conventions1.v index 652f4a1..8ff0dc0 100644 --- a/powerpc/eabi/Conventions1.v +++ b/powerpc/eabi/Conventions1.v @@ -21,21 +21,18 @@ Require Import Locations. (** Machine registers (type [mreg] in module [Locations]) are divided in the following groups: -- Temporaries used for spilling, reloading, and parallel move operations. -- Allocatable registers, that can be assigned to RTL pseudo-registers. - These are further divided into: --- Callee-save registers, whose value is preserved across a function call. --- Caller-save registers that can be modified during a function call. +- Callee-save registers, whose value is preserved across a function call. +- Caller-save registers that can be modified during a function call. We follow the PowerPC/EABI application binary interface (ABI) in our choice of callee- and caller-save registers. *) Definition int_caller_save_regs := - R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil. + R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: R12 :: nil. Definition float_caller_save_regs := - F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: F11 :: nil. + F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: F11 :: F12 :: F13 :: nil. Definition int_callee_save_regs := R31 :: R30 :: R29 :: R28 :: R27 :: R26 :: R25 :: R24 :: R23 :: @@ -45,26 +42,11 @@ Definition float_callee_save_regs := F31 :: F30 :: F29 :: F28 :: F27 :: F26 :: F25 :: F24 :: F23 :: F22 :: F21 :: F20 :: F19 :: F18 :: F17 :: F16 :: F15 :: F14 :: nil. -Definition destroyed_at_call_regs := - int_caller_save_regs ++ float_caller_save_regs. - Definition destroyed_at_call := - List.map R destroyed_at_call_regs. - -Definition int_temporaries := IT1 :: IT2 :: nil. - -Definition float_temporaries := FT1 :: FT2 :: FT3 :: nil. - -Definition temporary_regs := int_temporaries ++ float_temporaries. - -Definition temporaries := List.map R temporary_regs. - -Definition destroyed_at_move_regs: list mreg := nil. - -Definition destroyed_at_move := List.map R destroyed_at_move_regs. + int_caller_save_regs ++ float_caller_save_regs. Definition dummy_int_reg := R3. (**r Used in [Coloring]. *) -Definition dummy_float_reg := F1. (**r Used in [Coloring]. *) +Definition dummy_float_reg := F0. (**r Used in [Coloring]. *) (** The [index_int_callee_save] and [index_float_callee_save] associate a unique positive integer to callee-save registers. This integer is @@ -175,34 +157,26 @@ Qed. Lemma register_classification: forall r, - (In (R r) temporaries \/ In (R r) destroyed_at_call) \/ - (In r int_callee_save_regs \/ In r float_callee_save_regs). + In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs. Proof. destruct r; - try (left; left; simpl; OrEq); - try (left; right; simpl; OrEq); + try (left; simpl; OrEq); try (right; left; simpl; OrEq); try (right; right; simpl; OrEq). Qed. Lemma int_callee_save_not_destroyed: forall r, - In (R r) temporaries \/ In (R r) destroyed_at_call -> - ~(In r int_callee_save_regs). + In r destroyed_at_call -> In r int_callee_save_regs -> False. Proof. - intros; red; intros. elim H. - generalize H0. simpl; ElimOrEq; NotOrEq. - generalize H0. simpl; ElimOrEq; NotOrEq. + intros. revert H0 H. simpl. ElimOrEq; NotOrEq. Qed. Lemma float_callee_save_not_destroyed: forall r, - In (R r) temporaries \/ In (R r) destroyed_at_call -> - ~(In r float_callee_save_regs). + In r destroyed_at_call -> In r float_callee_save_regs -> False. Proof. - intros; red; intros. elim H. - generalize H0. simpl; ElimOrEq; NotOrEq. - generalize H0. simpl; ElimOrEq; NotOrEq. + intros. revert H0 H. simpl. ElimOrEq; NotOrEq. Qed. Lemma int_callee_save_type: @@ -259,39 +233,27 @@ Qed. (** ** Location of function result *) (** The result value of a function is passed back to the caller in - registers [R3] or [F1], depending on the type of the returned value. + registers [R3] or [F1] or [R3, R4], depending on the type of the returned value. We treat a function without result as a function with one integer result. *) -Definition loc_result (s: signature) : mreg := +Definition loc_result (s: signature) : list mreg := match s.(sig_res) with - | None => R3 - | Some Tint => R3 - | Some Tfloat => F1 + | None => R3 :: nil + | Some Tint => R3 :: nil + | Some Tfloat => F1 :: nil + | Some Tlong => R3 :: R4 :: nil end. -(** The result location has the type stated in the signature. *) - -Lemma loc_result_type: - forall sig, - mreg_type (loc_result sig) = - match sig.(sig_res) with None => Tint | Some ty => ty end. -Proof. - intros; unfold loc_result. - destruct (sig_res sig). - destruct t; reflexivity. - reflexivity. -Qed. - -(** The result location is a caller-save register or a temporary *) +(** The result location is a caller-save register *) Lemma loc_result_caller_save: - forall (s: signature), - In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries. + forall (s: signature) (r: mreg), + In r (loc_result s) -> In r destroyed_at_call. Proof. - intros; unfold loc_result. left; - destruct (sig_res s). - destruct t; simpl; OrEq. - simpl; OrEq. + intros. + assert (r = R3 \/ r = R4 \/ r = F1). + unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition. + destruct H0 as [A | [A | A]]; subst r; simpl; OrEq. Qed. (** ** Location of function arguments *) @@ -300,213 +262,162 @@ Qed. to a function: - The first 8 integer arguments are passed in registers [R3] to [R10]. - The first 8 float arguments are passed in registers [F1] to [F8]. +- The first 4 long integer arguments are passed in register pairs [R3,R4] ... [R9,R10]. - Extra arguments are passed on the stack, in [Outgoing] slots, consecutively assigned (1 word for an integer argument, 2 words for a float), starting at word offset 0. - No stack space is reserved for the arguments that are passed in registers. *) +Definition int_param_regs := + R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil. +Definition float_param_regs := + F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: nil. + Fixpoint loc_arguments_rec - (tyl: list typ) (iregl: list mreg) (fregl: list mreg) - (ofs: Z) {struct tyl} : list loc := + (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list loc := match tyl with | nil => nil | Tint :: tys => - match iregl with - | nil => - S (Outgoing ofs Tint) :: loc_arguments_rec tys nil fregl (ofs + 1) - | ireg :: iregs => - R ireg :: loc_arguments_rec tys iregs fregl ofs + match list_nth_z int_param_regs ir with + | None => + S Outgoing ofs Tint :: loc_arguments_rec tys ir fr (ofs + 1) + | Some ireg => + R ireg :: loc_arguments_rec tys (ir + 1) fr ofs end | Tfloat :: tys => - match fregl with - | nil => - S (Outgoing ofs Tfloat) :: loc_arguments_rec tys iregl nil (ofs + 2) - | freg :: fregs => - R freg :: loc_arguments_rec tys iregl fregs ofs + match list_nth_z float_param_regs fr with + | None => + S Outgoing ofs Tfloat :: loc_arguments_rec tys ir fr (ofs + 2) + | Some freg => + R freg :: loc_arguments_rec tys ir (fr + 1) ofs + end + | Tlong :: tys => + let ir := align ir 2 in + match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with + | Some r1, Some r2 => + R r1 :: R r2 :: loc_arguments_rec tys (ir + 2) fr ofs + | _, _ => + S Outgoing ofs Tint :: S Outgoing (ofs + 1) Tint :: loc_arguments_rec tys ir fr (ofs + 2) end end. -Definition int_param_regs := - R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil. -Definition float_param_regs := - F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: nil. - (** [loc_arguments s] returns the list of locations where to store arguments when calling a function with signature [s]. *) Definition loc_arguments (s: signature) : list loc := - loc_arguments_rec s.(sig_args) int_param_regs float_param_regs 0. + loc_arguments_rec s.(sig_args) 0 0 0. (** [size_arguments s] returns the number of [Outgoing] slots used to call a function with signature [s]. *) -Fixpoint size_arguments_rec - (tyl: list typ) (iregl: list mreg) (fregl: list mreg) - (ofs: Z) {struct tyl} : Z := +Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z := match tyl with | nil => ofs | Tint :: tys => - match iregl with - | nil => size_arguments_rec tys nil fregl (ofs + 1) - | ireg :: iregs => size_arguments_rec tys iregs fregl ofs + match list_nth_z int_param_regs ir with + | None => size_arguments_rec tys ir fr (ofs + 1) + | Some ireg => size_arguments_rec tys (ir + 1) fr ofs end | Tfloat :: tys => - match fregl with - | nil => size_arguments_rec tys iregl nil (ofs + 2) - | freg :: fregs => size_arguments_rec tys iregl fregs ofs + match list_nth_z float_param_regs fr with + | None => size_arguments_rec tys ir fr (ofs + 2) + | Some freg => size_arguments_rec tys ir (fr + 1) ofs + end + | Tlong :: tys => + let ir := align ir 2 in + match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with + | Some r1, Some r2 => size_arguments_rec tys (ir + 2) fr ofs + | _, _ => size_arguments_rec tys ir fr (ofs + 2) end end. + Definition size_arguments (s: signature) : Z := - size_arguments_rec s.(sig_args) int_param_regs float_param_regs 0. + size_arguments_rec s.(sig_args) 0 0 0. (** A tail-call is possible for a signature if the corresponding arguments are all passed in registers. *) Definition tailcall_possible (s: signature) : Prop := forall l, In l (loc_arguments s) -> - match l with R _ => True | S _ => False end. + match l with R _ => True | S _ _ _ => False end. -(** Argument locations are either non-temporary registers or [Outgoing] +(** Argument locations are either caller-save registers or [Outgoing] stack slots at nonnegative offsets. *) Definition loc_argument_acceptable (l: loc) : Prop := match l with - | R r => ~(In l temporaries) - | S (Outgoing ofs ty) => ofs >= 0 + | R r => In r destroyed_at_call + | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong | _ => False end. Remark loc_arguments_rec_charact: - forall tyl iregl fregl ofs l, - In l (loc_arguments_rec tyl iregl fregl ofs) -> + forall tyl ir fr ofs l, + In l (loc_arguments_rec tyl ir fr ofs) -> match l with - | R r => In r iregl \/ In r fregl - | S (Outgoing ofs' ty) => ofs' >= ofs - | S _ => False + | R r => In r int_param_regs \/ In r float_param_regs + | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong + | S _ _ _ => False end. Proof. +Opaque list_nth_z. induction tyl; simpl loc_arguments_rec; intros. elim H. - destruct a. - destruct iregl; elim H; intro. - subst l. omega. - generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega. - subst l. auto with coqlib. - generalize (IHtyl _ _ _ _ H0). destruct l; auto. simpl; intuition. - destruct fregl; elim H; intro. - subst l. omega. - generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega. - subst l. auto with coqlib. - generalize (IHtyl _ _ _ _ H0). destruct l; auto. - intros [A|B]. left; auto. right; auto with coqlib. + destruct a. + destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H. + subst. left. eapply list_nth_z_in; eauto. + eapply IHtyl; eauto. + subst. split. omega. congruence. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. + destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H. + subst. right. eapply list_nth_z_in; eauto. + eapply IHtyl; eauto. + subst. split. omega. congruence. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. + set (ir' := align ir 2) in *. + destruct (list_nth_z int_param_regs ir') as [r1|] eqn:E1. + destruct (list_nth_z int_param_regs (ir' + 1)) as [r2|] eqn:E2. + destruct H. subst; left; eapply list_nth_z_in; eauto. + destruct H. subst; left; eapply list_nth_z_in; eauto. + eapply IHtyl; eauto. + destruct H. subst. split. omega. congruence. + destruct H. subst. split. omega. congruence. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. + destruct H. subst. split. omega. congruence. + destruct H. subst. split. omega. congruence. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. Qed. Lemma loc_arguments_acceptable: - forall (s: signature) (r: loc), - In r (loc_arguments s) -> loc_argument_acceptable r. + forall (s: signature) (l: loc), + In l (loc_arguments s) -> loc_argument_acceptable l. Proof. unfold loc_arguments; intros. generalize (loc_arguments_rec_charact _ _ _ _ _ H). - destruct r. - intro H0; elim H0. simpl. unfold not. ElimOrEq; NotOrEq. - simpl. unfold not. ElimOrEq; NotOrEq. - destruct s0; try contradiction. - simpl. omega. + destruct l. + intro H0; elim H0; simpl; ElimOrEq; OrEq. + destruct sl; try contradiction. simpl. intuition omega. Qed. Hint Resolve loc_arguments_acceptable: locs. -(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *) - -Remark loc_arguments_rec_notin_reg: - forall tyl iregl fregl ofs r, - ~(In r iregl) -> ~(In r fregl) -> - Loc.notin (R r) (loc_arguments_rec tyl iregl fregl ofs). -Proof. - induction tyl; simpl; intros. - auto. - destruct a. - destruct iregl; simpl. auto. - simpl in H. split. apply sym_not_equal. tauto. - apply IHtyl. tauto. tauto. - destruct fregl; simpl. auto. - simpl in H0. split. apply sym_not_equal. tauto. - apply IHtyl. - red; intro. apply H. auto. - tauto. -Qed. - -Remark loc_arguments_rec_notin_local: - forall tyl iregl fregl ofs ofs0 ty0, - Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs). -Proof. - induction tyl; simpl; intros. - auto. - destruct a. - destruct iregl; simpl; auto. - destruct fregl; simpl; auto. -Qed. - -Remark loc_arguments_rec_notin_outgoing: - forall tyl iregl fregl ofs ofs0 ty0, - ofs0 + typesize ty0 <= ofs -> - Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs). -Proof. - induction tyl; simpl; intros. - auto. - destruct a. - destruct iregl; simpl. - split. omega. eapply IHtyl. omega. - auto. - destruct fregl; simpl. - split. omega. eapply IHtyl. omega. - auto. -Qed. - -Lemma loc_arguments_norepet: - forall (s: signature), Loc.norepet (loc_arguments s). -Proof. - assert (forall tyl iregl fregl ofs, - list_norepet iregl -> - list_norepet fregl -> - list_disjoint iregl fregl -> - Loc.norepet (loc_arguments_rec tyl iregl fregl ofs)). - induction tyl; simpl; intros. - constructor. - destruct a. - destruct iregl; constructor. - apply loc_arguments_rec_notin_outgoing. simpl; omega. auto. - apply loc_arguments_rec_notin_reg. inversion H. auto. - apply list_disjoint_notin with (m :: iregl); auto with coqlib. - apply IHtyl. inv H; auto. auto. - eapply list_disjoint_cons_left; eauto. - destruct fregl; constructor. - apply loc_arguments_rec_notin_outgoing. simpl; omega. auto. - apply loc_arguments_rec_notin_reg. - red; intro. apply (H1 m m). auto. - auto with coqlib. auto. inv H0; auto. - apply IHtyl. auto. - inv H0; auto. - red; intros. apply H1. auto. auto with coqlib. - - intro. unfold loc_arguments. apply H. - unfold int_param_regs. NoRepet. - unfold float_param_regs. NoRepet. - red; intros x y; simpl. ElimOrEq; ElimOrEq; discriminate. -Qed. - (** The offsets of [Outgoing] arguments are below [size_arguments s]. *) Remark size_arguments_rec_above: - forall tyl iregl fregl ofs0, - ofs0 <= size_arguments_rec tyl iregl fregl ofs0. + forall tyl ir fr ofs0, + ofs0 <= size_arguments_rec tyl ir fr ofs0. Proof. induction tyl; simpl; intros. omega. destruct a. - destruct iregl. apply Zle_trans with (ofs0 + 1); auto; omega. auto. - destruct fregl. apply Zle_trans with (ofs0 + 2); auto; omega. auto. + destruct (list_nth_z int_param_regs ir); eauto. apply Zle_trans with (ofs0 + 1); auto; omega. + destruct (list_nth_z float_param_regs fr); eauto. apply Zle_trans with (ofs0 + 2); auto; omega. + set (ir' := align ir 2). + destruct (list_nth_z int_param_regs ir'); eauto. + destruct (list_nth_z int_param_regs (ir' + 1)); eauto. + apply Zle_trans with (ofs0 + 2); auto; omega. + apply Zle_trans with (ofs0 + 2); auto; omega. Qed. Lemma size_arguments_above: @@ -518,81 +429,39 @@ Qed. Lemma loc_arguments_bounded: forall (s: signature) (ofs: Z) (ty: typ), - In (S (Outgoing ofs ty)) (loc_arguments s) -> + In (S Outgoing ofs ty) (loc_arguments s) -> ofs + typesize ty <= size_arguments s. Proof. intros. - assert (forall tyl iregl fregl ofs0, - In (S (Outgoing ofs ty)) (loc_arguments_rec tyl iregl fregl ofs0) -> - ofs + typesize ty <= size_arguments_rec tyl iregl fregl ofs0). + assert (forall tyl ir fr ofs0, + In (S Outgoing ofs ty) (loc_arguments_rec tyl ir fr ofs0) -> + ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0). induction tyl; simpl; intros. elim H0. - destruct a. destruct iregl; elim H0; intro. - inv H1. simpl. apply size_arguments_rec_above. auto. - discriminate. auto. - destruct fregl; elim H0; intro. - inv H1. simpl. apply size_arguments_rec_above. auto. - discriminate. auto. - unfold size_arguments. eapply H0. unfold loc_arguments in H. eauto. -Qed. - -(** Temporary registers do not overlap with argument locations. *) - -Lemma loc_arguments_not_temporaries: - forall sig, Loc.disjoint (loc_arguments sig) temporaries. -Proof. - intros; red; intros x1 x2 H. - generalize (loc_arguments_rec_charact _ _ _ _ _ H). - destruct x1. - intro H0; elim H0; simpl; (ElimOrEq; ElimOrEq; congruence). - destruct s; try contradiction. intro. - simpl; ElimOrEq; auto. -Qed. -Hint Resolve loc_arguments_not_temporaries: locs. - -(** Argument registers are caller-save. *) - -Lemma arguments_caller_save: - forall sig r, - In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call. -Proof. - unfold loc_arguments; intros. - elim (loc_arguments_rec_charact _ _ _ _ _ H); simpl. - ElimOrEq; intuition. - ElimOrEq; intuition. -Qed. - -(** Argument locations agree in number with the function signature. *) - -Lemma loc_arguments_length: - forall sig, - List.length (loc_arguments sig) = List.length sig.(sig_args). -Proof. - assert (forall tyl iregl fregl ofs, - List.length (loc_arguments_rec tyl iregl fregl ofs) = List.length tyl). - induction tyl; simpl; intros. - auto. - destruct a. - destruct iregl; simpl; decEq; auto. - destruct fregl; simpl; decEq; auto. - intros. unfold loc_arguments. auto. -Qed. - -(** Argument locations agree in types with the function signature. *) - -Lemma loc_arguments_type: - forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args). -Proof. - assert (forall tyl iregl fregl ofs, - (forall r, In r iregl -> mreg_type r = Tint) -> - (forall r, In r fregl -> mreg_type r = Tfloat) -> - List.map Loc.type (loc_arguments_rec tyl iregl fregl ofs) = tyl). - induction tyl; simpl; intros. - auto. - destruct a; [destruct iregl|destruct fregl]; simpl; - f_equal; eauto with coqlib. - - intros. unfold loc_arguments. apply H. - intro; simpl. ElimOrEq; reflexivity. - intro; simpl. ElimOrEq; reflexivity. + destruct a. + destruct (list_nth_z int_param_regs ir); destruct H0. + congruence. + eauto. + inv H0. apply size_arguments_rec_above. + eauto. + destruct (list_nth_z float_param_regs fr); destruct H0. + congruence. + eauto. + inv H0. apply size_arguments_rec_above. + eauto. + set (ir' := align ir 2) in *. + destruct (list_nth_z int_param_regs ir'). + destruct (list_nth_z int_param_regs (ir' + 1)). + destruct H0. congruence. destruct H0. congruence. eauto. + destruct H0. inv H0. + transitivity (ofs + 2). simpl; omega. eauto. apply size_arguments_rec_above. + destruct H0. inv H0. + transitivity (ofs0 + 2). simpl; omega. eauto. apply size_arguments_rec_above. + eauto. + destruct H0. inv H0. + transitivity (ofs + 2). simpl; omega. eauto. apply size_arguments_rec_above. + destruct H0. inv H0. + transitivity (ofs0 + 2). simpl; omega. eauto. apply size_arguments_rec_above. + eauto. + eauto. Qed. diff --git a/powerpc/eabi/Stacklayout.v b/powerpc/eabi/Stacklayout.v index cd4d9ae..be823c1 100644 --- a/powerpc/eabi/Stacklayout.v +++ b/powerpc/eabi/Stacklayout.v @@ -24,12 +24,8 @@ Require Import Bounds. Since we would rather store our return address in our own frame, we will not use these 4 bytes, and just reserve them. - Space for outgoing arguments to function calls. -- Local stack slots of integer type. -- Saved return address into caller. +- Local stack slots. - Saved values of integer callee-save registers used by the function. -- One word of padding, if necessary to align the following data - on a 8-byte boundary. -- Local stack slots of float type. - Saved values of float callee-save registers used by the function. - Space for the stack-allocated data declared in Cminor. @@ -43,10 +39,9 @@ Record frame_env : Type := mk_frame_env { fe_size: Z; fe_ofs_link: Z; fe_ofs_retaddr: Z; - fe_ofs_int_local: Z; + fe_ofs_local: Z; fe_ofs_int_callee_save: Z; fe_num_int_callee_save: Z; - fe_ofs_float_local: Z; fe_ofs_float_callee_save: Z; fe_num_float_callee_save: Z; fe_stack_data: Z @@ -56,17 +51,17 @@ Record frame_env : Type := mk_frame_env { function. *) Definition make_env (b: bounds) := - let oil := 8 + 4 * b.(bound_outgoing) in (* integer locals *) - let ora := oil + 4 * b.(bound_int_local) in (* saved return address *) + let ol := align (8 + 4 * b.(bound_outgoing)) 8 in (* locals *) + let ora := ol + 4 * b.(bound_local) in (* saved return address *) let oics := ora + 4 in (* integer callee-saves *) let oendi := oics + 4 * b.(bound_int_callee_save) in - let ofl := align oendi 8 in (* float locals *) - let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *) + let ofcs := align oendi 8 in (* float callee-saves *) let ostkdata := ofcs + 8 * b.(bound_float_callee_save) in (* stack data *) let sz := align (ostkdata + b.(bound_stack_data)) 16 in mk_frame_env sz 0 ora - oil oics b.(bound_int_callee_save) - ofl ofcs b.(bound_float_callee_save) + ol + oics b.(bound_int_callee_save) + ofcs b.(bound_float_callee_save) ostkdata. (** Separation property *) @@ -76,25 +71,25 @@ Remark frame_env_separated: let fe := make_env b in 0 <= fe.(fe_ofs_link) /\ fe.(fe_ofs_link) + 4 <= fe_ofs_arg - /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_int_local) - /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_retaddr) + /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_local) + /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_ofs_retaddr) /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_ofs_int_callee_save) - /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local) - /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save) + /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save) /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_stack_data) - /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size). + /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size) + /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_size). Proof. intros. + generalize (align_le (8 + 4 * b.(bound_outgoing)) 8 (refl_equal _)). generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)). generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 16 (refl_equal _)). unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr, - fe_ofs_int_local, fe_ofs_int_callee_save, + fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save, - fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save, + fe_ofs_float_callee_save, fe_num_float_callee_save, fe_stack_data, fe_ofs_arg. intros. - generalize (bound_int_local_pos b); intro; - generalize (bound_float_local_pos b); intro; + generalize (bound_local_pos b); intro; generalize (bound_int_callee_save_pos b); intro; generalize (bound_float_callee_save_pos b); intro; generalize (bound_outgoing_pos b); intro; @@ -108,9 +103,8 @@ Remark frame_env_aligned: forall b, let fe := make_env b in (4 | fe.(fe_ofs_link)) - /\ (4 | fe.(fe_ofs_int_local)) + /\ (8 | fe.(fe_ofs_local)) /\ (4 | fe.(fe_ofs_int_callee_save)) - /\ (8 | fe.(fe_ofs_float_local)) /\ (8 | fe.(fe_ofs_float_callee_save)) /\ (4 | fe.(fe_ofs_retaddr)) /\ (8 | fe.(fe_stack_data)) @@ -118,24 +112,23 @@ Remark frame_env_aligned: Proof. intros. unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr, - fe_ofs_int_local, fe_ofs_int_callee_save, + fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save, - fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save, + fe_ofs_float_callee_save, fe_num_float_callee_save, fe_stack_data. - set (x1 := 8 + 4 * bound_outgoing b). - assert (4 | x1). unfold x1; apply Zdivide_plus_r. exists 2; auto. exists (bound_outgoing b); ring. - set (x2 := x1 + 4 * bound_int_local b). - assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring. + set (x1 := align (8 + 4 * bound_outgoing b) 8). + assert (8 | x1). unfold x1; apply align_divides. omega. + set (x2 := x1 + 4 * bound_local b). + assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. + apply Zdivides_trans with 8. exists 2; auto. auto. + exists (bound_local b); ring. set (x3 := x2 + 4). assert (4 | x3). unfold x3; apply Zdivide_plus_r; auto. exists 1; auto. set (x4 := align (x3 + 4 * bound_int_callee_save b) 8). assert (8 | x4). unfold x4. apply align_divides. omega. - set (x5 := x4 + 8 * bound_float_local b). - assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring. - set (x6 := x5 + 8 * bound_float_callee_save b). - assert (8 | x6). - unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring. - set (x7 := align (x6 + bound_stack_data b) 16). - assert (16 | x7). unfold x7; apply align_divides. omega. + set (x5 := x4 + 8 * bound_float_callee_save b). + assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring. + set (x6 := align (x5 + bound_stack_data b) 16). + assert (16 | x6). unfold x6; apply align_divides. omega. intuition. Qed. diff --git a/runtime/Makefile b/runtime/Makefile index c128ba2..5550e6b 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -2,26 +2,34 @@ include ../Makefile.config CFLAGS=-O1 -g -Wall INCLUDES= -OBJS= +OBJS=int64.o LIB=libcompcert.a -ifeq ($(strip $(HAS_RUNTIME_LIB)),true) all: $(LIB) $(INCLUDES) -else -all: -endif $(LIB): $(OBJS) rm -f $(LIB) ar rcs $(LIB) $(OBJS) -clean: +%.o: $(ARCH)/%.s + $(CASM) $^ + +clean:: rm -f *.o $(LIB) -ifeq ($(strip $(HAS_RUNTIME_LIB)),true) install: install -d $(LIBDIR) install -c $(LIB) $(INCLUDES) $(LIBDIR) -else -install: -endif + +test/test_int64: test/test_int64.c $(LIB) + $(CC) -o $@ test/test_int64.c $(LIB) + +clean:: + rm -f test/test_int64 + +.PHONY: test + +test: FORCE test/test_int64 + test/test_int64 + +FORCE: diff --git a/runtime/arm/int64.s b/runtime/arm/int64.s new file mode 100644 index 0000000..6b03351 --- /dev/null +++ b/runtime/arm/int64.s @@ -0,0 +1,424 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the <organization> nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT +@ HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +@ Calling conventions for R = F(X) or R = F(X,Y): +@ one or two long arguments: XL in r0, XH in r1, YL in r2, YH in r3 +@ one long argument, one int: XL in r0, XH in r1, Y in r2 +@ one float argument: X in r0, r1 +@ one long result: RL in r0, RH in r1 +@ one float result: R in r0, r1 +@ This is a little-endian convention: the low word is in the +@ low-numbered register. +@ Can use r0...r3 and f0...f7 as temporary registers (caller-save) + + .text + +@@@ Unsigned comparison + + .global __i64_ucmp +__i64_ucmp: + cmp r1, r3 @ compare high words + cmpeq r0, r2 @ if equal, compare low words instead + moveq r0, #0 @ res = 0 if eq + movhi r0, #1 @ res = 1 if unsigned higher + mvnlo r0, #0 @ res = -1 if unsigned lower + bx lr + .type __i64_ucmp, %function + .size __i64_ucmp, . - __i64_ucmp + +@@@ Signed comparison + + .global __i64_scmp +__i64_scmp: + cmp r0, r2 @ compare low words (unsigned) + moveq r0, #0 @ res = 0 if eq + movhi r0, #1 @ res = 1 if unsigned higher + mvnlo r0, #0 @ res = -1 if unsigned lower + cmp r1, r3 @ compare high words (signed) + addgt r0, r0, #2 @ res += 2 if signed greater + sublt r0, r0, #2 @ res -= 2 if signed less + @ here, r0 = 0 if X == Y + @ r0 = -3, -2, -1 if X < Y + @ r0 = 1, 2, 3 if X > Y + bx lr + .type __i64_scmp, %function + .size __i64_scmp, . - __i64_scmp + +@@@ Opposite + + .global __i64_neg +__i64_neg: + rsbs r0, r0, #0 + rsc r1, r1, #0 + bx lr + .type __i64_neg, %function + .size __i64_neg, . - __i64_neg + +@@@ Addition + + .global __i64_add +__i64_add: + adds r0, r0, r2 + adc r1, r1, r3 + bx lr + .type __i64_add, %function + .size __i64_add, . - __i64_add + +@@@ Subtraction + + .global __i64_sub +__i64_sub: + subs r0, r0, r2 + sbc r1, r1, r3 + bx lr + .type __i64_sub, %function + .size __i64_sub, . - __i64_sub + +@ Note on ARM shifts: the shift amount is taken modulo 256. +@ Therefore, unsigned shifts by 32 bits or more produce 0. + +@@@ Shift left + + .global __i64_shl +__i64_shl: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r1, r1, lsl r2 + orr r1, r0, lsr r3 + mov r0, r0, lsl r2 + bx lr +1: + sub r2, r2, #32 + mov r1, r0, lsl r2 + mov r0, #0 + bx lr + .type __i64_shl, %function + .size __i64_shl, . - __i64_shl + +@@@ Shift right unsigned + + .global __i64_shr +__i64_shr: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r0, r0, lsr r2 + orr r0, r1, lsl r3 + mov r1, r1, lsr r2 + bx lr +1: + sub r2, r2, #32 + mov r0, r1, lsr r2 + mov r1, #0 + bx lr + .type __i64_shr, %function + .size __i64_shr, . - __i64_shr + +@@@ Shift right signed + + .global __i64_sar +__i64_sar: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r0, r0, lsr r2 + orr r0, r1, lsl r3 + mov r1, r1, asr r2 + bx lr +1: + sub r2, r2, #32 + mov r0, r1, asr r2 + mov r1, r1, asr #31 + bx lr + .type __i64_sar, %function + .size __i64_sar, . - __i64_sar + +@@@ Multiplication + + .global __i64_mul +__i64_mul: + push {r4, r5} + mov r4, r0 @ save first arg in r4,r5 + mov r5, r1 + umull r0, r1, r2, r4 @ 64-bit product of low halves + mla r1, r2, r5, r1 @ add 32-bit products low half * high half + mla r1, r3, r4, r1 @ to high half of result + pop {r4, r5} + bx lr + .type __i64_mul, %function + .size __i64_mul, . - __i64_mul + +@@@ Auxiliary function for division and modulus. Not exported. + +@ On entry: N = (r0, r1) numerator D = (r2, r3) divisor +@ On exit: Q = (r4, r5) quotient R = (r0, r1) remainder +@ Locals: M = (r6, r7) mask TMP = r8 temporary + +__i64_udivmod: + orrs r8, r2, r3 @ is D == 0? + bxeq lr @ if so, return with unspecified results + mov r4, #0 @ Q = 0 + mov r5, #0 + mov r6, #1 @ M = 1 + mov r7, #0 +1: cmp r3, #0 @ while ((signed) D >= 0) ... + blt 2f + subs r8, r0, r2 @ ... and N >= D ... + sbcs r8, r1, r3 + blo 2f + adds r2, r2, r2 @ D = D << 1 + adc r3, r3, r3 + adds r6, r6, r6 @ M = M << 1 + adc r7, r7, r7 + b 1b +2: subs r0, r0, r2 @ N = N - D + sbcs r1, r1, r3 + orr r4, r4, r6 @ Q = Q | M + orr r5, r5, r7 + bhs 3f @ if N was >= D, continue + adds r0, r0, r2 @ otherwise, undo what we just did + adc r1, r1, r3 @ N = N + D + bic r4, r4, r6 @ Q = Q & ~M + bic r5, r5, r7 +3: movs r7, r7, lsr #1 @ M = M >> 1 + mov r6, r6, rrx + movs r3, r3, lsr #1 @ D = D >> 1 + mov r2, r2, rrx + orrs r8, r6, r7 @ repeat while (M != 0) ... + bne 2b + bx lr + +@@@ Unsigned division + + .global __i64_udiv +__i64_udiv: + push {r4, r5, r6, r7, r8, lr} + bl __i64_udivmod + mov r0, r4 + mov r1, r5 + pop {r4, r5, r6, r7, r8, lr} + bx lr + .type __i64_udiv, %function + .size __i64_udiv, . - __i64_udiv + +@@@ Unsigned modulus + + .global __i64_umod +__i64_umod: + push {r4, r5, r6, r7, r8, lr} + bl __i64_udivmod @ remainder is already in r0,r1 + pop {r4, r5, r6, r7, r8, lr} + bx lr + .type __i64_umod, %function + .size __i64_umod, . - __i64_umod + + .global __i64_sdiv +__i64_sdiv: + push {r4, r5, r6, r7, r8, r10, lr} + eor r10, r1, r3 @ r10 = sign of result + mov r4, r1, asr #31 @ take absolute value of N + eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + mov r4, r3, asr #31 @ take absolute value of D + eor r2, r2, r4 + eor r3, r3, r4 + subs r2, r2, r4 + sbc r3, r3, r4 + bl __i64_udivmod @ do unsigned division + mov r0, r4 + mov r1, r5 + eor r0, r0, r10, asr#31 @ apply expected sign + eor r1, r1, r10, asr#31 + subs r0, r0, r10, asr#31 + sbc r1, r1, r10, asr#31 + pop {r4, r5, r6, r7, r8, r10, lr} + bx lr + .type __i64_sdiv, %function + .size __i64_sdiv, . - __i64_sdiv + +@@@ Signed modulus + + .global __i64_smod +__i64_smod: + push {r4, r5, r6, r7, r8, r10, lr} + mov r10, r1 @ r10 = sign of result + mov r4, r1, asr#31 @ take absolute value of N + eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + mov r4, r3, asr #31 @ take absolute value of D + eor r2, r2, r4 + eor r3, r3, r4 + subs r2, r2, r4 + sbc r3, r3, r4 + bl __i64_udivmod @ do unsigned division + eor r0, r0, r10, asr#31 @ apply expected sign + eor r1, r1, r10, asr#31 + subs r0, r0, r10, asr#31 + sbc r1, r1, r10, asr#31 + pop {r4, r5, r6, r7, r8, r10, lr} + bx lr + .type __i64_smod, %function + .size __i64_smod, . - __i64_smod + +@@@ Conversion from unsigned 64-bit integer to double float + + .global __i64_utod +__i64_utod: + fmsr s0, r0 + fuitod d0, s0 @ convert low half to double (unsigned) + fmsr s2, r1 + fuitod d1, s2 @ convert high half to double (unsigned) + fldd d2, .LC1 @ d2 = 2^32 + fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 + fmrrd r0, r1, d0 @ return result in r0, r1 + bx lr + .type __i64_utod, %function + .size __i64_utod, . - __i64_utod + + .balign 8 +.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision + +@@@ Conversion from signed 64-bit integer to double float + + .global __i64_stod +__i64_stod: + fmsr s0, r0 + fuitod d0, s0 @ convert low half to double (unsigned) + fmsr s2, r1 + fsitod d1, s2 @ convert high half to double (signed) + fldd d2, .LC1 @ d2 = 2^32 + fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 + fmrrd r0, r1, d0 @ return result in r0, r1 + bx lr + .type __i64_stod, %function + .size __i64_stod, . - __i64_stod + +@@@ Conversion from double float to unsigned 64-bit integer + + .global __i64_dtou +__i64_dtou: + cmp r1, #0 @ is double < 0 ? + blt 1f @ then it converts to 0 + @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 + @ note: 1023 + 52 = 1075 = 1024 + 51 + @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 + mov r2, r1, lsl #1 + mov r2, r2, lsr #21 + sub r2, r2, #51 + sub r2, r2, #1024 + @ check range of exponent + cmn r2, #52 @ if EXP < -52, double is < 1.0 + blt 1f + cmp r2, #12 @ if EXP >= 64 - 52, double is >= 2^64 + bge 2f + @ extract true mantissa + bic r1, r1, #0xFF000000 + bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 + orr r1, r1, #0x00100000 @ HI |= 0x00100000 + @ shift it appropriately + cmp r2, #0 + bge __i64_shl @ if EXP >= 0, shift left by EXP + rsb r2, r2, #0 + b __i64_shr @ otherwise, shift right by -EXP + @ special cases +1: mov r0, #0 @ result is 0 + mov r1, #0 + bx lr +2: mvn r0, #0 @ result is 0xFF....FF (MAX_UINT) + mvn r1, #0 + bx lr + .type __i64_dtou, %function + .size __i64_dtou, . - __i64_dtou + +@@@ Conversion from double float to signed 64-bit integer + + .global __i64_dtos +__i64_dtos: + push {r4, lr} + mov r4, r1, asr #31 @ save sign in r4 + @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 + @ note: 1023 + 52 = 1075 = 1024 + 51 + @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 + mov r2, r1, lsl #1 + mov r2, r2, lsr #21 + sub r2, r2, #51 + sub r2, r2, #1024 + @ check range of exponent + cmn r2, #52 @ if EXP < -52, |double| is < 1.0 + blt 1f + cmp r2, #11 @ if EXP >= 63 - 52, |double| is >= 2^63 + bge 2f + @ extract true mantissa + bic r1, r1, #0xFF000000 + bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 + orr r1, r1, #0x00100000 @ HI |= 0x00100000 + @ shift it appropriately + cmp r2, #0 + blt 3f + bl __i64_shl @ if EXP >= 0, shift left by EXP + b 4f +3: rsb r2, r2, #0 + bl __i64_shr @ otherwise, shift right by -EXP + @ apply sign to result +4: eor r0, r0, r4 + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + pop {r4, lr} + bx lr + @ special cases +1: mov r0, #0 @ result is 0 + mov r1, #0 + pop {r4, lr} + bx lr +2: cmp r4, #0 + blt 6f + mvn r0, #0 @ result is 0x7F....FF (MAX_SINT) + mov r1, r0, lsr #1 + pop {r4, lr} + bx lr +6: mov r0, #0 @ result is 0x80....00 (MIN_SINT) + mov r1, #0x80000000 + pop {r4, lr} + bx lr + .type __i64_dtos, %function + .size __i64_dtos, . - __i64_dtos + diff --git a/runtime/ia32/int64.s b/runtime/ia32/int64.s new file mode 100644 index 0000000..8fd8151 --- /dev/null +++ b/runtime/ia32/int64.s @@ -0,0 +1,471 @@ +# ***************************************************************** +# +# The Compcert verified compiler +# +# Xavier Leroy, INRIA Paris-Rocquencourt +# +# Copyright (c) 2013 Institut National de Recherche en Informatique et +# en Automatique. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the <organization> nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT +# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ********************************************************************* + +# Helper functions for 64-bit integer arithmetic. IA32 version. + + .text + +# Opposite + + .globl __i64_neg + .balign 16 +__i64_neg: + movl 4(%esp), %eax + movl 8(%esp), %edx + negl %eax + adcl $0, %edx + negl %edx + ret + .type __i64_neg, @function + .size __i64_neg, . - __i64_neg + +# Addition + + .globl __i64_add + .balign 16 +__i64_add: + movl 4(%esp), %eax + movl 8(%esp), %edx + addl 12(%esp), %eax + adcl 16(%esp), %edx + ret + .type __i64_add, @function + .size __i64_add, . - __i64_add + +# Subtraction + + .globl __i64_sub + .balign 16 +__i64_sub: + movl 4(%esp), %eax + movl 8(%esp), %edx + subl 12(%esp), %eax + sbbl 16(%esp), %edx + ret + .type __i64_sub, @function + .size __i64_sub, . - __i64_sub + +# Multiplication + + .globl __i64_mul + .balign 16 +__i64_mul: + movl 4(%esp), %eax + mull 12(%esp) # edx:eax = xlo * ylo + movl 4(%esp), %ecx + imull 16(%esp), %ecx # ecx = xlo * yhi + addl %ecx, %edx + movl 12(%esp), %ecx # ecx = xhi * ylo + imull 8(%esp), %ecx + addl %ecx, %edx + ret + .type __i64_mul, @function + .size __i64_mul, . - __i64_mul + +# Division and remainder + +# Auxiliary function, not exported. +# Input: 20(esp), 24(esp) is dividend N +# 28(esp), 32(esp) is divisor D +# Output: esi:edi is quotient Q +# eax:edx is remainder R +# ebp is preserved + + .balign 16 +__i64_udivmod: + cmpl $0, 32(%esp) # single-word divisor? (DH = 0) + jne 1f + # Special case 64 bits divided by 32 bits + movl 28(%esp), %ecx # divide NH by DL + movl 24(%esp), %eax # (will trap if D = 0) + xorl %edx, %edx + divl %ecx # eax = quotient, edx = remainder + movl %eax, %edi # high word of quotient in edi + movl 20(%esp), %eax # divide rem : NL by DL + divl %ecx # eax = quotient, edx = remainder + movl %eax, %esi # low word of quotient in esi */ + movl %edx, %eax # low word of remainder in eax + xorl %edx, %edx # high word of remainder is 0, in edx + ret + # The general case +1: movl 28(%esp), %ecx # esi:ecx = D + movl 32(%esp), %esi + movl 20(%esp), %eax # edx:eax = N + movl 24(%esp), %edx + # Scale D and N down, giving D' and N', until D' fits in 32 bits +2: shrl $1, %esi # shift D' right by one + rcrl $1, %ecx + shrl $1, %edx # shift N' right by one + rcrl $1, %eax + testl %esi, %esi # repeat until D'H = 0 + jnz 2b + # Divide N' by D' to get an approximate quotient + divl %ecx # eax = quotient, edx = remainder + movl %eax, %esi # save tentative quotient Q in esi + # Check for off by one quotient + # Compute Q * D +3: movl 32(%esp), %ecx + imull %esi, %ecx # ecx = Q * DH + movl 28(%esp), %eax + mull %esi # edx:eax = Q * DL + add %ecx, %edx # edx:eax = Q * D + jc 5f # overflow in addition means Q is too high + # Compare Q * D with N, computing the remainder in the process + movl %eax, %ecx + movl 20(%esp), %eax + subl %ecx, %eax + movl %edx, %ecx + movl 24(%esp), %edx + sbbl %ecx, %edx # edx:eax = N - Q * D + jnc 4f # no carry: N >= Q * D, we are fine + decl %esi # carry: N < Q * D, adjust Q down by 1 + addl 28(%esp), %eax # and remainder up by D + adcl 32(%esp), %edx + # Finished +4: xorl %edi, %edi # high half of quotient is 0 + ret + # Special case when Q * D overflows +5: decl %esi # adjust Q down by 1 + jmp 3b # and redo check & computation of remainder + +# Unsigned division + + .globl __i64_udiv + .balign 16 +__i64_udiv: + pushl %ebp + pushl %esi + pushl %edi + call __i64_udivmod + movl %esi, %eax + movl %edi, %edx + popl %edi + popl %esi + popl %ebp + ret + .type __i64_udiv, @function + .size __i64_udiv, . - __i64_udiv + +# Unsigned remainder + + .globl __i64_umod + .balign 16 +__i64_umod: + pushl %ebp + pushl %esi + pushl %edi + call __i64_udivmod + popl %edi + popl %esi + popl %ebp + ret + .type __i64_umod, @function + .size __i64_umod, . - __i64_umod + +# Signed division + + .globl __i64_sdiv + .balign 16 +__i64_sdiv: + pushl %ebp + pushl %esi + pushl %edi + movl 20(%esp), %esi # esi = NH + movl %esi, %ebp # save sign of N in ebp + testl %esi, %esi + jge 1f # if N < 0, + negl 16(%esp) # N = -N + adcl $0, %esi + negl %esi + movl %esi, 20(%esp) +1: movl 28(%esp), %esi # esi = DH + xorl %esi, %ebp # sign of result in ebp + testl %esi, %esi + jge 2f # if D < 0, + negl 24(%esp) # D = -D + adcl $0, %esi + negl %esi + movl %esi, 28(%esp) +2: call __i64_udivmod + testl %ebp, %ebp # apply sign to result + jge 3f + negl %esi + adcl $0, %edi + negl %edi +3: movl %esi, %eax + movl %edi, %edx + popl %edi + popl %esi + popl %ebp + ret + .type __i64_sdiv, @function + .size __i64_sdiv, . - __i64_sdiv + +# Signed remainder + + .globl __i64_smod + .balign 16 +__i64_smod: + pushl %ebp + pushl %esi + pushl %edi + movl 20(%esp), %esi # esi = NH + movl %esi, %ebp # save sign of result in ebp + testl %esi, %esi + jge 1f # if N < 0, + negl 16(%esp) # N = -N + adcl $0, %esi + negl %esi + movl %esi, 20(%esp) +1: movl 28(%esp), %esi # esi = DH + testl %esi, %esi + jge 2f # if D < 0, + negl 24(%esp) # D = -D + adcl $0, %esi + negl %esi + movl %esi, 28(%esp) +2: call __i64_udivmod + testl %ebp, %ebp # apply sign to result + jge 3f + negl %eax + adcl $0, %edx + negl %edx +3: popl %edi + popl %esi + popl %ebp + ret + .type __i64_sdiv, @function + .size __i64_sdiv, . - __i64_sdiv + +# Note on shifts: +# IA32 shift instructions treat their amount (in %cl) modulo 32 + +# Shift left + + .globl __i64_shl + .balign 16 +__i64_shl: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shldl %cl, %eax, %edx # edx = high(XH:XL << amount) + shll %cl, %eax # eax = XL << amount + ret + # shift amount >= 32 +1: movl 4(%esp), %edx + shll %cl, %edx # edx = XL << (amount - 32) + xorl %eax, %eax # eax = 0 + ret + .type __i64_shl, @function + .size __i64_shl, . - __i64_shl + +# Shift right unsigned + + .globl __i64_shr + .balign 16 +__i64_shr: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shrdl %cl, %edx, %eax # eax = low(XH:XL >> amount) + shrl %cl, %edx # edx = XH >> amount + ret + # shift amount >= 32 +1: movl 8(%esp), %eax + shrl %cl, %eax # eax = XH >> (amount - 32) + xorl %edx, %edx # edx = 0 + ret + .type __i64_shr, @function + .size __i64_shr, . - __i64_shr + +# Shift right signed + + .globl __i64_sar + .balign 16 +__i64_sar: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shrdl %cl, %edx, %eax # eax = low(XH:XL >> amount) + sarl %cl, %edx # edx = XH >> amount (signed) + ret + # shift amount >= 32 +1: movl 8(%esp), %eax + movl %eax, %edx + sarl %cl, %eax # eax = XH >> (amount - 32) + sarl $31, %edx # edx = sign of X + ret + .type __i64_sar, @function + .size __i64_sar, . - __i64_sar + +# Unsigned comparison + + .globl __i64_ucmp + .balign 16 +__i64_ucmp: + movl 8(%esp), %eax # compare high words + cmpl 16(%esp), %eax + jne 1f # if high words equal, + movl 4(%esp), %eax # compare low words + cmpl 12(%esp), %eax +1: seta %al # AL = 1 if >, 0 if <= + setb %dl # DL = 1 if <, 0 if >= + subb %dl, %al # AL = 0 if same, 1 if >, -1 if < + movsbl %al, %eax + ret + .type __i64_ucmp, @function + .size __i64_ucmp, . - __i64_ucmp + +# Signed comparison + + .globl __i64_scmp + .balign 16 +__i64_scmp: + movl 8(%esp), %eax # compare high words (signed) + cmpl 16(%esp), %eax + je 1f # if different, + setg %al # extract result + setl %dl + subb %dl, %al + movsbl %al, %eax + ret +1: movl 4(%esp), %eax # if high words equal, + cmpl 12(%esp), %eax # compare low words (unsigned) + seta %al # and extract result + setb %dl + subb %dl, %al + movsbl %al, %eax + ret + .type __i64_scmp, @function + .size __i64_scmp, . - __i64_scmp + +# Conversion signed long -> float + + .globl __i64_stod + .balign 16 +__i64_stod: + fildll 4(%esp) + ret + .type __i64_stod, @function + .size __i64_stod, . - __i64_stod + +# Conversion unsigned long -> float + + .globl __i64_utod + .balign 16 +__i64_utod: + fildll 4(%esp) # convert as if signed + cmpl $0, 8(%esp) # is argument >= 2^63? + jns 1f + fadds LC1 # adjust by 2^64 +1: ret + .type __i64_stod, @function + .size __i64_stod, . - __i64_stod + + .balign 4 +LC1: .long 0x5f800000 # 2^64 in single precision + +# Conversion float -> signed long + + .globl __i64_dtos + .balign 16 +__i64_dtos: + subl $4, %esp + # Change rounding mode to "round towards zero" + fnstcw 0(%esp) + movw 0(%esp), %ax + movb $12, %ah + movw %ax, 2(%esp) + fldcw 2(%esp) + # Convert + fldl 8(%esp) + fistpll 8(%esp) + # Restore rounding mode + fldcw 0(%esp) + # Load result in edx:eax + movl 8(%esp), %eax + movl 12(%esp), %edx + addl $4, %esp + ret + .type __i64_dtos, @function + .size __i64_dtos, . - __i64_dtos + +# Conversion float -> unsigned long + + .globl __i64_dtou + .balign 16 +__i64_dtou: + subl $4, %esp + # Change rounding mode to "round towards zero" + fnstcw 0(%esp) + movw 0(%esp), %ax + movb $12, %ah + movw %ax, 2(%esp) + fldcw 2(%esp) + # Compare argument with 2^63 + fldl (4+4)(%esp) + flds LC2 + fucomp + fnstsw %ax + sahf + jbe 1f # branch if not (ARG < 2^63) + # Argument < 2^63: convert as is + fistpll 8(%esp) + movl 8(%esp), %eax + movl 12(%esp), %edx + jmp 2f + # Argument > 2^63: offset ARG by -2^63, then convert, then offset RES by 2^63 +1: fsubs LC2 + fistpll 8(%esp) + movl 8(%esp), %eax + movl 12(%esp), %edx + addl $0x80000000, %edx + # Restore rounding mode +2: fldcw 0(%esp) + addl $4, %esp + ret + .type __i64_dtou, @function + .size __i64_dtou, . - __i64_dtou + + .balign 4 +LC2: .long 0x5f000000 # 2^63 in single precision diff --git a/runtime/powerpc/int64.s b/runtime/powerpc/int64.s new file mode 100644 index 0000000..34b65b5 --- /dev/null +++ b/runtime/powerpc/int64.s @@ -0,0 +1,492 @@ +# ***************************************************************** +# +# The Compcert verified compiler +# +# Xavier Leroy, INRIA Paris-Rocquencourt +# +# Copyright (c) 2013 Institut National de Recherche en Informatique et +# en Automatique. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the <organization> nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT +# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ********************************************************************* + +# Helper functions for 64-bit integer arithmetic. PowerPC version. + +# Calling conventions for R = F(X) or R = F(X,Y): +# one or two long arguments: XH in r3, XL in r4, YH in r5, YL in r6 +# one long argument, one int: XH in r3, XL in r4, Y in r5 +# one float argument: X in f1 +# one long result: RH in r3, RL in r4 +# one float result: R in f1 +# This is a big-endian convention: the high word is in the low-numbered register +# Can use r3...r12 and f0...r13 as temporary registers (caller-save) + + .text + +### Opposite + + .balign 16 + .globl __i64_neg +__i64_neg: + subfic r4, r4, 0 # RL = -XL and set borrow iff XL != 0 + subfze r3, r3 # RH = -XH - borrow + blr + .type __i64_neg, @function + .size __i64_neg, .-__i64_neg + +### Addition + + .balign 16 + .globl __i64_add +__i64_add: + addc r4, r4, r6 # RL = XL + YL and set carry if overflow + adde r3, r3, r5 # RH = XH + YH + carry + blr + .type __i64_add, @function + .size __i64_add, .-__i64_add + +### Subtraction + + .balign 16 + .globl __i64_sub +__i64_sub: + subfc r4, r6, r4 # RL = XL - YL and set borrow if underflow + subfe r3, r5, r3 # RH = XH - YH - borrow + blr + .type __i64_sub, @function + .size __i64_sub, .-__i64_sub + +### Multiplication + + .balign 16 + .globl __i64_mul +__i64_mul: + # Form intermediate products + mulhwu r7, r4, r6 # r7 = high half of XL * YL + mullw r8, r3, r6 # r8 = low half of XH * YL + mullw r9, r4, r5 # r9 = low half of XL * YH + mullw r4, r4, r6 # r4 = low half of XL * YL = low half of result + # Reconstruct high half of result + add r3, r7, r8 + add r3, r3, r9 + blr + .type __i64_mul, @function + .size __i64_mul, .-__i64_mul + +### Helper function for division and modulus. Not exported. +# Input: numerator N in (r3,r4), divisor D in (r5,r6) +# Output: quotient Q in (r7,r8), remainder R in (r3,r4) + .balign 16 +__i64_udivmod: + # Set up quotient and mask + li r8, 0 # Q = 0 + li r7, 0 + li r10, 1 # M = 1 + li r9, 0 + # Check for zero divisor + or. r0, r6, r5 + beqlr # return with unspecified quotient & remainder + # Scale divisor and mask +1: cmpwi r5, 0 # while top bit of D is zero... + blt 2f + subfc r0, r6, r4 # compute borrow out of N - D + subfe r0, r5, r3 + subfe. r0, r0, r0 # EQ iff no borrow iff N >= D + bne 2f # ... and while N >= D ... + addc r6, r6, r6 # scale divisor: D = D << 1 + adde r5, r5, r5 + addc r10, r10, r10 # scale mask: M = M << 1 + adde r9, r9, r9 + b 1b # end while + # Long division +2: subfc r4, r6, r4 # Q = Q | M, N = N - D, and compute borrow + or r8, r8, r10 + subfe r3, r5, r3 + or r7, r7, r9 + subfe. r0, r0, r0 # test borrow + beq 3f # no borrow: N >= D, continue + addc r4, r4, r6 # borrow: undo what we just did to N and Q + andc r8, r8, r10 + adde r3, r3, r5 + andc r7, r7, r9 +3: slwi r0, r9, 31 # unscale mask: M = M >> 1 + srwi r10, r10, 1 + or r10, r10, r0 + srwi r9, r9, 1 + slwi r0, r5, 31 # unscale divisor: D = D >> 1 + srwi r6, r6, 1 + or r6, r6, r0 + srwi r5, r5, 1 + or. r0, r10, r9 # iterate while M != 0 + bne 2b + blr + +### Unsigned division + + .balign 16 + .globl __i64_udiv +__i64_udiv: + mflr r11 # save return address in r11 + bl __i64_udivmod # unsigned divide + mtlr r11 # restore return address + mr r3, r7 # R = quotient + mr r4, r8 + blr + .type __i64_udiv, @function + .size __i64_udiv, .-__i64_udiv + +### Unsigned remainder + + .balign 16 + .globl __i64_umod +__i64_umod: + mflr r11 + bl __i64_udivmod + mtlr r11 + blr # remainder is already in R + .type __i64_umod, @function + .size __i64_umod, .-__i64_umod + +### Signed division + + .balign 16 + .globl __i64_sdiv +__i64_sdiv: + mflr r11 # save return address + xor r12, r3, r5 # save sign of result in r12 (top bit) + srawi r0, r3, 31 # take absolute value of N + xor r4, r4, r0 # (i.e. N = N ^ r0 - r0, + xor r3, r3, r0 # where r0 = 0 if N >= 0 and r0 = -1 if N < 0) + subfc r4, r0, r4 + subfe r3, r0, r3 + srawi r0, r5, 31 # take absolute value of D + xor r6, r6, r0 # (same trick) + xor r5, r5, r0 + subfc r6, r0, r6 + subfe r5, r0, r5 + bl __i64_udivmod # do unsigned division + mtlr r11 # restore return address + srawi r0, r12, 31 # apply expected sign to quotient + xor r8, r8, r0 # RES = Q if r12 >= 0, -Q if r12 < 0 + xor r7, r7, r0 + subfc r4, r0, r8 + subfe r3, r0, r7 + blr + .type __i64_sdiv, @function + .size __i64_sdiv, .-__i64_sdiv + +## Signed remainder + + .balign 16 + .globl __i64_smod +__i64_smod: + mflr r11 # save return address + srawi r12, r3, 31 # save sign of result in r12 (sign of N) + xor r4, r4, r12 # and take absolute value of N + xor r3, r3, r12 + subfc r4, r12, r4 + subfe r3, r12, r3 + srawi r0, r5, 31 # take absolute value of D + xor r6, r6, r0 + xor r5, r5, r0 + subfc r6, r0, r6 + subfe r5, r0, r5 + bl __i64_udivmod # do unsigned division + mtlr r11 # restore return address + xor r4, r4, r12 # apply expected sign to remainder + xor r3, r3, r12 # RES = R if r12 == 0, -R if r12 == -1 + subfc r4, r12, r4 + subfe r3, r12, r3 + blr + .type __i64_smod, @function + .size __i64_smod, .-__i64_smod + +### Unsigned comparison + + .balign 16 + .globl __i64_ucmp +__i64_ucmp: + cmplw cr0, r3, r5 # compare high words (unsigned) + cmplw cr1, r4, r6 # compare low words (unsigned) + mfcr r0 +# At this point, the bits of r0 are as follow: +# bit 31: XH < YH +# bit 30: XH > YH +# bit 27: XL > YL +# bit 26: XL < YL + rlwinm r3, r0, 0, 0, 1 # r3 = r0 & 0xC000_0000 + srawi r3, r3, 24 # r4 = r4 >>s 28 +# r3 = -0x80 if XH < YH +# = 0x40 if XH > YH +# = 0 if XH = YH + rlwinm r4, r0, 4, 0, 1 # r4 = (r0 << 4) & 0xC000_0000 + srawi r4, r4, 28 # r4 = r4 >>s 28 +# r4 = -8 if XL < YL +# = 4 if XL > YL +# = 0 if XL = YL + add r3, r3, r4 +# r3 = -0x80 or -0x80 - 8 or -0x80 + 4 or -8 if X < Y +# (in all cases, r3 < 0 if X < Y) +# = 0x40 or 0x40 - 8 or 0x40 + 4 or 4 if X > Y +# (in all cases, r3 > 0 if X > Y) +# = 0 if X = Y + blr + .type __i64_ucmp, @function + .size __i64_ucmp, .-__i64_ucmp + +### Signed comparison + + .balign 16 + .globl __i64_scmp +__i64_scmp: + cmpw cr0, r3, r5 # compare high words (signed) + cmplw cr1, r4, r6 # compare low words (unsigned) + mfcr r0 +# Same trick as in __i64_ucmp + rlwinm r3, r0, 0, 0, 1 + srawi r3, r3, 24 + rlwinm r4, r0, 4, 0, 1 + srawi r4, r4, 28 + add r3, r3, r4 + blr + .type __i64_scmp, @function + .size __i64_scmp, .-__i64_scmp + +### Shifts + +# On PowerPC, shift instructions with amount mod 64 >= 32 return 0 + + .balign 16 + .globl __i64_shl +__i64_shl: +# hi = (hi << amount) | (lo >> (32 - amount)) | (lo << (amount - 32)) +# lo = lo << amount +# if 0 <= amount < 32: +# (amount - 32) mod 64 >= 32, hence lo << (amount - 32) == 0 +# if 32 <= amount < 64: +# lo << amount == 0 +# (32 - amount) mod 64 >= 32, hence lo >> (32 - amount) == 0 + andi. r5, r5, 63 # take amount modulo 64 + subfic r6, r5, 32 # r6 = 32 - amount + addi r7, r5, -32 # r7 = amount - 32 + slw r3, r3, r5 + srw r0, r4, r6 + or r3, r3, r0 + slw r0, r4, r7 + or r3, r3, r0 + slw r4, r4, r5 + blr + .type __i64_shl, @function + .size __i64_shl, .-__i64_shl + + .balign 16 + .globl __i64_shr +__i64_shr: +# lo = (lo >> amount) | (hi << (32 - amount)) | (hi >> (amount - 32)) +# hi = hi >> amount +# if 0 <= amount < 32: +# (amount - 32) mod 64 >= 32, hence hi >> (amount - 32) == 0 +# if 32 <= amount < 64: +# hi >> amount == 0 +# (32 - amount) mod 64 >= 32, hence hi << (32 - amount) == 0 + andi. r5, r5, 63 # take amount modulo 64 + subfic r6, r5, 32 # r6 = 32 - amount + addi r7, r5, -32 # r7 = amount - 32 + srw r4, r4, r5 + slw r0, r3, r6 + or r4, r4, r0 + srw r0, r3, r7 + or r4, r4, r0 + srw r3, r3, r5 + blr + .type __i64_shr, @function + .size __i64_shr, .-__i64_shr + + .balign 16 + .globl __i64_sar +__i64_sar: + andi. r5, r5, 63 # take amount modulo 64 + cmpwi r5, 32 + bge 1f # amount < 32? + subfic r6, r5, 32 # r6 = 32 - amount + srw r4, r4, r5 # RH = XH >>s amount + slw r0, r3, r6 # RL = XL >>u amount | XH << (32 - amount) + or r4, r4, r0 + sraw r3, r3, r5 + blr +1: addi r6, r5, -32 # amount >= 32 + sraw r4, r3, r6 # RL = XH >>s (amount - 32) + srawi r3, r3, 31 # RL = sign extension of XH + blr + .type __i64_sar, @function + .size __i64_sar, .-__i64_sar + +### Conversion from unsigned long to double float + + .balign 16 + .globl __i64_utod +__i64_utod: + addi r1, r1, -16 + lis r5, 0x4330 + li r6, 0 + stw r5, 0(r1) + stw r4, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f1, 0(r1) + lfd f2, 8(r1) + fsub f1, f1, f2 # f1 is XL as a double + lis r5, 0x4530 + stw r5, 0(r1) + stw r3, 4(r1) + stw r5, 8(r1) + lfd f2, 0(r1) + lfd f3, 8(r1) + fsub f2, f2, f3 # f2 is XH * 2^32 as a double + fadd f1, f1, f2 # add both to get result + addi r1, r1, 16 + blr + .type __i64_utod, @function + .size __i64_utod, .-__i64_utod + +### Conversion from signed long to double float + + .balign 16 + .globl __i64_stod +__i64_stod: + addi r1, r1, -16 + lis r5, 0x4330 + li r6, 0 + stw r5, 0(r1) + stw r4, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f1, 0(r1) + lfd f2, 8(r1) + fsub f1, f1, f2 # f1 is XL (unsigned) as a double + lis r5, 0x4530 + lis r6, 0x8000 + stw r5, 0(r1) + add r3, r3, r6 + stw r3, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f2, 0(r1) + lfd f3, 8(r1) + fsub f2, f2, f3 # f2 is XH (signed) * 2^32 as a double + fadd f1, f1, f2 # add both to get result + addi r1, r1, 16 + blr + .type __i64_stod, @function + .size __i64_stod, .-__i64_stod + +### Conversion from double float to unsigned long + + .balign 16 + .globl __i64_dtou +__i64_dtou: + stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double + lwz r3, 0(r1) + lwz r4, 4(r1) + addi r1, r1, 16 + cmpwi r3, 0 # is double < 0? + blt 1f # then it converts to 0 + # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) + rlwinm r5, r3, 12, 21, 31 + addi r5, r5, -1075 + # check range of exponent + cmpwi r5, -52 # if EXP < -52, double is < 1.0 + blt 1f + cmpwi r5, 12 # if EXP >= 64 - 52, double is >= 2^64 + bge 2f + # extract true mantissa + rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000 + oris r3, r3, 0x10 # HI |= 0x00100000 + # shift it appropriately + cmpwi r5, 0 + blt 3f + b __i64_shl # if EXP >= 0, shift left by EXP +3: subfic r5, r5, 0 + b __i64_shr # if EXP < 0, shift right by -EXP + # Special cases +1: li r3, 0 # result is 0 + li r4, 0 + blr +2: li r3, -1 # result is MAX_UINT + li r4, -1 + blr + .type __i64_dtou, @function + .size __i64_dtou, .-__i64_dtou + +### Conversion from double float to signed long + + .balign 16 + .globl __i64_dtos +__i64_dtos: + stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double + lwz r3, 0(r1) + lwz r4, 4(r1) + addi r1, r1, 16 + srawi r10, r3, 31 # save sign of double in r10 + # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) + rlwinm r5, r3, 12, 21, 31 + addi r5, r5, -1075 + # check range of exponent + cmpwi r5, -52 # if EXP < -52, abs(double) is < 1.0 + blt 1f + cmpwi r5, 11 # if EXP >= 63 - 52, abs(double) is >= 2^63 + bge 2f + # extract true mantissa + rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000 + oris r3, r3, 0x10 # HI |= 0x00100000 + # shift it appropriately + mflr r9 # save retaddr in r9 + cmpwi r5, 0 + blt 3f + bl __i64_shl # if EXP >= 0, shift left by EXP + b 4f +3: subfic r5, r5, 0 + bl __i64_shr # if EXP < 0, shift right by -EXP + # apply sign to result +4: mtlr r9 + xor r4, r4, r10 + xor r3, r3, r10 + subfc r4, r10, r4 + subfe r3, r10, r3 + blr + # Special cases +1: li r3, 0 # result is 0 + li r4, 0 + blr +2: cmpwi r10, 0 # result is MAX_SINT or MIN_SINT + bge 5f # depending on sign + li r4, -1 # result is MAX_SINT = 0x7FFF_FFFF + srwi r3, r4, 1 + blr +5: lis r3, 0x8000 # result is MIN_SINT = 0x8000_0000 + li r4, 0 + blr + .type __i64_dtos, @function + .size __i64_dtos, .-__i64_dtos diff --git a/runtime/test/test_int64.c b/runtime/test/test_int64.c new file mode 100644 index 0000000..11adce3 --- /dev/null +++ b/runtime/test/test_int64.c @@ -0,0 +1,238 @@ +/* ************************************************************************** */ +/* */ +/* The Compcert verified compiler */ +/* */ +/* Xavier Leroy, INRIA Paris-Rocquencourt */ +/* */ +/* Copyright (c) 2013 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* Redistribution and use in source and binary forms, with or without */ +/* modification, are permitted provided that the following conditions are met:*/ +/* * Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* * Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution. */ +/* * Neither the name of the <organization> nor the */ +/* names of its contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written permission.*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ +/* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */ +/* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */ +/* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT */ +/* HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ +/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */ +/* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */ +/* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* ************************************************************************** */ + +/* Differential testing of 64-bit integer operations */ +/* This file is to be compiled by a C compiler other than CompCert C */ + +#include <stdio.h> +#include <stdlib.h> + +typedef unsigned long long u64; +typedef signed long long s64; + +extern s64 __i64_neg(s64 x); +extern s64 __i64_add(s64 x, s64 y); +extern s64 __i64_sub(s64 x, s64 y); +extern s64 __i64_mul(s64 x, s64 y); +extern u64 __i64_udiv(u64 x, u64 y); +extern u64 __i64_umod(u64 x, u64 y); +extern s64 __i64_sdiv(s64 x, s64 y); +extern s64 __i64_smod(s64 x, s64 y); + +extern u64 __i64_shl(u64 x, unsigned amount); +extern u64 __i64_shr(u64 x, unsigned amount); +extern s64 __i64_sar(s64 x, unsigned amount); + +extern int __i64_ucmp(u64 x, u64 y); +extern int __i64_scmp(s64 x, s64 y); + +extern double __i64_utod(u64 x); +extern double __i64_stod(s64 x); +extern u64 __i64_dtou(double d); +extern s64 __i64_dtos(double d); + +static u64 rnd64(void) +{ + static u64 seed = 0; + seed = seed * 6364136223846793005ULL + 1442695040888963407ULL; + return seed; +} + +static int error = 0; + +static void test1(u64 x, u64 y) +{ + u64 z, uy; + s64 t, sy; + int i; + double f, g; + + z = __i64_neg(x); + if (z != -x) + error++, printf("- %016llx = %016llx, expected %016llx\n", x, z, -x); + z = __i64_add(x, y); + if (z != x + y) + error++, printf("%016llx + %016llx = %016llx, expected %016llx\n", x, y, z, x + y); + + z = __i64_sub(x, y); + if (z != x - y) + error++, printf("%016llx - %016llx = %016llx, expected %016llx\n", x, y, z, x - y); + + z = __i64_mul(x, y); + if (z != x * y) + error++, printf("%016llx * %016llx = %016llx, expected %016llx\n", x, y, z, x * y); + + if (y != 0) { + + z = __i64_udiv(x, y); + if (z != x / y) + error++, printf("%llu /u %llu = %llu, expected %llu\n", x, y, z, x / y); + + z = __i64_umod(x, y); + if (z != x % y) + error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, y, z, x % y); + + } + + if (y != 0 && !(x == 0x800000000000LLU && y == -1)) { + + t = __i64_sdiv(x, y); + if (t != (s64) x / (s64) y) + error++, printf("%lld /s %lld = %lld, expected %lld\n", x, y, t, (s64) x / (s64) y); + + t = __i64_smod(x, y); + if (t != (s64) x % (s64) y) + error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, y, t, (s64) x % (s64) y); + + } + + /* Test division with small (32-bit) divisors */ + uy = y >> 32; + sy = (s64)y >> 32; + + if (uy != 0) { + + z = __i64_udiv(x, uy); + if (z != x / uy) + error++, printf("%llu /u %llu = %llu, expected %llu\n", x, uy, z, x / uy); + + z = __i64_umod(x, uy); + if (z != x % uy) + error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, uy, z, x % uy); + + } + + if (sy != 0 && !(x == 0x800000000000LLU && sy == -1)) { + + t = __i64_sdiv(x, sy); + if (t != (s64) x / sy) + error++, printf("%lld /s %lld = %lld, expected %lld\n", x, sy, t, (s64) x / sy); + + t = __i64_smod(x, sy); + if (t != (s64) x % sy) + error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, sy, t, (s64) x % sy); + + } + + i = y & 63; + + z = __i64_shl(x, i); + if (z != x << i) + error++, printf("%016llx << %d = %016llx, expected %016llx\n", x, i, z, x << i); + + z = __i64_shr(x, i); + if (z != x >> i) + error++, printf("%016llx >>u %d = %016llx, expected %016llx\n", x, i, z, x >> i); + + t = __i64_sar(x, i); + if (t != (s64) x >> i) + error++, printf("%016llx >>s %d = %016llx, expected %016llx\n", x, i, t, (s64) x >> i); + + i = __i64_ucmp(x, y); + if (x == y) { + if (! (i == 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected 0\n", x, y, i); + } + else if (x < y) { + if (! (i < 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i); + } else { + if (! (i > 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i); + } + + i = __i64_scmp(x, y); + if (x == y) { + if (! (i == 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected 0\n", x, y, i); + } + else if ((s64)x < (s64)y) { + if (! (i < 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i); + } else { + if (! (i > 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i); + } + + f = __i64_utod(x); + g = (double) x; + if (f != g) + error++, printf("(double) %llu (u) = %a, expected %a\n", x, f, g); + + f = __i64_stod(x); + g = (double) (s64) x; + if (f != g) + error++, printf("(double) %lld (s) = %a, expected %a\n", x, f, g); + + f = ((double) x) * 0.0001; + z = __i64_dtou(f); + if (z != (u64) f) + error++, printf("(u64) %a = %llu, expected %llu\n", f, z, (u64) f); + + f = ((double) (s64) x) * 0.0001; + t = __i64_dtos(f); + if (t != (s64) f) + error++, printf("(s64) %a = %lld, expected %lld\n", f, z, (s64) f); +} + +#define NSPECIFIC 8 + +unsigned long long specific[NSPECIFIC] = { + 0, 1, -1, 0x7FFFFFFFULL, 0x80000000ULL, 0xFFFFFFFFULL, + 0x7FFFFFFFFFFFULL, 0x8000000000000000ULL +}; + +int main() +{ + int i, j; + + /* Some specific values */ + for (i = 0; i < NSPECIFIC; i++) + for (j = 0; j < NSPECIFIC; j++) + test1(specific[i], specific[j]); + + /* Random testing */ + for (i = 0; i < 50; i++) { + for (j = 0; j < 1000000; j++) + test1(rnd64(), rnd64()); + printf("."); fflush(stdout); + } + printf("\n"); + if (error == 0) + printf ("Test passed\n"); + else + printf ("TEST FAILED, %d error(s) detected\n", error); + return 0; +} + diff --git a/test/c/Makefile b/test/c/Makefile index 6839ac4..a11ab69 100644 --- a/test/c/Makefile +++ b/test/c/Makefile @@ -13,7 +13,7 @@ TIME=xtime -o /dev/null -mintime 1.0 # Xavier's hack PROGS=fib integr qsort fft sha1 aes almabench lists \ binarytrees fannkuch knucleotide mandelbrot nbody \ nsieve nsievebits spectral vmach \ - bisect chomp perlin floats floats-basics + bisect chomp perlin siphash24 floats floats-basics PROGS_INTERP=floats floats-basics diff --git a/test/c/Results/siphash24 b/test/c/Results/siphash24 new file mode 100644 index 0000000..a99b4f1 --- /dev/null +++ b/test/c/Results/siphash24 @@ -0,0 +1 @@ +test vectors ok diff --git a/test/c/siphash24.c b/test/c/siphash24.c new file mode 100644 index 0000000..0ed1841 --- /dev/null +++ b/test/c/siphash24.c @@ -0,0 +1,255 @@ +/* + SipHash reference C implementation + + Written in 2012 by + Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> + Daniel J. Bernstein <djb@cr.yp.to> + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ + +#include <stdio.h> +#include <string.h> +typedef unsigned long long u64; +typedef unsigned long u32; +typedef unsigned char u8; + +#define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) + +#define U32TO8_LE(p, v) \ + (p)[0] = (u8)((v) ); (p)[1] = (u8)((v) >> 8); \ + (p)[2] = (u8)((v) >> 16); (p)[3] = (u8)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (u32)((v) )); \ + U32TO8_LE((p) + 4, (u32)((v) >> 32)); + +#define U8TO64_LE(p) \ + (((u64)((p)[0]) ) | \ + ((u64)((p)[1]) << 8) | \ + ((u64)((p)[2]) << 16) | \ + ((u64)((p)[3]) << 24) | \ + ((u64)((p)[4]) << 32) | \ + ((u64)((p)[5]) << 40) | \ + ((u64)((p)[6]) << 48) | \ + ((u64)((p)[7]) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \ + v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \ + v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \ + v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \ + } while(0) + +/* SipHash-2-4 */ +int crypto_auth( unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k ) +{ + /* "somepseudorandomlygeneratedbytes" */ + u64 v0 = 0x736f6d6570736575ULL; + u64 v1 = 0x646f72616e646f6dULL; + u64 v2 = 0x6c7967656e657261ULL; + u64 v3 = 0x7465646279746573ULL; + u64 b; + u64 k0 = U8TO64_LE( k ); + u64 k1 = U8TO64_LE( k + 8 ); + u64 m; + const u8 *end = in + inlen - ( inlen % sizeof( u64 ) ); + const int left = inlen & 7; + b = ( ( u64 )inlen ) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + for ( ; in != end; in += 8 ) + { + m = U8TO64_LE( in ); +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); + printf( "(%3d) compress %08x %08x\n", ( int )inlen, ( u32 )( m >> 32 ), ( u32 )m ); +#endif + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } + + switch( left ) + { + case 7: b |= ( ( u64 )in[ 6] ) << 48; + + case 6: b |= ( ( u64 )in[ 5] ) << 40; + + case 5: b |= ( ( u64 )in[ 4] ) << 32; + + case 4: b |= ( ( u64 )in[ 3] ) << 24; + + case 3: b |= ( ( u64 )in[ 2] ) << 16; + + case 2: b |= ( ( u64 )in[ 1] ) << 8; + + case 1: b |= ( ( u64 )in[ 0] ); break; + + case 0: break; + } + +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); + printf( "(%3d) padding %08x %08x\n", ( int )inlen, ( u32 )( b >> 32 ), ( u32 )b ); +#endif + v3 ^= b; + SIPROUND; + SIPROUND; + v0 ^= b; +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); +#endif + v2 ^= 0xff; + SIPROUND; + SIPROUND; + SIPROUND; + SIPROUND; + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE( out, b ); + return 0; +} + +/* + SipHash-2-4 output with + k = 00 01 02 ... + and + in = (empty string) + in = 00 (1 byte) + in = 00 01 (2 bytes) + in = 00 01 02 (3 bytes) + ... + in = 00 01 02 ... 3e (63 bytes) +*/ +u8 vectors[64][8] = +{ + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, }, + { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, }, + { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, }, + { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, }, + { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, }, + { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, }, + { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, }, + { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, }, + { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, }, + { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, }, + { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, }, + { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, }, + { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, }, + { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, }, + { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, }, + { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, }, + { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, }, + { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, }, + { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, }, + { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, }, + { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, }, + { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, }, + { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, }, + { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, }, + { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, }, + { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, }, + { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, }, + { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, }, + { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, }, + { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, }, + { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, }, + { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, }, + { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, }, + { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, }, + { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, }, + { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, }, + { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, }, + { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, }, + { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, }, + { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, }, + { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, }, + { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, }, + { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, }, + { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, }, + { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, }, + { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, }, + { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, }, + { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, }, + { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, }, + { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, }, + { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, }, + { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, }, + { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, }, + { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, }, + { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, }, + { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, }, + { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, }, + { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, }, + { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, }, + { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, }, + { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, }, + { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, }, + { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, }, + { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, } +}; + + +int test_vectors() +{ +#define MAXLEN 64 + u8 in[MAXLEN], out[8], k[16]; + int i; + int ok = 1; + + for( i = 0; i < 16; ++i ) k[i] = i; + + for( i = 0; i < MAXLEN; ++i ) + { + in[i] = i; + crypto_auth( out, in, i, k ); + + if ( memcmp( out, vectors[i], 8 ) ) + { + printf( "test vector failed for %d bytes\n", i ); + ok = 0; + } + } + + return ok; +} + +u8 testdata[100] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 34, 56, 78, 90 }; + +int speed_test(void) +{ + u8 out[8], k[16]; + int i; + + for(i = 0; i < 16; ++i ) k[i] = i; + for(i = 0; i < 10000000; i++) { + crypto_auth(out, testdata, 100, k); + } + return out[0]; +} + +int main() +{ + if ( test_vectors() ) printf( "test vectors ok\n" ); + (void) speed_test(); + return 0; +} diff --git a/test/regression/Makefile b/test/regression/Makefile index 48e3b97..2974444 100644 --- a/test/regression/Makefile +++ b/test/regression/Makefile @@ -12,7 +12,7 @@ TESTS=attribs1 bitfields1 bitfields2 bitfields3 bitfields4 \ expr1 expr6 funptr2 initializers volatile1 volatile2 volatile3 \ funct3 expr5 struct7 struct8 struct11 casts1 casts2 char1 \ sizeof1 sizeof2 packedstruct1 packedstruct2 \ - instrsel bool compar switch + instrsel bool compar switch int64 # Other tests: should compile to .s without errors (but expect warnings) EXTRAS=annot1 commaprec expr2 expr3 expr4 extern1 funct2 funptr1 init1 \ diff --git a/test/regression/Results/int64 b/test/regression/Results/int64 new file mode 100644 index 0000000..15e4ecc --- /dev/null +++ b/test/regression/Results/int64 @@ -0,0 +1,3780 @@ +x = 0 +y = 0 +-x = 0 +x + y = 0 +x - y = 0 +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffffffffffff +x & y = 0 +x | y = 0 +x ^ y = 0 +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = eq +x cmps y = eq +utod x = 0 +dtou f = 0 +stod x = 0 +dtos f = 0 + +x = 0 +y = 1 +-x = 0 +x + y = 1 +x - y = ffffffffffffffff +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffffffffffff +x & y = 0 +x | y = 1 +x ^ y = 1 +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 0 +dtou f = 0 +stod x = 0 +dtos f = 0 + +x = 0 +y = ffffffffffffffff +-x = 0 +x + y = ffffffffffffffff +x - y = 1 +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffffffffffff +x & y = 0 +x | y = ffffffffffffffff +x ^ y = ffffffffffffffff +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = gt +utod x = 0 +dtou f = 0 +stod x = 0 +dtos f = 0 + +x = 0 +y = 7fffffff +-x = 0 +x + y = 7fffffff +x - y = ffffffff80000001 +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffffffffffff +x & y = 0 +x | y = 7fffffff +x ^ y = 7fffffff +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 0 +dtou f = 0 +stod x = 0 +dtos f = 0 + +x = 0 +y = 80000000 +-x = 0 +x + y = 80000000 +x - y = ffffffff80000000 +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffffffffffff +x & y = 0 +x | y = 80000000 +x ^ y = 80000000 +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 0 +dtou f = 0 +stod x = 0 +dtos f = 0 + +x = 0 +y = 14057b7ef767814f +-x = 0 +x + y = 14057b7ef767814f +x - y = ebfa848108987eb1 +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffffffffffff +x & y = 0 +x | y = 14057b7ef767814f +x ^ y = 14057b7ef767814f +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 0 +dtou f = 0 +stod x = 0 +dtos f = 0 + +x = 1a08ee1184ba6d32 +y = 0 +-x = e5f711ee7b4592ce +x + y = 1a08ee1184ba6d32 +x - y = 1a08ee1184ba6d32 +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = e5f711ee7b4592cd +x & y = 0 +x | y = 1a08ee1184ba6d32 +x ^ y = 1a08ee1184ba6d32 +x << i = 1a08ee1184ba6d32 +x >>u i = 1a08ee1184ba6d32 +x >>s i = 1a08ee1184ba6d32 +x cmpu y = gt +x cmps y = gt +utod x = 43ba08ee1184ba6d +dtou f = aa9f48f29aaf +stod x = 43ba08ee1184ba6d +dtos f = aa9f48f29aaf + +x = 1 +y = 0 +-x = ffffffffffffffff +x + y = 1 +x - y = 1 +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = fffffffffffffffe +x & y = 0 +x | y = 1 +x ^ y = 1 +x << i = 1 +x >>u i = 1 +x >>s i = 1 +x cmpu y = gt +x cmps y = gt +utod x = 3ff0000000000000 +dtou f = 0 +stod x = 3ff0000000000000 +dtos f = 0 + +x = 1 +y = 1 +-x = ffffffffffffffff +x + y = 2 +x - y = 0 +x * y = 1 +x /u y = 1 +x %u y = 0 +x /s y = 1 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = fffffffffffffffe +x & y = 1 +x | y = 1 +x ^ y = 0 +x << i = 2 +x >>u i = 0 +x >>s i = 0 +x cmpu y = eq +x cmps y = eq +utod x = 3ff0000000000000 +dtou f = 0 +stod x = 3ff0000000000000 +dtos f = 0 + +x = 1 +y = ffffffffffffffff +-x = ffffffffffffffff +x + y = 0 +x - y = 2 +x * y = ffffffffffffffff +x /u y = 0 +x %u y = 1 +x /s y = ffffffffffffffff +x %s y = 0 +x /u y2 = 0 +x %u y2 = 1 +x /s y3 = ffffffffffffffff +x %s y3 = 0 +~x = fffffffffffffffe +x & y = 1 +x | y = ffffffffffffffff +x ^ y = fffffffffffffffe +x << i = 8000000000000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = gt +utod x = 3ff0000000000000 +dtou f = 0 +stod x = 3ff0000000000000 +dtos f = 0 + +x = 1 +y = 7fffffff +-x = ffffffffffffffff +x + y = 80000000 +x - y = ffffffff80000002 +x * y = 7fffffff +x /u y = 0 +x %u y = 1 +x /s y = 0 +x %s y = 1 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = fffffffffffffffe +x & y = 1 +x | y = 7fffffff +x ^ y = 7ffffffe +x << i = 8000000000000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 3ff0000000000000 +dtou f = 0 +stod x = 3ff0000000000000 +dtos f = 0 + +x = 1 +y = 80000000 +-x = ffffffffffffffff +x + y = 80000001 +x - y = ffffffff80000001 +x * y = 80000000 +x /u y = 0 +x %u y = 1 +x /s y = 0 +x %s y = 1 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = fffffffffffffffe +x & y = 0 +x | y = 80000001 +x ^ y = 80000001 +x << i = 1 +x >>u i = 1 +x >>s i = 1 +x cmpu y = lt +x cmps y = lt +utod x = 3ff0000000000000 +dtou f = 0 +stod x = 3ff0000000000000 +dtos f = 0 + +x = 1 +y = 9af678222e728119 +-x = ffffffffffffffff +x + y = 9af678222e72811a +x - y = 650987ddd18d7ee8 +x * y = 9af678222e728119 +x /u y = 0 +x %u y = 1 +x /s y = 0 +x %s y = 1 +x /u y2 = 0 +x %u y2 = 1 +x /s y3 = 0 +x %s y3 = 1 +~x = fffffffffffffffe +x & y = 1 +x | y = 9af678222e728119 +x ^ y = 9af678222e728118 +x << i = 2000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = gt +utod x = 3ff0000000000000 +dtou f = 0 +stod x = 3ff0000000000000 +dtos f = 0 + +x = 66b61ae97f2099b4 +y = 1 +-x = 9949e51680df664c +x + y = 66b61ae97f2099b5 +x - y = 66b61ae97f2099b3 +x * y = 66b61ae97f2099b4 +x /u y = 66b61ae97f2099b4 +x %u y = 0 +x /s y = 66b61ae97f2099b4 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = 9949e51680df664b +x & y = 0 +x | y = 66b61ae97f2099b5 +x ^ y = 66b61ae97f2099b5 +x << i = cd6c35d2fe413368 +x >>u i = 335b0d74bf904cda +x >>s i = 335b0d74bf904cda +x cmpu y = gt +x cmps y = gt +utod x = 43d9ad86ba5fc826 +dtou f = 2a1210c1f1b75 +stod x = 43d9ad86ba5fc826 +dtos f = 2a1210c1f1b75 + +x = ffffffffffffffff +y = 0 +-x = 1 +x + y = ffffffffffffffff +x - y = ffffffffffffffff +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = 0 +x & y = 0 +x | y = ffffffffffffffff +x ^ y = ffffffffffffffff +x << i = ffffffffffffffff +x >>u i = ffffffffffffffff +x >>s i = ffffffffffffffff +x cmpu y = gt +x cmps y = lt +utod x = 43f0000000000000 +dtou f = 68db8bac710cb +stod x = bff0000000000000 +dtos f = 0 + +x = ffffffffffffffff +y = 1 +-x = 1 +x + y = 0 +x - y = fffffffffffffffe +x * y = ffffffffffffffff +x /u y = ffffffffffffffff +x %u y = 0 +x /s y = ffffffffffffffff +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = 0 +x & y = 1 +x | y = ffffffffffffffff +x ^ y = fffffffffffffffe +x << i = fffffffffffffffe +x >>u i = 7fffffffffffffff +x >>s i = ffffffffffffffff +x cmpu y = gt +x cmps y = lt +utod x = 43f0000000000000 +dtou f = 68db8bac710cb +stod x = bff0000000000000 +dtos f = 0 + +x = ffffffffffffffff +y = ffffffffffffffff +-x = 1 +x + y = fffffffffffffffe +x - y = 0 +x * y = 1 +x /u y = 1 +x %u y = 0 +x /s y = 1 +x %s y = 0 +x /u y2 = 100000001 +x %u y2 = 0 +x /s y3 = 1 +x %s y3 = 0 +~x = 0 +x & y = ffffffffffffffff +x | y = ffffffffffffffff +x ^ y = 0 +x << i = 8000000000000000 +x >>u i = 1 +x >>s i = ffffffffffffffff +x cmpu y = eq +x cmps y = eq +utod x = 43f0000000000000 +dtou f = 68db8bac710cb +stod x = bff0000000000000 +dtos f = 0 + +x = ffffffffffffffff +y = 7fffffff +-x = 1 +x + y = 7ffffffe +x - y = ffffffff80000000 +x * y = ffffffff80000001 +x /u y = 200000004 +x %u y = 3 +x /s y = 0 +x %s y = ffffffffffffffff +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = 0 +x & y = 7fffffff +x | y = ffffffffffffffff +x ^ y = ffffffff80000000 +x << i = 8000000000000000 +x >>u i = 1 +x >>s i = ffffffffffffffff +x cmpu y = gt +x cmps y = lt +utod x = 43f0000000000000 +dtou f = 68db8bac710cb +stod x = bff0000000000000 +dtos f = 0 + +x = ffffffffffffffff +y = 80000000 +-x = 1 +x + y = 7fffffff +x - y = ffffffff7fffffff +x * y = ffffffff80000000 +x /u y = 1ffffffff +x %u y = 7fffffff +x /s y = 0 +x %s y = ffffffffffffffff +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = 0 +x & y = 80000000 +x | y = ffffffffffffffff +x ^ y = ffffffff7fffffff +x << i = ffffffffffffffff +x >>u i = ffffffffffffffff +x >>s i = ffffffffffffffff +x cmpu y = gt +x cmps y = lt +utod x = 43f0000000000000 +dtou f = 68db8bac710cb +stod x = bff0000000000000 +dtos f = 0 + +x = ffffffffffffffff +y = 62354cda6226d1f3 +-x = 1 +x + y = 62354cda6226d1f2 +x - y = 9dcab3259dd92e0c +x * y = 9dcab3259dd92e0d +x /u y = 2 +x %u y = 3b95664b3bb25c19 +x /s y = 0 +x %s y = ffffffffffffffff +x /u y2 = 29b51243c +x %u y2 = 2db954e7 +x /s y3 = 0 +x %s y3 = ffffffffffffffff +~x = 0 +x & y = 62354cda6226d1f3 +x | y = ffffffffffffffff +x ^ y = 9dcab3259dd92e0c +x << i = fff8000000000000 +x >>u i = 1fff +x >>s i = ffffffffffffffff +x cmpu y = gt +x cmps y = lt +utod x = 43f0000000000000 +dtou f = 68db8bac710cb +stod x = bff0000000000000 +dtos f = 0 + +x = 8f947f36d0d0f606 +y = ffffffffffffffff +-x = 706b80c92f2f09fa +x + y = 8f947f36d0d0f605 +x - y = 8f947f36d0d0f607 +x * y = 706b80c92f2f09fa +x /u y = 0 +x %u y = 8f947f36d0d0f606 +x /s y = 706b80c92f2f09fa +x %s y = 0 +x /u y2 = 8f947f37 +x %u y2 = 6065753d +x /s y3 = 706b80c92f2f09fa +x %s y3 = 0 +~x = 706b80c92f2f09f9 +x & y = 8f947f36d0d0f606 +x | y = ffffffffffffffff +x ^ y = 706b80c92f2f09f9 +x << i = 0 +x >>u i = 1 +x >>s i = ffffffffffffffff +x cmpu y = lt +x cmps y = lt +utod x = 43e1f28fe6da1a1f +dtou f = 3acf760d70f97 +stod x = c3dc1ae0324bcbc2 +dtos f = fffd1f3ea60ffecc + +x = 7fffffff +y = 0 +-x = ffffffff80000001 +x + y = 7fffffff +x - y = 7fffffff +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffff80000000 +x & y = 0 +x | y = 7fffffff +x ^ y = 7fffffff +x << i = 7fffffff +x >>u i = 7fffffff +x >>s i = 7fffffff +x cmpu y = gt +x cmps y = gt +utod x = 41dfffffffc00000 +dtou f = 346dc +stod x = 41dfffffffc00000 +dtos f = 346dc + +x = 7fffffff +y = 1 +-x = ffffffff80000001 +x + y = 80000000 +x - y = 7ffffffe +x * y = 7fffffff +x /u y = 7fffffff +x %u y = 0 +x /s y = 7fffffff +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffff80000000 +x & y = 1 +x | y = 7fffffff +x ^ y = 7ffffffe +x << i = fffffffe +x >>u i = 3fffffff +x >>s i = 3fffffff +x cmpu y = gt +x cmps y = gt +utod x = 41dfffffffc00000 +dtou f = 346dc +stod x = 41dfffffffc00000 +dtos f = 346dc + +x = 7fffffff +y = ffffffffffffffff +-x = ffffffff80000001 +x + y = 7ffffffe +x - y = 80000000 +x * y = ffffffff80000001 +x /u y = 0 +x %u y = 7fffffff +x /s y = ffffffff80000001 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 7fffffff +x /s y3 = ffffffff80000001 +x %s y3 = 0 +~x = ffffffff80000000 +x & y = 7fffffff +x | y = ffffffffffffffff +x ^ y = ffffffff80000000 +x << i = 8000000000000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = gt +utod x = 41dfffffffc00000 +dtou f = 346dc +stod x = 41dfffffffc00000 +dtos f = 346dc + +x = 7fffffff +y = 7fffffff +-x = ffffffff80000001 +x + y = fffffffe +x - y = 0 +x * y = 3fffffff00000001 +x /u y = 1 +x %u y = 0 +x /s y = 1 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffff80000000 +x & y = 7fffffff +x | y = 7fffffff +x ^ y = 0 +x << i = 8000000000000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = eq +x cmps y = eq +utod x = 41dfffffffc00000 +dtou f = 346dc +stod x = 41dfffffffc00000 +dtos f = 346dc + +x = 7fffffff +y = 80000000 +-x = ffffffff80000001 +x + y = ffffffff +x - y = ffffffffffffffff +x * y = 3fffffff80000000 +x /u y = 0 +x %u y = 7fffffff +x /s y = 0 +x %s y = 7fffffff +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffff80000000 +x & y = 0 +x | y = ffffffff +x ^ y = ffffffff +x << i = 7fffffff +x >>u i = 7fffffff +x >>s i = 7fffffff +x cmpu y = lt +x cmps y = lt +utod x = 41dfffffffc00000 +dtou f = 346dc +stod x = 41dfffffffc00000 +dtos f = 346dc + +x = 7fffffff +y = 144093704fadba5d +-x = ffffffff80000001 +x + y = 14409370cfadba5c +x - y = ebbf6c90305245a2 +x * y = 139649be305245a3 +x /u y = 0 +x %u y = 7fffffff +x /s y = 0 +x %s y = 7fffffff +x /u y2 = 6 +x %u y2 = 67c8b5f +x /s y3 = 6 +x %s y3 = 67c8b5f +~x = ffffffff80000000 +x & y = 4fadba5d +x | y = 144093707fffffff +x ^ y = 14409370305245a2 +x << i = fffffffe0000000 +x >>u i = 3 +x >>s i = 3 +x cmpu y = lt +x cmps y = lt +utod x = 41dfffffffc00000 +dtou f = 346dc +stod x = 41dfffffffc00000 +dtos f = 346dc + +x = 5b21778e3c8666a8 +y = 7fffffff +-x = a4de8871c3799958 +x + y = 5b21778ebc8666a7 +x - y = 5b21778dbc8666a9 +x * y = c321bbc5c3799958 +x /u y = b642ef1d +x %u y = 72c955c5 +x /s y = b642ef1d +x %s y = 72c955c5 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = a4de8871c3799957 +x & y = 3c8666a8 +x | y = 5b21778e7fffffff +x ^ y = 5b21778e43799957 +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = gt +x cmps y = gt +utod x = 43d6c85de38f219a +dtou f = 2553bfeb9de93 +stod x = 43d6c85de38f219a +dtos f = 2553bfeb9de93 + +x = 80000000 +y = 0 +-x = ffffffff80000000 +x + y = 80000000 +x - y = 80000000 +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffff7fffffff +x & y = 0 +x | y = 80000000 +x ^ y = 80000000 +x << i = 80000000 +x >>u i = 80000000 +x >>s i = 80000000 +x cmpu y = gt +x cmps y = gt +utod x = 41e0000000000000 +dtou f = 346dc +stod x = 41e0000000000000 +dtos f = 346dc + +x = 80000000 +y = 1 +-x = ffffffff80000000 +x + y = 80000001 +x - y = 7fffffff +x * y = 80000000 +x /u y = 80000000 +x %u y = 0 +x /s y = 80000000 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffff7fffffff +x & y = 0 +x | y = 80000001 +x ^ y = 80000001 +x << i = 100000000 +x >>u i = 40000000 +x >>s i = 40000000 +x cmpu y = gt +x cmps y = gt +utod x = 41e0000000000000 +dtou f = 346dc +stod x = 41e0000000000000 +dtos f = 346dc + +x = 80000000 +y = ffffffffffffffff +-x = ffffffff80000000 +x + y = 7fffffff +x - y = 80000001 +x * y = ffffffff80000000 +x /u y = 0 +x %u y = 80000000 +x /s y = ffffffff80000000 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 80000000 +x /s y3 = ffffffff80000000 +x %s y3 = 0 +~x = ffffffff7fffffff +x & y = 80000000 +x | y = ffffffffffffffff +x ^ y = ffffffff7fffffff +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = gt +utod x = 41e0000000000000 +dtou f = 346dc +stod x = 41e0000000000000 +dtos f = 346dc + +x = 80000000 +y = 7fffffff +-x = ffffffff80000000 +x + y = ffffffff +x - y = 1 +x * y = 3fffffff80000000 +x /u y = 1 +x %u y = 1 +x /s y = 1 +x %s y = 1 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffff7fffffff +x & y = 0 +x | y = ffffffff +x ^ y = ffffffff +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = gt +x cmps y = gt +utod x = 41e0000000000000 +dtou f = 346dc +stod x = 41e0000000000000 +dtos f = 346dc + +x = 80000000 +y = 80000000 +-x = ffffffff80000000 +x + y = 100000000 +x - y = 0 +x * y = 4000000000000000 +x /u y = 1 +x %u y = 0 +x /s y = 1 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = ffffffff7fffffff +x & y = 80000000 +x | y = 80000000 +x ^ y = 0 +x << i = 80000000 +x >>u i = 80000000 +x >>s i = 80000000 +x cmpu y = eq +x cmps y = eq +utod x = 41e0000000000000 +dtou f = 346dc +stod x = 41e0000000000000 +dtos f = 346dc + +x = 80000000 +y = 7b985bc1e7bce4d7 +-x = ffffffff80000000 +x + y = 7b985bc267bce4d7 +x - y = 8467a43e98431b29 +x * y = f3de726b80000000 +x /u y = 0 +x %u y = 80000000 +x /s y = 0 +x %s y = 80000000 +x /u y2 = 1 +x %u y2 = 467a43f +x /s y3 = 1 +x %s y3 = 467a43f +~x = ffffffff7fffffff +x & y = 80000000 +x | y = 7b985bc1e7bce4d7 +x ^ y = 7b985bc167bce4d7 +x << i = 40000000000000 +x >>u i = 100 +x >>s i = 100 +x cmpu y = lt +x cmps y = lt +utod x = 41e0000000000000 +dtou f = 346dc +stod x = 41e0000000000000 +dtos f = 346dc + +x = 7252e9376e45641a +y = 80000000 +-x = 8dad16c891ba9be6 +x + y = 7252e937ee45641a +x - y = 7252e936ee45641a +x * y = b722b20d00000000 +x /u y = e4a5d26e +x %u y = 6e45641a +x /s y = e4a5d26e +x %s y = 6e45641a +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +~x = 8dad16c891ba9be5 +x & y = 0 +x | y = 7252e937ee45641a +x ^ y = 7252e937ee45641a +x << i = 7252e9376e45641a +x >>u i = 7252e9376e45641a +x >>s i = 7252e9376e45641a +x cmpu y = gt +x cmps y = gt +utod x = 43dc94ba4ddb9159 +dtou f = 2ed3ba0c0e099 +stod x = 43dc94ba4ddb9159 +dtos f = 2ed3ba0c0e099 + +x = a220229ec164ffe1 +y = 5d7d4da4cb0e1adc +-x = 5ddfdd613e9b001f +x + y = ff9d70438c731abd +x - y = 44a2d4f9f656e505 +x * y = 52b323a3de16bf5c +x /u y = 1 +x %u y = 44a2d4f9f656e505 +x /s y = ffffffffffffffff +x %s y = ff9d70438c731abd +x /u y2 = 1bbf1cb77 +x %u y2 = c30dca5 +x /s y3 = fffffffefef21c87 +x %s y3 = ffffffffa2cb1e65 +~x = 5ddfdd613e9b001e +x & y = 200084c1041ac0 +x | y = ff7d6fbecb6efffd +x ^ y = ff5d6f3a0a6ae53d +x << i = ec164ffe10000000 +x >>u i = a220229ec +x >>s i = fffffffa220229ec +x cmpu y = gt +x cmps y = lt +utod x = 43e4440453d82ca0 +dtou f = 42681802c45c2 +stod x = c3d777f7584fa6c0 +dtos f = fffd98c8c56534f8 + +x = c73aa0d9a415dfb +y = 18e9107ab99b8b6e +-x = f38c55f265bea205 +x + y = 255cba8853dce969 +x - y = f38a9992e0a5d28d +x * y = 9c3790b61016aada +x /u y = 0 +x %u y = c73aa0d9a415dfb +x /s y = 0 +x %s y = c73aa0d9a415dfb +x /u y2 = 7ff714a4 +x %u y2 = 3f347d3 +x /s y3 = 7ff714a4 +x %s y3 = 3f347d3 +~x = f38c55f265bea204 +x & y = 86100089801096a +x | y = 1cfbba7fbbdbdfff +x ^ y = 149aba7723dad695 +x << i = 577ec00000000000 +x >>u i = 31ce +x >>s i = 31ce +x cmpu y = lt +x cmps y = lt +utod x = 43a8e7541b3482bc +dtou f = 519aad330d8d +stod x = 43a8e7541b3482bc +dtos f = 519aad330d8d + +x = e9bcd26890f095a5 +y = 329cb23ce0f7aa50 +-x = 16432d976f0f6a5b +x + y = 1c5984a571e83ff5 +x - y = b720202baff8eb55 +x * y = 9b6d58a9d0c15590 +x /u y = 4 +x %u y = 1f4a09750d11ec65 +x /s y = 0 +x %s y = e9bcd26890f095a5 +x /u y2 = 49e436778 +x %u y2 = 1e12e585 +x /s y3 = ffffffff8f651a9c +x %s y3 = fffffffff9ade115 +~x = 16432d976f0f6a5a +x & y = 209c922880f08000 +x | y = fbbcf27cf0f7bff5 +x ^ y = db20605470073ff5 +x << i = d26890f095a50000 +x >>u i = e9bcd26890f0 +x >>s i = ffffe9bcd26890f0 +x cmpu y = gt +x cmps y = lt +utod x = 43ed379a4d121e13 +dtou f = 5fbd298972a9d +stod x = c3b6432d976f0f6a +dtos f = ffff6e19ddd019d2 + +x = 8362aa9340fe215f +y = f986342416ec8002 +-x = 7c9d556cbf01dea1 +x + y = 7ce8deb757eaa161 +x - y = 89dc766f2a11a15d +x * y = e4a2b426803fc2be +x /u y = 0 +x %u y = 8362aa9340fe215f +x /s y = 13 +x %s y = fe6ccbe58d70a139 +x /u y2 = 86cb918b +x %u y2 = 910b6dd3 +x /s y3 = 133e437097 +x %s y3 = fffffffffe99a023 +~x = 7c9d556cbf01dea0 +x & y = 8102200000ec0002 +x | y = fbe6beb756fea15f +x ^ y = 7ae49eb75612a15d +x << i = d8aaa4d03f8857c +x >>u i = 20d8aaa4d03f8857 +x >>s i = e0d8aaa4d03f8857 +x cmpu y = lt +x cmps y = lt +utod x = 43e06c5552681fc4 +dtou f = 35d0c262d14d7 +stod x = c3df27555b2fc078 +dtos f = fffccf536b66040d + +x = 368083376ba4ffa9 +y = 6912b247b79a4904 +-x = c97f7cc8945b0057 +x + y = 9f93357f233f48ad +x - y = cd6dd0efb40ab6a5 +x * y = ec1ef87256252fa4 +x /u y = 0 +x %u y = 368083376ba4ffa9 +x /s y = 0 +x %s y = 368083376ba4ffa9 +x /u y2 = 84c9e8f3 +x %u y2 = 27966e44 +x /s y3 = 84c9e8f3 +x %s y3 = 27966e44 +~x = c97f7cc8945b0056 +x & y = 2000820723804900 +x | y = 7f92b377ffbeffad +x ^ y = 5f923170dc3eb6ad +x << i = 68083376ba4ffa90 +x >>u i = 368083376ba4ffa +x >>s i = 368083376ba4ffa +x cmpu y = lt +x cmps y = lt +utod x = 43cb40419bb5d280 +dtou f = 1652f2fb41ccc +stod x = 43cb40419bb5d280 +dtos f = 1652f2fb41ccc + +x = aefab65d77135303 +y = bfc7666ab0ba95d6 +-x = 510549a288ecacfd +x + y = 6ec21cc827cde8d9 +x - y = ef334ff2c658bd2d +x * y = b8e736ca29a62382 +x /u y = 0 +x %u y = aefab65d77135303 +x /s y = 1 +x %s y = ef334ff2c658bd2d +x /u y2 = e9932394 +x %u y2 = bed9fbb +x /s y3 = 142f786e7 +x %s y3 = ffffffffe6446d5d +~x = 510549a288ecacfc +x & y = aec2264830121102 +x | y = bffff67ff7bbd7d7 +x ^ y = 113dd037c7a9c6d5 +x << i = 975dc4d4c0c00000 +x >>u i = 2bbead975dc +x >>s i = fffffebbead975dc +x cmpu y = lt +x cmps y = lt +utod x = 43e5df56cbaee26a +dtou f = 47abea07f9115 +stod x = c3d4415268a23b2b +dtos f = fffded05e5b8804b + +x = dec3f99f561701ed +y = 307892d5fe586af8 +-x = 213c0660a9e8fe13 +x + y = f3c8c75546f6ce5 +x - y = ae4b66c957be96f5 +x * y = f0bbfc03bc8dff98 +x /u y = 4 +x %u y = 1ce1ae475cb5560d +x /s y = 0 +x %s y = dec3f99f561701ed +x /u y2 = 49889cbcc +x %u y2 = d961931 +x /s y3 = ffffffff5078c8f7 +x %s y3 = fffffffffb32ee6a +~x = 213c0660a9e8fe12 +x & y = 10409095561000e8 +x | y = fefbfbdffe5f6bfd +x ^ y = eebb6b4aa84f6b15 +x << i = ed00000000000000 +x >>u i = de +x >>s i = ffffffffffffffde +x cmpu y = gt +x cmps y = lt +utod x = 43ebd87f33eac2e0 +dtou f = 5b3ea899bcdcc +stod x = c3c09e033054f47f +dtos f = ffff2631ced4bd02 + +x = 7c15eb1a6c5b56e7 +y = 74078c767c0560ea +-x = 83ea14e593a4a919 +x + y = f01d7790e860b7d1 +x - y = 80e5ea3f055f5fd +x * y = fa248d23e2970f26 +x /u y = 1 +x %u y = 80e5ea3f055f5fd +x /s y = 1 +x %s y = 80e5ea3f055f5fd +x /u y2 = 111c647a7 +x %u y2 = 2a35fbed +x /s y3 = 111c647a7 +x %s y3 = 2a35fbed +~x = 83ea14e593a4a918 +x & y = 740588126c0140e2 +x | y = 7c17ef7e7c5f76ef +x ^ y = 812676c105e360d +x << i = 6d5b9c0000000000 +x >>u i = 1f057a +x >>s i = 1f057a +x cmpu y = gt +x cmps y = gt +utod x = 43df057ac69b16d6 +dtou f = 32d351f657ccf +stod x = 43df057ac69b16d6 +dtos f = 32d351f657ccf + +x = 2d2acce24f9fa071 +y = 6f47682e14d3c42c +-x = d2d5331db0605f8f +x + y = 9c7235106473649d +x - y = bde364b43acbdc45 +x * y = bb2c5143f769176c +x /u y = 0 +x %u y = 2d2acce24f9fa071 +x /s y = 0 +x %s y = 2d2acce24f9fa071 +x /u y2 = 67e883b7 +x %u y2 = 4d949d8f +x /s y3 = 67e883b7 +x %s y3 = 4d949d8f +~x = d2d5331db0605f8e +x & y = 2d02482204938020 +x | y = 6f6fecee5fdfe47d +x ^ y = 426da4cc5b4c645d +x << i = fa07100000000000 +x >>u i = 2d2ac +x >>s i = 2d2ac +x cmpu y = lt +x cmps y = lt +utod x = 43c695667127cfd0 +dtou f = 12801f7ddfe5a +stod x = 43c695667127cfd0 +dtos f = 12801f7ddfe5a + +x = 13621127ec8ed10b +y = 9a109dfc559db53e +-x = ec9deed813712ef5 +x + y = ad72af24422c8649 +x - y = 7951732b96f11bcd +x * y = f06266bf1f2267aa +x /u y = 0 +x %u y = 13621127ec8ed10b +x /s y = 0 +x %s y = 13621127ec8ed10b +x /u y2 = 203527d3 +x %u y2 = 339f3657 +x /s y3 = ffffffffcf52411e +x %s y3 = 47c75183 +~x = ec9deed813712ef4 +x & y = 12001124448c910a +x | y = 9b729dfffd9ff53f +x ^ y = 89728cdbb9136435 +x << i = c000000000000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = gt +utod x = 43b3621127ec8ed1 +dtou f = 7f076703304d +stod x = 43b3621127ec8ed1 +dtos f = 7f076703304d + +x = 4ab009d226201f35 +y = da130a0806148a0 +-x = b54ff62dd9dfe0cb +x + y = 58513a72a68167d5 +x - y = 3d0ed931a5bed695 +x * y = 27bc30f72fef6920 +x /u y = 5 +x %u y = 68a16afa439b415 +x /s y = 5 +x %s y = 68a16afa439b415 +x /u y2 = 57ad3b593 +x %u y2 = 7501355 +x /s y3 = 57ad3b593 +x %s y3 = 7501355 +~x = b54ff62dd9dfe0ca +x & y = 8a0008000200820 +x | y = 4fb139f2a6615fb5 +x ^ y = 47113972a6415795 +x << i = 26201f3500000000 +x >>u i = 4ab009d2 +x >>s i = 4ab009d2 +x cmpu y = gt +x cmps y = gt +utod x = 43d2ac0274898808 +dtou f = 1e979155aadac +stod x = 43d2ac0274898808 +dtos f = 1e979155aadac + +x = a8f485b490a8a56f +y = 2dd1b3c84cb9a6d2 +-x = 570b7a4b6f575a91 +x + y = d6c6397cdd624c41 +x - y = 7b22d1ec43eefe9d +x * y = e3c43aa086d4af0e +x /u y = 3 +x %u y = 1f7f6a5baa7bb0f9 +x /s y = ffffffffffffffff +x %s y = d6c6397cdd624c41 +x /u y2 = 3affbc857 +x %u y2 = 2cd84c77 +x /s y3 = fffffffe19aa1e7c +x %s y3 = ffffffffe134208f +~x = 570b7a4b6f575a90 +x & y = 28d0818000a8a442 +x | y = adf5b7fcdcb9a7ff +x ^ y = 8525367cdc1103bd +x << i = 16d242a295bc0000 +x >>u i = 2a3d216d242a +x >>s i = ffffea3d216d242a +x cmpu y = gt +x cmps y = lt +utod x = 43e51e90b6921515 +dtou f = 45343bae4fbb7 +stod x = c3d5c2de92dbd5d7 +dtos f = fffdc58b001deaec + +x = ac82b442fe060239 +y = ba09c177d0bd2c54 +-x = 537d4bbd01f9fdc7 +x + y = 668c75bacec32e8d +x - y = f278f2cb2d48d5e5 +x * y = b0321761566f86b4 +x /u y = 0 +x %u y = ac82b442fe060239 +x /s y = 1 +x %s y = f278f2cb2d48d5e5 +x /u y2 = ed62a033 +x %u y2 = 9a9c1784 +x /s y3 = 1317fc5dc +x %s y3 = ffffffffdaba2cf5 +~x = 537d4bbd01f9fdc6 +x & y = a8008042d0040010 +x | y = be8bf577febf2e7d +x ^ y = 168b75352ebb2e6d +x << i = 442fe06023900000 +x >>u i = ac82b442fe0 +x >>s i = fffffac82b442fe0 +x cmpu y = lt +x cmps y = lt +utod x = 43e59056885fc0c0 +dtou f = 46a90b2a98617 +stod x = c3d4df52ef407e7f +dtos f = fffddcd7f7e2754d + +x = b18070243e89f813 +y = 8b9a402e6ec889a6 +-x = 4e7f8fdbc17607ed +x + y = 3d1ab052ad5281b9 +x - y = 25e62ff5cfc16e6d +x * y = 4bb994c45c110752 +x /u y = 1 +x %u y = 25e62ff5cfc16e6d +x /s y = 0 +x %s y = b18070243e89f813 +x /u y2 = 1457fa721 +x %u y2 = 62f7b025 +x /s y3 = aca563d9 +x %s y3 = fffffffffc51c715 +~x = 4e7f8fdbc17607ec +x & y = 818040242e888802 +x | y = bb9a702e7ec9f9b7 +x ^ y = 3a1a300a504171b5 +x << i = a27e04c000000000 +x >>u i = 2c601c0 +x >>s i = fffffffffec601c0 +x cmpu y = gt +x cmps y = gt +utod x = 43e6300e0487d13f +dtou f = 48b46746f5fb1 +stod x = c3d39fe3f6f05d82 +dtos f = fffdfd8db9a84ee6 + +x = 81dc3a61528f0d7d +y = 20670ecc67fee348 +-x = 7e23c59ead70f283 +x + y = a243492dba8df0c5 +x - y = 61752b94ea902a35 +x * y = 8c82c58cbe37a228 +x /u y = 4 +x %u y = 3fff2fb293805d +x /s y = fffffffffffffffd +x %s y = e31166c68a8bb755 +x /u y2 = 401f99d2e +x %u y2 = 1b8b48d5 +x /s y3 = fffffffc1b6b5e46 +x %s y3 = ffffffffebae19b5 +~x = 7e23c59ead70f282 +x & y = 440a40428e0148 +x | y = a1ff3eed77ffef7d +x ^ y = a1bb34ad3571ee35 +x << i = dc3a61528f0d7d00 +x >>u i = 81dc3a61528f0d +x >>s i = ff81dc3a61528f0d +x cmpu y = gt +x cmps y = lt +utod x = 43e03b874c2a51e2 +dtou f = 3530d5f787ce7 +stod x = c3df88f167ab5c3d +dtos f = fffcc554a4b16c1d + +x = 148943805ade2cf7 +y = b623237a886f1ba +-x = eb76bc7fa521d309 +x + y = 1feb75b803651eb1 +x - y = 9271148b2573b3d +x * y = 732a25abcf0b3276 +x /u y = 1 +x %u y = 9271148b2573b3d +x /s y = 1 +x %s y = 9271148b2573b3d +x /u y2 = 1cdd42781 +x %u y2 = 8307e40 +x /s y3 = 1cdd42781 +x %s y3 = 8307e40 +~x = eb76bc7fa521d308 +x & y = 200088620b2 +x | y = 1feb73b7fadefdff +x ^ y = 1feb71b7f258dd4d +x << i = dc00000000000000 +x >>u i = 5 +x >>s i = 5 +x cmpu y = gt +x cmps y = gt +utod x = 43b48943805ade2d +dtou f = 869600d40a9a +stod x = 43b48943805ade2d +dtos f = 869600d40a9a + +x = 3596d7e2724d4501 +y = 200951a3d9cd217c +-x = ca69281d8db2baff +x + y = 55a029864c1a667d +x - y = 158d863e98802385 +x * y = 12f0a6c6e1f8d7c +x /u y = 1 +x %u y = 158d863e98802385 +x /s y = 1 +x %s y = 158d863e98802385 +x /u y2 = 1ac3a0a48 +x %u y2 = f8ff129 +x /s y3 = 1ac3a0a48 +x %s y3 = f8ff129 +~x = ca69281d8db2bafe +x & y = 200051a2504d0100 +x | y = 359fd7e3fbcd657d +x ^ y = 159f8641ab80647d +x << i = 1000000000000000 +x >>u i = 3 +x >>s i = 3 +x cmpu y = gt +x cmps y = gt +utod x = 43cacb6bf13926a3 +dtou f = 15f33cfbad626 +stod x = 43cacb6bf13926a3 +dtos f = 15f33cfbad626 + +x = 8364db7a513ee81b +y = 39c53d919052b30e +-x = 7c9b2485aec117e5 +x + y = bd2a190be1919b29 +x - y = 499f9de8c0ec350d +x * y = 7192d67ff661927a +x /u y = 2 +x %u y = fda6057309981ff +x /s y = fffffffffffffffe +x %s y = f6ef569d71e44e37 +x /u y2 = 2464000e3 +x %u y2 = d195088 +x /s y3 = fffffffdd7d46212 +x %s y3 = fffffffffcbb11e9 +~x = 7c9b2485aec117e4 +x & y = 14419101012a00a +x | y = bbe5fffbd17efb1f +x ^ y = baa1e6ebc16c5b15 +x << i = 36de944fba06c000 +x >>u i = 20d936de944fb +x >>s i = fffe0d936de944fb +x cmpu y = gt +x cmps y = lt +utod x = 43e06c9b6f4a27dd +dtou f = 35d1a821b799f +stod x = c3df26c9216bb046 +dtos f = fffccf61c75468d5 + +x = ca6ee36ebbeaecc5 +y = 7fe0e5d6d5d1daf0 +-x = 35911c914415133b +x + y = 4a4fc94591bcc7b5 +x - y = 4a8dfd97e61911d5 +x * y = 28f18b4f6fb2bab0 +x /u y = 1 +x %u y = 4a8dfd97e61911d5 +x /s y = 0 +x %s y = ca6ee36ebbeaecc5 +x /u y2 = 195403f42 +x %u y2 = 67e00199 +x /s y3 = ffffffff94c3b859 +x %s y3 = ffffffffd787355f +~x = 35911c914415133a +x & y = 4a60e14691c0c8c0 +x | y = ffeee7fefffbfef5 +x ^ y = b58e06b86e3b3635 +x << i = ecc5000000000000 +x >>u i = ca6e +x >>s i = ffffffffffffca6e +x cmpu y = gt +x cmps y = lt +utod x = 43e94ddc6dd77d5e +dtou f = 52eaa7b41d752 +stod x = c3cac88e48a20a8a +dtos f = fffea0f1c07ac687 + +x = e9872a956980d7f +y = 5abe9b0d2dbee1a2 +-x = f1678d56a967f281 +x + y = 69570db68456ef21 +x - y = b3d9d79c28d92bdd +x * y = 374cf7f0c757295e +x /u y = 0 +x %u y = e9872a956980d7f +x /s y = 0 +x %s y = e9872a956980d7f +x /u y2 = 292cea58 +x %u y2 = 301cdf07 +x /s y3 = 292cea58 +x %s y3 = 301cdf07 +~x = f1678d56a967f280 +x & y = a98120904980122 +x | y = 5ebefbad7fbeedff +x ^ y = 5426e9a47b26ecdd +x << i = 5a6035fc00000000 +x >>u i = 3a61caa +x >>s i = 3a61caa +x cmpu y = lt +x cmps y = lt +utod x = 43ad30e552ad301b +dtou f = 5fa72f57d2bd +stod x = 43ad30e552ad301b +dtos f = 5fa72f57d2bd + +x = cc90a3111f2e88c9 +y = fdb739ffd16e43a4 +-x = 336f5ceee0d17737 +x + y = ca47dd10f09ccc6d +x - y = ced969114dc04525 +x * y = cba4570306fa3bc4 +x /u y = 0 +x %u y = cc90a3111f2e88c9 +x /s y = 16 +x %s y = fed1a7151fb4b8b1 +x /u y2 = ce682040 +x %u y2 = 8882909 +x /s y3 = 16845c4311 +x %s y3 = fffffffffe31f1da +~x = 336f5ceee0d17736 +x & y = cc902111112e0080 +x | y = fdb7bbffdf6ecbed +x ^ y = 31279aeece40cb6d +x << i = f2e88c9000000000 +x >>u i = cc90a31 +x >>s i = fffffffffcc90a31 +x cmpu y = lt +x cmps y = lt +utod x = 43e992146223e5d1 +dtou f = 53ca3196c85e7 +stod x = c3c9b7ae777068bc +dtos f = fffeaeea5ea5751c + +x = f6d06b7289cbc123 +y = f78231121267d176 +-x = 92f948d76343edd +x + y = ee529c849c339299 +x - y = ff4e3a607763efad +x * y = bda1155007ad9922 +x /u y = 0 +x %u y = f6d06b7289cbc123 +x /s y = 1 +x %s y = ff4e3a607763efad +x /u y2 = ff48210c +x %u y2 = 6c4e224b +x /s y3 = 114ef78ac +x %s y3 = ffffffffff84590b +~x = 92f948d76343edc +x & y = f68021120043c122 +x | y = f7d27b729befd177 +x ^ y = 1525a609bac1055 +x << i = 48c0000000000000 +x >>u i = 3db +x >>s i = ffffffffffffffdb +x cmpu y = lt +x cmps y = lt +utod x = 43eeda0d6e513978 +dtou f = 6518569bde544 +stod x = c3a25f291aec687e +dtos f = ffffc3ccaef6d47a + +x = 1f75e527a63edd0d +y = cab61a6fe4aecf98 +-x = e08a1ad859c122f3 +x + y = ea2bff978aedaca5 +x - y = 54bfcab7c1900d75 +x * y = f44145345ce6c2b8 +x /u y = 0 +x %u y = 1f75e527a63edd0d +x /s y = 0 +x %s y = 1f75e527a63edd0d +x /u y2 = 27bb1964 +x %u y2 = 7672b2b1 +x /s y3 = ffffffff68dce4e4 +x %s y3 = 1d207631 +~x = e08a1ad859c122f2 +x & y = a340027a42ecd08 +x | y = dff7ff6fe6bedf9d +x ^ y = d5c3ff4842901295 +x << i = 27a63edd0d000000 +x >>u i = 1f75e527a6 +x >>s i = 1f75e527a6 +x cmpu y = lt +x cmps y = gt +utod x = 43bf75e527a63edd +dtou f = ce2e01d58fbd +stod x = 43bf75e527a63edd +dtos f = ce2e01d58fbd + +x = 67d900b4d6966707 +y = 2953a6185807168a +-x = 9826ff4b296998f9 +x + y = 912ca6cd2e9d7d91 +x - y = 3e855a9c7e8f507d +x * y = faecb01bd31f23c6 +x /u y = 2 +x %u y = 1531b484268839f3 +x /s y = 2 +x %s y = 1531b484268839f3 +x /u y2 = 28349b7ce +x %u y2 = 24b397b7 +x /s y3 = 28349b7ce +x %s y3 = 24b397b7 +~x = 9826ff4b296998f8 +x & y = 2151001050060602 +x | y = 6fdba6bcde97778f +x ^ y = 4e8aa6ac8e91718d +x << i = 6402d35a599c1c00 +x >>u i = 19f6402d35a599 +x >>s i = 19f6402d35a599 +x cmpu y = gt +x cmps y = gt +utod x = 43d9f6402d35a59a +dtou f = 2a893795d8eb4 +stod x = 43d9f6402d35a59a +dtos f = 2a893795d8eb4 + +x = 89d196850b26ed91 +y = 756ef4168e7f32cc +-x = 762e697af4d9126f +x + y = ff408a9b99a6205d +x - y = 1462a26e7ca7bac5 +x * y = a3aeacbec65aa18c +x /u y = 1 +x %u y = 1462a26e7ca7bac5 +x /s y = ffffffffffffffff +x %s y = ff408a9b99a6205d +x /u y2 = 12c706881 +x %u y2 = 3974fe7b +x /s y3 = fffffffefe5ea0d2 +x %s y3 = ffffffffe980f385 +~x = 762e697af4d9126e +x & y = 14094040a262080 +x | y = fdfff6978f7fffdd +x ^ y = fcbf62938559df5d +x << i = 196850b26ed91000 +x >>u i = 89d196850b26e +x >>s i = fff89d196850b26e +x cmpu y = gt +x cmps y = lt +utod x = 43e13a32d0a164de +dtou f = 387356a372c01 +stod x = c3dd8b9a5ebd3645 +dtos f = fffcf97caf701b36 + +x = 25ea5203eb2a32b +y = d27bb23d57c784de +-x = fda15adfc14d5cd5 +x + y = d4da575d967a2809 +x - y = 2fe2f2e2e6eb1e4d +x * y = fe450bd9ee78ab4a +x /u y = 0 +x %u y = 25ea5203eb2a32b +x /s y = 0 +x %s y = 25ea5203eb2a32b +x /u y2 = 2e1d4dc +x %u y2 = cb2df2bf +x /s y3 = fffffffff2ac0c93 +x %s y3 = 2a546e24 +~x = fda15adfc14d5cd4 +x & y = 25aa0201682800a +x | y = d27fb73d7ff7a7ff +x ^ y = d025171d697527f5 +x << i = faca8cac0000000 +x >>u i = 97a9480 +x >>s i = 97a9480 +x cmpu y = lt +x cmps y = gt +utod x = 4382f52901f59519 +dtou f = f87b5758598 +stod x = 4382f52901f59519 +dtos f = f87b5758598 + +x = 57e7846b9d99fe55 +y = 933550ac541e6140 +-x = a8187b94626601ab +x + y = eb1cd517f1b85f95 +x - y = c4b233bf497b9d15 +x * y = eb523a71d3ca40 +x /u y = 0 +x %u y = 57e7846b9d99fe55 +x /s y = 0 +x %s y = 57e7846b9d99fe55 +x /u y2 = 98de5aed +x %u y2 = 81b9d719 +x /s y3 = ffffffff312686c0 +x %s y3 = 67db7555 +~x = a8187b94626601aa +x & y = 1325002814186040 +x | y = d7f7d4efdd9fff55 +x ^ y = c4d2d4c7c9879f15 +x << i = 57e7846b9d99fe55 +x >>u i = 57e7846b9d99fe55 +x >>s i = 57e7846b9d99fe55 +x cmpu y = lt +x cmps y = gt +utod x = 43d5f9e11ae76680 +dtou f = 240170cfeb5b5 +stod x = 43d5f9e11ae76680 +dtos f = 240170cfeb5b5 + +x = 7f83a526d3d598f +y = 3562759c2ed93072 +-x = f807c5ad92c2a671 +x + y = 3d5aafee9c168a01 +x - y = d295c4b63e64291d +x * y = d607fa6cc253b1ae +x /u y = 0 +x %u y = 7f83a526d3d598f +x /s y = 0 +x %s y = 7f83a526d3d598f +x /u y2 = 2637b00b +x %u y2 = 4a30bdb +x /s y3 = 2637b00b +x %s y3 = 4a30bdb +~x = f807c5ad92c2a670 +x & y = 56030102c191002 +x | y = 37fa7fde6ffd79ff +x ^ y = 329a4fce43e469fd +x << i = 663c000000000000 +x >>u i = 1fe +x >>s i = 1fe +x cmpu y = lt +x cmps y = lt +utod x = 439fe0e949b4f566 +dtou f = 343ad6486db4 +stod x = 439fe0e949b4f566 +dtos f = 343ad6486db4 + +x = e2e88e05fcf79359 +y = 6d0fe1876fd28ef4 +-x = 1d1771fa03086ca7 +x + y = 4ff86f8d6cca224d +x - y = 75d8ac7e8d250465 +x * y = e44e78efe5b5ced4 +x /u y = 2 +x %u y = 8c8caf71d527571 +x /s y = 0 +x %s y = e2e88e05fcf79359 +x /u y2 = 2149e9078 +x %u y2 = 3157ec11 +x /s y3 = ffffffffbbb6b39e +x %s y3 = ffffffff9f7efd07 +~x = 1d1771fa03086ca6 +x & y = 600880056cd28250 +x | y = efefef87fff79ffd +x ^ y = 8fe76f8293251dad +x << i = 3590000000000000 +x >>u i = e2e +x >>s i = fffffffffffffe2e +x cmpu y = gt +x cmps y = lt +utod x = 43ec5d11c0bf9ef2 +dtou f = 5cf112710b749 +stod x = c3bd1771fa03086d +dtos f = ffff41586c49a67e + +x = d3099a8b859ae33 +y = 8fa8587c64456d46 +-x = f2cf665747a651cd +x + y = 9cd8f2251c9f1b79 +x - y = 7d88412c541440ed +x * y = 8cfd7b8c777058f2 +x /u y = 0 +x %u y = d3099a8b859ae33 +x /s y = 0 +x %s y = d3099a8b859ae33 +x /u y2 = 178126fd +x %u y2 = 75bd3a7 +x /s y3 = ffffffffe1f1945e +x %s y3 = 5ca580ab +~x = f2cf665747a651cc +x & y = d20182820412c02 +x | y = 8fb8d9fcfc5def77 +x ^ y = 8298c1d4dc1cc375 +x << i = 4c266a2e166b8cc0 +x >>u i = 34c266a2e166b8 +x >>s i = 34c266a2e166b8 +x cmpu y = lt +x cmps y = gt +utod x = 43aa61335170b35c +dtou f = 5670e3244231 +stod x = 43aa61335170b35c +dtos f = 5670e3244231 + +x = b62ea5426a8f709d +y = 31b8858d02dd2fe8 +-x = 49d15abd95708f63 +x + y = e7e72acf6d6ca085 +x - y = 84761fb567b240b5 +x * y = e22a9ef5933e148 +x /u y = 3 +x %u y = 2105149b61f7e0e5 +x /s y = ffffffffffffffff +x %s y = e7e72acf6d6ca085 +x /u y2 = 3aa02d06b +x %u y2 = 8d30eae +x /s y3 = fffffffe83ee35ed +x %s y3 = ffffffffdeff9c14 +~x = 49d15abd95708f62 +x & y = 30288500028d2088 +x | y = b7bea5cf6adf7ffd +x ^ y = 879620cf68525f75 +x << i = 8f709d0000000000 +x >>u i = b62ea5 +x >>s i = ffffffffffb62ea5 +x cmpu y = gt +x cmps y = lt +utod x = 43e6c5d4a84d51ee +dtou f = 4a9f307066a12 +stod x = c3d27456af655c24 +dtos f = fffe1c3a4c3f5948 + +x = 2e94ca71f5150517 +y = 5f844de64402cf5a +-x = d16b358e0aeafae9 +x + y = 8e1918583917d471 +x - y = cf107c8bb11235bd +x * y = 36454e6a4eaf6316 +x /u y = 0 +x %u y = 2e94ca71f5150517 +x /s y = 0 +x %s y = 2e94ca71f5150517 +x /u y2 = 7cd84df1 +x %u y2 = 1c098191 +x /s y3 = 7cd84df1 +x %s y3 = 1c098191 +~x = d16b358e0aeafae8 +x & y = e84486044000512 +x | y = 7f94cff7f517cf5f +x ^ y = 71108797b117ca4d +x << i = c7d454145c000000 +x >>u i = ba5329c7d +x >>s i = ba5329c7d +x cmpu y = lt +x cmps y = lt +utod x = 43c74a6538fa8a83 +dtou f = 131464f1a5831 +stod x = 43c74a6538fa8a83 +dtos f = 131464f1a5831 + +x = 61bd0eb9b8259a21 +y = 134717f07eaef81c +-x = 9e42f14647da65df +x + y = 750426aa36d4923d +x - y = 4e75f6c93976a205 +x * y = 780fa80f91dad39c +x /u y = 5 +x %u y = 15997073ebac195 +x /s y = 5 +x %s y = 15997073ebac195 +x /u y2 = 511ed4ac8 +x %u y2 = 27f86a1 +x /s y3 = 511ed4ac8 +x %s y3 = 27f86a1 +~x = 9e42f14647da65de +x & y = 10506b038249800 +x | y = 73ff1ff9feaffa3d +x ^ y = 72fa1949c68b623d +x << i = 9b8259a210000000 +x >>u i = 61bd0eb9b +x >>s i = 61bd0eb9b +x cmpu y = gt +x cmps y = gt +utod x = 43d86f43ae6e0967 +dtou f = 2808a00a88ddd +stod x = 43d86f43ae6e0967 +dtos f = 2808a00a88ddd + +x = 3b12fa08c18b023b +y = 812e51fdc3492aae +-x = c4ed05f73e74fdc5 +x + y = bc414c0684d42ce9 +x - y = b9e4a80afe41d78d +x * y = 9f875459eeac321a +x /u y = 0 +x %u y = 3b12fa08c18b023b +x /s y = 0 +x %s y = 3b12fa08c18b023b +x /u y2 = 751173e7 +x %u y2 = 4f1d5ff0 +x /s y3 = ffffffff88c0657c +x %s y3 = 3027aaf +~x = c4ed05f73e74fdc4 +x & y = 1025008c109022a +x | y = bb3efbfdc3cb2abf +x ^ y = ba3cabf502c22895 +x << i = c08ec00000000000 +x >>u i = ec4b +x >>s i = ec4b +x cmpu y = lt +x cmps y = gt +utod x = 43cd897d0460c581 +dtou f = 18325f0a8cb71 +stod x = 43cd897d0460c581 +dtos f = 18325f0a8cb71 + +x = 341b83d316b653e5 +y = 5153bc26395bdb90 +-x = cbe47c2ce949ac1b +x + y = 856f3ff950122f75 +x - y = e2c7c7acdd5a7855 +x * y = a24306a68fbb17d0 +x /u y = 0 +x %u y = 341b83d316b653e5 +x /s y = 0 +x %s y = 341b83d316b653e5 +x /u y2 = a405e4b4 +x %u y2 = 4f86312d +x /s y3 = a405e4b4 +x %s y3 = 4f86312d +~x = cbe47c2ce949ac1a +x & y = 1013800210125380 +x | y = 755bbff73fffdbf5 +x ^ y = 65483ff52fed8875 +x << i = 83d316b653e50000 +x >>u i = 341b83d316b6 +x >>s i = 341b83d316b6 +x cmpu y = lt +x cmps y = lt +utod x = 43ca0dc1e98b5b2a +dtou f = 1557dd8590e16 +stod x = 43ca0dc1e98b5b2a +dtos f = 1557dd8590e16 + +x = 605604a12949899f +y = c5e6d8ea02259342 +-x = 9fa9fb5ed6b67661 +x + y = 263cdd8b2b6f1ce1 +x - y = 9a6f2bb72723f65d +x * y = f1a33a4c00f6c7fe +x /u y = 0 +x %u y = 605604a12949899f +x /s y = ffffffffffffffff +x %s y = 263cdd8b2b6f1ce1 +x /u y2 = 7c9e1383 +x %u y2 = 89a32be1 +x /s y3 = fffffffe57830a86 +x %s y3 = 83cdb23 +~x = 9fa9fb5ed6b67660 +x & y = 404600a000018102 +x | y = e5f6dceb2b6d9bdf +x ^ y = a5b0dc4b2b6c1add +x << i = 81581284a526267c +x >>u i = 181581284a526267 +x >>s i = 181581284a526267 +x cmpu y = lt +x cmps y = gt +utod x = 43d81581284a5262 +dtou f = 27759007077d8 +stod x = 43d81581284a5262 +dtos f = 27759007077d8 + +x = 94e3e5e2497a21e9 +y = 6f2836d5614f0e44 +-x = 6b1c1a1db685de17 +x + y = 40c1cb7aac9302d +x - y = 25bbaf0ce82b13a5 +x * y = eaf4c086f232bfe4 +x /u y = 1 +x %u y = 25bbaf0ce82b13a5 +x /s y = 0 +x %s y = 94e3e5e2497a21e9 +x /u y2 = 156e6b4cb +x %u y2 = caae302 +x /s y3 = ffffffff09523710 +x %s y3 = ffffffffa5f4f199 +~x = 6b1c1a1db685de16 +x & y = 42024c0414a0040 +x | y = ffebf7f7697f2fed +x ^ y = fbcbd33728352fad +x << i = 4e3e5e2497a21e90 +x >>u i = 94e3e5e2497a21e +x >>s i = f94e3e5e2497a21e +x cmpu y = gt +x cmps y = lt +utod x = 43e29c7cbc492f44 +dtou f = 3cfc45959f1ec +stod x = c3dac706876da178 +dtos f = fffd420b9e92e122 + +x = 2c62677ac7f4bf43 +y = 58ff1a48be4e5d16 +-x = d39d9885380b40bd +x + y = 856181c386431c59 +x - y = d3634d3209a6622d +x * y = dbdcaa8f18edc6c2 +x /u y = 0 +x %u y = 2c62677ac7f4bf43 +x /s y = 0 +x %s y = 2c62677ac7f4bf43 +x /u y2 = 7fac2866 +x %u y2 = 15d50693 +x /s y3 = 7fac2866 +x %s y3 = 15d50693 +~x = d39d9885380b40bc +x & y = 862024886441d02 +x | y = 7cff7f7afffeff57 +x ^ y = 749d7d3279bae255 +x << i = deb1fd2fd0c00000 +x >>u i = b1899deb1f +x >>s i = b1899deb1f +x cmpu y = lt +x cmps y = lt +utod x = 43c63133bd63fa60 +dtou f = 122e0a6fbb2b5 +stod x = 43c63133bd63fa60 +dtos f = 122e0a6fbb2b5 + +x = 54328cce0129c82d +y = e66196f0c43f0438 +-x = abcd7331fed637d3 +x + y = 3a9423bec568cc65 +x - y = 6dd0f5dd3ceac3f5 +x * y = 6e5b53cf9f577dd8 +x /u y = 0 +x %u y = 54328cce0129c82d +x /s y = fffffffffffffffd +x %s y = 75751a04de6d4d5 +x /u y2 = 5d8f7413 +x %u y2 = 2f76d45d +x /s y3 = fffffffcb6a49899 +x %s y3 = d7812bd +~x = abcd7331fed637d2 +x & y = 442084c000290028 +x | y = f6739efec53fcc3d +x ^ y = b2531a3ec516cc15 +x << i = 2d00000000000000 +x >>u i = 54 +x >>s i = 54 +x cmpu y = lt +x cmps y = gt +utod x = 43d50ca333804a72 +dtou f = 227cbe624e4ce +stod x = 43d50ca333804a72 +dtos f = 227cbe624e4ce + +x = 764480f2ce2b0727 +y = e847cd7a4b371c2a +-x = 89bb7f0d31d4f8d9 +x + y = 5e8c4e6d19622351 +x - y = 8dfcb37882f3eafd +x * y = c4f5404f7e387066 +x /u y = 0 +x %u y = 764480f2ce2b0727 +x /s y = fffffffffffffffc +x %s y = 1763b6dbfb0777cf +x /u y2 = 82583705 +x %u y2 = 41aecdc5 +x /s y3 = fffffffb038fcf10 +x %s y3 = 5628987 +~x = 89bb7f0d31d4f8d8 +x & y = 604480724a230422 +x | y = fe47cdfacf3f1f2f +x ^ y = 9e034d88851c1b0d +x << i = ac1c9c0000000000 +x >>u i = 1d9120 +x >>s i = 1d9120 +x cmpu y = lt +x cmps y = gt +utod x = 43dd91203cb38ac2 +dtou f = 30714183cfbc6 +stod x = 43dd91203cb38ac2 +dtos f = 30714183cfbc6 + +x = 86e8642063824ab1 +y = 200894ad1d61716c +-x = 79179bdf9c7db54f +x + y = a6f0f8cd80e3bc1d +x - y = 66dfcf734620d945 +x * y = 218ace07d800a3ac +x /u y = 4 +x %u y = 6c6116bedfc8501 +x /s y = fffffffffffffffd +x %s y = e7022227bba69ef5 +x /u y2 = 43622075f +x %u y2 = 748637e +x /s y3 = fffffffc38469f85 +x %s y3 = ffffffffeb6799d0 +~x = 79179bdf9c7db54e +x & y = 8042001004020 +x | y = a6e8f4ad7fe37bfd +x ^ y = a6e0f08d7ee33bdd +x << i = 24ab100000000000 +x >>u i = 86e86 +x >>s i = fffffffffff86e86 +x cmpu y = gt +x cmps y = lt +utod x = 43e0dd0c840c7049 +dtou f = 37421b15de363 +stod x = c3de45e6f7e71f6d +dtos f = fffce668f696d298 + +x = f22c66df8ca9054b +y = 3237edb3e364a47e +-x = dd399207356fab5 +x + y = 24645493700da9c9 +x - y = bff4792ba94460cd +x * y = aa00cc4c14e0a6ea +x /u y = 4 +x %u y = 294cb00fff167353 +x /s y = 0 +x %s y = f22c66df8ca9054b +x /u y2 = 4d288aa25 +x %u y2 = 9a2cc6c +x /s y3 = ffffffffb9841949 +x %s y3 = ffffffffd933c240 +~x = dd399207356fab4 +x & y = 322464938020044a +x | y = f23fefffefeda57f +x ^ y = c01b8b6c6fcda135 +x << i = c000000000000000 +x >>u i = 3 +x >>s i = ffffffffffffffff +x cmpu y = gt +x cmps y = lt +utod x = 43ee458cdbf19521 +dtou f = 6331b9e80f041 +stod x = c3aba73240e6adf5 +dtos f = ffffa562e3b9df77 + +x = a952be7c0308ed75 +y = f80f290ededf49e0 +-x = 56ad4183fcf7128b +x + y = a161e78ae1e83755 +x - y = b143956d2429a395 +x * y = 13c0257882712360 +x /u y = 0 +x %u y = a952be7c0308ed75 +x /s y = a +x %s y = f8bb23e74e500ab5 +x /u y2 = aebe580f +x %u y2 = 47a4b5a3 +x /s y3 = aea5798d9 +x %s y3 = fffffffffe0cd097 +~x = 56ad4183fcf7128a +x & y = a802280c02084960 +x | y = f95fbf7edfdfedf5 +x ^ y = 515d9772ddd7a495 +x << i = 308ed7500000000 +x >>u i = a952be7c +x >>s i = ffffffffa952be7c +x cmpu y = lt +x cmps y = lt +utod x = 43e52a57cf80611e +dtou f = 455ad38d511d7 +stod x = c3d5ab5060ff3dc5 +dtos f = fffdc7f47e0e010d + +x = e36991f169ad9daf +y = 240b76fb67010a12 +-x = 1c966e0e96526251 +x + y = 77508ecd0aea7c1 +x - y = bf5e1af602ac939d +x * y = d70339ce3d0cec4e +x /u y = 6 +x %u y = b24c80cffa76143 +x /s y = 0 +x %s y = e36991f169ad9daf +x /u y2 = 64f253db1 +x %u y2 = 1b1f8b24 +x /s y3 = ffffffff34f67dfd +x %s y3 = ffffffffe00e78a0 +~x = 1c966e0e96526250 +x & y = 200910f161010802 +x | y = e76bf7fb6fad9fbf +x ^ y = c762e70a0eac97bd +x << i = 47c5a6b676bc0000 +x >>u i = 38da647c5a6b +x >>s i = fffff8da647c5a6b +x cmpu y = gt +x cmps y = lt +utod x = 43ec6d323e2d35b4 +dtou f = 5d25eaad6e0cf +stod x = c3bc966e0e965262 +dtos f = ffff44a5f00fd005 + +x = 75714d8bcb0f3479 +y = f306b780aa88c194 +-x = 8a8eb27434f0cb87 +x + y = 6878050c7597f60d +x - y = 826a960b208672e5 +x * y = d405ed5415a18ef4 +x /u y = 0 +x %u y = 75714d8bcb0f3479 +x /s y = fffffffffffffff7 +x %s y = adc111c9de02ad +x /u y2 = 7bb650b1 +x %u y2 = b51254f9 +x /s y3 = fffffff6f29b761c +x %s y3 = 33e2279 +~x = 8a8eb27434f0cb86 +x & y = 710005808a080010 +x | y = f777ff8beb8ff5fd +x ^ y = 8677fa0b6187f5ed +x << i = d8bcb0f347900000 +x >>u i = 75714d8bcb0 +x >>s i = 75714d8bcb0 +x cmpu y = lt +x cmps y = gt +utod x = 43dd5c5362f2c3cd +dtou f = 301abf81c22cb +stod x = 43dd5c5362f2c3cd +dtos f = 301abf81c22cb + +x = 941f83cb649df453 +y = ea181038dbafa0e6 +-x = 6be07c349b620bad +x + y = 7e379404404d9539 +x - y = aa07739288ee536d +x * y = 856711c4245a6292 +x /u y = 0 +x %u y = 941f83cb649df453 +x /s y = 4 +x %s y = ebbf42e7f5df70bb +x /u y2 = a1fbf096 +x %u y2 = 8c67f383 +x /s y3 = 4ecae6205 +x %s y3 = fffffffff5e0333b +~x = 6be07c349b620bac +x & y = 80180008408da042 +x | y = fe1f93fbffbff4f7 +x ^ y = 7e0793f3bf3254b5 +x << i = 277d14c000000000 +x >>u i = 2507e0f +x >>s i = fffffffffe507e0f +x cmpu y = lt +x cmps y = lt +utod x = 43e283f0796c93bf +dtou f = 3cabd55143047 +stod x = c3daf81f0d26d883 +dtos f = fffd3d049a4d1f7d + +x = 789d0be4a3f6e3bd +y = 278ae68eedc94c88 +-x = 8762f41b5c091c43 +x + y = a027f27391c03045 +x - y = 51122555b62d9735 +x * y = 7b6a0a92f2a1868 +x /u y = 3 +x %u y = 1fc5837da9afe25 +x /s y = 3 +x %s y = 1fc5837da9afe25 +x /u y2 = 30cdb0a56 +x %u y2 = ad1e409 +x /s y3 = 30cdb0a56 +x %s y3 = ad1e409 +~x = 8762f41b5c091c42 +x & y = 20880284a1c04088 +x | y = 7f9fefeeefffefbd +x ^ y = 5f17ed6a4e3faf35 +x << i = 9d0be4a3f6e3bd00 +x >>u i = 789d0be4a3f6e3 +x >>s i = 789d0be4a3f6e3 +x cmpu y = gt +x cmps y = gt +utod x = 43de2742f928fdb9 +dtou f = 31673cfc93710 +stod x = 43de2742f928fdb9 +dtos f = 31673cfc93710 + +x = 2ff5275f8be96d37 +y = f35327a741a0fcfa +-x = d00ad8a0741692c9 +x + y = 23484f06cd8a6a31 +x - y = 3ca1ffb84a48703d +x * y = 7926d788a2d6cbb6 +x /u y = 0 +x %u y = 2ff5275f8be96d37 +x /s y = fffffffffffffffd +x %s y = 9ee9e5550cc6425 +x /u y2 = 3274b0b2 +x %u y2 = 9b290b19 +x /s y3 = fffffffc3766d779 +x %s y3 = 5c46e48 +~x = d00ad8a0741692c8 +x & y = 2351270701a06c32 +x | y = fff727ffcbe9fdff +x ^ y = dca600f8ca4991cd +x << i = dc00000000000000 +x >>u i = b +x >>s i = b +x cmpu y = lt +x cmps y = gt +utod x = 43c7fa93afc5f4b7 +dtou f = 13a4b8e1e9d87 +stod x = 43c7fa93afc5f4b7 +dtos f = 13a4b8e1e9d87 + +x = cba92b33d3b5ff41 +y = 1f0ddcc454db9ebc +-x = 3456d4cc2c4a00bf +x + y = eab707f828919dfd +x - y = ac9b4e6f7eda6085 +x * y = 43b3d4f07dcc91bc +x /u y = 6 +x %u y = 1155fe99d69046d9 +x /s y = ffffffffffffffff +x %s y = eab707f828919dfd +x /u y2 = 68ee956da +x %u y2 = 135a2859 +x /s y3 = fffffffe50888709 +x %s y3 = ffffffffe4ade05d +~x = 3456d4cc2c4a00be +x & y = b09080050919e00 +x | y = dfadfff7d7fffffd +x ^ y = d4a4f7f7876e61fd +x << i = 1000000000000000 +x >>u i = c +x >>s i = fffffffffffffffc +x cmpu y = gt +x cmps y = lt +utod x = 43e97525667a76c0 +dtou f = 536b62630d660 +stod x = c3ca2b6a66162500 +dtos f = fffea8fd6b69c595 + +x = 48d35e1a092dac5b +y = 32d5df5f91e6f24e +-x = b72ca1e5f6d253a5 +x + y = 7ba93d799b149ea9 +x - y = 15fd7eba7746ba0d +x * y = a8bfa1095d9a89ba +x /u y = 1 +x %u y = 15fd7eba7746ba0d +x /s y = 1 +x %s y = 15fd7eba7746ba0d +x /u y2 = 16ebd54de +x %u y2 = b47cbf9 +x /s y3 = 16ebd54de +x %s y3 = b47cbf9 +~x = b72ca1e5f6d253a4 +x & y = d15e1a0124a04a +x | y = 7ad7df5f99effe5f +x ^ y = 7a06814598cb5e15 +x << i = d786824b6b16c000 +x >>u i = 1234d786824b6 +x >>s i = 1234d786824b6 +x cmpu y = gt +x cmps y = gt +utod x = 43d234d786824b6b +dtou f = 1dd452c7e644a +stod x = 43d234d786824b6b +dtos f = 1dd452c7e644a + +x = be9793bd5e9acb05 +y = 1b8eddf6093dac30 +-x = 41686c42a16534fb +x + y = da2671b367d87735 +x - y = a308b5c7555d1ed5 +x * y = 1dea52e54a9e6cf0 +x /u y = 6 +x %u y = 193e5ff92728c1e5 +x /s y = fffffffffffffffe +x %s y = f5b54fa971162365 +x /u y2 = 6ea800f5e +x %u y2 = a23e0b1 +x /s y3 = fffffffda06520cf +x %s y3 = ffffffffe848911b +~x = 41686c42a16534fa +x & y = 1a8691b408188800 +x | y = bf9fdfff5fbfef35 +x ^ y = a5194e4b57a76735 +x << i = cb05000000000000 +x >>u i = be97 +x >>s i = ffffffffffffbe97 +x cmpu y = gt +x cmps y = lt +utod x = 43e7d2f277abd359 +dtou f = 4e1107ad00a84 +stod x = c3d05a1b10a8594d +dtos f = fffe5357c008f9b9 + +x = 6a5642c7a79a95bf +y = 8a9c14bdfa0894e2 +-x = 95a9bd3858656a41 +x + y = f4f25785a1a32aa1 +x - y = dfba2e09ad9200dd +x * y = d5e40e3b89029e9e +x /u y = 0 +x %u y = 6a5642c7a79a95bf +x /s y = 0 +x %s y = 6a5642c7a79a95bf +x /u y2 = c46523b0 +x %u y2 = 22e57ccf +x /s y3 = ffffffff181acd99 +x %s y3 = 84d7ca +~x = 95a9bd3858656a40 +x & y = a140085a20894a2 +x | y = eade56ffff9a95ff +x ^ y = e0ca567a5d92015d +x << i = 9e6a56fc00000000 +x >>u i = 1a9590b1 +x >>s i = 1a9590b1 +x cmpu y = lt +x cmps y = gt +utod x = 43da9590b1e9e6a5 +dtou f = 2b8e3cf0b40fc +stod x = 43da9590b1e9e6a5 +dtos f = 2b8e3cf0b40fc + +x = 457806d9ec4fcb09 +y = 71e74b55ef64a8e4 +-x = ba87f92613b034f7 +x + y = b75f522fdbb473ed +x - y = d390bb83fceb2225 +x * y = c93cf7e08ad2bc04 +x /u y = 0 +x %u y = 457806d9ec4fcb09 +x /s y = 0 +x %s y = 457806d9ec4fcb09 +x /u y2 = 9c21e58a +x %u y2 = 1d492637 +x /s y3 = 9c21e58a +x %s y3 = 1d492637 +~x = ba87f92613b034f6 +x & y = 41600251ec448800 +x | y = 75ff4fddef6febed +x ^ y = 349f4d8c032b63ed +x << i = c4fcb09000000000 +x >>u i = 457806d +x >>s i = 457806d +x cmpu y = lt +x cmps y = lt +utod x = 43d15e01b67b13f3 +dtou f = 1c74565d5b77e +stod x = 43d15e01b67b13f3 +dtos f = 1c74565d5b77e + +x = 9f2932af8964d63 +y = cfbf03aa8d638b6 +-x = f60d6cd50769b29d +x + y = 16ee8365a16c8619 +x - y = fcf6a2f04fc014ad +x * y = 1a5fcfc9448aac62 +x /u y = 0 +x %u y = 9f2932af8964d63 +x /s y = 0 +x %s y = 9f2932af8964d63 +x /u y2 = c4213692 +x %u y2 = cc1104f +x /s y3 = c4213692 +x %s y3 = cc1104f +~x = f60d6cd50769b29c +x & y = 8f2902aa8960822 +x | y = dfbf33af8d67df7 +x ^ y = 5096310504075d5 +x << i = 58c0000000000000 +x >>u i = 27 +x >>s i = 27 +x cmpu y = lt +x cmps y = lt +utod x = 43a3e52655f12c9b +dtou f = 41313bac4077 +stod x = 43a3e52655f12c9b +dtos f = 41313bac4077 + +x = f20b1d13f51fc34d +y = 9ce1509c57b108d8 +-x = df4e2ec0ae03cb3 +x + y = 8eec6db04cd0cc25 +x - y = 5529cc779d6eba75 +x * y = 547381c1042430f8 +x /u y = 1 +x %u y = 5529cc779d6eba75 +x /s y = 0 +x %s y = f20b1d13f51fc34d +x /u y2 = 18af89756 +x %u y2 = 2bc2aae5 +x /s y3 = 240bce76 +x %s y3 = ffffffffefb31365 +~x = df4e2ec0ae03cb2 +x & y = 9001101055110048 +x | y = feeb5d9ff7bfcbdd +x ^ y = 6eea4d8fa2aecb95 +x << i = 13f51fc34d000000 +x >>u i = f20b1d13f5 +x >>s i = fffffff20b1d13f5 +x cmpu y = gt +x cmps y = gt +utod x = 43ee4163a27ea3f8 +dtou f = 632417610ecbe +stod x = c3abe9c5d815c079 +dtos f = ffffa488bb49dbf3 + +x = f68a3d1d7aa13747 +y = 6db1793cc07d71ca +-x = 975c2e2855ec8b9 +x + y = 643bb65a3b1ea911 +x - y = 88d8c3e0ba23c57d +x * y = e56c8a352a46f506 +x /u y = 2 +x %u y = 1b274aa3f9a653b3 +x /s y = 0 +x %s y = f68a3d1d7aa13747 +x /u y2 = 23f5eca36 +x %u y2 = 5804c9f +x /s y3 = ffffffffe9ec23f1 +x %s y3 = ffffffff93aae1cb +~x = 975c2e2855ec8b8 +x & y = 6480391c40213142 +x | y = ffbb7d3dfafd77cf +x ^ y = 9b3b4421badc468d +x << i = 28f475ea84dd1c00 +x >>u i = 3da28f475ea84d +x >>s i = fffda28f475ea84d +x cmpu y = gt +x cmps y = lt +utod x = 43eed147a3af5427 +dtou f = 64fb979962f8b +stod x = c3a2eb85c50abd91 +dtos f = ffffc200becf1ec1 + +x = d849bdf83b79b7d1 +y = 6a1649c9d6a2800c +-x = 27b64207c486482f +x + y = 426007c2121c37dd +x - y = 6e33742e64d737c5 +x * y = 79305427addf1dcc +x /u y = 2 +x %u y = 41d2a648e34b7b9 +x /s y = 0 +x %s y = d849bdf83b79b7d1 +x /u y2 = 209ed682d +x %u y2 = 2780177c +x /s y3 = ffffffffa02bad7e +x %s y3 = ffffffffefe191e3 +~x = 27b64207c486482e +x & y = 480009c812208000 +x | y = fa5ffdf9fffbb7dd +x ^ y = b25ff431eddb37dd +x << i = 9bdf83b79b7d1000 +x >>u i = d849bdf83b79b +x >>s i = fffd849bdf83b79b +x cmpu y = gt +x cmps y = lt +utod x = 43eb0937bf076f37 +dtou f = 5897724416b90 +stod x = c3c3db2103e24324 +dtos f = fffefbbe697a5ac6 + +x = e725b2286679f76b +y = 6d152baf41dd141e +-x = 18da4dd799860895 +x + y = 543addd7a8570b89 +x - y = 7a108679249ce34d +x * y = 7bf192ff4bfe5a8a +x /u y = 2 +x %u y = cfb5ac9e2bfcf2f +x /s y = 0 +x %s y = e725b2286679f76b +x /u y2 = 21e776484 +x %u y2 = 1226152f +x /s y3 = ffffffffc5acab53 +x %s y3 = ffffffffdfdae8ae +~x = 18da4dd799860894 +x & y = 650522284059140a +x | y = ef35bbaf67fdf77f +x ^ y = 8a30998727a4e375 +x << i = 199e7ddac0000000 +x >>u i = 39c96c8a1 +x >>s i = ffffffff9c96c8a1 +x cmpu y = gt +x cmps y = lt +utod x = 43ece4b6450ccf3f +dtou f = 5ead8bbcfd53f +stod x = c3b8da4dd7998609 +dtos f = ffff5d200108c475 + +x = 2e8033e5d5b4ec95 +y = 5ff552a5384c0280 +-x = d17fcc1a2a4b136b +x + y = 8e75868b0e00ef15 +x - y = ce8ae1409d68ea15 +x * y = 5679fa16188b7480 +x /u y = 0 +x %u y = 2e8033e5d5b4ec95 +x /s y = 0 +x %s y = 2e8033e5d5b4ec95 +x /u y2 = 7c0e5692 +x %u y2 = 41025c7b +x /s y3 = 7c0e5692 +x %s y3 = 41025c7b +~x = d17fcc1a2a4b136a +x & y = e8012a510040080 +x | y = 7ff573e5fdfcee95 +x ^ y = 71756140edf8ee15 +x << i = 2e8033e5d5b4ec95 +x >>u i = 2e8033e5d5b4ec95 +x >>s i = 2e8033e5d5b4ec95 +x cmpu y = lt +x cmps y = lt +utod x = 43c74019f2eada76 +dtou f = 130bf620b348e +stod x = 43c74019f2eada76 +dtos f = 130bf620b348e + +x = ef43bef3068171cf +y = b7c320f5051933b2 +-x = 10bc410cf97e8e31 +x + y = a706dfe80b9aa581 +x - y = 37809dfe01683e1d +x * y = 52ca243e77e45eee +x /u y = 1 +x %u y = 37809dfe01683e1d +x /s y = 0 +x %s y = ef43bef3068171cf +x /u y2 = 14d521718 +x %u y2 = 4c657d7 +x /s y3 = 3b4ec343 +x %s y3 = ffffffffcaaf32b0 +~x = 10bc410cf97e8e30 +x & y = a74320f104013182 +x | y = ffc3bef7079973ff +x ^ y = 58809e060398427d +x << i = c73c000000000000 +x >>u i = 3bd0 +x >>s i = fffffffffffffbd0 +x cmpu y = gt +x cmps y = gt +utod x = 43ede877de60d02e +dtou f = 6200b71208662 +stod x = c3b0bc410cf97e8e +dtos f = ffff9252b6597598 + +x = 90d105664c14e599 +y = 457cda70c307c434 +-x = 6f2efa99b3eb1a67 +x + y = d64ddfd70f1ca9cd +x - y = 4b542af5890d2165 +x * y = c1c276d4636c714 +x /u y = 2 +x %u y = 5d75084c6055d31 +x /s y = ffffffffffffffff +x %s y = d64ddfd70f1ca9cd +x /u y2 = 21584e541 +x %u y2 = 263b3f29 +x /s y3 = fffffffe66636700 +x %s y3 = fffffffff6e1d599 +~x = 6f2efa99b3eb1a66 +x & y = 5000604004c410 +x | y = d5fddf76cf17e5bd +x ^ y = d5addf168f1321ad +x << i = 5990000000000000 +x >>u i = 90d +x >>s i = fffffffffffff90d +x cmpu y = gt +x cmps y = lt +utod x = 43e21a20acc9829d +dtou f = 3b511c0437bd3 +stod x = c3dbcbbea66cfac7 +dtos f = fffd2759057c6b08 + +x = 7cb0b0fabc5eca73 +y = 24bef482136f2486 +-x = 834f4f0543a1358d +x + y = a16fa57ccfcdeef9 +x - y = 57f1bc78a8efa5ed +x * y = e48506423ef32432 +x /u y = 3 +x %u y = e73d37482115ce1 +x /s y = 3 +x %s y = e73d37482115ce1 +x /u y2 = 364afcce3 +x %u y2 = f56632d +x /s y3 = 364afcce3 +x %s y3 = f56632d +~x = 834f4f0543a1358c +x & y = 24b0b082104e0002 +x | y = 7cbef4fabf7feef7 +x ^ y = 580e4478af31eef5 +x << i = 2c2c3eaf17b29cc0 +x >>u i = 1f2c2c3eaf17b29 +x >>s i = 1f2c2c3eaf17b29 +x cmpu y = gt +x cmps y = gt +utod x = 43df2c2c3eaf17b3 +dtou f = 3312b71530e4e +stod x = 43df2c2c3eaf17b3 +dtos f = 3312b71530e4e + +x = 2a1c70168f0d66dd +y = f58c569d3d6b3928 +-x = d5e38fe970f29923 +x + y = 1fa8c6b3cc78a005 +x - y = 3490197951a22db5 +x * y = 208cd905fd5e4788 +x /u y = 0 +x %u y = 2a1c70168f0d66dd +x /s y = fffffffffffffffc +x %s y = 4dca8b84ba4b7d +x /u y2 = 2be74f23 +x %u y2 = 27781c66 +x /s y3 = fffffffbf88ea18f +x %s y3 = 1a482a +~x = d5e38fe970f29922 +x & y = 200c50140d092008 +x | y = ff9c769fbf6f7ffd +x ^ y = df90268bb2665ff5 +x << i = d66dd0000000000 +x >>u i = 2a1c70 +x >>s i = 2a1c70 +x cmpu y = lt +x cmps y = gt +utod x = 43c50e380b4786b3 +dtou f = 113faad6dbfa0 +stod x = 43c50e380b4786b3 +dtos f = 113faad6dbfa0 + +x = c864770318e36557 +y = 6a00ab1df6497a9a +-x = 379b88fce71c9aa9 +x + y = 326522210f2cdff1 +x - y = 5e63cbe52299eabd +x * y = 136745cdd6e56c56 +x /u y = 1 +x %u y = 5e63cbe52299eabd +x /s y = 0 +x %s y = c864770318e36557 +x /u y2 = 1e3f46921 +x %u y2 = 59fa719a +x /s y3 = ffffffff79b4afc3 +x %s y3 = ffffffffe0043b40 +~x = 379b88fce71c9aa8 +x & y = 4800230110416012 +x | y = ea64ff1ffeeb7fdf +x ^ y = a264dc1eeeaa1fcd +x << i = c638d955c000000 +x >>u i = 32191dc0c6 +x >>s i = fffffff2191dc0c6 +x cmpu y = gt +x cmps y = lt +utod x = 43e90c8ee0631c6d +dtou f = 5214aba09a5f9 +stod x = c3cbcdc47e738e4d +dtos f = fffe9391ff42952f + +x = 9714eb2dc9c67461 +y = 466bcdc06b7b155c +-x = 68eb14d236398b9f +x + y = dd80b8ee354189bd +x - y = 50a91d6d5e4b5f05 +x * y = 8fb920c04078c7dc +x /u y = 2 +x %u y = a3d4facf2d049a9 +x /s y = ffffffffffffffff +x %s y = dd80b8ee354189bd +x /u y2 = 225392f08 +x %u y2 = 3be1c661 +x /s y3 = fffffffe8297e39a +x %s y3 = fffffffff93b6ee1 +~x = 68eb14d236398b9e +x & y = 600c90049421440 +x | y = d77fefedebff757d +x ^ y = d17f26eda2bd613d +x << i = dc9c674610000000 +x >>u i = 9714eb2dc +x >>s i = fffffff9714eb2dc +x cmpu y = gt +x cmps y = lt +utod x = 43e2e29d65b938cf +dtou f = 3de210ddea9bd +stod x = c3da3ac5348d8e63 +dtos f = fffd5068531798f3 + +x = 680ca5caaa2ee67b +y = 20cfe3e3cf9409ee +-x = 97f35a3555d11985 +x + y = 88dc89ae79c2f069 +x - y = 473cc1e6da9adc8d +x * y = 21c701f191d0995a +x /u y = 3 +x %u y = 59cfa1f3b72c8b1 +x /s y = 3 +x %s y = 59cfa1f3b72c8b1 +x /u y2 = 32bcb4e0b +x %u y2 = 13cff1ba +x /s y3 = 32bcb4e0b +x %s y3 = 13cff1ba +~x = 97f35a3555d11984 +x & y = 200ca1c28a04006a +x | y = 68cfe7ebefbeefff +x ^ y = 48c3462965baef95 +x << i = b99ec00000000000 +x >>u i = 1a032 +x >>s i = 1a032 +x cmpu y = gt +x cmps y = gt +utod x = 43da032972aa8bba +dtou f = 2a9e5ef11df9d +stod x = 43da032972aa8bba +dtos f = 2a9e5ef11df9d + +x = caa0e97754e05225 +y = 45366c07f71f4cd0 +-x = 355f1688ab1faddb +x + y = fd7557f4bff9ef5 +x - y = 856a7d6f5dc10555 +x * y = 11fcaf963420ba10 +x /u y = 2 +x %u y = 4034116766a1b885 +x /s y = 0 +x %s y = caa0e97754e05225 +x /u y2 = 2ed78e4eb +x %u y2 = 356cebb8 +x /s y3 = ffffffff3a97a9d7 +x %s y3 = fffffffff8b9f944 +~x = 355f1688ab1fadda +x & y = 4020680754004000 +x | y = cfb6ed77f7ff5ef5 +x ^ y = 8f968570a3ff1ef5 +x << i = e97754e052250000 +x >>u i = caa0e97754e0 +x >>s i = ffffcaa0e97754e0 +x cmpu y = gt +x cmps y = lt +utod x = 43e9541d2eea9c0a +dtou f = 52ff250e0fec7 +stod x = c3caaf8b44558fd7 +dtos f = fffea2399619edfd + +x = 8fb54d13641331df +y = 3896e8ba4f4fe682 +-x = 704ab2ec9becce21 +x + y = c84c35cdb3631861 +x - y = 571e645914c34b5d +x * y = bdfcb218455ead3e +x /u y = 2 +x %u y = 1e877b9ec57364db +x /s y = ffffffffffffffff +x %s y = c84c35cdb3631861 +x /u y2 = 28a1bbf24 +x %u y2 = 799b1b7 +x /s y3 = fffffffe040371a9 +x %s y3 = ffffffffc98b7515 +~x = 704ab2ec9becce20 +x & y = 894481244032082 +x | y = bfb7edbb6f5ff7df +x ^ y = b723a5a92b5cd75d +x << i = 3ed5344d904cc77c +x >>u i = 23ed5344d904cc77 +x >>s i = e3ed5344d904cc77 +x cmpu y = gt +x cmps y = lt +utod x = 43e1f6a9a26c8266 +dtou f = 3adce5d10e5de +stod x = c3dc12acbb26fb34 +dtos f = fffd2015a249d513 + +x = 5fde20614d778429 +y = 4cb55308f7d71384 +-x = a021df9eb2887bd7 +x + y = ac93736a454e97ad +x - y = 1328cd5855a070a5 +x * y = c6f8b7175dde3024 +x /u y = 1 +x %u y = 1328cd5855a070a5 +x /s y = 1 +x %s y = 1328cd5855a070a5 +x /u y2 = 13ff0e2ae +x %u y2 = 476c04b9 +x /s y3 = 13ff0e2ae +x %s y3 = 476c04b9 +~x = a021df9eb2887bd6 +x & y = 4c94000045570000 +x | y = 5fff7369fff797ad +x ^ y = 136b7369baa097ad +x << i = fde20614d7784290 +x >>u i = 5fde20614d77842 +x >>s i = 5fde20614d77842 +x cmpu y = gt +x cmps y = gt +utod x = 43d7f78818535de1 +dtou f = 2744747b69de3 +stod x = 43d7f78818535de1 +dtos f = 2744747b69de3 + +x = 9e0edf33c6b86b83 +y = 46d59276da676456 +-x = 61f120cc3947947d +x + y = e4e471aaa11fcfd9 +x - y = 57394cbcec51072d +x * y = d98e79279ba84a02 +x /u y = 2 +x %u y = 1063ba4611e9a2d7 +x /s y = ffffffffffffffff +x %s y = e4e471aaa11fcfd9 +x /u y2 = 23b3ba758 +x %u y2 = 279118f3 +x /s y3 = fffffffe9e07f55d +x %s y3 = ffffffffd0bd48a5 +~x = 61f120cc3947947c +x & y = 6049232c2206002 +x | y = dedfdf77deff6fd7 +x ^ y = d8db4d451cdf0fd5 +x << i = ccf1ae1ae0c00000 +x >>u i = 2783b7ccf1a +x >>s i = fffffe783b7ccf1a +x cmpu y = gt +x cmps y = lt +utod x = 43e3c1dbe678d70d +dtou f = 40bd97a285016 +stod x = c3d87c48330e51e5 +dtos f = fffd7e20bf613f4c + +x = 42419bd19468ce6d +y = f28e83678dacdd78 +-x = bdbe642e6b973193 +x + y = 34d01f392215abe5 +x - y = 4fb3186a06bbf0f5 +x * y = b4153c53c590dc18 +x /u y = 0 +x %u y = 42419bd19468ce6d +x /s y = fffffffffffffffc +x %s y = c7ba96fcb1c444d +x /u y2 = 45edad94 +x %u y2 = a1dd3be1 +x /s y3 = fffffffb12493a09 +x %s y3 = d41d9ce +~x = bdbe642e6b973192 +x & y = 420083418428cc68 +x | y = f2cf9bf79decdf7d +x ^ y = b0cf18b619c41315 +x << i = 6d00000000000000 +x >>u i = 42 +x >>s i = 42 +x cmpu y = lt +x cmps y = gt +utod x = 43d09066f4651a34 +dtou f = 1b237993b1864 +stod x = 43d09066f4651a34 +dtos f = 1b237993b1864 + +x = a10909702780f767 +y = 355cb86c76c2176a +-x = 5ef6f68fd87f0899 +x + y = d665c1dc9e430ed1 +x - y = 6bac5103b0bedffd +x * y = ef08c535e7aeb1a6 +x /u y = 3 +x %u y = f2e02ac33ab129 +x /s y = ffffffffffffffff +x %s y = d665c1dc9e430ed1 +x /u y2 = 3048d2c3e +x %u y2 = 7ddbd3f +x /s y3 = fffffffe386a7a90 +x %s y3 = fffffffff6bdc2a7 +~x = 5ef6f68fd87f0898 +x & y = 2108086026801762 +x | y = b55db97c77c2f76f +x ^ y = 9455b11c5142e00d +x << i = 3dd9c0000000000 +x >>u i = 284242 +x >>s i = ffffffffffe84242 +x cmpu y = gt +x cmps y = lt +utod x = 43e421212e04f01f +dtou f = 41f5c66d044d6 +stod x = c3d7bdbda3f61fc2 +dtos f = fffd91a3ac09340c + +x = b62b990739d534f1 +y = ea2aeea1436a5eac +-x = 49d466f8c62acb0f +x + y = a05687a87d3f939d +x - y = cc00aa65f66ad645 +x * y = 387490cb237a0fec +x /u y = 0 +x %u y = b62b990739d534f1 +x /s y = 3 +x %s y = f7aacd236f9618ed +x /u y2 = c7279a22 +x %u y2 = cb0da98f +x /s y3 = 361b5597a +x %s y3 = fffffffffb948337 +~x = 49d466f8c62acb0e +x & y = a22a8801014014a0 +x | y = fe2bffa77bff7efd +x ^ y = 5c0177a67abf6a5d +x << i = 534f100000000000 +x >>u i = b62b9 +x >>s i = fffffffffffb62b9 +x cmpu y = lt +x cmps y = lt +utod x = 43e6c57320e73aa7 +dtou f = 4a9df0db3a812 +stod x = c3d27519be318ab3 +dtos f = fffe1c2652ec9747 + +x = 9e2dd389c02d798b +y = ff1135085098d3be +-x = 61d22c763fd28675 +x + y = 9d3f089210c64d49 +x - y = 9f1c9e816f94a5cd +x * y = c4066df5b775c62a +x /u y = 0 +x %u y = 9e2dd389c02d798b +x /s y = 68 +x %s y = ff3048290217745b +x /u y2 = 9ec1e9af +x %u y2 = cf1df113 +x /s y3 = 68deaf7ef9 +x %s y3 = ffffffffffdef4c3 +~x = 61d22c763fd28674 +x & y = 9e0111084008518a +x | y = ff3df789d0bdfbbf +x ^ y = 613ce68190b5aa35 +x << i = c000000000000000 +x >>u i = 2 +x >>s i = fffffffffffffffe +x cmpu y = lt +x cmps y = lt +utod x = 43e3c5ba713805af +dtou f = 40ca45715b69e +stod x = c3d8748b1d8ff4a2 +dtos f = fffd7eeb9c4ea5d3 + +x = eceb862698e5fbb5 +y = b6f3d882c0c8b20 +-x = 131479d9671a044b +x + y = f85ac3aec4f286d5 +x - y = e17c489e6cd97095 +x * y = 5d01450ae4e6bda0 +x /u y = 14 +x %u y = 83ab78327eb1d35 +x /s y = ffffffffffffffff +x %s y = f85ac3aec4f286d5 +x /u y2 = 14b83ddb43 +x %u y2 = 7bd891d +x /s y3 = fffffffe54d4c750 +x %s y3 = fffffffff8ae0935 +~x = 131479d9671a044a +x & y = 86b040008048b20 +x | y = efefbfaebcedfbb5 +x ^ y = e784bbaeb4e97095 +x << i = 98e5fbb500000000 +x >>u i = eceb8626 +x >>s i = ffffffffeceb8626 +x cmpu y = gt +x cmps y = lt +utod x = 43ed9d70c4d31cbf +dtou f = 610add3ee9d70 +stod x = c3b31479d9671a04 +dtos f = ffff82f519278ca6 + +x = 3b5e2b836840d5ef +y = 8984a692ed09ad52 +-x = c4a1d47c97bf2a11 +x + y = c4e2d216554a8341 +x - y = b1d984f07b37289d +x * y = 8ca36a2efdbe098e +x /u y = 0 +x %u y = 3b5e2b836840d5ef +x /s y = 0 +x %s y = 3b5e2b836840d5ef +x /u y2 = 6e847dbc +x %u y2 = 63938b7 +x /s y3 = ffffffff7fb9c9d5 +x %s y3 = 8979c75 +~x = c4a1d47c97bf2a10 +x & y = 904228268008542 +x | y = bbdeaf93ed49fdff +x ^ y = b2da8d11854978bd +x << i = ae0da10357bc0000 +x >>u i = ed78ae0da10 +x >>s i = ed78ae0da10 +x cmpu y = lt +x cmps y = gt +utod x = 43cdaf15c1b4206b +dtou f = 18512ba0bae1a +stod x = 43cdaf15c1b4206b +dtos f = 18512ba0bae1a + +x = 8bb41c6aa7d0a6b9 +y = e7cba3f3ef7796d4 +-x = 744be395582f5947 +x + y = 737fc05e97483d8d +x - y = a3e87876b8590fe5 +x * y = 4af6e0e671797734 +x /u y = 0 +x %u y = 8bb41c6aa7d0a6b9 +x /s y = 4 +x %s y = ec858c9ae9f24b69 +x /u y2 = 9a4aab1a +x %u y2 = 8460af0b +x /s y3 = 4ce035bf5 +x %s y3 = fffffffff8cc5e2a +~x = 744be395582f5946 +x & y = 83800062a7508690 +x | y = efffbffbeff7b6fd +x ^ y = 6c7fbf9948a7306d +x << i = c6aa7d0a6b900000 +x >>u i = 8bb41c6aa7d +x >>s i = fffff8bb41c6aa7d +x cmpu y = lt +x cmps y = lt +utod x = 43e176838d54fa15 +dtou f = 3938fad8898cc +stod x = c3dd12f8e5560bd6 +dtos f = fffd05d6f2c18801 + +x = 5e14c5d21ca43093 +y = dd468d765debf826 +-x = a1eb3a2de35bcf6d +x + y = 3b5b53487a9028b9 +x - y = 80ce385bbeb8386d +x * y = d4a2340f4d5e9dd2 +x /u y = 0 +x %u y = 5e14c5d21ca43093 +x /s y = fffffffffffffffe +x %s y = 18a1e0bed87c20df +x /u y2 = 6cd85af3 +x %u y2 = 21606d91 +x /s y3 = fffffffd4a66f1cd +x %s y3 = 6f4d315 +~x = a1eb3a2de35bcf6c +x & y = 5c0485521ca03002 +x | y = df56cdf65deff8b7 +x ^ y = 835248a4414fc8b5 +x << i = 290c24c000000000 +x >>u i = 1785317 +x >>s i = 1785317 +x cmpu y = lt +x cmps y = gt +utod x = 43d785317487290c +dtou f = 26891f773d4b0 +stod x = 43d785317487290c +dtos f = 26891f773d4b0 + +x = 7265a096401af9fd +y = b918a7193a6af5c8 +-x = 8d9a5f69bfe50603 +x + y = 2b7e47af7a85efc5 +x - y = b94cf97d05b00435 +x * y = 4b405d3fbb146ea8 +x /u y = 0 +x %u y = 7265a096401af9fd +x /s y = ffffffffffffffff +x %s y = 2b7e47af7a85efc5 +x /u y2 = 9e37e46d +x %u y2 = 276a9058 +x /s y3 = fffffffe62f7568d +x %s y3 = 3a458b38 +~x = 8d9a5f69bfe50602 +x & y = 3000a010000af1c8 +x | y = fb7da79f7a7afdfd +x ^ y = cb7d078f7a700c35 +x << i = 65a096401af9fd00 +x >>u i = 7265a096401af9 +x >>s i = 7265a096401af9 +x cmpu y = lt +x cmps y = gt +utod x = 43dc9968259006be +dtou f = 2edb6497a791c +stod x = 43dc9968259006be +dtos f = 2edb6497a791c + +x = 529ef449b98aed77 +y = 998d65c70ae4483a +-x = ad610bb646751289 +x + y = ec2c5a10c46f35b1 +x - y = b9118e82aea6a53d +x * y = d3da4c66403f44f6 +x /u y = 0 +x %u y = 529ef449b98aed77 +x /s y = 0 +x %s y = 529ef449b98aed77 +x /u y2 = 89be8881 +x %u y2 = 3286ec30 +x /s y3 = ffffffff318b3773 +x %s y3 = 41b7412 +~x = ad610bb646751288 +x & y = 108c644108804832 +x | y = db9ff5cfbbeeed7f +x ^ y = cb13918eb36ea54d +x << i = dc00000000000000 +x >>u i = 14 +x >>s i = 14 +x cmpu y = lt +x cmps y = gt +utod x = 43d4a7bd126e62bb +dtou f = 21d76e4ce45e9 +stod x = 43d4a7bd126e62bb +dtos f = 21d76e4ce45e9 + +x = d4c53464631ef981 +y = d79a311745b55bfc +-x = 2b3acb9b9ce1067f +x + y = ac5f657ba8d4557d +x - y = fd2b034d1d699d85 +x * y = e994cbcac26375fc +x /u y = 0 +x %u y = d4c53464631ef981 +x /s y = 1 +x %s y = fd2b034d1d699d85 +x /u y2 = fca32bce +x %u y2 = 19279bff +x /s y3 = 111f23d93 +x %s y3 = ffffffffe9244e4c +~x = 2b3acb9b9ce1067e +x & y = d480300441145980 +x | y = d7df357767bffbfd +x ^ y = 35f057326aba27d +x << i = 1000000000000000 +x >>u i = d +x >>s i = fffffffffffffffd +x cmpu y = lt +x cmps y = lt +utod x = 43ea98a68c8c63df +dtou f = 57269613000cb +stod x = c3c59d65cdce7083 +dtos f = fffee4b0a668f000 + +x = cb016d548a96b09b +y = 59b7d17de3b8718e +-x = 34fe92ab75694f65 +x + y = 24b93ed26e4f2229 +x - y = 71499bd6a6de3f0d +x * y = 9b43f7a8c3f260fa +x /u y = 2 +x %u y = 1791ca58c325cd7f +x /s y = 0 +x %s y = cb016d548a96b09b +x /u y2 = 24340b83f +x %u y2 = 41884ad8 +x /s y3 = ffffffff68c95c6b +x %s y3 = ffffffffb854355c +~x = 34fe92ab75694f64 +x & y = 490141548290308a +x | y = dbb7fd7debbef19f +x ^ y = 92b6bc29692ec115 +x << i = 5b5522a5ac26c000 +x >>u i = 32c05b5522a5a +x >>s i = ffff2c05b5522a5a +x cmpu y = gt +x cmps y = lt +utod x = 43e9602daa9152d6 +dtou f = 5326ad656497d +stod x = c3ca7f4955bab4a8 +dtos f = fffea4b21b8f38b3 + +x = e9b85c9ebecee945 +y = eff4c4de68a8bd70 +-x = 1647a361413116bb +x + y = d9ad217d2777a6b5 +x - y = f9c397c056262bd5 +x * y = 45861cb55a05ff30 +x /u y = 0 +x %u y = e9b85c9ebecee945 +x /s y = 1 +x %s y = f9c397c056262bd5 +x /u y2 = f958dab3 +x %u y2 = ecb4360b +x /s y3 = 16380abc0 +x %s y3 = fffffffffebaf8c5 +~x = 1647a361413116ba +x & y = e9b0449e2888a940 +x | y = effcdcdefeeefd75 +x ^ y = 64c9840d6665435 +x << i = e945000000000000 +x >>u i = e9b8 +x >>s i = ffffffffffffe9b8 +x cmpu y = lt +x cmps y = lt +utod x = 43ed370b93d7d9dd +dtou f = 5fbb55dc42afb +stod x = c3b647a361413117 +dtos f = ffff6dfca2fd1a30 + +x = f6e37ba6953b5dff +y = f751e9180fe38822 +-x = 91c84596ac4a201 +x + y = ee3564bea51ee621 +x - y = ff91928e8557d5dd +x * y = d828a075a5eef3de +x /u y = 0 +x %u y = f6e37ba6953b5dff +x /s y = 1 +x %s y = ff91928e8557d5dd +x /u y2 = ff8db266 +x %u y2 = cc4dce6f +x /s y3 = 10cb8d59a +x %s y3 = fffffffffdc42d8f +~x = 91c84596ac4a200 +x & y = f641690005230822 +x | y = f7f3fbbe9ffbddff +x ^ y = 1b292be9ad8d5dd +x << i = 54ed77fc00000000 +x >>u i = 3db8dee9 +x >>s i = fffffffffdb8dee9 +x cmpu y = lt +x cmps y = lt +utod x = 43eedc6f74d2a76c +dtou f = 6520258a4622f +stod x = c3a23908b2d58944 +dtos f = ffffc4499ddd5165 + +x = 6b48866df6b94d49 +y = 560a70d9eace4e24 +-x = 94b779920946b2b7 +x + y = c152f747e1879b6d +x - y = 153e15940beaff25 +x * y = 9ee6c54512591c44 +x /u y = 1 +x %u y = 153e15940beaff25 +x /s y = 1 +x %s y = 153e15940beaff25 +x /u y2 = 13f341cd4 +x %u y2 = 3ea81d95 +x /s y3 = 13f341cd4 +x %s y3 = 3ea81d95 +~x = 94b779920946b2b6 +x & y = 42080049e2884c00 +x | y = 7f4af6fdfeff4f6d +x ^ y = 3d42f6b41c77036d +x << i = 6b94d49000000000 +x >>u i = 6b48866 +x >>s i = 6b48866 +x cmpu y = gt +x cmps y = gt +utod x = 43dad2219b7dae53 +dtou f = 2bf178305165b +stod x = 43dad2219b7dae53 +dtos f = 2bf178305165b + +x = c48462e3c16319a3 +y = fc0b2696f69dff6 +-x = 3b7b9d1c3e9ce65d +x + y = d445154d30ccf999 +x - y = b4c3b07a51f939ad +x * y = 7a588fef596a9fa2 +x /u y = c +x %u y = 77c05f2886c9a1b +x /s y = fffffffffffffffd +x %s y = f3c67a200fa0b985 +x /u y2 = c79a1999e +x %u y2 = 8cb3bd5 +x /s y3 = fffffffc39559faa +x %s y3 = fffffffffec068e9 +~x = 3b7b9d1c3e9ce65c +x & y = 4802261416119a2 +x | y = cfc4f2ebef6bdff7 +x ^ y = cb44d08aae0ac655 +x << i = 68c0000000000000 +x >>u i = 312 +x >>s i = ffffffffffffff12 +x cmpu y = gt +x cmps y = lt +utod x = 43e8908c5c782c63 +dtou f = 507e50a560d2f +stod x = c3cdbdce8e1f4e73 +dtos f = fffe7a2c4f8efc65 + +x = 81c143a7754ce98d +y = 2595000288da8218 +-x = 7e3ebc588ab31673 +x + y = a75643a9fe276ba5 +x - y = 5c2c43a4ec726775 +x * y = 1c0ef3efd7e17f38 +x /u y = 3 +x %u y = 1102439fdabd6345 +x /s y = fffffffffffffffd +x %s y = f28043af0fdc6fd5 +x /u y2 = 373dc3a72 +x %u y2 = f3a74a9 +x /s y3 = fffffffca40c567c +x %s y3 = ffffffffeb083c95 +~x = 7e3ebc588ab31672 +x & y = 181000200488008 +x | y = a5d543a7fddeeb9d +x ^ y = a45443a5fd966b95 +x << i = a7754ce98d000000 +x >>u i = 81c143a775 +x >>s i = ffffff81c143a775 +x cmpu y = gt +x cmps y = lt +utod x = 43e0382874eea99d +dtou f = 3525ca9c3ecab +stod x = c3df8faf1622acc6 +dtos f = fffcc4a3eefcdbe1 + +x = 185201da44524787 +y = 6207869c80ed0d0a +-x = e7adfe25bbadb879 +x + y = 7a598876c53f5491 +x - y = b64a7b3dc3653a7d +x * y = 4ccbcc3290d3a646 +x /u y = 0 +x %u y = 185201da44524787 +x /s y = 0 +x %s y = 185201da44524787 +x /u y2 = 3f82fa73 +x %u y2 = 22447773 +x /s y3 = 3f82fa73 +x %s y3 = 22447773 +~x = e7adfe25bbadb878 +x & y = 2009800400502 +x | y = 7a5787dec4ff4f8f +x ^ y = 7a558746c4bf4a8d +x << i = 48076911491e1c00 +x >>u i = 6148076911491 +x >>s i = 6148076911491 +x cmpu y = lt +x cmps y = lt +utod x = 43b85201da445248 +dtou f = 9f62c2d2a520 +stod x = 43b85201da445248 +dtos f = 9f62c2d2a520 + +x = 4c00590c95cc211 +y = 733d576e60e10d4c +-x = fb3ffa6f36a33def +x + y = 77fd5cff2a3dcf5d +x - y = 9182ae22687bb4c5 +x * y = 45e9eed06e557a0c +x /u y = 0 +x %u y = 4c00590c95cc211 +x /s y = 0 +x %s y = 4c00590c95cc211 +x /u y2 = a8d5772 +x %u y2 = 2bbf7115 +x /s y3 = a8d5772 +x %s y3 = 2bbf7115 +~x = fb3ffa6f36a33dee +x & y = 50040400000 +x | y = 77fd57fee9fdcf5d +x ^ y = 77fd52fea9bdcf5d +x << i = 590c95cc211000 +x >>u i = 4c00590c95cc +x >>s i = 4c00590c95cc +x cmpu y = lt +x cmps y = lt +utod x = 4393001643257308 +dtou f = 1f2151f0acc4 +stod x = 4393001643257308 +dtos f = 1f2151f0acc4 + +x = 49769540f1cb8bab +y = 737fa1db80ffe35e +-x = b6896abf0e347455 +x + y = bcf6371c72cb6f09 +x - y = d5f6f36570cba84d +x * y = 3b460a1ee4eae9ca +x /u y = 0 +x %u y = 49769540f1cb8bab +x /s y = 0 +x %s y = 49769540f1cb8bab +x /u y2 = a2d45f9b +x %u y2 = 7184712 +x /s y3 = a2d45f9b +x %s y3 = 7184712 +~x = b6896abf0e347454 +x & y = 4176814080cb830a +x | y = 7b7fb5dbf1ffebff +x ^ y = 3a09349b713468f5 +x << i = 3c72e2eac0000000 +x >>u i = 125da5503 +x >>s i = 125da5503 +x cmpu y = lt +x cmps y = lt +utod x = 43d25da5503c72e3 +dtou f = 1e172d28eff11 +stod x = 43d25da5503c72e3 +dtos f = 1e172d28eff11 + +x = e43d7008d3e41ad5 +y = df82555639c8e3c0 +-x = 1bc28ff72c1be52b +x + y = c3bfc55f0dacfe95 +x - y = 4bb1ab29a1b3715 +x * y = 3d32eb3e9246fec0 +x /u y = 1 +x %u y = 4bb1ab29a1b3715 +x /s y = 0 +x %s y = e43d7008d3e41ad5 +x /u y2 = 1056b28b9 +x %u y2 = 986cffaf +x /s y3 = dab98c69 +x %s y3 = fffffffff99e128f +~x = 1bc28ff72c1be52a +x & y = c400500011c002c0 +x | y = ffbf755efbecfbd5 +x ^ y = 3bbf255eea2cf915 +x << i = e43d7008d3e41ad5 +x >>u i = e43d7008d3e41ad5 +x >>s i = e43d7008d3e41ad5 +x cmpu y = gt +x cmps y = gt +utod x = 43ec87ae011a7c83 +dtou f = 5d7cb29987887 +stod x = c3bbc28ff72c1be5 +dtos f = ffff4a126ed167bc + +x = fbbe909b5773ca0f +y = f9520e33d6ba76f2 +-x = 4416f64a88c35f1 +x + y = f5109ecf2e2e4101 +x - y = 26c826780b9531d +x * y = cdb0b52e637dec2e +x /u y = 1 +x %u y = 26c826780b9531d +x /s y = 0 +x %s y = fbbe909b5773ca0f +x /u y2 = 1027d221e +x %u y2 = e50c5a15 +x /s y3 = a31a1d6d +x %s y3 = fffffffff9b9f758 +~x = 4416f64a88c35f0 +x & y = f912001356324202 +x | y = fbfe9ebbd7fbfeff +x ^ y = 2ec9ea881c9bcfd +x << i = 283c000000000000 +x >>u i = 3eef +x >>s i = fffffffffffffeef +x cmpu y = gt +x cmps y = gt +utod x = 43ef77d2136aee79 +dtou f = 671d501edc23f +stod x = c39105bd92a230d8 +dtos f = ffffe41c4726b175 + +x = 8f2098834a0a77d9 +y = a889851f5a003974 +-x = 70df677cb5f58827 +x + y = 37aa1da2a40ab14d +x - y = e6971363f00a3e65 +x * y = 376b45632b6d9f54 +x /u y = 0 +x %u y = 8f2098834a0a77d9 +x /s y = 1 +x %s y = e6971363f00a3e65 +x /u y2 = d967540f +x %u y2 = 73d47f08 +x /s y3 = 14a5fa443 +x %s y3 = ffffffffb843c4bc +~x = 70df677cb5f58826 +x & y = 880080034a003150 +x | y = afa99d9f5a0a7ffd +x ^ y = 27a91d9c100a4ead +x << i = 7d90000000000000 +x >>u i = 8f2 +x >>s i = fffffffffffff8f2 +x cmpu y = lt +x cmps y = lt +utod x = 43e1e4131069414f +dtou f = 3a9ffceeea4ed +stod x = c3dc37d9df2d7d62 +dtos f = fffd1c4714279423 + +x = cdb48c3bc67626b3 +y = 7253c92e218e1bc6 +-x = 324b73c43989d94d +x + y = 40085569e8044279 +x - y = 5b60c30da4e80aed +x * y = 51d293da7fc0cf72 +x /u y = 1 +x %u y = 5b60c30da4e80aed +x /s y = 0 +x %s y = cdb48c3bc67626b3 +x /u y2 = 1cc9cbb61 +x %u y2 = ba5245 +x /s y3 = ffffffff8f617990 +x %s y3 = fffffffff0d03ed3 +~x = 324b73c43989d94c +x & y = 4010882a00060282 +x | y = fff7cd3fe7fe3ff7 +x ^ y = bfe74515e7f83d75 +x << i = 6d230ef19d89acc0 +x >>u i = 336d230ef19d89a +x >>s i = ff36d230ef19d89a +x cmpu y = gt +x cmps y = lt +utod x = 43e9b6918778cec5 +dtou f = 5441c2a7d80c5 +stod x = c3c925b9e21cc4ed +dtos f = fffeb6636fb66ffb + +x = b9770d034f679d1d +y = b10aa58461708268 +-x = 4688f2fcb09862e3 +x + y = 6a81b287b0d81f85 +x - y = 86c677eedf71ab5 +x * y = a954ab7b99908dc8 +x /u y = 1 +x %u y = 86c677eedf71ab5 +x /s y = 0 +x %s y = b9770d034f679d1d +x /u y2 = 10c2e2273 +x %u y2 = 6febbad1 +x /s y3 = e4b07a4a +x %s y3 = ffffffffb4b2dcf5 +~x = 4688f2fcb09862e2 +x & y = b102050041608008 +x | y = b97fad876f779f7d +x ^ y = 87da8872e171f75 +x << i = 679d1d0000000000 +x >>u i = b9770d +x >>s i = ffffffffffb9770d +x cmpu y = gt +x cmps y = gt +utod x = 43e72ee1a069ecf4 +dtou f = 4bf76b520c0b5 +stod x = c3d1a23cbf2c2619 +dtos f = fffe31bdfa59afea + +x = 7df56995af680597 +y = 83202d927c5965da +-x = 820a966a5097fa69 +x + y = 11597282bc16b71 +x - y = fad53c03330e9fbd +x * y = 95d310a87e485596 +x /u y = 0 +x %u y = 7df56995af680597 +x /s y = ffffffffffffffff +x %s y = 11597282bc16b71 +x /u y2 = f5e9880c +x %u y2 = 3dce52bf +x /s y3 = fffffffefdc6ec2b +x %s y3 = 7611c611 +~x = 820a966a5097fa68 +x & y = 12029902c480592 +x | y = fff56d97ff7965df +x ^ y = fed54407d331604d +x << i = 56bda0165c000000 +x >>u i = 1f7d5a656b +x >>s i = 1f7d5a656b +x cmpu y = lt +x cmps y = gt +utod x = 43df7d5a656bda01 +dtou f = 3397b88f3b261 +stod x = 43df7d5a656bda01 +dtos f = 3397b88f3b261 + +x = 553ac3862c878ea1 +y = 596019b6dab2729c +-x = aac53c79d378715f +x + y = ae9add3d073a013d +x - y = fbdaa9cf51d51c05 +x * y = aac46ac3c6109c1c +x /u y = 0 +x %u y = 553ac3862c878ea1 +x /s y = 0 +x %s y = 553ac3862c878ea1 +x /u y2 = f41ffd27 +x %u y2 = 70c5e7 +x /s y3 = f41ffd27 +x %s y3 = 70c5e7 +~x = aac53c79d378715e +x & y = 5120018608820280 +x | y = 5d7adbb6feb7febd +x ^ y = c5ada30f635fc3d +x << i = 62c878ea10000000 +x >>u i = 553ac3862 +x >>s i = 553ac3862 +x cmpu y = lt +x cmps y = lt +utod x = 43d54eb0e18b21e4 +dtou f = 22e8f73417278 +stod x = 43d54eb0e18b21e4 +dtos f = 22e8f73417278 + +x = 20fcd070f46d0abb +y = b8bcea21c9bc292e +-x = df032f8f0b92f545 +x + y = d9b9ba92be2933e9 +x - y = 683fe64f2ab0e18d +x * y = a4ddf90d16a3e09a +x /u y = 0 +x %u y = 20fcd070f46d0abb +x /s y = 0 +x %s y = 20fcd070f46d0abb +x /u y2 = 2db65f96 +x %u y2 = 5a639c65 +x /s y3 = ffffffff897f0ff8 +x %s y3 = 1c534bc3 +~x = df032f8f0b92f544 +x & y = 20bcc020c02c082a +x | y = b8fcfa71fdfd2bbf +x ^ y = 98403a513dd12395 +x << i = 42aec00000000000 +x >>u i = 83f3 +x >>s i = 83f3 +x cmpu y = lt +x cmps y = gt +utod x = 43c07e68387a3685 +dtou f = d82fef3b1880 +stod x = 43c07e68387a3685 +dtos f = d82fef3b1880 + +x = 37d0c25765ae9065 +y = 2c73a7867781fe10 +-x = c82f3da89a516f9b +x + y = 644469dddd308e75 +x - y = b5d1ad0ee2c9255 +x * y = 7f1db64d44123c50 +x /u y = 1 +x %u y = b5d1ad0ee2c9255 +x /s y = 1 +x %s y = b5d1ad0ee2c9255 +x /u y2 = 14171ae1b +x %u y2 = 2178d143 +x /s y3 = 14171ae1b +x %s y3 = 2178d143 +~x = c82f3da89a516f9a +x & y = 2450820665809000 +x | y = 3ff3e7d777affe75 +x ^ y = 1ba365d1122f6e75 +x << i = c25765ae90650000 +x >>u i = 37d0c25765ae +x >>s i = 37d0c25765ae +x cmpu y = gt +x cmps y = gt +utod x = 43cbe8612bb2d748 +dtou f = 16dcacfdb480d +stod x = 43cbe8612bb2d748 +dtos f = 16dcacfdb480d + +x = 7c44b251159b1a1f +y = 6922cff21dab79c2 +-x = 83bb4daeea64e5e1 +x + y = e5678243334693e1 +x - y = 1321e25ef7efa05d +x * y = f5cf3786a497727e +x /u y = 1 +x %u y = 1321e25ef7efa05d +x /s y = 1 +x %s y = 1321e25ef7efa05d +x /u y2 = 12e961017 +x %u y2 = 22af4b61 +x /s y3 = 12e961017 +x %s y3 = 22af4b61 +~x = 83bb4daeea64e5e0 +x & y = 68008250158b1802 +x | y = 7d66fff31dbb7bdf +x ^ y = 15667da3083063dd +x << i = f112c944566c687c +x >>u i = 1f112c944566c687 +x >>s i = 1f112c944566c687 +x cmpu y = gt +x cmps y = gt +utod x = 43df112c944566c7 +dtou f = 32e67b027dfed +stod x = 43df112c944566c7 +dtos f = 32e67b027dfed + diff --git a/test/regression/Results/volatile2 b/test/regression/Results/volatile2 index e6a9903..23a53f0 100644 --- a/test/regression/Results/volatile2 +++ b/test/regression/Results/volatile2 @@ -12,6 +12,8 @@ float 1: OK float 2: OK double 1: OK double 2: OK +long long 1: OK +long long 2: OK global signed char 1: OK global signed char 2: OK global unsigned char 1: OK @@ -26,3 +28,5 @@ global float 1: OK global float 2: OK global double 1: OK global double 2: OK +global long long 1: OK +global long long 2: OK diff --git a/test/regression/int64.c b/test/regression/int64.c new file mode 100644 index 0000000..55a4f88 --- /dev/null +++ b/test/regression/int64.c @@ -0,0 +1,115 @@ +/* Semi-random testing of 64-bit integer operations */ + +#include <stdio.h> + +typedef unsigned long long u64; +typedef signed long long s64; + +static u64 rnd64(void) +{ + static u64 seed = 0; + seed = seed * 6364136223846793005ULL + 1442695040888963407ULL; + return seed; +} + +static inline u64 safe_udiv64(u64 x, u64 y) +{ + if (y == 0) return 0; else return x / y; +} + +static inline u64 safe_umod64(u64 x, u64 y) +{ + if (y == 0) return 0; else return x % y; +} + +static inline s64 safe_sdiv64(s64 x, s64 y) +{ + if (y == 0 || (y == -1 && x == (-1LL << 63))) return 0; else return x / y; +} + +static inline s64 safe_smod64(s64 x, s64 y) +{ + if (y == 0 || (y == -1 && x == (-1LL << 63))) return 0; else return x % y; +} + +static void test1(u64 x, u64 y) +{ + u64 y2; + s64 y3; + int i; + double f; + + printf("x = %llx\n", x); + printf("y = %llx\n", y); + printf("-x = %llx\n", -x); + printf("x + y = %llx\n", x + y); + printf("x - y = %llx\n", x - y); + printf("x * y = %llx\n", x * y); + printf("x /u y = %llx\n", safe_udiv64(x, y)); + printf("x %%u y = %llx\n", safe_umod64(x, y)); + printf("x /s y = %llx\n", safe_sdiv64(x, y)); + printf("x %%s y = %llx\n", safe_smod64(x, y)); + y2 = y >> 32; + printf("x /u y2 = %llx\n", safe_udiv64(x, y2)); + printf("x %%u y2 = %llx\n", safe_umod64(x, y2)); + y3 = ((s64)y) >> 32; + printf("x /s y3 = %llx\n", safe_sdiv64(x, y3)); + printf("x %%s y3 = %llx\n", safe_smod64(x, y3)); + printf("~x = %llx\n", ~x); + printf("x & y = %llx\n", x & y); + printf("x | y = %llx\n", x | y); + printf("x ^ y = %llx\n", x ^ y); + i = y & 63; + printf("x << i = %llx\n", x << i); + printf("x >>u i = %llx\n", x >> i); + printf("x >>s i = %llx\n", (s64) x >> i); + printf("x cmpu y = %s\n", + x == y ? "eq" : x < y ? "lt" : "gt"); + printf("x cmps y = %s\n", + x == y ? "eq" : (s64)x < (s64)y ? "lt" : "gt"); + f = (double) x; + printf("utod x = %llx\n", *((u64*) &f)); + f = f * 0.0001; + printf("dtou f = %llx\n", (u64) f); + f = (double) ((s64) x); + printf("stod x = %llx\n", *((u64*) &f)); + f = f * 0.0001; + printf("dtos f = %llx\n", (s64) f); + printf("\n"); +} + +u64 special_values[] = { + 0, + 1, + -1, + 0x7FFFFFFFLLU, + 0x80000000LLU, + 0x7FFFFFFFFFFFFFFFLLU, + 0x8000000000000000LLU +}; + +int main() +{ + int i, j; + u64 x, y; + + for (i = 0; i <= 4; i++) { + for (j = 0; j <= 4; j++) { + test1(special_values[i], special_values[j]); + } + test1(special_values[i], rnd64()); + test1(rnd64(), special_values[i]); + } + for (i = 0; i < 100; i++) { + x = rnd64(); y = rnd64(); + test1(x, y); + } + return 0; +} + + + + + + + diff --git a/test/regression/volatile2.c b/test/regression/volatile2.c index 306eb8c..3bad6ae 100644 --- a/test/regression/volatile2.c +++ b/test/regression/volatile2.c @@ -15,6 +15,7 @@ unsigned short gus; int gi; float gf; double gd; +long long gll; int main() { @@ -25,6 +26,7 @@ int main() int i; float f; double d; + long long ll; TEST("signed char", signed char, sc, 12, 34); TEST("unsigned char", unsigned char, uc, 56, 78); @@ -33,6 +35,7 @@ int main() TEST("int", int, i, 0x123456, 0x7890AB); TEST("float", float, f, 0.5, 256.0); TEST("double", double, d, 3.1415, 2.718); + TEST("long long", long long, ll, 0x123456789ABCDEFLL, 0x789ABCDEF1234567LL); TEST("global signed char", signed char, gsc, 12, 34); TEST("global unsigned char", unsigned char, guc, 56, 78); TEST("global signed short", signed short, gss, 1234, 5678); @@ -40,6 +43,7 @@ int main() TEST("global int", int, gi, 0x123456, 0x7890AB); TEST("global float", float, gf, 0.5, 256.0); TEST("global double", double, gd, 3.1415, 2.718); + TEST("global long long", long long, gll, 0x123456789ABCDEFLL, 0x789ABCDEF1234567LL); return 0; } |