Просмотр исходного кода

Add an early `TailMerge` pass (#2721)

* Add an early `TailMerge` pass

Some translations can have a lot of guest calls, and for each guest call
there is a call guard which may return. This can produce a lot of
epilogue code for returns. This pass merges those epilogues into a single
block.

```
Using filter 'hcq'.
Using metric 'code size'.

Total diff: -1648111 (-7.19 %) (bytes):
  Base: 22913847
  Diff: 21265736

Improved: 4567, regressed: 14, unchanged: 144
```

* Set PTC version

* Address feedback

* Handle `void` returning functions

* Actually handle `void` returning functions

* Fix `RegisterToLocal` logging
FICTURE7 4 года назад
Родитель
Коммит
fbf40424f4

+ 6 - 1
ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs

@@ -17,7 +17,7 @@ namespace ARMeilleure.CodeGen.Optimizations
             BasicBlock lastBlock = cfg.Blocks.Last;
 
             // Move cold blocks at the end of the list, so that they are emitted away from hot code.
-            for (block = cfg.Blocks.First; block != lastBlock; block = nextBlock)
+            for (block = cfg.Blocks.First; block != null; block = nextBlock)
             {
                 nextBlock = block.ListNext;
 
@@ -26,6 +26,11 @@ namespace ARMeilleure.CodeGen.Optimizations
                     cfg.Blocks.Remove(block);
                     cfg.Blocks.AddLast(block);
                 }
+
+                if (block == lastBlock)
+                {
+                    break;
+                }
             }
 
             for (block = cfg.Blocks.First; block != null; block = nextBlock)

+ 83 - 0
ARMeilleure/CodeGen/Optimizations/TailMerge.cs

@@ -0,0 +1,83 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+    static class TailMerge
+    {
+        public static void RunPass(in CompilerContext cctx)
+        {
+            ControlFlowGraph cfg = cctx.Cfg;
+
+            BasicBlock mergedReturn = new(cfg.Blocks.Count);
+
+            Operand returnValue;
+            Operation returnOp;
+
+            if (cctx.FuncReturnType == OperandType.None)
+            {
+                returnValue = default;
+                returnOp = Operation(Instruction.Return, default);
+            }
+            else
+            {
+                returnValue = cfg.AllocateLocal(cctx.FuncReturnType);
+                returnOp = Operation(Instruction.Return, default, returnValue);
+            }
+
+            mergedReturn.Frequency = BasicBlockFrequency.Cold;
+            mergedReturn.Operations.AddLast(returnOp);
+
+            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+            {
+                Operation op = block.Operations.Last;
+
+                if (op != default && op.Instruction == Instruction.Return)
+                {
+                    block.Operations.Remove(op);
+
+                    if (cctx.FuncReturnType == OperandType.None)
+                    {
+                        PrepareMerge(block, mergedReturn);
+                    }
+                    else
+                    {
+                        Operation copyOp = Operation(Instruction.Copy, returnValue, op.GetSource(0));
+
+                        PrepareMerge(block, mergedReturn).Append(copyOp);
+                    }
+                }
+            }
+
+            cfg.Blocks.AddLast(mergedReturn);
+            cfg.Update();
+        }
+
+        private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to)
+        {
+            BasicBlock fromPred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null;
+
+            // If the block is empty, we can try to append to the predecessor and avoid unnecessary jumps.
+            if (from.Operations.Count == 0 && fromPred != null)
+            {
+                for (int i = 0; i < fromPred.SuccessorsCount; i++)
+                {
+                    if (fromPred.GetSuccessor(i) == from)
+                    {
+                        fromPred.SetSuccessor(i, to);
+                    }
+                }
+
+                // NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it.
+                return fromPred;
+            }
+            else
+            {
+                from.AddSuccessor(to);
+
+                return from;
+            }
+        }
+    }
+}

+ 2 - 0
ARMeilleure/Diagnostics/PassName.cs

@@ -5,8 +5,10 @@ namespace ARMeilleure.Diagnostics
         Decoding,
         Translation,
         RegisterUsage,
+        TailMerge,
         Dominance,
         SsaConstruction,
+        RegisterToLocal,
         Optimization,
         PreAllocation,
         RegisterAllocation,

+ 22 - 11
ARMeilleure/Translation/Compiler.cs

@@ -1,4 +1,5 @@
 using ARMeilleure.CodeGen;
+using ARMeilleure.CodeGen.Optimizations;
 using ARMeilleure.CodeGen.X86;
 using ARMeilleure.Diagnostics;
 using ARMeilleure.IntermediateRepresentation;
@@ -13,30 +14,40 @@ namespace ARMeilleure.Translation
             OperandType      retType,
             CompilerOptions  options)
         {
-            Logger.StartPass(PassName.Dominance);
+            CompilerContext cctx = new(cfg, argTypes, retType, options);
+
+            if (options.HasFlag(CompilerOptions.Optimize))
+            {
+                Logger.StartPass(PassName.TailMerge);
+
+                TailMerge.RunPass(cctx);
 
-            if ((options & CompilerOptions.SsaForm) != 0)
+                Logger.EndPass(PassName.TailMerge, cfg);
+            }
+
+            if (options.HasFlag(CompilerOptions.SsaForm))
             {
+                Logger.StartPass(PassName.Dominance);
+
                 Dominance.FindDominators(cfg);
                 Dominance.FindDominanceFrontiers(cfg);
-            }
 
-            Logger.EndPass(PassName.Dominance);
+                Logger.EndPass(PassName.Dominance);
 
-            Logger.StartPass(PassName.SsaConstruction);
+                Logger.StartPass(PassName.SsaConstruction);
 
-            if ((options & CompilerOptions.SsaForm) != 0)
-            {
                 Ssa.Construct(cfg);
+
+                Logger.EndPass(PassName.SsaConstruction, cfg);
             }
             else
             {
-                RegisterToLocal.Rename(cfg);
-            }
+                Logger.StartPass(PassName.RegisterToLocal);
 
-            Logger.EndPass(PassName.SsaConstruction, cfg);
+                RegisterToLocal.Rename(cfg);
 
-            CompilerContext cctx = new(cfg, argTypes, retType, options);
+                Logger.EndPass(PassName.RegisterToLocal, cfg);
+            }
 
             return CodeGenerator.Generate(cctx);
         }

+ 10 - 1
ARMeilleure/Translation/ControlFlowGraph.cs

@@ -10,7 +10,7 @@ namespace ARMeilleure.Translation
         private BasicBlock[] _postOrderBlocks;
         private int[] _postOrderMap;
 
-        public int LocalsCount { get; }
+        public int LocalsCount { get; private set; }
         public BasicBlock Entry { get; }
         public IntrusiveList<BasicBlock> Blocks { get; }
         public BasicBlock[] PostOrderBlocks => _postOrderBlocks;
@@ -25,6 +25,15 @@ namespace ARMeilleure.Translation
             Update();
         }
 
+        public Operand AllocateLocal(OperandType type)
+        {
+            Operand result = Operand.Factory.Local(type);
+
+            result.NumberLocal(++LocalsCount);
+
+            return result;
+        }
+
         public void Update()
         {
             RemoveUnreachableBlocks(Blocks);

+ 1 - 1
ARMeilleure/Translation/PTC/Ptc.cs

@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 2680; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 2721; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";

+ 22 - 10
ARMeilleure/Translation/RegisterUsage.cs

@@ -203,12 +203,18 @@ namespace ARMeilleure.Translation
                 // It always needs a context load as it is the first block to run.
                 if (block.Predecessors.Count == 0 || hasContextLoad)
                 {
-                    arg = Local(OperandType.I64);
+                    long vecMask = globalInputs[block.Index].VecMask;
+                    long intMask = globalInputs[block.Index].IntMask;
 
-                    Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
+                    if (vecMask != 0 || intMask != 0)
+                    {
+                        arg = Local(OperandType.I64);
+
+                        Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
 
-                    LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode, loadArg, arg);
-                    LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode, loadArg, arg);
+                        LoadLocals(block, vecMask, RegisterType.Vector, mode, loadArg, arg);
+                        LoadLocals(block, intMask, RegisterType.Integer, mode, loadArg, arg);
+                    }
                 }
 
                 bool hasContextStore = HasContextStore(block);
@@ -220,15 +226,21 @@ namespace ARMeilleure.Translation
 
                 if (EndsWithReturn(block) || hasContextStore)
                 {
-                    if (arg == default)
+                    long vecMask = globalOutputs[block.Index].VecMask;
+                    long intMask = globalOutputs[block.Index].IntMask;
+
+                    if (vecMask != 0 || intMask != 0)
                     {
-                        arg = Local(OperandType.I64);
+                        if (arg == default)
+                        {
+                            arg = Local(OperandType.I64);
 
-                        block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
-                    }
+                            block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
+                        }
 
-                    StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode, arg);
-                    StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode, arg);
+                        StoreLocals(block, intMask, RegisterType.Integer, mode, arg);
+                        StoreLocals(block, vecMask, RegisterType.Vector, mode, arg);
+                    }
                 }
             }
         }