Kaynağa Gözat

Generalize tail continues (#1298)

* Generalize tail continues

* Fix DecodeBasicBlock

For blocks produced by `DecodeBasicBlock`, `Next` and `Branch` would be null,
which is not the state expected by the branch instructions. Those instructions
end up branching or falling through into a block which is never populated by
the `Translator`, and this causes an assert to be fired when building the CFG.

* Clean up Decode overloads

* Do not synchronize when branching into exit block

If we're branching into an exit block, that exit block will tail
continue into another translation which already has a synchronization.

* Remove A32 predicate tail continue

If `block` is not an exit block then the `block.Next` must exist (as
per the last instruction of `block`).

* Throw if decoded 0 blocks

Address gdkchan's feedback

* Rebuild block list instead of setting to null

Address gdkchan's feedback
Ficture Seven 5 yıl önce
ebeveyn
işleme
2421186d97

+ 2 - 1
ARMeilleure/Decoders/Block.cs

@@ -12,6 +12,7 @@ namespace ARMeilleure.Decoders
         public Block Branch { get; set; }
 
         public bool TailCall { get; set; }
+        public bool Exit     { get; set; }
 
         public List<OpCode> OpCodes { get; private set; }
 
@@ -29,7 +30,7 @@ namespace ARMeilleure.Decoders
         {
             int splitIndex = BinarySearch(OpCodes, rightBlock.Address);
 
-            if ((ulong)OpCodes[splitIndex].Address < rightBlock.Address)
+            if (OpCodes[splitIndex].Address < rightBlock.Address)
             {
                 splitIndex++;
             }

+ 53 - 48
ARMeilleure/Decoders/Decoder.cs

@@ -17,16 +17,7 @@ namespace ARMeilleure.Decoders
         // For lower code quality translation, we set a lower limit since we're blocking execution.
         private const int MaxInstsPerFunctionLowCq = 500;
 
-        public static Block[] DecodeBasicBlock(IMemoryManager memory, ulong address, ExecutionMode mode)
-        {
-            Block block = new Block(address);
-
-            FillBlock(memory, mode, block, ulong.MaxValue);
-
-            return new Block[] { block };
-        }
-
-        public static Block[] DecodeFunction(IMemoryManager memory, ulong address, ExecutionMode mode, bool highCq)
+        public static Block[] Decode(IMemoryManager memory, ulong address, ExecutionMode mode, bool highCq, bool singleBlock)
         {
             List<Block> blocks = new List<Block>();
 
@@ -42,13 +33,14 @@ namespace ARMeilleure.Decoders
             {
                 if (!visited.TryGetValue(blkAddress, out Block block))
                 {
-                    if (opsCount > instructionLimit || !memory.IsMapped(blkAddress))
+                    block = new Block(blkAddress);
+
+                    if ((singleBlock && visited.Count >= 1) || opsCount > instructionLimit || !memory.IsMapped(blkAddress))
                     {
-                        return null;
+                        block.Exit = true;
+                        block.EndAddress = blkAddress;
                     }
 
-                    block = new Block(blkAddress);
-
                     workQueue.Enqueue(block);
 
                     visited.Add(blkAddress, block);
@@ -71,6 +63,8 @@ namespace ARMeilleure.Decoders
                         throw new InvalidOperationException("Found duplicate block address on the list.");
                     }
 
+                    currBlock.Exit = false;
+
                     nBlock.Split(currBlock);
 
                     blocks.Insert(nBlkIndex + 1, currBlock);
@@ -78,47 +72,50 @@ namespace ARMeilleure.Decoders
                     continue;
                 }
 
-                // If we have a block after the current one, set the limit address.
-                ulong limitAddress = ulong.MaxValue;
-
-                if (nBlkIndex != blocks.Count)
+                if (!currBlock.Exit)
                 {
-                    Block nBlock = blocks[nBlkIndex];
-
-                    int nextIndex = nBlkIndex + 1;
+                    // If we have a block after the current one, set the limit address.
+                    ulong limitAddress = ulong.MaxValue;
 
-                    if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
+                    if (nBlkIndex != blocks.Count)
                     {
-                        limitAddress = blocks[nextIndex].Address;
+                        Block nBlock = blocks[nBlkIndex];
+
+                        int nextIndex = nBlkIndex + 1;
+
+                        if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
+                        {
+                            limitAddress = blocks[nextIndex].Address;
+                        }
+                        else if (nBlock.Address > currBlock.Address)
+                        {
+                            limitAddress = blocks[nBlkIndex].Address;
+                        }
                     }
-                    else if (nBlock.Address > currBlock.Address)
-                    {
-                        limitAddress = blocks[nBlkIndex].Address;
-                    }
-                }
 
-                FillBlock(memory, mode, currBlock, limitAddress);
+                    FillBlock(memory, mode, currBlock, limitAddress);
 
-                opsCount += currBlock.OpCodes.Count;
-
-                if (currBlock.OpCodes.Count != 0)
-                {
-                    // Set child blocks. "Branch" is the block the branch instruction
-                    // points to (when taken), "Next" is the block at the next address,
-                    // executed when the branch is not taken. For Unconditional Branches
-                    // (except BL/BLR that are sub calls) or end of executable, Next is null.
-                    OpCode lastOp = currBlock.GetLastOp();
+                    opsCount += currBlock.OpCodes.Count;
 
-                    bool isCall = IsCall(lastOp);
-
-                    if (lastOp is IOpCodeBImm op && !isCall)
+                    if (currBlock.OpCodes.Count != 0)
                     {
-                        currBlock.Branch = GetBlock((ulong)op.Immediate);
-                    }
-
-                    if (!IsUnconditionalBranch(lastOp) || isCall)
-                    {
-                        currBlock.Next = GetBlock(currBlock.EndAddress);
+                        // Set child blocks. "Branch" is the block the branch instruction
+                        // points to (when taken), "Next" is the block at the next address,
+                        // executed when the branch is not taken. For Unconditional Branches
+                        // (except BL/BLR that are sub calls) or end of executable, Next is null.
+                        OpCode lastOp = currBlock.GetLastOp();
+
+                        bool isCall = IsCall(lastOp);
+
+                        if (lastOp is IOpCodeBImm op && !isCall)
+                        {
+                            currBlock.Branch = GetBlock((ulong)op.Immediate);
+                        }
+
+                        if (!IsUnconditionalBranch(lastOp) || isCall)
+                        {
+                            currBlock.Next = GetBlock(currBlock.EndAddress);
+                        }
                     }
                 }
 
@@ -135,7 +132,15 @@ namespace ARMeilleure.Decoders
                 }
             }
 
-            TailCallRemover.RunPass(address, blocks);
+            if (blocks.Count == 0)
+            {
+                throw new InvalidOperationException($"Decoded 0 blocks. Entry point = 0x{address:X}.");
+            }
+
+            if (!singleBlock)
+            {
+                return TailCallRemover.RunPass(address, blocks);
+            }
 
             return blocks.ToArray();
         }

+ 32 - 18
ARMeilleure/Decoders/Optimizations/TailCallRemover.cs

@@ -1,16 +1,15 @@
-using ARMeilleure.Decoders;
-using System;
+using System;
 using System.Collections.Generic;
 
 namespace ARMeilleure.Decoders.Optimizations
 {
     static class TailCallRemover
     {
-        public static void RunPass(ulong entryAddress, List<Block> blocks)
+        public static Block[] RunPass(ulong entryAddress, List<Block> blocks)
         {
             // Detect tail calls:
             // - Assume this function spans the space covered by contiguous code blocks surrounding the entry address.
-            // - Unconditional jump to an area outside this contiguous region will be treated as a tail call.
+            // - A jump to an area outside this contiguous region will be treated as an exit block.
             // - Include a small allowance for jumps outside the contiguous range.
 
             if (!Decoder.BinarySearch(blocks, entryAddress, out int entryBlockId))
@@ -19,57 +18,72 @@ namespace ARMeilleure.Decoders.Optimizations
             }
 
             const ulong allowance = 4;
+
             Block entryBlock = blocks[entryBlockId];
-            int startBlockIndex = entryBlockId;
+
             Block startBlock = entryBlock;
-            int endBlockIndex = entryBlockId;
-            Block endBlock = entryBlock;
+            Block endBlock   = entryBlock;
+
+            int startBlockIndex = entryBlockId;
+            int endBlockIndex   = entryBlockId;
 
             for (int i = entryBlockId + 1; i < blocks.Count; i++) // Search forwards.
             {
                 Block block = blocks[i];
+
                 if (endBlock.EndAddress < block.Address - allowance)
                 {
                     break; // End of contiguous function.
                 }
 
-                endBlock = block;
+                endBlock      = block;
                 endBlockIndex = i;
             }
 
             for (int i = entryBlockId - 1; i >= 0; i--) // Search backwards.
             {
                 Block block = blocks[i];
+
                 if (startBlock.Address > block.EndAddress + allowance)
                 {
                     break; // End of contiguous function.
                 }
 
-                startBlock = block;
+                startBlock      = block;
                 startBlockIndex = i;
             }
 
             if (startBlockIndex == 0 && endBlockIndex == blocks.Count - 1)
             {
-                return; // Nothing to do here.
+                return blocks.ToArray(); // Nothing to do here.
             }
-
-            // Replace all branches to blocks outside the range with null, and force a tail call.
-
+            
+            // Mark branches outside of contiguous region as exit blocks.
             for (int i = startBlockIndex; i <= endBlockIndex; i++)
             {
                 Block block = blocks[i];
+
                 if (block.Branch != null && (block.Branch.Address > endBlock.EndAddress || block.Branch.EndAddress < startBlock.Address))
                 {
-                    block.Branch = null;
-                    block.TailCall = true;
+                    block.Branch.Exit     = true;
+                    block.Branch.TailCall = true;
                 }
             }
 
-            // Finally, delete all blocks outside the contiguous range.
+           var newBlocks = new List<Block>(blocks.Count);
+
+            // Finally, rebuild decoded block list, ignoring blocks outside the contiguous range.
+            for (int i = 0; i < blocks.Count; i++)
+            {
+                Block block = blocks[i];
+
+                if (block.Exit || (i >= startBlockIndex && i <= endBlockIndex))
+                {
+                    newBlocks.Add(block);
+                }
+            }
 
-            blocks.RemoveRange(endBlockIndex + 1, (blocks.Count - endBlockIndex) - 1);
-            blocks.RemoveRange(0, startBlockIndex);
+            return newBlocks.ToArray();
         }
     }
 }

+ 0 - 10
ARMeilleure/Instructions/InstEmitException.cs

@@ -27,11 +27,6 @@ namespace ARMeilleure.Instructions
             context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
 
             context.LoadFromContext();
-
-            if (context.CurrBlock.Next == null)
-            {
-                EmitTailContinue(context, Const(op.Address + 4));
-            }
         }
 
         public static void Und(ArmEmitterContext context)
@@ -45,11 +40,6 @@ namespace ARMeilleure.Instructions
             context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.RawOpCode));
 
             context.LoadFromContext();
-
-            if (context.CurrBlock.Next == null)
-            {
-                EmitTailContinue(context, Const(op.Address + 4));
-            }
         }
     }
 }

+ 0 - 5
ARMeilleure/Instructions/InstEmitException32.cs

@@ -27,11 +27,6 @@ namespace ARMeilleure.Instructions
             context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
 
             context.LoadFromContext();
-
-            if (context.CurrBlock.Next == null)
-            {
-                EmitTailContinue(context, Const(op.Address + 4));
-            }
         }
     }
 }

+ 7 - 61
ARMeilleure/Instructions/InstEmitFlow.cs

@@ -15,14 +15,7 @@ namespace ARMeilleure.Instructions
         {
             OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
 
-            if (context.CurrBlock.Branch != null)
-            {
-                context.Branch(context.GetLabel((ulong)op.Immediate));
-            }
-            else
-            {
-                EmitTailContinue(context, Const(op.Immediate), context.CurrBlock.TailCall);
-            }
+            context.Branch(context.GetLabel((ulong)op.Immediate));
         }
 
         public static void B_Cond(ArmEmitterContext context)
@@ -92,69 +85,22 @@ namespace ARMeilleure.Instructions
         {
             OpCodeBImm op = (OpCodeBImm)context.CurrOp;
 
-            if (context.CurrBlock.Branch != null)
-            {
-                EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond);
-
-                if (context.CurrBlock.Next == null)
-                {
-                    EmitTailContinue(context, Const(op.Address + 4));
-                }
-            }
-            else
-            {
-                Operand lblTaken = Label();
-
-                EmitCondBranch(context, lblTaken, cond);
-
-                EmitTailContinue(context, Const(op.Address + 4));
-
-                context.MarkLabel(lblTaken);
-
-                EmitTailContinue(context, Const(op.Immediate));
-            }
+            EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond);
         }
 
         private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero)
         {
             OpCodeBImm op = (OpCodeBImm)context.CurrOp;
 
-            if (context.CurrBlock.Branch != null)
+            Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+
+            if (onNotZero)
             {
-                Operand lblTarget = context.GetLabel((ulong)op.Immediate);
-
-                if (onNotZero)
-                {
-                    context.BranchIfTrue(lblTarget, value);
-                }
-                else
-                {
-                    context.BranchIfFalse(lblTarget, value);
-                }
-
-                if (context.CurrBlock.Next == null)
-                {
-                    EmitTailContinue(context, Const(op.Address + 4));
-                }
+                context.BranchIfTrue(lblTarget, value);
             }
             else
             {
-                Operand lblTaken = Label();
-
-                if (onNotZero)
-                {
-                    context.BranchIfTrue(lblTaken, value);
-                }
-                else
-                {
-                    context.BranchIfFalse(lblTaken, value);
-                }
-
-                EmitTailContinue(context, Const(op.Address + 4));
-
-                context.MarkLabel(lblTaken);
-
-                EmitTailContinue(context, Const(op.Immediate));
+                context.BranchIfFalse(lblTarget, value);
             }
         }
     }

+ 1 - 8
ARMeilleure/Instructions/InstEmitFlow32.cs

@@ -15,14 +15,7 @@ namespace ARMeilleure.Instructions
         {
             IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
 
-            if (context.CurrBlock.Branch != null)
-            {
-                context.Branch(context.GetLabel((ulong)op.Immediate));
-            }
-            else
-            {
-                EmitTailContinue(context, Const(op.Immediate));
-            }
+            context.Branch(context.GetLabel((ulong)op.Immediate));
         }
 
         public static void Bl(ArmEmitterContext context)

+ 47 - 50
ARMeilleure/Instructions/InstEmitFlowHelper.cs

@@ -150,17 +150,32 @@ namespace ARMeilleure.Instructions
         private static void EmitNativeCall(ArmEmitterContext context, Operand nativeContextPtr, Operand funcAddr, bool isJump = false)
         {
             context.StoreToContext();
-            Operand returnAddress;
+
             if (isJump)
             {
                 context.Tailcall(funcAddr, nativeContextPtr);
             }
             else
             {
-                returnAddress = context.Call(funcAddr, OperandType.I64, nativeContextPtr);
+                OpCode op = context.CurrOp;
+
+                Operand returnAddress = context.Call(funcAddr, OperandType.I64, nativeContextPtr);
+
                 context.LoadFromContext();
 
-                EmitContinueOrReturnCheck(context, returnAddress);
+                // Note: The return value of a translated function is always an Int64 with the
+                // address execution has returned to. We expect this address to be immediately after the
+                // current instruction, if it isn't we keep returning until we reach the dispatcher.
+                Operand nextAddr = Const((long)op.Address + op.OpCodeSizeInBytes);
+
+                // Try to continue within this block.
+                // If the return address isn't to our next instruction, we need to return so the JIT can figure out what to do.
+                Operand lblContinue = context.GetLabel(nextAddr.Value);
+
+                // We need to clear out the call flag for the return address before comparing it.
+                context.BranchIfTrue(lblContinue, context.ICompareEqual(context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr));
+
+                context.Return(returnAddress);
             }
         }
 
@@ -191,46 +206,18 @@ namespace ARMeilleure.Instructions
             }
         }
 
-        private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand returnAddress)
-        {
-            // Note: The return value of a translated function is always an Int64 with the
-            // address execution has returned to. We expect this address to be immediately after the
-            // current instruction, if it isn't we keep returning until we reach the dispatcher.
-            Operand nextAddr = Const(GetNextOpAddress(context.CurrOp));
-
-            // Try to continue within this block.
-            // If the return address isn't to our next instruction, we need to return so the JIT can figure out what to do.
-            Operand lblContinue = Label();
-
-            // We need to clear out the call flag for the return address before comparing it.
-            context.BranchIfTrue(lblContinue, context.ICompareEqual(context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr));
-
-            context.Return(returnAddress);
-
-            context.MarkLabel(lblContinue);
-
-            if (context.CurrBlock.Next == null)
-            {
-                // No code following this instruction, try and find the next block and jump to it.
-                EmitTailContinue(context, nextAddr);
-            }
-        }
-
-        private static ulong GetNextOpAddress(OpCode op)
-        {
-            return op.Address + (ulong)op.OpCodeSizeInBytes;
-        }
-
         public static void EmitTailContinue(ArmEmitterContext context, Operand address, bool allowRejit = false)
         {
-            bool useTailContinue = true; // Left option here as it may be useful if we need to return to managed rather than tail call in future. (eg. for debug)
+            // Left option here as it may be useful if we need to return to managed rather than tail call in future. 
+            // (eg. for debug)
+            bool useTailContinue = true;
 
             if (useTailContinue)
             {
                 if (context.HighCq)
                 {
-                    // If we're doing a tail continue in HighCq, reserve a space in the jump table to avoid calling back to the translator.
-                    // This will always try to get a HighCq version of our continue target as well.
+                    // If we're doing a tail continue in HighCq, reserve a space in the jump table to avoid calling back
+                    // to the translator. This will always try to get a HighCq version of our continue target as well.
                     EmitJumpTableBranch(context, address, true);
                 }
                 else
@@ -263,6 +250,7 @@ namespace ARMeilleure.Instructions
         {
             address = context.BitwiseOr(address, Const(address.Type, (long)CallFlag)); // Set call flag.
             Operand fallbackAddr = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), address);
+
             EmitNativeCall(context, fallbackAddr, isJump);
         }
 
@@ -273,39 +261,48 @@ namespace ARMeilleure.Instructions
             Operand endLabel = Label();
             Operand fallbackLabel = Label();
 
-            Action<Operand> emitTableEntry = (Operand entrySkipLabel) =>
+            void EmitTableEntry(Operand entrySkipLabel)
             {
                 // Try to take this entry in the table if its guest address equals 0.
                 Operand gotResult = context.CompareAndSwap(tableAddress, Const(0L), address);
 
                 // Is the address ours? (either taken via CompareAndSwap (0), or what was already here)
-                context.BranchIfFalse(entrySkipLabel, context.BitwiseOr(context.ICompareEqual(gotResult, address), context.ICompareEqual(gotResult, Const(0L))));
+                context.BranchIfFalse(entrySkipLabel, 
+                    context.BitwiseOr(
+                        context.ICompareEqual(gotResult, address), 
+                        context.ICompareEqual(gotResult, Const(0L)))
+                );
 
                 // It's ours, so what function is it pointing to?
                 Operand targetFunctionPtr = context.Add(tableAddress, Const(8L));
                 Operand targetFunction = context.Load(OperandType.I64, targetFunctionPtr);
 
                 // Call the function.
-                // We pass in the entry address as the guest address, as the entry may need to be updated by the indirect call stub.
+                // We pass in the entry address as the guest address, as the entry may need to be updated by the 
+                // indirect call stub.
                 EmitNativeCallWithGuestAddress(context, targetFunction, tableAddress, isJump);
+
                 context.Branch(endLabel);
-            };
+            }
 
             // Currently this uses a size of 1, as higher values inflate code size for no real benefit.
             for (int i = 0; i < JumpTable.DynamicTableElems; i++)
             {
                 if (i == JumpTable.DynamicTableElems - 1)
                 {
-                    emitTableEntry(fallbackLabel); // If this is the last entry, avoid emitting the additional label and add.
-                }
+                    // If this is the last entry, avoid emitting the additional label and add.
+                    EmitTableEntry(fallbackLabel);
+                } 
                 else
                 {
                     Operand nextLabel = Label();
 
-                    emitTableEntry(nextLabel);
+                    EmitTableEntry(nextLabel);
 
                     context.MarkLabel(nextLabel);
-                    tableAddress = context.Add(tableAddress, Const((long)JumpTable.JumpTableStride)); // Move to the next table entry.
+
+                    // Move to the next table entry.
+                    tableAddress = context.Add(tableAddress, Const((long)JumpTable.JumpTableStride));
                 }
             }
 
@@ -323,16 +320,15 @@ namespace ARMeilleure.Instructions
                 address = context.ZeroExtend32(OperandType.I64, address);
             }
 
-            // TODO: Constant folding. Indirect calls are slower in the best case and emit more code so we want to avoid them when possible.
+            // TODO: Constant folding. Indirect calls are slower in the best case and emit more code so we want to 
+            // avoid them when possible.
             bool isConst = address.Kind == OperandKind.Constant;
             long constAddr = (long)address.Value;
 
             if (!context.HighCq)
             {
-                // Don't emit indirect calls or jumps if we're compiling in lowCq mode.
-                // This avoids wasting space on the jump and indirect tables.
-                // Just ask the translator for the function address.
-
+                // Don't emit indirect calls or jumps if we're compiling in lowCq mode. This avoids wasting space on the
+                // jump and indirect tables. Just ask the translator for the function address.
                 EmitBranchFallback(context, address, isJump);
             }
             else if (!isConst)
@@ -376,7 +372,8 @@ namespace ARMeilleure.Instructions
 
                 Operand funcAddr = context.Load(OperandType.I64, tableEntryPtr);
 
-                EmitNativeCallWithGuestAddress(context, funcAddr, address, isJump); // Call the function directly. If it's not present yet, this will call the direct call stub.
+                // Call the function directly. If it's not present yet, this will call the direct call stub.
+                EmitNativeCallWithGuestAddress(context, funcAddr, address, isJump);
             }
         }
     }

+ 31 - 34
ARMeilleure/Translation/Translator.cs

@@ -183,7 +183,7 @@ namespace ARMeilleure.Translation
 
             Logger.StartPass(PassName.Decoding);
 
-            Block[] blocks = Decoder.DecodeFunction(memory, address, mode, highCq);
+            Block[] blocks = Decoder.Decode(memory, address, mode, highCq, singleBlock: false);
 
             Logger.EndPass(PassName.Decoding);
 
@@ -242,49 +242,46 @@ namespace ARMeilleure.Translation
 
                 context.MarkLabel(context.GetLabel(block.Address));
 
-                for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
+                if (block.Exit)
                 {
-                    OpCode opCode = block.OpCodes[opcIndex];
+                    InstEmitFlowHelper.EmitTailContinue(context, Const(block.Address), block.TailCall);
+                }
+                else
+                {
+                    for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
+                    {
+                        OpCode opCode = block.OpCodes[opcIndex];
 
-                    context.CurrOp = opCode;
+                        context.CurrOp = opCode;
 
-                    bool isLastOp = opcIndex == block.OpCodes.Count - 1;
+                        bool isLastOp = opcIndex == block.OpCodes.Count - 1;
 
-                    if (isLastOp && block.Branch != null && block.Branch.Address <= block.Address)
-                    {
-                        EmitSynchronization(context);
-                    }
+                        if (isLastOp && block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
+                        {
+                            EmitSynchronization(context);
+                        }
 
-                    Operand lblPredicateSkip = null;
+                        Operand lblPredicateSkip = null;
 
-                    if (opCode is OpCode32 op && op.Cond < Condition.Al)
-                    {
-                        lblPredicateSkip = Label();
+                        if (opCode is OpCode32 op && op.Cond < Condition.Al)
+                        {
+                            lblPredicateSkip = Label();
 
-                        InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
-                    }
+                            InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
+                        }
 
-                    if (opCode.Instruction.Emitter != null)
-                    {
-                        opCode.Instruction.Emitter(context);
-                    }
-                    else
-                    {
-                        throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
-                    }
+                        if (opCode.Instruction.Emitter != null)
+                        {
+                            opCode.Instruction.Emitter(context);
+                        }
+                        else
+                        {
+                            throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
+                        }
 
-                    if (lblPredicateSkip != null)
-                    {
-                        context.MarkLabel(lblPredicateSkip);
-
-                        // If this is the last op on the block, and there's no "next" block
-                        // after this one, then we have to return right now, with the address
-                        // of the next instruction to be executed (in the case that the condition
-                        // is false, and the branch was not taken, as all basic blocks should end
-                        // with some kind of branch).
-                        if (isLastOp && block.Next == null)
+                        if (lblPredicateSkip != null)
                         {
-                            InstEmitFlowHelper.EmitTailContinue(context, Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
+                            context.MarkLabel(lblPredicateSkip);
                         }
                     }
                 }