Sfoglia il codice sorgente

Implement VMOVL and VORR.I32 AArch32 SIMD instructions (#960)

* Implement VMOVL and VORR.I32 AArch32 SIMD instructions

* Rename <dt> to <size> on test description

* Rename Widen to Long and improve VMOVL implementation a bit
gdkchan 6 anni fa
parent
commit
89ccec197e

+ 2 - 4
ARMeilleure/Decoders/OpCode32SimdImm.cs

@@ -1,11 +1,9 @@
 namespace ARMeilleure.Decoders
 {
-    class OpCode32SimdImm : OpCode32, IOpCode32SimdImm
+    class OpCode32SimdImm : OpCode32SimdBase, IOpCode32SimdImm
     {
-        public int Vd { get; private set; }
         public bool Q { get; private set; }
         public long Immediate { get; private set; }
-        public int Size { get; private set; }
         public int Elems => GetBytesCount() >> Size;
 
         public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
@@ -24,7 +22,7 @@
             imm |= ((uint)opCode >> 12) & 0x70;
             imm |= ((uint)opCode >> 17) & 0x80;
 
-            (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm, fpBaseSize: 2);
+            (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm);
 
             RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
 

+ 27 - 0
ARMeilleure/Decoders/OpCode32SimdLong.cs

@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32SimdLong : OpCode32SimdBase // Decodes the U flag, element size, Vd and Vm fields from the opcode.
+    {
+        public bool U { get; private set; } // True when opcode bit 24 is set.
+
+        public OpCode32SimdLong(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int imm3h = (opCode >> 19) & 0x7; // Opcode bits 21:19.
+
+            // The value must be a power of 2; any other value is the encoding of a different instruction and leaves Size unassigned here.
+            switch (imm3h)
+            {
+                case 1: Size = 0; break;
+                case 2: Size = 1; break;
+                case 4: Size = 2; break;
+            }
+
+            U = ((opCode >> 24) & 0x1) != 0;
+
+            RegisterSize = RegisterSize.Simd64;
+
+            Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); // Opcode bit 22 becomes bit 4; bits 15:12 are the low four bits.
+            Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); // Opcode bit 5 becomes bit 4; bits 3:0 are the low four bits.
+        }
+    }
+}

+ 1 - 1
ARMeilleure/Decoders/OpCodeSimdHelper.cs

@@ -2,7 +2,7 @@
 {
     public static class OpCodeSimdHelper
     {
-        public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm, int fpBaseSize = 0)
+        public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm)
         {
             int modeLow = cMode & 1;
             int modeHigh = cMode >> 1;

+ 5 - 1
ARMeilleure/Decoders/OpCodeTable.cs

@@ -158,7 +158,7 @@ namespace ARMeilleure.Decoders
             SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv,            InstEmit.Sdiv,            typeof(OpCodeAluBinary));
             SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl,          InstEmit.Smaddl,          typeof(OpCodeMul));
             SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl,          InstEmit.Smsubl,          typeof(OpCodeMul));
-            SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smul__,           InstEmit.Smulh,           typeof(OpCodeMul));
+            SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh,           InstEmit.Smulh,           typeof(OpCodeMul));
             SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr,            InstEmit.Stlr,            typeof(OpCodeMemEx));
             SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp,           InstEmit.Stlxp,           typeof(OpCodeMemEx));
             SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr,           InstEmit.Stlxr,           typeof(OpCodeMemEx));
@@ -829,6 +829,9 @@ namespace ARMeilleure.Decoders
             SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov,    InstEmit32.Vmov_I,   typeof(OpCode32SimdImm)); // D/Q (dt - from cmode).
             SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov,    InstEmit32.Vmov_I,   typeof(OpCode32SimdImm)); // D/Q I64.
             SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov,    InstEmit32.Vmov_S,   typeof(OpCode32SimdS));
+            SetA32("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl,   InstEmit32.Vmovl,    typeof(OpCode32SimdLong));
+            SetA32("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl,   InstEmit32.Vmovl,    typeof(OpCode32SimdLong));
+            SetA32("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl,   InstEmit32.Vmovl,    typeof(OpCode32SimdLong));
             SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn,   InstEmit32.Vmovn,    typeof(OpCode32SimdCmpZ));
             SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs,    InstEmit32.Vmrs,     typeof(OpCode32SimdSpecial));
             SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr,    InstEmit32.Vmsr,     typeof(OpCode32SimdSpecial));
@@ -845,6 +848,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls,   InstEmit32.Vnmls_S,  typeof(OpCode32SimdRegS));
             SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul,   InstEmit32.Vnmul_S,  typeof(OpCode32SimdRegS));
             SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr,    InstEmit32.Vorr_I,   typeof(OpCode32SimdBinary));
+            SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr,    InstEmit32.Vorr_II,  typeof(OpCode32SimdImm));
             SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd,   InstEmit32.Vpadd_I,  typeof(OpCode32SimdReg));
             SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd,   InstEmit32.Vpadd_V,  typeof(OpCode32SimdReg));
             SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe,  InstEmit32.Vrecpe,   typeof(OpCode32SimdSqrte));

+ 39 - 0
ARMeilleure/Instructions/InstEmitSimdLogical32.cs

@@ -2,7 +2,10 @@
 using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;
 
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
 using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
 
 namespace ARMeilleure.Instructions
 {
@@ -64,6 +67,42 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vorr_II(ArmEmitterContext context) // Emits a bitwise OR of the decoded immediate into the destination vector.
+        {
+            OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+            long immediate = op.Immediate;
+
+            // Replicate the immediate so it fills all 64 bits when the element size is below 64 bits (the multiplications assume it fits in one element).
+            switch (op.Size)
+            {
+                case 0: immediate *= 0x0101010101010101L; break; // One copy per 8 bits.
+                case 1: immediate *= 0x0001000100010001L; break; // One copy per 16 bits.
+                case 2: immediate *= 0x0000000100000001L; break; // One copy per 32 bits.
+            }
+
+            Operand imm = Const(immediate);
+            Operand res = GetVecA32(op.Qd); // Start from the current contents of Qd.
+
+            if (op.Q)
+            {
+                for (int elem = 0; elem < 2; elem++) // Q form: update elements 0 and 1 of Qd.
+                {
+                    Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+                    res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), elem, 3);
+                }
+            }
+            else
+            {
+                Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3); // Only the element selected by the low bit of Vd is read and rewritten.
+
+                res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), op.Vd & 1, 3);
+            }
+
+            context.Copy(GetVecA32(op.Qd), res); // Write the combined result back to Qd.
+        }
+
         private static void EmitBifBit(ArmEmitterContext context, bool notRm)
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

+ 30 - 0
ARMeilleure/Instructions/InstEmitSimdMove32.cs

@@ -139,6 +139,36 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vmovl(ArmEmitterContext context) // Emits a copy of each source element into a destination element one size step larger.
+        {
+            OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp;
+
+            Operand res = context.VectorZero(); // The result is built up from a zero vector.
+
+            int elems = op.GetBytesCount() >> op.Size; // Number of source elements to process.
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U); // Last argument is !U; presumably the "signed" flag of the helper (confirm).
+
+                if (op.Size == 2) // At size 2 the extracted value is widened to I64 explicitly.
+                {
+                    if (op.U)
+                    {
+                        me = context.ZeroExtend32(OperandType.I64, me);
+                    }
+                    else
+                    {
+                        me = context.SignExtend32(OperandType.I64, me);
+                    }
+                }
+
+                res = EmitVectorInsert(context, res, me, index, op.Size + 1); // Insert at the next larger element size.
+            }
+
+            context.Copy(GetVecA32(op.Qd), res); // Write the widened vector to Qd.
+        }
+
         public static void Vtbl(ArmEmitterContext context)
         {
             OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;

+ 3 - 1
ARMeilleure/Instructions/InstName.cs

@@ -81,7 +81,7 @@ namespace ARMeilleure.Instructions
         Sdiv,
         Smaddl,
         Smsubl,
-        Smul__,
+        Smulh,
         Smull,
         Smulw_,
         Ssat,
@@ -500,6 +500,7 @@ namespace ARMeilleure.Instructions
         Smlaw_,
         Smmla,
         Smmls,
+        Smul__,
         Smmul,
         Stl,
         Stlb,
@@ -560,6 +561,7 @@ namespace ARMeilleure.Instructions
         Vmla,
         Vmls,
         Vmov,
+        Vmovl,
         Vmovn,
         Vmrs,
         Vmsr,

+ 28 - 0
Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs

@@ -56,6 +56,34 @@ namespace Ryujinx.Tests.Cpu
 
             CompareAgainstUnicorn();
         }
+
+        [Test, Pairwise, Description("VORR.I32 <Vd>, #<imm>")]
+        public void Vorr_II([Range(0u, 4u)] uint rd,
+                            [Random(RndCnt)] ulong z,
+                            [Random(RndCnt)] byte imm,
+                            [Values(0u, 1u, 2u, 3u)] uint cMode,
+                            [Values] bool q)
+        {
+            uint opcode = 0xf2800110u; // VORR.I32 D0, #0
+
+            if (q)
+            {
+                opcode |= 1 << 6; // Bit 6 selects the Q form.
+                rd <<= 1; // The register index is doubled for the Q form.
+            }
+
+            opcode |= (uint)(imm & 0xf) << 0; // imm bits 3:0 -> opcode bits 3:0.
+            opcode |= (uint)(imm & 0x70) << 12; // imm bits 6:4 -> opcode bits 18:16.
+            opcode |= (uint)(imm & 0x80) << 17; // imm bit 7 -> opcode bit 24.
+            opcode |= (cMode & 0x3) << 9; // Low two bits of cMode -> opcode bits 10:9.
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); // rd bits 3:0 -> opcode bits 15:12, rd bit 4 -> opcode bit 22.
+
+            V128 v0 = MakeVectorE0E1(z, z); // The same random value z is passed for both arguments.
+
+            SingleOpcode(opcode, v0: v0);
+
+            CompareAgainstUnicorn();
+        }
 #endif
     }
 }

+ 30 - 0
Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs

@@ -228,6 +228,36 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise, Description("VMOVL.<size> <Qd>, <Dm>")]
+        public void Vmovl([Values(0u, 1u, 2u, 3u)] uint vm,
+                          [Values(0u, 2u, 4u, 6u)] uint vd,
+                          [Values(1u, 2u, 4u)] uint imm3H,
+                          [Values] bool u)
+        {
+            // The base opcode is not VMOVL because its imm3H field is 0, but once
+            // the imm3H value is shifted in, it becomes a VMOVL encoding.
+            uint opcode = 0xf2800a10u; // VMOV.I16 D0, #0
+
+            opcode |= (vm & 0x10) << 1; // vm bit 4 -> opcode bit 5.
+            opcode |= (vm & 0xf); // vm bits 3:0 -> opcode bits 3:0.
+            opcode |= (vd & 0x10) << 18; // vd bit 4 -> opcode bit 22.
+            opcode |= (vd & 0xf) << 12; // vd bits 3:0 -> opcode bits 15:12.
+            opcode |= (imm3H & 0x7) << 19; // imm3H -> opcode bits 21:19.
+            if (u)
+            {
+                opcode |= 1 << 24; // Bit 24 is set for the u = true variant.
+            }
+
+            V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3); // Random contents are supplied for v0 through v3.
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Pairwise, Description("VTRN.<size> <Vd>, <Vm>")]
         public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
                          [Values(0u, 1u, 2u, 3u)] uint vd,