Просмотр исходного кода

Implement fast path for AES crypto instructions on Arm64 (#5281)

* Implement fast path for AES crypto instructions on Arm64

* PPTC version bump

* Use AES HW feature check
gdkchan 2 года назад
Родитель
Коммит
193ca3c9a2

+ 0 - 2
src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs

@@ -168,8 +168,6 @@ namespace ARMeilleure.CodeGen.Arm64
 
             Logger.StartPass(PassName.CodeGeneration);
 
-            //Console.Error.WriteLine(IRDumper.GetDump(cfg));
-
             bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
 
             CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);

+ 29 - 0
src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs

@@ -179,6 +179,35 @@ namespace ARMeilleure.CodeGen.Arm64
                         (uint)operation.GetSource(2).AsInt32());
                     break;
 
+                case IntrinsicType.Vector128Unary:
+                    GenerateVectorUnary(
+                        context,
+                        1,
+                        0,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0));
+                    break;
+                case IntrinsicType.Vector128Binary:
+                    GenerateVectorBinary(
+                        context,
+                        1,
+                        0,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(0),
+                        operation.GetSource(1));
+                    break;
+                case IntrinsicType.Vector128BinaryRd:
+                    GenerateVectorUnary(
+                        context,
+                        1,
+                        0,
+                        info.Inst,
+                        operation.Destination,
+                        operation.GetSource(1));
+                    break;
+
                 case IntrinsicType.VectorUnary:
                     GenerateVectorUnary(
                         context,

+ 2 - 2
src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs

@@ -19,8 +19,8 @@ namespace ARMeilleure.CodeGen.Arm64
             Add(Intrinsic.Arm64AddvV,         new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
             Add(Intrinsic.Arm64AddS,          new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
             Add(Intrinsic.Arm64AddV,          new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
-            Add(Intrinsic.Arm64AesdV,         new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
-            Add(Intrinsic.Arm64AeseV,         new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
+            Add(Intrinsic.Arm64AesdV,         new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128BinaryRd));
+            Add(Intrinsic.Arm64AeseV,         new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128BinaryRd));
             Add(Intrinsic.Arm64AesimcV,       new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
             Add(Intrinsic.Arm64AesmcV,        new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
             Add(Intrinsic.Arm64AndV,          new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));

+ 4 - 3
src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs

@@ -23,6 +23,10 @@ namespace ARMeilleure.CodeGen.Arm64
         ScalarTernaryShlRd,
         ScalarTernaryShrRd,
 
+        Vector128Unary,
+        Vector128Binary,
+        Vector128BinaryRd,
+
         VectorUnary,
         VectorUnaryBitwise,
         VectorUnaryByElem,
@@ -50,9 +54,6 @@ namespace ARMeilleure.CodeGen.Arm64
         VectorTernaryShlRd,
         VectorTernaryShrRd,
 
-        Vector128Unary,
-        Vector128Binary,
-
         GetRegister,
         SetRegister
     }

+ 1 - 0
src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs

@@ -746,6 +746,7 @@ namespace ARMeilleure.CodeGen.Arm64
                    info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
                    info.Type == IntrinsicType.ScalarTernaryShlRd ||
                    info.Type == IntrinsicType.ScalarTernaryShrRd ||
+                   info.Type == IntrinsicType.Vector128BinaryRd ||
                    info.Type == IntrinsicType.VectorBinaryRd ||
                    info.Type == IntrinsicType.VectorInsertByElem ||
                    info.Type == IntrinsicType.VectorTernaryRd ||

+ 20 - 4
src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs

@@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
 
             Operand res;
 
-            if (Optimizations.UseAesni)
+            if (Optimizations.UseArm64Aes)
+            {
+                res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
+            }
+            else if (Optimizations.UseAesni)
             {
                 res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
             }
@@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
 
             Operand res;
 
-            if (Optimizations.UseAesni)
+            if (Optimizations.UseArm64Aes)
+            {
+                res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
+            }
+            else if (Optimizations.UseAesni)
             {
                 res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
             }
@@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions
 
             Operand res;
 
-            if (Optimizations.UseAesni)
+            if (Optimizations.UseArm64Aes)
+            {
+                res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
+            }
+            else if (Optimizations.UseAesni)
             {
                 res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
             }
@@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
 
             Operand res;
 
-            if (Optimizations.UseAesni)
+            if (Optimizations.UseArm64Aes)
+            {
+                res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
+            }
+            else if (Optimizations.UseAesni)
             {
                 Operand roundKey = context.VectorZero();
 

+ 20 - 4
src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs

@@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
 
             Operand res;
 
-            if (Optimizations.UseAesni)
+            if (Optimizations.UseArm64Aes)
+            {
+                res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
+            }
+            else if (Optimizations.UseAesni)
             {
                 res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
             }
@@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
 
             Operand res;
 
-            if (Optimizations.UseAesni)
+            if (Optimizations.UseArm64Aes)
+            {
+                res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
+            }
+            else if (Optimizations.UseAesni)
             {
                 res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
             }
@@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions
 
             Operand res;
 
-            if (Optimizations.UseAesni)
+            if (Optimizations.UseArm64Aes)
+            {
+                res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
+            }
+            else if (Optimizations.UseAesni)
             {
                 res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
             }
@@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
 
             Operand res;
 
-            if (Optimizations.UseAesni)
+            if (Optimizations.UseArm64Aes)
+            {
+                res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
+            }
+            else if (Optimizations.UseAesni)
             {
                 Operand roundKey = context.VectorZero();
 

+ 2 - 0
src/ARMeilleure/Optimizations.cs

@@ -13,6 +13,7 @@ namespace ARMeilleure
         public static bool UseUnmanagedDispatchLoop { get; set; } = true;
 
         public static bool UseAdvSimdIfAvailable    { get; set; } = true;
+        public static bool UseArm64AesIfAvailable   { get; set; } = true;
         public static bool UseArm64PmullIfAvailable { get; set; } = true;
 
         public static bool UseSseIfAvailable       { get; set; } = true;
@@ -41,6 +42,7 @@ namespace ARMeilleure
         }
 
         internal static bool UseAdvSimd    => UseAdvSimdIfAvailable    && Arm64HardwareCapabilities.SupportsAdvSimd;
+        internal static bool UseArm64Aes   => UseArm64AesIfAvailable   && Arm64HardwareCapabilities.SupportsAes;
         internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
 
         internal static bool UseSse       => UseSseIfAvailable       && X86HardwareCapabilities.SupportsSse;

+ 1 - 1
src/ARMeilleure/Translation/PTC/Ptc.cs

@@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 4661; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 5281; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";