Просмотр исходного кода

Add Fast Paths for Crypto instructions (A32/A64) (#1026)

* Add Fast Paths for Crypto instructions (A32/A64)

* Replace additional XOR with passing in const zero.
riperiperi 6 лет назад
Родитель
Коммит
f695a215ad

+ 5 - 0
ARMeilleure/CodeGen/X86/Assembler.cs

@@ -74,6 +74,11 @@ namespace ARMeilleure.CodeGen.X86
             Add(X86Instruction.Addps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex));
             Add(X86Instruction.Addsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
             Add(X86Instruction.Addss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Aesdec,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Aesdeclast, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Aesenc,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Aesenclast, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Aesimc,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66));
             Add(X86Instruction.And,        new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp,      0x00000023, InstructionFlags.None));
             Add(X86Instruction.Andnpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
             Add(X86Instruction.Andnps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f55, InstructionFlags.Vex));

+ 5 - 0
ARMeilleure/CodeGen/X86/IntrinsicTable.cs

@@ -17,6 +17,11 @@ namespace ARMeilleure.CodeGen.X86
             Add(Intrinsic.X86Addps,      new IntrinsicInfo(X86Instruction.Addps,      IntrinsicType.Binary));
             Add(Intrinsic.X86Addsd,      new IntrinsicInfo(X86Instruction.Addsd,      IntrinsicType.Binary));
             Add(Intrinsic.X86Addss,      new IntrinsicInfo(X86Instruction.Addss,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesdec,     new IntrinsicInfo(X86Instruction.Aesdec,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesdeclast, new IntrinsicInfo(X86Instruction.Aesdeclast, IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesenc,     new IntrinsicInfo(X86Instruction.Aesenc,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesenclast, new IntrinsicInfo(X86Instruction.Aesenclast, IntrinsicType.Binary));
+            Add(Intrinsic.X86Aesimc,     new IntrinsicInfo(X86Instruction.Aesimc,     IntrinsicType.Unary));
             Add(Intrinsic.X86Andnpd,     new IntrinsicInfo(X86Instruction.Andnpd,     IntrinsicType.Binary));
             Add(Intrinsic.X86Andnps,     new IntrinsicInfo(X86Instruction.Andnps,     IntrinsicType.Binary));
             Add(Intrinsic.X86Andpd,      new IntrinsicInfo(X86Instruction.Andpd,      IntrinsicType.Binary));

+ 5 - 0
ARMeilleure/CodeGen/X86/X86Instruction.cs

@@ -7,6 +7,11 @@ namespace ARMeilleure.CodeGen.X86
         Addps,
         Addsd,
         Addss,
+        Aesdec,
+        Aesdeclast,
+        Aesenc,
+        Aesenclast,
+        Aesimc,
         And,
         Andnpd,
         Andnps,

+ 50 - 4
ARMeilleure/Instructions/InstEmitSimdCrypto.cs

@@ -15,7 +15,17 @@ namespace ARMeilleure.Instructions
             Operand d = GetVec(op.Rd);
             Operand n = GetVec(op.Rn);
 
-            context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n));
+            Operand res;
+            if (Optimizations.UseAesni)
+            {
+                res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+            }
+            else
+            {
+                res = context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n);
+            }
+
+            context.Copy(d, res);
         }
 
         public static void Aese_V(ArmEmitterContext context)
@@ -25,7 +35,17 @@ namespace ARMeilleure.Instructions
             Operand d = GetVec(op.Rd);
             Operand n = GetVec(op.Rn);
 
-            context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n));
+            Operand res;
+            if (Optimizations.UseAesni)
+            {
+                res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+            }
+            else
+            {
+                res = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n);
+            }
+
+            context.Copy(d, res);
         }
 
         public static void Aesimc_V(ArmEmitterContext context)
@@ -34,7 +54,17 @@ namespace ARMeilleure.Instructions
 
             Operand n = GetVec(op.Rn);
 
-            context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n));
+            Operand res;
+            if (Optimizations.UseAesni)
+            {
+                res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
+            }
+            else
+            {
+                res = context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
         }
 
         public static void Aesmc_V(ArmEmitterContext context)
@@ -43,7 +73,23 @@ namespace ARMeilleure.Instructions
 
             Operand n = GetVec(op.Rn);
 
-            context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.MixColumns), n));
+            Operand res;
+            if (Optimizations.UseAesni)
+            {
+                Operand roundKey = context.VectorZero();
+
+                // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens
+                res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
+
+                // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens
+                res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
+            }
+            else
+            {
+                res = context.Call(new _V128_V128(SoftFallback.MixColumns), n);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
         }
     }
 }

+ 50 - 4
ARMeilleure/Instructions/InstEmitSimdCrypto32.cs

@@ -15,7 +15,17 @@ namespace ARMeilleure.Instructions
             Operand d = GetVecA32(op.Qd);
             Operand n = GetVecA32(op.Qm);
 
-            context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n));
+            Operand res;
+            if (Optimizations.UseAesni)
+            {
+                res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+            }
+            else
+            {
+                res = context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n);
+            }
+
+            context.Copy(d, res);
         }
 
         public static void Aese_V(ArmEmitterContext context)
@@ -25,7 +35,17 @@ namespace ARMeilleure.Instructions
             Operand d = GetVecA32(op.Qd);
             Operand n = GetVecA32(op.Qm);
 
-            context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n));
+            Operand res;
+            if (Optimizations.UseAesni)
+            {
+                res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+            }
+            else
+            {
+                res = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n);
+            }
+
+            context.Copy(d, res);
         }
 
         public static void Aesimc_V(ArmEmitterContext context)
@@ -34,7 +54,17 @@ namespace ARMeilleure.Instructions
 
             Operand n = GetVecA32(op.Qm);
 
-            context.Copy(GetVec(op.Qd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n));
+            Operand res;
+            if (Optimizations.UseAesni)
+            {
+                res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
+            }
+            else
+            {
+                res = context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n);
+            }
+
+            context.Copy(GetVecA32(op.Qd), res);
         }
 
         public static void Aesmc_V(ArmEmitterContext context)
@@ -43,7 +73,23 @@ namespace ARMeilleure.Instructions
 
             Operand n = GetVecA32(op.Qm);
 
-            context.Copy(GetVec(op.Qd), context.Call(new _V128_V128(SoftFallback.MixColumns), n));
+            Operand res;
+            if (Optimizations.UseAesni)
+            {
+                Operand roundKey = context.VectorZero();
+
+                // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
+                res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
+
+                // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
+                res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
+            }
+            else
+            {
+                res = context.Call(new _V128_V128(SoftFallback.MixColumns), n);
+            }
+
+            context.Copy(GetVecA32(op.Qd), res);
         }
     }
 }

+ 5 - 0
ARMeilleure/IntermediateRepresentation/Intrinsic.cs

@@ -6,6 +6,11 @@ namespace ARMeilleure.IntermediateRepresentation
         X86Addps,
         X86Addsd,
         X86Addss,
+        X86Aesdec,
+        X86Aesdeclast,
+        X86Aesenc,
+        X86Aesenclast,
+        X86Aesimc,
         X86Andnpd,
         X86Andnps,
         X86Andpd,

+ 2 - 0
ARMeilleure/Optimizations.cs

@@ -16,6 +16,7 @@ namespace ARMeilleure
         public static bool UseSse42IfAvailable  { get; set; } = true;
         public static bool UsePopCntIfAvailable { get; set; } = true;
         public static bool UseAvxIfAvailable    { get; set; } = true;
+        public static bool UseAesniIfAvailable  { get; set; } = true;
 
         public static bool ForceLegacySse
         {
@@ -31,5 +32,6 @@ namespace ARMeilleure
         internal static bool UseSse42  => UseSse42IfAvailable  && HardwareCapabilities.SupportsSse42;
         internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
         internal static bool UseAvx    => UseAvxIfAvailable    && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
+        internal static bool UseAesni  => UseAesniIfAvailable  && HardwareCapabilities.SupportsAesni;
     }
 }