Bladeren bron

Add AESD, AESE, AESIMC, AESMC instructions; add 4 simple Tests (closed box). (#365)

* Create CpuTestSimdCrypto.cs

* Update AOpCodeTable.cs

* Create AInstEmitSimdCrypto.cs

* Update ASoftFallback.cs

* Create ACryptoHelper.cs
LDj3SNuD 7 jaren geleden
bovenliggende
commit
d021d5dfa9

+ 4 - 0
ChocolArm64/AOpCodeTable.cs

@@ -180,6 +180,10 @@ namespace ChocolArm64
             SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", AInstEmit.Addp_V,        typeof(AOpCodeSimdReg));
             SetA64("000011100x110001101110xxxxxxxxxx", AInstEmit.Addv_V,        typeof(AOpCodeSimd));
             SetA64("01001110<<110001101110xxxxxxxxxx", AInstEmit.Addv_V,        typeof(AOpCodeSimd));
+            SetA64("0100111000101000010110xxxxxxxxxx", AInstEmit.Aesd_V,        typeof(AOpCodeSimd));
+            SetA64("0100111000101000010010xxxxxxxxxx", AInstEmit.Aese_V,        typeof(AOpCodeSimd));
+            SetA64("0100111000101000011110xxxxxxxxxx", AInstEmit.Aesimc_V,      typeof(AOpCodeSimd));
+            SetA64("0100111000101000011010xxxxxxxxxx", AInstEmit.Aesmc_V,       typeof(AOpCodeSimd));
             SetA64("0x001110001xxxxx000111xxxxxxxxxx", AInstEmit.And_V,         typeof(AOpCodeSimdReg));
             SetA64("0x001110011xxxxx000111xxxxxxxxxx", AInstEmit.Bic_V,         typeof(AOpCodeSimdReg));
             SetA64("0x10111100000xxx<<x101xxxxxxxxxx", AInstEmit.Bic_Vi,        typeof(AOpCodeSimdImm));

+ 328 - 0
ChocolArm64/Instruction/ACryptoHelper.cs

@@ -0,0 +1,328 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace ChocolArm64.Instruction
+{
+    static class ACryptoHelper
+    {
+#region "LookUp Tables"
+        private static byte[] SBox =
+        {
+            0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+            0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+            0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+            0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+            0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+            0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+            0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+            0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+            0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+            0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+            0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+            0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+            0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+            0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+            0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+            0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+        };
+
+        private static byte[] InvSBox =
+        {
+            0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+            0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+            0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+            0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+            0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+            0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+            0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+            0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+            0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+            0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+            0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+            0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+            0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+            0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+            0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+            0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+        };
+
+        private static byte[] GFMul_02 =
+        {
+            0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+            0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+            0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+            0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+            0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+            0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+            0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+            0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+            0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
+            0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
+            0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
+            0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
+            0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
+            0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
+            0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
+            0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
+        };
+
+        private static byte[] GFMul_03 =
+        {
+            0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+            0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+            0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+            0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+            0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+            0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+            0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+            0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+            0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
+            0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
+            0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
+            0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
+            0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
+            0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
+            0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
+            0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
+        };
+
+        private static byte[] GFMul_09 =
+        {
+            0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+            0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+            0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+            0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
+            0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
+            0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
+            0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
+            0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
+            0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
+            0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
+            0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+            0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+            0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
+            0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
+            0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
+            0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
+        };
+
+        private static byte[] GFMul_0B =
+        {
+            0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+            0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+            0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
+            0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
+            0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
+            0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
+            0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
+            0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
+            0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
+            0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
+            0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
+            0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
+            0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+            0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+            0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
+            0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
+        };
+
+        private static byte[] GFMul_0D =
+        {
+            0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+            0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+            0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
+            0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
+            0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+            0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+            0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
+            0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
+            0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+            0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+            0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
+            0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
+            0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+            0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+            0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
+            0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
+        };
+
+        private static byte[] GFMul_0E =
+        {
+            0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+            0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+            0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
+            0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
+            0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+            0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+            0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
+            0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
+            0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
+            0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
+            0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+            0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+            0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
+            0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
+            0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+            0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
+        };
+
+        private static byte[] SRPerm = { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 };
+
+        private static byte[] ISRPerm = { 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 };
+#endregion
+
+        public static Vector128<float> AESInvMixColumns(Vector128<float> op)
+        {
+            byte[] InState  = new byte[16];
+            byte[] OutState = new byte[16];
+
+            FromVectorToByteArray(InState, ref op);
+
+            for (int Columns = 0; Columns <= 3; Columns++)
+            {
+                int Idx = Columns << 2;
+
+                byte Row0 = InState[Idx + 0]; // A, E, I, M: [Row0, Col0-Col3]
+                byte Row1 = InState[Idx + 1]; // B, F, J, N: [Row1, Col0-Col3]
+                byte Row2 = InState[Idx + 2]; // C, G, K, O: [Row2, Col0-Col3]
+                byte Row3 = InState[Idx + 3]; // D, H, L, P: [Row3, Col0-Col3]
+
+                OutState[Idx + 0] = (byte)((uint)GFMul_0E[Row0] ^ GFMul_0B[Row1] ^ GFMul_0D[Row2] ^ GFMul_09[Row3]);
+                OutState[Idx + 1] = (byte)((uint)GFMul_09[Row0] ^ GFMul_0E[Row1] ^ GFMul_0B[Row2] ^ GFMul_0D[Row3]);
+                OutState[Idx + 2] = (byte)((uint)GFMul_0D[Row0] ^ GFMul_09[Row1] ^ GFMul_0E[Row2] ^ GFMul_0B[Row3]);
+                OutState[Idx + 3] = (byte)((uint)GFMul_0B[Row0] ^ GFMul_0D[Row1] ^ GFMul_09[Row2] ^ GFMul_0E[Row3]);
+            }
+
+            FromByteArrayToVector(OutState, ref op);
+
+            return op;
+        }
+
+        public static Vector128<float> AESInvShiftRows(Vector128<float> op)
+        {
+            byte[] InState  = new byte[16];
+            byte[] OutState = new byte[16];
+
+            FromVectorToByteArray(InState, ref op);
+
+            for (int Idx = 0; Idx <= 15; Idx++)
+            {
+                OutState[ISRPerm[Idx]] = InState[Idx];
+            }
+
+            FromByteArrayToVector(OutState, ref op);
+
+            return op;
+        }
+
+        public static Vector128<float> AESInvSubBytes(Vector128<float> op)
+        {
+            byte[] InState  = new byte[16];
+            byte[] OutState = new byte[16];
+
+            FromVectorToByteArray(InState, ref op);
+
+            for (int Idx = 0; Idx <= 15; Idx++)
+            {
+                OutState[Idx] = InvSBox[InState[Idx]];
+            }
+
+            FromByteArrayToVector(OutState, ref op);
+
+            return op;
+        }
+
+        public static Vector128<float> AESMixColumns(Vector128<float> op)
+        {
+            byte[] InState  = new byte[16];
+            byte[] OutState = new byte[16];
+
+            FromVectorToByteArray(InState, ref op);
+
+            for (int Columns = 0; Columns <= 3; Columns++)
+            {
+                int Idx = Columns << 2;
+
+                byte Row0 = InState[Idx + 0]; // A, E, I, M: [Row0, Col0-Col3]
+                byte Row1 = InState[Idx + 1]; // B, F, J, N: [Row1, Col0-Col3]
+                byte Row2 = InState[Idx + 2]; // C, G, K, O: [Row2, Col0-Col3]
+                byte Row3 = InState[Idx + 3]; // D, H, L, P: [Row3, Col0-Col3]
+
+                OutState[Idx + 0] = (byte)((uint)GFMul_02[Row0] ^ GFMul_03[Row1] ^ Row2 ^ Row3);
+                OutState[Idx + 1] = (byte)((uint)Row0 ^ GFMul_02[Row1] ^ GFMul_03[Row2] ^ Row3);
+                OutState[Idx + 2] = (byte)((uint)Row0 ^ Row1 ^ GFMul_02[Row2] ^ GFMul_03[Row3]);
+                OutState[Idx + 3] = (byte)((uint)GFMul_03[Row0] ^ Row1 ^ Row2 ^ GFMul_02[Row3]);
+            }
+
+            FromByteArrayToVector(OutState, ref op);
+
+            return op;
+        }
+
+        public static Vector128<float> AESShiftRows(Vector128<float> op)
+        {
+            byte[] InState  = new byte[16];
+            byte[] OutState = new byte[16];
+
+            FromVectorToByteArray(InState, ref op);
+
+            for (int Idx = 0; Idx <= 15; Idx++)
+            {
+                OutState[SRPerm[Idx]] = InState[Idx];
+            }
+
+            FromByteArrayToVector(OutState, ref op);
+
+            return op;
+        }
+
+        public static Vector128<float> AESSubBytes(Vector128<float> op)
+        {
+            byte[] InState  = new byte[16];
+            byte[] OutState = new byte[16];
+
+            FromVectorToByteArray(InState, ref op);
+
+            for (int Idx = 0; Idx <= 15; Idx++)
+            {
+                OutState[Idx] = SBox[InState[Idx]];
+            }
+
+            FromByteArrayToVector(OutState, ref op);
+
+            return op;
+        }
+
+        private static void FromVectorToByteArray(byte[] State, ref Vector128<float> op)
+        {
+            ulong ULongLow  = AVectorHelper.VectorExtractIntZx((op), (byte)0, 3);
+            ulong ULongHigh = AVectorHelper.VectorExtractIntZx((op), (byte)1, 3);
+
+            for (int Idx = 0; Idx <= 7; Idx++)
+            {
+                State[Idx + 0] = (byte)(ULongLow  & 0xFFUL);
+                State[Idx + 8] = (byte)(ULongHigh & 0xFFUL);
+
+                ULongLow  >>= 8;
+                ULongHigh >>= 8;
+            }
+        }
+
+        private static void FromByteArrayToVector(byte[] State, ref Vector128<float> op)
+        {
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            op = Sse.StaticCast<byte, float>(Sse2.SetVector128(
+                State[15], State[14], State[13], State[12],
+                State[11], State[10], State[9],  State[8],
+                State[7],  State[6],  State[5],  State[4],
+                State[3],  State[2],  State[1],  State[0]));
+        }
+    }
+}

+ 54 - 0
ChocolArm64/Instruction/AInstEmitSimdCrypto.cs

@@ -0,0 +1,54 @@
+using ChocolArm64.Decoder;
+using ChocolArm64.Translation;
+
+namespace ChocolArm64.Instruction
+{
+    static partial class AInstEmit
+    {
+        public static void Aesd_V(AILEmitterCtx Context)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            Context.EmitLdvec(Op.Rd);
+            Context.EmitLdvec(Op.Rn);
+
+            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Decrypt));
+
+            Context.EmitStvec(Op.Rd);
+        }
+
+        public static void Aese_V(AILEmitterCtx Context)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            Context.EmitLdvec(Op.Rd);
+            Context.EmitLdvec(Op.Rn);
+
+            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Encrypt));
+
+            Context.EmitStvec(Op.Rd);
+        }
+
+        public static void Aesimc_V(AILEmitterCtx Context)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            Context.EmitLdvec(Op.Rn);
+
+            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.InverseMixColumns));
+
+            Context.EmitStvec(Op.Rd);
+        }
+
+        public static void Aesmc_V(AILEmitterCtx Context)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            Context.EmitLdvec(Op.Rn);
+
+            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MixColumns));
+
+            Context.EmitStvec(Op.Rd);
+        }
+    }
+}

+ 36 - 0
ChocolArm64/Instruction/ASoftFallback.cs

@@ -410,6 +410,42 @@ namespace ChocolArm64.Instruction
         }
 #endregion
 
+#region "Aes"
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> Decrypt(Vector128<float> value, Vector128<float> roundKey)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return ACryptoHelper.AESInvSubBytes(ACryptoHelper.AESInvShiftRows(Sse.Xor(value, roundKey)));
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> Encrypt(Vector128<float> value, Vector128<float> roundKey)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return ACryptoHelper.AESSubBytes(ACryptoHelper.AESShiftRows(Sse.Xor(value, roundKey)));
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> InverseMixColumns(Vector128<float> value)
+        {
+            return ACryptoHelper.AESInvMixColumns(value);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> MixColumns(Vector128<float> value)
+        {
+            return ACryptoHelper.AESMixColumns(value);
+        }
+#endregion
+
 #region "Sha256"
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector128<float> HashLower(Vector128<float> hash_abcd, Vector128<float> hash_efgh, Vector128<float> wk)

+ 135 - 0
Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs

@@ -0,0 +1,135 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ChocolArm64.State;
+
+using NUnit.Framework;
+
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Tests.Cpu
+{
+    public class CpuTestSimdCrypto : CpuTest
+    {
+        [Test, Explicit, Description("AESD <Vd>.16B, <Vn>.16B")]
+        public void Aesd_V([Values(0u)] uint Rd,
+                           [Values(1u)] uint Rn,
+                           [Values(0x7B5B546573745665ul)] ulong ValueH,
+                           [Values(0x63746F725D53475Dul)] ulong ValueL,
+                           [Random(2)]                    ulong RoundKeyH,
+                           [Random(2)]                    ulong RoundKeyL,
+                           [Values(0x8DCAB9BC035006BCul)] ulong ResultH,
+                           [Values(0x8F57161E00CAFD8Dul)] ulong ResultL)
+        {
+            uint Opcode = 0x4E285800; // AESD V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Vector128<float> V0 = MakeVectorE0E1(RoundKeyL ^ ValueL, RoundKeyH ^ ValueH);
+            Vector128<float> V1 = MakeVectorE0E1(RoundKeyL,          RoundKeyH);
+
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH));
+            });
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(RoundKeyL));
+                Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(RoundKeyH));
+            });
+        }
+
+        [Test, Explicit, Description("AESE <Vd>.16B, <Vn>.16B")]
+        public void Aese_V([Values(0u)] uint Rd,
+                           [Values(1u)] uint Rn,
+                           [Values(0x7B5B546573745665ul)] ulong ValueH,
+                           [Values(0x63746F725D53475Dul)] ulong ValueL,
+                           [Random(2)]                    ulong RoundKeyH,
+                           [Random(2)]                    ulong RoundKeyL,
+                           [Values(0x8F92A04DFBED204Dul)] ulong ResultH,
+                           [Values(0x4C39B1402192A84Cul)] ulong ResultL)
+        {
+            uint Opcode = 0x4E284800; // AESE V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Vector128<float> V0 = MakeVectorE0E1(RoundKeyL ^ ValueL, RoundKeyH ^ ValueH);
+            Vector128<float> V1 = MakeVectorE0E1(RoundKeyL,          RoundKeyH);
+
+            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH));
+            });
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(RoundKeyL));
+                Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(RoundKeyH));
+            });
+        }
+
+        [Test, Explicit, Description("AESIMC <Vd>.16B, <Vn>.16B")]
+        public void Aesimc_V([Values(0u)]     uint Rd,
+                             [Values(1u, 0u)] uint Rn,
+                             [Values(0x8DCAB9DC035006BCul)] ulong ValueH,
+                             [Values(0x8F57161E00CAFD8Dul)] ulong ValueL,
+                             [Values(0xD635A667928B5EAEul)] ulong ResultH,
+                             [Values(0xEEC9CC3BC55F5777ul)] ulong ResultL)
+        {
+            uint Opcode = 0x4E287800; // AESIMC V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Vector128<float> V = MakeVectorE0E1(ValueL, ValueH);
+
+            AThreadState ThreadState = SingleOpcode(
+                Opcode,
+                V0: Rn == 0u ? V : default(Vector128<float>),
+                V1: Rn == 1u ? V : default(Vector128<float>));
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH));
+            });
+            if (Rn == 1u)
+            {
+                Assert.Multiple(() =>
+                {
+                    Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(ValueL));
+                    Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(ValueH));
+                });
+            }
+        }
+
+        [Test, Explicit, Description("AESMC <Vd>.16B, <Vn>.16B")]
+        public void Aesmc_V([Values(0u)]     uint Rd,
+                            [Values(1u, 0u)] uint Rn,
+                            [Values(0x627A6F6644B109C8ul)] ulong ValueH,
+                            [Values(0x2B18330A81C3B3E5ul)] ulong ValueL,
+                            [Values(0x7B5B546573745665ul)] ulong ResultH,
+                            [Values(0x63746F725D53475Dul)] ulong ResultL)
+        {
+            uint Opcode = 0x4E286800; // AESMC V0.16B, V0.16B
+            Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+            Vector128<float> V = MakeVectorE0E1(ValueL, ValueH);
+
+            AThreadState ThreadState = SingleOpcode(
+                Opcode,
+                V0: Rn == 0u ? V : default(Vector128<float>),
+                V1: Rn == 1u ? V : default(Vector128<float>));
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH));
+            });
+            if (Rn == 1u)
+            {
+                Assert.Multiple(() =>
+                {
+                    Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(ValueL));
+                    Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(ValueH));
+                });
+            }
+        }
+    }
+}