InstEmitHash.cs 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. // https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
  2. using ARMeilleure.Decoders;
  3. using ARMeilleure.IntermediateRepresentation;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using static ARMeilleure.Instructions.InstEmitHelper;
  7. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  8. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  9. namespace ARMeilleure.Instructions
  10. {
  11. static partial class InstEmit
  12. {
  /// <summary>
  /// Emits the Crc32b operation for the current instruction.
  /// Takes the PCLMULQDQ-based path with an 8-bit data width when
  /// <c>Optimizations.UsePclmulqdq</c> is set; otherwise emits a call to
  /// <c>SoftFallback.Crc32b</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  public static void Crc32b(ArmEmitterContext context)
  {
      if (!Optimizations.UsePclmulqdq)
      {
          // No carry-less multiply available: defer to the managed helper.
          EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32b));

          return;
      }

      EmitCrc32Optimized(context, castagnoli: false, bitsize: 8);
  }
  /// <summary>
  /// Emits the Crc32h operation for the current instruction.
  /// Takes the PCLMULQDQ-based path with a 16-bit data width when
  /// <c>Optimizations.UsePclmulqdq</c> is set; otherwise emits a call to
  /// <c>SoftFallback.Crc32h</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  public static void Crc32h(ArmEmitterContext context)
  {
      if (!Optimizations.UsePclmulqdq)
      {
          // No carry-less multiply available: defer to the managed helper.
          EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32h));

          return;
      }

      EmitCrc32Optimized(context, castagnoli: false, bitsize: 16);
  }
  /// <summary>
  /// Emits the Crc32w operation for the current instruction.
  /// Takes the PCLMULQDQ-based path with a 32-bit data width when
  /// <c>Optimizations.UsePclmulqdq</c> is set; otherwise emits a call to
  /// <c>SoftFallback.Crc32w</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  public static void Crc32w(ArmEmitterContext context)
  {
      if (!Optimizations.UsePclmulqdq)
      {
          // No carry-less multiply available: defer to the managed helper.
          EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32w));

          return;
      }

      EmitCrc32Optimized(context, castagnoli: false, bitsize: 32);
  }
  /// <summary>
  /// Emits the Crc32x operation for the current instruction.
  /// Takes the dedicated 64-bit PCLMULQDQ-based path when
  /// <c>Optimizations.UsePclmulqdq</c> is set; otherwise emits a call to
  /// <c>SoftFallback.Crc32x</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  public static void Crc32x(ArmEmitterContext context)
  {
      if (!Optimizations.UsePclmulqdq)
      {
          // No carry-less multiply available: defer to the managed helper.
          EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32x));

          return;
      }

      EmitCrc32Optimized64(context, castagnoli: false);
  }
  /// <summary>
  /// Emits the Crc32cb operation for the current instruction.
  /// Takes the PCLMULQDQ-based path (Castagnoli constants, 8-bit data width)
  /// when <c>Optimizations.UsePclmulqdq</c> is set; otherwise emits a call to
  /// <c>SoftFallback.Crc32cb</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  public static void Crc32cb(ArmEmitterContext context)
  {
      if (!Optimizations.UsePclmulqdq)
      {
          // No carry-less multiply available: defer to the managed helper.
          EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32cb));

          return;
      }

      EmitCrc32Optimized(context, castagnoli: true, bitsize: 8);
  }
  /// <summary>
  /// Emits the Crc32ch operation for the current instruction.
  /// Takes the PCLMULQDQ-based path (Castagnoli constants, 16-bit data width)
  /// when <c>Optimizations.UsePclmulqdq</c> is set; otherwise emits a call to
  /// <c>SoftFallback.Crc32ch</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  public static void Crc32ch(ArmEmitterContext context)
  {
      if (!Optimizations.UsePclmulqdq)
      {
          // No carry-less multiply available: defer to the managed helper.
          EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32ch));

          return;
      }

      EmitCrc32Optimized(context, castagnoli: true, bitsize: 16);
  }
  /// <summary>
  /// Emits the Crc32cw operation for the current instruction.
  /// Takes the PCLMULQDQ-based path (Castagnoli constants, 32-bit data width)
  /// when <c>Optimizations.UsePclmulqdq</c> is set; otherwise emits a call to
  /// <c>SoftFallback.Crc32cw</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  public static void Crc32cw(ArmEmitterContext context)
  {
      if (!Optimizations.UsePclmulqdq)
      {
          // No carry-less multiply available: defer to the managed helper.
          EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32cw));

          return;
      }

      EmitCrc32Optimized(context, castagnoli: true, bitsize: 32);
  }
  /// <summary>
  /// Emits the Crc32cx operation for the current instruction.
  /// Takes the dedicated 64-bit PCLMULQDQ-based path (Castagnoli constants)
  /// when <c>Optimizations.UsePclmulqdq</c> is set; otherwise emits a call to
  /// <c>SoftFallback.Crc32cx</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  public static void Crc32cx(ArmEmitterContext context)
  {
      if (!Optimizations.UsePclmulqdq)
      {
          // No carry-less multiply available: defer to the managed helper.
          EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32cx));

          return;
      }

      EmitCrc32Optimized64(context, castagnoli: true);
  }
  /// <summary>
  /// Emits a PCLMULQDQ-based CRC32 update for an 8-, 16- or 32-bit data operand.
  /// The CRC is read from <c>Rn</c>, the data from <c>Rm</c>, and the 32-bit result
  /// is written to <c>Rd</c> of the current <see cref="OpCodeAluBinary"/>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  /// <param name="castagnoli">
  /// Selects which pair of (mu, polynomial) constants is used: <c>true</c> for the
  /// Castagnoli set, <c>false</c> for the other set.
  /// </param>
  /// <param name="bitsize">Width of the data operand in bits; must be 8, 16 or 32.</param>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="bitsize"/> is not 8, 16 or 32.
  /// </exception>
  private static void EmitCrc32Optimized(ArmEmitterContext context, bool castagnoli, int bitsize)
  {
      // Reject unsupported widths before emitting anything: without this check the
      // data operand would never be moved into a vector and the shift amounts below
      // would be computed from a meaningless width.
      if (bitsize != 8 && bitsize != 16 && bitsize != 32)
      {
          throw new ArgumentOutOfRangeException(nameof(bitsize), bitsize, "CRC32 data width must be 8, 16 or 32 bits.");
      }

      OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;

      long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
      long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1

      Operand crc = GetIntOrZR(context, op.Rn);
      Operand data = GetIntOrZR(context, op.Rm);

      // Place the CRC in element 0 of an otherwise zeroed vector.
      crc = context.VectorInsert(context.VectorZero(), crc, 0);

      // Place the data in element 0 of a zeroed vector, using the insert that
      // matches the data width.
      switch (bitsize)
      {
          case 8: data = context.VectorInsert8(context.VectorZero(), data, 0); break;
          case 16: data = context.VectorInsert16(context.VectorZero(), data, 0); break;
          case 32: data = context.VectorInsert(context.VectorZero(), data, 0); break;
      }

      // Combine CRC and data, shift the result left by (64 - bitsize) bits, then
      // carry-less multiply by mu followed by the polynomial constant.
      Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
      tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(64 - bitsize));
      tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(0));
      tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));

      if (bitsize < 32)
      {
          // For data narrower than the CRC, XOR the byte-shifted CRC back into the
          // product before the result element is extracted.
          crc = context.AddIntrinsic(Intrinsic.X86Pslldq, crc, Const((64 - bitsize) / 8));
          tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, crc);
      }

      // The result is the 32-bit element at index 2 of the vector.
      SetIntOrZR(context, op.Rd, context.VectorExtract(OperandType.I32, tmp, 2));
  }
  /// <summary>
  /// Emits a PCLMULQDQ-based CRC32 update for a 64-bit data operand.
  /// The CRC is read from <c>Rn</c>, the data from <c>Rm</c>, and the 32-bit result
  /// is written to <c>Rd</c> of the current <see cref="OpCodeAluBinary"/>.
  /// Two multiply-by-mu / multiply-by-polynomial rounds are emitted back to back.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  /// <param name="castagnoli">
  /// Selects which pair of (mu, polynomial) constants is used: <c>true</c> for the
  /// Castagnoli set, <c>false</c> for the other set.
  /// </param>
  private static void EmitCrc32Optimized64(ArmEmitterContext context, bool castagnoli)
  {
      var aluOp = (OpCodeAluBinary)context.CurrOp;

      // mu' = floor(x^64/P(x))'
      long muConst = castagnoli ? 0x0DEA713F1 : 0x1F7011641;

      // P'(x) << 1
      long polyConst = castagnoli ? 0x105EC76F0 : 0x1DB710641;

      Operand crcValue = GetIntOrZR(context, aluOp.Rn);
      Operand dataValue = GetIntOrZR(context, aluOp.Rm);

      // Both inputs go into element 0 of their own zeroed vector.
      Operand crcVec = context.VectorInsert(context.VectorZero(), crcValue, 0);
      Operand dataVec = context.VectorInsert(context.VectorZero(), dataValue, 0);

      Operand mixed = context.AddIntrinsic(Intrinsic.X86Pxor, crcVec, dataVec);

      // Byte-shift the combined value left by 4 bytes; it is reused after the first round.
      Operand shifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mixed, Const(4));

      // First round: multiply by mu, then by the polynomial constant.
      Operand acc = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, shifted, X86GetScalar(context, muConst), Const(0));
      acc = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, acc, X86GetScalar(context, polyConst), Const(0));

      // Fold the shifted input back in and shift left by 32 bits for the second round.
      acc = context.AddIntrinsic(Intrinsic.X86Pxor, acc, shifted);
      acc = context.AddIntrinsic(Intrinsic.X86Psllq, acc, Const(32));

      // Second round: the mu multiply uses selector 1 here, the polynomial multiply selector 0.
      acc = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, acc, X86GetScalar(context, muConst), Const(1));
      acc = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, acc, X86GetScalar(context, polyConst), Const(0));

      // The result is the 32-bit element at index 2 of the vector.
      SetIntOrZR(context, aluOp.Rd, context.VectorExtract(OperandType.I32, acc, 2));
  }
  /// <summary>
  /// Emits a call to the given delegate with the values of <c>Rn</c> and <c>Rm</c>
  /// of the current <see cref="OpCodeAluBinary"/> as arguments, and stores the
  /// call result in <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the instruction being translated.</param>
  /// <param name="dlg">Delegate to invoke; receives the Rn value first, then the Rm value.</param>
  private static void EmitCrc32Call(ArmEmitterContext context, Delegate dlg)
  {
      var aluOp = (OpCodeAluBinary)context.CurrOp;

      Operand firstArg = GetIntOrZR(context, aluOp.Rn);
      Operand secondArg = GetIntOrZR(context, aluOp.Rm);

      Operand callResult = context.Call(dlg, firstArg, secondArg);

      SetIntOrZR(context, aluOp.Rd, callResult);
  }
  153. }
  154. }