InstEmitHashHelper.cs 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. // https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.Translation;
  4. using System;
  5. using System.Diagnostics;
  6. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  7. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  8. namespace ARMeilleure.Instructions
  9. {
  /// <summary>
  /// Helpers that emit IR for the CRC32 / CRC32C checksum instructions.
  /// </summary>
  static class InstEmitHashHelper
  {
      // Bit-reversed generator polynomials for CRC-32 and CRC-32C (Castagnoli).
      public const uint Crc32RevPoly = 0xedb88320;
      public const uint Crc32cRevPoly = 0x82f63b78;

      /// <summary>
      /// Emits the code that updates the checksum <paramref name="crc"/> with <paramref name="value"/>.
      /// The fastest available strategy is chosen: the SSE4.2 CRC32 instruction (CRC-32C only),
      /// then carry-less multiplication via PCLMULQDQ, and finally a call into <c>SoftFallback</c>.
      /// </summary>
      /// <param name="context">Emitter context that receives the generated operations.</param>
      /// <param name="crc">Integer operand holding the current checksum.</param>
      /// <param name="value">Integer operand holding the data to add; must be I64 when <paramref name="size"/> is 3.</param>
      /// <param name="size">Data width selector: 0 = 8 bits, 1 = 16 bits, 2 = 32 bits, 3 = 64 bits.</param>
      /// <param name="castagnoli">True for the CRC-32C variant, false for plain CRC-32.</param>
      /// <returns>Operand holding the updated checksum.</returns>
      /// <exception cref="ArgumentOutOfRangeException">
      /// Thrown on the software fallback path when <paramref name="size"/> is outside 0..3.
      /// </exception>
      public static Operand EmitCrc32(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli)
      {
          Debug.Assert(crc.Type.IsInteger() && value.Type.IsInteger());
          Debug.Assert(size >= 0 && size <= 3);
          Debug.Assert((value.Type == OperandType.I64) || (size < 3));

          if (castagnoli && Optimizations.UseSse42)
          {
              return EmitCrc32Sse42(context, crc, value, size);
          }

          if (Optimizations.UsePclmulqdq)
          {
              return (size == 3)
                  ? EmitCrc32Optimized64(context, crc, value, castagnoli)
                  : EmitCrc32Optimized(context, crc, value, castagnoli, size);
          }

          return EmitCrc32SoftFallback(context, crc, value, size, castagnoli);
      }

      // CRC-32C through the x86 CRC32 instruction.
      private static Operand EmitCrc32Sse42(ArmEmitterContext context, Operand crc, Operand value, int size)
      {
          // The CRC32 instruction has no immediate form, so constant inputs are copied into registers first.
          if (value.Kind == OperandKind.Constant)
          {
              value = context.Copy(value);
          }

          if (crc.Kind == OperandKind.Constant)
          {
              crc = context.Copy(crc);
          }

          Intrinsic crcIntrinsic;

          switch (size)
          {
              case 0:
                  crcIntrinsic = Intrinsic.X86Crc32_8;
                  break;
              case 1:
                  crcIntrinsic = Intrinsic.X86Crc32_16;
                  break;
              default:
                  crcIntrinsic = Intrinsic.X86Crc32;
                  break;
          }

          if (size != 3)
          {
              return context.AddIntrinsicInt(crcIntrinsic, crc, value);
          }

          // The 64-bit form is emitted as a long intrinsic; its result is narrowed back to 32 bits.
          Operand wideResult = context.AddIntrinsicLong(crcIntrinsic, crc, value);

          return context.ConvertI64ToI32(wideResult);
      }

      // Managed fallback: resolves the matching SoftFallback method by name and emits a call to it.
      private static Operand EmitCrc32SoftFallback(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli)
      {
          string methodName;

          switch (size)
          {
              case 0:
                  methodName = castagnoli ? nameof(SoftFallback.Crc32cb) : nameof(SoftFallback.Crc32b);
                  break;
              case 1:
                  methodName = castagnoli ? nameof(SoftFallback.Crc32ch) : nameof(SoftFallback.Crc32h);
                  break;
              case 2:
                  methodName = castagnoli ? nameof(SoftFallback.Crc32cw) : nameof(SoftFallback.Crc32w);
                  break;
              case 3:
                  methodName = castagnoli ? nameof(SoftFallback.Crc32cx) : nameof(SoftFallback.Crc32x);
                  break;
              default:
                  throw new ArgumentOutOfRangeException(nameof(size));
          }

          return context.Call(typeof(SoftFallback).GetMethod(methodName), crc, value);
      }

      // Selects the pair of constants used by the PCLMULQDQ paths for the requested polynomial.
      private static void GetClmulConstants(bool castagnoli, out long mu, out long polynomial)
      {
          if (castagnoli)
          {
              mu = 0x0DEA713F1; // mu' = floor(x^64/P(x))'
              polynomial = 0x105EC76F0; // P'(x) << 1
          }
          else
          {
              mu = 0x1F7011641; // mu' = floor(x^64/P(x))'
              polynomial = 0x1DB710641; // P'(x) << 1
          }
      }

      // Loads `scalar` into a vector and emits PCLMULQDQ of `vector` with it.
      // `selector` is forwarded unchanged as the instruction's immediate operand.
      private static Operand EmitClmul(ArmEmitterContext context, Operand vector, long scalar, int selector)
      {
          Operand scalarVector = X86GetScalar(context, scalar);

          return context.AddIntrinsic(Intrinsic.X86Pclmulqdq, vector, scalarVector, Const(selector));
      }

      // PCLMULQDQ path for 8, 16 and 32-bit data (size 0, 1 and 2).
      private static Operand EmitCrc32Optimized(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli, int size)
      {
          GetClmulConstants(castagnoli, out long mu, out long polynomial);

          // Move both inputs into element 0 of otherwise zeroed vectors.
          Operand crcVector = context.VectorInsert(context.VectorZero(), crc, 0);
          Operand dataVector = data;

          if (size == 0)
          {
              dataVector = context.VectorInsert8(context.VectorZero(), data, 0);
          }
          else if (size == 1)
          {
              dataVector = context.VectorInsert16(context.VectorZero(), data, 0);
          }
          else if (size == 2)
          {
              dataVector = context.VectorInsert(context.VectorZero(), data, 0);
          }

          int bitsize = 8 << size;
          int shiftBits = 64 - bitsize;

          Operand mixed = context.AddIntrinsic(Intrinsic.X86Pxor, crcVector, dataVector);
          Operand aligned = context.AddIntrinsic(Intrinsic.X86Psllq, mixed, Const(shiftBits));
          Operand timesMu = EmitClmul(context, aligned, mu, 0);
          Operand result = EmitClmul(context, timesMu, polynomial, 0);

          if (bitsize < 32)
          {
              // For data narrower than the checksum, the byte-shifted checksum is XORed back into the product.
              Operand shiftedCrc = context.AddIntrinsic(Intrinsic.X86Pslldq, crcVector, Const(shiftBits / 8));

              result = context.AddIntrinsic(Intrinsic.X86Pxor, result, shiftedCrc);
          }

          return context.VectorExtract(OperandType.I32, result, 2);
      }

      // PCLMULQDQ path for 64-bit data (size 3): two multiply-by-mu / multiply-by-polynomial rounds.
      private static Operand EmitCrc32Optimized64(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli)
      {
          GetClmulConstants(castagnoli, out long mu, out long polynomial);

          // Move both inputs into element 0 of otherwise zeroed vectors.
          Operand crcVector = context.VectorInsert(context.VectorZero(), crc, 0);
          Operand dataVector = context.VectorInsert(context.VectorZero(), data, 0);

          Operand mixed = context.AddIntrinsic(Intrinsic.X86Pxor, crcVector, dataVector);
          Operand shifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mixed, Const(4));

          // First round.
          Operand round = EmitClmul(context, shifted, mu, 0);
          round = EmitClmul(context, round, polynomial, 0);

          // Fold the shifted input back in and realign before the second round.
          round = context.AddIntrinsic(Intrinsic.X86Pxor, round, shifted);
          round = context.AddIntrinsic(Intrinsic.X86Psllq, round, Const(32));

          // Second round; note the immediate of 1 on the multiplication by mu.
          round = EmitClmul(context, round, mu, 1);
          round = EmitClmul(context, round, polynomial, 0);

          return context.VectorExtract(OperandType.I32, round, 2);
      }
  }
  97. }