InstEmitSimdShift.cs 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057
  1. // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
  2. using ARMeilleure.Decoders;
  3. using ARMeilleure.IntermediateRepresentation;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using static ARMeilleure.Instructions.InstEmitHelper;
  7. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  8. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  9. namespace ARMeilleure.Instructions
  10. {
  11. using Func2I = Func<Operand, Operand, Operand>;
  12. static partial class InstEmit
  13. {
        #region "Masks"
        // Pshufb byte-shuffle masks used by Rshrn_V / Shrn_V, indexed by op.Size
        // (0 = 16->8, 1 = 32->16, 2 = 64->32 narrowing). Each byte of the mask selects
        // the source byte that lands in the packed low 64 bits (the low half of each
        // wide element, in order).
        private static readonly long[] _masks_RshrnShrn = new long[]
        {
            14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
            13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
            11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
        };
        #endregion
        // RSHRN{2} (vector): rounding shift right narrow by immediate.
        public static void Rshrn_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSsse3)
            {
                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

                int shift = GetImmShr(op);

                // Rounding constant: half of the discarded range (1 << (shift - 1)).
                long roundConst = 1L << (shift - 1);

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);

                // Keep the current low 64 bits of the destination (needed by the "2" variant).
                Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());

                Operand mask = null;

                // Broadcast the rounding constant across the wide (op.Size + 1) lanes.
                // For 16-bit lanes the constant is replicated into both halves of a 32-bit word.
                switch (op.Size + 1)
                {
                    case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break;
                    case 2: mask = X86GetAllElements(context, (int)roundConst); break;
                    case 3: mask = X86GetAllElements(context, roundConst); break;
                }

                // Add the rounding constant, then logical shift right on the wide elements.
                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                Operand res = context.AddIntrinsic(addInst, n, mask);

                Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];

                res = context.AddIntrinsic(srlInst, res, Const(shift));

                // Pack the low halves of the wide elements into the low 64 bits.
                Operand mask2 = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);

                res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);

                // RSHRN writes the low half of Rd; RSHRN2 writes the high half, keeping the low half.
                Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
                    ? Intrinsic.X86Movlhps
                    : Intrinsic.X86Movhlps;

                res = context.AddIntrinsic(movInst, dLow, res);

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShrImmNarrowOpZx(context, round: true);
            }
        }
  56. public static void Shl_S(ArmEmitterContext context)
  57. {
  58. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  59. int shift = GetImmShl(op);
  60. EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
  61. }
  62. public static void Shl_V(ArmEmitterContext context)
  63. {
  64. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  65. int shift = GetImmShl(op);
  66. if (Optimizations.UseSse2 && op.Size > 0)
  67. {
  68. Operand n = GetVec(op.Rn);
  69. Intrinsic sllInst = X86PsllInstruction[op.Size];
  70. Operand res = context.AddIntrinsic(sllInst, n, Const(shift));
  71. if (op.RegisterSize == RegisterSize.Simd64)
  72. {
  73. res = context.VectorZeroUpper64(res);
  74. }
  75. context.Copy(GetVec(op.Rd), res);
  76. }
  77. else
  78. {
  79. EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
  80. }
  81. }
        // SHLL{2} (vector): shift left long by the element size (widening).
        public static void Shll_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            // SHLL always shifts by the source element size in bits.
            int shift = 8 << op.Size;

            if (Optimizations.UseSse41)
            {
                Operand n = GetVec(op.Rn);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    // The "2" variant reads the upper 64 bits of the source.
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                }

                // Widen, then shift on the doubled element size. Sign vs. zero extension is
                // irrelevant here: the extension bits are fully shifted out by eSize.
                Intrinsic movsxInst = X86PmovsxInstruction[op.Size];

                Operand res = context.AddIntrinsic(movsxInst, n);

                Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

                res = context.AddIntrinsic(sllInst, res, Const(shift));

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
            }
        }
  104. public static void Shrn_V(ArmEmitterContext context)
  105. {
  106. if (Optimizations.UseSsse3)
  107. {
  108. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  109. int shift = GetImmShr(op);
  110. long roundConst = 1L << (shift - 1);
  111. Operand d = GetVec(op.Rd);
  112. Operand n = GetVec(op.Rn);
  113. Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
  114. Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
  115. Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
  116. Operand mask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);
  117. Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);
  118. Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
  119. ? Intrinsic.X86Movlhps
  120. : Intrinsic.X86Movhlps;
  121. res = context.AddIntrinsic(movInst, dLow, res);
  122. context.Copy(GetVec(op.Rd), res);
  123. }
  124. else
  125. {
  126. EmitVectorShrImmNarrowOpZx(context, round: false);
  127. }
  128. }
  129. public static void Sli_V(ArmEmitterContext context)
  130. {
  131. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  132. Operand res = context.VectorZero();
  133. int elems = op.GetBytesCount() >> op.Size;
  134. int shift = GetImmShl(op);
  135. ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0;
  136. for (int index = 0; index < elems; index++)
  137. {
  138. Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
  139. Operand neShifted = context.ShiftLeft(ne, Const(shift));
  140. Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
  141. Operand deMasked = context.BitwiseAnd(de, Const(mask));
  142. Operand e = context.BitwiseOr(neShifted, deMasked);
  143. res = EmitVectorInsert(context, res, e, index, op.Size);
  144. }
  145. context.Copy(GetVec(op.Rd), res);
  146. }
  147. public static void Sqrshl_V(ArmEmitterContext context)
  148. {
  149. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  150. Operand res = context.VectorZero();
  151. int elems = op.GetBytesCount() >> op.Size;
  152. for (int index = 0; index < elems; index++)
  153. {
  154. Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
  155. Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
  156. Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ), ne, me, Const(1), Const(op.Size));
  157. res = EmitVectorInsert(context, res, e, index, op.Size);
  158. }
  159. context.Copy(GetVec(op.Rd), res);
  160. }
        // SQRSHRN (scalar): signed saturating rounded shift right narrow.
        public static void Sqrshrn_S(ArmEmitterContext context)
        {
            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
        }

        // SQRSHRN{2} (vector): signed saturating rounded shift right narrow.
        public static void Sqrshrn_V(ArmEmitterContext context)
        {
            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
        }

        // SQRSHRUN (scalar): signed saturating rounded shift right, unsigned narrow result.
        public static void Sqrshrun_S(ArmEmitterContext context)
        {
            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
        }

        // SQRSHRUN{2} (vector): signed saturating rounded shift right, unsigned narrow result.
        public static void Sqrshrun_V(ArmEmitterContext context)
        {
            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
        }
  177. public static void Sqshl_V(ArmEmitterContext context)
  178. {
  179. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  180. Operand res = context.VectorZero();
  181. int elems = op.GetBytesCount() >> op.Size;
  182. for (int index = 0; index < elems; index++)
  183. {
  184. Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
  185. Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
  186. Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ), ne, me, Const(0), Const(op.Size));
  187. res = EmitVectorInsert(context, res, e, index, op.Size);
  188. }
  189. context.Copy(GetVec(op.Rd), res);
  190. }
        // SQSHRN (scalar): signed saturating shift right narrow.
        public static void Sqshrn_S(ArmEmitterContext context)
        {
            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
        }

        // SQSHRN{2} (vector): signed saturating shift right narrow.
        public static void Sqshrn_V(ArmEmitterContext context)
        {
            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
        }

        // SQSHRUN (scalar): signed saturating shift right, unsigned narrow result.
        public static void Sqshrun_S(ArmEmitterContext context)
        {
            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
        }

        // SQSHRUN{2} (vector): signed saturating shift right, unsigned narrow result.
        public static void Sqshrun_V(ArmEmitterContext context)
        {
            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
        }
  207. public static void Srshl_V(ArmEmitterContext context)
  208. {
  209. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  210. Operand res = context.VectorZero();
  211. int elems = op.GetBytesCount() >> op.Size;
  212. for (int index = 0; index < elems; index++)
  213. {
  214. Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
  215. Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
  216. Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg), ne, me, Const(1), Const(op.Size));
  217. res = EmitVectorInsert(context, res, e, index, op.Size);
  218. }
  219. context.Copy(GetVec(op.Rd), res);
  220. }
        // SRSHR (scalar): signed rounding shift right by immediate.
        public static void Srshr_S(ArmEmitterContext context)
        {
            EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
        }

        // SRSHR (vector): signed rounding shift right by immediate.
        public static void Srshr_V(ArmEmitterContext context)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            // Fast path is limited to 16/32-bit elements (op.Size 1..2); the signed
            // shift intrinsic table has no usable 8-bit or 64-bit element form here.
            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
            {
                int shift = GetImmShr(op);
                int eSize = 8 << op.Size;

                Operand n = GetVec(op.Rn);

                // Extract the rounding bit (bit shift-1 of each element):
                // (n << (eSize - shift)) >> (eSize - 1) yields 0 or 1 per element.
                Intrinsic sllInst = X86PsllInstruction[op.Size];

                Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

                Intrinsic srlInst = X86PsrlInstruction[op.Size];

                res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

                // Arithmetic shift, then add the rounding bit.
                Intrinsic sraInst = X86PsraInstruction[op.Size];

                Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                res = context.AddIntrinsic(addInst, res, nSra);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
            }
        }

        // SRSRA (scalar): signed rounding shift right by immediate and accumulate.
        public static void Srsra_S(ArmEmitterContext context)
        {
            EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
        }

        // SRSRA (vector): signed rounding shift right by immediate and accumulate.
        public static void Srsra_V(ArmEmitterContext context)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            // Same element-size restriction as Srshr_V.
            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
            {
                int shift = GetImmShr(op);
                int eSize = 8 << op.Size;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);

                // Rounding bit, as in Srshr_V.
                Intrinsic sllInst = X86PsllInstruction[op.Size];

                Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

                Intrinsic srlInst = X86PsrlInstruction[op.Size];

                res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

                Intrinsic sraInst = X86PsraInstruction[op.Size];

                Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                // rounded shift result, then accumulate into the destination.
                res = context.AddIntrinsic(addInst, res, nSra);
                res = context.AddIntrinsic(addInst, res, d);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
            }
        }
  285. public static void Sshl_V(ArmEmitterContext context)
  286. {
  287. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  288. Operand res = context.VectorZero();
  289. int elems = op.GetBytesCount() >> op.Size;
  290. for (int index = 0; index < elems; index++)
  291. {
  292. Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
  293. Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
  294. Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg), ne, me, Const(0), Const(op.Size));
  295. res = EmitVectorInsert(context, res, e, index, op.Size);
  296. }
  297. context.Copy(GetVec(op.Rd), res);
  298. }
        // SSHLL{2} (vector): signed shift left long by immediate (widening).
        public static void Sshll_V(ArmEmitterContext context)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            int shift = GetImmShl(op);

            if (Optimizations.UseSse41)
            {
                Operand n = GetVec(op.Rn);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    // The "2" variant reads the upper 64 bits of the source.
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                }

                // Sign-extend to the doubled element size.
                Intrinsic movsxInst = X86PmovsxInstruction[op.Size];

                Operand res = context.AddIntrinsic(movsxInst, n);

                if (shift != 0)
                {
                    Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

                    res = context.AddIntrinsic(sllInst, res, Const(shift));
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
            }
        }
        // SSHR (scalar): signed shift right by immediate.
        public static void Sshr_S(ArmEmitterContext context)
        {
            EmitShrImmOp(context, ShrImmFlags.ScalarSx);
        }
  328. public static void Sshr_V(ArmEmitterContext context)
  329. {
  330. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  331. if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
  332. {
  333. int shift = GetImmShr(op);
  334. Operand n = GetVec(op.Rn);
  335. Intrinsic sraInst = X86PsraInstruction[op.Size];
  336. Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
  337. if (op.RegisterSize == RegisterSize.Simd64)
  338. {
  339. res = context.VectorZeroUpper64(res);
  340. }
  341. context.Copy(GetVec(op.Rd), res);
  342. }
  343. else
  344. {
  345. EmitShrImmOp(context, ShrImmFlags.VectorSx);
  346. }
  347. }
        // SSRA (scalar): signed shift right by immediate and accumulate.
        public static void Ssra_S(ArmEmitterContext context)
        {
            EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
        }
  352. public static void Ssra_V(ArmEmitterContext context)
  353. {
  354. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  355. if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
  356. {
  357. int shift = GetImmShr(op);
  358. Operand d = GetVec(op.Rd);
  359. Operand n = GetVec(op.Rn);
  360. Intrinsic sraInst = X86PsraInstruction[op.Size];
  361. Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
  362. Intrinsic addInst = X86PaddInstruction[op.Size];
  363. res = context.AddIntrinsic(addInst, res, d);
  364. if (op.RegisterSize == RegisterSize.Simd64)
  365. {
  366. res = context.VectorZeroUpper64(res);
  367. }
  368. context.Copy(d, res);
  369. }
  370. else
  371. {
  372. EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
  373. }
  374. }
  375. public static void Uqrshl_V(ArmEmitterContext context)
  376. {
  377. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  378. Operand res = context.VectorZero();
  379. int elems = op.GetBytesCount() >> op.Size;
  380. for (int index = 0; index < elems; index++)
  381. {
  382. Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
  383. Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
  384. Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ), ne, me, Const(1), Const(op.Size));
  385. res = EmitVectorInsert(context, res, e, index, op.Size);
  386. }
  387. context.Copy(GetVec(op.Rd), res);
  388. }
        // UQRSHRN (scalar): unsigned saturating rounded shift right narrow.
        public static void Uqrshrn_S(ArmEmitterContext context)
        {
            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
        }

        // UQRSHRN{2} (vector): unsigned saturating rounded shift right narrow.
        public static void Uqrshrn_V(ArmEmitterContext context)
        {
            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
        }
  397. public static void Uqshl_V(ArmEmitterContext context)
  398. {
  399. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  400. Operand res = context.VectorZero();
  401. int elems = op.GetBytesCount() >> op.Size;
  402. for (int index = 0; index < elems; index++)
  403. {
  404. Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
  405. Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
  406. Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ), ne, me, Const(0), Const(op.Size));
  407. res = EmitVectorInsert(context, res, e, index, op.Size);
  408. }
  409. context.Copy(GetVec(op.Rd), res);
  410. }
        // UQSHRN (scalar): unsigned saturating shift right narrow.
        public static void Uqshrn_S(ArmEmitterContext context)
        {
            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
        }

        // UQSHRN{2} (vector): unsigned saturating shift right narrow.
        public static void Uqshrn_V(ArmEmitterContext context)
        {
            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
        }
  419. public static void Urshl_V(ArmEmitterContext context)
  420. {
  421. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  422. Operand res = context.VectorZero();
  423. int elems = op.GetBytesCount() >> op.Size;
  424. for (int index = 0; index < elems; index++)
  425. {
  426. Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
  427. Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
  428. Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(1), Const(op.Size));
  429. res = EmitVectorInsert(context, res, e, index, op.Size);
  430. }
  431. context.Copy(GetVec(op.Rd), res);
  432. }
        // URSHR (scalar): unsigned rounding shift right by immediate.
        public static void Urshr_S(ArmEmitterContext context)
        {
            EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
        }

        // URSHR (vector): unsigned rounding shift right by immediate.
        // Unlike the signed variant, all element sizes from 16-bit up are handled
        // (op.Size > 0, no upper bound: logical shifts have a 64-bit element form).
        public static void Urshr_V(ArmEmitterContext context)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size > 0)
            {
                int shift = GetImmShr(op);
                int eSize = 8 << op.Size;

                Operand n = GetVec(op.Rn);

                // Extract the rounding bit (bit shift-1 of each element):
                // (n << (eSize - shift)) >> (eSize - 1) yields 0 or 1 per element.
                Intrinsic sllInst = X86PsllInstruction[op.Size];

                Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

                Intrinsic srlInst = X86PsrlInstruction[op.Size];

                res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

                // Logical shift, then add the rounding bit.
                Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                res = context.AddIntrinsic(addInst, res, nSrl);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
            }
        }

        // URSRA (scalar): unsigned rounding shift right by immediate and accumulate.
        public static void Ursra_S(ArmEmitterContext context)
        {
            EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
        }

        // URSRA (vector): unsigned rounding shift right by immediate and accumulate.
        public static void Ursra_V(ArmEmitterContext context)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size > 0)
            {
                int shift = GetImmShr(op);
                int eSize = 8 << op.Size;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);

                // Rounding bit, as in Urshr_V.
                Intrinsic sllInst = X86PsllInstruction[op.Size];

                Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

                Intrinsic srlInst = X86PsrlInstruction[op.Size];

                res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

                Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                // rounded shift result, then accumulate into the destination.
                res = context.AddIntrinsic(addInst, res, nSrl);
                res = context.AddIntrinsic(addInst, res, d);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
            }
        }
  495. public static void Ushl_V(ArmEmitterContext context)
  496. {
  497. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  498. Operand res = context.VectorZero();
  499. int elems = op.GetBytesCount() >> op.Size;
  500. for (int index = 0; index < elems; index++)
  501. {
  502. Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
  503. Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
  504. Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(0), Const(op.Size));
  505. res = EmitVectorInsert(context, res, e, index, op.Size);
  506. }
  507. context.Copy(GetVec(op.Rd), res);
  508. }
        // USHLL{2} (vector): unsigned shift left long by immediate (widening).
        public static void Ushll_V(ArmEmitterContext context)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            int shift = GetImmShl(op);

            if (Optimizations.UseSse41)
            {
                Operand n = GetVec(op.Rn);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    // The "2" variant reads the upper 64 bits of the source.
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                }

                // Zero-extend to the doubled element size.
                Intrinsic movzxInst = X86PmovzxInstruction[op.Size];

                Operand res = context.AddIntrinsic(movzxInst, n);

                if (shift != 0)
                {
                    Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

                    res = context.AddIntrinsic(sllInst, res, Const(shift));
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
            }
        }

        // USHR (scalar): unsigned shift right by immediate.
        public static void Ushr_S(ArmEmitterContext context)
        {
            EmitShrImmOp(context, ShrImmFlags.ScalarZx);
        }
  538. public static void Ushr_V(ArmEmitterContext context)
  539. {
  540. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  541. if (Optimizations.UseSse2 && op.Size > 0)
  542. {
  543. int shift = GetImmShr(op);
  544. Operand n = GetVec(op.Rn);
  545. Intrinsic srlInst = X86PsrlInstruction[op.Size];
  546. Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
  547. if (op.RegisterSize == RegisterSize.Simd64)
  548. {
  549. res = context.VectorZeroUpper64(res);
  550. }
  551. context.Copy(GetVec(op.Rd), res);
  552. }
  553. else
  554. {
  555. EmitShrImmOp(context, ShrImmFlags.VectorZx);
  556. }
  557. }
        // USRA (scalar): unsigned shift right by immediate and accumulate.
        public static void Usra_S(ArmEmitterContext context)
        {
            EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
        }
  562. public static void Usra_V(ArmEmitterContext context)
  563. {
  564. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  565. if (Optimizations.UseSse2 && op.Size > 0)
  566. {
  567. int shift = GetImmShr(op);
  568. Operand d = GetVec(op.Rd);
  569. Operand n = GetVec(op.Rn);
  570. Intrinsic srlInst = X86PsrlInstruction[op.Size];
  571. Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
  572. Intrinsic addInst = X86PaddInstruction[op.Size];
  573. res = context.AddIntrinsic(addInst, res, d);
  574. if (op.RegisterSize == RegisterSize.Simd64)
  575. {
  576. res = context.VectorZeroUpper64(res);
  577. }
  578. context.Copy(d, res);
  579. }
  580. else
  581. {
  582. EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
  583. }
  584. }
        // Options for the shared immediate shift-right emitter (EmitShrImmOp).
        [Flags]
        private enum ShrImmFlags
        {
            Scalar = 1 << 0,
            Signed = 1 << 1,
            Round = 1 << 2,
            Accumulate = 1 << 3,

            // Common combinations: scalar/vector crossed with signed/unsigned.
            ScalarSx = Scalar | Signed,
            ScalarZx = Scalar,
            VectorSx = Signed,
            VectorZx = 0
        }

        // Scalar, signed source.
        private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
        {
            EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
        }

        // Scalar, unsigned source.
        private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
        {
            EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
        }

        // Vector, signed source.
        private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
        {
            EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
        }

        // Vector, unsigned source.
        private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
        {
            EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
        }
        // Shared fallback emitter for immediate shift-right ops
        // (SSHR/USHR/SRSHR/URSHR/SSRA/USRA/SRSRA/URSRA), driven by ShrImmFlags.
        private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            Operand res = context.VectorZero();

            bool scalar = (flags & ShrImmFlags.Scalar) != 0;
            bool signed = (flags & ShrImmFlags.Signed) != 0;
            bool round = (flags & ShrImmFlags.Round) != 0;
            bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;

            int shift = GetImmShr(op);

            // Rounding constant added before the shift (half of the discarded range).
            long roundConst = 1L << (shift - 1);

            int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

            for (int index = 0; index < elems; index++)
            {
                Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);

                if (op.Size <= 2)
                {
                    // Elements of 32 bits or less have headroom in a 64-bit value,
                    // so the rounding add and shift can be done inline.
                    if (round)
                    {
                        e = context.Add(e, Const(roundConst));
                    }

                    e = signed
                        ? context.ShiftRightSI(e, Const(shift))
                        : context.ShiftRightUI(e, Const(shift));
                }
                else /* if (op.Size == 3) */
                {
                    // 64-bit elements: the rounding add may overflow, use the helper.
                    e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
                }

                if (accumulate)
                {
                    // Add the shifted result onto the current destination element.
                    Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);

                    e = context.Add(e, de);
                }

                res = EmitVectorInsert(context, res, e, index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
        // Fallback for SHRN/RSHRN: unsigned shift right narrow by immediate,
        // with optional rounding.
        private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            int shift = GetImmShr(op);

            // Rounding constant: half of the discarded range (only used when round is true).
            long roundConst = 1L << (shift - 1);

            int elems = 8 >> op.Size;

            // The "2" variant writes the upper half of Rd and preserves the lower half.
            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

            for (int index = 0; index < elems; index++)
            {
                // Extract from the wide (op.Size + 1) source elements.
                Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);

                if (round)
                {
                    e = context.Add(e, Const(roundConst));
                }

                e = context.ShiftRightUI(e, Const(shift));

                res = EmitVectorInsert(context, res, e, part + index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
        // Options for the shared saturating shift-right-narrow emitter
        // (EmitShrImmSaturatingNarrowOp).
        [Flags]
        private enum ShrImmSaturatingNarrowFlags
        {
            Scalar = 1 << 0,
            SignedSrc = 1 << 1,
            SignedDst = 1 << 2,
            Round = 1 << 3,

            // Naming: <Scalar|Vector><src signedness><dst signedness>,
            // e.g. ScalarSxZx = scalar op, signed source, unsigned destination.
            ScalarSxSx = Scalar | SignedSrc | SignedDst,
            ScalarSxZx = Scalar | SignedSrc,
            ScalarZxZx = Scalar,

            VectorSxSx = SignedSrc | SignedDst,
            VectorSxZx = SignedSrc,
            VectorZxZx = 0
        }

        // Rounding variant: same as EmitShrImmSaturatingNarrowOp with Round forced on.
        private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
        {
            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
        }
        // Shared emitter for saturating shift-right-narrow ops
        // (SQSHRN/SQSHRUN/UQSHRN and their rounding variants), driven by
        // ShrImmSaturatingNarrowFlags.
        private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
            bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
            bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
            bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;

            int shift = GetImmShr(op);

            // Rounding constant: half of the discarded range.
            long roundConst = 1L << (shift - 1);

            int elems = !scalar ? 8 >> op.Size : 1;

            // Vector "2" variants write the upper half of Rd and preserve the lower half.
            int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

            Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

            for (int index = 0; index < elems; index++)
            {
                // Extract from the wide (op.Size + 1) source elements.
                Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);

                if (op.Size <= 1 || !round)
                {
                    // Source element small enough (or no rounding): the rounding add
                    // cannot overflow 64 bits, do it inline.
                    if (round)
                    {
                        e = context.Add(e, Const(roundConst));
                    }

                    e = signedSrc
                        ? context.ShiftRightSI(e, Const(shift))
                        : context.ShiftRightUI(e, Const(shift));
                }
                else /* if (op.Size == 2 && round) */
                {
                    // 64-bit wide source with rounding: use the overflow-safe helper.
                    e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32
                }

                // Saturate to the narrow destination element range.
                e = EmitSatQ(context, e, op.Size, signedSrc, signedDst);

                res = EmitVectorInsert(context, res, e, part + index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
        // dst64 = (Int(src64, signed) + roundConst) >> shift;
        // Delegated to a software fallback because the rounding add can overflow
        // a 64-bit intermediate.
        private static Operand EmitShrImm64(
            ArmEmitterContext context,
            Operand value,
            bool signed,
            long roundConst,
            int shift)
        {
            // Pick the signed or unsigned SoftFallback implementation.
            Delegate dlg = signed
                ? (Delegate)new _S64_S64_S64_S32(SoftFallback.SignedShrImm64)
                : (Delegate)new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64);

            return context.Call(dlg, value, Const(roundConst), Const(shift));
        }
        // Signed variant of the widening shift-by-immediate helper.
        private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
        {
            EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
        }

        // Unsigned variant of the widening shift-by-immediate helper.
        private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
        {
            EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
        }

        // Applies emit(element, imm) to each source element, widening op.Size -> op.Size + 1.
        // For 128-bit register sizes the upper half of the source is consumed ("2" variants).
        private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }
  756. }
  757. }