// InstEmitSimdShift.cs
// (Removed non-source extraction residue: page header and concatenated line-number gutter.)
  1. // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
  2. using ARMeilleure.Decoders;
  3. using ARMeilleure.IntermediateRepresentation;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Reflection;
  8. using static ARMeilleure.Instructions.InstEmitHelper;
  9. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  10. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  11. namespace ARMeilleure.Instructions
  12. {
  13. using Func2I = Func<Operand, Operand, Operand>;
  14. static partial class InstEmit
  15. {
#region "Masks"
// Multiplying one of these by an element-wide mask value broadcasts that value
// across every element of a 64-bit lane (index 0 = byte ... 3 = double-word).
private static readonly long[] _masks_SliSri = new long[] // Replication masks.
{
    0x0101010101010101L, 0x0001000100010001L, 0x0000000100000001L, 0x0000000000000001L
};
#endregion
// RSHRN{2}: rounding shift right narrow (immediate), writing the low half of Rd
// (or the high half for the Simd128 "2" form).
public static void Rshrn_V(ArmEmitterContext context)
{
    if (Optimizations.UseSsse3)
    {
        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

        int shift = GetImmShr(op);
        long roundConst = 1L << (shift - 1); // Rounding constant: 2^(shift-1).

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Keep only the low 64 bits of Rd; the narrow result is merged next to it.
        Operand dLow = context.VectorZeroUpper64(d);

        Operand mask = default;

        // Broadcast the rounding constant at the wide (source) element size, op.Size + 1.
        switch (op.Size + 1)
        {
            case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break;
            case 2: mask = X86GetAllElements(context, (int)roundConst); break;
            case 3: mask = X86GetAllElements(context, roundConst); break;
        }

        // res = (n + roundConst) >> shift, computed on the wide elements.
        Intrinsic addInst = X86PaddInstruction[op.Size + 1];

        Operand res = context.AddIntrinsic(addInst, n, mask);

        Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];

        res = context.AddIntrinsic(srlInst, res, Const(shift));

        // Narrow by gathering the low (even-indexed) parts of each wide element.
        Operand mask2 = X86GetAllElements(context, EvenMasks[op.Size]);

        res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);

        // Simd128 ("2" form): result goes to the high half of Rd; Simd64: to the low half.
        Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
            ? Intrinsic.X86Movlhps
            : Intrinsic.X86Movhlps;

        res = context.AddIntrinsic(movInst, dLow, res);

        context.Copy(d, res);
    }
    else
    {
        EmitVectorShrImmNarrowOpZx(context, round: true);
    }
}
  56. public static void Shl_S(ArmEmitterContext context)
  57. {
  58. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  59. int shift = GetImmShl(op);
  60. EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
  61. }
// SHL (vector): shift left by immediate.
public static void Shl_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShl(op);
    int eSize = 8 << op.Size;

    if (shift >= eSize)
    {
        // NOTE(review): GetImmShl appears to yield shifts below the element size
        // for valid encodings, so this looks like a defensive branch — confirm.
        if ((op.RegisterSize == RegisterSize.Simd64))
        {
            Operand res = context.VectorZeroUpper64(GetVec(op.Rd));

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else if (Optimizations.UseGfni && op.Size == 0)
    {
        // Byte elements: SSE has no per-byte shift, but GFNI's affine transform
        // (GF2P8AFFINEQB) can apply an arbitrary bit matrix to each byte.
        Operand n = GetVec(op.Rn);

        ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);

        Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);

        Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else if (Optimizations.UseSse2 && op.Size > 0)
    {
        Operand n = GetVec(op.Rn);

        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(shift));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
    }
}
// SHLL{2}: shift left long — widen each element and shift by the (narrow) element size.
public static void Shll_V(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int shift = 8 << op.Size; // Shift amount is fixed to the source element size.

    if (Optimizations.UseSse41)
    {
        Operand n = GetVec(op.Rn);

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // The "2" form reads the upper half of Rn; move it down first.
            n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
        }

        // Sign-extend and shift: the extension bits are shifted out of the widened
        // element entirely (shift == narrow eSize), so sign vs zero extension is moot.
        Intrinsic movsxInst = X86PmovsxInstruction[op.Size];

        Operand res = context.AddIntrinsic(movsxInst, n);

        Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

        res = context.AddIntrinsic(sllInst, res, Const(shift));

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
    }
}
// SHRN{2}: shift right narrow (immediate), truncating, into the low half of Rd
// (or the high half for the Simd128 "2" form).
public static void Shrn_V(ArmEmitterContext context)
{
    if (Optimizations.UseSsse3)
    {
        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

        int shift = GetImmShr(op);

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Keep only the low 64 bits of Rd; the narrow result is merged next to it.
        Operand dLow = context.VectorZeroUpper64(d);

        // Logical shift right on the wide (op.Size + 1) elements.
        Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];

        Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));

        // Narrow by gathering the low (even-indexed) parts of each wide element.
        Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);

        Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);

        // Simd128 ("2" form): result goes to the high half of Rd; Simd64: to the low half.
        Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
            ? Intrinsic.X86Movlhps
            : Intrinsic.X86Movhlps;

        res = context.AddIntrinsic(movInst, dLow, res);

        context.Copy(d, res);
    }
    else
    {
        EmitVectorShrImmNarrowOpZx(context, round: false);
    }
}
  149. public static void Sli_S(ArmEmitterContext context)
  150. {
  151. EmitSli(context, scalar: true);
  152. }
  153. public static void Sli_V(ArmEmitterContext context)
  154. {
  155. EmitSli(context, scalar: false);
  156. }
  157. public static void Sqrshl_V(ArmEmitterContext context)
  158. {
  159. EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
  160. }
  161. public static void Sqrshrn_S(ArmEmitterContext context)
  162. {
  163. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  164. }
  165. public static void Sqrshrn_V(ArmEmitterContext context)
  166. {
  167. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  168. }
  169. public static void Sqrshrun_S(ArmEmitterContext context)
  170. {
  171. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  172. }
  173. public static void Sqrshrun_V(ArmEmitterContext context)
  174. {
  175. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  176. }
  177. public static void Sqshl_V(ArmEmitterContext context)
  178. {
  179. EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
  180. }
  181. public static void Sqshrn_S(ArmEmitterContext context)
  182. {
  183. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  184. }
  185. public static void Sqshrn_V(ArmEmitterContext context)
  186. {
  187. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  188. }
  189. public static void Sqshrun_S(ArmEmitterContext context)
  190. {
  191. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  192. }
  193. public static void Sqshrun_V(ArmEmitterContext context)
  194. {
  195. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  196. }
  197. public static void Sri_S(ArmEmitterContext context)
  198. {
  199. EmitSri(context, scalar: true);
  200. }
  201. public static void Sri_V(ArmEmitterContext context)
  202. {
  203. EmitSri(context, scalar: false);
  204. }
  205. public static void Srshl_V(ArmEmitterContext context)
  206. {
  207. EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
  208. }
  209. public static void Srshr_S(ArmEmitterContext context)
  210. {
  211. EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
  212. }
// SRSHR (vector): signed rounding shift right by immediate.
public static void Srshr_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    // PSRA only exists for 16/32-bit elements, hence 0 < size < 3.
    if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
    {
        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        Operand n = GetVec(op.Rn);

        // Rounding bit = bit (shift-1) of n, isolated as
        // (n << (eSize - shift)) >> (eSize - 1).
        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

        // Result = (n >> shift, arithmetic) + roundingBit.
        Intrinsic sraInst = X86PsraInstruction[op.Size];

        Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, nSra);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
    }
}
  240. public static void Srsra_S(ArmEmitterContext context)
  241. {
  242. EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  243. }
// SRSRA (vector): signed rounding shift right and accumulate into Rd.
public static void Srsra_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    // PSRA only exists for 16/32-bit elements, hence 0 < size < 3.
    if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
    {
        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Rounding bit = bit (shift-1) of n: (n << (eSize - shift)) >> (eSize - 1).
        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

        // Result = (n >> shift, arithmetic) + roundingBit + d (accumulate).
        Intrinsic sraInst = X86PsraInstruction[op.Size];

        Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, nSra);
        res = context.AddIntrinsic(addInst, res, d);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
    }
}
  273. public static void Sshl_S(ArmEmitterContext context)
  274. {
  275. EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
  276. }
  277. public static void Sshl_V(ArmEmitterContext context)
  278. {
  279. EmitShlRegOp(context, ShlRegFlags.Signed);
  280. }
// SSHLL{2}: signed shift left long by immediate (widen, then shift).
public static void Sshll_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShl(op);

    if (Optimizations.UseSse41)
    {
        Operand n = GetVec(op.Rn);

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // The "2" form reads the upper half of Rn; move it down first.
            n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
        }

        // Sign-extend each element to the next larger size.
        Intrinsic movsxInst = X86PmovsxInstruction[op.Size];

        Operand res = context.AddIntrinsic(movsxInst, n);

        if (shift != 0) // Zero shift: the widened value is already the result.
        {
            Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

            res = context.AddIntrinsic(sllInst, res, Const(shift));
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
    }
}
  306. public static void Sshr_S(ArmEmitterContext context)
  307. {
  308. EmitShrImmOp(context, ShrImmFlags.ScalarSx);
  309. }
// SSHR (vector): signed shift right by immediate.
public static void Sshr_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShr(op);

    if (Optimizations.UseGfni && op.Size == 0)
    {
        // Byte elements: SSE has no per-byte arithmetic shift, but GFNI's
        // GF2P8AFFINEQB can apply an arbitrary bit matrix to each byte.
        Operand n = GetVec(op.Rn);

        ulong bitMatrix;

        if (shift < 8)
        {
            // Start from a logical right shift matrix (a "left shift" by -shift)...
            bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);

            // Extend sign-bit
            // ...then OR in rows that copy the sign bit into the vacated top bits.
            bitMatrix |= 0x8080808080808080UL >> (64 - shift * 8);
        }
        else
        {
            // Replicate sign-bit into all bits
            bitMatrix = 0x8080808080808080UL;
        }

        Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);

        Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
    {
        // PSRA only exists for 16/32-bit elements, hence 0 < size < 3.
        Operand n = GetVec(op.Rn);

        Intrinsic sraInst = X86PsraInstruction[op.Size];

        Operand res = context.AddIntrinsic(sraInst, n, Const(shift));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitShrImmOp(context, ShrImmFlags.VectorSx);
    }
}
  353. public static void Ssra_S(ArmEmitterContext context)
  354. {
  355. EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
  356. }
// SSRA (vector): signed shift right and accumulate into Rd.
public static void Ssra_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    // PSRA only exists for 16/32-bit elements, hence 0 < size < 3.
    if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
    {
        int shift = GetImmShr(op);

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Result = (n >> shift, arithmetic) + d.
        Intrinsic sraInst = X86PsraInstruction[op.Size];

        Operand res = context.AddIntrinsic(sraInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, d);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
    }
}
  380. public static void Uqrshl_V(ArmEmitterContext context)
  381. {
  382. EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
  383. }
  384. public static void Uqrshrn_S(ArmEmitterContext context)
  385. {
  386. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  387. }
  388. public static void Uqrshrn_V(ArmEmitterContext context)
  389. {
  390. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  391. }
  392. public static void Uqshl_V(ArmEmitterContext context)
  393. {
  394. EmitShlRegOp(context, ShlRegFlags.Saturating);
  395. }
  396. public static void Uqshrn_S(ArmEmitterContext context)
  397. {
  398. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  399. }
  400. public static void Uqshrn_V(ArmEmitterContext context)
  401. {
  402. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  403. }
  404. public static void Urshl_V(ArmEmitterContext context)
  405. {
  406. EmitShlRegOp(context, ShlRegFlags.Round);
  407. }
  408. public static void Urshr_S(ArmEmitterContext context)
  409. {
  410. EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
  411. }
// URSHR (vector): unsigned rounding shift right by immediate.
public static void Urshr_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseSse2 && op.Size > 0) // PSRL has no 8-bit element form.
    {
        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        Operand n = GetVec(op.Rn);

        // Rounding bit = bit (shift-1) of n: (n << (eSize - shift)) >> (eSize - 1).
        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

        // Result = (n >>> shift) + roundingBit.
        Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, nSrl);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
    }
}
  438. public static void Ursra_S(ArmEmitterContext context)
  439. {
  440. EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  441. }
// URSRA (vector): unsigned rounding shift right and accumulate into Rd.
public static void Ursra_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseSse2 && op.Size > 0) // PSRL has no 8-bit element form.
    {
        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Rounding bit = bit (shift-1) of n: (n << (eSize - shift)) >> (eSize - 1).
        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

        // Result = (n >>> shift) + roundingBit + d (accumulate).
        Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, nSrl);
        res = context.AddIntrinsic(addInst, res, d);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
    }
}
  470. public static void Ushl_S(ArmEmitterContext context)
  471. {
  472. EmitShlRegOp(context, ShlRegFlags.Scalar);
  473. }
  474. public static void Ushl_V(ArmEmitterContext context)
  475. {
  476. EmitShlRegOp(context, ShlRegFlags.None);
  477. }
// USHLL{2}: unsigned shift left long by immediate (widen, then shift).
public static void Ushll_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShl(op);

    if (Optimizations.UseSse41)
    {
        Operand n = GetVec(op.Rn);

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // The "2" form reads the upper half of Rn; move it down first.
            n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
        }

        // Zero-extend each element to the next larger size.
        Intrinsic movzxInst = X86PmovzxInstruction[op.Size];

        Operand res = context.AddIntrinsic(movzxInst, n);

        if (shift != 0) // Zero shift: the widened value is already the result.
        {
            Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

            res = context.AddIntrinsic(sllInst, res, Const(shift));
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
    }
}
  503. public static void Ushr_S(ArmEmitterContext context)
  504. {
  505. EmitShrImmOp(context, ShrImmFlags.ScalarZx);
  506. }
  507. public static void Ushr_V(ArmEmitterContext context)
  508. {
  509. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  510. if (Optimizations.UseSse2 && op.Size > 0)
  511. {
  512. int shift = GetImmShr(op);
  513. Operand n = GetVec(op.Rn);
  514. Intrinsic srlInst = X86PsrlInstruction[op.Size];
  515. Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
  516. if (op.RegisterSize == RegisterSize.Simd64)
  517. {
  518. res = context.VectorZeroUpper64(res);
  519. }
  520. context.Copy(GetVec(op.Rd), res);
  521. }
  522. else
  523. {
  524. EmitShrImmOp(context, ShrImmFlags.VectorZx);
  525. }
  526. }
  527. public static void Usra_S(ArmEmitterContext context)
  528. {
  529. EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
  530. }
// USRA (vector): unsigned shift right and accumulate into Rd.
public static void Usra_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseSse2 && op.Size > 0) // PSRL has no 8-bit element form.
    {
        int shift = GetImmShr(op);

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Result = (n >>> shift) + d.
        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        Operand res = context.AddIntrinsic(srlInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, d);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
    }
}
// Option flags for the shared shift-right-immediate emitters.
[Flags]
private enum ShrImmFlags
{
    Scalar = 1 << 0,     // Operate on a single element only.
    Signed = 1 << 1,     // Arithmetic (sign-extending) shift.
    Round = 1 << 2,      // Add 2^(shift-1) before shifting.
    Accumulate = 1 << 3, // Add the result to the destination element.

    ScalarSx = Scalar | Signed,
    ScalarZx = Scalar,

    VectorSx = Signed,
    VectorZx = 0
}
  566. private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
  567. {
  568. EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
  569. }
  570. private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
  571. {
  572. EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
  573. }
  574. private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
  575. {
  576. EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
  577. }
  578. private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
  579. {
  580. EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
  581. }
// Shared scalar/vector shift-right-immediate path (signed or unsigned, with
// optional rounding and accumulate), processed element by element in IR.
private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    Operand res = context.VectorZero();

    bool scalar = (flags & ShrImmFlags.Scalar) != 0;
    bool signed = (flags & ShrImmFlags.Signed) != 0;
    bool round = (flags & ShrImmFlags.Round) != 0;
    bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1); // Rounding constant: 2^(shift-1).

    int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

    for (int index = 0; index < elems; index++)
    {
        Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);

        if (op.Size <= 2)
        {
            // Elements up to 32 bits: value + roundConst fits in a 64-bit IR
            // value, so plain shifts are exact.
            if (round)
            {
                e = context.Add(e, Const(roundConst));
            }

            e = signed ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
        }
        else /* if (op.Size == 3) */
        {
            // 64-bit elements: the rounded shift can carry past bit 63; use the
            // software fallback helper.
            e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
        }

        if (accumulate)
        {
            // Add the shifted value onto the existing destination element.
            Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);

            e = context.Add(e, de);
        }

        res = EmitVectorInsert(context, res, e, index, op.Size);
    }

    context.Copy(GetVec(op.Rd), res);
}
// Generic narrowing unsigned shift right by immediate (SHRN/RSHRN fallback).
private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1); // Rounding constant: 2^(shift-1).

    int elems = 8 >> op.Size;

    // The Simd128 "2" form writes the high half and preserves the low half of Rd.
    int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

    Operand d = GetVec(op.Rd);

    Operand res = part == 0 ? context.VectorZero() : context.Copy(d);

    for (int index = 0; index < elems; index++)
    {
        // Source elements are one size step larger than the destination elements.
        Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);

        if (round)
        {
            e = context.Add(e, Const(roundConst));
        }

        e = context.ShiftRightUI(e, Const(shift));

        res = EmitVectorInsert(context, res, e, part + index, op.Size);
    }

    context.Copy(d, res);
}
// Option flags for the shift-right + saturate + narrow emitters.
[Flags]
private enum ShrImmSaturatingNarrowFlags
{
    Scalar = 1 << 0,    // Operate on a single element only.
    SignedSrc = 1 << 1, // Source elements are signed.
    SignedDst = 1 << 2, // Narrowed destination elements are signed.
    Round = 1 << 3,     // Add 2^(shift-1) before shifting.

    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxZx = Scalar,

    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxZx = 0
}
  652. private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
  653. {
  654. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
  655. }
// Shared shift-right + saturate + narrow path (SQSHRN/SQSHRUN/UQSHRN and their
// rounding variants), processed element by element in IR.
private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
    bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
    bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
    bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1); // Rounding constant: 2^(shift-1).

    int elems = !scalar ? 8 >> op.Size : 1;

    // The vector Simd128 "2" form writes the high half and preserves the low half of Rd.
    int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

    Operand d = GetVec(op.Rd);

    Operand res = part == 0 ? context.VectorZero() : context.Copy(d);

    for (int index = 0; index < elems; index++)
    {
        // Source elements are one size step larger than the destination elements.
        Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);

        if (op.Size <= 1 || !round)
        {
            // Narrow sources (or no rounding): value + roundConst cannot carry
            // past bit 63, so plain IR shifts are exact.
            if (round)
            {
                e = context.Add(e, Const(roundConst));
            }

            e = signedSrc ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
        }
        else /* if (op.Size == 2 && round) */
        {
            // 64-bit source elements with rounding can carry out; use the
            // software fallback helper.
            e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32
        }

        // Saturate to the destination element's range before narrowing.
        e = signedSrc ? EmitSignedSrcSatQ(context, e, op.Size, signedDst) : EmitUnsignedSrcSatQ(context, e, op.Size, signedDst);

        res = EmitVectorInsert(context, res, e, part + index, op.Size);
    }

    context.Copy(d, res);
}
  689. // dst64 = (Int(src64, signed) + roundConst) >> shift;
  690. private static Operand EmitShrImm64(
  691. ArmEmitterContext context,
  692. Operand value,
  693. bool signed,
  694. long roundConst,
  695. int shift)
  696. {
  697. MethodInfo info = signed
  698. ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))
  699. : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64));
  700. return context.Call(info, value, Const(roundConst), Const(shift));
  701. }
  702. private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
  703. {
  704. EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
  705. }
  706. private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
  707. {
  708. EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
  709. }
// Widening binary op with an immediate operand: each source element (from the
// low half, or the high half for the Simd128 "2" forms) is sign/zero-extended
// and combined with the immediate via 'emit' into a double-width result element.
private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand res = context.VectorZero();

    int elems = 8 >> op.Size;

    int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

    for (int index = 0; index < elems; index++)
    {
        Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);

        // Destination elements are one size step larger (op.Size + 1).
        res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1);
    }

    context.Copy(GetVec(op.Rd), res);
}
// SLI (shift left and insert): per element, dst = (src << shift) | (dst & ((1 << shift) - 1)).
// The low <shift> bits of the destination element are preserved; the rest is replaced
// by the shifted source.
private static void EmitSli(ArmEmitterContext context, bool scalar)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShl(op);
    int eSize = 8 << op.Size;

    // Mask of preserved destination bits: the low <shift> bits of each element.
    ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;

    if (shift >= eSize)
    {
        // NOTE(review): the SLI encoding yields shift < eSize, so this path looks
        // unreachable in practice; it only clears the upper 64 bits for the
        // 64-bit vector / scalar forms — confirm against the decoder.
        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            Operand res = context.VectorZeroUpper64(GetVec(op.Rd));

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else if (Optimizations.UseGfni && op.Size == 0)
    {
        // Byte elements with GFNI available: perform the per-byte left shift with
        // GF2P8AFFINEQB using a shift bit-matrix (SSE has no 8-bit shift).
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);

        Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);

        Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));

        // Presumably _masks_SliSri replicates the element mask across each 64-bit
        // lane — TODO confirm against its declaration.
        Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

        Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

        // Combine inserted (shifted) source bits with the preserved destination bits.
        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else if (Optimizations.UseSse2 && op.Size > 0)
    {
        // 16/32/64-bit elements: a native packed left shift exists.
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift));

        Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

        Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        // Generic fallback: shift and merge each element individually.
        Operand res = context.VectorZero();

        int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

        for (int index = 0; index < elems; index++)
        {
            Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

            Operand neShifted = context.ShiftLeft(ne, Const(shift));

            Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);

            Operand deMasked = context.BitwiseAnd(de, Const(mask));

            Operand e = context.BitwiseOr(neShifted, deMasked);

            res = EmitVectorInsert(context, res, e, index, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
// SRI (shift right and insert): per element, dst = (src >> shift) | (dst & topBits),
// where topBits are the high <shift> bits of the element. The high <shift> bits of
// the destination element are preserved; the rest is replaced by the shifted source.
private static void EmitSri(ArmEmitterContext context, bool scalar)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShr(op);
    int eSize = 8 << op.Size;

    // Mask of preserved destination bits: the top <shift> bits within an eSize-bit
    // element (the second AND limits the mask to the element width).
    ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize));

    if (shift >= eSize)
    {
        // NOTE(review): GetImmShr yields shift in [1, eSize]; when shift == eSize
        // the whole source is shifted out and the destination is left unchanged,
        // apart from clearing the upper 64 bits for 64-bit vector / scalar forms.
        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            Operand res = context.VectorZeroUpper64(GetVec(op.Rd));

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else if (Optimizations.UseGfni && op.Size == 0)
    {
        // Byte elements with GFNI available: a negative shift count encodes a
        // right shift in the GF2P8AFFINEQB bit-matrix (SSE has no 8-bit shift).
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);

        Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);

        Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));

        // Presumably _masks_SliSri replicates the element mask across each 64-bit
        // lane — TODO confirm against its declaration.
        Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

        Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

        // Combine inserted (shifted) source bits with the preserved destination bits.
        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else if (Optimizations.UseSse2 && op.Size > 0)
    {
        // 16/32/64-bit elements: a native packed logical right shift exists.
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));

        Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

        Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        // Generic fallback: shift and merge each element individually.
        Operand res = context.VectorZero();

        int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

        for (int index = 0; index < elems; index++)
        {
            Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

            // shift == 64 would be undefined for the IR shift, so emit 0 directly.
            Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL);

            Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);

            Operand deMasked = context.BitwiseAnd(de, Const(mask));

            Operand e = context.BitwiseOr(neShifted, deMasked);

            res = EmitVectorInsert(context, res, e, index, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
// Behavior selectors for the register-controlled shift-left emitters (EmitShlRegOp).
[Flags]
private enum ShlRegFlags
{
    None = 0,
    Scalar = 1 << 0,     // Operate on a single element instead of the whole vector.
    Signed = 1 << 1,     // Treat source elements as signed.
    Round = 1 << 2,      // Round the result of negative (right) shifts.
    Saturating = 1 << 3  // Saturate results that overflow the element range.
}
  854. private static void EmitShlRegOp(ArmEmitterContext context, ShlRegFlags flags = ShlRegFlags.None)
  855. {
  856. bool scalar = flags.HasFlag(ShlRegFlags.Scalar);
  857. bool signed = flags.HasFlag(ShlRegFlags.Signed);
  858. bool round = flags.HasFlag(ShlRegFlags.Round);
  859. bool saturating = flags.HasFlag(ShlRegFlags.Saturating);
  860. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  861. Operand res = context.VectorZero();
  862. int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
  863. for (int index = 0; index < elems; index++)
  864. {
  865. Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
  866. Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, size: 0);
  867. Operand e = !saturating
  868. ? EmitShlReg(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed)
  869. : EmitShlRegSatQ(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed);
  870. res = EmitVectorInsert(context, res, e, index, op.Size);
  871. }
  872. context.Copy(GetVec(op.Rd), res);
  873. }
// long SignedShlReg(long op, int shiftLsB, bool round, int size);
// ulong UnsignedShlReg(ulong op, int shiftLsB, bool round, int size);
// Register-controlled shift: negative shiftLsB shifts right (optionally rounding),
// zero leaves the value unchanged, positive shifts left (saturating to 0 when the
// count reaches the element size).
private static Operand EmitShlReg(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
{
    int eSize = 8 << size;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(shiftLsB.Type == OperandType.I32);
    Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

    Operand lbl1 = Label();
    Operand lblEnd = Label();

    Operand eSizeOp = Const(eSize);
    Operand zero = Const(0);
    Operand zeroL = Const(0L);

    // Default result is the unmodified input (covers shiftLsB == 0).
    Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);

    context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);

    // shiftLsB < 0: shift right by the magnitude instead.
    context.Copy(res, signed
        ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
        : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));

    context.Branch(lblEnd);

    context.MarkLabel(lbl1);

    // shiftLsB == 0: keep the default result.
    context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);

    // shiftLsB > 0: shift left; a count >= eSize shifts every bit out, so force 0.
    Operand shl = context.ShiftLeft(op, shiftLsB);
    Operand isGreaterOrEqual = context.ICompareGreaterOrEqual(shiftLsB, eSizeOp);

    context.Copy(res, context.ConditionalSelect(isGreaterOrEqual, zeroL, shl));

    context.Branch(lblEnd);

    context.MarkLabel(lblEnd);

    return res;
}
// long SignedShlRegSatQ(long op, int shiftLsB, bool round, int size);
// ulong UnsignedShlRegSatQ(ulong op, int shiftLsB, bool round, int size);
// Saturating register-controlled shift: negative counts shift right (optionally
// rounding), zero is a no-op, and positive counts shift left with saturation
// when the result would not fit in the element.
private static Operand EmitShlRegSatQ(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
{
    int eSize = 8 << size;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(shiftLsB.Type == OperandType.I32);
    Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

    Operand lbl1 = Label();
    Operand lbl2 = Label();
    Operand lblEnd = Label();

    Operand eSizeOp = Const(eSize);
    Operand zero = Const(0);

    // Default result is the unmodified input (covers shiftLsB == 0).
    Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);

    context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);

    // shiftLsB < 0: shift right by the magnitude (right shifts cannot overflow).
    context.Copy(res, signed
        ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
        : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));

    context.Branch(lblEnd);

    context.MarkLabel(lbl1);

    // shiftLsB == 0: keep the default result.
    context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);

    context.BranchIf(lbl2, shiftLsB, eSizeOp, Comparison.Less);

    // shiftLsB >= eSize: every bit shifts out, so saturate based on the sign of op.
    context.Copy(res, signed
        ? EmitSignedSignSatQ(context, op, size)
        : EmitUnsignedSignSatQ(context, op, size));

    context.Branch(lblEnd);

    context.MarkLabel(lbl2);

    // 0 < shiftLsB < eSize: do the shift, then detect overflow.
    Operand shl = context.ShiftLeft(op, shiftLsB);

    if (eSize == 64)
    {
        // Full 64-bit elements: overflow happened iff shifting back does not
        // recover the original value; saturate on the sign of op in that case.
        Operand sarOrShr = signed
            ? context.ShiftRightSI(shl, shiftLsB)
            : context.ShiftRightUI(shl, shiftLsB);

        context.Copy(res, shl);

        context.BranchIf(lblEnd, sarOrShr, op, Comparison.Equal);

        context.Copy(res, signed
            ? EmitSignedSignSatQ(context, op, size)
            : EmitUnsignedSignSatQ(context, op, size));
    }
    else
    {
        // Narrow elements: the 64-bit shift result is exact, so saturate it to
        // the element range directly.
        context.Copy(res, signed
            ? EmitSignedSrcSatQ(context, shl, size, signedDst: true)
            : EmitUnsignedSrcSatQ(context, shl, size, signedDst: false));
    }

    context.Branch(lblEnd);

    context.MarkLabel(lblEnd);

    return res;
}
// shift := [1, 128]; eSize := {8, 16, 32, 64}.
// long SignedShrReg(long op, int shift, bool round, int eSize);
// Signed right shift by a runtime amount, with optional rounding; counts that
// consume the whole element produce 0 (rounding) or the sign fill (-1 / 0).
private static Operand EmitSignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
{
    if (round)
    {
        Operand lblEnd = Label();

        Operand eSizeOp = Const(eSize);
        Operand zeroL = Const(0L);
        Operand one = Const(1);
        Operand oneL = Const(1L);

        // Default result 0 covers shift >= eSize (rounding shifts everything out).
        Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);

        context.BranchIf(lblEnd, shift, eSizeOp, Comparison.GreaterOrEqual);

        // Rounded shift: (op + (1 << (shift - 1))) >> shift.
        Operand roundConst = context.ShiftLeft(oneL, context.Subtract(shift, one));

        Operand add = context.Add(op, roundConst);

        Operand sar = context.ShiftRightSI(add, shift);

        if (eSize == 64)
        {
            // Full-width elements: op + roundConst may overflow 64 bits. The
            // condition ((-op) & (op ^ add)) < 0 appears to detect that carry-out
            // — NOTE(review): confirm; when it fires, the logical shift of the
            // wrapped sum gives the correct rounded result.
            Operand shr = context.ShiftRightUI(add, shift);

            Operand left = context.BitwiseAnd(context.Negate(op), context.BitwiseExclusiveOr(op, add));
            Operand isLess = context.ICompareLess(left, zeroL);

            context.Copy(res, context.ConditionalSelect(isLess, shr, sar));
        }
        else
        {
            // Narrow elements cannot overflow the 64-bit addition.
            context.Copy(res, sar);
        }

        context.Branch(lblEnd);

        context.MarkLabel(lblEnd);

        return res;
    }
    else
    {
        Operand lblEnd = Label();

        Operand eSizeOp = Const(eSize);
        Operand zeroL = Const(0L);
        Operand negOneL = Const(-1L);

        // Default result: plain arithmetic shift (valid while shift < eSize).
        Operand sar = context.ShiftRightSI(op, shift);

        Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sar);

        context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);

        // shift >= eSize: the result is the sign extension, -1 or 0.
        Operand isLess = context.ICompareLess(op, zeroL);

        context.Copy(res, context.ConditionalSelect(isLess, negOneL, zeroL));

        context.Branch(lblEnd);

        context.MarkLabel(lblEnd);

        return res;
    }
}
  998. // shift := [1, 128]; eSize := {8, 16, 32, 64}.
  999. // ulong UnsignedShrReg(ulong op, int shift, bool round, int eSize);
  1000. private static Operand EmitUnsignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
  1001. {
  1002. if (round)
  1003. {
  1004. Operand lblEnd = Label();
  1005. Operand zeroUL = Const(0UL);
  1006. Operand one = Const(1);
  1007. Operand oneUL = Const(1UL);
  1008. Operand eSizeMaxOp = Const(64);
  1009. Operand oneShl63UL = Const(1UL << 63);
  1010. Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);
  1011. context.BranchIf(lblEnd, shift, eSizeMaxOp, Comparison.Greater);
  1012. Operand roundConst = context.ShiftLeft(oneUL, context.Subtract(shift, one));
  1013. Operand add = context.Add(op, roundConst);
  1014. Operand shr = context.ShiftRightUI(add, shift);
  1015. Operand isEqual = context.ICompareEqual(shift, eSizeMaxOp);
  1016. context.Copy(res, context.ConditionalSelect(isEqual, zeroUL, shr));
  1017. if (eSize == 64)
  1018. {
  1019. context.BranchIf(lblEnd, add, op, Comparison.GreaterOrEqualUI);
  1020. Operand right = context.BitwiseOr(shr, context.ShiftRightUI(oneShl63UL, context.Subtract(shift, one)));
  1021. context.Copy(res, context.ConditionalSelect(isEqual, oneUL, right));
  1022. }
  1023. context.Branch(lblEnd);
  1024. context.MarkLabel(lblEnd);
  1025. return res;
  1026. }
  1027. else
  1028. {
  1029. Operand lblEnd = Label();
  1030. Operand eSizeOp = Const(eSize);
  1031. Operand zeroUL = Const(0UL);
  1032. Operand shr = context.ShiftRightUI(op, shift);
  1033. Operand res = context.Copy(context.AllocateLocal(OperandType.I64), shr);
  1034. context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);
  1035. context.Copy(res, zeroUL);
  1036. context.Branch(lblEnd);
  1037. context.MarkLabel(lblEnd);
  1038. return res;
  1039. }
  1040. }
  1041. }
  1042. }