// InstEmitSimdShift.cs (39 KB)
  1. // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
  2. using ChocolArm64.Decoders;
  3. using ChocolArm64.State;
  4. using ChocolArm64.Translation;
  5. using System;
  6. using System.Reflection.Emit;
  7. using System.Runtime.Intrinsics;
  8. using System.Runtime.Intrinsics.X86;
  9. using static ChocolArm64.Instructions.InstEmitSimdHelper;
  10. namespace ChocolArm64.Instructions
  11. {
  12. static partial class InstEmit
  13. {
  #region "Masks"
  // Byte-shuffle control words used by Rshrn_V and Shrn_V, indexed by op.Size. The selected entry is
  // replicated into both 64-bit halves of the Ssse3.Shuffle control vector. Reading each entry from its
  // least significant byte upwards gives the source byte indices that are gathered:
  //   [0] -> 0, 2, 4, 6, 8, 10, 12, 14
  //   [1] -> 0, 1, 4, 5, 8, 9, 12, 13
  //   [2] -> 0, 1, 2, 3, 8, 9, 10, 11
  private static readonly long[] _masks_RshrnShrn =
  {
      0x0E0C0A0806040200L,
      0x0D0C090805040100L,
      0x0B0A090803020100L
  };
  #endregion
  // Emits IL for Rshrn. On the SSSE3 path every double-width element of Rn has 1 << (shift - 1) added,
  // is shifted right logically by the immediate, and the low half of each element is gathered with a byte
  // shuffle before being merged with Rd. Without SSSE3 the per-element emitter is used with rounding on.
  public static void Rshrn_V(ILEmitterCtx context)
  {
      if (!Optimizations.UseSsse3)
      {
          EmitVectorShrImmNarrowOpZx(context, round: true);

          return;
      }

      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      Type wideVector = VectorUIntTypesPerSizeLog2[op.Size + 1];
      Type wideScalar = UIntTypesPerSizeLog2[op.Size + 1];

      var moveLowToHigh = typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh));
      var broadcast     = typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { wideScalar });
      var add           = typeof(Sse2).GetMethod(nameof(Sse2.Add), new Type[] { wideVector, wideVector });
      var shiftRight    = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), new Type[] { wideVector, typeof(byte) });
      var setVector     = typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), new Type[] { typeof(long), typeof(long) });
      var shuffle       = typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) });

      // For the 128-bit register form the gathered half is placed above the low half of Rd; otherwise
      // it becomes the low half and the upper half is the zero prepared below.
      var merge = typeof(Sse).GetMethod(op.RegisterSize == RegisterSize.Simd128
          ? nameof(Sse.MoveLowToHigh)
          : nameof(Sse.MoveHighToLow));

      int  shift      = GetImmShr(op);
      long roundConst = 1L << (shift - 1);

      // Left merge operand: low half of Rd with a zeroed upper half.
      context.EmitLdvec(op.Rd);
      VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
      context.EmitCall(moveLowToHigh);

      // (Rn + roundConst) >> shift, computed on the double-width elements.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I8(roundConst);
      context.EmitCall(broadcast);
      context.EmitCall(add);
      context.EmitLdc_I4(shift);
      context.EmitCall(shiftRight);

      // Same shuffle control word in both halves, so both halves of the result hold the gathered bytes.
      context.EmitLdc_I8(_masks_RshrnShrn[op.Size]);
      context.Emit(OpCodes.Dup);
      context.EmitCall(setVector);
      context.EmitCall(shuffle);

      context.EmitCall(merge);
      context.EmitStvec(op.Rd);
  }
  // Emits IL for the scalar Shl: the zero-extended operand supplied by EmitScalarUnaryOpZx is shifted
  // left by the immediate decoded from the current opcode.
  public static void Shl_S(ILEmitterCtx context)
  {
      int shift = GetImmShl((OpCodeSimdShImm64)context.CurrOp);

      EmitScalarUnaryOpZx(context, () =>
      {
          context.EmitLdc_I4(shift);

          context.Emit(OpCodes.Shl);
      });
  }
  // Emits IL for the vector Shl. Elements wider than a byte go through Sse2.ShiftLeftLogical when SSE2 is
  // enabled; byte elements (op.Size == 0) or a disabled SSE2 use the per-element emitter.
  public static void Shl_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      int shift = GetImmShl(op);

      if (!Optimizations.UseSse2 || op.Size <= 0)
      {
          EmitVectorUnaryOpZx(context, () =>
          {
              context.EmitLdc_I4(shift);

              context.Emit(OpCodes.Shl);
          });

          return;
      }

      var shiftLeft = typeof(Sse2).GetMethod(
          nameof(Sse2.ShiftLeftLogical),
          new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) });

      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(shift);
      context.EmitCall(shiftLeft);
      context.EmitStvec(op.Rd);

      // The 64-bit register form must not leave data in the upper half of Rd.
      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Emits IL for Shll: source elements are zero-extended to twice their width and shifted left by the
  // source element width in bits (8 << op.Size).
  public static void Shll_V(ILEmitterCtx context)
  {
      OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

      int shift = 8 << op.Size;

      if (!Optimizations.UseSse41)
      {
          EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);

          return;
      }

      Type[] wideShiftArgs = { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
      Type[] narrowArg     = { VectorUIntTypesPerSizeLog2[op.Size] };

      // Widening conversion to use for each source element size.
      string[] widenNames =
      {
          nameof(Sse41.ConvertToVector128Int16),
          nameof(Sse41.ConvertToVector128Int32),
          nameof(Sse41.ConvertToVector128Int64)
      };

      context.EmitLdvec(op.Rn);

      if (op.RegisterSize == RegisterSize.Simd128)
      {
          // Shift the whole register right by 8 bytes so the conversion below reads the upper half.
          context.Emit(OpCodes.Ldc_I4_8);
          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), wideShiftArgs));
      }

      context.EmitCall(typeof(Sse41).GetMethod(widenNames[op.Size], narrowArg));

      context.EmitLdc_I4(shift);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), wideShiftArgs));

      context.EmitStvec(op.Rd);
  }
  // Emits IL for Shrn. On the SSSE3 path every double-width element of Rn is shifted right logically by
  // the immediate and the low half of each element is gathered with a byte shuffle before being merged
  // with Rd. Without SSSE3 the per-element emitter is used with rounding off.
  public static void Shrn_V(ILEmitterCtx context)
  {
      if (!Optimizations.UseSsse3)
      {
          EmitVectorShrImmNarrowOpZx(context, round: false);

          return;
      }

      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      Type wideVector = VectorUIntTypesPerSizeLog2[op.Size + 1];

      var moveLowToHigh = typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh));
      var shiftRight    = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), new Type[] { wideVector, typeof(byte) });
      var setVector     = typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), new Type[] { typeof(long), typeof(long) });
      var shuffle       = typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) });

      // For the 128-bit register form the gathered half is placed above the low half of Rd; otherwise
      // it becomes the low half and the upper half is the zero prepared below.
      var merge = typeof(Sse).GetMethod(op.RegisterSize == RegisterSize.Simd128
          ? nameof(Sse.MoveLowToHigh)
          : nameof(Sse.MoveHighToLow));

      int shift = GetImmShr(op);

      // Left merge operand: low half of Rd with a zeroed upper half.
      context.EmitLdvec(op.Rd);
      VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
      context.EmitCall(moveLowToHigh);

      // Rn >> shift, computed on the double-width elements.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(shift);
      context.EmitCall(shiftRight);

      // Same shuffle control word in both halves, so both halves of the result hold the gathered bytes.
      context.EmitLdc_I8(_masks_RshrnShrn[op.Size]);
      context.Emit(OpCodes.Dup);
      context.EmitCall(setVector);
      context.EmitCall(shuffle);

      context.EmitCall(merge);
      context.EmitStvec(op.Rd);
  }
  // Emits IL for Sli: each element of Rn is shifted left by the immediate and OR-ed with the low
  // `shift` bits of the matching Rd element, then written back to Rd.
  public static void Sli_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;
      int shift = GetImmShl(op);

      // Bits of the destination that survive the insert (the low `shift` bits); none when shift is 0.
      ulong keepMask = 0;

      if (shift != 0)
      {
          keepMask = ulong.MaxValue >> (64 - shift);
      }

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractZx(context, op.Rn, lane, op.Size);
          context.EmitLdc_I4(shift);
          context.Emit(OpCodes.Shl);

          EmitVectorExtractZx(context, op.Rd, lane, op.Size);
          context.EmitLdc_I8((long)keepMask);
          context.Emit(OpCodes.And);

          context.Emit(OpCodes.Or);

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Emits IL for Sqrshl: for every element, calls SoftFallback.SignedShlRegSatQ with the sign-extended
  // Rn and Rm elements, the constant 1, op.Size and the state argument, and stores the result in Rd.
  // NOTE(review): the constant 1 is presumably the rounding flag (Sqshl_V passes 0) - confirm in SoftFallback.
  public static void Sqrshl_V(ILEmitterCtx context)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractSx(context, op.Rn, lane, op.Size);
          EmitVectorExtractSx(context, op.Rm, lane, op.Size);

          context.Emit(OpCodes.Ldc_I4_1);
          context.EmitLdc_I4(op.Size);
          context.EmitLdarg(TranslatedSub.StateArgIdx);

          SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlRegSatQ));

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Sqrshrn (scalar): delegates to the rounding saturating-narrow emitter with the ScalarSxSx flags.
  public static void Sqrshrn_S(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  // Sqrshrn (vector): delegates to the rounding saturating-narrow emitter with the VectorSxSx flags.
  public static void Sqrshrn_V(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  // Sqrshrun (scalar): delegates to the rounding saturating-narrow emitter with the ScalarSxZx flags.
  public static void Sqrshrun_S(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  // Sqrshrun (vector): delegates to the rounding saturating-narrow emitter with the VectorSxZx flags.
  public static void Sqrshrun_V(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  // Emits IL for Sqshl: for every element, calls SoftFallback.SignedShlRegSatQ with the sign-extended
  // Rn and Rm elements, the constant 0, op.Size and the state argument, and stores the result in Rd.
  // NOTE(review): the constant 0 is presumably the rounding flag (Sqrshl_V passes 1) - confirm in SoftFallback.
  public static void Sqshl_V(ILEmitterCtx context)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractSx(context, op.Rn, lane, op.Size);
          EmitVectorExtractSx(context, op.Rm, lane, op.Size);

          context.Emit(OpCodes.Ldc_I4_0);
          context.EmitLdc_I4(op.Size);
          context.EmitLdarg(TranslatedSub.StateArgIdx);

          SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlRegSatQ));

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Sqshrn (scalar): delegates to the saturating-narrow emitter with the ScalarSxSx flags.
  public static void Sqshrn_S(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  // Sqshrn (vector): delegates to the saturating-narrow emitter with the VectorSxSx flags.
  public static void Sqshrn_V(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  // Sqshrun (scalar): delegates to the saturating-narrow emitter with the ScalarSxZx flags.
  public static void Sqshrun_S(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  // Sqshrun (vector): delegates to the saturating-narrow emitter with the VectorSxZx flags.
  public static void Sqshrun_V(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  // Emits IL for Srshl: for every element, calls SoftFallback.SignedShlReg with the sign-extended Rn and
  // Rm elements, the constant 1 and op.Size, and stores the result in Rd.
  // NOTE(review): the constant 1 is presumably the rounding flag (Sshl_V passes 0) - confirm in SoftFallback.
  public static void Srshl_V(ILEmitterCtx context)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractSx(context, op.Rn, lane, op.Size);
          EmitVectorExtractSx(context, op.Rm, lane, op.Size);

          context.Emit(OpCodes.Ldc_I4_1);
          context.EmitLdc_I4(op.Size);

          SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlReg));

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Srshr (scalar): signed scalar shift right by immediate with the Round flag set.
  public static void Srshr_S(ILEmitterCtx context)
      => EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
  // Emits IL for the vector Srshr. With SSE2 and 16- or 32-bit elements the result is
  // (Rn >> shift, arithmetic) plus bit (shift - 1) of Rn; any other case uses the per-element emitter.
  public static void Srshr_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      bool sseEligible = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

      if (!sseEligible)
      {
          EmitVectorShrImmOpSx(context, ShrImmFlags.Round);

          return;
      }

      Type vector = VectorIntTypesPerSizeLog2[op.Size];

      Type[] shiftArgs = { vector, typeof(byte) };

      var shiftLeft       = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs);
      var shiftRightLogic = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs);
      var shiftRightArith = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftArgs);
      var add             = typeof(Sse2).GetMethod(nameof(Sse2.Add), new Type[] { vector, vector });

      int shift    = GetImmShr(op);
      int elemBits = 8 << op.Size;

      // Rounding term: move bit (shift - 1) to the top of the element, then down to bit 0.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(elemBits - shift);
      context.EmitCall(shiftLeft);
      context.EmitLdc_I4(elemBits - 1);
      context.EmitCall(shiftRightLogic);

      // Arithmetically shifted value.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(shift);
      context.EmitCall(shiftRightArith);

      context.EmitCall(add);
      context.EmitStvec(op.Rd);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Srsra (scalar): signed scalar shift right by immediate with the Round and Accumulate flags set.
  public static void Srsra_S(ILEmitterCtx context)
      => EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  // Emits IL for the vector Srsra. With SSE2 and 16- or 32-bit elements the result is
  // Rd + (Rn >> shift, arithmetic) + bit (shift - 1) of Rn; any other case uses the per-element emitter.
  public static void Srsra_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      bool sseEligible = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

      if (!sseEligible)
      {
          EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

          return;
      }

      Type vector = VectorIntTypesPerSizeLog2[op.Size];

      Type[] shiftArgs = { vector, typeof(byte) };

      var shiftLeft       = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs);
      var shiftRightLogic = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs);
      var shiftRightArith = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftArgs);
      var add             = typeof(Sse2).GetMethod(nameof(Sse2.Add), new Type[] { vector, vector });

      int shift    = GetImmShr(op);
      int elemBits = 8 << op.Size;

      // Accumulator operand.
      context.EmitLdvec(op.Rd);

      // Rounding term: move bit (shift - 1) to the top of the element, then down to bit 0.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(elemBits - shift);
      context.EmitCall(shiftLeft);
      context.EmitLdc_I4(elemBits - 1);
      context.EmitCall(shiftRightLogic);

      // Arithmetically shifted value.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(shift);
      context.EmitCall(shiftRightArith);

      // (rounding + shifted) first, then the accumulator.
      context.EmitCall(add);
      context.EmitCall(add);

      context.EmitStvec(op.Rd);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Emits IL for Sshl: for every element, calls SoftFallback.SignedShlReg with the sign-extended Rn and
  // Rm elements, the constant 0 and op.Size, and stores the result in Rd.
  // NOTE(review): the constant 0 is presumably the rounding flag (Srshl_V passes 1) - confirm in SoftFallback.
  public static void Sshl_V(ILEmitterCtx context)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractSx(context, op.Rn, lane, op.Size);
          EmitVectorExtractSx(context, op.Rm, lane, op.Size);

          context.Emit(OpCodes.Ldc_I4_0);
          context.EmitLdc_I4(op.Size);

          SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlReg));

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Emits IL for Sshll: source elements are sign-extended to twice their width and shifted left by the
  // immediate (the shift is skipped entirely when the immediate is 0).
  public static void Sshll_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      int shift = GetImmShl(op);

      if (!Optimizations.UseSse41)
      {
          EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), shift);

          return;
      }

      Type[] wideShiftArgs = { VectorIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
      Type[] narrowArg     = { VectorIntTypesPerSizeLog2[op.Size] };

      // Widening conversion to use for each source element size.
      string[] widenNames =
      {
          nameof(Sse41.ConvertToVector128Int16),
          nameof(Sse41.ConvertToVector128Int32),
          nameof(Sse41.ConvertToVector128Int64)
      };

      context.EmitLdvec(op.Rn);

      if (op.RegisterSize == RegisterSize.Simd128)
      {
          // Shift the whole register right by 8 bytes so the conversion below reads the upper half.
          context.Emit(OpCodes.Ldc_I4_8);
          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), wideShiftArgs));
      }

      context.EmitCall(typeof(Sse41).GetMethod(widenNames[op.Size], narrowArg));

      if (shift != 0)
      {
          context.EmitLdc_I4(shift);
          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), wideShiftArgs));
      }

      context.EmitStvec(op.Rd);
  }
  // Sshr (scalar): plain signed scalar shift right by immediate.
  public static void Sshr_S(ILEmitterCtx context)
      => EmitShrImmOp(context, ShrImmFlags.ScalarSx);
  // Emits IL for the vector Sshr. With SSE2 and 16- or 32-bit elements a single
  // Sse2.ShiftRightArithmetic is used; any other case uses the per-element emitter.
  public static void Sshr_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      bool sseEligible = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

      if (!sseEligible)
      {
          EmitShrImmOp(context, ShrImmFlags.VectorSx);

          return;
      }

      Type[] shiftArgs = { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };

      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(GetImmShr(op));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftArgs));
      context.EmitStvec(op.Rd);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Ssra (scalar): signed scalar shift right by immediate with the Accumulate flag set.
  public static void Ssra_S(ILEmitterCtx context)
      => EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
  // Emits IL for the vector Ssra. With SSE2 and 16- or 32-bit elements the result is
  // Rd + (Rn >> shift, arithmetic); any other case uses the per-element emitter.
  public static void Ssra_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      bool sseEligible = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

      if (!sseEligible)
      {
          EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);

          return;
      }

      Type vector = VectorIntTypesPerSizeLog2[op.Size];

      var shiftRight = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), new Type[] { vector, typeof(byte) });
      var add        = typeof(Sse2).GetMethod(nameof(Sse2.Add), new Type[] { vector, vector });

      context.EmitLdvec(op.Rd);

      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(GetImmShr(op));
      context.EmitCall(shiftRight);

      context.EmitCall(add);
      context.EmitStvec(op.Rd);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Emits IL for Uqrshl: for every element, calls SoftFallback.UnsignedShlRegSatQ with the zero-extended
  // Rn and Rm elements, the constant 1, op.Size and the state argument, and stores the result in Rd.
  // NOTE(review): the constant 1 is presumably the rounding flag (Uqshl_V passes 0) - confirm in SoftFallback.
  public static void Uqrshl_V(ILEmitterCtx context)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractZx(context, op.Rn, lane, op.Size);
          EmitVectorExtractZx(context, op.Rm, lane, op.Size);

          context.Emit(OpCodes.Ldc_I4_1);
          context.EmitLdc_I4(op.Size);
          context.EmitLdarg(TranslatedSub.StateArgIdx);

          SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlRegSatQ));

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Uqrshrn (scalar): delegates to the rounding saturating-narrow emitter with the ScalarZxZx flags.
  public static void Uqrshrn_S(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  // Uqrshrn (vector): delegates to the rounding saturating-narrow emitter with the VectorZxZx flags.
  public static void Uqrshrn_V(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  // Emits IL for Uqshl: for every element, calls SoftFallback.UnsignedShlRegSatQ with the zero-extended
  // Rn and Rm elements, the constant 0, op.Size and the state argument, and stores the result in Rd.
  // NOTE(review): the constant 0 is presumably the rounding flag (Uqrshl_V passes 1) - confirm in SoftFallback.
  public static void Uqshl_V(ILEmitterCtx context)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractZx(context, op.Rn, lane, op.Size);
          EmitVectorExtractZx(context, op.Rm, lane, op.Size);

          context.Emit(OpCodes.Ldc_I4_0);
          context.EmitLdc_I4(op.Size);
          context.EmitLdarg(TranslatedSub.StateArgIdx);

          SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlRegSatQ));

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Uqshrn (scalar): delegates to the saturating-narrow emitter with the ScalarZxZx flags.
  public static void Uqshrn_S(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  // Uqshrn (vector): delegates to the saturating-narrow emitter with the VectorZxZx flags.
  public static void Uqshrn_V(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  // Emits IL for Urshl: for every element, calls SoftFallback.UnsignedShlReg with the zero-extended Rn
  // and Rm elements, the constant 1 and op.Size, and stores the result in Rd.
  // NOTE(review): the constant 1 is presumably the rounding flag (Ushl_V passes 0) - confirm in SoftFallback.
  public static void Urshl_V(ILEmitterCtx context)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractZx(context, op.Rn, lane, op.Size);
          EmitVectorExtractZx(context, op.Rm, lane, op.Size);

          context.Emit(OpCodes.Ldc_I4_1);
          context.EmitLdc_I4(op.Size);

          SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlReg));

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Urshr (scalar): unsigned scalar shift right by immediate with the Round flag set.
  public static void Urshr_S(ILEmitterCtx context)
      => EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
  // Emits IL for the vector Urshr. With SSE2 and elements wider than a byte the result is
  // (Rn >> shift, logical) plus bit (shift - 1) of Rn; otherwise the per-element emitter is used.
  public static void Urshr_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || op.Size <= 0)
      {
          EmitVectorShrImmOpZx(context, ShrImmFlags.Round);

          return;
      }

      Type vector = VectorUIntTypesPerSizeLog2[op.Size];

      Type[] shiftArgs = { vector, typeof(byte) };

      var shiftLeft  = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs);
      var shiftRight = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs);
      var add        = typeof(Sse2).GetMethod(nameof(Sse2.Add), new Type[] { vector, vector });

      int shift    = GetImmShr(op);
      int elemBits = 8 << op.Size;

      // Rounding term: move bit (shift - 1) to the top of the element, then down to bit 0.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(elemBits - shift);
      context.EmitCall(shiftLeft);
      context.EmitLdc_I4(elemBits - 1);
      context.EmitCall(shiftRight);

      // Logically shifted value.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(shift);
      context.EmitCall(shiftRight);

      context.EmitCall(add);
      context.EmitStvec(op.Rd);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Ursra (scalar): unsigned scalar shift right by immediate with the Round and Accumulate flags set.
  public static void Ursra_S(ILEmitterCtx context)
      => EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  // Emits IL for the vector Ursra. With SSE2 and elements wider than a byte the result is
  // Rd + (Rn >> shift, logical) + bit (shift - 1) of Rn; otherwise the per-element emitter is used.
  public static void Ursra_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || op.Size <= 0)
      {
          EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

          return;
      }

      Type vector = VectorUIntTypesPerSizeLog2[op.Size];

      Type[] shiftArgs = { vector, typeof(byte) };

      var shiftLeft  = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs);
      var shiftRight = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs);
      var add        = typeof(Sse2).GetMethod(nameof(Sse2.Add), new Type[] { vector, vector });

      int shift    = GetImmShr(op);
      int elemBits = 8 << op.Size;

      // Accumulator operand.
      context.EmitLdvec(op.Rd);

      // Rounding term: move bit (shift - 1) to the top of the element, then down to bit 0.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(elemBits - shift);
      context.EmitCall(shiftLeft);
      context.EmitLdc_I4(elemBits - 1);
      context.EmitCall(shiftRight);

      // Logically shifted value.
      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(shift);
      context.EmitCall(shiftRight);

      // (rounding + shifted) first, then the accumulator.
      context.EmitCall(add);
      context.EmitCall(add);

      context.EmitStvec(op.Rd);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Emits IL for Ushl: for every element, calls SoftFallback.UnsignedShlReg with the zero-extended Rn
  // and Rm elements, the constant 0 and op.Size, and stores the result in Rd.
  // NOTE(review): the constant 0 is presumably the rounding flag (Urshl_V passes 1) - confirm in SoftFallback.
  public static void Ushl_V(ILEmitterCtx context)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      int lanes = (op.GetBitsCount() >> 3) >> op.Size;

      for (int lane = 0; lane < lanes; lane++)
      {
          EmitVectorExtractZx(context, op.Rn, lane, op.Size);
          EmitVectorExtractZx(context, op.Rm, lane, op.Size);

          context.Emit(OpCodes.Ldc_I4_0);
          context.EmitLdc_I4(op.Size);

          SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlReg));

          EmitVectorInsert(context, op.Rd, lane, op.Size);
      }

      if (op.RegisterSize != RegisterSize.Simd64)
      {
          return;
      }

      EmitVectorZeroUpper(context, op.Rd);
  }
  // Emits IL for Ushll: source elements are zero-extended to twice their width and shifted left by the
  // immediate (the shift is skipped entirely when the immediate is 0).
  public static void Ushll_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      int shift = GetImmShl(op);

      if (!Optimizations.UseSse41)
      {
          EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);

          return;
      }

      Type[] wideShiftArgs = { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
      Type[] narrowArg     = { VectorUIntTypesPerSizeLog2[op.Size] };

      // Widening conversion to use for each source element size.
      string[] widenNames =
      {
          nameof(Sse41.ConvertToVector128Int16),
          nameof(Sse41.ConvertToVector128Int32),
          nameof(Sse41.ConvertToVector128Int64)
      };

      context.EmitLdvec(op.Rn);

      if (op.RegisterSize == RegisterSize.Simd128)
      {
          // Shift the whole register right by 8 bytes so the conversion below reads the upper half.
          context.Emit(OpCodes.Ldc_I4_8);
          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), wideShiftArgs));
      }

      context.EmitCall(typeof(Sse41).GetMethod(widenNames[op.Size], narrowArg));

      if (shift != 0)
      {
          context.EmitLdc_I4(shift);
          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), wideShiftArgs));
      }

      context.EmitStvec(op.Rd);
  }
  // Ushr (scalar): plain unsigned scalar shift right by immediate.
  public static void Ushr_S(ILEmitterCtx context)
      => EmitShrImmOp(context, ShrImmFlags.ScalarZx);
  // Emits IL for the vector Ushr. With SSE2 and elements wider than a byte a single
  // Sse2.ShiftRightLogical is used; otherwise the per-element emitter is used.
  public static void Ushr_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || op.Size <= 0)
      {
          EmitShrImmOp(context, ShrImmFlags.VectorZx);

          return;
      }

      Type[] shiftArgs = { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };

      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(GetImmShr(op));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));
      context.EmitStvec(op.Rd);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Usra (scalar): unsigned scalar shift right by immediate with the Accumulate flag set.
  public static void Usra_S(ILEmitterCtx context)
      => EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
  // Emits IL for the vector Usra. With SSE2 and elements wider than a byte the result is
  // Rd + (Rn >> shift, logical); otherwise the per-element emitter is used.
  public static void Usra_V(ILEmitterCtx context)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || op.Size <= 0)
      {
          EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);

          return;
      }

      Type vector = VectorUIntTypesPerSizeLog2[op.Size];

      var shiftRight = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), new Type[] { vector, typeof(byte) });
      var add        = typeof(Sse2).GetMethod(nameof(Sse2.Add), new Type[] { vector, vector });

      context.EmitLdvec(op.Rd);

      context.EmitLdvec(op.Rn);
      context.EmitLdc_I4(GetImmShr(op));
      context.EmitCall(shiftRight);

      context.EmitCall(add);
      context.EmitStvec(op.Rd);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Option bits consumed by EmitShrImmOp, plus named combinations for the scalar/vector and
  // signed/unsigned variants.
  [Flags]
  private enum ShrImmFlags
  {
      // Individual option bits.
      Scalar     = 0x1,
      Signed     = 0x2,
      Round      = 0x4,
      Accumulate = 0x8,

      // Preset combinations of Scalar and Signed.
      ScalarSx = Scalar | Signed,
      ScalarZx = Scalar,
      VectorSx = Signed,
      VectorZx = 0
  }
  // Runs EmitShrImmOp with the ScalarSx preset combined with the caller's flags.
  private static void EmitScalarShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
      => EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
  // Runs EmitShrImmOp with the ScalarZx preset combined with the caller's flags.
  private static void EmitScalarShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
      => EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
  // Runs EmitShrImmOp with the VectorSx preset combined with the caller's flags.
  private static void EmitVectorShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
      => EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
  // Runs EmitShrImmOp with the VectorZx preset combined with the caller's flags.
  private static void EmitVectorShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
      => EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
  // Per-element emitter shared by the immediate shift-right instructions.
  // For each element of Rn (a single element when the Scalar flag is set) it emits: extract (signed or
  // unsigned per the Signed flag), optionally add 1 << (shift - 1) when Round is set, shift right by the
  // immediate, optionally add the matching Rd element when Accumulate is set, then insert into Rd.
  // Elements with op.Size above 2 are shifted through EmitShrImm64 instead of a plain IL shift.
  private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      bool isScalar    = flags.HasFlag(ShrImmFlags.Scalar);
      bool isSigned    = flags.HasFlag(ShrImmFlags.Signed);
      bool doRound     = flags.HasFlag(ShrImmFlags.Round);
      bool doAccumulate = flags.HasFlag(ShrImmFlags.Accumulate);

      int  shift      = GetImmShr(op);
      long roundConst = 1L << (shift - 1);

      int totalBytes = op.GetBitsCount() >> 3;
      int count      = isScalar ? 1 : totalBytes >> op.Size;

      // Arithmetic shift keeps the sign for signed variants; logical shift otherwise.
      OpCode shiftOp = isSigned ? OpCodes.Shr : OpCodes.Shr_Un;

      for (int i = 0; i < count; i++)
      {
          EmitVectorExtract(context, op.Rn, i, op.Size, isSigned);

          if (op.Size > 2)
          {
              // op.Size == 3: the rounding constant (or 0) is handed to the dedicated helper.
              EmitShrImm64(context, isSigned, doRound ? roundConst : 0L, shift);
          }
          else
          {
              if (doRound)
              {
                  context.EmitLdc_I8(roundConst);
                  context.Emit(OpCodes.Add);
              }

              context.EmitLdc_I4(shift);
              context.Emit(shiftOp);
          }

          if (doAccumulate)
          {
              EmitVectorExtract(context, op.Rd, i, op.Size, isSigned);
              context.Emit(OpCodes.Add);
          }

          EmitVectorInsert(context, op.Rd, i, op.Size);
      }

      // Scalar results and 64-bit register forms clear the upper half of Rd.
      if (isScalar || op.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Emits IL for an unsigned narrowing shift right by immediate.
  //
  // 8 >> op.Size elements are extracted from Rn (zero-extended, at size op.Size + 1),
  // optionally rounded by adding 1 << (shift - 1), shifted right (unsigned) and
  // inserted into the temporary vector at size op.Size. The temporary vector is then
  // stored to Rd.
  //
  // For 128-bit registers the results go to the elements after the first
  // 8 >> op.Size ones, and the temporary vector is seeded from Rd so the existing
  // lower elements are kept. Otherwise results start at element 0 and the upper
  // part of Rd is zeroed.
  private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      int  shiftAmount = GetImmShr(op);
      long rounding    = 1L << (shiftAmount - 1);

      int count = 8 >> op.Size;

      int dstBase = 0;

      if (op.RegisterSize == RegisterSize.Simd128)
      {
          dstBase = count;
      }

      bool keepExistingDst = dstBase != 0;

      if (keepExistingDst)
      {
          context.EmitLdvec(op.Rd);
          context.EmitStvectmp();
      }

      for (int elem = 0; elem < count; elem++)
      {
          EmitVectorExtractZx(context, op.Rn, elem, op.Size + 1);

          if (round)
          {
              context.EmitLdc_I8(rounding);
              context.Emit(OpCodes.Add);
          }

          context.EmitLdc_I4(shiftAmount);
          context.Emit(OpCodes.Shr_Un);

          EmitVectorInsertTmp(context, dstBase + elem, op.Size);
      }

      context.EmitLdvectmp();
      context.EmitStvec(op.Rd);

      if (!keepExistingDst)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // Option bits consumed by EmitShrImmSaturatingNarrowOp, together with named
  // combinations of the Scalar, SignedSrc and SignedDst bits.
  [Flags]
  private enum ShrImmSaturatingNarrowFlags
  {
      // Individual option bits.
      Scalar    = 0x1,
      SignedSrc = 0x2,
      SignedDst = 0x4,
      Round     = 0x8,

      // Scalar combinations (source signedness, then destination signedness).
      ScalarSxSx = SignedDst | SignedSrc | Scalar,
      ScalarSxZx = SignedSrc | Scalar,
      ScalarZxZx = Scalar,

      // Vector combinations (source signedness, then destination signedness).
      VectorSxSx = SignedDst | SignedSrc,
      VectorSxZx = SignedSrc,
      VectorZxZx = 0x0
  }
  // Rounding variant: forwards to EmitShrImmSaturatingNarrowOp with the Round bit added to the caller's flags.
  private static void EmitRoundShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
      => EmitShrImmSaturatingNarrowOp(context, flags | ShrImmSaturatingNarrowFlags.Round);
  // Emits IL for a saturating, narrowing shift right by immediate.
  //
  // Each source element is extracted from Rn at size op.Size + 1 (signed or unsigned
  // per SignedSrc), optionally rounded, shifted right, passed through EmitSatQ and
  // inserted into the temporary vector at size op.Size. The temporary vector is then
  // stored to Rd.
  //
  // flags selects the behaviour:
  //   Scalar    - process one element; the temporary vector is prepared with
  //               EmitVectorZeroLowerTmp first.
  //   SignedSrc - signed extraction and arithmetic shift.
  //   SignedDst - forwarded to EmitSatQ as the destination signedness.
  //   Round     - add 1 << (shift - 1) before shifting.
  //
  // For non-scalar 128-bit operations the results go to the elements after the first
  // 8 >> op.Size ones and the temporary vector is seeded from Rd; in every other
  // case the upper part of Rd is zeroed afterwards.
  private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
  {
      OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

      bool isScalar    = (flags & ShrImmSaturatingNarrowFlags.Scalar)    == ShrImmSaturatingNarrowFlags.Scalar;
      bool srcIsSigned = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) == ShrImmSaturatingNarrowFlags.SignedSrc;
      bool dstIsSigned = (flags & ShrImmSaturatingNarrowFlags.SignedDst) == ShrImmSaturatingNarrowFlags.SignedDst;
      bool doRound     = (flags & ShrImmSaturatingNarrowFlags.Round)     == ShrImmSaturatingNarrowFlags.Round;

      int  shiftAmount = GetImmShr(op);
      long rounding    = 1L << (shiftAmount - 1);

      int count = isScalar ? 1 : 8 >> op.Size;

      int dstBase = 0;

      if (!isScalar && op.RegisterSize == RegisterSize.Simd128)
      {
          dstBase = count;
      }

      if (isScalar)
      {
          EmitVectorZeroLowerTmp(context);
      }

      if (dstBase != 0)
      {
          context.EmitLdvec(op.Rd);
          context.EmitStvectmp();
      }

      for (int elem = 0; elem < count; elem++)
      {
          EmitVectorExtract(context, op.Rn, elem, op.Size + 1, srcIsSigned);

          if (doRound && op.Size > 1)
          {
              // op.Size == 2 with rounding: use the 64-bit helper.
              EmitShrImm64(context, srcIsSigned, rounding, shiftAmount); // shift <= 32
          }
          else
          {
              if (doRound)
              {
                  context.EmitLdc_I8(rounding);
                  context.Emit(OpCodes.Add);
              }

              context.EmitLdc_I4(shiftAmount);

              if (srcIsSigned)
              {
                  context.Emit(OpCodes.Shr);
              }
              else
              {
                  context.Emit(OpCodes.Shr_Un);
              }
          }

          EmitSatQ(context, op.Size, srcIsSigned, dstIsSigned);

          EmitVectorInsertTmp(context, dstBase + elem, op.Size);
      }

      context.EmitLdvectmp();
      context.EmitStvec(op.Rd);

      if (dstBase == 0)
      {
          EmitVectorZeroUpper(context, op.Rd);
      }
  }
  // dst64 = (Int(src64, signed) + roundConst) >> shift;
  //
  // Pushes roundConst (as an I8 constant) and shift (as an I4 constant), then emits a
  // call to SoftFallback.SignedShrImm64 or SoftFallback.UnsignedShrImm64 depending on
  // the signed argument. The 64-bit source value is not pushed here; callers in this
  // file emit an element extract immediately before calling this method.
  private static void EmitShrImm64(ILEmitterCtx context, bool signed, long roundConst, int shift)
  {
      string fallbackName;

      if (signed)
      {
          fallbackName = nameof(SoftFallback.SignedShrImm64);
      }
      else
      {
          fallbackName = nameof(SoftFallback.UnsignedShrImm64);
      }

      context.EmitLdc_I8(roundConst);
      context.EmitLdc_I4(shift);

      SoftFallback.EmitCall(context, fallbackName);
  }
  // Signed variant: forwards to EmitVectorShImmWidenBinaryOp with signed = true.
  private static void EmitVectorShImmWidenBinarySx(ILEmitterCtx context, Action emit, int imm)
      => EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
  // Unsigned variant: forwards to EmitVectorShImmWidenBinaryOp with signed = false.
  private static void EmitVectorShImmWidenBinaryZx(ILEmitterCtx context, Action emit, int imm)
      => EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
  // Emits IL for a widening binary operation between each source element and an immediate.
  //
  // 8 >> op.Size elements are read from Rn at size op.Size (signed or unsigned per the
  // signed argument). For 128-bit registers reading starts after the first 8 >> op.Size
  // elements; otherwise it starts at element 0. For every element the immediate is
  // pushed as an I4 constant, emit() is invoked to emit the operation itself, and the
  // result is inserted into the temporary vector at size op.Size + 1. Finally the
  // temporary vector is stored to Rd.
  private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed)
  {
      OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

      int count = 8 >> op.Size;

      int srcBase = 0;

      if (op.RegisterSize == RegisterSize.Simd128)
      {
          srcBase = count;
      }

      int dstElem = 0;

      while (dstElem < count)
      {
          EmitVectorExtract(context, op.Rn, srcBase + dstElem, op.Size, signed);

          context.EmitLdc_I4(imm);

          emit();

          EmitVectorInsertTmp(context, dstElem, op.Size + 1);

          dstElem++;
      }

      context.EmitLdvectmp();
      context.EmitStvec(op.Rd);
  }
  871. }
  872. }