InstEmitSimdShift.cs
  1. // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
  2. using ChocolArm64.Decoders;
  3. using ChocolArm64.State;
  4. using ChocolArm64.Translation;
  5. using System;
  6. using System.Reflection.Emit;
  7. using System.Runtime.Intrinsics.X86;
  8. using static ChocolArm64.Instructions.InstEmitSimdHelper;
  9. namespace ChocolArm64.Instructions
  10. {
  11. static partial class InstEmit
  12. {
  13. public static void Rshrn_V(ILEmitterCtx context)
  14. {
  15. EmitVectorShrImmNarrowOpZx(context, round: true);
  16. }
  17. public static void Shl_S(ILEmitterCtx context)
  18. {
  19. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  20. int shift = GetImmShl(op);
  21. EmitScalarUnaryOpZx(context, () =>
  22. {
  23. context.EmitLdc_I4(shift);
  24. context.Emit(OpCodes.Shl);
  25. });
  26. }
  27. public static void Shl_V(ILEmitterCtx context)
  28. {
  29. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  30. int shift = GetImmShl(op);
  31. if (Optimizations.UseSse2 && op.Size > 0)
  32. {
  33. Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
  34. context.EmitLdvec(op.Rn);
  35. context.EmitLdc_I4(shift);
  36. context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
  37. context.EmitStvec(op.Rd);
  38. if (op.RegisterSize == RegisterSize.Simd64)
  39. {
  40. EmitVectorZeroUpper(context, op.Rd);
  41. }
  42. }
  43. else
  44. {
  45. EmitVectorUnaryOpZx(context, () =>
  46. {
  47. context.EmitLdc_I4(shift);
  48. context.Emit(OpCodes.Shl);
  49. });
  50. }
  51. }
/// <summary>
/// SHLL{2} (vector): widen each source element to the next element size and
/// shift it left by the source element size in bits (8 &lt;&lt; Size).
/// </summary>
public static void Shll_V(ILEmitterCtx context)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    // Shift amount is fixed by the instruction to the element size in bits.
    int shift = 8 << op.Size;

    if (Optimizations.UseSse41)
    {
        Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
        Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };

        string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
                                           nameof(Sse41.ConvertToVector128Int32),
                                           nameof(Sse41.ConvertToVector128Int64) };

        // 128-bit ("2") form reads the upper 64 bits of Rn: move them down first.
        int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;

        context.EmitLdvec(op.Rn);
        context.EmitLdc_I4(numBytes);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));

        // Widen the low half; unsigned source type => zero extension.
        context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));

        context.EmitLdc_I4(shift);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));

        context.EmitStvec(op.Rd);
    }
    else
    {
        EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);
    }
}
  77. public static void Shrn_V(ILEmitterCtx context)
  78. {
  79. EmitVectorShrImmNarrowOpZx(context, round: false);
  80. }
/// <summary>
/// SLI (vector): shift left and insert. Each destination element becomes
/// (Rn[i] &lt;&lt; shift) merged with the low 'shift' bits preserved from Rd[i].
/// </summary>
public static void Sli_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    int bytes = op.GetBitsCount() >> 3;
    int elems = bytes >> op.Size;

    int shift = GetImmShl(op);

    // Mask selecting the destination bits kept by the insert (the low 'shift' bits).
    ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0;

    for (int index = 0; index < elems; index++)
    {
        EmitVectorExtractZx(context, op.Rn, index, op.Size);

        context.EmitLdc_I4(shift);
        context.Emit(OpCodes.Shl);

        EmitVectorExtractZx(context, op.Rd, index, op.Size);

        context.EmitLdc_I8((long)mask);
        context.Emit(OpCodes.And);
        context.Emit(OpCodes.Or);

        EmitVectorInsert(context, op.Rd, index, op.Size);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
  104. public static void Sqrshl_V(ILEmitterCtx context)
  105. {
  106. OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
  107. int bytes = op.GetBitsCount() >> 3;
  108. int elems = bytes >> op.Size;
  109. for (int index = 0; index < elems; index++)
  110. {
  111. EmitVectorExtractSx(context, op.Rn, index, op.Size);
  112. EmitVectorExtractSx(context, op.Rm, index, op.Size);
  113. context.Emit(OpCodes.Ldc_I4_1);
  114. context.EmitLdc_I4(op.Size);
  115. context.EmitLdarg(TranslatedSub.StateArgIdx);
  116. SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlRegSatQ));
  117. EmitVectorInsert(context, op.Rd, index, op.Size);
  118. }
  119. if (op.RegisterSize == RegisterSize.Simd64)
  120. {
  121. EmitVectorZeroUpper(context, op.Rd);
  122. }
  123. }
  124. public static void Sqrshrn_S(ILEmitterCtx context)
  125. {
  126. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  127. }
  128. public static void Sqrshrn_V(ILEmitterCtx context)
  129. {
  130. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  131. }
  132. public static void Sqrshrun_S(ILEmitterCtx context)
  133. {
  134. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  135. }
  136. public static void Sqrshrun_V(ILEmitterCtx context)
  137. {
  138. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  139. }
  140. public static void Sqshl_V(ILEmitterCtx context)
  141. {
  142. OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
  143. int bytes = op.GetBitsCount() >> 3;
  144. int elems = bytes >> op.Size;
  145. for (int index = 0; index < elems; index++)
  146. {
  147. EmitVectorExtractSx(context, op.Rn, index, op.Size);
  148. EmitVectorExtractSx(context, op.Rm, index, op.Size);
  149. context.Emit(OpCodes.Ldc_I4_0);
  150. context.EmitLdc_I4(op.Size);
  151. context.EmitLdarg(TranslatedSub.StateArgIdx);
  152. SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlRegSatQ));
  153. EmitVectorInsert(context, op.Rd, index, op.Size);
  154. }
  155. if (op.RegisterSize == RegisterSize.Simd64)
  156. {
  157. EmitVectorZeroUpper(context, op.Rd);
  158. }
  159. }
  160. public static void Sqshrn_S(ILEmitterCtx context)
  161. {
  162. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  163. }
  164. public static void Sqshrn_V(ILEmitterCtx context)
  165. {
  166. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  167. }
  168. public static void Sqshrun_S(ILEmitterCtx context)
  169. {
  170. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  171. }
  172. public static void Sqshrun_V(ILEmitterCtx context)
  173. {
  174. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  175. }
  176. public static void Srshl_V(ILEmitterCtx context)
  177. {
  178. OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
  179. int bytes = op.GetBitsCount() >> 3;
  180. int elems = bytes >> op.Size;
  181. for (int index = 0; index < elems; index++)
  182. {
  183. EmitVectorExtractSx(context, op.Rn, index, op.Size);
  184. EmitVectorExtractSx(context, op.Rm, index, op.Size);
  185. context.Emit(OpCodes.Ldc_I4_1);
  186. context.EmitLdc_I4(op.Size);
  187. SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlReg));
  188. EmitVectorInsert(context, op.Rd, index, op.Size);
  189. }
  190. if (op.RegisterSize == RegisterSize.Simd64)
  191. {
  192. EmitVectorZeroUpper(context, op.Rd);
  193. }
  194. }
  195. public static void Srshr_S(ILEmitterCtx context)
  196. {
  197. EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
  198. }
/// <summary>
/// SRSHR (vector): signed rounding shift right by immediate.
/// SSE2 path computes (x >> shift) + roundBit, where the round bit is the
/// last bit shifted out: (x &lt;&lt; (eSize - shift)) >>> (eSize - 1).
/// </summary>
public static void Srshr_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    // Guard accepts element sizes 1 and 2 only: this path relies on
    // Sse2.ShiftRightArithmetic, which has no 8-bit or 64-bit element form.
    if (Optimizations.UseSse2 && op.Size > 0
                              && op.Size < 3)
    {
        Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
        Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };

        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        context.EmitLdvec(op.Rn);
        context.Emit(OpCodes.Dup);

        // Keep a copy of Rn for the arithmetic-shift operand below.
        context.EmitStvectmp();

        // Isolate bit (shift - 1) of each element: the rounding bit.
        context.EmitLdc_I4(eSize - shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));

        context.EmitLdc_I4(eSize - 1);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

        context.EmitLdvectmp();
        context.EmitLdc_I4(shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));

        // result = roundBits + (Rn >> shift)
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

        context.EmitStvec(op.Rd);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
    }
}
  231. public static void Srsra_S(ILEmitterCtx context)
  232. {
  233. EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  234. }
/// <summary>
/// SRSRA (vector): signed rounding shift right by immediate and accumulate
/// into Rd. Same rounding trick as Srshr_V, with an extra Add to fold in Rd.
/// </summary>
public static void Srsra_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    // Sizes 1 and 2 only: Sse2.ShiftRightArithmetic has no 8/64-bit element form.
    if (Optimizations.UseSse2 && op.Size > 0
                              && op.Size < 3)
    {
        Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
        Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };

        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        // Accumulator (Rd) stays on the stack for the final Add.
        context.EmitLdvec(op.Rd);
        context.EmitLdvec(op.Rn);
        context.Emit(OpCodes.Dup);

        // Keep a copy of Rn for the arithmetic-shift operand below.
        context.EmitStvectmp();

        // Isolate bit (shift - 1) of each element: the rounding bit.
        context.EmitLdc_I4(eSize - shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));

        context.EmitLdc_I4(eSize - 1);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

        context.EmitLdvectmp();
        context.EmitLdc_I4(shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));

        // roundBits + (Rn >> shift), then + Rd.
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

        context.EmitStvec(op.Rd);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
    }
}
  269. public static void Sshl_V(ILEmitterCtx context)
  270. {
  271. OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
  272. int bytes = op.GetBitsCount() >> 3;
  273. int elems = bytes >> op.Size;
  274. for (int index = 0; index < elems; index++)
  275. {
  276. EmitVectorExtractSx(context, op.Rn, index, op.Size);
  277. EmitVectorExtractSx(context, op.Rm, index, op.Size);
  278. context.Emit(OpCodes.Ldc_I4_0);
  279. context.EmitLdc_I4(op.Size);
  280. SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlReg));
  281. EmitVectorInsert(context, op.Rd, index, op.Size);
  282. }
  283. if (op.RegisterSize == RegisterSize.Simd64)
  284. {
  285. EmitVectorZeroUpper(context, op.Rd);
  286. }
  287. }
/// <summary>
/// SSHLL{2} (vector): sign-extend each source element to the next element
/// size, then shift left by the decoded immediate.
/// </summary>
public static void Sshll_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    int shift = GetImmShl(op);

    if (Optimizations.UseSse41)
    {
        Type[] typesSll = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
        Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };

        string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
                                           nameof(Sse41.ConvertToVector128Int32),
                                           nameof(Sse41.ConvertToVector128Int64) };

        // 128-bit ("2") form reads the upper 64 bits of Rn: move them down first.
        int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;

        context.EmitLdvec(op.Rn);
        context.EmitLdc_I4(numBytes);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));

        // Widen the low half; signed source type => sign extension.
        context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));

        context.EmitLdc_I4(shift);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));

        context.EmitStvec(op.Rd);
    }
    else
    {
        EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), shift);
    }
}
  313. public static void Sshr_S(ILEmitterCtx context)
  314. {
  315. EmitShrImmOp(context, ShrImmFlags.ScalarSx);
  316. }
  317. public static void Sshr_V(ILEmitterCtx context)
  318. {
  319. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  320. if (Optimizations.UseSse2 && op.Size > 0
  321. && op.Size < 3)
  322. {
  323. Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
  324. context.EmitLdvec(op.Rn);
  325. context.EmitLdc_I4(GetImmShr(op));
  326. context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
  327. context.EmitStvec(op.Rd);
  328. if (op.RegisterSize == RegisterSize.Simd64)
  329. {
  330. EmitVectorZeroUpper(context, op.Rd);
  331. }
  332. }
  333. else
  334. {
  335. EmitShrImmOp(context, ShrImmFlags.VectorSx);
  336. }
  337. }
  338. public static void Ssra_S(ILEmitterCtx context)
  339. {
  340. EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
  341. }
  342. public static void Ssra_V(ILEmitterCtx context)
  343. {
  344. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  345. if (Optimizations.UseSse2 && op.Size > 0
  346. && op.Size < 3)
  347. {
  348. Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
  349. Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
  350. context.EmitLdvec(op.Rd);
  351. context.EmitLdvec(op.Rn);
  352. context.EmitLdc_I4(GetImmShr(op));
  353. context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
  354. context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
  355. context.EmitStvec(op.Rd);
  356. if (op.RegisterSize == RegisterSize.Simd64)
  357. {
  358. EmitVectorZeroUpper(context, op.Rd);
  359. }
  360. }
  361. else
  362. {
  363. EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
  364. }
  365. }
  366. public static void Uqrshl_V(ILEmitterCtx context)
  367. {
  368. OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
  369. int bytes = op.GetBitsCount() >> 3;
  370. int elems = bytes >> op.Size;
  371. for (int index = 0; index < elems; index++)
  372. {
  373. EmitVectorExtractZx(context, op.Rn, index, op.Size);
  374. EmitVectorExtractZx(context, op.Rm, index, op.Size);
  375. context.Emit(OpCodes.Ldc_I4_1);
  376. context.EmitLdc_I4(op.Size);
  377. context.EmitLdarg(TranslatedSub.StateArgIdx);
  378. SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlRegSatQ));
  379. EmitVectorInsert(context, op.Rd, index, op.Size);
  380. }
  381. if (op.RegisterSize == RegisterSize.Simd64)
  382. {
  383. EmitVectorZeroUpper(context, op.Rd);
  384. }
  385. }
  386. public static void Uqrshrn_S(ILEmitterCtx context)
  387. {
  388. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  389. }
  390. public static void Uqrshrn_V(ILEmitterCtx context)
  391. {
  392. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  393. }
  394. public static void Uqshl_V(ILEmitterCtx context)
  395. {
  396. OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
  397. int bytes = op.GetBitsCount() >> 3;
  398. int elems = bytes >> op.Size;
  399. for (int index = 0; index < elems; index++)
  400. {
  401. EmitVectorExtractZx(context, op.Rn, index, op.Size);
  402. EmitVectorExtractZx(context, op.Rm, index, op.Size);
  403. context.Emit(OpCodes.Ldc_I4_0);
  404. context.EmitLdc_I4(op.Size);
  405. context.EmitLdarg(TranslatedSub.StateArgIdx);
  406. SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlRegSatQ));
  407. EmitVectorInsert(context, op.Rd, index, op.Size);
  408. }
  409. if (op.RegisterSize == RegisterSize.Simd64)
  410. {
  411. EmitVectorZeroUpper(context, op.Rd);
  412. }
  413. }
  414. public static void Uqshrn_S(ILEmitterCtx context)
  415. {
  416. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  417. }
  418. public static void Uqshrn_V(ILEmitterCtx context)
  419. {
  420. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  421. }
  422. public static void Urshl_V(ILEmitterCtx context)
  423. {
  424. OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
  425. int bytes = op.GetBitsCount() >> 3;
  426. int elems = bytes >> op.Size;
  427. for (int index = 0; index < elems; index++)
  428. {
  429. EmitVectorExtractZx(context, op.Rn, index, op.Size);
  430. EmitVectorExtractZx(context, op.Rm, index, op.Size);
  431. context.Emit(OpCodes.Ldc_I4_1);
  432. context.EmitLdc_I4(op.Size);
  433. SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlReg));
  434. EmitVectorInsert(context, op.Rd, index, op.Size);
  435. }
  436. if (op.RegisterSize == RegisterSize.Simd64)
  437. {
  438. EmitVectorZeroUpper(context, op.Rd);
  439. }
  440. }
  441. public static void Urshr_S(ILEmitterCtx context)
  442. {
  443. EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
  444. }
/// <summary>
/// URSHR (vector): unsigned rounding shift right by immediate.
/// Same rounding-bit construction as Srshr_V but with logical shifts; since
/// Sse2.ShiftRightLogical also has a 64-bit element form, Size 3 is allowed
/// here (only Size 0 is excluded).
/// </summary>
public static void Urshr_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    if (Optimizations.UseSse2 && op.Size > 0)
    {
        Type[] typesShs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
        Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };

        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        context.EmitLdvec(op.Rn);
        context.Emit(OpCodes.Dup);

        // Keep a copy of Rn for the logical-shift operand below.
        context.EmitStvectmp();

        // Isolate bit (shift - 1) of each element: the rounding bit.
        context.EmitLdc_I4(eSize - shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));

        context.EmitLdc_I4(eSize - 1);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

        context.EmitLdvectmp();
        context.EmitLdc_I4(shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

        // result = roundBits + (Rn >>> shift)
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

        context.EmitStvec(op.Rd);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
    }
}
  476. public static void Ursra_S(ILEmitterCtx context)
  477. {
  478. EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  479. }
/// <summary>
/// URSRA (vector): unsigned rounding shift right by immediate and accumulate
/// into Rd. Same rounding trick as Urshr_V, with an extra Add to fold in Rd.
/// </summary>
public static void Ursra_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    if (Optimizations.UseSse2 && op.Size > 0)
    {
        Type[] typesShs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
        Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };

        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        // Accumulator (Rd) stays on the stack for the final Add.
        context.EmitLdvec(op.Rd);
        context.EmitLdvec(op.Rn);
        context.Emit(OpCodes.Dup);

        // Keep a copy of Rn for the logical-shift operand below.
        context.EmitStvectmp();

        // Isolate bit (shift - 1) of each element: the rounding bit.
        context.EmitLdc_I4(eSize - shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));

        context.EmitLdc_I4(eSize - 1);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

        context.EmitLdvectmp();
        context.EmitLdc_I4(shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

        // roundBits + (Rn >>> shift), then + Rd.
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

        context.EmitStvec(op.Rd);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
    }
}
  513. public static void Ushl_V(ILEmitterCtx context)
  514. {
  515. OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
  516. int bytes = op.GetBitsCount() >> 3;
  517. int elems = bytes >> op.Size;
  518. for (int index = 0; index < elems; index++)
  519. {
  520. EmitVectorExtractZx(context, op.Rn, index, op.Size);
  521. EmitVectorExtractZx(context, op.Rm, index, op.Size);
  522. context.Emit(OpCodes.Ldc_I4_0);
  523. context.EmitLdc_I4(op.Size);
  524. SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlReg));
  525. EmitVectorInsert(context, op.Rd, index, op.Size);
  526. }
  527. if (op.RegisterSize == RegisterSize.Simd64)
  528. {
  529. EmitVectorZeroUpper(context, op.Rd);
  530. }
  531. }
/// <summary>
/// USHLL{2} (vector): zero-extend each source element to the next element
/// size, then shift left by the decoded immediate.
/// </summary>
public static void Ushll_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    int shift = GetImmShl(op);

    if (Optimizations.UseSse41)
    {
        Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
        Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };

        string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
                                           nameof(Sse41.ConvertToVector128Int32),
                                           nameof(Sse41.ConvertToVector128Int64) };

        // 128-bit ("2") form reads the upper 64 bits of Rn: move them down first.
        int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;

        context.EmitLdvec(op.Rn);
        context.EmitLdc_I4(numBytes);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));

        // Widen the low half; unsigned source type => zero extension.
        context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));

        context.EmitLdc_I4(shift);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));

        context.EmitStvec(op.Rd);
    }
    else
    {
        EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);
    }
}
  557. public static void Ushr_S(ILEmitterCtx context)
  558. {
  559. EmitShrImmOp(context, ShrImmFlags.ScalarZx);
  560. }
  561. public static void Ushr_V(ILEmitterCtx context)
  562. {
  563. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  564. if (Optimizations.UseSse2 && op.Size > 0)
  565. {
  566. Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
  567. context.EmitLdvec(op.Rn);
  568. context.EmitLdc_I4(GetImmShr(op));
  569. context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
  570. context.EmitStvec(op.Rd);
  571. if (op.RegisterSize == RegisterSize.Simd64)
  572. {
  573. EmitVectorZeroUpper(context, op.Rd);
  574. }
  575. }
  576. else
  577. {
  578. EmitShrImmOp(context, ShrImmFlags.VectorZx);
  579. }
  580. }
  581. public static void Usra_S(ILEmitterCtx context)
  582. {
  583. EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
  584. }
  585. public static void Usra_V(ILEmitterCtx context)
  586. {
  587. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  588. if (Optimizations.UseSse2 && op.Size > 0)
  589. {
  590. Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
  591. Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
  592. context.EmitLdvec(op.Rd);
  593. context.EmitLdvec(op.Rn);
  594. context.EmitLdc_I4(GetImmShr(op));
  595. context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
  596. context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
  597. context.EmitStvec(op.Rd);
  598. if (op.RegisterSize == RegisterSize.Simd64)
  599. {
  600. EmitVectorZeroUpper(context, op.Rd);
  601. }
  602. }
  603. else
  604. {
  605. EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
  606. }
  607. }
// Option flags for the shared immediate shift-right emitter (EmitShrImmOp).
[Flags]
private enum ShrImmFlags
{
    Scalar     = 1 << 0, // operate on a single element (lowest) only
    Signed     = 1 << 1, // sign-extend source / arithmetic shift
    Round      = 1 << 2, // add the rounding constant before shifting
    Accumulate = 1 << 3, // add the shifted result to the destination element

    ScalarSx = Scalar | Signed,
    ScalarZx = Scalar,

    VectorSx = Signed,
    VectorZx = 0
}
  620. private static void EmitScalarShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
  621. {
  622. EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
  623. }
  624. private static void EmitScalarShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
  625. {
  626. EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
  627. }
  628. private static void EmitVectorShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
  629. {
  630. EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
  631. }
  632. private static void EmitVectorShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
  633. {
  634. EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
  635. }
/// <summary>
/// Common emitter for immediate shift-right operations: scalar or vector,
/// signed or unsigned, with optional rounding and accumulation, selected
/// by <see cref="ShrImmFlags"/>.
/// </summary>
private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool scalar     = (flags & ShrImmFlags.Scalar)     != 0;
    bool signed     = (flags & ShrImmFlags.Signed)     != 0;
    bool round      = (flags & ShrImmFlags.Round)      != 0;
    bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;

    int shift = GetImmShr(op);

    // Rounding adds half of the smallest increment kept after the shift.
    long roundConst = 1L << (shift - 1);

    int bytes = op.GetBitsCount() >> 3;
    int elems = !scalar ? bytes >> op.Size : 1;

    for (int index = 0; index < elems; index++)
    {
        EmitVectorExtract(context, op.Rn, index, op.Size, signed);

        if (op.Size <= 2)
        {
            if (round)
            {
                context.EmitLdc_I8(roundConst);
                context.Emit(OpCodes.Add);
            }

            context.EmitLdc_I4(shift);

            context.Emit(signed ? OpCodes.Shr : OpCodes.Shr_Un);
        }
        else /* if (op.Size == 3) */
        {
            // 64-bit elements go through a soft-fallback helper
            // (SignedShrImm64 / UnsignedShrImm64).
            EmitShrImm64(context, signed, round ? roundConst : 0L, shift);
        }

        if (accumulate)
        {
            EmitVectorExtract(context, op.Rd, index, op.Size, signed);

            context.Emit(OpCodes.Add);
        }

        EmitVectorInsertTmp(context, index, op.Size);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    // Scalar forms always clear the upper bits of the destination register.
    if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
/// <summary>
/// Shift right (immediate) and narrow, zero-extending: (Size + 1)-sized
/// elements from Rn are shifted, then stored as Size-sized elements in Rd
/// (into the upper half of Rd for the 128-bit "2" form).
/// </summary>
private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1);

    int elems = 8 >> op.Size;

    // Non-zero for the 128-bit form: results land in the upper half of Rd.
    int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

    if (part != 0)
    {
        // Preserve the existing lower half of Rd in the temp vector.
        context.EmitLdvec(op.Rd);
        context.EmitStvectmp();
    }

    for (int index = 0; index < elems; index++)
    {
        EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);

        if (round)
        {
            context.EmitLdc_I8(roundConst);
            context.Emit(OpCodes.Add);
        }

        context.EmitLdc_I4(shift);
        context.Emit(OpCodes.Shr_Un);

        EmitVectorInsertTmp(context, part + index, op.Size);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    if (part == 0)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
// Option flags for the shared saturating shift-right-narrow emitter
// (EmitShrImmSaturatingNarrowOp). Src/Dst signedness are independent,
// which encodes the SQSHRN / SQSHRUN / UQSHRN families.
[Flags]
private enum ShrImmSaturatingNarrowFlags
{
    Scalar    = 1 << 0, // operate on a single element (lowest) only
    SignedSrc = 1 << 1, // sign-extend source / arithmetic shift
    SignedDst = 1 << 2, // saturate to a signed destination range
    Round     = 1 << 3, // add the rounding constant before shifting

    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxZx = Scalar,

    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxZx = 0
}
  723. private static void EmitRoundShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
  724. {
  725. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
  726. }
/// <summary>
/// Common emitter for saturating shift-right-narrow operations: elements of
/// size (Size + 1) are shifted (optionally rounded), saturated to the
/// destination element range, and stored as Size-sized elements.
/// </summary>
private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool scalar    = (flags & ShrImmSaturatingNarrowFlags.Scalar)    != 0;
    bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
    bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
    bool round     = (flags & ShrImmSaturatingNarrowFlags.Round)     != 0;

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1);

    int elems = !scalar ? 8 >> op.Size : 1;

    // Non-zero only for the vector 128-bit "2" form, which writes the upper half.
    int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

    if (scalar)
    {
        EmitVectorZeroLowerTmp(context);
    }

    if (part != 0)
    {
        // Preserve the existing lower half of Rd in the temp vector.
        context.EmitLdvec(op.Rd);
        context.EmitStvectmp();
    }

    for (int index = 0; index < elems; index++)
    {
        EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);

        // For sizes 0 and 1 (or when not rounding) the add + shift fits in IL;
        // size 2 with rounding uses the 64-bit helper instead.
        if (op.Size <= 1 || !round)
        {
            if (round)
            {
                context.EmitLdc_I8(roundConst);
                context.Emit(OpCodes.Add);
            }

            context.EmitLdc_I4(shift);

            context.Emit(signedSrc ? OpCodes.Shr : OpCodes.Shr_Un);
        }
        else /* if (op.Size == 2 && round) */
        {
            EmitShrImm64(context, signedSrc, roundConst, shift); // shift <= 32
        }

        // Clamp to the destination element range before narrowing.
        EmitSatQ(context, op.Size, signedSrc, signedDst);

        EmitVectorInsertTmp(context, part + index, op.Size);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    if (part == 0)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
  774. // dst64 = (Int(src64, signed) + roundConst) >> shift;
  775. private static void EmitShrImm64(
  776. ILEmitterCtx context,
  777. bool signed,
  778. long roundConst,
  779. int shift)
  780. {
  781. context.EmitLdc_I8(roundConst);
  782. context.EmitLdc_I4(shift);
  783. SoftFallback.EmitCall(context, signed
  784. ? nameof(SoftFallback.SignedShrImm64)
  785. : nameof(SoftFallback.UnsignedShrImm64));
  786. }
  787. private static void EmitVectorShImmWidenBinarySx(ILEmitterCtx context, Action emit, int imm)
  788. {
  789. EmitVectorShImmWidenBinaryOp(context, emit, imm, true);
  790. }
  791. private static void EmitVectorShImmWidenBinaryZx(ILEmitterCtx context, Action emit, int imm)
  792. {
  793. EmitVectorShImmWidenBinaryOp(context, emit, imm, false);
  794. }
  795. private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed)
  796. {
  797. OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
  798. int elems = 8 >> op.Size;
  799. int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
  800. for (int index = 0; index < elems; index++)
  801. {
  802. EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
  803. context.EmitLdc_I4(imm);
  804. emit();
  805. EmitVectorInsertTmp(context, index, op.Size + 1);
  806. }
  807. context.EmitLdvectmp();
  808. context.EmitStvec(op.Rd);
  809. }
  810. }
  811. }