// InstEmitSimdArithmetic.cs
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
// https://www.agner.org/optimize/#vectorclass @ vectori128.h

using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;

using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;

namespace ARMeilleure.Instructions
{
    using Func2I = Func<Operand, Operand, Operand>;

    static partial class InstEmit
    {
        public static void Abs_S(ArmEmitterContext context)
        {
            EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
        }

        public static void Abs_V(ArmEmitterContext context)
        {
            EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
        }

        public static void Add_S(ArmEmitterContext context)
        {
            EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
        }

        public static void Add_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Intrinsic addInst = X86PaddInstruction[op.Size];

                Operand res = context.AddIntrinsic(addInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
            }
        }
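        // Note: X86PaddInstruction maps the element size (op.Size 0..3 for byte/half/word/
        // doubleword lanes) to the matching PADDB/PADDW/PADDD/PADDQ intrinsic, and
        // VectorZeroUpper64 clears bits 64..127 so a 64-bit (D-register) operation leaves
        // the upper half of the destination zeroed, as the AArch64 register rules require.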
        public static void Addhn_V(ArmEmitterContext context)
        {
            EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
        }

        public static void Addp_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
            Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);

            Operand res = context.Add(ne0, ne1);

            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
        }

        public static void Addp_V(ArmEmitterContext context)
        {
            EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2));
        }

        public static void Addv_V(ArmEmitterContext context)
        {
            EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
        }

        public static void Cls_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;
            int eSize = 8 << op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                Operand de = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingSigns), ne, Const(eSize));

                res = EmitVectorInsert(context, res, de, index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void Clz_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;
            int eSize = 8 << op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                Operand de;

                if (eSize == 64)
                {
                    de = context.CountLeadingZeros(ne);
                }
                else
                {
                    de = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingZeros), ne, Const(eSize));
                }

                res = EmitVectorInsert(context, res, de, index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
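        // CLS/CLZ have no SSE equivalent, so both are emitted as a per-element loop: each
        // lane is extracted, counted via a SoftFallback call (or the native CountLeadingZeros
        // IR op for full 64-bit lanes), then re-inserted. eSize is the lane width in bits
        // (8 << op.Size), passed so the fallback can ignore the zero-extended upper bits of
        // the extracted value.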
        public static void Cnt_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);

                Operand de;

                if (Optimizations.UsePopCnt)
                {
                    de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
                }
                else
                {
                    de = context.Call(new _U64_U64(SoftFallback.CountSetBits8), ne);
                }

                res = EmitVectorInsert(context, res, de, index, 0);
            }

            context.Copy(GetVec(op.Rd), res);
        }
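        // The floating-point emitters below lean on an IEEE 754 sign-bit trick with a -0.0
        // mask (only the sign bit set): ANDN(mask, x) clears the sign bit (absolute value),
        // and XOR(mask, x) flips it (negation). This is branchless and behaves correctly for
        // NaN and infinity encodings as well.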
        public static void Fabd_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));

                    Operand mask = X86GetScalar(context, -0f);

                    res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));

                    Operand mask = X86GetScalar(context, -0d);

                    res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    Operand res = EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2);

                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, res);
                });
            }
        }

        public static void Fabd_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));

                    Operand mask = X86GetAllElements(context, -0f);

                    res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));

                    Operand mask = X86GetAllElements(context, -0d);

                    res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    Operand res = EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2);

                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, res);
                });
            }
        }
        public static void Fabs_S(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                if (op.Size == 0)
                {
                    Operand mask = X86GetScalar(context, -0f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (op.Size == 1) */
                {
                    Operand mask = X86GetScalar(context, -0d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1);
                });
            }
        }

        public static void Fabs_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand mask = X86GetAllElements(context, -0f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand mask = X86GetAllElements(context, -0d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1);
                });
            }
        }
        public static void Fadd_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2);
                });
            }
        }

        public static void Fadd_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2);
                });
            }
        }
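        // Most FP emitters in this file follow the same three-tier dispatch: an SSE intrinsic
        // when FastFP and the required extension are available, a plain IR operation when only
        // FastFP is set, and otherwise a SoftFloat32/SoftFloat64 call that models the exact
        // ARM floating-point behaviour the fast paths approximate.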
        public static void Faddp_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.FastFP && Optimizations.UseSse3)
            {
                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

                Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
                Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);

                Operand res = EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, ne0, ne1);

                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
            }
        }
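        // HADDPS/HADDPD add horizontally adjacent pairs, so hadd(n, n) leaves n[0] + n[1] in
        // element 0, which is exactly the scalar FADDP result; the upper elements are then
        // zeroed to satisfy the scalar write rules.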
        public static void Faddp_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
            }
            else
            {
                EmitVectorPairwiseOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2);
                });
            }
        }

        public static void Fdiv_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, op1, op2);
                });
            }
        }

        public static void Fdiv_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, op1, op2);
                });
            }
        }
        public static void Fmadd_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand a = GetVec(op.Ra);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.Size == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);

                    context.Copy(d, context.VectorZeroUpper96(res));
                }
                else /* if (op.Size == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);

                    context.Copy(d, context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
                });
            }
        }
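        // Note: the SSE2 path lowers the fused FMADD as a separate multiply and add (two
        // roundings), while the ARM instruction rounds once. That deviation is only taken
        // under Optimizations.FastFP; the soft-float fallback (FPMulAdd) preserves the fused,
        // correctly-rounded semantics. The same trade-off applies to the other "// Fused."
        // emitters below.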
        public static void Fmax_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd);
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2);
                });
            }
        }

        public static void Fmax_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2);
                });
            }
        }

        public static void Fmaxnm_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true);
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2);
                });
            }
        }

        public static void Fmaxnm_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false);
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2);
                });
            }
        }

        public static void Fmaxp_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorPairwiseOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
            }
            else
            {
                EmitVectorPairwiseOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, op1, op2);
                });
            }
        }

        public static void Fmin_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Minss, Intrinsic.X86Minsd);
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2);
                });
            }
        }

        public static void Fmin_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2);
                });
            }
        }

        public static void Fminnm_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true);
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2);
                });
            }
        }

        public static void Fminnm_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false);
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2);
                });
            }
        }

        public static void Fminp_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
            }
            else
            {
                EmitVectorPairwiseOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, op1, op2);
                });
            }
        }
        public static void Fmla_Se(ArmEmitterContext context) // Fused.
        {
            EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
            {
                return context.Add(op1, context.Multiply(op2, op3));
            });
        }

        public static void Fmla_V(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorTernaryOpF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
                });
            }
        }

        public static void Fmla_Ve(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;

                    Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));

                    res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
                    res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    int shuffleMask = op.Index | op.Index << 1;

                    Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));

                    res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
                    res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
                });
            }
        }
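        // For the by-element forms, the selected lane is broadcast with a shuffle before the
        // multiply: SHUFPS takes four 2-bit source selectors, so replicating op.Index into all
        // four fields (index | index << 2 | index << 4 | index << 6) splats one single-precision
        // lane, while SHUFPD uses one selector bit per lane, hence op.Index | op.Index << 1.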
        public static void Fmls_Se(ArmEmitterContext context) // Fused.
        {
            EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
            {
                return context.Subtract(op1, context.Multiply(op2, op3));
            });
        }

        public static void Fmls_V(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorTernaryOpF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
                });
            }
        }

        public static void Fmls_Ve(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;

                    Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));

                    res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
                    res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    int shuffleMask = op.Index | op.Index << 1;

                    Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));

                    res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
                    res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
                });
            }
        }
        public static void Fmsub_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand a = GetVec(op.Ra);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.Size == 0)
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);

                    context.Copy(d, context.VectorZeroUpper96(res));
                }
                else /* if (op.Size == 1) */
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);

                    context.Copy(d, context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
                });
            }
        }
        public static void Fmul_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
                });
            }
        }

        public static void Fmul_Se(ArmEmitterContext context)
        {
            EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
        }

        public static void Fmul_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
                });
            }
        }

        public static void Fmul_Ve(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;

                    Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));

                    res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    int shuffleMask = op.Index | op.Index << 1;

                    Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));

                    res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
            }
            else
            {
                EmitVectorBinaryOpByElemF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
                });
            }
        }
        public static void Fmulx_S(ArmEmitterContext context)
        {
            EmitScalarBinaryOpF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2);
            });
        }

        public static void Fmulx_Se(ArmEmitterContext context)
        {
            EmitScalarBinaryOpByElemF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2);
            });
        }

        public static void Fmulx_V(ArmEmitterContext context)
        {
            EmitVectorBinaryOpF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2);
            });
        }

        public static void Fmulx_Ve(ArmEmitterContext context)
        {
            EmitVectorBinaryOpByElemF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2);
            });
        }
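        // FMULX always goes through soft-float: it differs from FMUL only in the special
        // cases (zero times infinity yields ±2.0 rather than a NaN), which no x86 multiply
        // reproduces, so there is no useful fast path.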
        public static void Fneg_S(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                if (op.Size == 0)
                {
                    Operand mask = X86GetScalar(context, -0f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (op.Size == 1) */
                {
                    Operand mask = X86GetScalar(context, -0d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) => context.Negate(op1));
            }
        }

        public static void Fneg_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand mask = X86GetAllElements(context, -0f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand mask = X86GetAllElements(context, -0d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) => context.Negate(op1));
            }
        }
        public static void Fnmadd_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand a = GetVec(op.Ra);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.Size == 0)
                {
                    Operand mask = X86GetScalar(context, -0f);

                    Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res);

                    context.Copy(d, context.VectorZeroUpper96(res));
                }
                else /* if (op.Size == 1) */
                {
                    Operand mask = X86GetScalar(context, -0d);

                    Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res);

                    context.Copy(d, context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPNegMulAdd, SoftFloat64.FPNegMulAdd, op1, op2, op3);
                });
            }
        }

        public static void Fnmsub_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand a = GetVec(op.Ra);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.Size == 0)
                {
                    Operand mask = X86GetScalar(context, -0f);

                    Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res);

                    context.Copy(d, context.VectorZeroUpper96(res));
                }
                else /* if (op.Size == 1) */
                {
                    Operand mask = X86GetScalar(context, -0d);

                    Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);

                    res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res);

                    context.Copy(d, context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPNegMulSub, SoftFloat64.FPNegMulSub, op1, op2, op3);
                });
            }
        }
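        // FNMADD computes -(a + n*m) and FNMSUB computes n*m - a; both SSE paths build the
        // result by first negating 'a' with an XOR against the sign mask and then subtracting
        // or adding the product, again trading the fused rounding for speed under FastFP.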
        public static void Fnmul_S(ArmEmitterContext context)
        {
            EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
        }

        public static void Frecpe_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
            {
                EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0);
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1);
                });
            }
        }

        public static void Frecpe_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
            {
                EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0);
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1);
                });
            }
        }

        public static void Frecps_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand mask = X86GetScalar(context, 2f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, GetVec(op.Rn), GetVec(op.Rm));

                    res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (sizeF == 1) */
                {
                    Operand mask = X86GetScalar(context, 2d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm));

                    res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2);
                });
            }
        }

        public static void Frecps_V(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand mask = X86GetAllElements(context, 2f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, GetVec(op.Rn), GetVec(op.Rm));

                    res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand mask = X86GetAllElements(context, 2d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm));

                    res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2);
                });
            }
        }
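        // FRECPS is the Newton-Raphson refinement step for a reciprocal: it returns 2 - n*m,
        // so iterating x' = x * (2 - d*x) on an FRECPE seed converges toward 1/d. The SSE
        // path evaluates it directly as sub(2.0, mul(n, m)); the single-precision FRECPE
        // fast path above similarly maps to the RCPSS/RCPPS hardware estimate.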
        public static void Frecpx_S(ArmEmitterContext context)
        {
            EmitScalarUnaryOpF(context, (op1) =>
            {
                return EmitSoftFloatCall(context, SoftFloat32.FPRecpX, SoftFloat64.FPRecpX, op1);
            });
        }

        public static void Frinta_S(ArmEmitterContext context)
        {
            EmitScalarUnaryOpF(context, (op1) =>
            {
                return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
            });
        }

        public static void Frinta_V(ArmEmitterContext context)
        {
            EmitVectorUnaryOpF(context, (op1) =>
            {
                return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
            });
        }

        public static void Frinti_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            EmitScalarUnaryOpF(context, (op1) =>
            {
                if (op.Size == 0)
                {
                    return context.Call(new _F32_F32(SoftFallback.RoundF), op1);
                }
                else /* if (op.Size == 1) */
                {
                    return context.Call(new _F64_F64(SoftFallback.Round), op1);
                }
            });
        }

        public static void Frinti_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            EmitVectorUnaryOpF(context, (op1) =>
            {
                if (sizeF == 0)
                {
                    return context.Call(new _F32_F32(SoftFallback.RoundF), op1);
                }
                else /* if (sizeF == 1) */
                {
                    return context.Call(new _F64_F64(SoftFallback.Round), op1);
                }
            });
        }
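        // FRINTI (and FRINTX below) round "using the current rounding mode", which cannot be
        // mapped to a fixed MidpointRounding value at translation time; the SoftFallback
        // Round/RoundF helpers consult the emulated FPCR rounding mode at run time instead.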
        public static void Frintm_S(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1);
                });
            }
        }

        public static void Frintm_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1);
                });
            }
        }

        public static void Frintn_S(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitScalarRoundOpF(context, FPRoundingMode.ToNearest);
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) =>
                {
                    return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
                });
            }
        }

        public static void Frintn_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitVectorRoundOpF(context, FPRoundingMode.ToNearest);
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) =>
                {
                    return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
                });
            }
        }

        public static void Frintp_S(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1);
                });
            }
        }

        public static void Frintp_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1);
                });
            }
        }

        public static void Frintx_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            EmitScalarUnaryOpF(context, (op1) =>
            {
                if (op.Size == 0)
                {
                    return context.Call(new _F32_F32(SoftFallback.RoundF), op1);
                }
                else /* if (op.Size == 1) */
                {
                    return context.Call(new _F64_F64(SoftFallback.Round), op1);
                }
            });
        }

        public static void Frintx_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            EmitVectorUnaryOpF(context, (op1) =>
            {
                if (sizeF == 0)
                {
                    return context.Call(new _F32_F32(SoftFallback.RoundF), op1);
                }
                else /* if (sizeF == 1) */
                {
                    return context.Call(new _F64_F64(SoftFallback.Round), op1);
                }
            });
        }

        public static void Frintz_S(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero);
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1);
                });
            }
        }

        public static void Frintz_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero);
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) =>
                {
                    return EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1);
                });
            }
        }
        public static void Frsqrte_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
            {
                EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0);
            }
            else
            {
                EmitScalarUnaryOpF(context, (op1) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, op1);
                });
            }
        }

        public static void Frsqrte_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
            {
                EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0);
            }
            else
            {
                EmitVectorUnaryOpF(context, (op1) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, op1);
                });
            }
        }

        public static void Frsqrts_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand maskHalf = X86GetScalar(context, 0.5f);
                    Operand maskThree = X86GetScalar(context, 3f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, GetVec(op.Rn), GetVec(op.Rm));

                    res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
                    res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                }
                else /* if (sizeF == 1) */
                {
                    Operand maskHalf = X86GetScalar(context, 0.5d);
                    Operand maskThree = X86GetScalar(context, 3d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm));

                    res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
                    res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res);

                    context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                }
            }
            else
            {
                EmitScalarBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, op1, op2);
                });
            }
        }

        public static void Frsqrts_V(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand maskHalf = X86GetAllElements(context, 0.5f);
                    Operand maskThree = X86GetAllElements(context, 3f);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, GetVec(op.Rn), GetVec(op.Rm));

                    res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
                    res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.VectorZeroUpper64(res);
                    }

                    context.Copy(GetVec(op.Rd), res);
                }
                else /* if (sizeF == 1) */
                {
                    Operand maskHalf = X86GetAllElements(context, 0.5d);
                    Operand maskThree = X86GetAllElements(context, 3d);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm));

                    res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
                    res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                EmitVectorBinaryOpF(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, op1, op2);
                });
            }
        }
  1337. public static void Fsqrt_S(ArmEmitterContext context)
  1338. {
  1339. if (Optimizations.FastFP && Optimizations.UseSse2)
  1340. {
  1341. EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
  1342. }
  1343. else
  1344. {
  1345. EmitScalarUnaryOpF(context, (op1) =>
  1346. {
  1347. return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1);
  1348. });
  1349. }
  1350. }
  1351. public static void Fsqrt_V(ArmEmitterContext context)
  1352. {
  1353. if (Optimizations.FastFP && Optimizations.UseSse2)
  1354. {
  1355. EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
  1356. }
  1357. else
  1358. {
  1359. EmitVectorUnaryOpF(context, (op1) =>
  1360. {
  1361. return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1);
  1362. });
  1363. }
  1364. }
  1365. public static void Fsub_S(ArmEmitterContext context)
  1366. {
  1367. if (Optimizations.FastFP && Optimizations.UseSse2)
  1368. {
  1369. EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
  1370. }
  1371. else if (Optimizations.FastFP)
  1372. {
  1373. EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
  1374. }
  1375. else
  1376. {
  1377. EmitScalarBinaryOpF(context, (op1, op2) =>
  1378. {
  1379. return EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2);
  1380. });
  1381. }
  1382. }
  1383. public static void Fsub_V(ArmEmitterContext context)
  1384. {
  1385. if (Optimizations.FastFP && Optimizations.UseSse2)
  1386. {
  1387. EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
  1388. }
  1389. else if (Optimizations.FastFP)
  1390. {
  1391. EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
  1392. }
  1393. else
  1394. {
  1395. EmitVectorBinaryOpF(context, (op1, op2) =>
  1396. {
  1397. return EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2);
  1398. });
  1399. }
  1400. }
  1401. public static void Mla_V(ArmEmitterContext context)
  1402. {
  1403. if (Optimizations.UseSse41)
  1404. {
  1405. EmitSse41Mul_AddSub(context, AddSub.Add);
  1406. }
  1407. else
  1408. {
  1409. EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
  1410. {
  1411. return context.Add(op1, context.Multiply(op2, op3));
  1412. });
  1413. }
  1414. }
  1415. public static void Mla_Ve(ArmEmitterContext context)
  1416. {
  1417. EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
  1418. {
  1419. return context.Add(op1, context.Multiply(op2, op3));
  1420. });
  1421. }
  1422. public static void Mls_V(ArmEmitterContext context)
  1423. {
  1424. if (Optimizations.UseSse41)
  1425. {
  1426. EmitSse41Mul_AddSub(context, AddSub.Subtract);
  1427. }
  1428. else
  1429. {
  1430. EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
  1431. {
  1432. return context.Subtract(op1, context.Multiply(op2, op3));
  1433. });
  1434. }
  1435. }
  1436. public static void Mls_Ve(ArmEmitterContext context)
  1437. {
  1438. EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
  1439. {
  1440. return context.Subtract(op1, context.Multiply(op2, op3));
  1441. });
  1442. }
  1443. public static void Mul_V(ArmEmitterContext context)
  1444. {
  1445. if (Optimizations.UseSse41)
  1446. {
  1447. EmitSse41Mul_AddSub(context, AddSub.None);
  1448. }
  1449. else
  1450. {
  1451. EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
  1452. }
  1453. }
  1454. public static void Mul_Ve(ArmEmitterContext context)
  1455. {
  1456. EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
  1457. }
  1458. public static void Neg_S(ArmEmitterContext context)
  1459. {
  1460. EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
  1461. }
  1462. public static void Neg_V(ArmEmitterContext context)
  1463. {
  1464. if (Optimizations.UseSse2)
  1465. {
  1466. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  1467. Intrinsic subInst = X86PsubInstruction[op.Size];
  1468. Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn));
  1469. if (op.RegisterSize == RegisterSize.Simd64)
  1470. {
  1471. res = context.VectorZeroUpper64(res);
  1472. }
  1473. context.Copy(GetVec(op.Rd), res);
  1474. }
  1475. else
  1476. {
  1477. EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1));
  1478. }
  1479. }
  1480. public static void Raddhn_V(ArmEmitterContext context)
  1481. {
  1482. EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
  1483. }
  1484. public static void Rsubhn_V(ArmEmitterContext context)
  1485. {
  1486. EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
  1487. }
  1488. public static void Saba_V(ArmEmitterContext context)
  1489. {
  1490. EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
  1491. {
  1492. return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
  1493. });
  1494. }
  1495. public static void Sabal_V(ArmEmitterContext context)
  1496. {
  1497. EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
  1498. {
  1499. return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
  1500. });
  1501. }
  1502. public static void Sabd_V(ArmEmitterContext context)
  1503. {
  1504. if (Optimizations.UseSse2)
  1505. {
  1506. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  1507. Operand n = GetVec(op.Rn);
  1508. Operand m = GetVec(op.Rm);
  1509. EmitSse41Sabd(context, op, n, m, isLong: false);
  1510. }
  1511. else
  1512. {
  1513. EmitVectorBinaryOpSx(context, (op1, op2) =>
  1514. {
  1515. return EmitAbs(context, context.Subtract(op1, op2));
  1516. });
  1517. }
  1518. }
  1519. public static void Sabdl_V(ArmEmitterContext context)
  1520. {
  1521. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  1522. if (Optimizations.UseSse41 && op.Size < 2)
  1523. {
  1524. Operand n = GetVec(op.Rn);
  1525. Operand m = GetVec(op.Rm);
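
                // The "2" encodings take the source elements from the upper 64 bits of the
                // registers, so shift them down before widening.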
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = op.Size == 0
                    ? Intrinsic.X86Pmovsxbw
                    : Intrinsic.X86Pmovsxwd;

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                EmitSse41Sabd(context, op, n, m, isLong: true);
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) =>
                {
                    return EmitAbs(context, context.Subtract(op1, op2));
                });
            }
        }

        public static void Sadalp_V(ArmEmitterContext context)
        {
            EmitAddLongPairwise(context, signed: true, accumulate: true);
        }

        public static void Saddl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovsxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
            }
        }

        public static void Saddlp_V(ArmEmitterContext context)
        {
            EmitAddLongPairwise(context, signed: true, accumulate: false);
        }

        public static void Saddlv_V(ArmEmitterContext context)
        {
            EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
        }

        public static void Saddw_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovsxInstruction[op.Size];

                m = context.AddIntrinsic(movInst, m);

                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
            }
            else
            {
                EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
            }
        }

        public static void Shadd_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);
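
                // Overflow-free halving add: (n & m) + ((n ^ m) >> 1) == (n + m) >> 1,
                // using an arithmetic shift for the signed variant. Bytes fall back to the
                // generic path because x86 has no packed 8-bit shift.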
                Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
                Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);

                Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad;

                res2 = context.AddIntrinsic(shiftInst, res2, Const(1));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                res = context.AddIntrinsic(addInst, res, res2);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpSx(context, (op1, op2) =>
                {
                    return context.ShiftRightSI(context.Add(op1, op2), Const(1));
                });
            }
        }

        public static void Shsub_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size < 2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);
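
                // Signed halving subtract via PAVG: bias both operands into the unsigned
                // range; since pavg(a, b) == (a + b + 1) >>> 1, the expression
                // n' - pavg(n', m') yields (n - m) >> 1 with floor rounding.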
                Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                Operand nPlusMask = context.AddIntrinsic(addInst, n, mask);
                Operand mPlusMask = context.AddIntrinsic(addInst, m, mask);

                Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;

                Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask);

                Intrinsic subInst = X86PsubInstruction[op.Size];

                res = context.AddIntrinsic(subInst, nPlusMask, res);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpSx(context, (op1, op2) =>
                {
                    return context.ShiftRightSI(context.Subtract(op1, op2), Const(1));
                });
            }
        }

        public static void Smax_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Intrinsic maxInst = X86PmaxsInstruction[op.Size];

                Operand res = context.AddIntrinsic(maxInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Delegate dlg = new _S64_S64_S64(Math.Max);

                EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
            }
        }

        public static void Smaxp_V(ArmEmitterContext context)
        {
            Delegate dlg = new _S64_S64_S64(Math.Max);

            EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
        }

        public static void Smaxv_V(ArmEmitterContext context)
        {
            Delegate dlg = new _S64_S64_S64(Math.Max);

            EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
        }

        public static void Smin_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Intrinsic minInst = X86PminsInstruction[op.Size];

                Operand res = context.AddIntrinsic(minInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Delegate dlg = new _S64_S64_S64(Math.Min);

                EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
            }
        }

        public static void Sminp_V(ArmEmitterContext context)
        {
            Delegate dlg = new _S64_S64_S64(Math.Min);

            EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
        }

        public static void Sminv_V(ArmEmitterContext context)
        {
            Delegate dlg = new _S64_S64_S64(Math.Min);

            EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(dlg, op1, op2));
        }

        public static void Smlal_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovsxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;

                Operand res = context.AddIntrinsic(mullInst, n, m);

                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(d, context.AddIntrinsic(addInst, d, res));
            }
            else
            {
                EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
                {
                    return context.Add(op1, context.Multiply(op2, op3));
                });
            }
        }

        public static void Smlal_Ve(ArmEmitterContext context)
        {
            EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
            {
                return context.Add(op1, context.Multiply(op2, op3));
            });
        }

        public static void Smlsl_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovsxbw : Intrinsic.X86Pmovsxwd;

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;

                Operand res = context.AddIntrinsic(mullInst, n, m);

                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(d, context.AddIntrinsic(subInst, d, res));
            }
            else
            {
                EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
                {
                    return context.Subtract(op1, context.Multiply(op2, op3));
                });
            }
        }

        public static void Smlsl_Ve(ArmEmitterContext context)
        {
            EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
            {
                return context.Subtract(op1, context.Multiply(op2, op3));
            });
        }

        public static void Smull_V(ArmEmitterContext context)
        {
            EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
        }

        public static void Smull_Ve(ArmEmitterContext context)
        {
            EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
        }

        public static void Sqabs_S(ArmEmitterContext context)
        {
            EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
        }

        public static void Sqabs_V(ArmEmitterContext context)
        {
            EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
        }

        public static void Sqadd_S(ArmEmitterContext context)
        {
            EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add);
        }

        public static void Sqadd_V(ArmEmitterContext context)
        {
            EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add);
        }

        public static void Sqdmulh_S(ArmEmitterContext context)
        {
            EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.ScalarSx);
        }

        public static void Sqdmulh_V(ArmEmitterContext context)
        {
            EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.VectorSx);
        }

        public static void Sqneg_S(ArmEmitterContext context)
        {
            EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
        }

        public static void Sqneg_V(ArmEmitterContext context)
        {
            EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
        }

        public static void Sqrdmulh_S(ArmEmitterContext context)
        {
            EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true), SaturatingFlags.ScalarSx);
        }

        public static void Sqrdmulh_V(ArmEmitterContext context)
        {
            EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true), SaturatingFlags.VectorSx);
        }

        public static void Sqsub_S(ArmEmitterContext context)
        {
            EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
        }

        public static void Sqsub_V(ArmEmitterContext context)
        {
            EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
        }

        public static void Sqxtn_S(ArmEmitterContext context)
        {
            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
        }

        public static void Sqxtn_V(ArmEmitterContext context)
        {
            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
        }

        public static void Sqxtun_S(ArmEmitterContext context)
        {
            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
        }

        public static void Sqxtun_V(ArmEmitterContext context)
        {
            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
        }

        public static void Srhadd_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size < 2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);
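
                // Signed rounding halving add: bias into the unsigned range, let PAVG
                // compute (a + b + 1) >>> 1, then undo the bias.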
                Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));

                Intrinsic subInst = X86PsubInstruction[op.Size];

                Operand nMinusMask = context.AddIntrinsic(subInst, n, mask);
                Operand mMinusMask = context.AddIntrinsic(subInst, m, mask);

                Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;

                Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask);

                Intrinsic addInst = X86PaddInstruction[op.Size];

                res = context.AddIntrinsic(addInst, mask, res);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpSx(context, (op1, op2) =>
                {
                    Operand res = context.Add(op1, op2);

                    res = context.Add(res, Const(1L));

                    return context.ShiftRightSI(res, Const(1));
                });
            }
        }

        public static void Ssubl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovsxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }

        public static void Ssubw_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovsxInstruction[op.Size];

                m = context.AddIntrinsic(movInst, m);

                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
            }
            else
            {
                EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }

        public static void Sub_S(ArmEmitterContext context)
        {
            EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
        }

        public static void Sub_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Intrinsic subInst = X86PsubInstruction[op.Size];

                Operand res = context.AddIntrinsic(subInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }

        public static void Subhn_V(ArmEmitterContext context)
        {
            EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
        }

        public static void Suqadd_S(ArmEmitterContext context)
        {
            EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate);
        }

        public static void Suqadd_V(ArmEmitterContext context)
        {
            EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate);
        }

        public static void Uaba_V(ArmEmitterContext context)
        {
            EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
            {
                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
            });
        }

        public static void Uabal_V(ArmEmitterContext context)
        {
            EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
            {
                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
            });
        }

        public static void Uabd_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                EmitSse41Uabd(context, op, n, m, isLong: false);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return EmitAbs(context, context.Subtract(op1, op2));
                });
            }
        }

        public static void Uabdl_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = op.Size == 0
                    ? Intrinsic.X86Pmovzxbw
                    : Intrinsic.X86Pmovzxwd;

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                EmitSse41Uabd(context, op, n, m, isLong: true);
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) =>
                {
                    return EmitAbs(context, context.Subtract(op1, op2));
                });
            }
        }

        public static void Uadalp_V(ArmEmitterContext context)
        {
            EmitAddLongPairwise(context, signed: false, accumulate: true);
        }

        public static void Uaddl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
            }
        }

        public static void Uaddlp_V(ArmEmitterContext context)
        {
            EmitAddLongPairwise(context, signed: false, accumulate: false);
        }

        public static void Uaddlv_V(ArmEmitterContext context)
        {
            EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
        }

        public static void Uaddw_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                m = context.AddIntrinsic(movInst, m);

                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
            }
            else
            {
                EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
            }
        }

        public static void Uhadd_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);
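
                // Same overflow-free halving add as Shadd_V, but with a logical shift
                // for the unsigned variant: (n & m) + ((n ^ m) >>> 1) == (n + m) >>> 1.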
                Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
                Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);

                Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld;

                res2 = context.AddIntrinsic(shiftInst, res2, Const(1));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                res = context.AddIntrinsic(addInst, res, res2);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return context.ShiftRightUI(context.Add(op1, op2), Const(1));
                });
            }
        }

        public static void Uhsub_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size < 2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);
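
                // Since pavg(n, m) == (n + m + 1) >>> 1, subtracting it from n gives
                // (n - m) >> 1 with floor rounding; no bias is needed for unsigned inputs.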
                Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;

                Operand res = context.AddIntrinsic(avgInst, n, m);

                Intrinsic subInst = X86PsubInstruction[op.Size];

                res = context.AddIntrinsic(subInst, n, res);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return context.ShiftRightUI(context.Subtract(op1, op2), Const(1));
                });
            }
        }

        public static void Umax_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Intrinsic maxInst = X86PmaxuInstruction[op.Size];

                Operand res = context.AddIntrinsic(maxInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Delegate dlg = new _U64_U64_U64(Math.Max);

                EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
            }
        }

        public static void Umaxp_V(ArmEmitterContext context)
        {
            Delegate dlg = new _U64_U64_U64(Math.Max);

            EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
        }

        public static void Umaxv_V(ArmEmitterContext context)
        {
            Delegate dlg = new _U64_U64_U64(Math.Max);

            EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
        }

        public static void Umin_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Intrinsic minInst = X86PminuInstruction[op.Size];

                Operand res = context.AddIntrinsic(minInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Delegate dlg = new _U64_U64_U64(Math.Min);

                EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
            }
        }

        public static void Uminp_V(ArmEmitterContext context)
        {
            Delegate dlg = new _U64_U64_U64(Math.Min);

            EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
        }

        public static void Uminv_V(ArmEmitterContext context)
        {
            Delegate dlg = new _U64_U64_U64(Math.Min);

            EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(dlg, op1, op2));
        }

        public static void Umlal_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;

                Operand res = context.AddIntrinsic(mullInst, n, m);

                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                context.Copy(d, context.AddIntrinsic(addInst, d, res));
            }
            else
            {
                EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
                {
                    return context.Add(op1, context.Multiply(op2, op3));
                });
            }
        }

        public static void Umlal_Ve(ArmEmitterContext context)
        {
            EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
            {
                return context.Add(op1, context.Multiply(op2, op3));
            });
        }

        public static void Umlsl_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovzxbw : Intrinsic.X86Pmovzxwd;

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;

                Operand res = context.AddIntrinsic(mullInst, n, m);

                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(d, context.AddIntrinsic(subInst, d, res));
            }
            else
            {
                EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
                {
                    return context.Subtract(op1, context.Multiply(op2, op3));
                });
            }
        }

        public static void Umlsl_Ve(ArmEmitterContext context)
        {
            EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
            {
                return context.Subtract(op1, context.Multiply(op2, op3));
            });
        }

        public static void Umull_V(ArmEmitterContext context)
        {
            EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
        }

        public static void Umull_Ve(ArmEmitterContext context)
        {
            EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
        }

        public static void Uqadd_S(ArmEmitterContext context)
        {
            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
        }

        public static void Uqadd_V(ArmEmitterContext context)
        {
            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
        }

        public static void Uqsub_S(ArmEmitterContext context)
        {
            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
        }

        public static void Uqsub_V(ArmEmitterContext context)
        {
            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
        }

        public static void Uqxtn_S(ArmEmitterContext context)
        {
            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
        }

        public static void Uqxtn_V(ArmEmitterContext context)
        {
            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
        }

        public static void Urhadd_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size < 2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);
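
                // PAVG already computes (n + m + 1) >>> 1, which is exactly the unsigned
                // rounding halving add.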
                Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;

                Operand res = context.AddIntrinsic(avgInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    Operand res = context.Add(op1, op2);

                    res = context.Add(res, Const(1L));

                    return context.ShiftRightUI(res, Const(1));
                });
            }
        }

        public static void Usqadd_S(ArmEmitterContext context)
        {
            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
        }

        public static void Usqadd_V(ArmEmitterContext context)
        {
            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
        }

        public static void Usubl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                n = context.AddIntrinsic(movInst, n);
                m = context.AddIntrinsic(movInst, m);

                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }

        public static void Usubw_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
                }

                Intrinsic movInst = X86PmovzxInstruction[op.Size];

                m = context.AddIntrinsic(movInst, m);

                Intrinsic subInst = X86PsubInstruction[op.Size + 1];

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
            }
            else
            {
                EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }
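
        // abs(value) as a branchless select: value when value >= 0, -value otherwise.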
        private static Operand EmitAbs(ArmEmitterContext context, Operand value)
        {
            Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));

            return context.ConditionalSelect(isPositive, value, context.Negate(value));
        }
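
        // Shared emitter for SADDLP/UADDLP and SADALP/UADALP: adds adjacent element pairs
        // into elements of double the size, optionally accumulating into the destination.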
        private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int pairs = op.GetPairsCount() >> op.Size;

            for (int index = 0; index < pairs; index++)
            {
                int pairIndex = index << 1;

                Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
                Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);

                Operand e = context.Add(ne0, ne1);

                if (accumulate)
                {
                    Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);

                    e = context.Add(e, de);
                }

                res = EmitVectorInsert(context, res, e, index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }
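
        // Shared core of SQDMULH/SQRDMULH: the high eSize bits of the doubled product
        // 2 * n * m. Without rounding this reduces to one arithmetic shift by eSize - 1;
        // with rounding, a round constant is added first, and the result is negated in the
        // overflow case (n == m == INT_MIN) so that the saturating caller clamps it.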
        private static Operand EmitDoublingMultiplyHighHalf(
            ArmEmitterContext context,
            Operand n,
            Operand m,
            bool round)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            int eSize = 8 << op.Size;

            Operand res = context.Multiply(n, m);

            if (!round)
            {
                res = context.ShiftRightSI(res, Const(eSize - 1));
            }
            else
            {
                long roundConst = 1L << (eSize - 1);

                res = context.ShiftLeft(res, Const(1));
                res = context.Add(res, Const(roundConst));
                res = context.ShiftRightSI(res, Const(eSize));

                Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue));

                res = context.ConditionalSelect(isIntMin, context.Negate(res), res);
            }

            return res;
        }
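
        // Shared emitter for ADDHN/SUBHN and their rounding variants: combines the wide
        // elements, optionally rounds, then keeps the high half of each result. The "2"
        // encodings (Simd128) write the narrowed elements to the upper half of Rd.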
        private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            int elems = 8 >> op.Size;
            int eSize = 8 << op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

            long roundConst = 1L << (eSize - 1);

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1);

                Operand de = emit(ne, me);

                if (round)
                {
                    de = context.Add(de, Const(roundConst));
                }

                de = context.ShiftRightUI(de, Const(eSize));

                res = EmitVectorInsert(context, res, de, part + index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
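
        // Rounds with ROUNDSS/ROUNDSD (or ROUNDPS/ROUNDPD below) using the immediate that
        // encodes the requested FPRoundingMode, then zeroes the unused upper lanes.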
        public static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;

            Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));

            if ((op.Size & 1) != 0)
            {
                res = context.VectorZeroUpper64(res);
            }
            else
            {
                res = context.VectorZeroUpper96(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps;

            Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
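
        // Builds a per-element mask that is all ones where the input is a quiet NaN:
        // the element compares unordered with itself and has its quiet bit set
        // (bit 22 for single precision, bit 51 for double precision).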
        private static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
        {
            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            if ((op.Size & 1) == 0)
            {
                const int QBit = 22;

                Operand qMask = X86GetAllElements(context, 1 << QBit);

                Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmpps, opF, opF, Const((int)CmpCondition.UnorderedQ));

                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
                mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));

                return context.AddIntrinsic(Intrinsic.X86Andps, mask1, mask2);
            }
            else /* if ((op.Size & 1) == 1) */
            {
                const int QBit = 51;

                Operand qMask = X86GetAllElements(context, 1L << QBit);

                Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmppd, opF, opF, Const((int)CmpCondition.UnorderedQ));

                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
                mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal));

                return context.AddIntrinsic(Intrinsic.X86Andpd, mask1, mask2);
            }
        }
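
        // FMAXNM/FMINNM semantics: when exactly one operand is a quiet NaN, replace it
        // with -Inf (max) or +Inf (min) so that the numeric operand wins the max/min.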
        private static void EmitSse41MaxMinNumOpF(ArmEmitterContext context, bool isMaxNum, bool scalar)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand d = GetVec(op.Rd);
            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            Operand nQNaNMask = EmitSse2VectorIsQNaNOpF(context, n);
            Operand mQNaNMask = EmitSse2VectorIsQNaNOpF(context, m);

            Operand nNum = context.Copy(n);
            Operand mNum = context.Copy(m);

            int sizeF = op.Size & 1;

            if (sizeF == 0)
            {
                Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);

                Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
                Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);

                nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
                mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);

                Operand res = context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);

                if (scalar)
                {
                    res = context.VectorZeroUpper96(res);
                }
                else if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(d, res);
            }
            else /* if (sizeF == 1) */
            {
                Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);

                Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
                Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);

                nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
                mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);

                Operand res = context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);

                if (scalar)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(d, res);
            }
        }

        private enum AddSub
        {
            None,
            Add,
            Subtract
        }

        private static void EmitSse41Mul_AddSub(ArmEmitterContext context, AddSub addSub)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            Operand res = null;

            if (op.Size == 0)
            {
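                // x86 has no packed 8-bit multiply: multiply the high bytes of each word
                // via a 16-bit shift, the low bytes directly, then blend the two results
                // byte by byte.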
                Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8));
                Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8));

                res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8);
                res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8));

                Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);

                Operand mask = X86GetAllElements(context, 0x00FF00FF);

                res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask);
            }
            else if (op.Size == 1)
            {
                res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
            }
            else
            {
                res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m);
            }

            Operand d = GetVec(op.Rd);

            if (addSub == AddSub.Add)
            {
                switch (op.Size)
                {
                    case 0: res = context.AddIntrinsic(Intrinsic.X86Paddb, d, res); break;
                    case 1: res = context.AddIntrinsic(Intrinsic.X86Paddw, d, res); break;
                    case 2: res = context.AddIntrinsic(Intrinsic.X86Paddd, d, res); break;
                    case 3: res = context.AddIntrinsic(Intrinsic.X86Paddq, d, res); break;
                }
            }
            else if (addSub == AddSub.Subtract)
            {
                switch (op.Size)
                {
                    case 0: res = context.AddIntrinsic(Intrinsic.X86Psubb, d, res); break;
                    case 1: res = context.AddIntrinsic(Intrinsic.X86Psubw, d, res); break;
                    case 2: res = context.AddIntrinsic(Intrinsic.X86Psubd, d, res); break;
                    case 3: res = context.AddIntrinsic(Intrinsic.X86Psubq, d, res); break;
                }
            }

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(d, res);
        }
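
        // Signed absolute difference: build an n > m mask with PCMPGT, then select
        // n - m where the mask is set and m - n elsewhere.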
        private static void EmitSse41Sabd(
            ArmEmitterContext context,
            OpCodeSimdReg op,
            Operand n,
            Operand m,
            bool isLong)
        {
            int size = isLong ? op.Size + 1 : op.Size;

            Intrinsic cmpgtInst = X86PcmpgtInstruction[size];

            Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m);

            Intrinsic subInst = X86PsubInstruction[size];

            Operand res = context.AddIntrinsic(subInst, n, m);
            res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);

            Operand res2 = context.AddIntrinsic(subInst, m, n);
            res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);

            res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);

            if (!isLong && op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
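
        // Unsigned absolute difference: derive an n > m mask from max(m, n) != m,
        // then select n - m or m - n accordingly.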
        private static void EmitSse41Uabd(
            ArmEmitterContext context,
            OpCodeSimdReg op,
            Operand n,
            Operand m,
            bool isLong)
        {
            int size = isLong ? op.Size + 1 : op.Size;

            Intrinsic maxInst = X86PmaxuInstruction[size];

            Operand max = context.AddIntrinsic(maxInst, m, n);

            Intrinsic cmpeqInst = X86PcmpeqInstruction[size];

            Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m);

            Operand onesMask = X86GetAllElements(context, -1L);

            cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask);

            Intrinsic subInst = X86PsubInstruction[size];

            Operand res = context.AddIntrinsic(subInst, n, m);
            Operand res2 = context.AddIntrinsic(subInst, m, n);

            res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
            res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);

            res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);

            if (!isLong && op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
    }
}