// InstEmitMemory.cs
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
        // Memory region targeted by the shared load/store emitters
        // (EmitLoad/EmitStore, used by the Ld/St and Lds/Sts handlers).
        private enum MemoryRegion
        {
            Local,
            Shared
        }
  15. public static void Al2p(EmitterContext context)
  16. {
  17. OpCodeAl2p op = (OpCodeAl2p)context.CurrOp;
  18. if (op.Rd.IsRZ)
  19. {
  20. return;
  21. }
  22. context.Copy(Register(op.Rd), context.IAdd(Register(op.Ra), Const(op.Immediate)));
  23. }
        /// <summary>
        /// ALD: loads one or more input attributes into consecutive registers.
        /// </summary>
        public static void Ald(EmitterContext context)
        {
            OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;

            // Vertex index operand used by the indexed attribute load paths.
            Operand primVertex = context.Copy(GetSrcC(context));

            for (int index = 0; index < op.Count; index++)
            {
                Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

                // Stop once the destination range reaches the zero register.
                if (rd.IsRZ)
                {
                    break;
                }

                if (op.Phys)
                {
                    // Physical (indexed) access: the attribute address comes from Ra,
                    // as a byte offset relative to the user attribute base; >> 2
                    // converts it to a 32-bit word index.
                    Operand userAttrOffset = context.ISubtract(GetSrcA(context), Const(AttributeConsts.UserAttributeBase));
                    Operand userAttrIndex = context.ShiftRightU32(userAttrOffset, Const(2));

                    context.Copy(Register(rd), context.LoadAttribute(Const(AttributeConsts.UserAttributeBase), userAttrIndex, primVertex));

                    context.Config.SetUsedFeature(FeatureFlags.IaIndexing);
                }
                else if (op.Rc.IsRZ)
                {
                    // Direct access with a compile-time attribute offset.
                    Operand src = Attribute(op.AttributeOffset + index * 4);

                    context.FlagAttributeRead(src.Value);

                    context.Copy(Register(rd), src);
                }
                else
                {
                    // Compile-time offset, but with an explicit vertex index (primVertex).
                    Operand src = Const(op.AttributeOffset + index * 4);

                    context.FlagAttributeRead(src.Value);

                    context.Copy(Register(rd), context.LoadAttribute(src, Const(0), primVertex));
                }
            }
        }
        /// <summary>
        /// AST: stores one or more consecutive registers to output attributes.
        /// </summary>
        public static void Ast(EmitterContext context)
        {
            OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;

            for (int index = 0; index < op.Count; index++)
            {
                // Stop if the source register range would go past the zero register.
                if (op.Rd.Index + index > RegisterConsts.RegisterZeroIndex)
                {
                    break;
                }

                Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

                if (op.Phys)
                {
                    // Indexed store: Ra holds a byte offset relative to the user
                    // attribute base; >> 2 converts it to a 32-bit word index.
                    Operand userAttrOffset = context.ISubtract(GetSrcA(context), Const(AttributeConsts.UserAttributeBase));
                    Operand userAttrIndex = context.ShiftRightU32(userAttrOffset, Const(2));

                    context.StoreAttribute(Const(AttributeConsts.UserAttributeBase), userAttrIndex, Register(rd));

                    context.Config.SetUsedFeature(FeatureFlags.OaIndexing);
                }
                else
                {
                    // Direct store at a compile-time attribute offset.
                    Operand dest = Attribute(op.AttributeOffset + index * 4);

                    context.FlagAttributeWritten(dest.Value);

                    context.Copy(dest, Register(rd));
                }
            }
        }
        /// <summary>
        /// ATOM: global memory atomic operation, writing the old value to Rd.
        /// </summary>
        public static void Atom(EmitterContext context)
        {
            OpCodeAtom op = (OpCodeAtom)context.CurrOp;

            ReductionType type = (ReductionType)op.RawOpCode.Extract(49, 2);

            // Sign-extend the 20-bit immediate offset (arithmetic shift pair).
            int sOffset = (op.RawOpCode.Extract(28, 20) << 12) >> 12;

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, sOffset);

            Operand value = GetSrcB(context);

            Operand res = EmitAtomicOp(
                context,
                Instruction.MrGlobal,
                op.AtomicOp,
                type,
                addrLow,
                addrHigh,
                value);

            context.Copy(GetDest(context), res);
        }
        /// <summary>
        /// ATOMS: shared memory atomic operation, writing the old value to Rd.
        /// </summary>
        public static void Atoms(EmitterContext context)
        {
            OpCodeAtom op = (OpCodeAtom)context.CurrOp;

            // The type field here differs from ATOM's encoding.
            ReductionType type = op.RawOpCode.Extract(28, 2) switch
            {
                0 => ReductionType.U32,
                1 => ReductionType.S32,
                2 => ReductionType.U64,
                _ => ReductionType.S64
            };

            // Shared memory is addressed in 32-bit words; Ra holds a byte address.
            Operand offset = context.ShiftRightU32(GetSrcA(context), Const(2));

            // Sign-extend the 22-bit immediate offset (arithmetic shift pair).
            // NOTE(review): it is added after the word conversion, so it appears
            // to be a word offset already — confirm against hardware docs.
            int sOffset = (op.RawOpCode.Extract(30, 22) << 10) >> 10;

            offset = context.IAdd(offset, Const(sOffset));

            Operand value = GetSrcB(context);

            Operand res = EmitAtomicOp(
                context,
                Instruction.MrShared,
                op.AtomicOp,
                type,
                offset,
                Const(0),
                value);

            context.Copy(GetDest(context), res);
        }
  122. public static void Bar(EmitterContext context)
  123. {
  124. OpCodeBarrier op = (OpCodeBarrier)context.CurrOp;
  125. // TODO: Support other modes.
  126. if (op.Mode == BarrierMode.Sync)
  127. {
  128. context.Barrier();
  129. }
  130. else
  131. {
  132. context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.Mode}.");
  133. }
  134. }
        /// <summary>
        /// IPA: interpolates a fragment shader input attribute.
        /// </summary>
        public static void Ipa(EmitterContext context)
        {
            OpCodeIpa op = (OpCodeIpa)context.CurrOp;

            context.FlagAttributeRead(op.AttributeOffset);

            Operand res;

            if (op.Idx)
            {
                // Indexed access: Ra holds a byte offset relative to the user
                // attribute base; >> 2 converts it to a 32-bit word index.
                Operand userAttrOffset = context.ISubtract(GetSrcA(context), Const(AttributeConsts.UserAttributeBase));
                Operand userAttrIndex = context.ShiftRightU32(userAttrOffset, Const(2));

                res = context.LoadAttribute(Const(AttributeConsts.UserAttributeBase), userAttrIndex, Const(0));

                // Multiply by W unconditionally here; the per-attribute interpolation
                // qualifier cannot be checked for a dynamic index (the direct path
                // below only does this for perspective-interpolated attributes).
                res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));

                context.Config.SetUsedFeature(FeatureFlags.IaIndexing);
            }
            else
            {
                res = Attribute(op.AttributeOffset);

                if (op.AttributeOffset >= AttributeConsts.UserAttributeBase &&
                    op.AttributeOffset < AttributeConsts.UserAttributeEnd)
                {
                    // Each user attribute vector occupies 16 bytes.
                    int index = (op.AttributeOffset - AttributeConsts.UserAttributeBase) >> 4;

                    // Only perspective-interpolated attributes get multiplied by W.
                    if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
                    {
                        res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));
                    }
                }
            }

            if (op.Mode == InterpolationMode.Default)
            {
                Operand srcB = GetSrcB(context);

                res = context.FPMultiply(res, srcB);
            }

            res = context.FPSaturate(res, op.Saturate);

            context.Copy(GetDest(context), res);
        }
  169. public static void Isberd(EmitterContext context)
  170. {
  171. // This instruction performs a load from ISBE memory,
  172. // however it seems to be only used to get some vertex
  173. // input data, so we instead propagate the offset so that
  174. // it can be used on the attribute load.
  175. context.Copy(GetDest(context), GetSrcA(context));
  176. }
  177. public static void Ld(EmitterContext context)
  178. {
  179. EmitLoad(context, MemoryRegion.Local);
  180. }
        /// <summary>
        /// LDC: load from a constant buffer slot.
        /// </summary>
        public static void Ldc(EmitterContext context)
        {
            OpCodeLdc op = (OpCodeLdc)context.CurrOp;

            if (op.Size > IntegerSize.B64)
            {
                context.Config.GpuAccessor.Log($"Invalid LDC size: {op.Size}.");
            }

            // Sub-word loads need extraction/extension after the word read.
            bool isSmallInt = op.Size < IntegerSize.B32;

            // 64-bit loads write two consecutive registers.
            int count = op.Size == IntegerSize.B64 ? 2 : 1;

            Operand slot = Const(op.Slot);
            Operand srcA = GetSrcA(context);

            if (op.IndexMode == CbIndexMode.Is ||
                op.IndexMode == CbIndexMode.Isl)
            {
                // Indexed slot modes: the high 16 bits of Ra are added to the slot
                // number, and only the low 16 bits contribute to the address.
                slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
                srcA = context.BitwiseAnd(srcA, Const(0xffff));
            }

            Operand addr = context.IAdd(srcA, Const(op.Offset));

            // Constant buffers are accessed in 32-bit words.
            Operand wordOffset = context.ShiftRightU32(addr, Const(2));

            Operand bitOffset = GetBitOffset(context, addr);

            for (int index = 0; index < count; index++)
            {
                Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

                // Stop once the destination range reaches the zero register.
                if (rd.IsRZ)
                {
                    break;
                }

                Operand offset = context.IAdd(wordOffset, Const(index));

                Operand value = context.LoadConstant(slot, offset);

                if (isSmallInt)
                {
                    value = ExtractSmallInt(context, op.Size, bitOffset, value);
                }

                context.Copy(Register(rd), value);
            }
        }
  217. public static void Ldg(EmitterContext context)
  218. {
  219. EmitLoadGlobal(context);
  220. }
  221. public static void Lds(EmitterContext context)
  222. {
  223. EmitLoad(context, MemoryRegion.Shared);
  224. }
  225. public static void Membar(EmitterContext context)
  226. {
  227. OpCodeMemoryBarrier op = (OpCodeMemoryBarrier)context.CurrOp;
  228. if (op.Level == BarrierLevel.Cta)
  229. {
  230. context.GroupMemoryBarrier();
  231. }
  232. else
  233. {
  234. context.MemoryBarrier();
  235. }
  236. }
  237. public static void Out(EmitterContext context)
  238. {
  239. OpCode op = context.CurrOp;
  240. bool emit = op.RawOpCode.Extract(39);
  241. bool cut = op.RawOpCode.Extract(40);
  242. if (!(emit || cut))
  243. {
  244. context.Config.GpuAccessor.Log("Invalid OUT encoding.");
  245. }
  246. if (emit)
  247. {
  248. context.EmitVertex();
  249. }
  250. if (cut)
  251. {
  252. context.EndPrimitive();
  253. }
  254. }
        /// <summary>
        /// RED: global memory reduction (an atomic whose result is discarded).
        /// </summary>
        public static void Red(EmitterContext context)
        {
            OpCodeRed op = (OpCodeRed)context.CurrOp;

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

            // The Rd field supplies the source value here; nothing is written back.
            EmitAtomicOp(
                context,
                Instruction.MrGlobal,
                op.AtomicOp,
                op.Type,
                addrLow,
                addrHigh,
                GetDest(context));
        }
  268. public static void St(EmitterContext context)
  269. {
  270. EmitStore(context, MemoryRegion.Local);
  271. }
  272. public static void Stg(EmitterContext context)
  273. {
  274. EmitStoreGlobal(context);
  275. }
  276. public static void Sts(EmitterContext context)
  277. {
  278. EmitStore(context, MemoryRegion.Shared);
  279. }
  280. private static Operand EmitAtomicOp(
  281. EmitterContext context,
  282. Instruction mr,
  283. AtomicOp op,
  284. ReductionType type,
  285. Operand addrLow,
  286. Operand addrHigh,
  287. Operand value)
  288. {
  289. Operand res = Const(0);
  290. switch (op)
  291. {
  292. case AtomicOp.Add:
  293. if (type == ReductionType.S32 || type == ReductionType.U32)
  294. {
  295. res = context.AtomicAdd(mr, addrLow, addrHigh, value);
  296. }
  297. else
  298. {
  299. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  300. }
  301. break;
  302. case AtomicOp.BitwiseAnd:
  303. if (type == ReductionType.S32 || type == ReductionType.U32)
  304. {
  305. res = context.AtomicAnd(mr, addrLow, addrHigh, value);
  306. }
  307. else
  308. {
  309. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  310. }
  311. break;
  312. case AtomicOp.BitwiseExclusiveOr:
  313. if (type == ReductionType.S32 || type == ReductionType.U32)
  314. {
  315. res = context.AtomicXor(mr, addrLow, addrHigh, value);
  316. }
  317. else
  318. {
  319. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  320. }
  321. break;
  322. case AtomicOp.BitwiseOr:
  323. if (type == ReductionType.S32 || type == ReductionType.U32)
  324. {
  325. res = context.AtomicOr(mr, addrLow, addrHigh, value);
  326. }
  327. else
  328. {
  329. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  330. }
  331. break;
  332. case AtomicOp.Maximum:
  333. if (type == ReductionType.S32)
  334. {
  335. res = context.AtomicMaxS32(mr, addrLow, addrHigh, value);
  336. }
  337. else if (type == ReductionType.U32)
  338. {
  339. res = context.AtomicMaxU32(mr, addrLow, addrHigh, value);
  340. }
  341. else
  342. {
  343. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  344. }
  345. break;
  346. case AtomicOp.Minimum:
  347. if (type == ReductionType.S32)
  348. {
  349. res = context.AtomicMinS32(mr, addrLow, addrHigh, value);
  350. }
  351. else if (type == ReductionType.U32)
  352. {
  353. res = context.AtomicMinU32(mr, addrLow, addrHigh, value);
  354. }
  355. else
  356. {
  357. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  358. }
  359. break;
  360. }
  361. return res;
  362. }
        /// <summary>
        /// Emits a load from local or shared memory, writing one register per
        /// 32-bit word (1, 2 or 4 registers depending on the load size).
        /// </summary>
        private static void EmitLoad(EmitterContext context, MemoryRegion region)
        {
            OpCodeMemory op = (OpCodeMemory)context.CurrOp;

            if (op.Size > IntegerSize.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid load size: {op.Size}.");
            }

            // Sub-word loads need extraction/extension after the word read.
            bool isSmallInt = op.Size < IntegerSize.B32;

            int count = 1;

            switch (op.Size)
            {
                case IntegerSize.B64: count = 2; break;
                case IntegerSize.B128: count = 4; break;
            }

            Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

            // Word offset = byte offset / 4 (one word = 4 bytes).
            Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

            Operand bitOffset = GetBitOffset(context, baseOffset);

            for (int index = 0; index < count; index++)
            {
                Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

                // Stop once the destination range reaches the zero register.
                if (rd.IsRZ)
                {
                    break;
                }

                Operand offset = context.IAdd(wordOffset, Const(index));

                Operand value = null;

                switch (region)
                {
                    case MemoryRegion.Local: value = context.LoadLocal (offset); break;
                    case MemoryRegion.Shared: value = context.LoadShared(offset); break;
                }

                if (isSmallInt)
                {
                    // Extract the byte/halfword from the loaded word.
                    value = ExtractSmallInt(context, op.Size, bitOffset, value);
                }

                context.Copy(Register(rd), value);
            }
        }
        /// <summary>
        /// Emits a global memory load using a 40-bit address split into a
        /// low/high pair, one register per 32-bit word.
        /// </summary>
        private static void EmitLoadGlobal(EmitterContext context)
        {
            OpCodeMemory op = (OpCodeMemory)context.CurrOp;

            // Sub-word loads need extraction/extension after the word read.
            bool isSmallInt = op.Size < IntegerSize.B32;

            int count = GetVectorCount(op.Size);

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

            Operand bitOffset = GetBitOffset(context, addrLow);

            for (int index = 0; index < count; index++)
            {
                Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

                // Stop once the destination range reaches the zero register.
                if (rd.IsRZ)
                {
                    break;
                }

                // Each successive register reads the next 32-bit word.
                // NOTE(review): the index * 4 addition does not propagate a carry
                // into addrHigh — presumably vector accesses never cross the
                // low-word boundary; confirm.
                Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);

                if (isSmallInt)
                {
                    value = ExtractSmallInt(context, op.Size, bitOffset, value);
                }

                context.Copy(Register(rd), value);
            }
        }
        /// <summary>
        /// Emits a store to local or shared memory, reading one register per
        /// 32-bit word (1, 2 or 4 registers depending on the store size).
        /// </summary>
        private static void EmitStore(EmitterContext context, MemoryRegion region)
        {
            OpCodeMemory op = (OpCodeMemory)context.CurrOp;

            if (op.Size > IntegerSize.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid store size: {op.Size}.");
            }

            // Sub-word stores become read-modify-write sequences below.
            bool isSmallInt = op.Size < IntegerSize.B32;

            int count = 1;

            switch (op.Size)
            {
                case IntegerSize.B64: count = 2; break;
                case IntegerSize.B128: count = 4; break;
            }

            Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

            Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

            Operand bitOffset = GetBitOffset(context, baseOffset);

            for (int index = 0; index < count; index++)
            {
                // When the source is RZ, keep reading RZ instead of advancing
                // past the zero register.
                bool isRz = op.Rd.IsRZ;

                Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);

                Operand value = Register(rd);

                Operand offset = context.IAdd(wordOffset, Const(index));

                if (isSmallInt)
                {
                    // Read-modify-write: load the word, insert the byte/halfword
                    // at the right bit offset, then store the whole word back.
                    Operand word = null;

                    switch (region)
                    {
                        case MemoryRegion.Local: word = context.LoadLocal (offset); break;
                        case MemoryRegion.Shared: word = context.LoadShared(offset); break;
                    }

                    value = InsertSmallInt(context, op.Size, bitOffset, word, value);
                }

                switch (region)
                {
                    case MemoryRegion.Local: context.StoreLocal (offset, value); break;
                    case MemoryRegion.Shared: context.StoreShared(offset, value); break;
                }
            }
        }
        /// <summary>
        /// Emits a global memory store using a 40-bit address split into a
        /// low/high pair, one register per 32-bit word.
        /// </summary>
        private static void EmitStoreGlobal(EmitterContext context)
        {
            OpCodeMemory op = (OpCodeMemory)context.CurrOp;

            // Sub-word stores become read-modify-write sequences below.
            bool isSmallInt = op.Size < IntegerSize.B32;

            int count = GetVectorCount(op.Size);

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

            Operand bitOffset = GetBitOffset(context, addrLow);

            for (int index = 0; index < count; index++)
            {
                // When the source is RZ, keep reading RZ instead of advancing
                // past the zero register.
                bool isRz = op.Rd.IsRZ;

                Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);

                Operand value = Register(rd);

                if (isSmallInt)
                {
                    // Read-modify-write: load the word, splice in the small value.
                    // (Sub-word sizes only ever have count == 1, so using addrLow
                    // without the index offset matches the store below.)
                    Operand word = context.LoadGlobal(addrLow, addrHigh);

                    value = InsertSmallInt(context, op.Size, bitOffset, word, value);
                }

                context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
            }
        }
  484. private static int GetVectorCount(IntegerSize size)
  485. {
  486. switch (size)
  487. {
  488. case IntegerSize.B64:
  489. return 2;
  490. case IntegerSize.B128:
  491. case IntegerSize.UB128:
  492. return 4;
  493. }
  494. return 1;
  495. }
        /// <summary>
        /// Computes a 40-bit global memory address as a (low 32 bits, high bits)
        /// pair. For extended addressing the high part comes from the register
        /// following Ra, with the carry of the low-part addition propagated in.
        /// </summary>
        private static (Operand, Operand) Get40BitsAddress(
            EmitterContext context,
            Register ra,
            bool extended,
            int offset)
        {
            Operand addrLow = GetSrcA(context);
            Operand addrHigh;

            if (extended && !ra.IsRZ)
            {
                // Ra+1 holds the upper bits of the 40-bit address.
                addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
            }
            else
            {
                addrHigh = Const(0);
            }

            Operand offs = Const(offset);

            addrLow = context.IAdd(addrLow, offs);

            if (extended)
            {
                // Unsigned carry detection: if (low + offs) wrapped around,
                // the sum compares less than offs; add the carry to the high part.
                Operand carry = context.ICompareLessUnsigned(addrLow, offs);

                addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
            }

            return (addrLow, addrHigh);
        }
  521. private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  522. {
  523. // Note: bit offset = (baseOffset & 0b11) * 8.
  524. // Addresses should be always aligned to the integer type,
  525. // so we don't need to take unaligned addresses into account.
  526. return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
  527. }
  528. private static Operand ExtractSmallInt(
  529. EmitterContext context,
  530. IntegerSize size,
  531. Operand bitOffset,
  532. Operand value)
  533. {
  534. value = context.ShiftRightU32(value, bitOffset);
  535. switch (size)
  536. {
  537. case IntegerSize.U8: value = ZeroExtendTo32(context, value, 8); break;
  538. case IntegerSize.U16: value = ZeroExtendTo32(context, value, 16); break;
  539. case IntegerSize.S8: value = SignExtendTo32(context, value, 8); break;
  540. case IntegerSize.S16: value = SignExtendTo32(context, value, 16); break;
  541. }
  542. return value;
  543. }
  544. private static Operand InsertSmallInt(
  545. EmitterContext context,
  546. IntegerSize size,
  547. Operand bitOffset,
  548. Operand word,
  549. Operand value)
  550. {
  551. switch (size)
  552. {
  553. case IntegerSize.U8:
  554. case IntegerSize.S8:
  555. value = context.BitwiseAnd(value, Const(0xff));
  556. value = context.BitfieldInsert(word, value, bitOffset, Const(8));
  557. break;
  558. case IntegerSize.U16:
  559. case IntegerSize.S16:
  560. value = context.BitwiseAnd(value, Const(0xffff));
  561. value = context.BitfieldInsert(word, value, bitOffset, Const(16));
  562. break;
  563. }
  564. return value;
  565. }
  566. }
  567. }