// InstEmitMemory.cs
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
// Memory regions addressable by the generic load/store helpers:
// Local  -> per-thread local memory (LD/ST).
// Shared -> work-group shared memory (LDS/STS).
private enum MemoryRegion
{
    Local,
    Shared
}
  15. public static void Ald(EmitterContext context)
  16. {
  17. OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
  18. Operand primVertex = context.Copy(GetSrcC(context));
  19. for (int index = 0; index < op.Count; index++)
  20. {
  21. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  22. if (rd.IsRZ)
  23. {
  24. break;
  25. }
  26. Operand src = Attribute(op.AttributeOffset + index * 4);
  27. context.FlagAttributeRead(src.Value);
  28. context.Copy(Register(rd), context.LoadAttribute(src, primVertex));
  29. }
  30. }
  31. public static void Ast(EmitterContext context)
  32. {
  33. OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
  34. for (int index = 0; index < op.Count; index++)
  35. {
  36. if (op.Rd.Index + index > RegisterConsts.RegisterZeroIndex)
  37. {
  38. break;
  39. }
  40. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  41. Operand dest = Attribute(op.AttributeOffset + index * 4);
  42. context.FlagAttributeWritten(dest.Value);
  43. context.Copy(dest, Register(rd));
  44. }
  45. }
  46. public static void Atom(EmitterContext context)
  47. {
  48. OpCodeAtom op = (OpCodeAtom)context.CurrOp;
  49. ReductionType type = (ReductionType)op.RawOpCode.Extract(49, 2);
  50. int sOffset = (op.RawOpCode.Extract(28, 20) << 12) >> 12;
  51. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, sOffset);
  52. Operand value = GetSrcB(context);
  53. Operand res = EmitAtomicOp(
  54. context,
  55. Instruction.MrGlobal,
  56. op.AtomicOp,
  57. type,
  58. addrLow,
  59. addrHigh,
  60. value);
  61. context.Copy(GetDest(context), res);
  62. }
  63. public static void Atoms(EmitterContext context)
  64. {
  65. OpCodeAtom op = (OpCodeAtom)context.CurrOp;
  66. ReductionType type = op.RawOpCode.Extract(28, 2) switch
  67. {
  68. 0 => ReductionType.U32,
  69. 1 => ReductionType.S32,
  70. 2 => ReductionType.U64,
  71. _ => ReductionType.S64
  72. };
  73. Operand offset = context.ShiftRightU32(GetSrcA(context), Const(2));
  74. int sOffset = (op.RawOpCode.Extract(30, 22) << 10) >> 10;
  75. offset = context.IAdd(offset, Const(sOffset));
  76. Operand value = GetSrcB(context);
  77. Operand res = EmitAtomicOp(
  78. context,
  79. Instruction.MrShared,
  80. op.AtomicOp,
  81. type,
  82. offset,
  83. Const(0),
  84. value);
  85. context.Copy(GetDest(context), res);
  86. }
  87. public static void Bar(EmitterContext context)
  88. {
  89. OpCodeBarrier op = (OpCodeBarrier)context.CurrOp;
  90. // TODO: Support other modes.
  91. if (op.Mode == BarrierMode.Sync)
  92. {
  93. context.Barrier();
  94. }
  95. else
  96. {
  97. context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.Mode}.");
  98. }
  99. }
  100. public static void Ipa(EmitterContext context)
  101. {
  102. OpCodeIpa op = (OpCodeIpa)context.CurrOp;
  103. context.FlagAttributeRead(op.AttributeOffset);
  104. Operand res = Attribute(op.AttributeOffset);
  105. if (op.AttributeOffset >= AttributeConsts.UserAttributeBase &&
  106. op.AttributeOffset < AttributeConsts.UserAttributeEnd)
  107. {
  108. int index = (op.AttributeOffset - AttributeConsts.UserAttributeBase) >> 4;
  109. if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
  110. {
  111. res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));
  112. }
  113. }
  114. if (op.Mode == InterpolationMode.Default)
  115. {
  116. Operand srcB = GetSrcB(context);
  117. res = context.FPMultiply(res, srcB);
  118. }
  119. res = context.FPSaturate(res, op.Saturate);
  120. context.Copy(GetDest(context), res);
  121. }
  122. public static void Isberd(EmitterContext context)
  123. {
  124. // This instruction performs a load from ISBE memory,
  125. // however it seems to be only used to get some vertex
  126. // input data, so we instead propagate the offset so that
  127. // it can be used on the attribute load.
  128. context.Copy(GetDest(context), GetSrcA(context));
  129. }
// LD: load from per-thread local memory.
public static void Ld(EmitterContext context)
{
    EmitLoad(context, MemoryRegion.Local);
}
  134. public static void Ldc(EmitterContext context)
  135. {
  136. OpCodeLdc op = (OpCodeLdc)context.CurrOp;
  137. if (op.Size > IntegerSize.B64)
  138. {
  139. context.Config.GpuAccessor.Log($"Invalid LDC size: {op.Size}.");
  140. }
  141. bool isSmallInt = op.Size < IntegerSize.B32;
  142. int count = op.Size == IntegerSize.B64 ? 2 : 1;
  143. Operand slot = Const(op.Slot);
  144. Operand srcA = GetSrcA(context);
  145. if (op.IndexMode == CbIndexMode.Is ||
  146. op.IndexMode == CbIndexMode.Isl)
  147. {
  148. slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
  149. srcA = context.BitwiseAnd(srcA, Const(0xffff));
  150. }
  151. Operand addr = context.IAdd(srcA, Const(op.Offset));
  152. Operand wordOffset = context.ShiftRightU32(addr, Const(2));
  153. Operand bitOffset = GetBitOffset(context, addr);
  154. for (int index = 0; index < count; index++)
  155. {
  156. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  157. if (rd.IsRZ)
  158. {
  159. break;
  160. }
  161. Operand offset = context.IAdd(wordOffset, Const(index));
  162. Operand value = context.LoadConstant(slot, offset);
  163. if (isSmallInt)
  164. {
  165. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  166. }
  167. context.Copy(Register(rd), value);
  168. }
  169. }
// LDG: load from global memory.
public static void Ldg(EmitterContext context)
{
    EmitLoadGlobal(context);
}
// LDS: load from work-group shared memory.
public static void Lds(EmitterContext context)
{
    EmitLoad(context, MemoryRegion.Shared);
}
// MEMBAR: memory barrier.
// CTA level only needs to order memory accesses within the work group;
// any wider level is emitted as a full memory barrier.
public static void Membar(EmitterContext context)
{
    OpCodeMemoryBarrier op = (OpCodeMemoryBarrier)context.CurrOp;

    if (op.Level == BarrierLevel.Cta)
    {
        context.GroupMemoryBarrier();
    }
    else
    {
        context.MemoryBarrier();
    }
}
  190. public static void Out(EmitterContext context)
  191. {
  192. OpCode op = context.CurrOp;
  193. bool emit = op.RawOpCode.Extract(39);
  194. bool cut = op.RawOpCode.Extract(40);
  195. if (!(emit || cut))
  196. {
  197. context.Config.GpuAccessor.Log("Invalid OUT encoding.");
  198. }
  199. if (emit)
  200. {
  201. context.EmitVertex();
  202. }
  203. if (cut)
  204. {
  205. context.EndPrimitive();
  206. }
  207. }
  208. public static void Red(EmitterContext context)
  209. {
  210. OpCodeRed op = (OpCodeRed)context.CurrOp;
  211. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
  212. EmitAtomicOp(
  213. context,
  214. Instruction.MrGlobal,
  215. op.AtomicOp,
  216. op.Type,
  217. addrLow,
  218. addrHigh,
  219. GetDest(context));
  220. }
// ST: store to per-thread local memory.
public static void St(EmitterContext context)
{
    EmitStore(context, MemoryRegion.Local);
}
// STG: store to global memory.
public static void Stg(EmitterContext context)
{
    EmitStoreGlobal(context);
}
// STS: store to work-group shared memory.
public static void Sts(EmitterContext context)
{
    EmitStore(context, MemoryRegion.Shared);
}
  233. private static Operand EmitAtomicOp(
  234. EmitterContext context,
  235. Instruction mr,
  236. AtomicOp op,
  237. ReductionType type,
  238. Operand addrLow,
  239. Operand addrHigh,
  240. Operand value)
  241. {
  242. Operand res = Const(0);
  243. switch (op)
  244. {
  245. case AtomicOp.Add:
  246. if (type == ReductionType.S32 || type == ReductionType.U32)
  247. {
  248. res = context.AtomicAdd(mr, addrLow, addrHigh, value);
  249. }
  250. else
  251. {
  252. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  253. }
  254. break;
  255. case AtomicOp.BitwiseAnd:
  256. if (type == ReductionType.S32 || type == ReductionType.U32)
  257. {
  258. res = context.AtomicAnd(mr, addrLow, addrHigh, value);
  259. }
  260. else
  261. {
  262. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  263. }
  264. break;
  265. case AtomicOp.BitwiseExclusiveOr:
  266. if (type == ReductionType.S32 || type == ReductionType.U32)
  267. {
  268. res = context.AtomicXor(mr, addrLow, addrHigh, value);
  269. }
  270. else
  271. {
  272. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  273. }
  274. break;
  275. case AtomicOp.BitwiseOr:
  276. if (type == ReductionType.S32 || type == ReductionType.U32)
  277. {
  278. res = context.AtomicOr(mr, addrLow, addrHigh, value);
  279. }
  280. else
  281. {
  282. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  283. }
  284. break;
  285. case AtomicOp.Maximum:
  286. if (type == ReductionType.S32)
  287. {
  288. res = context.AtomicMaxS32(mr, addrLow, addrHigh, value);
  289. }
  290. else if (type == ReductionType.U32)
  291. {
  292. res = context.AtomicMaxU32(mr, addrLow, addrHigh, value);
  293. }
  294. else
  295. {
  296. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  297. }
  298. break;
  299. case AtomicOp.Minimum:
  300. if (type == ReductionType.S32)
  301. {
  302. res = context.AtomicMinS32(mr, addrLow, addrHigh, value);
  303. }
  304. else if (type == ReductionType.U32)
  305. {
  306. res = context.AtomicMinU32(mr, addrLow, addrHigh, value);
  307. }
  308. else
  309. {
  310. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  311. }
  312. break;
  313. }
  314. return res;
  315. }
  316. private static void EmitLoad(EmitterContext context, MemoryRegion region)
  317. {
  318. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  319. if (op.Size > IntegerSize.B128)
  320. {
  321. context.Config.GpuAccessor.Log($"Invalid load size: {op.Size}.");
  322. }
  323. bool isSmallInt = op.Size < IntegerSize.B32;
  324. int count = 1;
  325. switch (op.Size)
  326. {
  327. case IntegerSize.B64: count = 2; break;
  328. case IntegerSize.B128: count = 4; break;
  329. }
  330. Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
  331. // Word offset = byte offset / 4 (one word = 4 bytes).
  332. Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
  333. Operand bitOffset = GetBitOffset(context, baseOffset);
  334. for (int index = 0; index < count; index++)
  335. {
  336. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  337. if (rd.IsRZ)
  338. {
  339. break;
  340. }
  341. Operand offset = context.IAdd(wordOffset, Const(index));
  342. Operand value = null;
  343. switch (region)
  344. {
  345. case MemoryRegion.Local: value = context.LoadLocal (offset); break;
  346. case MemoryRegion.Shared: value = context.LoadShared(offset); break;
  347. }
  348. if (isSmallInt)
  349. {
  350. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  351. }
  352. context.Copy(Register(rd), value);
  353. }
  354. }
  355. private static void EmitLoadGlobal(EmitterContext context)
  356. {
  357. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  358. bool isSmallInt = op.Size < IntegerSize.B32;
  359. int count = GetVectorCount(op.Size);
  360. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
  361. Operand bitOffset = GetBitOffset(context, addrLow);
  362. for (int index = 0; index < count; index++)
  363. {
  364. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  365. if (rd.IsRZ)
  366. {
  367. break;
  368. }
  369. Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
  370. if (isSmallInt)
  371. {
  372. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  373. }
  374. context.Copy(Register(rd), value);
  375. }
  376. }
  377. private static void EmitStore(EmitterContext context, MemoryRegion region)
  378. {
  379. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  380. if (op.Size > IntegerSize.B128)
  381. {
  382. context.Config.GpuAccessor.Log($"Invalid store size: {op.Size}.");
  383. }
  384. bool isSmallInt = op.Size < IntegerSize.B32;
  385. int count = 1;
  386. switch (op.Size)
  387. {
  388. case IntegerSize.B64: count = 2; break;
  389. case IntegerSize.B128: count = 4; break;
  390. }
  391. Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
  392. Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
  393. Operand bitOffset = GetBitOffset(context, baseOffset);
  394. for (int index = 0; index < count; index++)
  395. {
  396. bool isRz = op.Rd.IsRZ;
  397. Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);
  398. Operand value = Register(rd);
  399. Operand offset = context.IAdd(wordOffset, Const(index));
  400. if (isSmallInt)
  401. {
  402. Operand word = null;
  403. switch (region)
  404. {
  405. case MemoryRegion.Local: word = context.LoadLocal (offset); break;
  406. case MemoryRegion.Shared: word = context.LoadShared(offset); break;
  407. }
  408. value = InsertSmallInt(context, op.Size, bitOffset, word, value);
  409. }
  410. switch (region)
  411. {
  412. case MemoryRegion.Local: context.StoreLocal (offset, value); break;
  413. case MemoryRegion.Shared: context.StoreShared(offset, value); break;
  414. }
  415. }
  416. }
  417. private static void EmitStoreGlobal(EmitterContext context)
  418. {
  419. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  420. bool isSmallInt = op.Size < IntegerSize.B32;
  421. int count = GetVectorCount(op.Size);
  422. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
  423. Operand bitOffset = GetBitOffset(context, addrLow);
  424. for (int index = 0; index < count; index++)
  425. {
  426. bool isRz = op.Rd.IsRZ;
  427. Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);
  428. Operand value = Register(rd);
  429. if (isSmallInt)
  430. {
  431. Operand word = context.LoadGlobal(addrLow, addrHigh);
  432. value = InsertSmallInt(context, op.Size, bitOffset, word, value);
  433. }
  434. context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
  435. }
  436. }
  437. private static int GetVectorCount(IntegerSize size)
  438. {
  439. switch (size)
  440. {
  441. case IntegerSize.B64:
  442. return 2;
  443. case IntegerSize.B128:
  444. case IntegerSize.UB128:
  445. return 4;
  446. }
  447. return 1;
  448. }
  449. private static (Operand, Operand) Get40BitsAddress(
  450. EmitterContext context,
  451. Register ra,
  452. bool extended,
  453. int offset)
  454. {
  455. Operand addrLow = GetSrcA(context);
  456. Operand addrHigh;
  457. if (extended && !ra.IsRZ)
  458. {
  459. addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
  460. }
  461. else
  462. {
  463. addrHigh = Const(0);
  464. }
  465. Operand offs = Const(offset);
  466. addrLow = context.IAdd(addrLow, offs);
  467. if (extended)
  468. {
  469. Operand carry = context.ICompareLessUnsigned(addrLow, offs);
  470. addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
  471. }
  472. return (addrLow, addrHigh);
  473. }
// Returns the bit offset of the value inside its containing word.
// Note: bit offset = (baseOffset & 0b11) * 8.
// Addresses should be always aligned to the integer type,
// so we don't need to take unaligned addresses into account.
private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
{
    return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
}
  481. private static Operand ExtractSmallInt(
  482. EmitterContext context,
  483. IntegerSize size,
  484. Operand bitOffset,
  485. Operand value)
  486. {
  487. value = context.ShiftRightU32(value, bitOffset);
  488. switch (size)
  489. {
  490. case IntegerSize.U8: value = ZeroExtendTo32(context, value, 8); break;
  491. case IntegerSize.U16: value = ZeroExtendTo32(context, value, 16); break;
  492. case IntegerSize.S8: value = SignExtendTo32(context, value, 8); break;
  493. case IntegerSize.S16: value = SignExtendTo32(context, value, 16); break;
  494. }
  495. return value;
  496. }
  497. private static Operand InsertSmallInt(
  498. EmitterContext context,
  499. IntegerSize size,
  500. Operand bitOffset,
  501. Operand word,
  502. Operand value)
  503. {
  504. switch (size)
  505. {
  506. case IntegerSize.U8:
  507. case IntegerSize.S8:
  508. value = context.BitwiseAnd(value, Const(0xff));
  509. value = context.BitfieldInsert(word, value, bitOffset, Const(8));
  510. break;
  511. case IntegerSize.U16:
  512. case IntegerSize.S16:
  513. value = context.BitwiseAnd(value, Const(0xffff));
  514. value = context.BitfieldInsert(word, value, bitOffset, Const(16));
  515. break;
  516. }
  517. return value;
  518. }
  519. }
  520. }