// InstEmitMemory.cs
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
  10. private enum MemoryRegion
  11. {
  12. Local,
  13. Shared
  14. }
  15. public static void Ald(EmitterContext context)
  16. {
  17. OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
  18. Operand primVertex = context.Copy(GetSrcC(context));
  19. for (int index = 0; index < op.Count; index++)
  20. {
  21. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  22. if (rd.IsRZ)
  23. {
  24. break;
  25. }
  26. Operand src = Attribute(op.AttributeOffset + index * 4);
  27. context.FlagAttributeRead(src.Value);
  28. context.Copy(Register(rd), context.LoadAttribute(src, primVertex));
  29. }
  30. }
  31. public static void Ast(EmitterContext context)
  32. {
  33. OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
  34. for (int index = 0; index < op.Count; index++)
  35. {
  36. if (op.Rd.Index + index > RegisterConsts.RegisterZeroIndex)
  37. {
  38. break;
  39. }
  40. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  41. Operand dest = Attribute(op.AttributeOffset + index * 4);
  42. context.Copy(dest, Register(rd));
  43. }
  44. }
  45. public static void Atom(EmitterContext context)
  46. {
  47. OpCodeAtom op = (OpCodeAtom)context.CurrOp;
  48. ReductionType type = (ReductionType)op.RawOpCode.Extract(49, 2);
  49. int sOffset = (op.RawOpCode.Extract(28, 20) << 12) >> 12;
  50. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, sOffset);
  51. Operand value = GetSrcB(context);
  52. Operand res = EmitAtomicOp(
  53. context,
  54. Instruction.MrGlobal,
  55. op.AtomicOp,
  56. type,
  57. addrLow,
  58. addrHigh,
  59. value);
  60. context.Copy(GetDest(context), res);
  61. }
  62. public static void Atoms(EmitterContext context)
  63. {
  64. OpCodeAtom op = (OpCodeAtom)context.CurrOp;
  65. ReductionType type = op.RawOpCode.Extract(28, 2) switch
  66. {
  67. 0 => ReductionType.U32,
  68. 1 => ReductionType.S32,
  69. 2 => ReductionType.U64,
  70. _ => ReductionType.S64
  71. };
  72. Operand offset = context.ShiftRightU32(GetSrcA(context), Const(2));
  73. int sOffset = (op.RawOpCode.Extract(30, 22) << 10) >> 10;
  74. offset = context.IAdd(offset, Const(sOffset));
  75. Operand value = GetSrcB(context);
  76. Operand res = EmitAtomicOp(
  77. context,
  78. Instruction.MrShared,
  79. op.AtomicOp,
  80. type,
  81. offset,
  82. Const(0),
  83. value);
  84. context.Copy(GetDest(context), res);
  85. }
  86. public static void Bar(EmitterContext context)
  87. {
  88. OpCodeBarrier op = (OpCodeBarrier)context.CurrOp;
  89. // TODO: Support other modes.
  90. if (op.Mode == BarrierMode.Sync)
  91. {
  92. context.Barrier();
  93. }
  94. else
  95. {
  96. context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.Mode}.");
  97. }
  98. }
  99. public static void Ipa(EmitterContext context)
  100. {
  101. OpCodeIpa op = (OpCodeIpa)context.CurrOp;
  102. context.FlagAttributeRead(op.AttributeOffset);
  103. Operand res = Attribute(op.AttributeOffset);
  104. if (op.AttributeOffset >= AttributeConsts.UserAttributeBase &&
  105. op.AttributeOffset < AttributeConsts.UserAttributeEnd)
  106. {
  107. int index = (op.AttributeOffset - AttributeConsts.UserAttributeBase) >> 4;
  108. if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
  109. {
  110. res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));
  111. }
  112. }
  113. if (op.Mode == InterpolationMode.Default)
  114. {
  115. Operand srcB = GetSrcB(context);
  116. res = context.FPMultiply(res, srcB);
  117. }
  118. res = context.FPSaturate(res, op.Saturate);
  119. context.Copy(GetDest(context), res);
  120. }
  121. public static void Isberd(EmitterContext context)
  122. {
  123. // This instruction performs a load from ISBE memory,
  124. // however it seems to be only used to get some vertex
  125. // input data, so we instead propagate the offset so that
  126. // it can be used on the attribute load.
  127. context.Copy(GetDest(context), GetSrcA(context));
  128. }
  129. public static void Ld(EmitterContext context)
  130. {
  131. EmitLoad(context, MemoryRegion.Local);
  132. }
  133. public static void Ldc(EmitterContext context)
  134. {
  135. OpCodeLdc op = (OpCodeLdc)context.CurrOp;
  136. if (op.Size > IntegerSize.B64)
  137. {
  138. context.Config.GpuAccessor.Log($"Invalid LDC size: {op.Size}.");
  139. }
  140. bool isSmallInt = op.Size < IntegerSize.B32;
  141. int count = op.Size == IntegerSize.B64 ? 2 : 1;
  142. Operand slot = Const(op.Slot);
  143. Operand srcA = GetSrcA(context);
  144. if (op.IndexMode == CbIndexMode.Is ||
  145. op.IndexMode == CbIndexMode.Isl)
  146. {
  147. slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
  148. srcA = context.BitwiseAnd(srcA, Const(0xffff));
  149. }
  150. Operand addr = context.IAdd(srcA, Const(op.Offset));
  151. Operand wordOffset = context.ShiftRightU32(addr, Const(2));
  152. Operand bitOffset = GetBitOffset(context, addr);
  153. for (int index = 0; index < count; index++)
  154. {
  155. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  156. if (rd.IsRZ)
  157. {
  158. break;
  159. }
  160. Operand offset = context.IAdd(wordOffset, Const(index));
  161. Operand value = context.LoadConstant(slot, offset);
  162. if (isSmallInt)
  163. {
  164. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  165. }
  166. context.Copy(Register(rd), value);
  167. }
  168. }
  169. public static void Ldg(EmitterContext context)
  170. {
  171. EmitLoadGlobal(context);
  172. }
  173. public static void Lds(EmitterContext context)
  174. {
  175. EmitLoad(context, MemoryRegion.Shared);
  176. }
  177. public static void Membar(EmitterContext context)
  178. {
  179. OpCodeMemoryBarrier op = (OpCodeMemoryBarrier)context.CurrOp;
  180. if (op.Level == BarrierLevel.Cta)
  181. {
  182. context.GroupMemoryBarrier();
  183. }
  184. else
  185. {
  186. context.MemoryBarrier();
  187. }
  188. }
  189. public static void Out(EmitterContext context)
  190. {
  191. OpCode op = context.CurrOp;
  192. bool emit = op.RawOpCode.Extract(39);
  193. bool cut = op.RawOpCode.Extract(40);
  194. if (!(emit || cut))
  195. {
  196. context.Config.GpuAccessor.Log("Invalid OUT encoding.");
  197. }
  198. if (emit)
  199. {
  200. context.EmitVertex();
  201. }
  202. if (cut)
  203. {
  204. context.EndPrimitive();
  205. }
  206. }
  207. public static void Red(EmitterContext context)
  208. {
  209. OpCodeRed op = (OpCodeRed)context.CurrOp;
  210. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
  211. EmitAtomicOp(
  212. context,
  213. Instruction.MrGlobal,
  214. op.AtomicOp,
  215. op.Type,
  216. addrLow,
  217. addrHigh,
  218. GetDest(context));
  219. }
  220. public static void St(EmitterContext context)
  221. {
  222. EmitStore(context, MemoryRegion.Local);
  223. }
  224. public static void Stg(EmitterContext context)
  225. {
  226. EmitStoreGlobal(context);
  227. }
  228. public static void Sts(EmitterContext context)
  229. {
  230. EmitStore(context, MemoryRegion.Shared);
  231. }
  232. private static Operand EmitAtomicOp(
  233. EmitterContext context,
  234. Instruction mr,
  235. AtomicOp op,
  236. ReductionType type,
  237. Operand addrLow,
  238. Operand addrHigh,
  239. Operand value)
  240. {
  241. Operand res = Const(0);
  242. switch (op)
  243. {
  244. case AtomicOp.Add:
  245. if (type == ReductionType.S32 || type == ReductionType.U32)
  246. {
  247. res = context.AtomicAdd(mr, addrLow, addrHigh, value);
  248. }
  249. else
  250. {
  251. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  252. }
  253. break;
  254. case AtomicOp.BitwiseAnd:
  255. if (type == ReductionType.S32 || type == ReductionType.U32)
  256. {
  257. res = context.AtomicAnd(mr, addrLow, addrHigh, value);
  258. }
  259. else
  260. {
  261. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  262. }
  263. break;
  264. case AtomicOp.BitwiseExclusiveOr:
  265. if (type == ReductionType.S32 || type == ReductionType.U32)
  266. {
  267. res = context.AtomicXor(mr, addrLow, addrHigh, value);
  268. }
  269. else
  270. {
  271. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  272. }
  273. break;
  274. case AtomicOp.BitwiseOr:
  275. if (type == ReductionType.S32 || type == ReductionType.U32)
  276. {
  277. res = context.AtomicOr(mr, addrLow, addrHigh, value);
  278. }
  279. else
  280. {
  281. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  282. }
  283. break;
  284. case AtomicOp.Maximum:
  285. if (type == ReductionType.S32)
  286. {
  287. res = context.AtomicMaxS32(mr, addrLow, addrHigh, value);
  288. }
  289. else if (type == ReductionType.U32)
  290. {
  291. res = context.AtomicMaxU32(mr, addrLow, addrHigh, value);
  292. }
  293. else
  294. {
  295. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  296. }
  297. break;
  298. case AtomicOp.Minimum:
  299. if (type == ReductionType.S32)
  300. {
  301. res = context.AtomicMinS32(mr, addrLow, addrHigh, value);
  302. }
  303. else if (type == ReductionType.U32)
  304. {
  305. res = context.AtomicMinU32(mr, addrLow, addrHigh, value);
  306. }
  307. else
  308. {
  309. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  310. }
  311. break;
  312. }
  313. return res;
  314. }
  315. private static void EmitLoad(EmitterContext context, MemoryRegion region)
  316. {
  317. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  318. if (op.Size > IntegerSize.B128)
  319. {
  320. context.Config.GpuAccessor.Log($"Invalid load size: {op.Size}.");
  321. }
  322. bool isSmallInt = op.Size < IntegerSize.B32;
  323. int count = 1;
  324. switch (op.Size)
  325. {
  326. case IntegerSize.B64: count = 2; break;
  327. case IntegerSize.B128: count = 4; break;
  328. }
  329. Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
  330. // Word offset = byte offset / 4 (one word = 4 bytes).
  331. Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
  332. Operand bitOffset = GetBitOffset(context, baseOffset);
  333. for (int index = 0; index < count; index++)
  334. {
  335. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  336. if (rd.IsRZ)
  337. {
  338. break;
  339. }
  340. Operand offset = context.IAdd(wordOffset, Const(index));
  341. Operand value = null;
  342. switch (region)
  343. {
  344. case MemoryRegion.Local: value = context.LoadLocal (offset); break;
  345. case MemoryRegion.Shared: value = context.LoadShared(offset); break;
  346. }
  347. if (isSmallInt)
  348. {
  349. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  350. }
  351. context.Copy(Register(rd), value);
  352. }
  353. }
  354. private static void EmitLoadGlobal(EmitterContext context)
  355. {
  356. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  357. bool isSmallInt = op.Size < IntegerSize.B32;
  358. int count = GetVectorCount(op.Size);
  359. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
  360. Operand bitOffset = GetBitOffset(context, addrLow);
  361. for (int index = 0; index < count; index++)
  362. {
  363. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  364. if (rd.IsRZ)
  365. {
  366. break;
  367. }
  368. Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
  369. if (isSmallInt)
  370. {
  371. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  372. }
  373. context.Copy(Register(rd), value);
  374. }
  375. }
  376. private static void EmitStore(EmitterContext context, MemoryRegion region)
  377. {
  378. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  379. if (op.Size > IntegerSize.B128)
  380. {
  381. context.Config.GpuAccessor.Log($"Invalid store size: {op.Size}.");
  382. }
  383. bool isSmallInt = op.Size < IntegerSize.B32;
  384. int count = 1;
  385. switch (op.Size)
  386. {
  387. case IntegerSize.B64: count = 2; break;
  388. case IntegerSize.B128: count = 4; break;
  389. }
  390. Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
  391. Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
  392. Operand bitOffset = GetBitOffset(context, baseOffset);
  393. for (int index = 0; index < count; index++)
  394. {
  395. bool isRz = op.Rd.IsRZ;
  396. Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);
  397. Operand value = Register(rd);
  398. Operand offset = context.IAdd(wordOffset, Const(index));
  399. if (isSmallInt)
  400. {
  401. Operand word = null;
  402. switch (region)
  403. {
  404. case MemoryRegion.Local: word = context.LoadLocal (offset); break;
  405. case MemoryRegion.Shared: word = context.LoadShared(offset); break;
  406. }
  407. value = InsertSmallInt(context, op.Size, bitOffset, word, value);
  408. }
  409. switch (region)
  410. {
  411. case MemoryRegion.Local: context.StoreLocal (offset, value); break;
  412. case MemoryRegion.Shared: context.StoreShared(offset, value); break;
  413. }
  414. }
  415. }
  416. private static void EmitStoreGlobal(EmitterContext context)
  417. {
  418. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  419. bool isSmallInt = op.Size < IntegerSize.B32;
  420. int count = GetVectorCount(op.Size);
  421. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
  422. Operand bitOffset = GetBitOffset(context, addrLow);
  423. for (int index = 0; index < count; index++)
  424. {
  425. bool isRz = op.Rd.IsRZ;
  426. Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);
  427. Operand value = Register(rd);
  428. if (isSmallInt)
  429. {
  430. Operand word = context.LoadGlobal(addrLow, addrHigh);
  431. value = InsertSmallInt(context, op.Size, bitOffset, word, value);
  432. }
  433. context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
  434. }
  435. }
  436. private static int GetVectorCount(IntegerSize size)
  437. {
  438. switch (size)
  439. {
  440. case IntegerSize.B64:
  441. return 2;
  442. case IntegerSize.B128:
  443. case IntegerSize.UB128:
  444. return 4;
  445. }
  446. return 1;
  447. }
  448. private static (Operand, Operand) Get40BitsAddress(
  449. EmitterContext context,
  450. Register ra,
  451. bool extended,
  452. int offset)
  453. {
  454. Operand addrLow = GetSrcA(context);
  455. Operand addrHigh;
  456. if (extended && !ra.IsRZ)
  457. {
  458. addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
  459. }
  460. else
  461. {
  462. addrHigh = Const(0);
  463. }
  464. Operand offs = Const(offset);
  465. addrLow = context.IAdd(addrLow, offs);
  466. if (extended)
  467. {
  468. Operand carry = context.ICompareLessUnsigned(addrLow, offs);
  469. addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
  470. }
  471. return (addrLow, addrHigh);
  472. }
  473. private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  474. {
  475. // Note: bit offset = (baseOffset & 0b11) * 8.
  476. // Addresses should be always aligned to the integer type,
  477. // so we don't need to take unaligned addresses into account.
  478. return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
  479. }
  480. private static Operand ExtractSmallInt(
  481. EmitterContext context,
  482. IntegerSize size,
  483. Operand bitOffset,
  484. Operand value)
  485. {
  486. value = context.ShiftRightU32(value, bitOffset);
  487. switch (size)
  488. {
  489. case IntegerSize.U8: value = ZeroExtendTo32(context, value, 8); break;
  490. case IntegerSize.U16: value = ZeroExtendTo32(context, value, 16); break;
  491. case IntegerSize.S8: value = SignExtendTo32(context, value, 8); break;
  492. case IntegerSize.S16: value = SignExtendTo32(context, value, 16); break;
  493. }
  494. return value;
  495. }
  496. private static Operand InsertSmallInt(
  497. EmitterContext context,
  498. IntegerSize size,
  499. Operand bitOffset,
  500. Operand word,
  501. Operand value)
  502. {
  503. switch (size)
  504. {
  505. case IntegerSize.U8:
  506. case IntegerSize.S8:
  507. value = context.BitwiseAnd(value, Const(0xff));
  508. value = context.BitfieldInsert(word, value, bitOffset, Const(8));
  509. break;
  510. case IntegerSize.U16:
  511. case IntegerSize.S16:
  512. value = context.BitwiseAnd(value, Const(0xffff));
  513. value = context.BitfieldInsert(word, value, bitOffset, Const(16));
  514. break;
  515. }
  516. return value;
  517. }
  518. }
  519. }