InstEmitMemory.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
  /// <summary>
  /// Memory regions that the generic load/store emitters of this class can target.
  /// </summary>
  private enum MemoryRegion
  {
      /// <summary>Region accessed through the context's <c>LoadLocal</c> / <c>StoreLocal</c> calls.</summary>
      Local = 0,

      /// <summary>Region accessed through the context's <c>LoadShared</c> / <c>StoreShared*</c> calls.</summary>
      Shared = 1
  }
  /// <summary>
  /// Emits the IR for an <c>InstAtom</c> operation: builds the address from <c>SrcA</c> plus the
  /// immediate, performs the requested atomic operation through <c>Instruction.MrGlobal</c>, and
  /// copies the value returned by the atomic emitter into the destination register.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Atom(EmitterContext context)
  {
      InstAtom inst = context.GetOp<InstAtom>();

      // Shifting left and then arithmetically right by the same amount
      // sign-extends the low 20 bits of the immediate.
      const int unusedBits = 32 - 20;
      int immediate = (inst.Imm20 << unusedBits) >> unusedBits;

      Register baseReg = new Register(inst.SrcA, RegisterType.Gpr);
      (Operand low, Operand high) = Get40BitsAddress(context, baseReg, inst.E, immediate);

      Operand operand = GetSrcReg(context, inst.SrcB);
      Operand result = EmitAtomicOp(context, Instruction.MrGlobal, inst.Op, inst.Size, low, high, operand);

      context.Copy(GetDest(inst.Dest), result);
  }
  /// <summary>
  /// Emits the IR for an <c>InstAtoms</c> operation: an atomic operation issued through
  /// <c>Instruction.MrShared</c>, whose result is copied into the destination register.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Atoms(EmitterContext context)
  {
      InstAtoms inst = context.GetOp<InstAtoms>();

      // The register value is divided by 4 before the immediate is added.
      Operand address = context.ShiftRightU32(GetSrcReg(context, inst.SrcA), Const(2));

      // Sign-extend the low 22 bits of the immediate.
      const int unusedBits = 32 - 22;
      int immediate = (inst.Imm22 << unusedBits) >> unusedBits;

      address = context.IAdd(address, Const(immediate));

      Operand operand = GetSrcReg(context, inst.SrcB);

      // Translate the instruction-specific size enum; anything unlisted is treated as U32.
      AtomSize size;

      switch (inst.AtomsSize)
      {
          case AtomsSize.S32:
              size = AtomSize.S32;
              break;
          case AtomsSize.U64:
              size = AtomSize.U64;
              break;
          case AtomsSize.S64:
              size = AtomSize.S64;
              break;
          default:
              size = AtomSize.U32;
              break;
      }

      Operand result = EmitAtomicOp(context, Instruction.MrShared, inst.AtomOp, size, address, Const(0), operand);

      context.Copy(GetDest(inst.Dest), result);
  }
  /// <summary>
  /// Emits the IR for an <c>InstLdc</c> operation: loads one word (or two for <c>B64</c>) from a
  /// constant buffer into consecutive destination registers. Sizes above <c>B64</c> are logged
  /// and produce no code.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Ldc(EmitterContext context)
  {
      InstLdc inst = context.GetOp<InstLdc>();

      var size = inst.LsSize;

      if (size > LsSize2.B64)
      {
          context.Config.GpuAccessor.Log($"Invalid LDC size: {size}.");
          return;
      }

      Operand cbufSlot = Const(inst.CbufSlot);
      Operand baseReg = GetSrcReg(context, inst.SrcA);

      bool slotInRegister = inst.AddressMode == AddressMode.Is || inst.AddressMode == AddressMode.Isl;

      if (slotInRegister)
      {
          // In these modes the upper 16 bits of the register are added to the slot
          // and only the lower 16 bits take part in the offset.
          Operand slotDelta = context.BitfieldExtractU32(baseReg, Const(16), Const(16));

          cbufSlot = context.IAdd(cbufSlot, slotDelta);
          baseReg = context.BitwiseAnd(baseReg, Const(0xffff));
      }

      Operand byteAddress = context.IAdd(baseReg, Const(Imm16ToSInt(inst.CbufOffset)));
      Operand firstWord = context.ShiftRightU32(byteAddress, Const(2));
      Operand shift = GetBitOffset(context, byteAddress);

      bool narrow = size < LsSize2.B32;
      int words = size == LsSize2.B64 ? 2 : 1;

      for (int i = 0; i < words; i++)
      {
          Register target = new Register(inst.Dest + i, RegisterType.Gpr);

          // Stop as soon as the destination runs into the zero register.
          if (target.IsRZ)
          {
              break;
          }

          Operand wordIndex = context.IAdd(firstWord, Const(i));
          Operand loaded = context.LoadConstant(cbufSlot, wordIndex);

          if (narrow)
          {
              loaded = ExtractSmallInt(context, (LsSize)size, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits the IR for an <c>InstLdg</c> operation by forwarding its decoded fields to <c>EmitLdg</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Ldg(EmitterContext context)
  {
      InstLdg inst = context.GetOp<InstLdg>();

      int displacement = Imm24ToSInt(inst.Imm24);

      EmitLdg(context, size: inst.LsSize, ra: inst.SrcA, rd: inst.Dest, offset: displacement, extended: inst.E);
  }
  /// <summary>
  /// Emits the IR for an <c>InstLdl</c> operation: a load from <c>MemoryRegion.Local</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Ldl(EmitterContext context)
  {
      InstLdl inst = context.GetOp<InstLdl>();

      Operand address = GetSrcReg(context, inst.SrcA);
      int displacement = Imm24ToSInt(inst.Imm24);

      EmitLoad(context, MemoryRegion.Local, size: inst.LsSize, srcA: address, rd: inst.Dest, offset: displacement);
  }
  /// <summary>
  /// Emits the IR for an <c>InstLds</c> operation: a load from <c>MemoryRegion.Shared</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Lds(EmitterContext context)
  {
      InstLds inst = context.GetOp<InstLds>();

      Operand address = GetSrcReg(context, inst.SrcA);
      int displacement = Imm24ToSInt(inst.Imm24);

      EmitLoad(context, MemoryRegion.Shared, size: inst.LsSize, srcA: address, rd: inst.Dest, offset: displacement);
  }
  /// <summary>
  /// Emits the IR for an <c>InstRed</c> operation: an atomic operation issued through
  /// <c>Instruction.MrGlobal</c> whose result is discarded.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Red(EmitterContext context)
  {
      InstRed op = context.GetOp<InstRed>();

      // Sign-extend the 20-bit immediate exactly as Atom does for the same field;
      // passing it through unchanged would turn negative offsets into large positive ones.
      // The operation is a no-op if the decoder already sign-extended the value.
      int sOffset = (op.Imm20 << 12) >> 12;

      (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, sOffset);

      // SrcB is read here, so fetch it as a source register (consistent with Atom).
      Operand value = GetSrcReg(context, op.SrcB);

      // The value returned by the atomic emitter is intentionally unused.
      EmitAtomicOp(context, Instruction.MrGlobal, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, value);
  }
  /// <summary>
  /// Emits the IR for an <c>InstStg</c> operation by forwarding its decoded fields to <c>EmitStg</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Stg(EmitterContext context)
  {
      InstStg inst = context.GetOp<InstStg>();

      int displacement = Imm24ToSInt(inst.Imm24);

      EmitStg(context, size: inst.LsSize, ra: inst.SrcA, rd: inst.Dest, offset: displacement, extended: inst.E);
  }
  /// <summary>
  /// Emits the IR for an <c>InstStl</c> operation: a store to <c>MemoryRegion.Local</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Stl(EmitterContext context)
  {
      InstStl inst = context.GetOp<InstStl>();

      Operand address = GetSrcReg(context, inst.SrcA);
      int displacement = Imm24ToSInt(inst.Imm24);

      EmitStore(context, MemoryRegion.Local, size: inst.LsSize, srcA: address, rd: inst.Dest, offset: displacement);
  }
  /// <summary>
  /// Emits the IR for an <c>InstSts</c> operation: a store to <c>MemoryRegion.Shared</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current operation.</param>
  public static void Sts(EmitterContext context)
  {
      InstSts inst = context.GetOp<InstSts>();

      Operand address = GetSrcReg(context, inst.SrcA);
      int displacement = Imm24ToSInt(inst.Imm24);

      EmitStore(context, MemoryRegion.Shared, size: inst.LsSize, srcA: address, rd: inst.Dest, offset: displacement);
  }
  /// <summary>
  /// Emits a single atomic operation and returns the operand holding its result.
  /// </summary>
  /// <remarks>
  /// Only Add, And, Xor, Or, Max and Min are handled, and only for the S32 and U32 sizes.
  /// A handled operation with any other size logs a message and yields a constant zero;
  /// any other operation yields a constant zero without logging.
  /// </remarks>
  /// <param name="context">Emitter context the operation is added to.</param>
  /// <param name="mr">Instruction value forwarded to the context's atomic emitters.</param>
  /// <param name="op">Atomic operation to perform.</param>
  /// <param name="type">Size/signedness of the atomic operation.</param>
  /// <param name="addrLow">Low part of the address.</param>
  /// <param name="addrHigh">High part of the address.</param>
  /// <param name="value">Value operand of the operation.</param>
  /// <returns>The result operand, or a constant zero when nothing was emitted.</returns>
  private static Operand EmitAtomicOp(
      EmitterContext context,
      Instruction mr,
      AtomOp op,
      AtomSize type,
      Operand addrLow,
      Operand addrHigh,
      Operand value)
  {
      Operand zero = Const(0);

      bool isSigned32 = type == AtomSize.S32;
      bool isUnsigned32 = type == AtomSize.U32;
      bool is32Bit = isSigned32 || isUnsigned32;

      // Shared "unsupported size" path: report it and fall back to zero.
      Operand Reject()
      {
          context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");

          return zero;
      }

      return op switch
      {
          AtomOp.Add => is32Bit ? context.AtomicAdd(mr, addrLow, addrHigh, value) : Reject(),
          AtomOp.And => is32Bit ? context.AtomicAnd(mr, addrLow, addrHigh, value) : Reject(),
          AtomOp.Xor => is32Bit ? context.AtomicXor(mr, addrLow, addrHigh, value) : Reject(),
          AtomOp.Or => is32Bit ? context.AtomicOr(mr, addrLow, addrHigh, value) : Reject(),

          // Max and Min have distinct signed and unsigned emitters.
          AtomOp.Max => isSigned32
              ? context.AtomicMaxS32(mr, addrLow, addrHigh, value)
              : isUnsigned32
                  ? context.AtomicMaxU32(mr, addrLow, addrHigh, value)
                  : Reject(),
          AtomOp.Min => isSigned32
              ? context.AtomicMinS32(mr, addrLow, addrHigh, value)
              : isUnsigned32
                  ? context.AtomicMinU32(mr, addrLow, addrHigh, value)
                  : Reject(),

          _ => zero
      };
  }
  /// <summary>
  /// Emits a load of 1, 2 or 4 words from the given region into consecutive registers
  /// starting at <paramref name="rd"/>. Sizes above <c>B128</c> are logged and produce no code.
  /// </summary>
  /// <param name="context">Emitter context the operations are added to.</param>
  /// <param name="region">Region to load from.</param>
  /// <param name="size">Access size.</param>
  /// <param name="srcA">Operand holding the base byte offset.</param>
  /// <param name="rd">Index of the first destination register.</param>
  /// <param name="offset">Immediate byte offset added to <paramref name="srcA"/>.</param>
  private static void EmitLoad(
      EmitterContext context,
      MemoryRegion region,
      LsSize2 size,
      Operand srcA,
      int rd,
      int offset)
  {
      if (size > LsSize2.B128)
      {
          context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
          return;
      }

      int words = size switch
      {
          LsSize2.B64 => 2,
          LsSize2.B128 => 4,
          _ => 1
      };

      bool narrow = size < LsSize2.B32;

      Operand byteOffset = context.IAdd(srcA, Const(offset));

      // Words are 4 bytes wide, so the word index is the byte offset divided by 4.
      Operand firstWord = context.ShiftRightU32(byteOffset, Const(2));
      Operand shift = GetBitOffset(context, byteOffset);

      for (int i = 0; i < words; i++)
      {
          Register target = new Register(rd + i, RegisterType.Gpr);

          // Stop as soon as the destination runs into the zero register.
          if (target.IsRZ)
          {
              break;
          }

          Operand wordIndex = context.IAdd(firstWord, Const(i));

          Operand loaded = region switch
          {
              MemoryRegion.Local => context.LoadLocal(wordIndex),
              MemoryRegion.Shared => context.LoadShared(wordIndex),
              _ => null
          };

          if (narrow)
          {
              loaded = ExtractSmallInt(context, (LsSize)size, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a global load of <c>GetVectorCount(size)</c> words into consecutive registers
  /// starting at <paramref name="rd"/>.
  /// </summary>
  /// <param name="context">Emitter context the operations are added to.</param>
  /// <param name="size">Access size.</param>
  /// <param name="ra">Index of the register holding the base address.</param>
  /// <param name="rd">Index of the first destination register.</param>
  /// <param name="offset">Immediate byte offset added to the base address.</param>
  /// <param name="extended">Forwarded to <c>Get40BitsAddress</c> to select the extended address form.</param>
  private static void EmitLdg(
      EmitterContext context,
      LsSize size,
      int ra,
      int rd,
      int offset,
      bool extended)
  {
      bool narrow = size < LsSize.B32;
      int words = GetVectorCount(size);

      Register baseReg = new Register(ra, RegisterType.Gpr);

      (Operand low, Operand high) = Get40BitsAddress(context, baseReg, extended, offset);

      Operand shift = GetBitOffset(context, low);

      for (int i = 0; i < words; i++)
      {
          Register target = new Register(rd + i, RegisterType.Gpr);

          // Stop as soon as the destination runs into the zero register.
          if (target.IsRZ)
          {
              break;
          }

          // Each element lives 4 bytes after the previous one.
          Operand elementLow = context.IAdd(low, Const(i * 4));
          Operand loaded = context.LoadGlobal(elementLow, high);

          if (narrow)
          {
              loaded = ExtractSmallInt(context, size, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a store of 1, 2 or 4 words from consecutive registers starting at
  /// <paramref name="rd"/> into the given region. Sizes above <c>B128</c> are logged
  /// and produce no code.
  /// </summary>
  /// <remarks>
  /// For sizes below <c>B32</c>, local stores read the containing word, insert the small
  /// value and write the word back, while shared stores use the 8/16-bit store emitters
  /// with the byte offset.
  /// </remarks>
  /// <param name="context">Emitter context the operations are added to.</param>
  /// <param name="region">Region to store to.</param>
  /// <param name="size">Access size.</param>
  /// <param name="srcA">Operand holding the base byte offset.</param>
  /// <param name="rd">Index of the first register holding the data to store.</param>
  /// <param name="offset">Immediate byte offset added to <paramref name="srcA"/>.</param>
  private static void EmitStore(
      EmitterContext context,
      MemoryRegion region,
      LsSize2 size,
      Operand srcA,
      int rd,
      int offset)
  {
      if (size > LsSize2.B128)
      {
          context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
          return;
      }

      bool isSmallInt = size < LsSize2.B32;

      int count = 1;

      switch (size)
      {
          case LsSize2.B64: count = 2; break;
          case LsSize2.B128: count = 4; break;
      }

      Operand baseOffset = context.IAdd(srcA, Const(offset));
      Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

      // The bit offset is only consumed by the local read-modify-write path,
      // so it is not emitted when nothing would use it.
      bool needsInsert = isSmallInt && region == MemoryRegion.Local;
      Operand bitOffset = needsInsert ? GetBitOffset(context, baseOffset) : null;

      for (int index = 0; index < count; index++)
      {
          // Once the register index reaches the zero register, store the zero register
          // itself instead of re-reading the first register or indexing past it.
          bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;

          Operand value = Register(isRz ? RegisterConsts.RegisterZeroIndex : rd + index, RegisterType.Gpr);

          Operand elemOffset = context.IAdd(wordOffset, Const(index));

          if (needsInsert)
          {
              Operand word = context.LoadLocal(elemOffset);

              value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
          }

          if (region == MemoryRegion.Local)
          {
              context.StoreLocal(elemOffset, value);
          }
          else if (region == MemoryRegion.Shared)
          {
              switch (size)
              {
                  case LsSize2.U8:
                  case LsSize2.S8:
                      context.StoreShared8(baseOffset, value);
                      break;
                  case LsSize2.U16:
                  case LsSize2.S16:
                      context.StoreShared16(baseOffset, value);
                      break;
                  default:
                      context.StoreShared(elemOffset, value);
                      break;
              }
          }
      }
  }
  /// <summary>
  /// Emits a global store of <c>GetVectorCount(size)</c> words from consecutive registers
  /// starting at <paramref name="rd"/>. Sizes above <c>B128</c> are logged and produce no code.
  /// </summary>
  /// <param name="context">Emitter context the operations are added to.</param>
  /// <param name="size">Access size; 8 and 16-bit sizes use the dedicated narrow store emitters.</param>
  /// <param name="ra">Index of the register holding the base address.</param>
  /// <param name="rd">Index of the first register holding the data to store.</param>
  /// <param name="offset">Immediate byte offset added to the base address.</param>
  /// <param name="extended">Forwarded to <c>Get40BitsAddress</c> to select the extended address form.</param>
  private static void EmitStg(
      EmitterContext context,
      LsSize2 size,
      int ra,
      int rd,
      int offset,
      bool extended)
  {
      if (size > LsSize2.B128)
      {
          context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
          return;
      }

      int count = GetVectorCount((LsSize)size);

      (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

      // Note: no bit offset is computed here. The narrow global stores take the
      // byte address directly, so emitting it would only add unused operations.

      for (int index = 0; index < count; index++)
      {
          // Once the register index reaches the zero register, store the zero register
          // itself instead of re-reading the first register or indexing past it.
          bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;

          Operand value = Register(isRz ? RegisterConsts.RegisterZeroIndex : rd + index, RegisterType.Gpr);

          // Each element lives 4 bytes after the previous one.
          Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));

          if (size == LsSize2.U8 || size == LsSize2.S8)
          {
              context.StoreGlobal8(addrLowOffset, addrHigh, value);
          }
          else if (size == LsSize2.U16 || size == LsSize2.S16)
          {
              context.StoreGlobal16(addrLowOffset, addrHigh, value);
          }
          else
          {
              context.StoreGlobal(addrLowOffset, addrHigh, value);
          }
      }
  }
  /// <summary>
  /// Gets the number of 32-bit elements transferred for a given access size.
  /// </summary>
  /// <param name="size">Access size.</param>
  /// <returns>2 for <c>B64</c>, 4 for <c>B128</c> and <c>UB128</c>, 1 for everything else.</returns>
  private static int GetVectorCount(LsSize size)
  {
      return size switch
      {
          LsSize.B64 => 2,
          LsSize.B128 => 4,
          LsSize.UB128 => 4,
          _ => 1
      };
  }
  /// <summary>
  /// Builds the low and high parts of an address from a base register and a signed
  /// immediate offset.
  /// </summary>
  /// <remarks>
  /// When <paramref name="extended"/> is set and the base is not the zero register, the
  /// high part is read from the register following <paramref name="ra"/>; otherwise it
  /// starts as zero. In extended mode the addition is carried out as a two-word add of the
  /// sign-extended offset: the carry out of the low word and, for negative offsets, the
  /// all-ones upper word of the offset are both applied to the high part.
  /// </remarks>
  /// <param name="context">Emitter context the operations are added to.</param>
  /// <param name="ra">Register holding the low part of the base address.</param>
  /// <param name="extended">Whether the address uses a high part in addition to the low word.</param>
  /// <param name="offset">Signed byte offset added to the base address.</param>
  /// <returns>The low and high address operands, in that order.</returns>
  private static (Operand, Operand) Get40BitsAddress(
      EmitterContext context,
      Register ra,
      bool extended,
      int offset)
  {
      Operand addrLow = Register(ra);
      Operand addrHigh;

      if (extended && !ra.IsRZ)
      {
          addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
      }
      else
      {
          addrHigh = Const(0);
      }

      Operand offs = Const(offset);

      addrLow = context.IAdd(addrLow, offs);

      if (extended)
      {
          // An unsigned sum smaller than one of its addends means the low word wrapped.
          Operand carry = context.ICompareLessUnsigned(addrLow, offs);

          addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));

          if (offset < 0)
          {
              // A negative offset sign-extends to an upper word of all ones (-1).
              // Without this term the carry above would wrongly bump the high part
              // whenever a negative offset is applied.
              addrHigh = context.IAdd(addrHigh, Const(-1));
          }
      }

      return (addrLow, addrHigh);
  }
  /// <summary>
  /// Computes the bit position of a byte offset inside its 32-bit word,
  /// i.e. <c>(baseOffset &amp; 3) * 8</c>.
  /// </summary>
  /// <param name="context">Emitter context the operations are added to.</param>
  /// <param name="baseOffset">Byte offset of the access.</param>
  /// <returns>Operand holding the bit offset.</returns>
  private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  {
      // Accesses are expected to be aligned to their own size, so a value
      // straddling two words does not have to be handled here.
      Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));

      // Shifting left by 3 multiplies by 8 (bits per byte).
      return context.ShiftLeft(byteInWord, Const(3));
  }
  /// <summary>
  /// Extracts an 8 or 16-bit value from a loaded word and widens it to 32 bits.
  /// </summary>
  /// <param name="context">Emitter context the operations are added to.</param>
  /// <param name="size">Access size; selects the width and zero or sign extension.</param>
  /// <param name="bitOffset">Bit position of the value inside <paramref name="value"/>.</param>
  /// <param name="value">Word containing the small value.</param>
  /// <returns>
  /// The extended value for the 8/16-bit sizes; for any other size, the word shifted
  /// right by <paramref name="bitOffset"/>.
  /// </returns>
  private static Operand ExtractSmallInt(
      EmitterContext context,
      LsSize size,
      Operand bitOffset,
      Operand value)
  {
      // Bring the requested bits down to bit 0 first.
      Operand shifted = context.ShiftRightU32(value, bitOffset);

      return size switch
      {
          LsSize.U8 => ZeroExtendTo32(context, shifted, 8),
          LsSize.U16 => ZeroExtendTo32(context, shifted, 16),
          LsSize.S8 => SignExtendTo32(context, shifted, 8),
          LsSize.S16 => SignExtendTo32(context, shifted, 16),
          _ => shifted
      };
  }
  /// <summary>
  /// Inserts an 8 or 16-bit value into an existing word at the given bit position.
  /// </summary>
  /// <param name="context">Emitter context the operations are added to.</param>
  /// <param name="size">Access size; selects an 8 or 16-bit field.</param>
  /// <param name="bitOffset">Bit position at which the value is inserted.</param>
  /// <param name="word">Word that receives the value.</param>
  /// <param name="value">Value to insert; only its low 8 or 16 bits are used.</param>
  /// <returns>
  /// The combined word for the 8/16-bit sizes; for any other size,
  /// <paramref name="value"/> unchanged.
  /// </returns>
  private static Operand InsertSmallInt(
      EmitterContext context,
      LsSize size,
      Operand bitOffset,
      Operand word,
      Operand value)
  {
      int fieldBits = size switch
      {
          LsSize.U8 => 8,
          LsSize.S8 => 8,
          LsSize.U16 => 16,
          LsSize.S16 => 16,
          _ => 0
      };

      // Sizes that are not small integers are passed through untouched.
      if (fieldBits == 0)
      {
          return value;
      }

      // Mask is 0xff for 8-bit fields and 0xffff for 16-bit fields.
      Operand masked = context.BitwiseAnd(value, Const((1 << fieldBits) - 1));

      return context.BitfieldInsert(word, masked, bitOffset, Const(fieldBits));
  }
  440. }
  441. }