InstEmitMemory.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using System.Numerics;
  5. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  6. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7. namespace Ryujinx.Graphics.Shader.Instructions
  8. {
  9. static partial class InstEmit
  10. {
  /// <summary>
  /// Memory regions that <c>EmitLoad</c> and <c>EmitStore</c> can target.
  /// </summary>
  private enum MemoryRegion
  {
      /// <summary>Region accessed through <c>LoadLocal</c>/<c>StoreLocal</c> on the emitter context.</summary>
      Local,

      /// <summary>Region accessed through <c>LoadShared</c>/<c>StoreShared*</c> on the emitter context.</summary>
      Shared,
  }
  /// <summary>
  /// Emits the atomic operation described by the current <c>InstAtom</c> against global memory
  /// and copies the value produced by the atomic into the destination register.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Atom(EmitterContext context)
  {
      InstAtom inst = context.GetOp<InstAtom>();

      // Shift left then arithmetic-shift right to sign-extend the low 20 bits of the immediate.
      int signedOffset = (inst.Imm20 << 12) >> 12;
      Register baseReg = new Register(inst.SrcA, RegisterType.Gpr);

      (Operand low, Operand high) = Get40BitsAddress(context, baseReg, inst.E, signedOffset);

      Operand operand = GetSrcReg(context, inst.SrcB);
      Operand result = EmitAtomicOp(
          context,
          StorageKind.GlobalMemory,
          inst.Op,
          inst.Size,
          low,
          high,
          operand);

      context.Copy(GetDest(inst.Dest), result);
  }
  /// <summary>
  /// Emits the atomic operation described by the current <c>InstAtoms</c> against shared memory
  /// and copies the value produced by the atomic into the destination register.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Atoms(EmitterContext context)
  {
      InstAtoms inst = context.GetOp<InstAtoms>();

      // The register value is shifted right by 2 before the immediate is added.
      Operand shiftedBase = context.ShiftRightU32(GetSrcReg(context, inst.SrcA), Const(2));

      // Sign-extend the low 22 bits of the immediate.
      int signedImm = (inst.Imm22 << 10) >> 10;
      Operand address = context.IAdd(shiftedBase, Const(signedImm));

      Operand operand = GetSrcReg(context, inst.SrcB);

      // Translate the ATOMS-specific size enum; anything not listed is treated as U32.
      AtomSize atomSize;
      switch (inst.AtomsSize)
      {
          case AtomsSize.S32:
              atomSize = AtomSize.S32;
              break;
          case AtomsSize.U64:
              atomSize = AtomSize.U64;
              break;
          case AtomsSize.S64:
              atomSize = AtomSize.S64;
              break;
          default:
              atomSize = AtomSize.U32;
              break;
      }

      Operand result = EmitAtomicOp(
          context,
          StorageKind.SharedMemory,
          inst.AtomOp,
          atomSize,
          address,
          Const(0),
          operand);

      context.Copy(GetDest(inst.Dest), result);
  }
  /// <summary>
  /// Emits a constant buffer load for the current <c>InstLdc</c>.
  /// Sizes above <c>LsSize2.B64</c> are rejected with a log message and emit nothing.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Ldc(EmitterContext context)
  {
      InstLdc inst = context.GetOp<InstLdc>();

      if (inst.LsSize > LsSize2.B64)
      {
          context.Config.GpuAccessor.Log($"Invalid LDC size: {inst.LsSize}.");
          return;
      }

      bool needsExtract = inst.LsSize < LsSize2.B32;

      int wordCount = 1;
      if (inst.LsSize == LsSize2.B64)
      {
          wordCount = 2;
      }

      Operand cbSlot = Const(inst.CbufSlot);
      Operand baseReg = GetSrcReg(context, inst.SrcA);

      bool slotFromRegister = inst.AddressMode == AddressMode.Is || inst.AddressMode == AddressMode.Isl;
      if (slotFromRegister)
      {
          // In these modes the upper 16 bits of the register are added to the slot,
          // and only the lower 16 bits take part in the address.
          Operand slotDelta = context.BitfieldExtractU32(baseReg, Const(16), Const(16));
          cbSlot = context.IAdd(cbSlot, slotDelta);
          baseReg = context.BitwiseAnd(baseReg, Const(0xffff));
      }

      Operand byteAddress = context.IAdd(baseReg, Const(Imm16ToSInt(inst.CbufOffset)));
      Operand firstWord = context.ShiftRightU32(byteAddress, Const(2));

      for (int i = 0; i < wordCount; i++)
      {
          Register target = new Register(inst.Dest + i, RegisterType.Gpr);

          // Stop at the zero register; the remaining words are not loaded.
          if (target.IsRZ)
          {
              break;
          }

          Operand wordIndex = context.IAdd(firstWord, Const(i));
          Operand loaded = EmitLoadConstant(context, cbSlot, wordIndex);

          if (needsExtract)
          {
              Operand shift = GetBitOffset(context, byteAddress);
              loaded = ExtractSmallInt(context, (LsSize)inst.LsSize, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a global memory load for the current <c>InstLdg</c> by forwarding its fields to <c>EmitLdg</c>.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Ldg(EmitterContext context)
  {
      InstLdg inst = context.GetOp<InstLdg>();
      int byteOffset = Imm24ToSInt(inst.Imm24);

      EmitLdg(context, inst.LsSize, inst.SrcA, inst.Dest, byteOffset, inst.E);
  }
  /// <summary>
  /// Emits a load from <see cref="MemoryRegion.Local"/> for the current <c>InstLdl</c>.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Ldl(EmitterContext context)
  {
      InstLdl inst = context.GetOp<InstLdl>();

      Operand baseAddress = GetSrcReg(context, inst.SrcA);
      int byteOffset = Imm24ToSInt(inst.Imm24);

      EmitLoad(context, MemoryRegion.Local, inst.LsSize, baseAddress, inst.Dest, byteOffset);
  }
  /// <summary>
  /// Emits a load from <see cref="MemoryRegion.Shared"/> for the current <c>InstLds</c>.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Lds(EmitterContext context)
  {
      InstLds inst = context.GetOp<InstLds>();

      Operand baseAddress = GetSrcReg(context, inst.SrcA);
      int byteOffset = Imm24ToSInt(inst.Imm24);

      EmitLoad(context, MemoryRegion.Shared, inst.LsSize, baseAddress, inst.Dest, byteOffset);
  }
  /// <summary>
  /// Emits the reduction described by the current <c>InstRed</c> against global memory.
  /// Unlike <c>Atom</c>, the value produced by the atomic operation is discarded.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Red(EmitterContext context)
  {
      InstRed op = context.GetOp<InstRed>();

      // Sign-extend the 20-bit immediate, exactly as Atom does for its Imm20 field.
      // Passing the raw field would turn negative offsets into large positive ones.
      // The shift pair is a no-op if the field is already sign-extended.
      int sOffset = (op.Imm20 << 12) >> 12;

      (Operand addrLow, Operand addrHigh) = Get40BitsAddress(
          context,
          new Register(op.SrcA, RegisterType.Gpr),
          op.E,
          sOffset);

      // SrcB is an input operand, so read it with the source-register helper (as Atom does)
      // rather than the destination helper.
      Operand value = GetSrcReg(context, op.SrcB);

      EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, value);
  }
  /// <summary>
  /// Emits a global memory store for the current <c>InstStg</c> by forwarding its fields to <c>EmitStg</c>.
  /// The instruction's <c>Dest</c> field is passed as the register holding the data to store.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Stg(EmitterContext context)
  {
      InstStg inst = context.GetOp<InstStg>();
      int byteOffset = Imm24ToSInt(inst.Imm24);

      EmitStg(context, inst.LsSize, inst.SrcA, inst.Dest, byteOffset, inst.E);
  }
  /// <summary>
  /// Emits a store to <see cref="MemoryRegion.Local"/> for the current <c>InstStl</c>.
  /// The instruction's <c>Dest</c> field is passed as the register holding the data to store.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Stl(EmitterContext context)
  {
      InstStl inst = context.GetOp<InstStl>();

      Operand baseAddress = GetSrcReg(context, inst.SrcA);
      int byteOffset = Imm24ToSInt(inst.Imm24);

      EmitStore(context, MemoryRegion.Local, inst.LsSize, baseAddress, inst.Dest, byteOffset);
  }
  /// <summary>
  /// Emits a store to <see cref="MemoryRegion.Shared"/> for the current <c>InstSts</c>.
  /// The instruction's <c>Dest</c> field is passed as the register holding the data to store.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the IR.</param>
  public static void Sts(EmitterContext context)
  {
      InstSts inst = context.GetOp<InstSts>();

      Operand baseAddress = GetSrcReg(context, inst.SrcA);
      int byteOffset = Imm24ToSInt(inst.Imm24);

      EmitStore(context, MemoryRegion.Shared, inst.LsSize, baseAddress, inst.Dest, byteOffset);
  }
  /// <summary>
  /// Emits a load of one word from a constant buffer.
  /// </summary>
  /// <remarks>
  /// When <paramref name="slot"/> is a constant, a single load from that buffer's binding is emitted.
  /// Otherwise one load is emitted for every buffer reported by <c>QueryConstantBufferUse</c>, and the
  /// result is picked with a chain of conditional selects, defaulting to 0 when no buffer index matches.
  /// </remarks>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="slot">Constant buffer slot, either a constant or a value computed at run time.</param>
  /// <param name="offset">Word offset; it is split into <c>offset &gt;&gt; 2</c> and <c>offset &amp; 3</c> for the load.</param>
  /// <returns>The operand holding the loaded word.</returns>
  private static Operand EmitLoadConstant(EmitterContext context, Operand slot, Operand offset)
  {
      Operand vec = context.ShiftRightU32(offset, Const(2));
      Operand elem = context.BitwiseAnd(offset, Const(3));

      if (slot.Type != OperandType.Constant)
      {
          Operand selected = Const(0);

          // Visit every set bit of the usage mask from lowest to highest;
          // "pending &= pending - 1" clears the bit that was just handled.
          for (uint pending = context.Config.GpuAccessor.QueryConstantBufferUse(); pending != 0; pending &= pending - 1)
          {
              int cbIndex = BitOperations.TrailingZeroCount(pending);
              int cbBinding = context.Config.ResourceManager.GetConstantBufferBinding(cbIndex);

              Operand matches = context.ICompareEqual(slot, Const(cbIndex));
              Operand candidate = context.Load(StorageKind.ConstantBuffer, cbBinding, Const(0), vec, elem);

              selected = context.ConditionalSelect(matches, candidate, selected);
          }

          return selected;
      }

      int binding = context.Config.ResourceManager.GetConstantBufferBinding(slot.Value);

      return context.Load(StorageKind.ConstantBuffer, binding, Const(0), vec, elem);
  }
  /// <summary>
  /// Emits a 32-bit atomic operation on the given storage.
  /// </summary>
  /// <remarks>
  /// Add, And, Xor and Or accept <c>AtomSize.S32</c> or <c>AtomSize.U32</c>; Max and Min pick the signed
  /// or unsigned variant from the type. Any other type is logged as an invalid reduction type.
  /// Operations outside those six are now logged too; previously they returned 0 with no diagnostic.
  /// In every failure case nothing is emitted and constant 0 is returned.
  /// </remarks>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="storageKind">Storage the atomic operates on.</param>
  /// <param name="op">Atomic operation to perform.</param>
  /// <param name="type">Operand size/signedness of the atomic.</param>
  /// <param name="addrLow">First address operand passed to the atomic.</param>
  /// <param name="addrHigh">Second address operand passed to the atomic.</param>
  /// <param name="value">Value operand of the atomic.</param>
  /// <returns>The result operand of the atomic, or constant 0 if it could not be emitted.</returns>
  private static Operand EmitAtomicOp(
      EmitterContext context,
      StorageKind storageKind,
      AtomOp op,
      AtomSize type,
      Operand addrLow,
      Operand addrHigh,
      Operand value)
  {
      bool isS32 = type == AtomSize.S32;
      bool isU32 = type == AtomSize.U32;
      bool is32Bits = isS32 || isU32;

      switch (op)
      {
          case AtomOp.Add:
              if (is32Bits)
              {
                  return context.AtomicAdd(storageKind, addrLow, addrHigh, value);
              }
              break;
          case AtomOp.And:
              if (is32Bits)
              {
                  return context.AtomicAnd(storageKind, addrLow, addrHigh, value);
              }
              break;
          case AtomOp.Xor:
              if (is32Bits)
              {
                  return context.AtomicXor(storageKind, addrLow, addrHigh, value);
              }
              break;
          case AtomOp.Or:
              if (is32Bits)
              {
                  return context.AtomicOr(storageKind, addrLow, addrHigh, value);
              }
              break;
          case AtomOp.Max:
              if (isS32)
              {
                  return context.AtomicMaxS32(storageKind, addrLow, addrHigh, value);
              }
              if (isU32)
              {
                  return context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
              }
              break;
          case AtomOp.Min:
              if (isS32)
              {
                  return context.AtomicMinS32(storageKind, addrLow, addrHigh, value);
              }
              if (isU32)
              {
                  return context.AtomicMinU32(storageKind, addrLow, addrHigh, value);
              }
              break;
          default:
              // Report operations this emitter does not handle instead of silently producing 0.
              context.Config.GpuAccessor.Log($"Invalid atomic operation: {op}.");
              return Const(0);
      }

      // A handled operation was requested with a type it does not accept.
      context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");

      return Const(0);
  }
  /// <summary>
  /// Emits a load of 1, 2 or 4 words from local or shared memory into consecutive registers.
  /// Sizes below <c>LsSize2.B32</c> load one word and extract the sub-word value from it.
  /// </summary>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="region">Memory region to read from.</param>
  /// <param name="size">Access size; values above <c>LsSize2.B128</c> are logged and ignored.</param>
  /// <param name="srcA">Operand holding the base byte offset.</param>
  /// <param name="rd">Index of the first destination register.</param>
  /// <param name="offset">Immediate added to <paramref name="srcA"/>.</param>
  private static void EmitLoad(
      EmitterContext context,
      MemoryRegion region,
      LsSize2 size,
      Operand srcA,
      int rd,
      int offset)
  {
      if (size > LsSize2.B128)
      {
          context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
          return;
      }

      int wordCount = size switch
      {
          LsSize2.B64 => 2,
          LsSize2.B128 => 4,
          _ => 1
      };

      bool needsExtract = size < LsSize2.B32;

      Operand byteOffset = context.IAdd(srcA, Const(offset));

      // Word offset = byte offset / 4 (one word = 4 bytes).
      Operand firstWord = context.ShiftRightU32(byteOffset, Const(2));
      Operand shift = GetBitOffset(context, byteOffset);

      for (int i = 0; i < wordCount; i++)
      {
          Register target = new Register(rd + i, RegisterType.Gpr);

          // Stop at the zero register; the remaining words are not loaded.
          if (target.IsRZ)
          {
              break;
          }

          Operand wordIndex = context.IAdd(firstWord, Const(i));

          Operand loaded = region switch
          {
              MemoryRegion.Local => context.LoadLocal(wordIndex),
              MemoryRegion.Shared => context.LoadShared(wordIndex),
              _ => null
          };

          if (needsExtract)
          {
              loaded = ExtractSmallInt(context, (LsSize)size, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a global memory load of 1, 2 or 4 words into consecutive registers.
  /// Sizes below <c>LsSize.B32</c> load one word and extract the sub-word value from it.
  /// </summary>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="size">Access size; the word count comes from <c>GetVectorCount</c>.</param>
  /// <param name="ra">Index of the register holding the base address.</param>
  /// <param name="rd">Index of the first destination register.</param>
  /// <param name="offset">Immediate added to the base address.</param>
  /// <param name="extended">Forwarded to <c>Get40BitsAddress</c> to select the extended address form.</param>
  private static void EmitLdg(
      EmitterContext context,
      LsSize size,
      int ra,
      int rd,
      int offset,
      bool extended)
  {
      bool needsExtract = size < LsSize.B32;
      int wordCount = GetVectorCount(size);

      Register baseReg = new Register(ra, RegisterType.Gpr);
      (Operand low, Operand high) = Get40BitsAddress(context, baseReg, extended, offset);

      Operand shift = GetBitOffset(context, low);

      for (int i = 0; i < wordCount; i++)
      {
          Register target = new Register(rd + i, RegisterType.Gpr);

          // Stop at the zero register; the remaining words are not loaded.
          if (target.IsRZ)
          {
              break;
          }

          // Each successive word lives 4 bytes further on.
          Operand wordAddress = context.IAdd(low, Const(i * 4));
          Operand loaded = context.LoadGlobal(wordAddress, high);

          if (needsExtract)
          {
              loaded = ExtractSmallInt(context, size, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a store of 1, 2 or 4 words from consecutive registers to local or shared memory.
  /// </summary>
  /// <remarks>
  /// For sizes below <c>LsSize2.B32</c>, local stores read the target word, insert the value and write
  /// the word back, while shared stores use the dedicated 8/16-bit store operations.
  /// </remarks>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="region">Memory region to write to.</param>
  /// <param name="size">Access size; values above <c>LsSize2.B128</c> are logged and ignored.</param>
  /// <param name="srcA">Operand holding the base byte offset.</param>
  /// <param name="rd">Index of the first register holding the data to store.</param>
  /// <param name="offset">Immediate added to <paramref name="srcA"/>.</param>
  private static void EmitStore(
      EmitterContext context,
      MemoryRegion region,
      LsSize2 size,
      Operand srcA,
      int rd,
      int offset)
  {
      if (size > LsSize2.B128)
      {
          context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
          return;
      }

      int wordCount = size switch
      {
          LsSize2.B64 => 2,
          LsSize2.B128 => 4,
          _ => 1
      };

      bool isSubWord = size < LsSize2.B32;

      Operand byteOffset = context.IAdd(srcA, Const(offset));
      Operand firstWord = context.ShiftRightU32(byteOffset, Const(2));
      Operand shift = GetBitOffset(context, byteOffset);

      for (int i = 0; i < wordCount; i++)
      {
          // Once the register index would reach or pass the zero register, fall back to rd itself.
          int sourceIndex = rd + i < RegisterConsts.RegisterZeroIndex ? rd + i : rd;

          Operand data = Register(sourceIndex, RegisterType.Gpr);
          Operand wordIndex = context.IAdd(firstWord, Const(i));

          switch (region)
          {
              case MemoryRegion.Local:
                  if (isSubWord)
                  {
                      // Read-modify-write: merge the small value into the existing word.
                      Operand existing = context.LoadLocal(wordIndex);
                      data = InsertSmallInt(context, (LsSize)size, shift, existing, data);
                  }

                  context.StoreLocal(wordIndex, data);
                  break;

              case MemoryRegion.Shared:
                  if (size == LsSize2.U8 || size == LsSize2.S8)
                  {
                      context.StoreShared8(byteOffset, data);
                  }
                  else if (size == LsSize2.U16 || size == LsSize2.S16)
                  {
                      context.StoreShared16(byteOffset, data);
                  }
                  else
                  {
                      context.StoreShared(wordIndex, data);
                  }
                  break;
          }
      }
  }
  /// <summary>
  /// Emits a global memory store of 1, 2 or 4 words from consecutive registers.
  /// 8-bit and 16-bit sizes use the dedicated <c>StoreGlobal8</c>/<c>StoreGlobal16</c> operations.
  /// </summary>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="size">Access size; values above <c>LsSize2.B128</c> are logged and ignored.</param>
  /// <param name="ra">Index of the register holding the base address.</param>
  /// <param name="rd">Index of the first register holding the data to store.</param>
  /// <param name="offset">Immediate added to the base address.</param>
  /// <param name="extended">Forwarded to <c>Get40BitsAddress</c> to select the extended address form.</param>
  private static void EmitStg(
      EmitterContext context,
      LsSize2 size,
      int ra,
      int rd,
      int offset,
      bool extended)
  {
      if (size > LsSize2.B128)
      {
          context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
          return;
      }

      int count = GetVectorCount((LsSize)size);

      (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

      // No bit offset is computed here: sub-word stores go through StoreGlobal8/StoreGlobal16,
      // so the value was never used and only produced dead operations.

      for (int index = 0; index < count; index++)
      {
          // Once the register index would reach or pass the zero register, fall back to rd itself.
          bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;

          Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);

          // Each successive word lives 4 bytes further on.
          Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));

          if (size == LsSize2.U8 || size == LsSize2.S8)
          {
              context.StoreGlobal8(addrLowOffset, addrHigh, value);
          }
          else if (size == LsSize2.U16 || size == LsSize2.S16)
          {
              context.StoreGlobal16(addrLowOffset, addrHigh, value);
          }
          else
          {
              context.StoreGlobal(addrLowOffset, addrHigh, value);
          }
      }
  }
  /// <summary>
  /// Gets how many 32-bit words an access of the given size covers.
  /// </summary>
  /// <param name="size">Access size.</param>
  /// <returns>2 for <c>B64</c>, 4 for <c>B128</c> and <c>UB128</c>, and 1 for every other size.</returns>
  private static int GetVectorCount(LsSize size)
  {
      return size switch
      {
          LsSize.B64 => 2,
          LsSize.B128 => 4,
          LsSize.UB128 => 4,
          _ => 1
      };
  }
  /// <summary>
  /// Builds the (low, high) address pair for a global memory access: base register plus signed offset.
  /// </summary>
  /// <remarks>
  /// In extended mode the high word comes from the register after <paramref name="ra"/> (or 0 when
  /// <paramref name="ra"/> is the zero register), and the offset is added as a two-word signed add.
  /// In non-extended mode the high word is constant 0 and only the low word receives the offset.
  /// </remarks>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="ra">Register holding the low word of the base address.</param>
  /// <param name="extended">Whether the address uses a register pair.</param>
  /// <param name="offset">Signed byte offset added to the base address.</param>
  /// <returns>The low and high address operands, in that order.</returns>
  private static (Operand, Operand) Get40BitsAddress(
      EmitterContext context,
      Register ra,
      bool extended,
      int offset)
  {
      Operand addrLow = Register(ra);
      Operand addrHigh;

      if (extended && !ra.IsRZ)
      {
          addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
      }
      else
      {
          addrHigh = Const(0);
      }

      Operand offs = Const(offset);

      addrLow = context.IAdd(addrLow, offs);

      if (extended)
      {
          // The unsigned sum wrapped around if and only if it is smaller than the addend.
          Operand carry = context.ICompareLessUnsigned(addrLow, offs);

          addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));

          if (offset < 0)
          {
              // The offset is signed, so its sign-extension word is -1 when negative.
              // Without this term, a negative offset that does not borrow leaves the high word
              // one too large, and one that does borrow fails to decrement it.
              // Non-negative offsets contribute 0 and emit exactly the same IR as before.
              addrHigh = context.IAdd(addrHigh, Const(-1));
          }
      }

      return (addrLow, addrHigh);
  }
  /// <summary>
  /// Computes the bit position of the addressed byte inside its 32-bit word.
  /// </summary>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="baseOffset">Byte offset or address.</param>
  /// <returns>An operand equal to <c>(baseOffset &amp; 3) * 8</c>.</returns>
  private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  {
      // Addresses are expected to be aligned to the integer type being accessed,
      // so unaligned addresses are not taken into account here.
      Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));

      // Shifting left by 3 multiplies by 8 (bits per byte).
      return context.ShiftLeft(byteInWord, Const(3));
  }
  /// <summary>
  /// Extracts an 8-bit or 16-bit integer from a loaded word and widens it to 32 bits.
  /// </summary>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="size">Access size; selects the width and zero or sign extension.</param>
  /// <param name="bitOffset">Bit position of the value inside <paramref name="value"/>.</param>
  /// <param name="value">Word that contains the small integer.</param>
  /// <returns>The extended value, or just the shifted word for sizes that are not 8/16-bit.</returns>
  private static Operand ExtractSmallInt(
      EmitterContext context,
      LsSize size,
      Operand bitOffset,
      Operand value)
  {
      // Bring the requested field down to bit 0 first.
      Operand shifted = context.ShiftRightU32(value, bitOffset);

      return size switch
      {
          LsSize.U8 => ZeroExtendTo32(context, shifted, 8),
          LsSize.U16 => ZeroExtendTo32(context, shifted, 16),
          LsSize.S8 => SignExtendTo32(context, shifted, 8),
          LsSize.S16 => SignExtendTo32(context, shifted, 16),
          _ => shifted
      };
  }
  /// <summary>
  /// Merges an 8-bit or 16-bit integer into an existing word at the given bit position.
  /// </summary>
  /// <param name="context">Emitter context that receives the IR.</param>
  /// <param name="size">Access size; selects a field width of 8 or 16 bits.</param>
  /// <param name="bitOffset">Bit position inside <paramref name="word"/> where the value goes.</param>
  /// <param name="word">Existing word to insert into.</param>
  /// <param name="value">Value to insert; only its low 8 or 16 bits are used.</param>
  /// <returns>The merged word, or <paramref name="value"/> unchanged for sizes that are not 8/16-bit.</returns>
  private static Operand InsertSmallInt(
      EmitterContext context,
      LsSize size,
      Operand bitOffset,
      Operand word,
      Operand value)
  {
      int fieldBits;

      switch (size)
      {
          case LsSize.U8:
          case LsSize.S8:
              fieldBits = 8;
              break;
          case LsSize.U16:
          case LsSize.S16:
              fieldBits = 16;
              break;
          default:
              // Nothing to merge for full-word (or larger) sizes.
              return value;
      }

      // 0xff for 8-bit fields, 0xffff for 16-bit fields.
      int fieldMask = (1 << fieldBits) - 1;

      Operand masked = context.BitwiseAnd(value, Const(fieldMask));

      return context.BitfieldInsert(word, masked, bitOffset, Const(fieldBits));
  }
  465. }
  466. }