InstEmitMemory.cs 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using System.Numerics;
  5. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  6. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7. namespace Ryujinx.Graphics.Shader.Instructions
  8. {
  9. static partial class InstEmit
  10. {
  /// <summary>
  /// Emits the Atom instruction: an atomic operation on global memory whose result
  /// is copied into the destination register.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Atom(EmitterContext context)
  {
      InstAtom atom = context.GetOp<InstAtom>();

      // Shift the immediate up to bit 31 and arithmetically back down so that
      // its low 20 bits are interpreted as a signed value.
      int signedOffset = (atom.Imm20 << 12) >> 12;

      (Operand low, Operand high) = Get40BitsAddress(
          context,
          new Register(atom.SrcA, RegisterType.Gpr),
          atom.E,
          signedOffset);

      Operand operand = GetSrcReg(context, atom.SrcB);
      Operand result = EmitAtomicOp(context, StorageKind.GlobalMemory, atom.Op, atom.Size, low, high, operand);

      context.Copy(GetDest(atom.Dest), result);
  }
  /// <summary>
  /// Emits the Atoms instruction: an atomic operation on shared memory whose result
  /// is copied into the destination register. Only emitted for the compute stage;
  /// on any other stage a message is logged and nothing is emitted.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Atoms(EmitterContext context)
  {
      if (context.TranslatorContext.Definitions.Stage is not ShaderStage.Compute)
      {
          context.TranslatorContext.GpuAccessor.Log($"Atoms instruction is not valid on \"{context.TranslatorContext.Definitions.Stage}\" stage.");
          return;
      }

      InstAtoms atoms = context.GetOp<InstAtoms>();

      // The register operand is divided by 4 before the immediate is added.
      Operand scaledBase = context.ShiftRightU32(GetSrcReg(context, atoms.SrcA), Const(2));

      // Sign-extend the low 22 bits of the immediate.
      int signedOffset = (atoms.Imm22 << 10) >> 10;
      Operand address = context.IAdd(scaledBase, Const(signedOffset));

      Operand operand = GetSrcReg(context, atoms.SrcB);
      AtomSize atomSize = ToAtomSize(atoms.AtomsSize);
      Operand memoryId = Const(context.ResourceManager.SharedMemoryId);

      Operand result = EmitAtomicOp(context, StorageKind.SharedMemory, atoms.AtomOp, atomSize, memoryId, address, operand);

      context.Copy(GetDest(atoms.Dest), result);

      // Maps the Atoms size encoding onto AtomSize; anything unlisted is treated as U32.
      static AtomSize ToAtomSize(AtomsSize encoded)
      {
          switch (encoded)
          {
              case AtomsSize.S32:
                  return AtomSize.S32;
              case AtomsSize.U64:
                  return AtomSize.U64;
              case AtomsSize.S64:
                  return AtomSize.S64;
              default:
                  return AtomSize.U32;
          }
      }
  }
  /// <summary>
  /// Emits the Ldc instruction: loads one or two 32-bit words from a constant buffer
  /// into consecutive destination registers, extracting a sub-word value for sizes
  /// smaller than 32 bits. Sizes above B64 are logged and ignored.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Ldc(EmitterContext context)
  {
      InstLdc op = context.GetOp<InstLdc>();

      if (op.LsSize > LsSize2.B64)
      {
          context.TranslatorContext.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
          return;
      }

      int wordCount = op.LsSize == LsSize2.B64 ? 2 : 1;
      bool needsExtract = op.LsSize < LsSize2.B32;

      Operand bufferSlot = Const(op.CbufSlot);
      Operand baseReg = GetSrcReg(context, op.SrcA);

      if (op.AddressMode is AddressMode.Is or AddressMode.Isl)
      {
          // In these modes the upper 16 bits of the register are added to the slot
          // and only the lower 16 bits are kept as the offset.
          Operand slotDelta = context.BitfieldExtractU32(baseReg, Const(16), Const(16));

          bufferSlot = context.IAdd(bufferSlot, slotDelta);
          baseReg = context.BitwiseAnd(baseReg, Const(0xffff));
      }

      int immOffset = Imm16ToSInt(op.CbufOffset);
      Operand byteAddress = context.IAdd(baseReg, Const(immOffset));
      Operand firstWord = context.ShiftRightU32(byteAddress, Const(2));

      for (int word = 0; word < wordCount; word++)
      {
          Register target = new(op.Dest + word, RegisterType.Gpr);

          // Stop as soon as the destination reaches the zero register.
          if (target.IsRZ)
          {
              break;
          }

          Operand wordIndex = context.IAdd(firstWord, Const(word));
          Operand loaded = EmitLoadConstant(context, bufferSlot, wordIndex);

          if (needsExtract)
          {
              Operand bitOffset = GetBitOffset(context, byteAddress);

              loaded = ExtractSmallInt(context, (LsSize)op.LsSize, bitOffset, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits the Ldg instruction by decoding its operands and delegating to <c>EmitLdg</c>.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Ldg(EmitterContext context)
  {
      InstLdg ldg = context.GetOp<InstLdg>();
      int immOffset = Imm24ToSInt(ldg.Imm24);

      EmitLdg(context, ldg.LsSize, ldg.SrcA, ldg.Dest, immOffset, ldg.E);
  }
  /// <summary>
  /// Emits the Ldl instruction: a load from local memory, delegated to <c>EmitLoad</c>.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Ldl(EmitterContext context)
  {
      InstLdl ldl = context.GetOp<InstLdl>();

      Operand baseAddress = GetSrcReg(context, ldl.SrcA);
      int immOffset = Imm24ToSInt(ldl.Imm24);

      EmitLoad(context, StorageKind.LocalMemory, ldl.LsSize, baseAddress, ldl.Dest, immOffset);
  }
  /// <summary>
  /// Emits the Lds instruction: a load from shared memory, delegated to <c>EmitLoad</c>.
  /// Only emitted for the compute stage; on any other stage a message is logged and
  /// nothing is emitted.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Lds(EmitterContext context)
  {
      if (context.TranslatorContext.Definitions.Stage is not ShaderStage.Compute)
      {
          context.TranslatorContext.GpuAccessor.Log($"Lds instruction is not valid on \"{context.TranslatorContext.Definitions.Stage}\" stage.");
          return;
      }

      InstLds lds = context.GetOp<InstLds>();

      Operand baseAddress = GetSrcReg(context, lds.SrcA);
      int immOffset = Imm24ToSInt(lds.Imm24);

      EmitLoad(context, StorageKind.SharedMemory, lds.LsSize, baseAddress, lds.Dest, immOffset);
  }
  /// <summary>
  /// Emits the Red instruction: an atomic operation on global memory whose result is discarded.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Red(EmitterContext context)
  {
      InstRed red = context.GetOp<InstRed>();

      // NOTE(review): Imm20 is passed through as-is here, whereas Atom sign-extends its
      // 20-bit immediate first - confirm against the decoder whether this is intended.
      (Operand low, Operand high) = Get40BitsAddress(
          context,
          new Register(red.SrcA, RegisterType.Gpr),
          red.E,
          red.Imm20);

      // NOTE(review): the source operand is obtained through GetDest rather than GetSrcReg.
      Operand operand = GetDest(red.SrcB);

      EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)red.RedOp, red.RedSize, low, high, operand);
  }
  /// <summary>
  /// Emits the Stg instruction by decoding its operands and delegating to <c>EmitStg</c>.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Stg(EmitterContext context)
  {
      InstStg stg = context.GetOp<InstStg>();
      int immOffset = Imm24ToSInt(stg.Imm24);

      // The Dest field names the first register holding the data to store.
      EmitStg(context, stg.LsSize, stg.SrcA, stg.Dest, immOffset, stg.E);
  }
  /// <summary>
  /// Emits the Stl instruction: a store to local memory, delegated to <c>EmitStore</c>.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Stl(EmitterContext context)
  {
      InstStl stl = context.GetOp<InstStl>();

      Operand baseAddress = GetSrcReg(context, stl.SrcA);
      int immOffset = Imm24ToSInt(stl.Imm24);

      // The Dest field names the first register holding the data to store.
      EmitStore(context, StorageKind.LocalMemory, stl.LsSize, baseAddress, stl.Dest, immOffset);
  }
  /// <summary>
  /// Emits the Sts instruction: a store to shared memory, delegated to <c>EmitStore</c>.
  /// Only emitted for the compute stage; on any other stage a message is logged and
  /// nothing is emitted.
  /// </summary>
  /// <param name="context">Emitter context that supplies the current instruction and receives the emitted operations.</param>
  public static void Sts(EmitterContext context)
  {
      if (context.TranslatorContext.Definitions.Stage is not ShaderStage.Compute)
      {
          context.TranslatorContext.GpuAccessor.Log($"Sts instruction is not valid on \"{context.TranslatorContext.Definitions.Stage}\" stage.");
          return;
      }

      InstSts sts = context.GetOp<InstSts>();

      Operand baseAddress = GetSrcReg(context, sts.SrcA);
      int immOffset = Imm24ToSInt(sts.Imm24);

      // The Dest field names the first register holding the data to store.
      EmitStore(context, StorageKind.SharedMemory, sts.LsSize, baseAddress, sts.Dest, immOffset);
  }
  /// <summary>
  /// Emits a load of one 32-bit word from a constant buffer.
  /// </summary>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="slot">Constant buffer slot; may be a constant or a value computed at runtime.</param>
  /// <param name="offset">Word offset into the buffer; split into a vector index (offset / 4) and an element index (offset % 4).</param>
  /// <returns>The loaded value. For a non-constant slot, zero when the slot matches none of the buffers reported as used.</returns>
  private static Operand EmitLoadConstant(EmitterContext context, Operand slot, Operand offset)
  {
      Operand vectorIndex = context.ShiftRightU32(offset, Const(2));
      Operand elementIndex = context.BitwiseAnd(offset, Const(3));

      // Constant slot: the binding is resolved here and a single load is emitted.
      if (slot.Type == OperandType.Constant)
      {
          int directBinding = context.ResourceManager.GetConstantBufferBinding(slot.Value);

          return context.Load(StorageKind.ConstantBuffer, directBinding, Const(0), vectorIndex, elementIndex);
      }

      // Non-constant slot: emit a load from every buffer flagged in the use mask and
      // keep the one whose index equals the slot value.
      Operand selected = Const(0);

      // "mask &= mask - 1" clears the lowest set bit, i.e. the one just processed.
      for (uint mask = context.TranslatorContext.GpuAccessor.QueryConstantBufferUse(); mask != 0; mask &= mask - 1)
      {
          int bufferIndex = BitOperations.TrailingZeroCount(mask);
          int binding = context.ResourceManager.GetConstantBufferBinding(bufferIndex);

          Operand matches = context.ICompareEqual(slot, Const(bufferIndex));
          Operand candidate = context.Load(StorageKind.ConstantBuffer, binding, Const(0), vectorIndex, elementIndex);

          selected = context.ConditionalSelect(matches, candidate, selected);
      }

      return selected;
  }
  /// <summary>
  /// Emits an atomic memory operation and returns the operand holding its result.
  /// </summary>
  /// <remarks>
  /// Only Add, And, Xor, Or, Max and Min on 32-bit types (S32/U32) are emitted.
  /// Any other type, or any other operation, is logged and yields a constant zero
  /// without emitting an atomic operation.
  /// </remarks>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="storageKind">Memory storage the atomic operates on.</param>
  /// <param name="op">Atomic operation to perform.</param>
  /// <param name="type">Operand type of the atomic; selects the signed or unsigned Min/Max variant.</param>
  /// <param name="e0">First address element, forwarded unchanged to the atomic operation.</param>
  /// <param name="e1">Second address element, forwarded unchanged to the atomic operation.</param>
  /// <param name="value">Value operand of the atomic operation.</param>
  /// <returns>The result of the emitted atomic, or a constant zero when nothing was emitted.</returns>
  private static Operand EmitAtomicOp(
      EmitterContext context,
      StorageKind storageKind,
      AtomOp op,
      AtomSize type,
      Operand e0,
      Operand e1,
      Operand value)
  {
      bool isSigned32 = type == AtomSize.S32;
      bool is32Bit = isSigned32 || type == AtomSize.U32;

      switch (op)
      {
          case AtomOp.Add when is32Bit:
              return context.AtomicAdd(storageKind, e0, e1, value);
          case AtomOp.And when is32Bit:
              return context.AtomicAnd(storageKind, e0, e1, value);
          case AtomOp.Xor when is32Bit:
              return context.AtomicXor(storageKind, e0, e1, value);
          case AtomOp.Or when is32Bit:
              return context.AtomicOr(storageKind, e0, e1, value);
          case AtomOp.Max when is32Bit:
              return isSigned32
                  ? context.AtomicMaxS32(storageKind, e0, e1, value)
                  : context.AtomicMaxU32(storageKind, e0, e1, value);
          case AtomOp.Min when is32Bit:
              return isSigned32
                  ? context.AtomicMinS32(storageKind, e0, e1, value)
                  : context.AtomicMinU32(storageKind, e0, e1, value);

          // A handled operation reaches these labels only when its guard above failed,
          // i.e. the type is not a 32-bit one.
          case AtomOp.Add:
          case AtomOp.And:
          case AtomOp.Xor:
          case AtomOp.Or:
          case AtomOp.Max:
          case AtomOp.Min:
              context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}.");
              break;

          // Previously any other operation fell through silently; report it so that
          // a dropped atomic is visible in the log.
          default:
              context.TranslatorContext.GpuAccessor.Log($"Unsupported atomic operation: {op}.");
              break;
      }

      return Const(0);
  }
  /// <summary>
  /// Emits a load of 1, 2 or 4 consecutive 32-bit words from local or shared memory
  /// into consecutive destination registers. Sizes smaller than 32 bits are
  /// extracted from the containing word.
  /// </summary>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="storageKind">LocalMemory selects the local memory id; any other value selects the shared memory id.</param>
  /// <param name="size">Access size; values above B128 are logged and ignored.</param>
  /// <param name="srcA">Operand holding the base byte address.</param>
  /// <param name="rd">Index of the first destination register.</param>
  /// <param name="offset">Byte offset added to the base address.</param>
  private static void EmitLoad(
      EmitterContext context,
      StorageKind storageKind,
      LsSize2 size,
      Operand srcA,
      int rd,
      int offset)
  {
      if (size > LsSize2.B128)
      {
          context.TranslatorContext.GpuAccessor.Log($"Invalid load size: {size}.");
          return;
      }

      int memoryId;

      if (storageKind == StorageKind.LocalMemory)
      {
          memoryId = context.ResourceManager.LocalMemoryId;
      }
      else
      {
          memoryId = context.ResourceManager.SharedMemoryId;
      }

      int wordCount = size == LsSize2.B128 ? 4 : (size == LsSize2.B64 ? 2 : 1);
      bool needsExtract = size < LsSize2.B32;

      Operand baseAddress = context.Copy(srcA);

      for (int word = 0; word < wordCount; word++)
      {
          Register target = new(rd + word, RegisterType.Gpr);

          // Stop as soon as the destination reaches the zero register.
          if (target.IsRZ)
          {
              break;
          }

          Operand byteAddress = context.IAdd(baseAddress, Const(offset + word * 4));

          // Memory is addressed in 32-bit words, so the byte address is divided by 4.
          Operand wordAddress = context.ShiftRightU32(byteAddress, Const(2));
          Operand bitOffset = GetBitOffset(context, byteAddress);
          Operand loaded = context.Load(storageKind, memoryId, wordAddress);

          if (needsExtract)
          {
              loaded = ExtractSmallInt(context, (LsSize)size, bitOffset, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a global memory load of 1, 2 or 4 elements into consecutive destination registers.
  /// </summary>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="size">Access size; selects the element count and the global storage kind.</param>
  /// <param name="ra">Index of the register holding the low part of the base address.</param>
  /// <param name="rd">Index of the first destination register.</param>
  /// <param name="offset">Byte offset added to the base address.</param>
  /// <param name="extended">Forwarded to <c>Get40BitsAddress</c> to select whether a high address register is used.</param>
  private static void EmitLdg(
      EmitterContext context,
      LsSize size,
      int ra,
      int rd,
      int offset,
      bool extended)
  {
      int elementCount = GetVectorCount(size);
      StorageKind kind = GetStorageKind(size);

      // Only the high half of the computed address is used; the low half is rebuilt
      // for each element from the base register below.
      (_, Operand high) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

      Operand baseLow = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));

      for (int element = 0; element < elementCount; element++)
      {
          Register target = new(rd + element, RegisterType.Gpr);

          // Stop as soon as the destination reaches the zero register.
          if (target.IsRZ)
          {
              break;
          }

          Operand low = context.IAdd(baseLow, Const(offset + element * 4));
          Operand loaded = context.Load(kind, low, high);

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a store of 1, 2 or 4 consecutive registers to local or shared memory.
  /// </summary>
  /// <remarks>
  /// Local memory sub-word stores are emitted as a load, a bitfield insert and a
  /// full word store. Shared memory sub-word stores use the dedicated 8-bit and
  /// 16-bit storage kinds with a byte address instead.
  /// </remarks>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="storageKind">LocalMemory or SharedMemory; nothing is stored for any other kind.</param>
  /// <param name="size">Access size; values above B128 are logged and ignored.</param>
  /// <param name="srcA">Operand holding the base byte address.</param>
  /// <param name="rd">Index of the first register holding the data to store.</param>
  /// <param name="offset">Byte offset added to the base address.</param>
  private static void EmitStore(
      EmitterContext context,
      StorageKind storageKind,
      LsSize2 size,
      Operand srcA,
      int rd,
      int offset)
  {
      if (size > LsSize2.B128)
      {
          context.TranslatorContext.GpuAccessor.Log($"Invalid store size: {size}.");
          return;
      }

      bool isLocal = storageKind == StorageKind.LocalMemory;

      int memoryId = isLocal
          ? context.ResourceManager.LocalMemoryId
          : context.ResourceManager.SharedMemoryId;

      int wordCount = size == LsSize2.B128 ? 4 : (size == LsSize2.B64 ? 2 : 1);
      bool isSubWord = size < LsSize2.B32;

      Operand baseAddress = context.Copy(srcA);

      for (int word = 0; word < wordCount; word++)
      {
          // When the register index would reach or pass the zero register index,
          // the first register (rd) is used as the source instead.
          int sourceIndex = rd + word;

          if (sourceIndex >= RegisterConsts.RegisterZeroIndex)
          {
              sourceIndex = rd;
          }

          Operand data = Register(sourceIndex, RegisterType.Gpr);
          Operand byteAddress = context.IAdd(baseAddress, Const(offset + word * 4));
          Operand wordAddress = context.ShiftRightU32(byteAddress, Const(2));
          Operand bitOffset = GetBitOffset(context, byteAddress);

          if (isLocal)
          {
              if (isSubWord)
              {
                  // Read-modify-write: merge the small value into the existing word.
                  Operand existing = context.Load(storageKind, memoryId, wordAddress);

                  data = InsertSmallInt(context, (LsSize)size, bitOffset, existing, data);
              }

              context.Store(storageKind, memoryId, wordAddress, data);
          }
          else if (storageKind == StorageKind.SharedMemory)
          {
              (StorageKind targetKind, Operand targetAddress) = size switch
              {
                  LsSize2.U8 or LsSize2.S8 => (StorageKind.SharedMemory8, byteAddress),
                  LsSize2.U16 or LsSize2.S16 => (StorageKind.SharedMemory16, byteAddress),
                  _ => (storageKind, wordAddress),
              };

              context.Store(targetKind, memoryId, targetAddress, data);
          }
      }
  }
  /// <summary>
  /// Emits a global memory store of 1, 2 or 4 consecutive registers.
  /// </summary>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="size">Access size; values above B128 are logged and ignored.</param>
  /// <param name="ra">Index of the register holding the low part of the base address.</param>
  /// <param name="rd">Index of the first register holding the data to store.</param>
  /// <param name="offset">Byte offset added to the base address.</param>
  /// <param name="extended">Forwarded to <c>Get40BitsAddress</c> to select whether a high address register is used.</param>
  private static void EmitStg(
      EmitterContext context,
      LsSize2 size,
      int ra,
      int rd,
      int offset,
      bool extended)
  {
      if (size > LsSize2.B128)
      {
          context.TranslatorContext.GpuAccessor.Log($"Invalid store size: {size}.");
          return;
      }

      LsSize accessSize = (LsSize)size;
      int elementCount = GetVectorCount(accessSize);
      StorageKind kind = GetStorageKind(accessSize);

      // Only the high half of the computed address is used; the low half is rebuilt
      // for each element from the base register below.
      (_, Operand high) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

      Operand baseLow = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));

      for (int element = 0; element < elementCount; element++)
      {
          // When the register index would reach or pass the zero register index,
          // the first register (rd) is used as the source instead.
          int sourceIndex = rd + element;

          if (sourceIndex >= RegisterConsts.RegisterZeroIndex)
          {
              sourceIndex = rd;
          }

          Operand data = Register(sourceIndex, RegisterType.Gpr);
          Operand low = context.IAdd(baseLow, Const(offset + element * 4));

          context.Store(kind, low, high, data);
      }
  }
  /// <summary>
  /// Selects the global memory storage kind matching an access size.
  /// </summary>
  /// <param name="size">Access size of the load or store.</param>
  /// <returns>The typed 8-bit or 16-bit global storage kind for sub-word sizes, otherwise <c>GlobalMemory</c>.</returns>
  private static StorageKind GetStorageKind(LsSize size)
  {
      switch (size)
      {
          case LsSize.U8:
              return StorageKind.GlobalMemoryU8;
          case LsSize.S8:
              return StorageKind.GlobalMemoryS8;
          case LsSize.U16:
              return StorageKind.GlobalMemoryU16;
          case LsSize.S16:
              return StorageKind.GlobalMemoryS16;
          default:
              return StorageKind.GlobalMemory;
      }
  }
  /// <summary>
  /// Returns how many 32-bit elements an access of the given size covers.
  /// </summary>
  /// <param name="size">Access size of the load or store.</param>
  /// <returns>4 for B128 and UB128, 2 for B64, and 1 for every other size.</returns>
  private static int GetVectorCount(LsSize size)
  {
      if (size is LsSize.B128 or LsSize.UB128)
      {
          return 4;
      }

      return size == LsSize.B64 ? 2 : 1;
  }
  /// <summary>
  /// Builds the low and high parts of a memory address from a base register and an immediate offset.
  /// </summary>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="ra">Register holding the low part of the base address.</param>
  /// <param name="extended">When true, the high part comes from the register following <paramref name="ra"/> (unless that is the zero register) and receives the carry of the low addition.</param>
  /// <param name="offset">Offset added to the low part.</param>
  /// <returns>The (low, high) address operands; high is a constant zero when not extended.</returns>
  private static (Operand, Operand) Get40BitsAddress(
      EmitterContext context,
      Register ra,
      bool extended,
      int offset)
  {
      Operand baseLow = Register(ra);

      Operand high = extended && !ra.IsRZ
          ? Register(ra.Index + 1, RegisterType.Gpr)
          : Const(0);

      Operand immediate = Const(offset);
      Operand low = context.IAdd(baseLow, immediate);

      if (!extended)
      {
          return (low, high);
      }

      // The unsigned sum being smaller than the added immediate means the addition
      // wrapped around, so one is carried into the high part.
      Operand wrapped = context.ICompareLessUnsigned(low, immediate);
      Operand carry = context.ConditionalSelect(wrapped, Const(1), Const(0));

      return (low, context.IAdd(high, carry));
  }
  /// <summary>
  /// Emits the bit position of a byte address within its 32-bit word: (baseOffset &amp; 3) * 8.
  /// </summary>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="baseOffset">Byte address or offset.</param>
  /// <returns>Operand holding the bit offset (0, 8, 16 or 24).</returns>
  private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  {
      // Addresses are expected to be aligned to the size of the accessed integer type,
      // so a value straddling two words is not handled here.
      Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));

      // Shifting left by 3 multiplies by 8 to convert bytes into bits.
      return context.ShiftLeft(byteInWord, Const(3));
  }
  /// <summary>
  /// Extracts an 8-bit or 16-bit integer from a 32-bit word and widens it to 32 bits.
  /// </summary>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="size">Size and signedness of the value to extract.</param>
  /// <param name="bitOffset">Bit position of the value inside <paramref name="value"/>.</param>
  /// <param name="value">The 32-bit word containing the value.</param>
  /// <returns>The zero-extended or sign-extended value; for other sizes, the word shifted right by <paramref name="bitOffset"/>.</returns>
  private static Operand ExtractSmallInt(
      EmitterContext context,
      LsSize size,
      Operand bitOffset,
      Operand value)
  {
      // Bring the requested bits down to bit 0 before widening.
      Operand shifted = context.ShiftRightU32(value, bitOffset);

      return size switch
      {
          LsSize.U8 => ZeroExtendTo32(context, shifted, 8),
          LsSize.S8 => SignExtendTo32(context, shifted, 8),
          LsSize.U16 => ZeroExtendTo32(context, shifted, 16),
          LsSize.S16 => SignExtendTo32(context, shifted, 16),
          _ => shifted,
      };
  }
  /// <summary>
  /// Merges an 8-bit or 16-bit integer into a 32-bit word at the given bit position.
  /// </summary>
  /// <param name="context">Emitter context receiving the emitted operations.</param>
  /// <param name="size">Size of the value to insert; signedness does not matter here.</param>
  /// <param name="bitOffset">Bit position inside <paramref name="word"/> where the value is inserted.</param>
  /// <param name="word">The existing 32-bit word.</param>
  /// <param name="value">The value to insert; only its low 8 or 16 bits are used.</param>
  /// <returns>The merged word; for other sizes, <paramref name="value"/> is returned unchanged.</returns>
  private static Operand InsertSmallInt(
      EmitterContext context,
      LsSize size,
      Operand bitOffset,
      Operand word,
      Operand value)
  {
      int bitCount = size switch
      {
          LsSize.U8 or LsSize.S8 => 8,
          LsSize.U16 or LsSize.S16 => 16,
          _ => 0,
      };

      // Sizes of 32 bits or more need no merging.
      if (bitCount == 0)
      {
          return value;
      }

      // Keep only the low bits of the value (0xff or 0xffff) before inserting them.
      Operand masked = context.BitwiseAnd(value, Const((1 << bitCount) - 1));

      return context.BitfieldInsert(word, masked, bitOffset, Const(bitCount));
  }
  479. }
  480. }