InstEmitMemory.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
  /// <summary>
  /// Selects which memory accessors (local or shared) the common
  /// load/store emitters of this class use.
  /// </summary>
  private enum MemoryRegion
  {
      // Routed through LoadLocal/StoreLocal.
      Local = 0,

      // Routed through LoadShared/StoreShared.
      Shared = 1
  }
  /// <summary>
  /// Emits attribute loads for the current instruction: one load per
  /// destination register, starting at Rd, with the attribute offset
  /// advancing by 4 for each register.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Ald(EmitterContext context)
  {
      OpCodeAttribute attrOp = (OpCodeAttribute)context.CurrOp;

      // Source C is copied once and passed to every attribute load below.
      Operand primVertexCopy = context.Copy(GetSrcC(context));

      int i = 0;

      while (i < attrOp.Count)
      {
          Register destReg = new Register(attrOp.Rd.Index + i, RegisterType.Gpr);

          // Stop as soon as the destination sequence reaches the zero register.
          if (destReg.IsRZ)
          {
              break;
          }

          Operand attribute = Attribute(attrOp.AttributeOffset + i * 4);
          Operand destination = Register(destReg);

          context.Copy(destination, context.LoadAttribute(attribute, primVertexCopy));

          i++;
      }
  }
  /// <summary>
  /// Emits attribute stores for the current instruction: one copy per
  /// source register, starting at Rd, with the attribute offset
  /// advancing by 4 for each register.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Ast(EmitterContext context)
  {
      OpCodeAttribute attrOp = (OpCodeAttribute)context.CurrOp;

      for (int i = 0; i < attrOp.Count; i++)
      {
          int regIndex = attrOp.Rd.Index + i;

          // Register indices past the zero register are not valid sources.
          if (regIndex > RegisterConsts.RegisterZeroIndex)
          {
              break;
          }

          Register srcReg = new Register(regIndex, RegisterType.Gpr);

          Operand target = Attribute(attrOp.AttributeOffset + i * 4);

          context.Copy(target, Register(srcReg));
      }
  }
  /// <summary>
  /// Emits an atomic operation using <see cref="Instruction.MrShared"/>.
  /// The address is source A shifted right by 2 plus the instruction
  /// offset; the previous-value result is copied to the destination.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Atoms(EmitterContext context)
  {
      OpCodeAtom atomOp = (OpCodeAtom)context.CurrOp;

      Operand shiftedBase = context.ShiftRightU32(GetSrcA(context), Const(2));
      Operand address = context.IAdd(shiftedBase, Const(atomOp.Offset));

      Operand operandValue = GetSrcB(context);

      // The high address word is unused for this memory region, so pass zero.
      Operand result = EmitAtomicOp(
          context,
          Instruction.MrShared,
          atomOp.AtomicOp,
          atomOp.Type,
          address,
          Const(0),
          operandValue);

      context.Copy(GetDest(context), result);
  }
  /// <summary>
  /// Emits a barrier for the Sync barrier mode; any other mode is
  /// reported through the config log and produces no code.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Bar(EmitterContext context)
  {
      OpCodeBarrier barOp = (OpCodeBarrier)context.CurrOp;

      // TODO: Support other modes.
      if (barOp.Mode != BarrierMode.Sync)
      {
          context.Config.PrintLog($"Invalid barrier mode: {barOp.Mode}.");

          return;
      }

      context.Barrier();
  }
  /// <summary>
  /// Emits a read of an input attribute with an interpolation qualifier
  /// derived from the instruction mode, applies the optional saturate,
  /// and copies the result to the destination.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Ipa(EmitterContext context)
  {
      OpCodeIpa ipaOp = (OpCodeIpa)context.CurrOp;

      InterpolationMode mode = ipaOp.Mode;

      InterpolationQualifier qualifier;

      if (mode == InterpolationMode.Constant)
      {
          qualifier = InterpolationQualifier.Flat;
      }
      else if (mode == InterpolationMode.Pass)
      {
          qualifier = InterpolationQualifier.NoPerspective;
      }
      else
      {
          // Every other mode uses no explicit qualifier.
          qualifier = InterpolationQualifier.None;
      }

      Operand attribute = Attribute(ipaOp.AttributeOffset, qualifier);

      Operand saturated = context.FPSaturate(attribute, ipaOp.Saturate);

      context.Copy(GetDest(context), saturated);
  }
  /// <summary>
  /// Handles ISBERD by forwarding source A unchanged to the destination.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Isberd(EmitterContext context)
  {
      // The instruction is a load from ISBE memory, but it appears to be
      // used only to fetch vertex input data. Rather than performing the
      // load, the offset is passed through so that the later attribute
      // load can consume it.
      Operand destination = GetDest(context);
      Operand passthrough = GetSrcA(context);

      context.Copy(destination, passthrough);
  }
  /// <summary>
  /// Emits a load using the local memory region.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Ld(EmitterContext context) => EmitLoad(context, MemoryRegion.Local);
  /// <summary>
  /// Emits a constant load of one word (or two for B64) from the slot
  /// named by the instruction into consecutive destination registers.
  /// Sizes below B32 are extracted from the loaded word using the low
  /// two address bits.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Ldc(EmitterContext context)
  {
      OpCodeLdc op = (OpCodeLdc)context.CurrOp;

      if (op.Size > IntegerSize.B64)
      {
          context.Config.PrintLog($"Invalid LDC size: {op.Size}.");

          // An unsupported size has no meaningful lowering. Emit nothing
          // instead of silently treating it as a single 32-bit load.
          return;
      }

      bool isSmallInt = op.Size < IntegerSize.B32;

      int count = op.Size == IntegerSize.B64 ? 2 : 1;

      Operand addr = context.IAdd(GetSrcA(context), Const(op.Offset));

      // Word offset = byte address / 4; the remainder selects the bits
      // inside the word for sub-word sizes.
      Operand wordOffset = context.ShiftRightU32(addr, Const(2));

      Operand bitOffset = GetBitOffset(context, addr);

      for (int index = 0; index < count; index++)
      {
          Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

          // Stop once the destination sequence reaches the zero register.
          if (rd.IsRZ)
          {
              break;
          }

          Operand offset = context.IAdd(wordOffset, Const(index));

          Operand value = context.LoadConstant(Const(op.Slot), offset);

          if (isSmallInt)
          {
              value = ExtractSmallInt(context, op.Size, bitOffset, value);
          }

          context.Copy(Register(rd), value);
      }
  }
  /// <summary>
  /// Emits a global memory load by delegating to the shared global-load emitter.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Ldg(EmitterContext context) => EmitLoadGlobal(context);
  /// <summary>
  /// Emits a load using the shared memory region.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Lds(EmitterContext context) => EmitLoad(context, MemoryRegion.Shared);
  /// <summary>
  /// Emits a memory barrier: a group memory barrier when the barrier
  /// level is Cta, and a general memory barrier for every other level.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Membar(EmitterContext context)
  {
      OpCodeMemoryBarrier barrierOp = (OpCodeMemoryBarrier)context.CurrOp;

      if (barrierOp.Level != BarrierLevel.Cta)
      {
          context.MemoryBarrier();

          return;
      }

      context.GroupMemoryBarrier();
  }
  /// <summary>
  /// Handles OUT: bit 39 of the raw opcode requests a vertex emit and
  /// bit 40 requests an end of primitive. An encoding with neither bit
  /// set is logged as invalid.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Out(EmitterContext context)
  {
      OpCode currOp = context.CurrOp;

      bool emitVertex = currOp.RawOpCode.Extract(39);
      bool endPrimitive = currOp.RawOpCode.Extract(40);

      if (!emitVertex && !endPrimitive)
      {
          context.Config.PrintLog("Invalid OUT encoding.");
      }

      // The emit is issued before the cut when both bits are set.
      if (emitVertex)
      {
          context.EmitVertex();
      }

      if (endPrimitive)
      {
          context.EndPrimitive();
      }
  }
  /// <summary>
  /// Emits an atomic operation using <see cref="Instruction.MrGlobal"/>
  /// at the address built from Ra and the instruction offset. The value
  /// operand is the instruction's destination register, and the result
  /// of the atomic is discarded.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Red(EmitterContext context)
  {
      OpCodeRed redOp = (OpCodeRed)context.CurrOp;

      (Operand, Operand) address = Get40BitsAddress(context, redOp.Ra, redOp.Extended, redOp.Offset);

      Operand operandValue = GetDest(context);

      EmitAtomicOp(
          context,
          Instruction.MrGlobal,
          redOp.AtomicOp,
          redOp.Type,
          address.Item1,
          address.Item2,
          operandValue);
  }
  /// <summary>
  /// Emits a store using the local memory region.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void St(EmitterContext context) => EmitStore(context, MemoryRegion.Local);
  /// <summary>
  /// Emits a global memory store by delegating to the shared global-store emitter.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Stg(EmitterContext context) => EmitStoreGlobal(context);
  /// <summary>
  /// Emits a store using the shared memory region.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  public static void Sts(EmitterContext context) => EmitStore(context, MemoryRegion.Shared);
  /// <summary>
  /// Emits the atomic instruction matching <paramref name="op"/> for a
  /// 32-bit signed or unsigned reduction type.
  /// </summary>
  /// <param name="context">Emitter context receiving the instruction.</param>
  /// <param name="mr">Memory region instruction flag passed to the atomic.</param>
  /// <param name="op">Atomic operation to perform.</param>
  /// <param name="type">Reduction type; only S32 and U32 are handled.</param>
  /// <param name="addrLow">Low address operand.</param>
  /// <param name="addrHigh">High address operand.</param>
  /// <param name="value">Value operand of the atomic.</param>
  /// <returns>
  /// The result of the emitted atomic, or a constant zero when the
  /// operation is not handled or the reduction type is invalid.
  /// </returns>
  private static Operand EmitAtomicOp(
      EmitterContext context,
      Instruction mr,
      AtomicOp op,
      ReductionType type,
      Operand addrLow,
      Operand addrHigh,
      Operand value)
  {
      bool isSigned = type == ReductionType.S32;
      bool isUnsigned = type == ReductionType.U32;

      if (!isSigned && !isUnsigned)
      {
          // Only the handled operations report a bad reduction type;
          // unhandled operations stay silent.
          switch (op)
          {
              case AtomicOp.Add:
              case AtomicOp.BitwiseAnd:
              case AtomicOp.BitwiseExclusiveOr:
              case AtomicOp.BitwiseOr:
              case AtomicOp.Maximum:
              case AtomicOp.Minimum:
                  context.Config.PrintLog($"Invalid reduction type: {type}.");
                  break;
          }

          return Const(0);
      }

      switch (op)
      {
          case AtomicOp.Add:
              return context.AtomicAdd(mr, addrLow, addrHigh, value);

          case AtomicOp.BitwiseAnd:
              return context.AtomicAnd(mr, addrLow, addrHigh, value);

          case AtomicOp.BitwiseExclusiveOr:
              return context.AtomicXor(mr, addrLow, addrHigh, value);

          case AtomicOp.BitwiseOr:
              return context.AtomicOr(mr, addrLow, addrHigh, value);

          case AtomicOp.Maximum:
              // Min/max are the only operations where signedness matters.
              if (isSigned)
              {
                  return context.AtomicMaxS32(mr, addrLow, addrHigh, value);
              }

              return context.AtomicMaxU32(mr, addrLow, addrHigh, value);

          case AtomicOp.Minimum:
              if (isSigned)
              {
                  return context.AtomicMinS32(mr, addrLow, addrHigh, value);
              }

              return context.AtomicMinU32(mr, addrLow, addrHigh, value);
      }

      return Const(0);
  }
  /// <summary>
  /// Emits a load of 1, 2 (B64) or 4 (B128) words from the given memory
  /// region into consecutive destination registers starting at Rd.
  /// Sizes below B32 are extracted from the loaded word.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  /// <param name="region">Memory region to load from.</param>
  private static void EmitLoad(EmitterContext context, MemoryRegion region)
  {
      OpCodeMemory op = (OpCodeMemory)context.CurrOp;

      if (op.Size > IntegerSize.B128)
      {
          context.Config.PrintLog($"Invalid load size: {op.Size}.");

          // An unsupported size has no meaningful lowering. Emit nothing
          // instead of silently treating it as a single 32-bit load.
          return;
      }

      bool isSmallInt = op.Size < IntegerSize.B32;

      int count = 1;

      switch (op.Size)
      {
          case IntegerSize.B64: count = 2; break;
          case IntegerSize.B128: count = 4; break;
      }

      Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

      // Word offset = byte offset / 4 (one word = 4 bytes).
      Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

      Operand bitOffset = GetBitOffset(context, baseOffset);

      for (int index = 0; index < count; index++)
      {
          Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

          // Stop once the destination sequence reaches the zero register.
          if (rd.IsRZ)
          {
              break;
          }

          Operand offset = context.IAdd(wordOffset, Const(index));

          Operand value = null;

          switch (region)
          {
              case MemoryRegion.Local: value = context.LoadLocal(offset); break;
              case MemoryRegion.Shared: value = context.LoadShared(offset); break;
          }

          if (isSmallInt)
          {
              value = ExtractSmallInt(context, op.Size, bitOffset, value);
          }

          context.Copy(Register(rd), value);
      }
  }
  /// <summary>
  /// Emits a global memory load of as many words as the access size
  /// requires into consecutive destination registers starting at Rd.
  /// Sizes below B32 are extracted from the loaded word.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  private static void EmitLoadGlobal(EmitterContext context)
  {
      OpCodeMemory memOp = (OpCodeMemory)context.CurrOp;

      IntegerSize size = memOp.Size;

      bool needsExtract = size < IntegerSize.B32;

      int wordCount = GetVectorCount(size);

      (Operand lowAddr, Operand highAddr) = Get40BitsAddress(context, memOp.Ra, memOp.Extended, memOp.Offset);

      Operand shiftAmount = GetBitOffset(context, lowAddr);

      int word = 0;

      while (word < wordCount)
      {
          Register destReg = new Register(memOp.Rd.Index + word, RegisterType.Gpr);

          // Stop once the destination sequence reaches the zero register.
          if (destReg.IsRZ)
          {
              break;
          }

          // Successive words are 4 bytes apart; only the low address word advances.
          Operand wordAddr = context.IAdd(lowAddr, Const(word * 4));

          Operand loaded = context.LoadGlobal(wordAddr, highAddr);

          if (needsExtract)
          {
              loaded = ExtractSmallInt(context, size, shiftAmount, loaded);
          }

          context.Copy(Register(destReg), loaded);

          word++;
      }
  }
  /// <summary>
  /// Emits a store of 1, 2 (B64) or 4 (B128) words to the given memory
  /// region from consecutive source registers starting at Rd. Sizes
  /// below B32 are merged into the existing word with a load, a
  /// bitfield insert and a store.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  /// <param name="region">Memory region to store to.</param>
  private static void EmitStore(EmitterContext context, MemoryRegion region)
  {
      OpCodeMemory op = (OpCodeMemory)context.CurrOp;

      if (op.Size > IntegerSize.B128)
      {
          context.Config.PrintLog($"Invalid store size: {op.Size}.");

          // An unsupported size has no meaningful lowering. Emit nothing
          // instead of writing a single 32-bit word to memory.
          return;
      }

      bool isSmallInt = op.Size < IntegerSize.B32;

      int count = 1;

      switch (op.Size)
      {
          case IntegerSize.B64: count = 2; break;
          case IntegerSize.B128: count = 4; break;
      }

      Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

      // Word offset = byte offset / 4 (one word = 4 bytes).
      Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

      Operand bitOffset = GetBitOffset(context, baseOffset);

      for (int index = 0; index < count; index++)
      {
          Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

          Operand value = Register(rd);

          Operand offset = context.IAdd(wordOffset, Const(index));

          if (isSmallInt)
          {
              // Sub-word store: read the current word and replace only
              // the addressed bits.
              Operand word = null;

              switch (region)
              {
                  case MemoryRegion.Local: word = context.LoadLocal(offset); break;
                  case MemoryRegion.Shared: word = context.LoadShared(offset); break;
              }

              value = InsertSmallInt(context, op.Size, bitOffset, word, value);
          }

          switch (region)
          {
              case MemoryRegion.Local: context.StoreLocal(offset, value); break;
              case MemoryRegion.Shared: context.StoreShared(offset, value); break;
          }

          // The zero register itself is still stored; the sequence ends after it.
          if (rd.IsRZ)
          {
              break;
          }
      }
  }
  /// <summary>
  /// Emits a global memory store of as many words as the access size
  /// requires from consecutive source registers starting at Rd. Sizes
  /// below B32 are merged into the word currently in memory.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  private static void EmitStoreGlobal(EmitterContext context)
  {
      OpCodeMemory memOp = (OpCodeMemory)context.CurrOp;

      IntegerSize size = memOp.Size;

      bool needsInsert = size < IntegerSize.B32;

      int wordCount = GetVectorCount(size);

      (Operand lowAddr, Operand highAddr) = Get40BitsAddress(context, memOp.Ra, memOp.Extended, memOp.Offset);

      Operand shiftAmount = GetBitOffset(context, lowAddr);

      for (int word = 0; word < wordCount; word++)
      {
          Register srcReg = new Register(memOp.Rd.Index + word, RegisterType.Gpr);

          Operand data = Register(srcReg);

          if (needsInsert)
          {
              // Sub-word store: read the current word and replace only
              // the addressed bits.
              Operand current = context.LoadGlobal(lowAddr, highAddr);

              data = InsertSmallInt(context, size, shiftAmount, current, data);
          }

          // Successive words are 4 bytes apart; only the low address word advances.
          Operand wordAddr = context.IAdd(lowAddr, Const(word * 4));

          context.StoreGlobal(wordAddr, highAddr, data);

          // The zero register itself is still stored; the sequence ends after it.
          if (srcReg.IsRZ)
          {
              break;
          }
      }
  }
  /// <summary>
  /// Returns how many 32-bit words an access of the given size covers.
  /// </summary>
  /// <param name="size">Access size.</param>
  /// <returns>2 for B64, 4 for B128 and UB128, and 1 for every other size.</returns>
  private static int GetVectorCount(IntegerSize size)
  {
      if (size == IntegerSize.B64)
      {
          return 2;
      }

      if (size == IntegerSize.B128 || size == IntegerSize.UB128)
      {
          return 4;
      }

      return 1;
  }
  /// <summary>
  /// Builds the low and high address words for a global memory access:
  /// source A plus <paramref name="offset"/> for the low word, and, for
  /// extended addressing, the register following <paramref name="ra"/>
  /// (or zero when it is the zero register) for the high word.
  /// </summary>
  /// <param name="context">Emitter context positioned on the instruction.</param>
  /// <param name="ra">Base address register.</param>
  /// <param name="extended">True when the address spans a register pair.</param>
  /// <param name="offset">Signed immediate offset added to the address.</param>
  /// <returns>The (low, high) address operands after the offset is applied.</returns>
  private static (Operand, Operand) Get40BitsAddress(
      EmitterContext context,
      Register ra,
      bool extended,
      int offset)
  {
      Operand addrLow = GetSrcA(context);

      Operand addrHigh = extended && !ra.IsRZ
          ? Register(ra.Index + 1, RegisterType.Gpr)
          : Const(0);

      Operand offs = Const(offset);

      addrLow = context.IAdd(addrLow, offs);

      if (extended)
      {
          // Unsigned overflow of the low-word addition: the sum wrapped
          // if it is now smaller than the addend.
          Operand carry = context.ICompareLessUnsigned(addrLow, offs);

          // The offset is signed, so its 64-bit sign extension has a high
          // word of -1 when negative and 0 otherwise. That high word must
          // be added together with the carry; adding only the carry would
          // wrongly increment the high address for negative offsets.
          int offsetHigh = offset < 0 ? -1 : 0;

          Operand highDelta = context.ConditionalSelect(carry, Const(offsetHigh + 1), Const(offsetHigh));

          addrHigh = context.IAdd(addrHigh, highDelta);
      }

      return (addrLow, addrHigh);
  }
  /// <summary>
  /// Emits the computation of the bit position inside a 32-bit word that
  /// corresponds to a byte offset: (baseOffset &amp; 3) * 8.
  /// </summary>
  /// <param name="context">Emitter context receiving the instructions.</param>
  /// <param name="baseOffset">Byte offset or address operand.</param>
  /// <returns>Operand holding the bit offset within the word.</returns>
  private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  {
      // Addresses are expected to be aligned to the accessed integer type,
      // so unaligned accesses are deliberately not handled here.
      Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));

      // Multiply by 8 (shift left by 3) to turn bytes into bits.
      return context.ShiftLeft(byteInWord, Const(3));
  }
  /// <summary>
  /// Emits the extraction of an 8- or 16-bit integer from a loaded word:
  /// the word is shifted right by <paramref name="bitOffset"/>, then
  /// zero- or sign-extended to 32 bits according to <paramref name="size"/>.
  /// </summary>
  /// <param name="context">Emitter context receiving the instructions.</param>
  /// <param name="size">Integer size selecting width and signedness.</param>
  /// <param name="bitOffset">Bit position of the value inside the word.</param>
  /// <param name="value">The loaded 32-bit word.</param>
  /// <returns>The extended value, or just the shifted word for other sizes.</returns>
  private static Operand ExtractSmallInt(
      EmitterContext context,
      IntegerSize size,
      Operand bitOffset,
      Operand value)
  {
      Operand shifted = context.ShiftRightU32(value, bitOffset);

      if (size == IntegerSize.U8)
      {
          return ZeroExtendTo32(context, shifted, 8);
      }

      if (size == IntegerSize.U16)
      {
          return ZeroExtendTo32(context, shifted, 16);
      }

      if (size == IntegerSize.S8)
      {
          return SignExtendTo32(context, shifted, 8);
      }

      if (size == IntegerSize.S16)
      {
          return SignExtendTo32(context, shifted, 16);
      }

      return shifted;
  }
  /// <summary>
  /// Emits the merge of an 8- or 16-bit value into an existing word: the
  /// value is masked to its width and inserted into
  /// <paramref name="word"/> at <paramref name="bitOffset"/>.
  /// </summary>
  /// <param name="context">Emitter context receiving the instructions.</param>
  /// <param name="size">Integer size selecting the field width.</param>
  /// <param name="bitOffset">Bit position of the field inside the word.</param>
  /// <param name="word">The word currently in memory.</param>
  /// <param name="value">The value to store.</param>
  /// <returns>
  /// The merged word, or <paramref name="value"/> unchanged for sizes
  /// that are not 8 or 16 bits.
  /// </returns>
  private static Operand InsertSmallInt(
      EmitterContext context,
      IntegerSize size,
      Operand bitOffset,
      Operand word,
      Operand value)
  {
      int bits;

      switch (size)
      {
          case IntegerSize.U8:
          case IntegerSize.S8:
              bits = 8;
              break;

          case IntegerSize.U16:
          case IntegerSize.S16:
              bits = 16;
              break;

          default:
              // Not a sub-word size: nothing to merge.
              return value;
      }

      // Signedness is irrelevant for stores; only the low bits are kept.
      Operand masked = context.BitwiseAnd(value, Const((1 << bits) - 1));

      return context.BitfieldInsert(word, masked, bitOffset, Const(bits));
  }
  481. }
  482. }