InstEmitMemory.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
  /// <summary>
  /// Memory regions that the shared load/store emitters of this class can target.
  /// </summary>
  private enum MemoryRegion
  {
      /// <summary>Accessed through LoadLocal / StoreLocal on the emitter context.</summary>
      Local = 0,

      /// <summary>Accessed through LoadShared / StoreShared on the emitter context.</summary>
      Shared = 1
  }
  /// <summary>
  /// Emits attribute loads into up to <c>Count</c> consecutive destination registers starting at Rd.
  /// Each successive register reads the attribute located 4 bytes after the previous one.
  /// Emission stops as soon as a destination register reports <c>IsRZ</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Ald(EmitterContext context)
  {
      var attrOp = (OpCodeAttribute)context.CurrOp;

      // Source C is copied once up front and handed to every LoadAttribute below.
      Operand vertex = context.Copy(GetSrcC(context));

      for (int word = 0; word < attrOp.Count; word++)
      {
          var target = new Register(attrOp.Rd.Index + word, RegisterType.Gpr);

          if (target.IsRZ)
          {
              return;
          }

          Operand loaded = context.LoadAttribute(Attribute(attrOp.AttributeOffset + word * 4), vertex);

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits attribute stores from up to <c>Count</c> consecutive source registers starting at Rd.
  /// Each successive register is written to the attribute located 4 bytes after the previous one.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Ast(EmitterContext context)
  {
      var attrOp = (OpCodeAttribute)context.CurrOp;

      for (int word = 0; word < attrOp.Count; word++)
      {
          int regIndex = attrOp.Rd.Index + word;

          // Stop once the register index runs past RegisterZeroIndex.
          if (regIndex > RegisterConsts.RegisterZeroIndex)
          {
              return;
          }

          Operand target = Attribute(attrOp.AttributeOffset + word * 4);
          Operand source = Register(new Register(regIndex, RegisterType.Gpr));

          context.Copy(target, source);
      }
  }
  /// <summary>
  /// Emits an atomic operation on <c>Instruction.MrShared</c> and copies the result to the destination.
  /// The address passed on is <c>(srcA &gt;&gt; 2) + op.Offset</c>, with a constant zero as the high part.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Atoms(EmitterContext context)
  {
      var atomOp = (OpCodeAtom)context.CurrOp;

      Operand address = context.IAdd(
          context.ShiftRightU32(GetSrcA(context), Const(2)),
          Const(atomOp.Offset));

      Operand result = EmitAtomicOp(
          context,
          Instruction.MrShared,
          atomOp.AtomicOp,
          atomOp.Type,
          address,
          Const(0),
          GetSrcB(context));

      context.Copy(GetDest(context), result);
  }
  /// <summary>
  /// Emits a barrier when the opcode's mode is <c>BarrierMode.Sync</c>;
  /// any other mode is only reported through the config log.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Bar(EmitterContext context)
  {
      var barrierOp = (OpCodeBarrier)context.CurrOp;

      // TODO: Support other modes.
      if (barrierOp.Mode != BarrierMode.Sync)
      {
          context.Config.PrintLog($"Invalid barrier mode: {barrierOp.Mode}.");

          return;
      }

      context.Barrier();
  }
  /// <summary>
  /// Reads the attribute at the opcode's attribute offset, applies the optional
  /// floating-point saturation and copies the result to the destination.
  /// <c>InterpolationMode.Pass</c> selects the <c>NoPerspective</c> qualifier; every other mode uses <c>None</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Ipa(EmitterContext context)
  {
      var ipaOp = (OpCodeIpa)context.CurrOp;

      InterpolationQualifier qualifier = ipaOp.Mode == InterpolationMode.Pass
          ? InterpolationQualifier.NoPerspective
          : InterpolationQualifier.None;

      Operand attribute = Attribute(ipaOp.AttributeOffset, qualifier);
      Operand saturated = context.FPSaturate(attribute, ipaOp.Saturate);

      context.Copy(GetDest(context), saturated);
  }
  /// <summary>
  /// Copies source A unchanged into the destination.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Isberd(EmitterContext context)
  {
      // The real instruction loads from ISBE memory. It only seems to be used
      // to fetch vertex input data, so the offset is forwarded as-is and
      // consumed later by the attribute load.
      Operand destination = GetDest(context);
      Operand forwardedOffset = GetSrcA(context);

      context.Copy(destination, forwardedOffset);
  }
  /// <summary>
  /// Emits a load from <c>MemoryRegion.Local</c> via <c>EmitLoad</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Ld(EmitterContext context) => EmitLoad(context, MemoryRegion.Local);
  /// <summary>
  /// Emits one constant load (two for <c>IntegerSize.B64</c>) from the slot named by the opcode
  /// into consecutive destination registers, stopping at the first register that reports <c>IsRZ</c>.
  /// Sizes below <c>B32</c> are extracted from the loaded word with <c>ExtractSmallInt</c>.
  /// Sizes above <c>B64</c> are logged as invalid but still emitted as a single-word load.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Ldc(EmitterContext context)
  {
      var ldcOp = (OpCodeLdc)context.CurrOp;

      if (ldcOp.Size > IntegerSize.B64)
      {
          context.Config.PrintLog($"Invalid LDC size: {ldcOp.Size}.");
      }

      bool needsExtract = ldcOp.Size < IntegerSize.B32;
      int wordCount = ldcOp.Size == IntegerSize.B64 ? 2 : 1;

      // NOTE(review): op.Offset is added after the >> 2 (i.e. treated as a word count) and
      // the bit offset is derived from source A alone - confirm against the OpCodeLdc decoder.
      Operand firstWord = context.IAdd(
          context.ShiftRightU32(GetSrcA(context), Const(2)),
          Const(ldcOp.Offset));

      Operand shift = GetBitOffset(context, GetSrcA(context));

      for (int word = 0; word < wordCount; word++)
      {
          var target = new Register(ldcOp.Rd.Index + word, RegisterType.Gpr);

          if (target.IsRZ)
          {
              return;
          }

          Operand loaded = context.LoadConstant(Const(ldcOp.Slot), context.IAdd(firstWord, Const(word)));

          if (needsExtract)
          {
              loaded = ExtractSmallInt(context, ldcOp.Size, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a global memory load via <c>EmitLoadGlobal</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Ldg(EmitterContext context) => EmitLoadGlobal(context);
  /// <summary>
  /// Emits a load from <c>MemoryRegion.Shared</c> via <c>EmitLoad</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Lds(EmitterContext context) => EmitLoad(context, MemoryRegion.Shared);
  /// <summary>
  /// Emits a group memory barrier when the opcode's level is <c>BarrierLevel.Cta</c>,
  /// and a plain memory barrier for every other level.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Membar(EmitterContext context)
  {
      var barrierOp = (OpCodeMemoryBarrier)context.CurrOp;

      if (barrierOp.Level != BarrierLevel.Cta)
      {
          context.MemoryBarrier();

          return;
      }

      context.GroupMemoryBarrier();
  }
  /// <summary>
  /// Emits a vertex and/or ends the primitive depending on bits 39 (emit) and 40 (cut)
  /// of the raw opcode. An encoding with neither bit set is logged and emits nothing.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Out(EmitterContext context)
  {
      OpCode outOp = context.CurrOp;

      bool emitVertex = outOp.RawOpCode.Extract(39);
      bool endPrimitive = outOp.RawOpCode.Extract(40);

      if (!emitVertex && !endPrimitive)
      {
          context.Config.PrintLog("Invalid OUT encoding.");

          return;
      }

      if (emitVertex)
      {
          context.EmitVertex();
      }

      if (endPrimitive)
      {
          context.EndPrimitive();
      }
  }
  /// <summary>
  /// Emits an atomic operation on <c>Instruction.MrGlobal</c> at the address built by
  /// <c>Get40BitsAddress</c>, using the opcode's destination operand as the value.
  /// The value returned by the atomic operation is discarded.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Red(EmitterContext context)
  {
      var redOp = (OpCodeRed)context.CurrOp;

      var (low, high) = Get40BitsAddress(context, redOp.Ra, redOp.Extended, redOp.Offset);

      Operand operandValue = GetDest(context);

      EmitAtomicOp(context, Instruction.MrGlobal, redOp.AtomicOp, redOp.Type, low, high, operandValue);
  }
  /// <summary>
  /// Emits a store to <c>MemoryRegion.Local</c> via <c>EmitStore</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void St(EmitterContext context) => EmitStore(context, MemoryRegion.Local);
  /// <summary>
  /// Emits a global memory store via <c>EmitStoreGlobal</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Stg(EmitterContext context) => EmitStoreGlobal(context);
  /// <summary>
  /// Emits a store to <c>MemoryRegion.Shared</c> via <c>EmitStore</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  public static void Sts(EmitterContext context) => EmitStore(context, MemoryRegion.Shared);
  /// <summary>
  /// Emits the atomic operation selected by <paramref name="op"/> on the memory identified by
  /// <paramref name="mr"/>, <paramref name="addrLow"/> and <paramref name="addrHigh"/>.
  /// </summary>
  /// <remarks>
  /// Every handled operation is only emitted for <c>ReductionType.S32</c> and <c>ReductionType.U32</c>;
  /// maximum and minimum pick the signed or unsigned variant accordingly. An unsupported reduction
  /// type or an unhandled atomic operation is reported through the config log and nothing is emitted.
  /// </remarks>
  /// <param name="context">Emitter context receiving the generated operations.</param>
  /// <param name="mr">Instruction flag selecting the memory the atomic operates on.</param>
  /// <param name="op">Atomic operation to perform.</param>
  /// <param name="type">Reduction data type.</param>
  /// <param name="addrLow">Low part of the address (or the offset).</param>
  /// <param name="addrHigh">High part of the address.</param>
  /// <param name="value">Value operand of the atomic operation.</param>
  /// <returns>The operand produced by the atomic operation, or a constant zero when nothing was emitted.</returns>
  private static Operand EmitAtomicOp(
      EmitterContext context,
      Instruction mr,
      AtomicOp op,
      ReductionType type,
      Operand addrLow,
      Operand addrHigh,
      Operand value)
  {
      bool isSigned = type == ReductionType.S32;
      bool isInt32 = isSigned || type == ReductionType.U32;

      Operand res = Const(0);

      switch (op)
      {
          case AtomicOp.Add:
              if (isInt32)
              {
                  res = context.AtomicAdd(mr, addrLow, addrHigh, value);
              }
              break;

          case AtomicOp.BitwiseAnd:
              if (isInt32)
              {
                  res = context.AtomicAnd(mr, addrLow, addrHigh, value);
              }
              break;

          case AtomicOp.BitwiseExclusiveOr:
              if (isInt32)
              {
                  res = context.AtomicXor(mr, addrLow, addrHigh, value);
              }
              break;

          case AtomicOp.BitwiseOr:
              if (isInt32)
              {
                  res = context.AtomicOr(mr, addrLow, addrHigh, value);
              }
              break;

          case AtomicOp.Maximum:
              if (isInt32)
              {
                  res = isSigned
                      ? context.AtomicMaxS32(mr, addrLow, addrHigh, value)
                      : context.AtomicMaxU32(mr, addrLow, addrHigh, value);
              }
              break;

          case AtomicOp.Minimum:
              if (isInt32)
              {
                  res = isSigned
                      ? context.AtomicMinS32(mr, addrLow, addrHigh, value)
                      : context.AtomicMinU32(mr, addrLow, addrHigh, value);
              }
              break;

          default:
              // Previously an unhandled operation silently produced a constant zero;
              // report it so that missing atomic support is visible in the log.
              context.Config.PrintLog($"Invalid atomic operation: {op}.");

              return res;
      }

      if (!isInt32)
      {
          // Only reached for a handled operation, all of which require a 32-bit integer type.
          context.Config.PrintLog($"Invalid reduction type: {type}.");
      }

      return res;
  }
  /// <summary>
  /// Emits a load of one, two (<c>B64</c>) or four (<c>B128</c>) words from local or shared memory
  /// into consecutive destination registers, stopping at the first register that reports <c>IsRZ</c>.
  /// Sizes below <c>B32</c> are extracted from the loaded word with <c>ExtractSmallInt</c>.
  /// Sizes above <c>B128</c> are logged as invalid but still emitted as a single-word load.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  /// <param name="region">Memory region to read from.</param>
  private static void EmitLoad(EmitterContext context, MemoryRegion region)
  {
      var memOp = (OpCodeMemory)context.CurrOp;

      if (memOp.Size > IntegerSize.B128)
      {
          context.Config.PrintLog($"Invalid load size: {memOp.Size}.");
      }

      int wordCount;

      if (memOp.Size == IntegerSize.B64)
      {
          wordCount = 2;
      }
      else if (memOp.Size == IntegerSize.B128)
      {
          wordCount = 4;
      }
      else
      {
          wordCount = 1;
      }

      bool needsExtract = memOp.Size < IntegerSize.B32;

      Operand byteOffset = context.IAdd(GetSrcA(context), Const(memOp.Offset));

      // Memory is addressed in 4-byte words: word offset = byte offset / 4.
      Operand firstWord = context.ShiftRightU32(byteOffset, Const(2));
      Operand shift = GetBitOffset(context, byteOffset);

      for (int word = 0; word < wordCount; word++)
      {
          var target = new Register(memOp.Rd.Index + word, RegisterType.Gpr);

          if (target.IsRZ)
          {
              return;
          }

          Operand wordAddress = context.IAdd(firstWord, Const(word));

          Operand loaded = null;

          if (region == MemoryRegion.Local)
          {
              loaded = context.LoadLocal(wordAddress);
          }
          else if (region == MemoryRegion.Shared)
          {
              loaded = context.LoadShared(wordAddress);
          }

          if (needsExtract)
          {
              loaded = ExtractSmallInt(context, memOp.Size, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a global memory load of <c>GetVectorCount(op.Size)</c> words into consecutive destination
  /// registers, stopping at the first register that reports <c>IsRZ</c>. Each word is read from the
  /// low address plus 4 bytes per word; the high address part is passed through unchanged.
  /// Sizes below <c>B32</c> are extracted from the loaded word with <c>ExtractSmallInt</c>.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  private static void EmitLoadGlobal(EmitterContext context)
  {
      var memOp = (OpCodeMemory)context.CurrOp;

      bool needsExtract = memOp.Size < IntegerSize.B32;
      int wordCount = GetVectorCount(memOp.Size);

      var (low, high) = Get40BitsAddress(context, memOp.Ra, memOp.Extended, memOp.Offset);

      Operand shift = GetBitOffset(context, low);

      for (int word = 0; word < wordCount; word++)
      {
          var target = new Register(memOp.Rd.Index + word, RegisterType.Gpr);

          if (target.IsRZ)
          {
              return;
          }

          Operand wordAddress = context.IAdd(low, Const(word * 4));
          Operand loaded = context.LoadGlobal(wordAddress, high);

          if (needsExtract)
          {
              loaded = ExtractSmallInt(context, memOp.Size, shift, loaded);
          }

          context.Copy(Register(target), loaded);
      }
  }
  /// <summary>
  /// Emits a store of one, two (<c>B64</c>) or four (<c>B128</c>) words to local or shared memory,
  /// taking the values from consecutive source registers starting at Rd.
  /// </summary>
  /// <remarks>
  /// Sizes below <c>B32</c> are merged into the existing memory word with <c>InsertSmallInt</c>
  /// (read-modify-write). Sizes above <c>B128</c> are logged as invalid but still emitted as a
  /// single-word store. Once the source register reaches RZ, RZ is reused for every remaining word,
  /// so all words of a multi-word store are written; previously the loop stopped after the first
  /// RZ word and left the remaining words of the access unwritten.
  /// </remarks>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  /// <param name="region">Memory region to write to.</param>
  private static void EmitStore(EmitterContext context, MemoryRegion region)
  {
      OpCodeMemory op = (OpCodeMemory)context.CurrOp;

      if (op.Size > IntegerSize.B128)
      {
          context.Config.PrintLog($"Invalid store size: {op.Size}.");
      }

      bool isSmallInt = op.Size < IntegerSize.B32;

      int count = 1;

      switch (op.Size)
      {
          case IntegerSize.B64: count = 2; break;
          case IntegerSize.B128: count = 4; break;
      }

      Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

      // Memory is addressed in 4-byte words: word offset = byte offset / 4.
      Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

      Operand bitOffset = GetBitOffset(context, baseOffset);

      Register rd = new Register(op.Rd.Index, RegisterType.Gpr);

      for (int index = 0; index < count; index++)
      {
          // Advance to the next source register unless RZ was already reached;
          // in that case keep sourcing RZ so every word of the access is still stored.
          if (index != 0 && !rd.IsRZ)
          {
              rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
          }

          Operand value = Register(rd);

          Operand offset = context.IAdd(wordOffset, Const(index));

          if (isSmallInt)
          {
              Operand word = null;

              switch (region)
              {
                  case MemoryRegion.Local: word = context.LoadLocal(offset); break;
                  case MemoryRegion.Shared: word = context.LoadShared(offset); break;
              }

              value = InsertSmallInt(context, op.Size, bitOffset, word, value);
          }

          switch (region)
          {
              case MemoryRegion.Local: context.StoreLocal(offset, value); break;
              case MemoryRegion.Shared: context.StoreShared(offset, value); break;
          }
      }
  }
  /// <summary>
  /// Emits a global memory store of <c>GetVectorCount(op.Size)</c> words, taking the values from
  /// consecutive source registers starting at Rd. Each word is written to the low address plus
  /// 4 bytes per word; the high address part is passed through unchanged.
  /// </summary>
  /// <remarks>
  /// Sizes below <c>B32</c> are merged into the existing memory word with <c>InsertSmallInt</c>
  /// (read-modify-write). Once the source register reaches RZ, RZ is reused for every remaining
  /// word, so all words of a multi-word store are written; previously the loop stopped after the
  /// first RZ word and left the remaining words of the access unwritten.
  /// </remarks>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  private static void EmitStoreGlobal(EmitterContext context)
  {
      OpCodeMemory op = (OpCodeMemory)context.CurrOp;

      bool isSmallInt = op.Size < IntegerSize.B32;

      int count = GetVectorCount(op.Size);

      (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

      Operand bitOffset = GetBitOffset(context, addrLow);

      Register rd = new Register(op.Rd.Index, RegisterType.Gpr);

      for (int index = 0; index < count; index++)
      {
          // Advance to the next source register unless RZ was already reached;
          // in that case keep sourcing RZ so every word of the access is still stored.
          if (index != 0 && !rd.IsRZ)
          {
              rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
          }

          Operand value = Register(rd);

          if (isSmallInt)
          {
              Operand word = context.LoadGlobal(addrLow, addrHigh);

              value = InsertSmallInt(context, op.Size, bitOffset, word, value);
          }

          context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
      }
  }
  /// <summary>
  /// Returns how many 32-bit words an access of the given size covers:
  /// 2 for <c>B64</c>, 4 for <c>B128</c> and <c>UB128</c>, and 1 for every other size.
  /// </summary>
  /// <param name="size">Access size taken from the opcode.</param>
  /// <returns>The number of words to load or store.</returns>
  private static int GetVectorCount(IntegerSize size)
  {
      if (size == IntegerSize.B64)
      {
          return 2;
      }

      if (size == IntegerSize.B128 || size == IntegerSize.UB128)
      {
          return 4;
      }

      return 1;
  }
  /// <summary>
  /// Builds the (low, high) address pair for a global memory access: the low part is
  /// source A plus <paramref name="offset"/>. For extended addressing the high part is the register
  /// following <paramref name="ra"/> (or zero when <paramref name="ra"/> is RZ) plus the carry out
  /// of the low addition; otherwise the high part is a constant zero.
  /// </summary>
  /// <param name="context">Emitter context holding the current opcode and receiving the generated operations.</param>
  /// <param name="ra">Base address register.</param>
  /// <param name="extended">Whether the address spans two registers.</param>
  /// <param name="offset">Immediate offset added to the low part.</param>
  /// <returns>The low and high address operands, in that order.</returns>
  private static (Operand, Operand) Get40BitsAddress(
      EmitterContext context,
      Register ra,
      bool extended,
      int offset)
  {
      Operand immediate = Const(offset);
      Operand low = context.IAdd(GetSrcA(context), immediate);

      if (!extended)
      {
          return (low, Const(0));
      }

      Operand high = ra.IsRZ ? Const(0) : Register(ra.Index + 1, RegisterType.Gpr);

      // The unsigned sum wrapped around exactly when it ended up below the addend.
      Operand wrapped = context.ICompareLessUnsigned(low, immediate);
      Operand carry = context.ConditionalSelect(wrapped, Const(1), Const(0));

      return (low, context.IAdd(high, carry));
  }
  /// <summary>
  /// Computes the bit position of a byte offset inside its 32-bit word:
  /// <c>(baseOffset &amp; 3) * 8</c>.
  /// </summary>
  /// <param name="context">Emitter context receiving the generated operations.</param>
  /// <param name="baseOffset">Byte offset (or low address part) of the access.</param>
  /// <returns>An operand holding the bit offset.</returns>
  private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  {
      // Addresses are expected to always be aligned to the integer type,
      // so unaligned accesses that straddle a word are not handled here.
      Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));

      // Multiply by 8 (bits per byte) with a left shift by 3.
      return context.ShiftLeft(byteInWord, Const(3));
  }
  /// <summary>
  /// Extracts an 8 or 16-bit integer from a loaded word: the word is shifted right by
  /// <paramref name="bitOffset"/> and then zero or sign-extended to 32 bits according to
  /// <paramref name="size"/>. Other sizes return the shifted word unchanged.
  /// </summary>
  /// <param name="context">Emitter context receiving the generated operations.</param>
  /// <param name="size">Integer size and signedness of the access.</param>
  /// <param name="bitOffset">Bit position of the integer inside the word.</param>
  /// <param name="value">The loaded 32-bit word.</param>
  /// <returns>An operand holding the extracted value.</returns>
  private static Operand ExtractSmallInt(
      EmitterContext context,
      IntegerSize size,
      Operand bitOffset,
      Operand value)
  {
      Operand shifted = context.ShiftRightU32(value, bitOffset);

      switch (size)
      {
          case IntegerSize.U8:
              return ZeroExtendTo32(context, shifted, 8);

          case IntegerSize.U16:
              return ZeroExtendTo32(context, shifted, 16);

          case IntegerSize.S8:
              return SignExtendTo32(context, shifted, 8);

          case IntegerSize.S16:
              return SignExtendTo32(context, shifted, 16);

          default:
              return shifted;
      }
  }
  /// <summary>
  /// Merges an 8 or 16-bit integer into an existing word: <paramref name="value"/> is masked to
  /// its width and inserted into <paramref name="word"/> at <paramref name="bitOffset"/>.
  /// Other sizes return <paramref name="value"/> unchanged.
  /// </summary>
  /// <param name="context">Emitter context receiving the generated operations.</param>
  /// <param name="size">Integer size of the access.</param>
  /// <param name="bitOffset">Bit position of the integer inside the word.</param>
  /// <param name="word">The current content of the destination word.</param>
  /// <param name="value">The value to insert.</param>
  /// <returns>An operand holding the merged word, or <paramref name="value"/> for other sizes.</returns>
  private static Operand InsertSmallInt(
      EmitterContext context,
      IntegerSize size,
      Operand bitOffset,
      Operand word,
      Operand value)
  {
      int bits;

      if (size == IntegerSize.U8 || size == IntegerSize.S8)
      {
          bits = 8;
      }
      else if (size == IntegerSize.U16 || size == IntegerSize.S16)
      {
          bits = 16;
      }
      else
      {
          return value;
      }

      // 0xff for 8-bit values, 0xffff for 16-bit values.
      int mask = (1 << bits) - 1;

      Operand masked = context.BitwiseAnd(value, Const(mask));

      return context.BitfieldInsert(word, masked, bitOffset, Const(bits));
  }
  480. }
  481. }