InstEmitMemory.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
// Memory region targeted by the generic load/store emitters below
// (LD/ST use Local, LDS/STS use Shared).
private enum MemoryRegion
{
    Local,  // Per-thread local memory.
    Shared  // Per-workgroup shared memory.
}
// ALD: loads one or more input attributes into consecutive GPRs.
public static void Ald(EmitterContext context)
{
    OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;

    // Third source operand: vertex index for per-vertex attribute access.
    // NOTE(review): presumably only meaningful on stages with per-vertex
    // inputs — confirm against LoadAttribute's handling.
    Operand primVertex = context.Copy(GetSrcC(context));

    for (int index = 0; index < op.Count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

        // Writes to the zero register are discarded, so stop early.
        if (rd.IsRZ)
        {
            break;
        }

        // Attribute components are 4 bytes apart.
        Operand src = Attribute(op.AttributeOffset + index * 4);

        // Record the read so the translator knows this input is used.
        context.FlagAttributeRead(src.Value);

        context.Copy(Register(rd), context.LoadAttribute(src, primVertex));
    }
}
// AST: stores one or more GPRs to consecutive output attributes.
public static void Ast(EmitterContext context)
{
    OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;

    for (int index = 0; index < op.Count; index++)
    {
        // Stop once the register index goes past RZ. Note the comparison is
        // strict (>), so RZ itself is still used as a source — it reads as
        // zero, which is a valid store.
        if (op.Rd.Index + index > RegisterConsts.RegisterZeroIndex)
        {
            break;
        }

        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

        // Attribute components are 4 bytes apart.
        Operand dest = Attribute(op.AttributeOffset + index * 4);

        context.Copy(dest, Register(rd));
    }
}
// ATOMS: atomic operation on shared memory.
public static void Atoms(EmitterContext context)
{
    OpCodeAtom op = (OpCodeAtom)context.CurrOp;

    // Shared memory is addressed in 32-bit words: byte address / 4.
    Operand offset = context.ShiftRightU32(GetSrcA(context), Const(2));

    // NOTE(review): the immediate is added after the shift, so it is
    // presumably already a word offset — confirm against the decoder.
    offset = context.IAdd(offset, Const(op.Offset));

    Operand value = GetSrcB(context);

    Operand res = EmitAtomicOp(
        context,
        Instruction.MrShared,
        op.AtomicOp,
        op.Type,
        offset,
        Const(0), // High address half is unused for shared memory.
        value);

    // The atomic returns the previous memory value.
    context.Copy(GetDest(context), res);
}
  61. public static void Bar(EmitterContext context)
  62. {
  63. OpCodeBarrier op = (OpCodeBarrier)context.CurrOp;
  64. // TODO: Support other modes.
  65. if (op.Mode == BarrierMode.Sync)
  66. {
  67. context.Barrier();
  68. }
  69. else
  70. {
  71. context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.Mode}.");
  72. }
  73. }
// IPA: interpolates a fragment shader input attribute.
public static void Ipa(EmitterContext context)
{
    OpCodeIpa op = (OpCodeIpa)context.CurrOp;

    context.FlagAttributeRead(op.AttributeOffset);

    Operand res = Attribute(op.AttributeOffset);

    if (op.AttributeOffset >= AttributeConsts.UserAttributeBase &&
        op.AttributeOffset < AttributeConsts.UserAttributeEnd)
    {
        // Each user attribute is a 16-byte vector, hence the >> 4.
        int index = (op.AttributeOffset - AttributeConsts.UserAttributeBase) >> 4;

        // Perspective-mapped inputs are multiplied by W here.
        // NOTE(review): presumably the attribute value is stored pre-divided
        // by W upstream, making this the perspective correction — confirm.
        if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
        {
            res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));
        }
    }

    // Default mode scales the interpolated value by the second source.
    if (op.Mode == InterpolationMode.Default)
    {
        Operand srcB = GetSrcB(context);

        res = context.FPMultiply(res, srcB);
    }

    // FPSaturate takes the saturate flag; presumably a no-op when false.
    res = context.FPSaturate(res, op.Saturate);

    context.Copy(GetDest(context), res);
}
  96. public static void Isberd(EmitterContext context)
  97. {
  98. // This instruction performs a load from ISBE memory,
  99. // however it seems to be only used to get some vertex
  100. // input data, so we instead propagate the offset so that
  101. // it can be used on the attribute load.
  102. context.Copy(GetDest(context), GetSrcA(context));
  103. }
  104. public static void Ld(EmitterContext context)
  105. {
  106. EmitLoad(context, MemoryRegion.Local);
  107. }
// LDC: load from the constant buffer selected by op.Slot.
public static void Ldc(EmitterContext context)
{
    OpCodeLdc op = (OpCodeLdc)context.CurrOp;

    // Only sizes up to 64 bits are valid for LDC.
    if (op.Size > IntegerSize.B64)
    {
        context.Config.GpuAccessor.Log($"Invalid LDC size: {op.Size}.");
    }

    // Sub-word sizes need shift/extend after loading the containing word.
    bool isSmallInt = op.Size < IntegerSize.B32;

    int count = op.Size == IntegerSize.B64 ? 2 : 1;

    Operand addr = context.IAdd(GetSrcA(context), Const(op.Offset));

    // Constant buffers are indexed in 32-bit words.
    Operand wordOffset = context.ShiftRightU32(addr, Const(2));

    Operand bitOffset = GetBitOffset(context, addr);

    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

        // Writes to the zero register are discarded; stop early.
        if (rd.IsRZ)
        {
            break;
        }

        Operand offset = context.IAdd(wordOffset, Const(index));

        Operand value = context.LoadConstant(Const(op.Slot), offset);

        if (isSmallInt)
        {
            value = ExtractSmallInt(context, op.Size, bitOffset, value);
        }

        context.Copy(Register(rd), value);
    }
}
  136. public static void Ldg(EmitterContext context)
  137. {
  138. EmitLoadGlobal(context);
  139. }
  140. public static void Lds(EmitterContext context)
  141. {
  142. EmitLoad(context, MemoryRegion.Shared);
  143. }
// MEMBAR: memory barrier at the requested scope.
public static void Membar(EmitterContext context)
{
    OpCodeMemoryBarrier op = (OpCodeMemoryBarrier)context.CurrOp;

    if (op.Level == BarrierLevel.Cta)
    {
        // CTA level only needs ordering within the workgroup.
        context.GroupMemoryBarrier();
    }
    else
    {
        // Any wider level falls back to a full memory barrier.
        context.MemoryBarrier();
    }
}
  156. public static void Out(EmitterContext context)
  157. {
  158. OpCode op = context.CurrOp;
  159. bool emit = op.RawOpCode.Extract(39);
  160. bool cut = op.RawOpCode.Extract(40);
  161. if (!(emit || cut))
  162. {
  163. context.Config.GpuAccessor.Log("Invalid OUT encoding.");
  164. }
  165. if (emit)
  166. {
  167. context.EmitVertex();
  168. }
  169. if (cut)
  170. {
  171. context.EndPrimitive();
  172. }
  173. }
// RED: reduction (atomic without a result) on global memory.
public static void Red(EmitterContext context)
{
    OpCodeRed op = (OpCodeRed)context.CurrOp;

    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

    // The Rd field supplies the value operand here and the atomic result is
    // discarded. NOTE(review): presumably RED encodes its source in the Rd
    // slot — confirm against the decoder.
    EmitAtomicOp(
        context,
        Instruction.MrGlobal,
        op.AtomicOp,
        op.Type,
        addrLow,
        addrHigh,
        GetDest(context));
}
  187. public static void St(EmitterContext context)
  188. {
  189. EmitStore(context, MemoryRegion.Local);
  190. }
  191. public static void Stg(EmitterContext context)
  192. {
  193. EmitStoreGlobal(context);
  194. }
  195. public static void Sts(EmitterContext context)
  196. {
  197. EmitStore(context, MemoryRegion.Shared);
  198. }
  199. private static Operand EmitAtomicOp(
  200. EmitterContext context,
  201. Instruction mr,
  202. AtomicOp op,
  203. ReductionType type,
  204. Operand addrLow,
  205. Operand addrHigh,
  206. Operand value)
  207. {
  208. Operand res = Const(0);
  209. switch (op)
  210. {
  211. case AtomicOp.Add:
  212. if (type == ReductionType.S32 || type == ReductionType.U32)
  213. {
  214. res = context.AtomicAdd(mr, addrLow, addrHigh, value);
  215. }
  216. else
  217. {
  218. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  219. }
  220. break;
  221. case AtomicOp.BitwiseAnd:
  222. if (type == ReductionType.S32 || type == ReductionType.U32)
  223. {
  224. res = context.AtomicAnd(mr, addrLow, addrHigh, value);
  225. }
  226. else
  227. {
  228. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  229. }
  230. break;
  231. case AtomicOp.BitwiseExclusiveOr:
  232. if (type == ReductionType.S32 || type == ReductionType.U32)
  233. {
  234. res = context.AtomicXor(mr, addrLow, addrHigh, value);
  235. }
  236. else
  237. {
  238. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  239. }
  240. break;
  241. case AtomicOp.BitwiseOr:
  242. if (type == ReductionType.S32 || type == ReductionType.U32)
  243. {
  244. res = context.AtomicOr(mr, addrLow, addrHigh, value);
  245. }
  246. else
  247. {
  248. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  249. }
  250. break;
  251. case AtomicOp.Maximum:
  252. if (type == ReductionType.S32)
  253. {
  254. res = context.AtomicMaxS32(mr, addrLow, addrHigh, value);
  255. }
  256. else if (type == ReductionType.U32)
  257. {
  258. res = context.AtomicMaxU32(mr, addrLow, addrHigh, value);
  259. }
  260. else
  261. {
  262. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  263. }
  264. break;
  265. case AtomicOp.Minimum:
  266. if (type == ReductionType.S32)
  267. {
  268. res = context.AtomicMinS32(mr, addrLow, addrHigh, value);
  269. }
  270. else if (type == ReductionType.U32)
  271. {
  272. res = context.AtomicMinU32(mr, addrLow, addrHigh, value);
  273. }
  274. else
  275. {
  276. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  277. }
  278. break;
  279. }
  280. return res;
  281. }
// Emits a load from local or shared memory, handling 8/16-bit extraction
// and multi-register 64/128-bit loads.
private static void EmitLoad(EmitterContext context, MemoryRegion region)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;

    if (op.Size > IntegerSize.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid load size: {op.Size}.");
    }

    // Sub-word sizes need shift/extend after loading the containing word.
    bool isSmallInt = op.Size < IntegerSize.B32;

    // Number of 32-bit registers written (1 for everything else).
    int count = 1;

    switch (op.Size)
    {
        case IntegerSize.B64: count = 2; break;
        case IntegerSize.B128: count = 4; break;
    }

    Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

    // Word offset = byte offset / 4 (one word = 4 bytes).
    Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

    Operand bitOffset = GetBitOffset(context, baseOffset);

    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

        // Writes to the zero register are discarded; stop early.
        if (rd.IsRZ)
        {
            break;
        }

        Operand offset = context.IAdd(wordOffset, Const(index));

        Operand value = null;

        switch (region)
        {
            case MemoryRegion.Local: value = context.LoadLocal (offset); break;
            case MemoryRegion.Shared: value = context.LoadShared(offset); break;
        }

        if (isSmallInt)
        {
            value = ExtractSmallInt(context, op.Size, bitOffset, value);
        }

        context.Copy(Register(rd), value);
    }
}
// Emits a load from global memory using a 40-bit (low, high) address pair,
// handling 8/16-bit extraction and multi-register vector loads.
private static void EmitLoadGlobal(EmitterContext context)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;

    // Sub-word sizes need shift/extend after loading the containing word.
    bool isSmallInt = op.Size < IntegerSize.B32;

    int count = GetVectorCount(op.Size);

    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

    Operand bitOffset = GetBitOffset(context, addrLow);

    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

        // Writes to the zero register are discarded; stop early.
        if (rd.IsRZ)
        {
            break;
        }

        // Each successive register reads the next 4 bytes. Note the carry
        // into addrHigh is not re-propagated for the + index * 4 step.
        Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);

        if (isSmallInt)
        {
            value = ExtractSmallInt(context, op.Size, bitOffset, value);
        }

        context.Copy(Register(rd), value);
    }
}
// Emits a store to local or shared memory, with read-modify-write insertion
// for 8/16-bit sizes and multiple word stores for 64/128-bit sizes.
private static void EmitStore(EmitterContext context, MemoryRegion region)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;

    if (op.Size > IntegerSize.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid store size: {op.Size}.");
    }

    // Sub-word sizes store into part of an existing word.
    bool isSmallInt = op.Size < IntegerSize.B32;

    int count = 1;

    switch (op.Size)
    {
        case IntegerSize.B64: count = 2; break;
        case IntegerSize.B128: count = 4; break;
    }

    Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

    // Word offset = byte offset / 4.
    Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

    Operand bitOffset = GetBitOffset(context, baseOffset);

    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

        Operand value = Register(rd);

        Operand offset = context.IAdd(wordOffset, Const(index));

        if (isSmallInt)
        {
            // Read-modify-write: load the containing word and insert the
            // small value at the computed bit offset.
            Operand word = null;

            switch (region)
            {
                case MemoryRegion.Local: word = context.LoadLocal (offset); break;
                case MemoryRegion.Shared: word = context.LoadShared(offset); break;
            }

            value = InsertSmallInt(context, op.Size, bitOffset, word, value);
        }

        switch (region)
        {
            case MemoryRegion.Local: context.StoreLocal (offset, value); break;
            case MemoryRegion.Shared: context.StoreShared(offset, value); break;
        }

        // Note: checked after the store, so RZ (which reads as zero) is
        // still written as the terminating element — unlike the load path.
        // NOTE(review): confirm this ordering is intentional.
        if (rd.IsRZ)
        {
            break;
        }
    }
}
// Emits a store to global memory using a 40-bit (low, high) address pair,
// with read-modify-write insertion for 8/16-bit sizes and multiple word
// stores for vector sizes.
private static void EmitStoreGlobal(EmitterContext context)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;

    // Sub-word sizes store into part of an existing word.
    bool isSmallInt = op.Size < IntegerSize.B32;

    int count = GetVectorCount(op.Size);

    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

    Operand bitOffset = GetBitOffset(context, addrLow);

    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

        Operand value = Register(rd);

        if (isSmallInt)
        {
            // Read-modify-write. Sub-word sizes always have count == 1
            // (GetVectorCount returns 1 for them), so using addrLow without
            // the index adjustment is consistent with the store below.
            Operand word = context.LoadGlobal(addrLow, addrHigh);

            value = InsertSmallInt(context, op.Size, bitOffset, word, value);
        }

        context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);

        // Note: checked after the store, so RZ (zero) is still written as
        // the terminating element. NOTE(review): confirm intentional.
        if (rd.IsRZ)
        {
            break;
        }
    }
}
  409. private static int GetVectorCount(IntegerSize size)
  410. {
  411. switch (size)
  412. {
  413. case IntegerSize.B64:
  414. return 2;
  415. case IntegerSize.B128:
  416. case IntegerSize.UB128:
  417. return 4;
  418. }
  419. return 1;
  420. }
// Builds the (low, high) pair for a 40-bit global memory address from
// register Ra (plus Ra+1 for the upper bits when extended) and an
// immediate byte offset.
private static (Operand, Operand) Get40BitsAddress(
    EmitterContext context,
    Register ra,
    bool extended,
    int offset)
{
    Operand addrLow = GetSrcA(context);
    Operand addrHigh;

    if (extended && !ra.IsRZ)
    {
        // Extended addressing: the next register holds the upper bits.
        addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
    }
    else
    {
        addrHigh = Const(0);
    }

    Operand offs = Const(offset);

    addrLow = context.IAdd(addrLow, offs);

    if (extended)
    {
        // Propagate the carry out of the low-word addition into the high
        // word: unsigned (low + offs) < offs means the addition wrapped.
        Operand carry = context.ICompareLessUnsigned(addrLow, offs);

        addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
    }

    return (addrLow, addrHigh);
}
  446. private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  447. {
  448. // Note: bit offset = (baseOffset & 0b11) * 8.
  449. // Addresses should be always aligned to the integer type,
  450. // so we don't need to take unaligned addresses into account.
  451. return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
  452. }
  453. private static Operand ExtractSmallInt(
  454. EmitterContext context,
  455. IntegerSize size,
  456. Operand bitOffset,
  457. Operand value)
  458. {
  459. value = context.ShiftRightU32(value, bitOffset);
  460. switch (size)
  461. {
  462. case IntegerSize.U8: value = ZeroExtendTo32(context, value, 8); break;
  463. case IntegerSize.U16: value = ZeroExtendTo32(context, value, 16); break;
  464. case IntegerSize.S8: value = SignExtendTo32(context, value, 8); break;
  465. case IntegerSize.S16: value = SignExtendTo32(context, value, 16); break;
  466. }
  467. return value;
  468. }
  469. private static Operand InsertSmallInt(
  470. EmitterContext context,
  471. IntegerSize size,
  472. Operand bitOffset,
  473. Operand word,
  474. Operand value)
  475. {
  476. switch (size)
  477. {
  478. case IntegerSize.U8:
  479. case IntegerSize.S8:
  480. value = context.BitwiseAnd(value, Const(0xff));
  481. value = context.BitfieldInsert(word, value, bitOffset, Const(8));
  482. break;
  483. case IntegerSize.U16:
  484. case IntegerSize.S16:
  485. value = context.BitwiseAnd(value, Const(0xffff));
  486. value = context.BitfieldInsert(word, value, bitOffset, Const(16));
  487. break;
  488. }
  489. return value;
  490. }
  491. }
  492. }