InstEmitMemory.cs 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
// Memory regions addressable by the non-global load/store helpers
// (EmitLoad/EmitStore): per-thread local memory and shared memory.
private enum MemoryRegion
{
    Local,
    Shared
}
  15. public static void Ald(EmitterContext context)
  16. {
  17. OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
  18. Operand primVertex = context.Copy(GetSrcC(context));
  19. for (int index = 0; index < op.Count; index++)
  20. {
  21. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  22. if (rd.IsRZ)
  23. {
  24. break;
  25. }
  26. Operand src = Attribute(op.AttributeOffset + index * 4);
  27. context.FlagAttributeRead(src.Value);
  28. context.Copy(Register(rd), context.LoadAttribute(src, primVertex));
  29. }
  30. }
  31. public static void Ast(EmitterContext context)
  32. {
  33. OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
  34. for (int index = 0; index < op.Count; index++)
  35. {
  36. if (op.Rd.Index + index > RegisterConsts.RegisterZeroIndex)
  37. {
  38. break;
  39. }
  40. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  41. Operand dest = Attribute(op.AttributeOffset + index * 4);
  42. context.Copy(dest, Register(rd));
  43. }
  44. }
  45. public static void Atoms(EmitterContext context)
  46. {
  47. OpCodeAtom op = (OpCodeAtom)context.CurrOp;
  48. Operand offset = context.ShiftRightU32(GetSrcA(context), Const(2));
  49. offset = context.IAdd(offset, Const(op.Offset));
  50. Operand value = GetSrcB(context);
  51. Operand res = EmitAtomicOp(
  52. context,
  53. Instruction.MrShared,
  54. op.AtomicOp,
  55. op.Type,
  56. offset,
  57. Const(0),
  58. value);
  59. context.Copy(GetDest(context), res);
  60. }
  61. public static void Bar(EmitterContext context)
  62. {
  63. OpCodeBarrier op = (OpCodeBarrier)context.CurrOp;
  64. // TODO: Support other modes.
  65. if (op.Mode == BarrierMode.Sync)
  66. {
  67. context.Barrier();
  68. }
  69. else
  70. {
  71. context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.Mode}.");
  72. }
  73. }
// IPA: interpolate a fragment shader input attribute into a register.
public static void Ipa(EmitterContext context)
{
    OpCodeIpa op = (OpCodeIpa)context.CurrOp;
    context.FlagAttributeRead(op.AttributeOffset);
    Operand res = Attribute(op.AttributeOffset);
    if (op.AttributeOffset >= AttributeConsts.UserAttributeBase &&
        op.AttributeOffset < AttributeConsts.UserAttributeEnd)
    {
        // User attributes are 16 bytes (one vec4) each, hence >> 4.
        int index = (op.AttributeOffset - AttributeConsts.UserAttributeBase) >> 4;
        if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
        {
            // Perspective-mapped inputs are scaled by the PositionW
            // attribute — presumably to apply perspective correction.
            // TODO(review): confirm whether PositionW holds w or 1/w here.
            res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));
        }
    }
    if (op.Mode == InterpolationMode.Default)
    {
        // NOTE(review): in default mode the result is additionally scaled
        // by SrcB — looks like an interpolation factor; confirm against
        // the decoder.
        Operand srcB = GetSrcB(context);
        res = context.FPMultiply(res, srcB);
    }
    // Apply the optional saturate modifier, then write the destination.
    res = context.FPSaturate(res, op.Saturate);
    context.Copy(GetDest(context), res);
}
  96. public static void Isberd(EmitterContext context)
  97. {
  98. // This instruction performs a load from ISBE memory,
  99. // however it seems to be only used to get some vertex
  100. // input data, so we instead propagate the offset so that
  101. // it can be used on the attribute load.
  102. context.Copy(GetDest(context), GetSrcA(context));
  103. }
  104. public static void Ld(EmitterContext context)
  105. {
  106. EmitLoad(context, MemoryRegion.Local);
  107. }
// LDC: load from a constant buffer, selected by the slot index.
public static void Ldc(EmitterContext context)
{
    OpCodeLdc op = (OpCodeLdc)context.CurrOp;
    if (op.Size > IntegerSize.B64)
    {
        // Sizes above 64 bits are invalid for LDC; log and carry on with
        // the decoded size (best effort, matching other invalid paths).
        context.Config.GpuAccessor.Log($"Invalid LDC size: {op.Size}.");
    }
    // 8/16-bit loads fetch the containing word and extract the field.
    bool isSmallInt = op.Size < IntegerSize.B32;
    // 64-bit loads fill two consecutive GPRs; everything else fills one.
    int count = op.Size == IntegerSize.B64 ? 2 : 1;
    Operand slot = Const(op.Slot);
    Operand srcA = GetSrcA(context);
    if (op.IndexMode == CbIndexMode.Is ||
        op.IndexMode == CbIndexMode.Isl)
    {
        // Indexed slot modes: the high 16 bits of Ra offset the constant
        // buffer slot, while the low 16 bits contribute to the address.
        slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
        srcA = context.BitwiseAnd(srcA, Const(0xffff));
    }
    Operand addr = context.IAdd(srcA, Const(op.Offset));
    // Constant buffers are accessed in 32-bit words (byte address / 4).
    Operand wordOffset = context.ShiftRightU32(addr, Const(2));
    Operand bitOffset = GetBitOffset(context, addr);
    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
        if (rd.IsRZ)
        {
            // Writes to RZ are discarded, so stop emitting loads early.
            break;
        }
        Operand offset = context.IAdd(wordOffset, Const(index));
        Operand value = context.LoadConstant(slot, offset);
        if (isSmallInt)
        {
            value = ExtractSmallInt(context, op.Size, bitOffset, value);
        }
        context.Copy(Register(rd), value);
    }
}
  144. public static void Ldg(EmitterContext context)
  145. {
  146. EmitLoadGlobal(context);
  147. }
  148. public static void Lds(EmitterContext context)
  149. {
  150. EmitLoad(context, MemoryRegion.Shared);
  151. }
  152. public static void Membar(EmitterContext context)
  153. {
  154. OpCodeMemoryBarrier op = (OpCodeMemoryBarrier)context.CurrOp;
  155. if (op.Level == BarrierLevel.Cta)
  156. {
  157. context.GroupMemoryBarrier();
  158. }
  159. else
  160. {
  161. context.MemoryBarrier();
  162. }
  163. }
  164. public static void Out(EmitterContext context)
  165. {
  166. OpCode op = context.CurrOp;
  167. bool emit = op.RawOpCode.Extract(39);
  168. bool cut = op.RawOpCode.Extract(40);
  169. if (!(emit || cut))
  170. {
  171. context.Config.GpuAccessor.Log("Invalid OUT encoding.");
  172. }
  173. if (emit)
  174. {
  175. context.EmitVertex();
  176. }
  177. if (cut)
  178. {
  179. context.EndPrimitive();
  180. }
  181. }
  182. public static void Red(EmitterContext context)
  183. {
  184. OpCodeRed op = (OpCodeRed)context.CurrOp;
  185. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
  186. EmitAtomicOp(
  187. context,
  188. Instruction.MrGlobal,
  189. op.AtomicOp,
  190. op.Type,
  191. addrLow,
  192. addrHigh,
  193. GetDest(context));
  194. }
  195. public static void St(EmitterContext context)
  196. {
  197. EmitStore(context, MemoryRegion.Local);
  198. }
  199. public static void Stg(EmitterContext context)
  200. {
  201. EmitStoreGlobal(context);
  202. }
  203. public static void Sts(EmitterContext context)
  204. {
  205. EmitStore(context, MemoryRegion.Shared);
  206. }
  207. private static Operand EmitAtomicOp(
  208. EmitterContext context,
  209. Instruction mr,
  210. AtomicOp op,
  211. ReductionType type,
  212. Operand addrLow,
  213. Operand addrHigh,
  214. Operand value)
  215. {
  216. Operand res = Const(0);
  217. switch (op)
  218. {
  219. case AtomicOp.Add:
  220. if (type == ReductionType.S32 || type == ReductionType.U32)
  221. {
  222. res = context.AtomicAdd(mr, addrLow, addrHigh, value);
  223. }
  224. else
  225. {
  226. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  227. }
  228. break;
  229. case AtomicOp.BitwiseAnd:
  230. if (type == ReductionType.S32 || type == ReductionType.U32)
  231. {
  232. res = context.AtomicAnd(mr, addrLow, addrHigh, value);
  233. }
  234. else
  235. {
  236. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  237. }
  238. break;
  239. case AtomicOp.BitwiseExclusiveOr:
  240. if (type == ReductionType.S32 || type == ReductionType.U32)
  241. {
  242. res = context.AtomicXor(mr, addrLow, addrHigh, value);
  243. }
  244. else
  245. {
  246. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  247. }
  248. break;
  249. case AtomicOp.BitwiseOr:
  250. if (type == ReductionType.S32 || type == ReductionType.U32)
  251. {
  252. res = context.AtomicOr(mr, addrLow, addrHigh, value);
  253. }
  254. else
  255. {
  256. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  257. }
  258. break;
  259. case AtomicOp.Maximum:
  260. if (type == ReductionType.S32)
  261. {
  262. res = context.AtomicMaxS32(mr, addrLow, addrHigh, value);
  263. }
  264. else if (type == ReductionType.U32)
  265. {
  266. res = context.AtomicMaxU32(mr, addrLow, addrHigh, value);
  267. }
  268. else
  269. {
  270. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  271. }
  272. break;
  273. case AtomicOp.Minimum:
  274. if (type == ReductionType.S32)
  275. {
  276. res = context.AtomicMinS32(mr, addrLow, addrHigh, value);
  277. }
  278. else if (type == ReductionType.U32)
  279. {
  280. res = context.AtomicMinU32(mr, addrLow, addrHigh, value);
  281. }
  282. else
  283. {
  284. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  285. }
  286. break;
  287. }
  288. return res;
  289. }
// Shared implementation of LD (local) and LDS (shared): loads 1, 2 or 4
// consecutive 32-bit words into consecutive GPRs starting at Rd.
private static void EmitLoad(EmitterContext context, MemoryRegion region)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;
    if (op.Size > IntegerSize.B128)
    {
        // Log and carry on; the access is still emitted with count = 1.
        context.Config.GpuAccessor.Log($"Invalid load size: {op.Size}.");
    }
    // 8/16-bit loads fetch the containing word and extract the field.
    bool isSmallInt = op.Size < IntegerSize.B32;
    int count = 1;
    switch (op.Size)
    {
        case IntegerSize.B64: count = 2; break;
        case IntegerSize.B128: count = 4; break;
    }
    Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
    // Word offset = byte offset / 4 (one word = 4 bytes).
    Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
    Operand bitOffset = GetBitOffset(context, baseOffset);
    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
        if (rd.IsRZ)
        {
            // Writes to RZ are discarded, so stop emitting loads early.
            break;
        }
        Operand offset = context.IAdd(wordOffset, Const(index));
        Operand value = null;
        switch (region)
        {
            case MemoryRegion.Local: value = context.LoadLocal (offset); break;
            case MemoryRegion.Shared: value = context.LoadShared(offset); break;
        }
        if (isSmallInt)
        {
            // Extract the 8/16-bit field from the loaded word.
            value = ExtractSmallInt(context, op.Size, bitOffset, value);
        }
        context.Copy(Register(rd), value);
    }
}
// LDG implementation: loads 1, 2 or 4 consecutive words from global
// memory (40-bit address) into consecutive GPRs starting at Rd.
private static void EmitLoadGlobal(EmitterContext context)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;
    // 8/16-bit loads fetch the containing word and extract the field.
    bool isSmallInt = op.Size < IntegerSize.B32;
    int count = GetVectorCount(op.Size);
    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
    Operand bitOffset = GetBitOffset(context, addrLow);
    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
        if (rd.IsRZ)
        {
            // Writes to RZ are discarded, so stop emitting loads early.
            break;
        }
        // Each successive word is 4 bytes further along the low address.
        Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
        if (isSmallInt)
        {
            value = ExtractSmallInt(context, op.Size, bitOffset, value);
        }
        context.Copy(Register(rd), value);
    }
}
// Shared implementation of ST (local) and STS (shared): stores 1, 2 or 4
// consecutive GPRs starting at Rd to consecutive 32-bit words.
private static void EmitStore(EmitterContext context, MemoryRegion region)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;
    if (op.Size > IntegerSize.B128)
    {
        // Log and carry on; the access is still emitted with count = 1.
        context.Config.GpuAccessor.Log($"Invalid store size: {op.Size}.");
    }
    // 8/16-bit stores are done as a read-modify-write of the word.
    bool isSmallInt = op.Size < IntegerSize.B32;
    int count = 1;
    switch (op.Size)
    {
        case IntegerSize.B64: count = 2; break;
        case IntegerSize.B128: count = 4; break;
    }
    Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
    // Word offset = byte offset / 4 (one word = 4 bytes).
    Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
    Operand bitOffset = GetBitOffset(context, baseOffset);
    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
        Operand value = Register(rd);
        Operand offset = context.IAdd(wordOffset, Const(index));
        if (isSmallInt)
        {
            // Load the containing word, then splice the 8/16-bit value in
            // at the right bit position before writing it back.
            Operand word = null;
            switch (region)
            {
                case MemoryRegion.Local: word = context.LoadLocal (offset); break;
                case MemoryRegion.Shared: word = context.LoadShared(offset); break;
            }
            value = InsertSmallInt(context, op.Size, bitOffset, word, value);
        }
        switch (region)
        {
            case MemoryRegion.Local: context.StoreLocal (offset, value); break;
            case MemoryRegion.Shared: context.StoreShared(offset, value); break;
        }
        if (rd.IsRZ)
        {
            // Note: unlike loads, the store for RZ is still emitted (RZ
            // reads as zero); the check only stops subsequent elements.
            break;
        }
    }
}
// STG implementation: stores 1, 2 or 4 consecutive GPRs starting at Rd
// to consecutive words in global memory (40-bit address).
private static void EmitStoreGlobal(EmitterContext context)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;
    // 8/16-bit stores are done as a read-modify-write of the word.
    // Small sizes always yield count == 1 (see GetVectorCount), so the
    // unadjusted addrLow in the RMW load below is correct.
    bool isSmallInt = op.Size < IntegerSize.B32;
    int count = GetVectorCount(op.Size);
    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
    Operand bitOffset = GetBitOffset(context, addrLow);
    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
        Operand value = Register(rd);
        if (isSmallInt)
        {
            Operand word = context.LoadGlobal(addrLow, addrHigh);
            value = InsertSmallInt(context, op.Size, bitOffset, word, value);
        }
        // Each successive word is 4 bytes further along the low address.
        context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
        if (rd.IsRZ)
        {
            // Note: the store for RZ is still emitted (RZ reads as zero);
            // the check only stops subsequent elements.
            break;
        }
    }
}
  417. private static int GetVectorCount(IntegerSize size)
  418. {
  419. switch (size)
  420. {
  421. case IntegerSize.B64:
  422. return 2;
  423. case IntegerSize.B128:
  424. case IntegerSize.UB128:
  425. return 4;
  426. }
  427. return 1;
  428. }
  429. private static (Operand, Operand) Get40BitsAddress(
  430. EmitterContext context,
  431. Register ra,
  432. bool extended,
  433. int offset)
  434. {
  435. Operand addrLow = GetSrcA(context);
  436. Operand addrHigh;
  437. if (extended && !ra.IsRZ)
  438. {
  439. addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
  440. }
  441. else
  442. {
  443. addrHigh = Const(0);
  444. }
  445. Operand offs = Const(offset);
  446. addrLow = context.IAdd(addrLow, offs);
  447. if (extended)
  448. {
  449. Operand carry = context.ICompareLessUnsigned(addrLow, offs);
  450. addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
  451. }
  452. return (addrLow, addrHigh);
  453. }
  454. private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  455. {
  456. // Note: bit offset = (baseOffset & 0b11) * 8.
  457. // Addresses should be always aligned to the integer type,
  458. // so we don't need to take unaligned addresses into account.
  459. return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
  460. }
  461. private static Operand ExtractSmallInt(
  462. EmitterContext context,
  463. IntegerSize size,
  464. Operand bitOffset,
  465. Operand value)
  466. {
  467. value = context.ShiftRightU32(value, bitOffset);
  468. switch (size)
  469. {
  470. case IntegerSize.U8: value = ZeroExtendTo32(context, value, 8); break;
  471. case IntegerSize.U16: value = ZeroExtendTo32(context, value, 16); break;
  472. case IntegerSize.S8: value = SignExtendTo32(context, value, 8); break;
  473. case IntegerSize.S16: value = SignExtendTo32(context, value, 16); break;
  474. }
  475. return value;
  476. }
  477. private static Operand InsertSmallInt(
  478. EmitterContext context,
  479. IntegerSize size,
  480. Operand bitOffset,
  481. Operand word,
  482. Operand value)
  483. {
  484. switch (size)
  485. {
  486. case IntegerSize.U8:
  487. case IntegerSize.S8:
  488. value = context.BitwiseAnd(value, Const(0xff));
  489. value = context.BitfieldInsert(word, value, bitOffset, Const(8));
  490. break;
  491. case IntegerSize.U16:
  492. case IntegerSize.S16:
  493. value = context.BitwiseAnd(value, Const(0xffff));
  494. value = context.BitfieldInsert(word, value, bitOffset, Const(16));
  495. break;
  496. }
  497. return value;
  498. }
  499. }
  500. }