InstEmitMemory.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. using Ryujinx.Graphics.Shader.Decoders;
  2. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3. using Ryujinx.Graphics.Shader.Translation;
  4. using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Instructions
  7. {
  8. static partial class InstEmit
  9. {
        // Memory region targeted by the shared load/store emit helpers:
        // selects between thread-local memory (Ld/St) and CTA shared memory (Lds/Sts).
        private enum MemoryRegion
        {
            Local,
            Shared
        }
  15. public static void Ald(EmitterContext context)
  16. {
  17. OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
  18. Operand primVertex = context.Copy(GetSrcC(context));
  19. for (int index = 0; index < op.Count; index++)
  20. {
  21. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  22. if (rd.IsRZ)
  23. {
  24. break;
  25. }
  26. Operand src = Attribute(op.AttributeOffset + index * 4);
  27. context.Copy(Register(rd), context.LoadAttribute(src, primVertex));
  28. }
  29. }
  30. public static void Ast(EmitterContext context)
  31. {
  32. OpCodeAttribute op = (OpCodeAttribute)context.CurrOp;
  33. for (int index = 0; index < op.Count; index++)
  34. {
  35. if (op.Rd.Index + index > RegisterConsts.RegisterZeroIndex)
  36. {
  37. break;
  38. }
  39. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  40. Operand dest = Attribute(op.AttributeOffset + index * 4);
  41. context.Copy(dest, Register(rd));
  42. }
  43. }
  44. public static void Atoms(EmitterContext context)
  45. {
  46. OpCodeAtom op = (OpCodeAtom)context.CurrOp;
  47. Operand offset = context.ShiftRightU32(GetSrcA(context), Const(2));
  48. offset = context.IAdd(offset, Const(op.Offset));
  49. Operand value = GetSrcB(context);
  50. Operand res = EmitAtomicOp(
  51. context,
  52. Instruction.MrShared,
  53. op.AtomicOp,
  54. op.Type,
  55. offset,
  56. Const(0),
  57. value);
  58. context.Copy(GetDest(context), res);
  59. }
  60. public static void Bar(EmitterContext context)
  61. {
  62. OpCodeBarrier op = (OpCodeBarrier)context.CurrOp;
  63. // TODO: Support other modes.
  64. if (op.Mode == BarrierMode.Sync)
  65. {
  66. context.Barrier();
  67. }
  68. else
  69. {
  70. context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.Mode}.");
  71. }
  72. }
  73. public static void Ipa(EmitterContext context)
  74. {
  75. OpCodeIpa op = (OpCodeIpa)context.CurrOp;
  76. Operand res = Attribute(op.AttributeOffset);
  77. if (op.AttributeOffset >= AttributeConsts.UserAttributeBase &&
  78. op.AttributeOffset < AttributeConsts.UserAttributeEnd)
  79. {
  80. int index = (op.AttributeOffset - AttributeConsts.UserAttributeBase) >> 4;
  81. if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
  82. {
  83. res = context.FPMultiply(res, Attribute(AttributeConsts.PositionW));
  84. }
  85. }
  86. if (op.Mode == InterpolationMode.Default)
  87. {
  88. Operand srcB = GetSrcB(context);
  89. res = context.FPMultiply(res, srcB);
  90. }
  91. res = context.FPSaturate(res, op.Saturate);
  92. context.Copy(GetDest(context), res);
  93. }
        // ISBERD: load from internal stage buffer entry memory.
        public static void Isberd(EmitterContext context)
        {
            // This instruction performs a load from ISBE memory,
            // however it seems to be only used to get some vertex
            // input data, so we instead propagate the offset so that
            // it can be used on the attribute load.
            context.Copy(GetDest(context), GetSrcA(context));
        }
  102. public static void Ld(EmitterContext context)
  103. {
  104. EmitLoad(context, MemoryRegion.Local);
  105. }
  106. public static void Ldc(EmitterContext context)
  107. {
  108. OpCodeLdc op = (OpCodeLdc)context.CurrOp;
  109. if (op.Size > IntegerSize.B64)
  110. {
  111. context.Config.GpuAccessor.Log($"Invalid LDC size: {op.Size}.");
  112. }
  113. bool isSmallInt = op.Size < IntegerSize.B32;
  114. int count = op.Size == IntegerSize.B64 ? 2 : 1;
  115. Operand addr = context.IAdd(GetSrcA(context), Const(op.Offset));
  116. Operand wordOffset = context.ShiftRightU32(addr, Const(2));
  117. Operand bitOffset = GetBitOffset(context, addr);
  118. for (int index = 0; index < count; index++)
  119. {
  120. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  121. if (rd.IsRZ)
  122. {
  123. break;
  124. }
  125. Operand offset = context.IAdd(wordOffset, Const(index));
  126. Operand value = context.LoadConstant(Const(op.Slot), offset);
  127. if (isSmallInt)
  128. {
  129. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  130. }
  131. context.Copy(Register(rd), value);
  132. }
  133. }
  134. public static void Ldg(EmitterContext context)
  135. {
  136. EmitLoadGlobal(context);
  137. }
  138. public static void Lds(EmitterContext context)
  139. {
  140. EmitLoad(context, MemoryRegion.Shared);
  141. }
  142. public static void Membar(EmitterContext context)
  143. {
  144. OpCodeMemoryBarrier op = (OpCodeMemoryBarrier)context.CurrOp;
  145. if (op.Level == BarrierLevel.Cta)
  146. {
  147. context.GroupMemoryBarrier();
  148. }
  149. else
  150. {
  151. context.MemoryBarrier();
  152. }
  153. }
  154. public static void Out(EmitterContext context)
  155. {
  156. OpCode op = context.CurrOp;
  157. bool emit = op.RawOpCode.Extract(39);
  158. bool cut = op.RawOpCode.Extract(40);
  159. if (!(emit || cut))
  160. {
  161. context.Config.GpuAccessor.Log("Invalid OUT encoding.");
  162. }
  163. if (emit)
  164. {
  165. context.EmitVertex();
  166. }
  167. if (cut)
  168. {
  169. context.EndPrimitive();
  170. }
  171. }
        // RED: reduction operation on global memory. Same as an atomic,
        // except the old memory value is not written back to any register
        // (the result of EmitAtomicOp is intentionally discarded).
        public static void Red(EmitterContext context)
        {
            OpCodeRed op = (OpCodeRed)context.CurrOp;

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

            // Note: GetDest here is the *source* of the reduction value, the
            // instruction encodes it on the destination register field.
            EmitAtomicOp(
                context,
                Instruction.MrGlobal,
                op.AtomicOp,
                op.Type,
                addrLow,
                addrHigh,
                GetDest(context));
        }
  185. public static void St(EmitterContext context)
  186. {
  187. EmitStore(context, MemoryRegion.Local);
  188. }
  189. public static void Stg(EmitterContext context)
  190. {
  191. EmitStoreGlobal(context);
  192. }
  193. public static void Sts(EmitterContext context)
  194. {
  195. EmitStore(context, MemoryRegion.Shared);
  196. }
  197. private static Operand EmitAtomicOp(
  198. EmitterContext context,
  199. Instruction mr,
  200. AtomicOp op,
  201. ReductionType type,
  202. Operand addrLow,
  203. Operand addrHigh,
  204. Operand value)
  205. {
  206. Operand res = Const(0);
  207. switch (op)
  208. {
  209. case AtomicOp.Add:
  210. if (type == ReductionType.S32 || type == ReductionType.U32)
  211. {
  212. res = context.AtomicAdd(mr, addrLow, addrHigh, value);
  213. }
  214. else
  215. {
  216. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  217. }
  218. break;
  219. case AtomicOp.BitwiseAnd:
  220. if (type == ReductionType.S32 || type == ReductionType.U32)
  221. {
  222. res = context.AtomicAnd(mr, addrLow, addrHigh, value);
  223. }
  224. else
  225. {
  226. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  227. }
  228. break;
  229. case AtomicOp.BitwiseExclusiveOr:
  230. if (type == ReductionType.S32 || type == ReductionType.U32)
  231. {
  232. res = context.AtomicXor(mr, addrLow, addrHigh, value);
  233. }
  234. else
  235. {
  236. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  237. }
  238. break;
  239. case AtomicOp.BitwiseOr:
  240. if (type == ReductionType.S32 || type == ReductionType.U32)
  241. {
  242. res = context.AtomicOr(mr, addrLow, addrHigh, value);
  243. }
  244. else
  245. {
  246. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  247. }
  248. break;
  249. case AtomicOp.Maximum:
  250. if (type == ReductionType.S32)
  251. {
  252. res = context.AtomicMaxS32(mr, addrLow, addrHigh, value);
  253. }
  254. else if (type == ReductionType.U32)
  255. {
  256. res = context.AtomicMaxU32(mr, addrLow, addrHigh, value);
  257. }
  258. else
  259. {
  260. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  261. }
  262. break;
  263. case AtomicOp.Minimum:
  264. if (type == ReductionType.S32)
  265. {
  266. res = context.AtomicMinS32(mr, addrLow, addrHigh, value);
  267. }
  268. else if (type == ReductionType.U32)
  269. {
  270. res = context.AtomicMinU32(mr, addrLow, addrHigh, value);
  271. }
  272. else
  273. {
  274. context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
  275. }
  276. break;
  277. }
  278. return res;
  279. }
  280. private static void EmitLoad(EmitterContext context, MemoryRegion region)
  281. {
  282. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  283. if (op.Size > IntegerSize.B128)
  284. {
  285. context.Config.GpuAccessor.Log($"Invalid load size: {op.Size}.");
  286. }
  287. bool isSmallInt = op.Size < IntegerSize.B32;
  288. int count = 1;
  289. switch (op.Size)
  290. {
  291. case IntegerSize.B64: count = 2; break;
  292. case IntegerSize.B128: count = 4; break;
  293. }
  294. Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));
  295. // Word offset = byte offset / 4 (one word = 4 bytes).
  296. Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
  297. Operand bitOffset = GetBitOffset(context, baseOffset);
  298. for (int index = 0; index < count; index++)
  299. {
  300. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  301. if (rd.IsRZ)
  302. {
  303. break;
  304. }
  305. Operand offset = context.IAdd(wordOffset, Const(index));
  306. Operand value = null;
  307. switch (region)
  308. {
  309. case MemoryRegion.Local: value = context.LoadLocal (offset); break;
  310. case MemoryRegion.Shared: value = context.LoadShared(offset); break;
  311. }
  312. if (isSmallInt)
  313. {
  314. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  315. }
  316. context.Copy(Register(rd), value);
  317. }
  318. }
  319. private static void EmitLoadGlobal(EmitterContext context)
  320. {
  321. OpCodeMemory op = (OpCodeMemory)context.CurrOp;
  322. bool isSmallInt = op.Size < IntegerSize.B32;
  323. int count = GetVectorCount(op.Size);
  324. (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);
  325. Operand bitOffset = GetBitOffset(context, addrLow);
  326. for (int index = 0; index < count; index++)
  327. {
  328. Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);
  329. if (rd.IsRZ)
  330. {
  331. break;
  332. }
  333. Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
  334. if (isSmallInt)
  335. {
  336. value = ExtractSmallInt(context, op.Size, bitOffset, value);
  337. }
  338. context.Copy(Register(rd), value);
  339. }
  340. }
        // Emits a store of op.Size bytes to local or shared memory.
        // 8/16-bits stores are implemented as a read-modify-write of the
        // containing aligned 32-bits word.
        private static void EmitStore(EmitterContext context, MemoryRegion region)
        {
            OpCodeMemory op = (OpCodeMemory)context.CurrOp;

            if (op.Size > IntegerSize.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid store size: {op.Size}.");
            }

            bool isSmallInt = op.Size < IntegerSize.B32;

            int count = 1;

            switch (op.Size)
            {
                case IntegerSize.B64: count = 2; break;
                case IntegerSize.B128: count = 4; break;
            }

            Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

            // Word offset = byte offset / 4 (one word = 4 bytes).
            Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

            Operand bitOffset = GetBitOffset(context, baseOffset);

            for (int index = 0; index < count; index++)
            {
                Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

                Operand value = Register(rd);

                Operand offset = context.IAdd(wordOffset, Const(index));

                if (isSmallInt)
                {
                    // Fetch the current word so the bits outside the stored
                    // byte/short are preserved.
                    Operand word = null;

                    switch (region)
                    {
                        case MemoryRegion.Local: word = context.LoadLocal (offset); break;
                        case MemoryRegion.Shared: word = context.LoadShared(offset); break;
                    }

                    value = InsertSmallInt(context, op.Size, bitOffset, word, value);
                }

                switch (region)
                {
                    case MemoryRegion.Local: context.StoreLocal (offset, value); break;
                    case MemoryRegion.Shared: context.StoreShared(offset, value); break;
                }

                // Unlike the load path, the RZ check is done *after* the store:
                // a store sourced from RZ still writes its (zero) value for the
                // current word, only the remaining words are skipped.
                if (rd.IsRZ)
                {
                    break;
                }
            }
        }
        // Emits a store to global memory using a 40-bits address.
        // 8/16-bits stores are implemented as a read-modify-write of the
        // containing word; for those sizes GetVectorCount returns 1, so the
        // word is always loaded from addrLow itself (no per-index offset).
        private static void EmitStoreGlobal(EmitterContext context)
        {
            OpCodeMemory op = (OpCodeMemory)context.CurrOp;

            bool isSmallInt = op.Size < IntegerSize.B32;

            int count = GetVectorCount(op.Size);

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

            Operand bitOffset = GetBitOffset(context, addrLow);

            for (int index = 0; index < count; index++)
            {
                Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

                Operand value = Register(rd);

                if (isSmallInt)
                {
                    // Merge the small value into the word currently in memory.
                    Operand word = context.LoadGlobal(addrLow, addrHigh);

                    value = InsertSmallInt(context, op.Size, bitOffset, word, value);
                }

                context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);

                // The RZ check is done *after* the store, so a store sourced
                // from RZ still writes the current word before the loop stops.
                if (rd.IsRZ)
                {
                    break;
                }
            }
        }
  407. private static int GetVectorCount(IntegerSize size)
  408. {
  409. switch (size)
  410. {
  411. case IntegerSize.B64:
  412. return 2;
  413. case IntegerSize.B128:
  414. case IntegerSize.UB128:
  415. return 4;
  416. }
  417. return 1;
  418. }
        // Computes the (low, high) parts of the 40-bits global memory address.
        // ra:       base address register (Ra).
        // extended: true when the instruction uses the extended (64-bits
        //           register pair) addressing form.
        // offset:   immediate byte offset added to the low part.
        private static (Operand, Operand) Get40BitsAddress(
            EmitterContext context,
            Register ra,
            bool extended,
            int offset)
        {
            Operand addrLow = GetSrcA(context);
            Operand addrHigh;

            if (extended && !ra.IsRZ)
            {
                // Extended form: Ra is a register pair, Ra + 1 holds the
                // upper address bits.
                addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
            }
            else
            {
                addrHigh = Const(0);
            }

            Operand offs = Const(offset);

            addrLow = context.IAdd(addrLow, offs);

            if (extended)
            {
                // Propagate the carry of the low 32-bits addition into the
                // high part: for unsigned addition, a carry out occurred iff
                // the result is (unsigned) less than the added operand.
                Operand carry = context.ICompareLessUnsigned(addrLow, offs);

                addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
            }

            return (addrLow, addrHigh);
        }
  444. private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
  445. {
  446. // Note: bit offset = (baseOffset & 0b11) * 8.
  447. // Addresses should be always aligned to the integer type,
  448. // so we don't need to take unaligned addresses into account.
  449. return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
  450. }
  451. private static Operand ExtractSmallInt(
  452. EmitterContext context,
  453. IntegerSize size,
  454. Operand bitOffset,
  455. Operand value)
  456. {
  457. value = context.ShiftRightU32(value, bitOffset);
  458. switch (size)
  459. {
  460. case IntegerSize.U8: value = ZeroExtendTo32(context, value, 8); break;
  461. case IntegerSize.U16: value = ZeroExtendTo32(context, value, 16); break;
  462. case IntegerSize.S8: value = SignExtendTo32(context, value, 8); break;
  463. case IntegerSize.S16: value = SignExtendTo32(context, value, 16); break;
  464. }
  465. return value;
  466. }
  467. private static Operand InsertSmallInt(
  468. EmitterContext context,
  469. IntegerSize size,
  470. Operand bitOffset,
  471. Operand word,
  472. Operand value)
  473. {
  474. switch (size)
  475. {
  476. case IntegerSize.U8:
  477. case IntegerSize.S8:
  478. value = context.BitwiseAnd(value, Const(0xff));
  479. value = context.BitfieldInsert(word, value, bitOffset, Const(8));
  480. break;
  481. case IntegerSize.U16:
  482. case IntegerSize.S16:
  483. value = context.BitwiseAnd(value, Const(0xffff));
  484. value = context.BitfieldInsert(word, value, bitOffset, Const(16));
  485. break;
  486. }
  487. return value;
  488. }
  489. }
  490. }