// Detokenize.cs
  1. using Ryujinx.Common.Memory;
  2. using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
  3. using Ryujinx.Graphics.Nvdec.Vp9.Types;
  4. using Ryujinx.Graphics.Video;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Runtime.InteropServices;
  8. using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
  9. namespace Ryujinx.Graphics.Nvdec.Vp9
  10. {
  11. internal static class Detokenize
  12. {
  13. private const int EobContextNode = 0;
  14. private const int ZeroContextNode = 1;
  15. private const int OneContextNode = 2;
  16. private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
  17. {
  18. const int maxNeighbors = 2;
  19. return (1 + tokenCache[neighbors[maxNeighbors * c + 0]] + tokenCache[neighbors[maxNeighbors * c + 1]]) >> 1;
  20. }
  21. private static int ReadCoeff(
  22. ref Reader r,
  23. ReadOnlySpan<byte> probs,
  24. int n,
  25. ref ulong value,
  26. ref int count,
  27. ref uint range)
  28. {
  29. int i, val = 0;
  30. for (i = 0; i < n; ++i)
  31. {
  32. val = (val << 1) | r.ReadBool(probs[i], ref value, ref count, ref range);
  33. }
  34. return val;
  35. }
  36. private static int DecodeCoefs(
  37. ref MacroBlockD xd,
  38. PlaneType type,
  39. Span<int> dqcoeff,
  40. TxSize txSize,
  41. ref Array2<short> dq,
  42. int ctx,
  43. ReadOnlySpan<short> scan,
  44. ReadOnlySpan<short> nb,
  45. ref Reader r)
  46. {
  47. ref Vp9BackwardUpdates counts = ref xd.Counts.Value;
  48. int maxEob = 16 << ((int)txSize << 1);
  49. ref Vp9EntropyProbs fc = ref xd.Fc.Value;
  50. int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;
  51. int band, c = 0;
  52. ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr];
  53. Span<byte> tokenCache = stackalloc byte[32 * 32];
  54. ReadOnlySpan<byte> bandTranslate = Luts.get_band_translate(txSize);
  55. int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0;
  56. int v;
  57. short dqv = dq[0];
  58. ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12)
  59. ? Luts.Vp9Cat6ProbHigh12
  60. : (xd.Bd == 10) ? Luts.Vp9Cat6ProbHigh12.Slice(2) : Luts.Vp9Cat6Prob;
  61. int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14;
  62. // Keep value, range, and count as locals. The compiler produces better
  63. // results with the locals than using r directly.
  64. ulong value = r.Value;
  65. uint range = r.Range;
  66. int count = r.Count;
  67. while (c < maxEob)
  68. {
  69. int val = -1;
  70. band = bandTranslate[0];
  71. bandTranslate = bandTranslate.Slice(1);
  72. ref Array3<byte> prob = ref coefProbs[band][ctx];
  73. if (!xd.Counts.IsNull)
  74. {
  75. ++counts.EobBranch[(int)txSize][(int)type][refr][band][ctx];
  76. }
  77. if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0)
  78. {
  79. if (!xd.Counts.IsNull)
  80. {
  81. ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.EobModelToken];
  82. }
  83. break;
  84. }
  85. while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0)
  86. {
  87. if (!xd.Counts.IsNull)
  88. {
  89. ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.ZeroToken];
  90. }
  91. dqv = dq[1];
  92. tokenCache[scan[c]] = 0;
  93. ++c;
  94. if (c >= maxEob)
  95. {
  96. r.Value = value;
  97. r.Range = range;
  98. r.Count = count;
  99. return c; // Zero tokens at the end (no eob token)
  100. }
  101. ctx = GetCoefContext(nb, tokenCache, c);
  102. band = bandTranslate[0];
  103. bandTranslate = bandTranslate.Slice(1);
  104. prob = ref coefProbs[band][ctx];
  105. }
  106. if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
  107. {
  108. ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];
  109. if (!xd.Counts.IsNull)
  110. {
  111. ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken];
  112. }
  113. if (r.ReadBool(p[0], ref value, ref count, ref range) != 0)
  114. {
  115. if (r.ReadBool(p[3], ref value, ref count, ref range) != 0)
  116. {
  117. tokenCache[scan[c]] = 5;
  118. if (r.ReadBool(p[5], ref value, ref count, ref range) != 0)
  119. {
  120. if (r.ReadBool(p[7], ref value, ref count, ref range) != 0)
  121. {
  122. val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
  123. }
  124. else
  125. {
  126. val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
  127. }
  128. }
  129. else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0)
  130. {
  131. val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
  132. }
  133. else
  134. {
  135. val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
  136. }
  137. }
  138. else
  139. {
  140. tokenCache[scan[c]] = 4;
  141. if (r.ReadBool(p[4], ref value, ref count, ref range) != 0)
  142. {
  143. val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
  144. }
  145. else
  146. {
  147. val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
  148. }
  149. }
  150. // Val may use 18-bits
  151. v = (int)(((long)val * dqv) >> dqShift);
  152. }
  153. else
  154. {
  155. if (r.ReadBool(p[1], ref value, ref count, ref range) != 0)
  156. {
  157. tokenCache[scan[c]] = 3;
  158. v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> dqShift;
  159. }
  160. else
  161. {
  162. tokenCache[scan[c]] = 2;
  163. v = (2 * dqv) >> dqShift;
  164. }
  165. }
  166. }
  167. else
  168. {
  169. if (!xd.Counts.IsNull)
  170. {
  171. ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.OneToken];
  172. }
  173. tokenCache[scan[c]] = 1;
  174. v = dqv >> dqShift;
  175. }
  176. dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd);
  177. ++c;
  178. ctx = GetCoefContext(nb, tokenCache, c);
  179. dqv = dq[1];
  180. }
  181. r.Value = value;
  182. r.Range = range;
  183. r.Count = count;
  184. return c;
  185. }
  186. private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
  187. {
  188. if (xd.MaxBlocksWide != 0)
  189. {
  190. if (txSizeInBlocks + x > xd.MaxBlocksWide)
  191. {
  192. ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8;
  193. }
  194. }
  195. if (xd.MaxBlocksHigh != 0)
  196. {
  197. if (txSizeInBlocks + y > xd.MaxBlocksHigh)
  198. {
  199. ctxShiftL = (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y)) * 8;
  200. }
  201. }
  202. }
  203. private static PlaneType GetPlaneType(int plane)
  204. {
  205. return (PlaneType)(plane > 0 ? 1 : 0);
  206. }
  207. public static int DecodeBlockTokens(
  208. ref TileWorkerData twd,
  209. int plane,
  210. Luts.ScanOrder sc,
  211. int x,
  212. int y,
  213. TxSize txSize,
  214. int segId)
  215. {
  216. ref Reader r = ref twd.BitReader;
  217. ref MacroBlockD xd = ref twd.Xd;
  218. ref MacroBlockDPlane pd = ref xd.Plane[plane];
  219. ref Array2<short> dequant = ref pd.SegDequant[segId];
  220. int eob;
  221. Span<sbyte> a = pd.AboveContext.AsSpan().Slice(x);
  222. Span<sbyte> l = pd.LeftContext.AsSpan().Slice(y);
  223. int ctx;
  224. int ctxShiftA = 0;
  225. int ctxShiftL = 0;
  226. switch (txSize)
  227. {
  228. case TxSize.Tx4x4:
  229. ctx = a[0] != 0 ? 1 : 0;
  230. ctx += l[0] != 0 ? 1 : 0;
  231. eob = DecodeCoefs(
  232. ref xd,
  233. GetPlaneType(plane),
  234. pd.DqCoeff.AsSpan(),
  235. txSize,
  236. ref dequant,
  237. ctx,
  238. sc.Scan,
  239. sc.Neighbors,
  240. ref r);
  241. a[0] = l[0] = (sbyte)(eob > 0 ? 1 : 0);
  242. break;
  243. case TxSize.Tx8x8:
  244. GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx8x8);
  245. ctx = MemoryMarshal.Cast<sbyte, ushort>(a)[0] != 0 ? 1 : 0;
  246. ctx += MemoryMarshal.Cast<sbyte, ushort>(l)[0] != 0 ? 1 : 0;
  247. eob = DecodeCoefs(
  248. ref xd,
  249. GetPlaneType(plane),
  250. pd.DqCoeff.AsSpan(),
  251. txSize,
  252. ref dequant,
  253. ctx,
  254. sc.Scan,
  255. sc.Neighbors,
  256. ref r);
  257. MemoryMarshal.Cast<sbyte, ushort>(a)[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> ctxShiftA);
  258. MemoryMarshal.Cast<sbyte, ushort>(l)[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> ctxShiftL);
  259. break;
  260. case TxSize.Tx16x16:
  261. GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx16x16);
  262. ctx = MemoryMarshal.Cast<sbyte, uint>(a)[0] != 0 ? 1 : 0;
  263. ctx += MemoryMarshal.Cast<sbyte, uint>(l)[0] != 0 ? 1 : 0;
  264. eob = DecodeCoefs(
  265. ref xd,
  266. GetPlaneType(plane),
  267. pd.DqCoeff.AsSpan(),
  268. txSize,
  269. ref dequant,
  270. ctx,
  271. sc.Scan,
  272. sc.Neighbors,
  273. ref r);
  274. MemoryMarshal.Cast<sbyte, uint>(a)[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> ctxShiftA);
  275. MemoryMarshal.Cast<sbyte, uint>(l)[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> ctxShiftL);
  276. break;
  277. case TxSize.Tx32x32:
  278. GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx32x32);
  279. // NOTE: Casting to ulong here is safe because the default memory
  280. // alignment is at least 8 bytes and the Tx32x32 is aligned on 8 byte
  281. // boundaries.
  282. ctx = MemoryMarshal.Cast<sbyte, ulong>(a)[0] != 0 ? 1 : 0;
  283. ctx += MemoryMarshal.Cast<sbyte, ulong>(l)[0] != 0 ? 1 : 0;
  284. eob = DecodeCoefs(
  285. ref xd,
  286. GetPlaneType(plane),
  287. pd.DqCoeff.AsSpan(),
  288. txSize,
  289. ref dequant,
  290. ctx,
  291. sc.Scan,
  292. sc.Neighbors,
  293. ref r);
  294. MemoryMarshal.Cast<sbyte, ulong>(a)[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> ctxShiftA;
  295. MemoryMarshal.Cast<sbyte, ulong>(l)[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> ctxShiftL;
  296. break;
  297. default:
  298. Debug.Assert(false, "Invalid transform size.");
  299. eob = 0;
  300. break;
  301. }
  302. return eob;
  303. }
  304. }
  305. }