Idct.cs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. using Ryujinx.Graphics.Nvdec.Vp9.Common;
  2. using Ryujinx.Graphics.Nvdec.Vp9.Types;
  3. using System;
  4. using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
  5. namespace Ryujinx.Graphics.Nvdec.Vp9
  6. {
  7. internal static class Idct
  8. {
// Signature of a 1-D inverse transform used by the Span<byte> destination path:
// reads coefficients from `input` and writes the transformed values to `output`.
private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output);
// Signature of a 1-D inverse transform used by the Span<ushort> ("Highbd") path.
// `bd` is forwarded unchanged to the transform (presumably the bit depth; confirm against InvTxfm).
private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd);
  11. private struct Transform2D
  12. {
  13. public Transform1D Cols, Rows; // Vertical and horizontal
  14. public Transform2D(Transform1D cols, Transform1D rows)
  15. {
  16. Cols = cols;
  17. Rows = rows;
  18. }
  19. }
  20. private struct HighbdTransform2D
  21. {
  22. public HighbdTransform1D Cols, Rows; // Vertical and horizontal
  23. public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows)
  24. {
  25. Cols = cols;
  26. Rows = rows;
  27. }
  28. }
  29. private static readonly Transform2D[] _iht4 = {
  30. new(Idct4, Idct4), // DCT_DCT = 0
  31. new(Iadst4, Idct4), // ADST_DCT = 1
  32. new(Idct4, Iadst4), // DCT_ADST = 2
  33. new(Iadst4, Iadst4), // ADST_ADST = 3
  34. };
  35. public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
  36. {
  37. int i, j;
  38. Span<int> output = stackalloc int[4 * 4];
  39. Span<int> outptr = output;
  40. Span<int> tempIn = stackalloc int[4];
  41. Span<int> tempOut = stackalloc int[4];
  42. // Inverse transform row vectors
  43. for (i = 0; i < 4; ++i)
  44. {
  45. _iht4[txType].Rows(input, outptr);
  46. input = input[4..];
  47. outptr = outptr[4..];
  48. }
  49. // Inverse transform column vectors
  50. for (i = 0; i < 4; ++i)
  51. {
  52. for (j = 0; j < 4; ++j)
  53. {
  54. tempIn[j] = output[j * 4 + i];
  55. }
  56. _iht4[txType].Cols(tempIn, tempOut);
  57. for (j = 0; j < 4; ++j)
  58. {
  59. dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4));
  60. }
  61. }
  62. }
  63. private static readonly Transform2D[] _iht8 = {
  64. new(Idct8, Idct8), // DCT_DCT = 0
  65. new(Iadst8, Idct8), // ADST_DCT = 1
  66. new(Idct8, Iadst8), // DCT_ADST = 2
  67. new(Iadst8, Iadst8), // ADST_ADST = 3
  68. };
  69. public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
  70. {
  71. int i, j;
  72. Span<int> output = stackalloc int[8 * 8];
  73. Span<int> outptr = output;
  74. Span<int> tempIn = stackalloc int[8];
  75. Span<int> tempOut = stackalloc int[8];
  76. Transform2D ht = _iht8[txType];
  77. // Inverse transform row vectors
  78. for (i = 0; i < 8; ++i)
  79. {
  80. ht.Rows(input, outptr);
  81. input = input[8..];
  82. outptr = outptr[8..];
  83. }
  84. // Inverse transform column vectors
  85. for (i = 0; i < 8; ++i)
  86. {
  87. for (j = 0; j < 8; ++j)
  88. {
  89. tempIn[j] = output[j * 8 + i];
  90. }
  91. ht.Cols(tempIn, tempOut);
  92. for (j = 0; j < 8; ++j)
  93. {
  94. dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5));
  95. }
  96. }
  97. }
  98. private static readonly Transform2D[] _iht16 = {
  99. new(Idct16, Idct16), // DCT_DCT = 0
  100. new(Iadst16, Idct16), // ADST_DCT = 1
  101. new(Idct16, Iadst16), // DCT_ADST = 2
  102. new(Iadst16, Iadst16), // ADST_ADST = 3
  103. };
  104. public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
  105. {
  106. int i, j;
  107. Span<int> output = stackalloc int[16 * 16];
  108. Span<int> outptr = output;
  109. Span<int> tempIn = stackalloc int[16];
  110. Span<int> tempOut = stackalloc int[16];
  111. Transform2D ht = _iht16[txType];
  112. // Rows
  113. for (i = 0; i < 16; ++i)
  114. {
  115. ht.Rows(input, outptr);
  116. input = input[16..];
  117. outptr = outptr[16..];
  118. }
  119. // Columns
  120. for (i = 0; i < 16; ++i)
  121. {
  122. for (j = 0; j < 16; ++j)
  123. {
  124. tempIn[j] = output[j * 16 + i];
  125. }
  126. ht.Cols(tempIn, tempOut);
  127. for (j = 0; j < 16; ++j)
  128. {
  129. dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6));
  130. }
  131. }
  132. }
  133. // Idct
  134. public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  135. {
  136. if (eob > 1)
  137. {
  138. Idct4x416Add(input, dest, stride);
  139. }
  140. else
  141. {
  142. Idct4x41Add(input, dest, stride);
  143. }
  144. }
  145. public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  146. {
  147. if (eob > 1)
  148. {
  149. Iwht4x416Add(input, dest, stride);
  150. }
  151. else
  152. {
  153. Iwht4x41Add(input, dest, stride);
  154. }
  155. }
  156. public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  157. {
  158. // If dc is 1, then input[0] is the reconstructed value, do not need
  159. // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
  160. // The calculation can be simplified if there are not many non-zero dct
  161. // coefficients. Use eobs to decide what to do.
  162. if (eob == 1)
  163. {
  164. // DC only DCT coefficient
  165. Idct8x81Add(input, dest, stride);
  166. }
  167. else if (eob <= 12)
  168. {
  169. Idct8x812Add(input, dest, stride);
  170. }
  171. else
  172. {
  173. Idct8x864Add(input, dest, stride);
  174. }
  175. }
  176. public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  177. {
  178. /* The calculation can be simplified if there are not many non-zero dct
  179. * coefficients. Use eobs to separate different cases. */
  180. if (eob == 1) /* DC only DCT coefficient. */
  181. {
  182. Idct16x161Add(input, dest, stride);
  183. }
  184. else if (eob <= 10)
  185. {
  186. Idct16x1610Add(input, dest, stride);
  187. }
  188. else if (eob <= 38)
  189. {
  190. Idct16x1638Add(input, dest, stride);
  191. }
  192. else
  193. {
  194. Idct16x16256Add(input, dest, stride);
  195. }
  196. }
  197. public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  198. {
  199. if (eob == 1)
  200. {
  201. Idct32x321Add(input, dest, stride);
  202. }
  203. else if (eob <= 34)
  204. {
  205. // Non-zero coeff only in upper-left 8x8
  206. Idct32x3234Add(input, dest, stride);
  207. }
  208. else if (eob <= 135)
  209. {
  210. // Non-zero coeff only in upper-left 16x16
  211. Idct32x32135Add(input, dest, stride);
  212. }
  213. else
  214. {
  215. Idct32x321024Add(input, dest, stride);
  216. }
  217. }
  218. // Iht
  219. public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  220. {
  221. if (txType == TxType.DctDct)
  222. {
  223. Idct4x4Add(input, dest, stride, eob);
  224. }
  225. else
  226. {
  227. Iht4x416Add(input, dest, stride, (int)txType);
  228. }
  229. }
  230. public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  231. {
  232. if (txType == TxType.DctDct)
  233. {
  234. Idct8x8Add(input, dest, stride, eob);
  235. }
  236. else
  237. {
  238. Iht8x864Add(input, dest, stride, (int)txType);
  239. }
  240. }
  241. public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest,
  242. int stride, int eob)
  243. {
  244. if (txType == TxType.DctDct)
  245. {
  246. Idct16x16Add(input, dest, stride, eob);
  247. }
  248. else
  249. {
  250. Iht16x16256Add(input, dest, stride, (int)txType);
  251. }
  252. }
  253. private static readonly HighbdTransform2D[] _highbdIht4 = {
  254. new(HighbdIdct4, HighbdIdct4), // DCT_DCT = 0
  255. new(HighbdIadst4, HighbdIdct4), // ADST_DCT = 1
  256. new(HighbdIdct4, HighbdIadst4), // DCT_ADST = 2
  257. new(HighbdIadst4, HighbdIadst4), // ADST_ADST = 3
  258. };
  259. public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
  260. {
  261. int i, j;
  262. Span<int> output = stackalloc int[4 * 4];
  263. Span<int> outptr = output;
  264. Span<int> tempIn = stackalloc int[4];
  265. Span<int> tempOut = stackalloc int[4];
  266. // Inverse transform row vectors.
  267. for (i = 0; i < 4; ++i)
  268. {
  269. _highbdIht4[txType].Rows(input, outptr, bd);
  270. input = input[4..];
  271. outptr = outptr[4..];
  272. }
  273. // Inverse transform column vectors.
  274. for (i = 0; i < 4; ++i)
  275. {
  276. for (j = 0; j < 4; ++j)
  277. {
  278. tempIn[j] = output[j * 4 + i];
  279. }
  280. _highbdIht4[txType].Cols(tempIn, tempOut, bd);
  281. for (j = 0; j < 4; ++j)
  282. {
  283. dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4), bd);
  284. }
  285. }
  286. }
  287. private static readonly HighbdTransform2D[] _highIht8 = {
  288. new(HighbdIdct8, HighbdIdct8), // DCT_DCT = 0
  289. new(HighbdIadst8, HighbdIdct8), // ADST_DCT = 1
  290. new(HighbdIdct8, HighbdIadst8), // DCT_ADST = 2
  291. new(HighbdIadst8, HighbdIadst8), // ADST_ADST = 3
  292. };
  293. public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
  294. {
  295. int i, j;
  296. Span<int> output = stackalloc int[8 * 8];
  297. Span<int> outptr = output;
  298. Span<int> tempIn = stackalloc int[8];
  299. Span<int> tempOut = stackalloc int[8];
  300. HighbdTransform2D ht = _highIht8[txType];
  301. // Inverse transform row vectors.
  302. for (i = 0; i < 8; ++i)
  303. {
  304. ht.Rows(input, outptr, bd);
  305. input = input[8..];
  306. outptr = output[8..];
  307. }
  308. // Inverse transform column vectors.
  309. for (i = 0; i < 8; ++i)
  310. {
  311. for (j = 0; j < 8; ++j)
  312. {
  313. tempIn[j] = output[j * 8 + i];
  314. }
  315. ht.Cols(tempIn, tempOut, bd);
  316. for (j = 0; j < 8; ++j)
  317. {
  318. dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd);
  319. }
  320. }
  321. }
  322. private static readonly HighbdTransform2D[] _highIht16 = {
  323. new(HighbdIdct16, HighbdIdct16), // DCT_DCT = 0
  324. new(HighbdIadst16, HighbdIdct16), // ADST_DCT = 1
  325. new(HighbdIdct16, HighbdIadst16), // DCT_ADST = 2
  326. new(HighbdIadst16, HighbdIadst16), // ADST_ADST = 3
  327. };
  328. public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
  329. {
  330. int i, j;
  331. Span<int> output = stackalloc int[16 * 16];
  332. Span<int> outptr = output;
  333. Span<int> tempIn = stackalloc int[16];
  334. Span<int> tempOut = stackalloc int[16];
  335. HighbdTransform2D ht = _highIht16[txType];
  336. // Rows
  337. for (i = 0; i < 16; ++i)
  338. {
  339. ht.Rows(input, outptr, bd);
  340. input = input[16..];
  341. outptr = output[16..];
  342. }
  343. // Columns
  344. for (i = 0; i < 16; ++i)
  345. {
  346. for (j = 0; j < 16; ++j)
  347. {
  348. tempIn[j] = output[j * 16 + i];
  349. }
  350. ht.Cols(tempIn, tempOut, bd);
  351. for (j = 0; j < 16; ++j)
  352. {
  353. dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
  354. }
  355. }
  356. }
  357. // Idct
  358. public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  359. {
  360. if (eob > 1)
  361. {
  362. HighbdIdct4x416Add(input, dest, stride, bd);
  363. }
  364. else
  365. {
  366. HighbdIdct4x41Add(input, dest, stride, bd);
  367. }
  368. }
  369. public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  370. {
  371. if (eob > 1)
  372. {
  373. HighbdIwht4x416Add(input, dest, stride, bd);
  374. }
  375. else
  376. {
  377. HighbdIwht4x41Add(input, dest, stride, bd);
  378. }
  379. }
  380. public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  381. {
  382. // If dc is 1, then input[0] is the reconstructed value, do not need
  383. // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
  384. // The calculation can be simplified if there are not many non-zero dct
  385. // coefficients. Use eobs to decide what to do.
  386. // DC only DCT coefficient
  387. if (eob == 1)
  388. {
  389. Vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd);
  390. }
  391. else if (eob <= 12)
  392. {
  393. HighbdIdct8x812Add(input, dest, stride, bd);
  394. }
  395. else
  396. {
  397. HighbdIdct8x864Add(input, dest, stride, bd);
  398. }
  399. }
  400. public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  401. {
  402. // The calculation can be simplified if there are not many non-zero dct
  403. // coefficients. Use eobs to separate different cases.
  404. // DC only DCT coefficient.
  405. if (eob == 1)
  406. {
  407. HighbdIdct16x161Add(input, dest, stride, bd);
  408. }
  409. else if (eob <= 10)
  410. {
  411. HighbdIdct16x1610Add(input, dest, stride, bd);
  412. }
  413. else if (eob <= 38)
  414. {
  415. HighbdIdct16x1638Add(input, dest, stride, bd);
  416. }
  417. else
  418. {
  419. HighbdIdct16x16256Add(input, dest, stride, bd);
  420. }
  421. }
  422. public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  423. {
  424. // Non-zero coeff only in upper-left 8x8
  425. if (eob == 1)
  426. {
  427. HighbdIdct32x321Add(input, dest, stride, bd);
  428. }
  429. else if (eob <= 34)
  430. {
  431. HighbdIdct32x3234Add(input, dest, stride, bd);
  432. }
  433. else if (eob <= 135)
  434. {
  435. HighbdIdct32x32135Add(input, dest, stride, bd);
  436. }
  437. else
  438. {
  439. HighbdIdct32x321024Add(input, dest, stride, bd);
  440. }
  441. }
  442. // Iht
  443. public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  444. {
  445. if (txType == TxType.DctDct)
  446. {
  447. HighbdIdct4x4Add(input, dest, stride, eob, bd);
  448. }
  449. else
  450. {
  451. HighbdIht4x416Add(input, dest, stride, (int)txType, bd);
  452. }
  453. }
  454. public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  455. {
  456. if (txType == TxType.DctDct)
  457. {
  458. HighbdIdct8x8Add(input, dest, stride, eob, bd);
  459. }
  460. else
  461. {
  462. HighbdIht8x864Add(input, dest, stride, (int)txType, bd);
  463. }
  464. }
  465. public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  466. {
  467. if (txType == TxType.DctDct)
  468. {
  469. HighbdIdct16x16Add(input, dest, stride, eob, bd);
  470. }
  471. else
  472. {
  473. HighbdIht16x16256Add(input, dest, stride, (int)txType, bd);
  474. }
  475. }
  476. }
  477. }