Idct.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. using Ryujinx.Graphics.Nvdec.Vp9.Common;
  2. using Ryujinx.Graphics.Nvdec.Vp9.Types;
  3. using System;
  4. using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
  5. namespace Ryujinx.Graphics.Nvdec.Vp9
  6. {
  7. internal static class Idct
  8. {
  /// <summary>
  /// 1-D transform callback: reads coefficients from <c>input</c> and writes results to <c>output</c>.
  /// </summary>
  private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output);

  /// <summary>
  /// Same shape as <see cref="Transform1D"/>, with an additional <c>bd</c> argument.
  /// </summary>
  private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd);

  /// <summary>
  /// Pairs the 1-D transform applied to columns (vertical) with the one applied to rows (horizontal).
  /// </summary>
  private struct Transform2D
  {
      public Transform1D Cols; // Vertical pass
      public Transform1D Rows; // Horizontal pass

      public Transform2D(Transform1D cols, Transform1D rows) => (Cols, Rows) = (cols, rows);
  }

  /// <summary>
  /// Counterpart of <see cref="Transform2D"/> built from <see cref="HighbdTransform1D"/> callbacks.
  /// </summary>
  private struct HighbdTransform2D
  {
      public HighbdTransform1D Cols; // Vertical pass
      public HighbdTransform1D Rows; // Horizontal pass

      public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows) => (Cols, Rows) = (cols, rows);
  }
  // 4-point transform pairs (columns, rows), indexed by transform type:
  // 0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST.
  private static readonly Transform2D[] Iht4 =
  {
      new Transform2D(Idct4, Idct4),
      new Transform2D(Iadst4, Idct4),
      new Transform2D(Idct4, Iadst4),
      new Transform2D(Iadst4, Iadst4),
  };
  /// <summary>
  /// Runs the 4x4 inverse hybrid transform selected by <paramref name="txType"/> over
  /// <paramref name="input"/> and accumulates the rounded result into <paramref name="dest"/>.
  /// </summary>
  /// <param name="input">Coefficients, consumed as 4 consecutive rows of 4 values.</param>
  /// <param name="dest">Destination pixels; element (row, col) lives at <c>row * stride + col</c>.</param>
  /// <param name="stride">Element distance between vertically adjacent pixels of <paramref name="dest"/>.</param>
  /// <param name="txType">Index into <c>Iht4</c> (0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST).</param>
  public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
  {
      const int Size = 4;

      Span<int> output = stackalloc int[Size * Size];
      Span<int> tempIn = stackalloc int[Size];
      Span<int> tempOut = stackalloc int[Size];

      // Resolve the transform pair once instead of indexing the table on every
      // iteration, the same way the 8x8 and 16x16 variants do.
      Transform2D ht = Iht4[txType];

      // Inverse transform row vectors
      for (int row = 0; row < Size; ++row)
      {
          int offset = row * Size;
          ht.Rows(input.Slice(offset), output.Slice(offset));
      }

      // Inverse transform column vectors
      for (int col = 0; col < Size; ++col)
      {
          // Gather the column into contiguous scratch storage for the 1-D transform.
          for (int row = 0; row < Size; ++row)
          {
              tempIn[row] = output[row * Size + col];
          }

          ht.Cols(tempIn, tempOut);

          for (int row = 0; row < Size; ++row)
          {
              int pos = row * stride + col;
              dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 4));
          }
      }
  }
  // 8-point transform pairs (columns, rows), indexed by transform type:
  // 0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST.
  private static readonly Transform2D[] Iht8 =
  {
      new Transform2D(Idct8, Idct8),
      new Transform2D(Iadst8, Idct8),
      new Transform2D(Idct8, Iadst8),
      new Transform2D(Iadst8, Iadst8),
  };
  /// <summary>
  /// Runs the 8x8 inverse hybrid transform selected by <paramref name="txType"/> over
  /// <paramref name="input"/> and accumulates the rounded result into <paramref name="dest"/>.
  /// </summary>
  /// <param name="input">Coefficients, consumed as 8 consecutive rows of 8 values.</param>
  /// <param name="dest">Destination pixels; element (row, col) lives at <c>row * stride + col</c>.</param>
  /// <param name="stride">Element distance between vertically adjacent pixels of <paramref name="dest"/>.</param>
  /// <param name="txType">Index into <c>Iht8</c> (0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST).</param>
  public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
  {
      const int Size = 8;

      Span<int> output = stackalloc int[Size * Size];
      Span<int> tempIn = stackalloc int[Size];
      Span<int> tempOut = stackalloc int[Size];
      Transform2D ht = Iht8[txType];

      // Horizontal pass: one 1-D transform per coefficient row.
      for (int row = 0; row < Size; ++row)
      {
          int offset = row * Size;
          ht.Rows(input.Slice(offset), output.Slice(offset));
      }

      // Vertical pass: gather each column, transform it, then add it to dest.
      for (int col = 0; col < Size; ++col)
      {
          for (int row = 0; row < Size; ++row)
          {
              tempIn[row] = output[row * Size + col];
          }

          ht.Cols(tempIn, tempOut);

          for (int row = 0; row < Size; ++row)
          {
              int pos = row * stride + col;
              dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 5));
          }
      }
  }
  // 16-point transform pairs (columns, rows), indexed by transform type:
  // 0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST.
  private static readonly Transform2D[] Iht16 =
  {
      new Transform2D(Idct16, Idct16),
      new Transform2D(Iadst16, Idct16),
      new Transform2D(Idct16, Iadst16),
      new Transform2D(Iadst16, Iadst16),
  };
  /// <summary>
  /// Runs the 16x16 inverse hybrid transform selected by <paramref name="txType"/> over
  /// <paramref name="input"/> and accumulates the rounded result into <paramref name="dest"/>.
  /// </summary>
  /// <param name="input">Coefficients, consumed as 16 consecutive rows of 16 values.</param>
  /// <param name="dest">Destination pixels; element (row, col) lives at <c>row * stride + col</c>.</param>
  /// <param name="stride">Element distance between vertically adjacent pixels of <paramref name="dest"/>.</param>
  /// <param name="txType">Index into <c>Iht16</c> (0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST).</param>
  public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
  {
      const int Size = 16;

      Span<int> output = stackalloc int[Size * Size];
      Span<int> tempIn = stackalloc int[Size];
      Span<int> tempOut = stackalloc int[Size];
      Transform2D ht = Iht16[txType];

      // Rows
      for (int row = 0; row < Size; ++row)
      {
          int offset = row * Size;
          ht.Rows(input.Slice(offset), output.Slice(offset));
      }

      // Columns
      for (int col = 0; col < Size; ++col)
      {
          for (int row = 0; row < Size; ++row)
          {
              tempIn[row] = output[row * Size + col];
          }

          ht.Cols(tempIn, tempOut);

          for (int row = 0; row < Size; ++row)
          {
              int pos = row * stride + col;
              dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 6));
          }
      }
  }
  136. // Idct
  /// <summary>
  /// Dispatches a 4x4 inverse DCT on <paramref name="eob"/>: <c>Idct4x416Add</c> when it is
  /// greater than 1, <c>Idct4x41Add</c> otherwise.
  /// </summary>
  public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  {
      if (eob <= 1)
      {
          Idct4x41Add(input, dest, stride);
          return;
      }

      Idct4x416Add(input, dest, stride);
  }
  /// <summary>
  /// Dispatches a 4x4 inverse WHT on <paramref name="eob"/>: <c>Iwht4x416Add</c> when it is
  /// greater than 1, <c>Iwht4x41Add</c> otherwise.
  /// </summary>
  public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  {
      if (eob <= 1)
      {
          Iwht4x41Add(input, dest, stride);
          return;
      }

      Iwht4x416Add(input, dest, stride);
  }
  /// <summary>
  /// Dispatches an 8x8 inverse DCT to the variant matching <paramref name="eob"/>, so that
  /// blocks with few non-zero coefficients take a cheaper path.
  /// </summary>
  public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  {
      if (eob > 12)
      {
          Idct8x864Add(input, dest, stride);
      }
      else if (eob != 1)
      {
          Idct8x812Add(input, dest, stride);
      }
      else
      {
          // DC only DCT coefficient
          Idct8x81Add(input, dest, stride);
      }
  }
  /// <summary>
  /// Dispatches a 16x16 inverse DCT to the variant matching <paramref name="eob"/>, so that
  /// blocks with few non-zero coefficients take a cheaper path.
  /// </summary>
  public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  {
      if (eob == 1)
      {
          // DC only DCT coefficient.
          Idct16x161Add(input, dest, stride);
          return;
      }

      if (eob <= 10)
      {
          Idct16x1610Add(input, dest, stride);
          return;
      }

      if (eob <= 38)
      {
          Idct16x1638Add(input, dest, stride);
          return;
      }

      Idct16x16256Add(input, dest, stride);
  }
  /// <summary>
  /// Dispatches a 32x32 inverse DCT to the variant matching <paramref name="eob"/>.
  /// </summary>
  public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  {
      if (eob == 1)
      {
          Idct32x321Add(input, dest, stride);
          return;
      }

      if (eob <= 34)
      {
          // Non-zero coeff only in upper-left 8x8
          Idct32x3234Add(input, dest, stride);
          return;
      }

      if (eob <= 135)
      {
          // Non-zero coeff only in upper-left 16x16
          Idct32x32135Add(input, dest, stride);
          return;
      }

      Idct32x321024Add(input, dest, stride);
  }
  221. // Iht
  /// <summary>
  /// 4x4 entry point: <see cref="TxType.DctDct"/> goes through <c>Idct4x4Add</c> (which uses
  /// <paramref name="eob"/>); any other type goes to <c>Iht4x416Add</c> with the type cast to int.
  /// </summary>
  public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  {
      if (txType != TxType.DctDct)
      {
          Iht4x416Add(input, dest, stride, (int)txType);
          return;
      }

      Idct4x4Add(input, dest, stride, eob);
  }
  /// <summary>
  /// 8x8 entry point: <see cref="TxType.DctDct"/> goes through <c>Idct8x8Add</c> (which uses
  /// <paramref name="eob"/>); any other type goes to <c>Iht8x864Add</c> with the type cast to int.
  /// </summary>
  public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  {
      if (txType != TxType.DctDct)
      {
          Iht8x864Add(input, dest, stride, (int)txType);
          return;
      }

      Idct8x8Add(input, dest, stride, eob);
  }
  /// <summary>
  /// 16x16 entry point: <see cref="TxType.DctDct"/> goes through <c>Idct16x16Add</c> (which uses
  /// <paramref name="eob"/>); any other type goes to <c>Iht16x16256Add</c> with the type cast to int.
  /// </summary>
  public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
  {
      if (txType != TxType.DctDct)
      {
          Iht16x16256Add(input, dest, stride, (int)txType);
          return;
      }

      Idct16x16Add(input, dest, stride, eob);
  }
  // High-bitdepth 4-point transform pairs (columns, rows), indexed by transform type:
  // 0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST.
  private static readonly HighbdTransform2D[] HighbdIht4 =
  {
      new HighbdTransform2D(HighbdIdct4, HighbdIdct4),
      new HighbdTransform2D(HighbdIadst4, HighbdIdct4),
      new HighbdTransform2D(HighbdIdct4, HighbdIadst4),
      new HighbdTransform2D(HighbdIadst4, HighbdIadst4),
  };
  /// <summary>
  /// Runs the 4x4 high-bitdepth inverse hybrid transform selected by <paramref name="txType"/>
  /// over <paramref name="input"/> and accumulates the rounded result into <paramref name="dest"/>.
  /// </summary>
  /// <param name="input">Coefficients, consumed as 4 consecutive rows of 4 values.</param>
  /// <param name="dest">Destination samples; element (row, col) lives at <c>row * stride + col</c>.</param>
  /// <param name="stride">Element distance between vertically adjacent samples of <paramref name="dest"/>.</param>
  /// <param name="txType">Index into <c>HighbdIht4</c> (0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST).</param>
  /// <param name="bd">Forwarded unchanged to the 1-D transforms and to <c>HighbdClipPixelAdd</c>.</param>
  public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
  {
      const int Size = 4;

      Span<int> output = stackalloc int[Size * Size];
      Span<int> tempIn = stackalloc int[Size];
      Span<int> tempOut = stackalloc int[Size];

      // Resolve the transform pair once instead of indexing the table on every
      // iteration, the same way the 8x8 and 16x16 variants do.
      HighbdTransform2D ht = HighbdIht4[txType];

      // Inverse transform row vectors.
      for (int row = 0; row < Size; ++row)
      {
          int offset = row * Size;
          ht.Rows(input.Slice(offset), output.Slice(offset), bd);
      }

      // Inverse transform column vectors.
      for (int col = 0; col < Size; ++col)
      {
          // Gather the column into contiguous scratch storage for the 1-D transform.
          for (int row = 0; row < Size; ++row)
          {
              tempIn[row] = output[row * Size + col];
          }

          ht.Cols(tempIn, tempOut, bd);

          for (int row = 0; row < Size; ++row)
          {
              int pos = row * stride + col;
              dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 4), bd);
          }
      }
  }
  // High-bitdepth 8-point transform pairs (columns, rows), indexed by transform type:
  // 0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST.
  private static readonly HighbdTransform2D[] HighIht8 =
  {
      new HighbdTransform2D(HighbdIdct8, HighbdIdct8),
      new HighbdTransform2D(HighbdIadst8, HighbdIdct8),
      new HighbdTransform2D(HighbdIdct8, HighbdIadst8),
      new HighbdTransform2D(HighbdIadst8, HighbdIadst8),
  };
  /// <summary>
  /// Runs the 8x8 high-bitdepth inverse hybrid transform selected by <paramref name="txType"/>
  /// over <paramref name="input"/> and accumulates the rounded result into <paramref name="dest"/>.
  /// </summary>
  /// <param name="input">Coefficients, consumed as 8 consecutive rows of 8 values.</param>
  /// <param name="dest">Destination samples; element (row, col) lives at <c>row * stride + col</c>.</param>
  /// <param name="stride">Element distance between vertically adjacent samples of <paramref name="dest"/>.</param>
  /// <param name="txType">Index into <c>HighIht8</c> (0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST).</param>
  /// <param name="bd">Forwarded unchanged to the 1-D transforms and to <c>HighbdClipPixelAdd</c>.</param>
  public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
  {
      const int Size = 8;

      Span<int> output = stackalloc int[Size * Size];
      Span<int> tempIn = stackalloc int[Size];
      Span<int> tempOut = stackalloc int[Size];
      HighbdTransform2D ht = HighIht8[txType];

      // Inverse transform row vectors.
      // Every row must land at its own offset in the intermediate buffer. Advancing the
      // write cursor with "output.Slice(8)" restarts from the buffer origin each time,
      // which sends rows 1..7 to row 1 and leaves rows 2..7 unwritten; indexing by row
      // offset avoids the cursor altogether.
      for (int row = 0; row < Size; ++row)
      {
          int offset = row * Size;
          ht.Rows(input.Slice(offset), output.Slice(offset), bd);
      }

      // Inverse transform column vectors.
      for (int col = 0; col < Size; ++col)
      {
          // Gather the column into contiguous scratch storage for the 1-D transform.
          for (int row = 0; row < Size; ++row)
          {
              tempIn[row] = output[row * Size + col];
          }

          ht.Cols(tempIn, tempOut, bd);

          for (int row = 0; row < Size; ++row)
          {
              int pos = row * stride + col;
              dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 5), bd);
          }
      }
  }
  // High-bitdepth 16-point transform pairs (columns, rows), indexed by transform type:
  // 0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST.
  private static readonly HighbdTransform2D[] HighIht16 =
  {
      new HighbdTransform2D(HighbdIdct16, HighbdIdct16),
      new HighbdTransform2D(HighbdIadst16, HighbdIdct16),
      new HighbdTransform2D(HighbdIdct16, HighbdIadst16),
      new HighbdTransform2D(HighbdIadst16, HighbdIadst16),
  };
  /// <summary>
  /// Runs the 16x16 high-bitdepth inverse hybrid transform selected by <paramref name="txType"/>
  /// over <paramref name="input"/> and accumulates the rounded result into <paramref name="dest"/>.
  /// </summary>
  /// <param name="input">Coefficients, consumed as 16 consecutive rows of 16 values.</param>
  /// <param name="dest">Destination samples; element (row, col) lives at <c>row * stride + col</c>.</param>
  /// <param name="stride">Element distance between vertically adjacent samples of <paramref name="dest"/>.</param>
  /// <param name="txType">Index into <c>HighIht16</c> (0 = DCT_DCT, 1 = ADST_DCT, 2 = DCT_ADST, 3 = ADST_ADST).</param>
  /// <param name="bd">Forwarded unchanged to the 1-D transforms and to <c>HighbdClipPixelAdd</c>.</param>
  public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
  {
      const int Size = 16;

      Span<int> output = stackalloc int[Size * Size];
      Span<int> tempIn = stackalloc int[Size];
      Span<int> tempOut = stackalloc int[Size];
      HighbdTransform2D ht = HighIht16[txType];

      // Rows
      // Every row must land at its own offset in the intermediate buffer. Advancing the
      // write cursor with "output.Slice(16)" restarts from the buffer origin each time,
      // which sends rows 1..15 to row 1 and leaves rows 2..15 unwritten; indexing by row
      // offset avoids the cursor altogether.
      for (int row = 0; row < Size; ++row)
      {
          int offset = row * Size;
          ht.Rows(input.Slice(offset), output.Slice(offset), bd);
      }

      // Columns
      for (int col = 0; col < Size; ++col)
      {
          // Gather the column into contiguous scratch storage for the 1-D transform.
          for (int row = 0; row < Size; ++row)
          {
              tempIn[row] = output[row * Size + col];
          }

          ht.Cols(tempIn, tempOut, bd);

          for (int row = 0; row < Size; ++row)
          {
              int pos = row * stride + col;
              dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 6), bd);
          }
      }
  }
  363. // Idct
  /// <summary>
  /// Dispatches a high-bitdepth 4x4 inverse DCT on <paramref name="eob"/>:
  /// <c>HighbdIdct4x416Add</c> when it is greater than 1, <c>HighbdIdct4x41Add</c> otherwise.
  /// </summary>
  public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  {
      if (eob <= 1)
      {
          HighbdIdct4x41Add(input, dest, stride, bd);
          return;
      }

      HighbdIdct4x416Add(input, dest, stride, bd);
  }
  /// <summary>
  /// Dispatches a high-bitdepth 4x4 inverse WHT on <paramref name="eob"/>:
  /// <c>HighbdIwht4x416Add</c> when it is greater than 1, <c>HighbdIwht4x41Add</c> otherwise.
  /// </summary>
  public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  {
      if (eob <= 1)
      {
          HighbdIwht4x41Add(input, dest, stride, bd);
          return;
      }

      HighbdIwht4x416Add(input, dest, stride, bd);
  }
  /// <summary>
  /// Dispatches a high-bitdepth 8x8 inverse DCT to the variant matching <paramref name="eob"/>,
  /// so that blocks with few non-zero coefficients take a cheaper path.
  /// </summary>
  public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  {
      if (eob > 12)
      {
          HighbdIdct8x864Add(input, dest, stride, bd);
      }
      else if (eob != 1)
      {
          HighbdIdct8x812Add(input, dest, stride, bd);
      }
      else
      {
          // DC only DCT coefficient
          vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd);
      }
  }
  /// <summary>
  /// Dispatches a high-bitdepth 16x16 inverse DCT to the variant matching <paramref name="eob"/>,
  /// so that blocks with few non-zero coefficients take a cheaper path.
  /// </summary>
  public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  {
      if (eob == 1)
      {
          // DC only DCT coefficient.
          HighbdIdct16x161Add(input, dest, stride, bd);
          return;
      }

      if (eob <= 10)
      {
          HighbdIdct16x1610Add(input, dest, stride, bd);
          return;
      }

      if (eob <= 38)
      {
          HighbdIdct16x1638Add(input, dest, stride, bd);
          return;
      }

      HighbdIdct16x16256Add(input, dest, stride, bd);
  }
  /// <summary>
  /// Dispatches a high-bitdepth 32x32 inverse DCT to the variant matching <paramref name="eob"/>.
  /// </summary>
  public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  {
      if (eob == 1)
      {
          HighbdIdct32x321Add(input, dest, stride, bd);
          return;
      }

      if (eob <= 34)
      {
          // Non-zero coeff only in upper-left 8x8
          HighbdIdct32x3234Add(input, dest, stride, bd);
          return;
      }

      if (eob <= 135)
      {
          HighbdIdct32x32135Add(input, dest, stride, bd);
          return;
      }

      HighbdIdct32x321024Add(input, dest, stride, bd);
  }
  448. // Iht
  /// <summary>
  /// High-bitdepth 4x4 entry point: <see cref="TxType.DctDct"/> goes through
  /// <c>HighbdIdct4x4Add</c> (which uses <paramref name="eob"/>); any other type goes to
  /// <c>HighbdIht4x416Add</c> with the type cast to int.
  /// </summary>
  public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  {
      if (txType != TxType.DctDct)
      {
          HighbdIht4x416Add(input, dest, stride, (int)txType, bd);
          return;
      }

      HighbdIdct4x4Add(input, dest, stride, eob, bd);
  }
  /// <summary>
  /// High-bitdepth 8x8 entry point: <see cref="TxType.DctDct"/> goes through
  /// <c>HighbdIdct8x8Add</c> (which uses <paramref name="eob"/>); any other type goes to
  /// <c>HighbdIht8x864Add</c> with the type cast to int.
  /// </summary>
  public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  {
      if (txType != TxType.DctDct)
      {
          HighbdIht8x864Add(input, dest, stride, (int)txType, bd);
          return;
      }

      HighbdIdct8x8Add(input, dest, stride, eob, bd);
  }
  /// <summary>
  /// High-bitdepth 16x16 entry point: <see cref="TxType.DctDct"/> goes through
  /// <c>HighbdIdct16x16Add</c> (which uses <paramref name="eob"/>); any other type goes to
  /// <c>HighbdIht16x16256Add</c> with the type cast to int.
  /// </summary>
  public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
  {
      if (txType != TxType.DctDct)
      {
          HighbdIht16x16256Add(input, dest, stride, (int)txType, bd);
          return;
      }

      HighbdIdct16x16Add(input, dest, stride, eob, bd);
  }
  482. }
  483. }