LoopFilter.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. using Ryujinx.Common.Memory;
  2. using Ryujinx.Graphics.Nvdec.Vp9.Common;
  3. using Ryujinx.Graphics.Nvdec.Vp9.Types;
  4. using System;
  5. using System.Runtime.InteropServices;
  6. namespace Ryujinx.Graphics.Nvdec.Vp9
  7. {
  8. internal static class LoopFilter
  9. {
  /// <summary>
  /// Highest loop filter level. Sharpness limits are computed for every level from 0 up to
  /// and including this value, and computed filter levels are clamped to it.
  /// </summary>
  public const int MaxLoopFilter = 63;

  /// <summary>
  /// Presumably the number of per-reference-frame loop filter deltas; not referenced in this file.
  /// </summary>
  public const int MaxRefLfDeltas = 4;

  /// <summary>
  /// Number of mode delta slots walked when the per-mode filter levels are computed.
  /// </summary>
  public const int MaxModeLfDeltas = 2;
  // 64 bit masks for the left edges, indexed by transform size. Each set bit
  // marks a position where a loop filter should be applied across the left
  // border of an 8x8 block boundary.
  //
  // For TX_16X16 (low order byte first) the mask looks like this:
  //
  //    10101010
  //    10101010
  //    10101010
  //    10101010
  //    10101010
  //    10101010
  //    10101010
  //    10101010
  //
  // That is, a loop filter is applied to every other 8x8 column horizontally.
  private static readonly ulong[] Left64X64TxformMask =
  {
      0xffffffffffffffffUL, // TX_4X4
      0xffffffffffffffffUL, // TX_8x8
      0x5555555555555555UL, // TX_16x16
      0x1111111111111111UL, // TX_32x32
  };

  // 64 bit masks for the above edges, indexed by transform size. Each set bit
  // marks a position where a loop filter should be applied across the top
  // border of an 8x8 block boundary.
  //
  // For TX_32x32 (low order byte first) the mask looks like this:
  //
  //    11111111
  //    00000000
  //    00000000
  //    00000000
  //    11111111
  //    00000000
  //    00000000
  //    00000000
  //
  // That is, a loop filter is applied to every fourth row vertically.
  private static readonly ulong[] Above64X64TxformMask =
  {
      0xffffffffffffffffUL, // TX_4X4
      0xffffffffffffffffUL, // TX_8x8
      0x00ff00ff00ff00ffUL, // TX_16x16
      0x000000ff000000ffUL, // TX_32x32
  };

  // 64 bit masks for prediction sizes (left), indexed by block size. Each set
  // bit marks the left border of an 8x8 block. The masks are aligned to the
  // right-most appropriate bit and are shifted into place by the caller.
  //
  // For BLOCK_16X32 (low order byte first) the mask looks like this:
  //
  //    10000000
  //    10000000
  //    10000000
  //    10000000
  //    00000000
  //    00000000
  //    00000000
  //    00000000
  private static readonly ulong[] LeftPredictionMask =
  {
      0x0000000000000001UL, // BLOCK_4X4
      0x0000000000000001UL, // BLOCK_4X8
      0x0000000000000001UL, // BLOCK_8X4
      0x0000000000000001UL, // BLOCK_8X8
      0x0000000000000101UL, // BLOCK_8X16
      0x0000000000000001UL, // BLOCK_16X8
      0x0000000000000101UL, // BLOCK_16X16
      0x0000000001010101UL, // BLOCK_16X32
      0x0000000000000101UL, // BLOCK_32X16
      0x0000000001010101UL, // BLOCK_32X32
      0x0101010101010101UL, // BLOCK_32X64
      0x0000000001010101UL, // BLOCK_64X32
      0x0101010101010101UL, // BLOCK_64X64
  };

  // 64 bit masks for prediction sizes (above), indexed by block size; shifted
  // into place and OR-ed into the above mask by the caller.
  private static readonly ulong[] AbovePredictionMask =
  {
      0x0000000000000001UL, // BLOCK_4X4
      0x0000000000000001UL, // BLOCK_4X8
      0x0000000000000001UL, // BLOCK_8X4
      0x0000000000000001UL, // BLOCK_8X8
      0x0000000000000001UL, // BLOCK_8X16
      0x0000000000000003UL, // BLOCK_16X8
      0x0000000000000003UL, // BLOCK_16X16
      0x0000000000000003UL, // BLOCK_16X32
      0x000000000000000fUL, // BLOCK_32X16
      0x000000000000000fUL, // BLOCK_32X32
      0x000000000000000fUL, // BLOCK_32X64
      0x00000000000000ffUL, // BLOCK_64X32
      0x00000000000000ffUL, // BLOCK_64X64
  };

  // 64 bit area masks, indexed by block size. A bit is set for every 8x8 block
  // covered by a block of the given size when that block sits in the top-left
  // corner of the 64x64 block; the caller shifts the mask into place.
  private static readonly ulong[] SizeMask =
  {
      0x0000000000000001UL, // BLOCK_4X4
      0x0000000000000001UL, // BLOCK_4X8
      0x0000000000000001UL, // BLOCK_8X4
      0x0000000000000001UL, // BLOCK_8X8
      0x0000000000000101UL, // BLOCK_8X16
      0x0000000000000003UL, // BLOCK_16X8
      0x0000000000000303UL, // BLOCK_16X16
      0x0000000003030303UL, // BLOCK_16X32
      0x0000000000000f0fUL, // BLOCK_32X16
      0x000000000f0f0f0fUL, // BLOCK_32X32
      0x0f0f0f0f0f0f0f0fUL, // BLOCK_32X64
      0x00000000ffffffffUL, // BLOCK_64X32
      0xffffffffffffffffUL, // BLOCK_64X64
  };

  // Masks for the left and above borders.
  private const ulong LeftBorder = 0x1111111111111111UL;
  private const ulong AboveBorder = 0x000000ff000000ffUL;
  // 16 bit masks for the uv left edges, indexed by uv transform size.
  private static readonly ushort[] Left64X64TxformMaskUv =
  {
      0xffff, // TX_4X4
      0xffff, // TX_8x8
      0x5555, // TX_16x16
      0x1111, // TX_32x32
  };

  // 16 bit masks for the uv above edges, indexed by uv transform size.
  private static readonly ushort[] Above64X64TxformMaskUv =
  {
      0xffff, // TX_4X4
      0xffff, // TX_8x8
      0x0f0f, // TX_16x16
      0x000f, // TX_32x32
  };

  // 16 bit left mask to shift and set for each uv prediction size.
  private static readonly ushort[] LeftPredictionMaskUv =
  {
      0x0001, // BLOCK_4X4
      0x0001, // BLOCK_4X8
      0x0001, // BLOCK_8X4
      0x0001, // BLOCK_8X8
      0x0001, // BLOCK_8X16
      0x0001, // BLOCK_16X8
      0x0001, // BLOCK_16X16
      0x0011, // BLOCK_16X32
      0x0001, // BLOCK_32X16
      0x0011, // BLOCK_32X32
      0x1111, // BLOCK_32X64
      0x0011, // BLOCK_64X32
      0x1111, // BLOCK_64X64
  };

  // 16 bit above mask to shift and set for each uv prediction size.
  private static readonly ushort[] AbovePredictionMaskUv =
  {
      0x0001, // BLOCK_4X4
      0x0001, // BLOCK_4X8
      0x0001, // BLOCK_8X4
      0x0001, // BLOCK_8X8
      0x0001, // BLOCK_8X16
      0x0001, // BLOCK_16X8
      0x0001, // BLOCK_16X16
      0x0001, // BLOCK_16X32
      0x0003, // BLOCK_32X16
      0x0003, // BLOCK_32X32
      0x0003, // BLOCK_32X64
      0x000f, // BLOCK_64X32
      0x000f, // BLOCK_64X64
  };

  // 16 bit area mask to shift and set for each uv prediction size.
  private static readonly ushort[] SizeMaskUv =
  {
      0x0001, // BLOCK_4X4
      0x0001, // BLOCK_4X8
      0x0001, // BLOCK_8X4
      0x0001, // BLOCK_8X8
      0x0001, // BLOCK_8X16
      0x0001, // BLOCK_16X8
      0x0001, // BLOCK_16X16
      0x0011, // BLOCK_16X32
      0x0003, // BLOCK_32X16
      0x0033, // BLOCK_32X32
      0x3333, // BLOCK_32X64
      0x00ff, // BLOCK_64X32
      0xffff, // BLOCK_64X64
  };

  // Masks for the left and above borders on the 16 bit uv scale.
  private const ushort LeftBorderUv = 0x1111;
  private const ushort AboveBorderUv = 0x000f;
  // Maps a prediction mode to the mode slot used when looking up a filter
  // level: slot 0 for every intra mode and for ZEROMV, slot 1 for the
  // remaining inter modes.
  private static readonly int[] ModeLfLut =
  {
      // INTRA_MODES
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      // INTER_MODES (ZEROMV == 0)
      1, 1, 0, 1,
  };
  // Returns the filter level stored for the block's segment id, its first
  // reference frame and the mode slot that ModeLfLut assigns to its mode.
  private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi)
      => lfiN.Lvl[mi.SegmentId][mi.RefFrame[0]][ModeLfLut[(int)mi.Mode]];
  // Returns a reference to the loop filter mask that covers (miRow, miCol).
  // Masks are stored row-major with LfmStride entries per row, one entry for
  // every 8 mode-info rows/columns (hence the shifts by 3).
  private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol)
  {
      int maskRow = miRow >> 3;
      int maskCol = miCol >> 3;

      return ref lf.Lfm[maskCol + (maskRow * lf.LfmStride)];
  }
  // One entry per 8x8 block of a superblock, indexed [row][column]. A 1 marks
  // the first (top-left) block of a 16x16 or greater area: even rows have
  // every other column set, odd rows are all zero.
  private static readonly byte[][] FirstBlockIn16x16 =
  {
      new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 },
      new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
      new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 },
      new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
      new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 },
      new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
      new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 },
      new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
  };
  // Sets up the loop filter bit masks for the block at (miRow, miCol) inside
  // its 64x64 region, and records the block's filter level.
  //
  // cm           - frame state holding the filter level table and the masks.
  // mi           - mode info of the block (size, transform size, skip flag).
  // miRow, miCol - position of the block; only the low 3 bits select the
  //                position inside the 64x64 region.
  // bw, bh       - number of filter level entries written per row and number
  //                of rows written (presumably the block size in 8x8 units).
  //
  // Nothing is written when the block's filter level is 0.
  public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh)
  {
      BlockSize bsize = mi.SbType;
      TxSize lumaTx = mi.TxSize;
      int level = GetFilterLevel(ref cm.LfInfo, ref mi);
      TxSize chromaTx = Luts.UvTxsizeLookup[(int)bsize][(int)lumaTx][1][1];

      ref LoopFilterMask mask = ref GetLfm(ref cm.Lf, miRow, miCol);
      ref ulong lumaLeft = ref mask.LeftY[(int)lumaTx];
      ref ulong lumaAbove = ref mask.AboveY[(int)lumaTx];
      ref ushort chromaLeft = ref mask.LeftUv[(int)chromaTx];
      ref ushort chromaAbove = ref mask.AboveUv[(int)chromaTx];

      // Position of the block inside the 64x64 region, in 8x8 units.
      int sbRow = miRow & 7;
      int sbCol = miCol & 7;

      // Luma masks hold 8 bits per row; uv masks hold 4 bits per row at half resolution.
      int lumaShift = sbCol + (sbRow << 3);
      int chromaShift = (sbCol >> 1) + ((sbRow >> 1) << 2);

      // The uv masks are only built from the first 8x8 block of each 16x16 area.
      bool buildChroma = FirstBlockIn16x16[sbRow][sbCol] != 0;

      if (level == 0)
      {
          return;
      }

      // Record the filter level for every 8x8 position the block covers.
      for (int row = 0, offset = lumaShift; row < bh; row++, offset += 8)
      {
          MemoryMarshal.CreateSpan(ref mask.LflY[offset], 64 - offset).Slice(0, bw).Fill((byte)level);
      }

      // These set 1 in the current block size for the block size edges.
      // For instance if the block size is 32x16, we'll set:
      //    above =   1111
      //              0000
      //    and
      //    left  =   1000
      //          =   1000
      // NOTE: in this example the low bit is left most, so ( 1000 ) is stored
      //       as 1, not 8.
      //
      // U and V set things on a 16 bit scale.
      lumaAbove |= AbovePredictionMask[(int)bsize] << lumaShift;
      lumaLeft |= LeftPredictionMask[(int)bsize] << lumaShift;

      if (buildChroma)
      {
          chromaAbove |= (ushort)(AbovePredictionMaskUv[(int)bsize] << chromaShift);
          chromaLeft |= (ushort)(LeftPredictionMaskUv[(int)bsize] << chromaShift);
      }

      // If the block has no coefficients and is not intra we skip applying
      // the loop filter on the edges inside the block.
      if (mi.Skip != 0 && mi.IsInterBlock())
      {
          return;
      }

      ulong lumaArea = SizeMask[(int)bsize];

      // Add a mask for the transform size. The transform size mask is set to
      // be correct for a 64x64 prediction block size. Mask it to match the size
      // of the block we are working on and then shift it into place.
      lumaAbove |= (lumaArea & Above64X64TxformMask[(int)lumaTx]) << lumaShift;
      lumaLeft |= (lumaArea & Left64X64TxformMask[(int)lumaTx]) << lumaShift;

      if (buildChroma)
      {
          ushort chromaArea = SizeMaskUv[(int)bsize];

          chromaAbove |= (ushort)((chromaArea & Above64X64TxformMaskUv[(int)chromaTx]) << chromaShift);
          chromaLeft |= (ushort)((chromaArea & Left64X64TxformMaskUv[(int)chromaTx]) << chromaShift);
      }

      // Internal 4x4 block boundaries. These differ from the 4x4 boundaries on
      // the outside edge of an 8x8 in that the internal ones can be skipped and
      // don't depend on the prediction block size.
      if (lumaTx == TxSize.Tx4x4)
      {
          mask.Int4x4Y |= lumaArea << lumaShift;
      }

      if (buildChroma && chromaTx == TxSize.Tx4x4)
      {
          mask.Int4x4Uv |= (ushort)(SizeMaskUv[(int)bsize] << chromaShift);
      }
  }
  // Overwrites the frame's loop filter masks with default-constructed
  // LoopFilterMask values. One row of LfmStride masks is reset for every
  // group of 8 mode-info rows (rounded up). Does nothing when the frame's
  // filter level is 0.
  public static unsafe void ResetLfm(ref Vp9Common cm)
  {
      if (cm.Lf.FilterLevel == 0)
      {
          return;
      }

      var maskRows = (cm.MiRows + (Constants.MiBlockSize - 1)) >> 3;

      MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), maskRows * cm.Lf.LfmStride);
  }
  // Fills the Lim and Mblim threshold tables for every filter level from 0
  // to MaxLoopFilter, based on the given sharpness level.
  private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl)
  {
      // The shift only depends on the sharpness: 0 when it is not positive,
      // 1 for 1..4 and 2 above 4.
      int shift = sharpnessLvl > 4 ? 2 : (sharpnessLvl > 0 ? 1 : 0);

      for (int level = 0; level <= MaxLoopFilter; level++)
      {
          // Inside limit: the level scaled down by the sharpness, capped at
          // (9 - sharpness) when sharpness is in use, and never below 1.
          int insideLimit = level >> shift;

          if (sharpnessLvl > 0)
          {
              insideLimit = Math.Min(insideLimit, 9 - sharpnessLvl);
          }

          insideLimit = Math.Max(insideLimit, 1);

          byte lim = (byte)insideLimit;
          byte mblim = (byte)(2 * (level + 2) + insideLimit);

          lfi.Lfthr[level].Lim.ToSpan().Fill(lim);
          lfi.Lfthr[level].Mblim.ToSpan().Fill(mblim);
      }
  }
  // Computes the filter level table (per segment, reference frame and mode
  // slot) for a frame from its default filter level, and refreshes the
  // sharpness thresholds when the sharpness level changed since the last call.
  public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl)
  {
      // scale is the multiplier applied to the loop filter deltas:
      // 1 when defaultFiltLvl is between 0 and 31,
      // 2 when defaultFiltLvl is between 32 and 63.
      int scale = 1 << (defaultFiltLvl >> 5);

      ref LoopFilterInfoN lfi = ref cm.LfInfo;
      ref Types.LoopFilter lf = ref cm.Lf;
      ref Segmentation seg = ref cm.Seg;

      // Update limits if sharpness has changed.
      if (lf.LastSharpnessLevel != lf.SharpnessLevel)
      {
          UpdateSharpness(ref lfi, lf.SharpnessLevel);
          lf.LastSharpnessLevel = lf.SharpnessLevel;
      }

      for (int segId = 0; segId < Constants.MaxSegments; segId++)
      {
          int segLevel = defaultFiltLvl;

          // A segment may replace the default level (absolute data) or offset it (delta data).
          if (seg.IsSegFeatureActive(segId, SegLvlFeatures.SegLvlAltLf) != 0)
          {
              int data = seg.GetSegData(segId, SegLvlFeatures.SegLvlAltLf);
              int requested = seg.AbsDelta == Constants.SegmentAbsData ? data : defaultFiltLvl + data;

              segLevel = Math.Clamp(requested, 0, MaxLoopFilter);
          }

          if (!lf.ModeRefDeltaEnabled)
          {
              // We could get rid of this if we assume that deltas are set to
              // zero when not in use; encoder always uses deltas.
              MemoryMarshal.Cast<Array2<byte>, byte>(lfi.Lvl[segId].ToSpan()).Fill((byte)segLevel);

              continue;
          }

          // The intra frame only uses its reference delta and mode slot 0.
          int intraLevel = segLevel + lf.RefDeltas[Constants.IntraFrame] * scale;
          lfi.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLevel, 0, MaxLoopFilter);

          for (int refFrame = Constants.LastFrame; refFrame < Constants.MaxRefFrames; ++refFrame)
          {
              int refLevel = segLevel + lf.RefDeltas[refFrame] * scale;

              for (int mode = 0; mode < MaxModeLfDeltas; ++mode)
              {
                  int interLevel = refLevel + lf.ModeDeltas[mode] * scale;
                  lfi.Lvl[segId][refFrame][mode] = (byte)Math.Clamp(interLevel, 0, MaxLoopFilter);
              }
          }
      }
  }
  379. }
  380. }