| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325 |
- using Ryujinx.Common.Memory;
- using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
- using Ryujinx.Graphics.Nvdec.Vp9.Types;
- using Ryujinx.Graphics.Video;
- using System;
- using System.Diagnostics;
- using System.Runtime.InteropServices;
- using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
- namespace Ryujinx.Graphics.Nvdec.Vp9
- {
- internal static class Detokenize
- {
// Positions inside the three-entry probability set that DecodeCoefs selects
// with coefProbs[band][ctx].

/// <summary>Probability index read first for each token; a zero bit ends the block.</summary>
private const int EobContextNode = 0;

/// <summary>Probability index read next; a zero bit means the token is a zero coefficient.</summary>
private const int ZeroContextNode = 1;

/// <summary>Probability index that separates a "one" token (zero bit) from larger tokens.</summary>
private const int OneContextNode = 2;
/// <summary>
/// Derives the context for scan position <paramref name="c"/> from the cached tokens of
/// its two neighbors: the rounded-up average of the two cached values.
/// </summary>
/// <param name="neighbors">Neighbor table holding two entries per scan position.</param>
/// <param name="tokenCache">Per-coefficient token values, indexed by the neighbor entries.</param>
/// <param name="c">Scan position whose context is requested.</param>
/// <returns>(1 + first neighbor token + second neighbor token) / 2, rounded down.</returns>
private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
{
    const int NeighborsPerPosition = 2;

    int firstEntry = NeighborsPerPosition * c;
    int firstToken = tokenCache[neighbors[firstEntry]];
    int secondToken = tokenCache[neighbors[firstEntry + 1]];

    return (firstToken + secondToken + 1) >> 1;
}
/// <summary>
/// Reads <paramref name="n"/> bits from the reader, most significant bit first, using one
/// probability per bit, and returns them assembled into an integer.
/// </summary>
/// <param name="r">Bit reader the bits are pulled from.</param>
/// <param name="probs">Probability for each bit; entry i is used for the i-th bit read.</param>
/// <param name="n">Number of bits to read.</param>
/// <param name="value">Caller-held reader value, forwarded to ReadBool.</param>
/// <param name="count">Caller-held reader count, forwarded to ReadBool.</param>
/// <param name="range">Caller-held reader range, forwarded to ReadBool.</param>
/// <returns>The assembled value; 0 when <paramref name="n"/> is not positive.</returns>
private static int ReadCoeff(
    ref Reader r,
    ReadOnlySpan<byte> probs,
    int n,
    ref ulong value,
    ref int count,
    ref uint range)
{
    int assembled = 0;
    int bitIndex = 0;

    while (bitIndex < n)
    {
        int bit = r.ReadBool(probs[bitIndex], ref value, ref count, ref range);

        // Shift the bits read so far up and append the new one at the bottom.
        assembled = (assembled << 1) | bit;
        bitIndex++;
    }

    return assembled;
}
/// <summary>
/// Reads the coefficient tokens of one transform block from the bit reader, dequantizes them
/// and stores the results in <paramref name="dqcoeff"/> at the indices given by
/// <paramref name="scan"/>.
/// </summary>
/// <param name="xd">Block state; supplies the probabilities (Fc), the optional counters (Counts), the bit depth (Bd) and the current mode info (Mi[0]).</param>
/// <param name="type">Plane type; used as an index into the probability and counter tables.</param>
/// <param name="dqcoeff">Destination for the dequantized coefficients.</param>
/// <param name="txSize">Transform size; it fixes the maximum number of scan positions (16 shifted left by twice its value).</param>
/// <param name="dq">Dequantization factors: element 0 is used for the first scan position, element 1 for every later one.</param>
/// <param name="ctx">Context used for the first token.</param>
/// <param name="scan">Maps a scan position to a coefficient index.</param>
/// <param name="nb">Neighbor table forwarded to GetCoefContext.</param>
/// <param name="r">Bit reader; its Value, Range and Count are written back before returning.</param>
/// <returns>The number of scan positions consumed when decoding stopped.</returns>
private static int DecodeCoefs(
    ref MacroBlockD xd,
    PlaneType type,
    Span<int> dqcoeff,
    TxSize txSize,
    ref Array2<short> dq,
    int ctx,
    ReadOnlySpan<short> scan,
    ReadOnlySpan<short> nb,
    ref Reader r)
{
    int txIndex = (int)txSize;
    int planeIndex = (int)type;
    int eobLimit = 16 << (txIndex << 1);
    int shift = txSize == TxSize.Tx32x32 ? 1 : 0;
    int interIndex = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;

    // Counters are optional: they are only touched when xd.Counts is not null.
    bool collectCounts = !xd.Counts.IsNull;
    ref Vp9BackwardUpdates counts = ref xd.Counts.Value;

    ref Vp9EntropyProbs fc = ref xd.Fc.Value;
    ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[txIndex][planeIndex][interIndex];

    // Token value of every coefficient decoded so far; read back by GetCoefContext.
    Span<byte> tokenCache = stackalloc byte[32 * 32];

    // One band entry is consumed per scan position, in order.
    ReadOnlySpan<byte> bands = Luts.get_band_translate(txSize);
    int nextBand = 0;

    // The category 6 probabilities and bit count depend on the bit depth.
    ReadOnlySpan<byte> cat6Prob;
    int cat6Bits;

    if (xd.Bd == 12)
    {
        cat6Prob = Luts.Vp9Cat6ProbHigh12;
        cat6Bits = 18;
    }
    else if (xd.Bd == 10)
    {
        cat6Prob = Luts.Vp9Cat6ProbHigh12.Slice(2);
        cat6Bits = 16;
    }
    else
    {
        cat6Prob = Luts.Vp9Cat6Prob;
        cat6Bits = 14;
    }

    short dqv = dq[0];
    int c = 0;

    // Keep value, range, and count as locals. The compiler produces better
    // results with the locals than using r directly.
    ulong value = r.Value;
    uint range = r.Range;
    int count = r.Count;

    while (c < eobLimit)
    {
        int band = bands[nextBand++];
        ref Array3<byte> prob = ref coefProbs[band][ctx];

        if (collectCounts)
        {
            ++counts.EobBranch[txIndex][planeIndex][interIndex][band][ctx];
        }

        // A zero bit on the EOB node ends the block.
        if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0)
        {
            if (collectCounts)
            {
                ++counts.Coef[txIndex][planeIndex][interIndex][band][ctx][Constants.EobModelToken];
            }

            break;
        }

        bool outOfPositions = false;

        // Consume a run of zero tokens.
        while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0)
        {
            if (collectCounts)
            {
                ++counts.Coef[txIndex][planeIndex][interIndex][band][ctx][Constants.ZeroToken];
            }

            dqv = dq[1];
            tokenCache[scan[c]] = 0;
            c++;

            if (c >= eobLimit)
            {
                // Zero tokens at the end (no eob token)
                outOfPositions = true;
                break;
            }

            ctx = GetCoefContext(nb, tokenCache, c);
            band = bands[nextBand++];
            prob = ref coefProbs[band][ctx];
        }

        if (outOfPositions)
        {
            break;
        }

        int v;

        if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) == 0)
        {
            if (collectCounts)
            {
                ++counts.Coef[txIndex][planeIndex][interIndex][band][ctx][Constants.OneToken];
            }

            tokenCache[scan[c]] = 1;
            v = dqv >> shift;
        }
        else
        {
            ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];

            if (collectCounts)
            {
                ++counts.Coef[txIndex][planeIndex][interIndex][band][ctx][Constants.TwoToken];
            }

            if (r.ReadBool(p[0], ref value, ref count, ref range) == 0)
            {
                // Magnitude 2, 3 or 4.
                if (r.ReadBool(p[1], ref value, ref count, ref range) == 0)
                {
                    tokenCache[scan[c]] = 2;
                    v = (2 * dqv) >> shift;
                }
                else
                {
                    tokenCache[scan[c]] = 3;
                    v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> shift;
                }
            }
            else
            {
                // Category tokens: a base value plus extra bits.
                int val;

                if (r.ReadBool(p[3], ref value, ref count, ref range) == 0)
                {
                    tokenCache[scan[c]] = 4;

                    if (r.ReadBool(p[4], ref value, ref count, ref range) == 0)
                    {
                        val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
                    }
                    else
                    {
                        val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
                    }
                }
                else
                {
                    tokenCache[scan[c]] = 5;

                    if (r.ReadBool(p[5], ref value, ref count, ref range) == 0)
                    {
                        if (r.ReadBool(p[6], ref value, ref count, ref range) == 0)
                        {
                            val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
                        }
                        else
                        {
                            val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
                        }
                    }
                    else
                    {
                        if (r.ReadBool(p[7], ref value, ref count, ref range) == 0)
                        {
                            val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
                        }
                        else
                        {
                            val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
                        }
                    }
                }

                // Val may use 18-bits, so multiply in 64-bit before shifting.
                v = (int)(((long)val * dqv) >> shift);
            }
        }

        // The last bit of every non-zero token is its sign (non-zero bit: negative).
        dqcoeff[scan[c]] = (int)HighbdCheckRange(
            r.ReadBool(128, ref value, ref count, ref range) == 0 ? v : -v,
            xd.Bd);

        c++;
        ctx = GetCoefContext(nb, tokenCache, c);
        dqv = dq[1];
    }

    // Publish the locally tracked reader state.
    r.Value = value;
    r.Range = range;
    r.Count = count;

    return c;
}
/// <summary>
/// Computes the bit shifts applied to the context values written by DecodeBlockTokens when a
/// transform block extends past the limits stored in <paramref name="xd"/>.
/// </summary>
/// <param name="xd">Block state providing MaxBlocksWide and MaxBlocksHigh; a value of 0 disables the corresponding check.</param>
/// <param name="ctxShiftA">Set to 8 bits per block by which x + txSizeInBlocks exceeds MaxBlocksWide; left untouched otherwise.</param>
/// <param name="ctxShiftL">Set to 8 bits per block by which y + txSizeInBlocks exceeds MaxBlocksHigh; left untouched otherwise.</param>
/// <param name="x">Horizontal block position.</param>
/// <param name="y">Vertical block position.</param>
/// <param name="txSizeInBlocks">Transform size expressed in blocks.</param>
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
{
    bool exceedsWidth = xd.MaxBlocksWide != 0 && txSizeInBlocks + x > xd.MaxBlocksWide;

    if (exceedsWidth)
    {
        // One context entry is one byte, hence 8 bits per block of overshoot.
        ctxShiftA = 8 * (int)(txSizeInBlocks - (xd.MaxBlocksWide - x));
    }

    bool exceedsHeight = xd.MaxBlocksHigh != 0 && txSizeInBlocks + y > xd.MaxBlocksHigh;

    if (exceedsHeight)
    {
        ctxShiftL = 8 * (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y));
    }
}
/// <summary>
/// Maps a plane index to its plane type: value 0 for plane indices of 0 or less, value 1 for
/// any positive plane index.
/// </summary>
/// <param name="plane">Plane index.</param>
/// <returns>The plane type with numeric value 0 or 1.</returns>
private static PlaneType GetPlaneType(int plane)
{
    if (plane <= 0)
    {
        return (PlaneType)0;
    }

    return (PlaneType)1;
}
/// <summary>
/// Decodes the coefficient tokens of one transform block of a plane and updates the above and
/// left context entries covered by that block.
/// </summary>
/// <param name="twd">Tile worker state providing the bit reader and the block state.</param>
/// <param name="plane">Index of the plane being decoded.</param>
/// <param name="sc">Scan order supplying the scan and neighbor tables.</param>
/// <param name="x">Offset of the block inside the plane's above context.</param>
/// <param name="y">Offset of the block inside the plane's left context.</param>
/// <param name="txSize">Transform size of the block.</param>
/// <param name="segId">Index selecting the plane's dequantization factors.</param>
/// <returns>The value returned by DecodeCoefs, or 0 when the transform size is not handled.</returns>
public static int DecodeBlockTokens(
    ref TileWorkerData twd,
    int plane,
    Luts.ScanOrder sc,
    int x,
    int y,
    TxSize txSize,
    int segId)
{
    ref Reader reader = ref twd.BitReader;
    ref MacroBlockD xd = ref twd.Xd;
    ref MacroBlockDPlane pd = ref xd.Plane[plane];
    ref Array2<short> dequant = ref pd.SegDequant[segId];

    Span<sbyte> above = pd.AboveContext.AsSpan().Slice(x);
    Span<sbyte> left = pd.LeftContext.AsSpan().Slice(y);

    int shiftAbove = 0;
    int shiftLeft = 0;
    int eob;

    // Each transform size covers 1, 2, 4 or 8 one-byte context entries, which are read and
    // written as a single integer of the matching width.
    switch (txSize)
    {
        case TxSize.Tx4x4:
        {
            int ctx = (above[0] != 0 ? 1 : 0) + (left[0] != 0 ? 1 : 0);

            eob = DecodePlaneCoefs(ref xd, ref pd, plane, txSize, ref dequant, ctx, sc, ref reader);

            sbyte flag = (sbyte)(eob > 0 ? 1 : 0);
            left[0] = flag;
            above[0] = flag;
            break;
        }

        case TxSize.Tx8x8:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx8x8);

            Span<ushort> above16 = MemoryMarshal.Cast<sbyte, ushort>(above);
            Span<ushort> left16 = MemoryMarshal.Cast<sbyte, ushort>(left);
            int ctx = (above16[0] != 0 ? 1 : 0) + (left16[0] != 0 ? 1 : 0);

            eob = DecodePlaneCoefs(ref xd, ref pd, plane, txSize, ref dequant, ctx, sc, ref reader);

            int fill = eob > 0 ? 0x0101 : 0;
            above16[0] = (ushort)(fill >> shiftAbove);
            left16[0] = (ushort)(fill >> shiftLeft);
            break;
        }

        case TxSize.Tx16x16:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx16x16);

            Span<uint> above32 = MemoryMarshal.Cast<sbyte, uint>(above);
            Span<uint> left32 = MemoryMarshal.Cast<sbyte, uint>(left);
            int ctx = (above32[0] != 0 ? 1 : 0) + (left32[0] != 0 ? 1 : 0);

            eob = DecodePlaneCoefs(ref xd, ref pd, plane, txSize, ref dequant, ctx, sc, ref reader);

            int fill = eob > 0 ? 0x01010101 : 0;
            above32[0] = (uint)(fill >> shiftAbove);
            left32[0] = (uint)(fill >> shiftLeft);
            break;
        }

        case TxSize.Tx32x32:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx32x32);

            // NOTE: Casting to ulong here is safe because the default memory
            // alignment is at least 8 bytes and the Tx32x32 is aligned on 8 byte
            // boundaries.
            Span<ulong> above64 = MemoryMarshal.Cast<sbyte, ulong>(above);
            Span<ulong> left64 = MemoryMarshal.Cast<sbyte, ulong>(left);
            int ctx = (above64[0] != 0 ? 1 : 0) + (left64[0] != 0 ? 1 : 0);

            eob = DecodePlaneCoefs(ref xd, ref pd, plane, txSize, ref dequant, ctx, sc, ref reader);

            ulong fill = eob > 0 ? 0x0101010101010101UL : 0UL;
            above64[0] = fill >> shiftAbove;
            left64[0] = fill >> shiftLeft;
            break;
        }

        default:
            Debug.Assert(false, "Invalid transform size.");
            eob = 0;
            break;
    }

    return eob;
}

/// <summary>
/// Forwards to DecodeCoefs with the plane type, coefficient buffer and scan tables that
/// belong to the given plane and scan order.
/// </summary>
private static int DecodePlaneCoefs(
    ref MacroBlockD xd,
    ref MacroBlockDPlane pd,
    int plane,
    TxSize txSize,
    ref Array2<short> dequant,
    int ctx,
    Luts.ScanOrder sc,
    ref Reader r)
{
    return DecodeCoefs(
        ref xd,
        GetPlaneType(plane),
        pd.DqCoeff.AsSpan(),
        txSize,
        ref dequant,
        ctx,
        sc.Scan,
        sc.Neighbors,
        ref r);
}
- }
- }
|