| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327 |
- using System;
- using System.Diagnostics;
- using System.Runtime.CompilerServices;
- using System.Runtime.Intrinsics;
- using System.Runtime.Intrinsics.X86;
- namespace Ryujinx.Graphics.Texture.Utils
- {
- static class BC67Utils
- {
private static byte[][] _quantizationLut;
private static byte[][] _quantizationLutNoPBit;

/// <summary>
/// Builds the quantization lookup tables for every component depth from 4 to 8 bits.
/// Table <c>depth - 4</c> holds the results for <c>depth</c>.
/// </summary>
static BC67Utils()
{
    const int MinDepth = 4;
    const int MaxDepth = 8;
    const int TableCount = MaxDepth - MinDepth + 1;

    _quantizationLut = new byte[TableCount][];
    _quantizationLutNoPBit = new byte[TableCount][];

    for (int tableIndex = 0; tableIndex < TableCount; tableIndex++)
    {
        int depth = MinDepth + tableIndex;

        // 512 entries: the low 8 bits of the entry index are the component value and
        // bit 8 is forwarded as the third argument of QuantizeComponentForLut.
        byte[] withPBit = new byte[512];
        // 256 entries: component value only, quantized with the two-argument call.
        byte[] withoutPBit = new byte[256];

        for (int entry = 0; entry < withPBit.Length; entry++)
        {
            byte component = (byte)entry;

            withPBit[entry] = QuantizeComponentForLut(component, depth, entry >> 8);

            if (entry < withoutPBit.Length)
            {
                withoutPBit[entry] = QuantizeComponentForLut(component, depth);
            }
        }

        _quantizationLut[tableIndex] = withPBit;
        _quantizationLutNoPBit[tableIndex] = withoutPBit;
    }
}
/// <summary>
/// Computes the per-channel minimum and maximum over all pixels of a tile.
/// </summary>
/// <param name="tile">Packed pixels of the tile, one <see cref="uint"/> per pixel.</param>
/// <param name="w">Tile width; the SSE4.1 path is only taken when this is 4.</param>
/// <param name="h">Tile height; the SSE4.1 path is only taken when this is 4.</param>
/// <returns>A tuple of (minimum color, maximum color).</returns>
public static (RgbaColor8, RgbaColor8) GetMinMaxColors(ReadOnlySpan<uint> tile, int w, int h)
{
    // Fast path: full 4x4 tile on hardware with SSE4.1.
    if (Sse41.IsSupported && w == 4 && h == 4)
    {
        GetMinMaxColorsOneSubset4x4Sse41(tile, out RgbaColor8 simdMin, out RgbaColor8 simdMax);

        return (simdMin, simdMax);
    }

    // Scalar path: walks every element of the span (w and h are not consulted here).
    RgbaColor8 lowest = new RgbaColor8(255, 255, 255, 255);
    RgbaColor8 highest = default;

    foreach (uint packed in tile)
    {
        RgbaColor8 pixel = RgbaColor8.FromUInt32(packed);

        lowest.R = Math.Min(lowest.R, pixel.R);
        lowest.G = Math.Min(lowest.G, pixel.G);
        lowest.B = Math.Min(lowest.B, pixel.B);
        lowest.A = Math.Min(lowest.A, pixel.A);

        highest.R = Math.Max(highest.R, pixel.R);
        highest.G = Math.Max(highest.G, pixel.G);
        highest.B = Math.Max(highest.B, pixel.B);
        highest.A = Math.Max(highest.A, pixel.A);
    }

    return (lowest, highest);
}
/// <summary>
/// Computes the per-channel minimum and maximum color of each subset of a partitioned tile.
/// </summary>
/// <param name="partitionTable">Subset index of every pixel, laid out as a 4x4 table (row stride 4).</param>
/// <param name="tile">Packed pixels of the tile, stored row by row with a stride of <paramref name="w"/>.</param>
/// <param name="w">Tile width in pixels.</param>
/// <param name="h">Tile height in pixels.</param>
/// <param name="minColors">Receives the minimum color of each subset.</param>
/// <param name="maxColors">Receives the maximum color of each subset.</param>
/// <param name="subsetCount">Number of subsets; 1 and 2 have SSE4.1 fast paths for 4x4 tiles.</param>
public static void GetMinMaxColors(
    ReadOnlySpan<byte> partitionTable,
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    Span<RgbaColor8> minColors,
    Span<RgbaColor8> maxColors,
    int subsetCount)
{
    if (Sse41.IsSupported && w == 4 && h == 4)
    {
        if (subsetCount == 1)
        {
            GetMinMaxColorsOneSubset4x4Sse41(tile, out minColors[0], out maxColors[0]);
            return;
        }
        else if (subsetCount == 2)
        {
            GetMinMaxColorsTwoSubsets4x4Sse41(partitionTable, tile, minColors, maxColors);
            return;
        }
    }

    // Seed both accumulators. The SIMD paths above overwrite their outputs, so the scalar
    // path must not depend on whatever the caller left in maxColors either.
    minColors.Fill(new RgbaColor8(255, 255, 255, 255));
    maxColors.Clear();

    int i = 0;
    for (int ty = 0; ty < h; ty++)
    {
        for (int tx = 0; tx < w; tx++)
        {
            // The partition table always has a row stride of 4 (the index selection
            // routines in this class index it with ty * 4 + tx), while the tile itself
            // is packed with a stride of w. The two only coincide when w == 4.
            int subset = partitionTable[ty * 4 + tx];
            RgbaColor8 color = RgbaColor8.FromUInt32(tile[i++]);

            ref RgbaColor8 min = ref minColors[subset];
            ref RgbaColor8 max = ref maxColors[subset];

            min.R = Math.Min(min.R, color.R);
            min.G = Math.Min(min.G, color.G);
            min.B = Math.Min(min.B, color.B);
            min.A = Math.Min(min.A, color.A);

            max.R = Math.Max(max.R, color.R);
            max.G = Math.Max(max.G, color.G);
            max.B = Math.Max(max.B, color.B);
            max.A = Math.Max(max.A, color.A);
        }
    }
}
/// <summary>
/// Vectorized per-channel min/max over the 16 pixels of a 4x4 tile.
/// </summary>
/// <param name="tile">Packed pixels; 16 elements are read starting at the first one.</param>
/// <param name="minColor">Receives the per-channel minimum.</param>
/// <param name="maxColor">Receives the per-channel maximum.</param>
private static unsafe void GetMinMaxColorsOneSubset4x4Sse41(ReadOnlySpan<uint> tile, out RgbaColor8 minColor, out RgbaColor8 maxColor)
{
    Vector128<byte> pixelsRow0, pixelsRow1, pixelsRow2, pixelsRow3;

    fixed (uint* pPixels = tile)
    {
        // Each 128-bit load picks up one row of four packed pixels.
        pixelsRow0 = Sse2.LoadVector128(pPixels).AsByte();
        pixelsRow1 = Sse2.LoadVector128(pPixels + 4).AsByte();
        pixelsRow2 = Sse2.LoadVector128(pPixels + 8).AsByte();
        pixelsRow3 = Sse2.LoadVector128(pPixels + 12).AsByte();
    }

    // Reduce the four rows to a single vector (unsigned byte min/max is order independent).
    Vector128<byte> lowest = Sse2.Min(Sse2.Min(pixelsRow0, pixelsRow1), Sse2.Min(pixelsRow2, pixelsRow3));
    Vector128<byte> highest = Sse2.Max(Sse2.Max(pixelsRow0, pixelsRow1), Sse2.Max(pixelsRow2, pixelsRow3));

    // Then reduce the four pixels left in each vector to one color.
    minColor = HorizontalMin(lowest);
    maxColor = HorizontalMax(highest);
}
/// <summary>
/// Vectorized per-channel min/max of both subsets of a 4x4 tile split in two.
/// </summary>
/// <param name="partitionTable">Subset index of each pixel; 16 bytes are read.</param>
/// <param name="tile">Packed pixels; 16 elements are read.</param>
/// <param name="minColors">Entries 0 and 1 receive the minimum of subsets 0 and 1.</param>
/// <param name="maxColors">Entries 0 and 1 receive the maximum of subsets 0 and 1.</param>
private static unsafe void GetMinMaxColorsTwoSubsets4x4Sse41(
    ReadOnlySpan<byte> partitionTable,
    ReadOnlySpan<uint> tile,
    Span<RgbaColor8> minColors,
    Span<RgbaColor8> maxColors)
{
    Vector128<byte> subsetIds;

    fixed (byte* pSubsetIds = partitionTable)
    {
        subsetIds = Sse2.LoadVector128(pSubsetIds);
    }

    // One 0xFF/0x00 byte per pixel telling whether it belongs to subset 0...
    Vector128<byte> isSubset0 = Sse2.CompareEqual(subsetIds, Vector128<byte>.Zero);

    // ...widened so that every pixel's flag covers all 4 bytes of that pixel, one vector per row.
    Vector128<short> isSubset0Lo = Sse2.UnpackLow(isSubset0, isSubset0).AsInt16();
    Vector128<short> isSubset0Hi = Sse2.UnpackHigh(isSubset0, isSubset0).AsInt16();

    Vector128<byte> maskRow0 = Sse2.UnpackLow(isSubset0Lo, isSubset0Lo).AsByte();
    Vector128<byte> maskRow1 = Sse2.UnpackHigh(isSubset0Lo, isSubset0Lo).AsByte();
    Vector128<byte> maskRow2 = Sse2.UnpackLow(isSubset0Hi, isSubset0Hi).AsByte();
    Vector128<byte> maskRow3 = Sse2.UnpackHigh(isSubset0Hi, isSubset0Hi).AsByte();

    Vector128<byte> pixelsRow0, pixelsRow1, pixelsRow2, pixelsRow3;

    fixed (uint* pPixels = tile)
    {
        pixelsRow0 = Sse2.LoadVector128(pPixels).AsByte();
        pixelsRow1 = Sse2.LoadVector128(pPixels + 4).AsByte();
        pixelsRow2 = Sse2.LoadVector128(pPixels + 8).AsByte();
        pixelsRow3 = Sse2.LoadVector128(pPixels + 12).AsByte();
    }

    Vector128<byte> lowest0 = Vector128<byte>.AllBitsSet;
    Vector128<byte> lowest1 = Vector128<byte>.AllBitsSet;
    Vector128<byte> highest0 = Vector128<byte>.Zero;
    Vector128<byte> highest1 = Vector128<byte>.Zero;

    // For every row:
    //  - subset 0 min: pixels outside subset 0 are replaced by the running minimum (no effect).
    //  - subset 1 min: pixels of subset 0 are forced to 0xFF (no effect on a minimum).
    //  - subset 0 max: pixels outside subset 0 are forced to 0 (no effect on a maximum).
    //  - subset 1 max: pixels of subset 0 are forced to 0.

    // Row 0.
    lowest0 = Sse2.Min(lowest0, Sse41.BlendVariable(lowest0, pixelsRow0, maskRow0));
    lowest1 = Sse2.Min(lowest1, Sse2.Or(pixelsRow0, maskRow0));
    highest0 = Sse2.Max(highest0, Sse2.And(pixelsRow0, maskRow0));
    highest1 = Sse2.Max(highest1, Sse2.AndNot(maskRow0, pixelsRow0));

    // Row 1.
    lowest0 = Sse2.Min(lowest0, Sse41.BlendVariable(lowest0, pixelsRow1, maskRow1));
    lowest1 = Sse2.Min(lowest1, Sse2.Or(pixelsRow1, maskRow1));
    highest0 = Sse2.Max(highest0, Sse2.And(pixelsRow1, maskRow1));
    highest1 = Sse2.Max(highest1, Sse2.AndNot(maskRow1, pixelsRow1));

    // Row 2.
    lowest0 = Sse2.Min(lowest0, Sse41.BlendVariable(lowest0, pixelsRow2, maskRow2));
    lowest1 = Sse2.Min(lowest1, Sse2.Or(pixelsRow2, maskRow2));
    highest0 = Sse2.Max(highest0, Sse2.And(pixelsRow2, maskRow2));
    highest1 = Sse2.Max(highest1, Sse2.AndNot(maskRow2, pixelsRow2));

    // Row 3.
    lowest0 = Sse2.Min(lowest0, Sse41.BlendVariable(lowest0, pixelsRow3, maskRow3));
    lowest1 = Sse2.Min(lowest1, Sse2.Or(pixelsRow3, maskRow3));
    highest0 = Sse2.Max(highest0, Sse2.And(pixelsRow3, maskRow3));
    highest1 = Sse2.Max(highest1, Sse2.AndNot(maskRow3, pixelsRow3));

    minColors[0] = HorizontalMin(lowest0);
    minColors[1] = HorizontalMin(lowest1);
    maxColors[0] = HorizontalMax(highest0);
    maxColors[1] = HorizontalMax(highest1);
}
/// <summary>
/// Reduces a vector of four packed pixels to their per-channel minimum.
/// </summary>
/// <param name="x">Four packed pixels, one per 32-bit lane.</param>
/// <returns>The color built from lane 0 after the reduction.</returns>
private static RgbaColor8 HorizontalMin(Vector128<byte> x)
{
    // Control 0x31 moves lanes (1, 0, 3, 0) into (0, 1, 2, 3): lane 0 meets lane 1, lane 2 meets lane 3.
    Vector128<byte> neighbours = Sse2.Shuffle(x.AsInt32(), 0x31).AsByte();
    Vector128<byte> pairMin = Sse2.Min(x, neighbours);

    // Control 2 moves lane 2 into lane 0, combining both pair results there.
    Vector128<byte> otherPair = Sse2.Shuffle(pairMin.AsInt32(), 2).AsByte();
    Vector128<byte> fullMin = Sse2.Min(pairMin, otherPair);

    return RgbaColor8.FromUInt32(fullMin.AsUInt32().GetElement(0));
}
/// <summary>
/// Reduces a vector of four packed pixels to their per-channel maximum.
/// </summary>
/// <param name="x">Four packed pixels, one per 32-bit lane.</param>
/// <returns>The color built from lane 0 after the reduction.</returns>
private static RgbaColor8 HorizontalMax(Vector128<byte> x)
{
    // Control 0x31 moves lanes (1, 0, 3, 0) into (0, 1, 2, 3): lane 0 meets lane 1, lane 2 meets lane 3.
    Vector128<byte> neighbours = Sse2.Shuffle(x.AsInt32(), 0x31).AsByte();
    Vector128<byte> pairMax = Sse2.Max(x, neighbours);

    // Control 2 moves lane 2 into lane 0, combining both pair results there.
    Vector128<byte> otherPair = Sse2.Shuffle(pairMax.AsInt32(), 2).AsByte();
    Vector128<byte> fullMax = Sse2.Max(pairMax, otherPair);

    return RgbaColor8.FromUInt32(fullMax.AsUInt32().GetElement(0));
}
/// <summary>
/// Computes the total error obtained when every value is mapped to its closest palette entry,
/// the palette being interpolated between the two quantized end points.
/// Dispatches to an SSE4.1 implementation for 2, 3 and 4 bit indices when available.
/// </summary>
/// <param name="values">Packed colors to evaluate.</param>
/// <param name="endPoint0">First end point, packed.</param>
/// <param name="endPoint1">Second end point, packed.</param>
/// <param name="pBit0">Value forwarded to the quantizer for the first end point.</param>
/// <param name="pBit1">Value forwarded to the quantizer for the second end point.</param>
/// <param name="indexBitCount">Number of bits per index.</param>
/// <param name="indexCount">Number of palette entries.</param>
/// <param name="colorDepth">Color depth forwarded to the quantizer.</param>
/// <param name="alphaDepth">Alpha depth forwarded to the quantizer.</param>
/// <param name="alphaMask">Bits OR-ed into every value and palette entry before comparison.</param>
/// <returns>The accumulated error over all values.</returns>
public static int SelectIndices(
    ReadOnlySpan<uint> values,
    uint endPoint0,
    uint endPoint1,
    int pBit0,
    int pBit1,
    int indexBitCount,
    int indexCount,
    int colorDepth,
    int alphaDepth,
    uint alphaMask)
{
    if (Sse41.IsSupported)
    {
        switch (indexBitCount)
        {
            case 2:
                return Select2BitIndicesSse41(
                    values, endPoint0, endPoint1, pBit0, pBit1,
                    indexBitCount, indexCount, colorDepth, alphaDepth, alphaMask);

            case 3:
                return Select3BitIndicesSse41(
                    values, endPoint0, endPoint1, pBit0, pBit1,
                    indexBitCount, indexCount, colorDepth, alphaDepth, alphaMask);

            case 4:
                return Select4BitIndicesOneSubsetSse41(
                    values, endPoint0, endPoint1, pBit0, pBit1,
                    indexBitCount, indexCount, colorDepth, alphaDepth, alphaMask);
        }
    }

    // No SSE4.1, or an index width without a vectorized implementation.
    return SelectIndicesFallback(
        values, endPoint0, endPoint1, pBit0, pBit1,
        indexBitCount, indexCount, colorDepth, alphaDepth, alphaMask);
}
/// <summary>
/// SSE4.1 error evaluation for a 4 entry (2 bit) palette.
/// </summary>
/// <remarks>
/// The error of each value is packed to 16 bits with unsigned saturation before being summed.
/// <paramref name="indexBitCount"/> and <paramref name="indexCount"/> are not read by this implementation.
/// </remarks>
/// <returns>The sum over all values of the error against the closest palette entry.</returns>
private static unsafe int Select2BitIndicesSse41(
    ReadOnlySpan<uint> values,
    uint endPoint0,
    uint endPoint1,
    int pBit0,
    int pBit1,
    int indexBitCount,
    int indexCount,
    int colorDepth,
    int alphaDepth,
    uint alphaMask)
{
    // With no alpha bits, the palette alpha channel is additionally forced to 255.
    uint paletteMask = alphaDepth == 0
        ? alphaMask | new RgbaColor8(0, 0, 0, 255).ToUInt32()
        : alphaMask;

    RgbaColor8 quantized0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0);
    RgbaColor8 quantized1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1);

    // Interleave the channels of both end points: (e0.c, e1.c) byte pairs.
    Vector128<byte> splat0 = Vector128.Create(quantized0.ToUInt32() | paletteMask).AsByte();
    Vector128<byte> splat1 = Vector128.Create(quantized1.ToUInt32() | paletteMask).AsByte();
    Vector128<byte> endPointPairs = Sse2.UnpackLow(splat0, splat1);

    Vector128<byte> weights;
    Vector128<byte> inverseWeights;

    fixed (byte* pWeights = BC67Tables.Weights[0], pInvWeights = BC67Tables.InverseWeights[0])
    {
        // 4 weights of one byte each.
        weights = Sse2.LoadScalarVector128((uint*)pWeights).AsByte();
        inverseWeights = Sse2.LoadScalarVector128((uint*)pInvWeights).AsByte();
    }

    // (inverse weight, weight) byte pairs, then each pair replicated 4 times (once per channel).
    Vector128<short> weightPairs = Sse2.UnpackLow(inverseWeights, weights).AsInt16();
    Vector128<short> weightPairsTwice = Sse2.UnpackLow(weightPairs, weightPairs);
    Vector128<sbyte> weightsFor01 = Sse2.UnpackLow(weightPairsTwice, weightPairsTwice).AsSByte();
    Vector128<sbyte> weightsFor23 = Sse2.UnpackHigh(weightPairsTwice, weightPairsTwice).AsSByte();

    // Each vector holds two consecutive palette entries, 4 channels of 16 bits each.
    Vector128<short> palette01 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor01));
    Vector128<short> palette23 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor23));

    int totalError = 0;

    foreach (uint value in values)
    {
        // The pixel widened to 16 bits per channel, present twice in the vector.
        Vector128<short> pixel = Sse41.ConvertToVector128Int16(Vector128.Create(value | alphaMask).AsByte());

        Vector128<short> diff01 = Sse2.Subtract(pixel, palette01);
        Vector128<short> diff23 = Sse2.Subtract(pixel, palette23);

        // Squared differences, summed down to one 32-bit error per palette entry.
        Vector128<int> squares01 = Sse2.MultiplyAddAdjacent(diff01, diff01);
        Vector128<int> squares23 = Sse2.MultiplyAddAdjacent(diff23, diff23);
        Vector128<int> errors = Ssse3.HorizontalAdd(squares01, squares23);

        // Only 4 distinct errors exist, so the same vector fills both halves.
        Vector128<ushort> packedErrors = Sse41.PackUnsignedSaturate(errors, errors);

        totalError += Sse41.MinHorizontal(packedErrors).GetElement(0);
    }

    return totalError;
}
/// <summary>
/// SSE4.1 error evaluation for an 8 entry (3 bit) palette.
/// </summary>
/// <remarks>
/// The error of each value is packed to 16 bits with unsigned saturation before being summed.
/// <paramref name="indexBitCount"/> and <paramref name="indexCount"/> are not read by this implementation.
/// </remarks>
/// <returns>The sum over all values of the error against the closest palette entry.</returns>
private static unsafe int Select3BitIndicesSse41(
    ReadOnlySpan<uint> values,
    uint endPoint0,
    uint endPoint1,
    int pBit0,
    int pBit1,
    int indexBitCount,
    int indexCount,
    int colorDepth,
    int alphaDepth,
    uint alphaMask)
{
    // With no alpha bits, the palette alpha channel is additionally forced to 255.
    uint paletteMask = alphaDepth == 0
        ? alphaMask | new RgbaColor8(0, 0, 0, 255).ToUInt32()
        : alphaMask;

    RgbaColor8 quantized0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0);
    RgbaColor8 quantized1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1);

    // Interleave the channels of both end points: (e0.c, e1.c) byte pairs.
    Vector128<byte> splat0 = Vector128.Create(quantized0.ToUInt32() | paletteMask).AsByte();
    Vector128<byte> splat1 = Vector128.Create(quantized1.ToUInt32() | paletteMask).AsByte();
    Vector128<byte> endPointPairs = Sse2.UnpackLow(splat0, splat1);

    Vector128<byte> weights;
    Vector128<byte> inverseWeights;

    fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1])
    {
        // 8 weights of one byte each.
        weights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte();
        inverseWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte();
    }

    // (inverse weight, weight) byte pairs for entries 0-7, then each pair replicated
    // 4 times (once per channel), two palette entries per vector.
    Vector128<short> weightPairs = Sse2.UnpackLow(inverseWeights, weights).AsInt16();
    Vector128<short> pairs0To3Twice = Sse2.UnpackLow(weightPairs, weightPairs);
    Vector128<short> pairs4To7Twice = Sse2.UnpackHigh(weightPairs, weightPairs);

    Vector128<sbyte> weightsFor01 = Sse2.UnpackLow(pairs0To3Twice, pairs0To3Twice).AsSByte();
    Vector128<sbyte> weightsFor23 = Sse2.UnpackHigh(pairs0To3Twice, pairs0To3Twice).AsSByte();
    Vector128<sbyte> weightsFor45 = Sse2.UnpackLow(pairs4To7Twice, pairs4To7Twice).AsSByte();
    Vector128<sbyte> weightsFor67 = Sse2.UnpackHigh(pairs4To7Twice, pairs4To7Twice).AsSByte();

    Vector128<short> palette01 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor01));
    Vector128<short> palette23 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor23));
    Vector128<short> palette45 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor45));
    Vector128<short> palette67 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor67));

    int totalError = 0;

    foreach (uint value in values)
    {
        // The pixel widened to 16 bits per channel, present twice in the vector.
        Vector128<short> pixel = Sse41.ConvertToVector128Int16(Vector128.Create(value | alphaMask).AsByte());

        Vector128<short> diff01 = Sse2.Subtract(pixel, palette01);
        Vector128<short> diff23 = Sse2.Subtract(pixel, palette23);
        Vector128<short> diff45 = Sse2.Subtract(pixel, palette45);
        Vector128<short> diff67 = Sse2.Subtract(pixel, palette67);

        Vector128<int> squares01 = Sse2.MultiplyAddAdjacent(diff01, diff01);
        Vector128<int> squares23 = Sse2.MultiplyAddAdjacent(diff23, diff23);
        Vector128<int> squares45 = Sse2.MultiplyAddAdjacent(diff45, diff45);
        Vector128<int> squares67 = Sse2.MultiplyAddAdjacent(diff67, diff67);

        // One 32-bit error per palette entry: entries 0-3, then 4-7.
        Vector128<int> errors0To3 = Ssse3.HorizontalAdd(squares01, squares23);
        Vector128<int> errors4To7 = Ssse3.HorizontalAdd(squares45, squares67);

        Vector128<ushort> packedErrors = Sse41.PackUnsignedSaturate(errors0To3, errors4To7);

        totalError += Sse41.MinHorizontal(packedErrors).GetElement(0);
    }

    return totalError;
}
/// <summary>
/// SSE4.1 error evaluation for a 16 entry (4 bit) palette.
/// </summary>
/// <remarks>
/// The error of each value is packed to 16 bits with unsigned saturation before being summed.
/// <paramref name="indexBitCount"/> and <paramref name="indexCount"/> are not read by this implementation.
/// </remarks>
/// <returns>The sum over all values of the error against the closest palette entry.</returns>
private static unsafe int Select4BitIndicesOneSubsetSse41(
    ReadOnlySpan<uint> values,
    uint endPoint0,
    uint endPoint1,
    int pBit0,
    int pBit1,
    int indexBitCount,
    int indexCount,
    int colorDepth,
    int alphaDepth,
    uint alphaMask)
{
    // With no alpha bits, the palette alpha channel is additionally forced to 255.
    uint paletteMask = alphaDepth == 0
        ? alphaMask | new RgbaColor8(0, 0, 0, 255).ToUInt32()
        : alphaMask;

    RgbaColor8 quantized0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0);
    RgbaColor8 quantized1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1);

    // Interleave the channels of both end points: (e0.c, e1.c) byte pairs.
    Vector128<byte> splat0 = Vector128.Create(quantized0.ToUInt32() | paletteMask).AsByte();
    Vector128<byte> splat1 = Vector128.Create(quantized1.ToUInt32() | paletteMask).AsByte();
    Vector128<byte> endPointPairs = Sse2.UnpackLow(splat0, splat1);

    Vector128<byte> weights;
    Vector128<byte> inverseWeights;

    fixed (byte* pWeights = BC67Tables.Weights[2], pInvWeights = BC67Tables.InverseWeights[2])
    {
        // 16 weights of one byte each.
        weights = Sse2.LoadVector128(pWeights);
        inverseWeights = Sse2.LoadVector128(pInvWeights);
    }

    // (inverse weight, weight) byte pairs for entries 0-7 and 8-15.
    Vector128<short> pairs0To7 = Sse2.UnpackLow(inverseWeights, weights).AsInt16();
    Vector128<short> pairs8To15 = Sse2.UnpackHigh(inverseWeights, weights).AsInt16();

    // Every pair duplicated once...
    Vector128<short> pairs0To3Twice = Sse2.UnpackLow(pairs0To7, pairs0To7);
    Vector128<short> pairs4To7Twice = Sse2.UnpackHigh(pairs0To7, pairs0To7);
    Vector128<short> pairs8To11Twice = Sse2.UnpackLow(pairs8To15, pairs8To15);
    Vector128<short> pairs12To15Twice = Sse2.UnpackHigh(pairs8To15, pairs8To15);

    // ...and once more, so each pair appears 4 times (once per channel), two entries per vector.
    Vector128<sbyte> weightsFor0 = Sse2.UnpackLow(pairs0To3Twice, pairs0To3Twice).AsSByte();
    Vector128<sbyte> weightsFor1 = Sse2.UnpackHigh(pairs0To3Twice, pairs0To3Twice).AsSByte();
    Vector128<sbyte> weightsFor2 = Sse2.UnpackLow(pairs4To7Twice, pairs4To7Twice).AsSByte();
    Vector128<sbyte> weightsFor3 = Sse2.UnpackHigh(pairs4To7Twice, pairs4To7Twice).AsSByte();
    Vector128<sbyte> weightsFor4 = Sse2.UnpackLow(pairs8To11Twice, pairs8To11Twice).AsSByte();
    Vector128<sbyte> weightsFor5 = Sse2.UnpackHigh(pairs8To11Twice, pairs8To11Twice).AsSByte();
    Vector128<sbyte> weightsFor6 = Sse2.UnpackLow(pairs12To15Twice, pairs12To15Twice).AsSByte();
    Vector128<sbyte> weightsFor7 = Sse2.UnpackHigh(pairs12To15Twice, pairs12To15Twice).AsSByte();

    // paletteN holds palette entries 2N and 2N + 1.
    Vector128<short> palette0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor0));
    Vector128<short> palette1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor1));
    Vector128<short> palette2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor2));
    Vector128<short> palette3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor3));
    Vector128<short> palette4 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor4));
    Vector128<short> palette5 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor5));
    Vector128<short> palette6 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor6));
    Vector128<short> palette7 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor7));

    int totalError = 0;

    foreach (uint value in values)
    {
        // The pixel widened to 16 bits per channel, present twice in the vector.
        Vector128<short> pixel = Sse41.ConvertToVector128Int16(Vector128.Create(value | alphaMask).AsByte());

        Vector128<short> diff0 = Sse2.Subtract(pixel, palette0);
        Vector128<short> diff1 = Sse2.Subtract(pixel, palette1);
        Vector128<short> diff2 = Sse2.Subtract(pixel, palette2);
        Vector128<short> diff3 = Sse2.Subtract(pixel, palette3);
        Vector128<short> diff4 = Sse2.Subtract(pixel, palette4);
        Vector128<short> diff5 = Sse2.Subtract(pixel, palette5);
        Vector128<short> diff6 = Sse2.Subtract(pixel, palette6);
        Vector128<short> diff7 = Sse2.Subtract(pixel, palette7);

        Vector128<int> squares0 = Sse2.MultiplyAddAdjacent(diff0, diff0);
        Vector128<int> squares1 = Sse2.MultiplyAddAdjacent(diff1, diff1);
        Vector128<int> squares2 = Sse2.MultiplyAddAdjacent(diff2, diff2);
        Vector128<int> squares3 = Sse2.MultiplyAddAdjacent(diff3, diff3);
        Vector128<int> squares4 = Sse2.MultiplyAddAdjacent(diff4, diff4);
        Vector128<int> squares5 = Sse2.MultiplyAddAdjacent(diff5, diff5);
        Vector128<int> squares6 = Sse2.MultiplyAddAdjacent(diff6, diff6);
        Vector128<int> squares7 = Sse2.MultiplyAddAdjacent(diff7, diff7);

        // One 32-bit error per palette entry, four entries per vector.
        Vector128<int> errors0To3 = Ssse3.HorizontalAdd(squares0, squares1);
        Vector128<int> errors4To7 = Ssse3.HorizontalAdd(squares2, squares3);
        Vector128<int> errors8To11 = Ssse3.HorizontalAdd(squares4, squares5);
        Vector128<int> errors12To15 = Ssse3.HorizontalAdd(squares6, squares7);

        Vector128<ushort> packedLow = Sse41.PackUnsignedSaturate(errors0To3, errors4To7);
        Vector128<ushort> packedHigh = Sse41.PackUnsignedSaturate(errors8To11, errors12To15);

        // The horizontal minimum only covers 8 entries, so take the smaller of both halves.
        ushort bestLow = Sse41.MinHorizontal(packedLow).GetElement(0);
        ushort bestHigh = Sse41.MinHorizontal(packedHigh).GetElement(0);

        totalError += Math.Min(bestLow, bestHigh);
    }

    return totalError;
}
/// <summary>
/// Scalar error evaluation: builds the palette between the two quantized end points and
/// sums, for every value, the squared difference against its closest palette entry.
/// </summary>
/// <param name="values">Packed colors to evaluate.</param>
/// <param name="endPoint0">First end point, packed.</param>
/// <param name="endPoint1">Second end point, packed.</param>
/// <param name="pBit0">Value forwarded to the quantizer for the first end point.</param>
/// <param name="pBit1">Value forwarded to the quantizer for the second end point.</param>
/// <param name="indexBitCount">Number of bits per index, forwarded to the interpolation.</param>
/// <param name="indexCount">Number of palette entries.</param>
/// <param name="colorDepth">Color depth forwarded to the quantizer.</param>
/// <param name="alphaDepth">Alpha depth forwarded to the quantizer.</param>
/// <param name="alphaMask">Bits OR-ed into every value and into both end points.</param>
/// <returns>The accumulated error over all values.</returns>
private static int SelectIndicesFallback(
    ReadOnlySpan<uint> values,
    uint endPoint0,
    uint endPoint1,
    int pBit0,
    int pBit1,
    int indexBitCount,
    int indexCount,
    int colorDepth,
    int alphaDepth,
    uint alphaMask)
{
    // With no alpha bits, the palette alpha channel is additionally forced to 255.
    uint paletteMask = alphaDepth == 0
        ? alphaMask | new RgbaColor8(0, 0, 0, 255).ToUInt32()
        : alphaMask;

    Span<uint> palette = stackalloc uint[indexCount];

    RgbaColor8 first = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0);
    RgbaColor8 last = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1);

    // Apply the mask in place on the packed representation of both end points.
    Unsafe.As<RgbaColor8, uint>(ref first) |= paletteMask;
    Unsafe.As<RgbaColor8, uint>(ref last) |= paletteMask;

    int lastIndex = indexCount - 1;

    palette[0] = first.ToUInt32();
    palette[lastIndex] = last.ToUInt32();

    for (int entry = 1; entry < lastIndex; entry++)
    {
        palette[entry] = Interpolate(first, last, entry, indexBitCount).ToUInt32();
    }

    int totalError = 0;

    foreach (uint value in values)
    {
        var pixel = RgbaColor8.FromUInt32(value | alphaMask).GetColor32();
        int lowestScore = int.MaxValue;

        for (int entry = 0; entry < indexCount; entry++)
        {
            int score = SquaredDifference(pixel, RgbaColor8.FromUInt32(palette[entry]).GetColor32());

            lowestScore = Math.Min(lowestScore, score);
        }

        totalError += lowestScore;
    }

    return totalError;
}
/// <summary>
/// Selects, for every pixel of a tile, the index of the closest palette entry of its subset
/// and returns the accumulated error. Dispatches to an SSE4.1 implementation for 2, 3 and
/// 4 bit indices when available.
/// </summary>
/// <param name="tile">Packed pixels of the tile.</param>
/// <param name="w">Tile width in pixels.</param>
/// <param name="h">Tile height in pixels.</param>
/// <param name="endPoints0">First end point of each subset, packed.</param>
/// <param name="endPoints1">Second end point of each subset, packed.</param>
/// <param name="pBitValues">P-bit values consumed by the implementations.</param>
/// <param name="indices">Receives the selected index of each pixel.</param>
/// <param name="subsetCount">Number of subsets; the 4 bit SSE4.1 path asserts that this is 1.</param>
/// <param name="partition">Partition number.</param>
/// <param name="indexBitCount">Number of bits per index.</param>
/// <param name="indexCount">Number of palette entries.</param>
/// <param name="colorDepth">Color depth forwarded to the implementations.</param>
/// <param name="alphaDepth">Alpha depth forwarded to the implementations.</param>
/// <param name="pBits">Number of P-bits.</param>
/// <param name="alphaMask">Alpha mask forwarded to the implementations.</param>
/// <returns>The accumulated error over the tile.</returns>
public static int SelectIndices(
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    ReadOnlySpan<uint> endPoints0,
    ReadOnlySpan<uint> endPoints1,
    ReadOnlySpan<int> pBitValues,
    Span<byte> indices,
    int subsetCount,
    int partition,
    int indexBitCount,
    int indexCount,
    int colorDepth,
    int alphaDepth,
    int pBits,
    uint alphaMask)
{
    if (Sse41.IsSupported)
    {
        switch (indexBitCount)
        {
            case 2:
                return Select2BitIndicesSse41(
                    tile, w, h, endPoints0, endPoints1, pBitValues, indices,
                    subsetCount, partition, colorDepth, alphaDepth, pBits, alphaMask);

            case 3:
                return Select3BitIndicesSse41(
                    tile, w, h, endPoints0, endPoints1, pBitValues, indices,
                    subsetCount, partition, colorDepth, alphaDepth, pBits, alphaMask);

            case 4:
                // The 4 bit implementation only handles a single subset.
                Debug.Assert(subsetCount == 1);

                return Select4BitIndicesOneSubsetSse41(
                    tile, w, h, endPoints0[0], endPoints1[0], pBitValues, indices,
                    partition, colorDepth, alphaDepth, pBits, alphaMask);
        }
    }

    // No SSE4.1, or an index width without a vectorized implementation.
    return SelectIndicesFallback(
        tile, w, h, endPoints0, endPoints1, pBitValues, indices,
        subsetCount, partition, indexBitCount, indexCount,
        colorDepth, alphaDepth, pBits, alphaMask);
}
/// <summary>
/// SSE4.1 index selection for a tile using 4 entry (2 bit) palettes, one palette per subset.
/// </summary>
/// <remarks>
/// The partition table and <paramref name="indices"/> are addressed with a row stride of 4,
/// while <paramref name="tile"/> is read with a row stride of <paramref name="w"/>.
/// The error of each pixel is packed to 16 bits with unsigned saturation before being summed.
/// </remarks>
/// <returns>The accumulated error over the tile.</returns>
private static unsafe int Select2BitIndicesSse41(
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    ReadOnlySpan<uint> endPoints0,
    ReadOnlySpan<uint> endPoints1,
    ReadOnlySpan<int> pBitValues,
    Span<byte> indices,
    int subsetCount,
    int partition,
    int colorDepth,
    int alphaDepth,
    int pBits,
    uint alphaMask)
{
    byte[] subsetOfPixel = BC67Tables.PartitionTable[subsetCount - 1][partition];

    // With no alpha bits, the palette alpha channel is additionally forced to 255.
    uint paletteMask = alphaDepth == 0
        ? alphaMask | new RgbaColor8(0, 0, 0, 255).ToUInt32()
        : alphaMask;

    // The interpolation weights do not depend on the subset, so prepare them once.
    Vector128<byte> weights;
    Vector128<byte> inverseWeights;

    fixed (byte* pWeights = BC67Tables.Weights[0], pInvWeights = BC67Tables.InverseWeights[0])
    {
        // 4 weights of one byte each.
        weights = Sse2.LoadScalarVector128((uint*)pWeights).AsByte();
        inverseWeights = Sse2.LoadScalarVector128((uint*)pInvWeights).AsByte();
    }

    // (inverse weight, weight) byte pairs, then each pair replicated 4 times (once per channel).
    Vector128<short> weightPairs = Sse2.UnpackLow(inverseWeights, weights).AsInt16();
    Vector128<short> weightPairsTwice = Sse2.UnpackLow(weightPairs, weightPairs);
    Vector128<sbyte> weightsFor01 = Sse2.UnpackLow(weightPairsTwice, weightPairsTwice).AsSByte();
    Vector128<sbyte> weightsFor23 = Sse2.UnpackHigh(weightPairsTwice, weightPairsTwice).AsSByte();

    int totalError = 0;

    for (int subset = 0; subset < subsetCount; subset++)
    {
        // -1 means "no P-bit"; otherwise either one shared P-bit per subset or one per end point.
        int pBit0 = -1;
        int pBit1 = -1;

        if (pBits == subsetCount)
        {
            pBit0 = pBitValues[subset];
            pBit1 = pBit0;
        }
        else if (pBits != 0)
        {
            pBit0 = pBitValues[subset * 2];
            pBit1 = pBitValues[subset * 2 + 1];
        }

        RgbaColor8 quantized0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0);
        RgbaColor8 quantized1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1);

        // Interleave the channels of both end points: (e0.c, e1.c) byte pairs.
        Vector128<byte> splat0 = Vector128.Create(quantized0.ToUInt32() | paletteMask).AsByte();
        Vector128<byte> splat1 = Vector128.Create(quantized1.ToUInt32() | paletteMask).AsByte();
        Vector128<byte> endPointPairs = Sse2.UnpackLow(splat0, splat1);

        // Each vector holds two consecutive palette entries, 4 channels of 16 bits each.
        Vector128<short> palette01 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor01));
        Vector128<short> palette23 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor23));

        for (int ty = 0; ty < h; ty++)
        {
            for (int tx = 0; tx < w; tx++)
            {
                int tableOffset = ty * 4 + tx;

                if (subsetOfPixel[tableOffset] != subset)
                {
                    continue;
                }

                // The tile is packed row by row with a stride of w.
                uint value = tile[ty * w + tx] | alphaMask;

                Vector128<short> pixel = Sse41.ConvertToVector128Int16(Vector128.Create(value).AsByte());

                Vector128<short> diff01 = Sse2.Subtract(pixel, palette01);
                Vector128<short> diff23 = Sse2.Subtract(pixel, palette23);

                Vector128<int> squares01 = Sse2.MultiplyAddAdjacent(diff01, diff01);
                Vector128<int> squares23 = Sse2.MultiplyAddAdjacent(diff23, diff23);
                Vector128<int> errors = Ssse3.HorizontalAdd(squares01, squares23);

                // Only 4 distinct errors exist, so the same vector fills both halves.
                Vector128<ushort> packedErrors = Sse41.PackUnsignedSaturate(errors, errors);

                // Low 16 bits: smallest error. Next 16 bits: position of that error.
                uint best = Sse41.MinHorizontal(packedErrors).AsUInt32().GetElement(0);

                indices[tableOffset] = (byte)(best >> 16);
                totalError += (ushort)best;
            }
        }
    }

    return totalError;
}
/// <summary>
/// SSE4.1 index selection for a tile using 8 entry (3 bit) palettes, one palette per subset.
/// </summary>
/// <remarks>
/// The partition table and <paramref name="indices"/> are addressed with a row stride of 4,
/// while <paramref name="tile"/> is read with a row stride of <paramref name="w"/>.
/// The error of each pixel is packed to 16 bits with unsigned saturation before being summed.
/// </remarks>
/// <returns>The accumulated error over the tile.</returns>
private static unsafe int Select3BitIndicesSse41(
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    ReadOnlySpan<uint> endPoints0,
    ReadOnlySpan<uint> endPoints1,
    ReadOnlySpan<int> pBitValues,
    Span<byte> indices,
    int subsetCount,
    int partition,
    int colorDepth,
    int alphaDepth,
    int pBits,
    uint alphaMask)
{
    byte[] subsetOfPixel = BC67Tables.PartitionTable[subsetCount - 1][partition];

    // With no alpha bits, the palette alpha channel is additionally forced to 255.
    uint paletteMask = alphaDepth == 0
        ? alphaMask | new RgbaColor8(0, 0, 0, 255).ToUInt32()
        : alphaMask;

    // The interpolation weights do not depend on the subset, so prepare them once.
    Vector128<byte> weights;
    Vector128<byte> inverseWeights;

    fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1])
    {
        // 8 weights of one byte each.
        weights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte();
        inverseWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte();
    }

    // (inverse weight, weight) byte pairs for entries 0-7, then each pair replicated
    // 4 times (once per channel), two palette entries per vector.
    Vector128<short> weightPairs = Sse2.UnpackLow(inverseWeights, weights).AsInt16();
    Vector128<short> pairs0To3Twice = Sse2.UnpackLow(weightPairs, weightPairs);
    Vector128<short> pairs4To7Twice = Sse2.UnpackHigh(weightPairs, weightPairs);

    Vector128<sbyte> weightsFor01 = Sse2.UnpackLow(pairs0To3Twice, pairs0To3Twice).AsSByte();
    Vector128<sbyte> weightsFor23 = Sse2.UnpackHigh(pairs0To3Twice, pairs0To3Twice).AsSByte();
    Vector128<sbyte> weightsFor45 = Sse2.UnpackLow(pairs4To7Twice, pairs4To7Twice).AsSByte();
    Vector128<sbyte> weightsFor67 = Sse2.UnpackHigh(pairs4To7Twice, pairs4To7Twice).AsSByte();

    int totalError = 0;

    for (int subset = 0; subset < subsetCount; subset++)
    {
        // -1 means "no P-bit"; otherwise either one shared P-bit per subset or one per end point.
        int pBit0 = -1;
        int pBit1 = -1;

        if (pBits == subsetCount)
        {
            pBit0 = pBitValues[subset];
            pBit1 = pBit0;
        }
        else if (pBits != 0)
        {
            pBit0 = pBitValues[subset * 2];
            pBit1 = pBitValues[subset * 2 + 1];
        }

        RgbaColor8 quantized0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0);
        RgbaColor8 quantized1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1);

        // Interleave the channels of both end points: (e0.c, e1.c) byte pairs.
        Vector128<byte> splat0 = Vector128.Create(quantized0.ToUInt32() | paletteMask).AsByte();
        Vector128<byte> splat1 = Vector128.Create(quantized1.ToUInt32() | paletteMask).AsByte();
        Vector128<byte> endPointPairs = Sse2.UnpackLow(splat0, splat1);

        Vector128<short> palette01 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor01));
        Vector128<short> palette23 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor23));
        Vector128<short> palette45 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor45));
        Vector128<short> palette67 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, weightsFor67));

        for (int ty = 0; ty < h; ty++)
        {
            for (int tx = 0; tx < w; tx++)
            {
                int tableOffset = ty * 4 + tx;

                if (subsetOfPixel[tableOffset] != subset)
                {
                    continue;
                }

                // The tile is packed row by row with a stride of w.
                uint value = tile[ty * w + tx] | alphaMask;

                Vector128<short> pixel = Sse41.ConvertToVector128Int16(Vector128.Create(value).AsByte());

                Vector128<short> diff01 = Sse2.Subtract(pixel, palette01);
                Vector128<short> diff23 = Sse2.Subtract(pixel, palette23);
                Vector128<short> diff45 = Sse2.Subtract(pixel, palette45);
                Vector128<short> diff67 = Sse2.Subtract(pixel, palette67);

                Vector128<int> squares01 = Sse2.MultiplyAddAdjacent(diff01, diff01);
                Vector128<int> squares23 = Sse2.MultiplyAddAdjacent(diff23, diff23);
                Vector128<int> squares45 = Sse2.MultiplyAddAdjacent(diff45, diff45);
                Vector128<int> squares67 = Sse2.MultiplyAddAdjacent(diff67, diff67);

                // One 32-bit error per palette entry: entries 0-3, then 4-7.
                Vector128<int> errors0To3 = Ssse3.HorizontalAdd(squares01, squares23);
                Vector128<int> errors4To7 = Ssse3.HorizontalAdd(squares45, squares67);

                Vector128<ushort> packedErrors = Sse41.PackUnsignedSaturate(errors0To3, errors4To7);

                // Low 16 bits: smallest error. Next 16 bits: position of that error.
                uint best = Sse41.MinHorizontal(packedErrors).AsUInt32().GetElement(0);

                indices[tableOffset] = (byte)(best >> 16);
                totalError += (ushort)best;
            }
        }
    }

    return totalError;
}
/// <summary>
/// SSE4.1 index selection for a tile whose pixels all share one pair of end points and use 4-bit
/// indices: builds the 16 interpolated palette entries, then stores the closest entry for every pixel.
/// </summary>
/// <param name="tile">Tile pixels as packed 32-bit colors, consumed sequentially row by row</param>
/// <param name="w">Number of pixels processed per tile row</param>
/// <param name="h">Number of tile rows processed</param>
/// <param name="endPoint0">First end point; it is quantized before the palette is built</param>
/// <param name="endPoint1">Second end point; it is quantized before the palette is built</param>
/// <param name="pBitValues">P-bit values; entries 0 and 1 are read only when <paramref name="pBits"/> is non-zero</param>
/// <param name="indices">Receives the selected index of each pixel at position <c>ty * 4 + tx</c></param>
/// <param name="partition">Not used by this method</param>
/// <param name="colorDepth">Color bit depth forwarded to <c>Quantize</c></param>
/// <param name="alphaDepth">Alpha bit depth forwarded to <c>Quantize</c>; when zero, alpha 255 is ORed into the palette end points</param>
/// <param name="pBits">Non-zero when the end points carry p-bits taken from <paramref name="pBitValues"/></param>
/// <param name="alphaMask">Bits ORed into every tile pixel and into both end points before comparison</param>
/// <returns>Sum of the smallest per-pixel squared distances, each saturated to 16 bits before being compared</returns>
private static unsafe int Select4BitIndicesOneSubsetSse41(
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    uint endPoint0,
    uint endPoint1,
    ReadOnlySpan<int> pBitValues,
    Span<byte> indices,
    int partition,
    int colorDepth,
    int alphaDepth,
    int pBits,
    uint alphaMask)
{
    uint paletteAlphaMask = alphaDepth == 0
        ? alphaMask | new RgbaColor8(0, 0, 0, 255).ToUInt32()
        : alphaMask;

    // A negative p-bit tells Quantize that no p-bit is in use.
    int firstPBit = -1;
    int secondPBit = -1;

    if (pBits != 0)
    {
        firstPBit = pBitValues[0];
        secondPBit = pBitValues[1];
    }

    RgbaColor8 quantized0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, firstPBit);
    RgbaColor8 quantized1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, secondPBit);

    // Bytes become (c0.ch, c1.ch) pairs for the four channels, repeated twice across the register.
    Vector128<byte> endPointPairs = Sse2.UnpackLow(
        Vector128.Create(quantized0.ToUInt32() | paletteAlphaMask).AsByte(),
        Vector128.Create(quantized1.ToUInt32() | paletteAlphaMask).AsByte());

    Vector128<byte> weights;
    Vector128<byte> inverseWeights;

    fixed (byte* pWeights = BC67Tables.Weights[2], pInvWeights = BC67Tables.InverseWeights[2])
    {
        weights = Sse2.LoadVector128(pWeights);
        inverseWeights = Sse2.LoadVector128(pInvWeights);
    }

    // Interleave into (inverse weight, weight) byte pairs so they line up with the (c0, c1) channel pairs.
    Vector128<short> pairs0To7 = Sse2.UnpackLow(inverseWeights, weights).AsInt16();
    Vector128<short> pairs8To15 = Sse2.UnpackHigh(inverseWeights, weights).AsInt16();

    // Two rounds of self-unpacking replicate each 16-bit pair four times (once per channel),
    // leaving the weights of two consecutive palette entries in each register.
    Vector128<short> pairs0To3 = Sse2.UnpackLow(pairs0To7, pairs0To7);
    Vector128<short> pairs4To7 = Sse2.UnpackHigh(pairs0To7, pairs0To7);
    Vector128<short> pairs8To11 = Sse2.UnpackLow(pairs8To15, pairs8To15);
    Vector128<short> pairs12To15 = Sse2.UnpackHigh(pairs8To15, pairs8To15);

    Vector128<sbyte> w0And1 = Sse2.UnpackLow(pairs0To3, pairs0To3).AsSByte();
    Vector128<sbyte> w2And3 = Sse2.UnpackHigh(pairs0To3, pairs0To3).AsSByte();
    Vector128<sbyte> w4And5 = Sse2.UnpackLow(pairs4To7, pairs4To7).AsSByte();
    Vector128<sbyte> w6And7 = Sse2.UnpackHigh(pairs4To7, pairs4To7).AsSByte();
    Vector128<sbyte> w8And9 = Sse2.UnpackLow(pairs8To11, pairs8To11).AsSByte();
    Vector128<sbyte> w10And11 = Sse2.UnpackHigh(pairs8To11, pairs8To11).AsSByte();
    Vector128<sbyte> w12And13 = Sse2.UnpackLow(pairs12To15, pairs12To15).AsSByte();
    Vector128<sbyte> w14And15 = Sse2.UnpackHigh(pairs12To15, pairs12To15).AsSByte();

    // Each 16-bit lane is (c0.ch * inverseWeight + c1.ch * weight + 32) >> 6.
    Vector128<short> pal0And1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, w0And1));
    Vector128<short> pal2And3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, w2And3));
    Vector128<short> pal4And5 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, w4And5));
    Vector128<short> pal6And7 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, w6And7));
    Vector128<short> pal8And9 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, w8And9));
    Vector128<short> pal10And11 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, w10And11));
    Vector128<short> pal12And13 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, w12And13));
    Vector128<short> pal14And15 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(endPointPairs, w14And15));

    int totalError = 0;
    int pixel = 0;

    for (int ty = 0; ty < h; ty++)
    {
        int rowBase = ty * 4;

        for (int tx = 0; tx < w; tx++)
        {
            uint texel = tile[pixel++] | alphaMask;

            // The pixel's four channels widened to 16 bits, present twice to match the palette layout.
            Vector128<short> color = Sse41.ConvertToVector128Int16(Vector128.Create(texel).AsByte());

            Vector128<short> diff0And1 = Sse2.Subtract(color, pal0And1);
            Vector128<short> diff2And3 = Sse2.Subtract(color, pal2And3);
            Vector128<short> diff4And5 = Sse2.Subtract(color, pal4And5);
            Vector128<short> diff6And7 = Sse2.Subtract(color, pal6And7);
            Vector128<short> diff8And9 = Sse2.Subtract(color, pal8And9);
            Vector128<short> diff10And11 = Sse2.Subtract(color, pal10And11);
            Vector128<short> diff12And13 = Sse2.Subtract(color, pal12And13);
            Vector128<short> diff14And15 = Sse2.Subtract(color, pal14And15);

            // Squares are summed two channels at a time here; the horizontal adds finish the sums.
            Vector128<int> sq0And1 = Sse2.MultiplyAddAdjacent(diff0And1, diff0And1);
            Vector128<int> sq2And3 = Sse2.MultiplyAddAdjacent(diff2And3, diff2And3);
            Vector128<int> sq4And5 = Sse2.MultiplyAddAdjacent(diff4And5, diff4And5);
            Vector128<int> sq6And7 = Sse2.MultiplyAddAdjacent(diff6And7, diff6And7);
            Vector128<int> sq8And9 = Sse2.MultiplyAddAdjacent(diff8And9, diff8And9);
            Vector128<int> sq10And11 = Sse2.MultiplyAddAdjacent(diff10And11, diff10And11);
            Vector128<int> sq12And13 = Sse2.MultiplyAddAdjacent(diff12And13, diff12And13);
            Vector128<int> sq14And15 = Sse2.MultiplyAddAdjacent(diff14And15, diff14And15);

            Vector128<int> err0To3 = Ssse3.HorizontalAdd(sq0And1, sq2And3);
            Vector128<int> err4To7 = Ssse3.HorizontalAdd(sq4And5, sq6And7);
            Vector128<int> err8To11 = Ssse3.HorizontalAdd(sq8And9, sq10And11);
            Vector128<int> err12To15 = Ssse3.HorizontalAdd(sq12And13, sq14And15);

            Vector128<ushort> errLowHalf = Sse41.PackUnsignedSaturate(err0To3, err4To7);
            Vector128<ushort> errHighHalf = Sse41.PackUnsignedSaturate(err8To11, err12To15);

            // Low 16 bits: minimum error of the half; the bits above them: its position within the half.
            uint bestLow = Sse41.MinHorizontal(errLowHalf).AsUInt32().GetElement(0);
            uint bestHigh = Sse41.MinHorizontal(errHighHalf).AsUInt32().GetElement(0);

            // The upper half (entries 8..15) wins only when it is strictly better.
            bool useHighHalf = (ushort)bestHigh < (ushort)bestLow;
            uint best = useHighHalf ? bestHigh : bestLow;
            uint indexBase = useHighHalf ? 8u : 0u;

            totalError += (ushort)best;
            indices[rowBase + tx] = (byte)(indexBase + (best >> 16));
        }
    }

    return totalError;
}
/// <summary>
/// Adds 32 to every 16-bit lane and then shifts each lane right by 6 bits (logical shift),
/// i.e. divides by 64 rounding to nearest for non-negative lanes.
/// </summary>
/// <param name="x">Vector of 16-bit values to scale down</param>
/// <returns>The biased and shifted vector</returns>
private static Vector128<short> ShiftRoundToNearest(Vector128<short> x)
{
    Vector128<short> half = Vector128.Create((short)32);
    Vector128<short> biased = Sse2.Add(x, half);

    return Sse2.ShiftRightLogical(biased, 6);
}
/// <summary>
/// Scalar index selection: builds the interpolated palette of every subset, then assigns to each
/// pixel of the tile the palette entry of its subset with the smallest squared difference.
/// </summary>
/// <param name="tile">Tile pixels as packed 32-bit colors, consumed sequentially row by row</param>
/// <param name="w">Number of pixels processed per tile row</param>
/// <param name="h">Number of tile rows processed</param>
/// <param name="endPoints0">First end point of each subset</param>
/// <param name="endPoints1">Second end point of each subset</param>
/// <param name="pBitValues">P-bit values: one per subset when <paramref name="pBits"/> equals <paramref name="subsetCount"/>, otherwise two per subset when <paramref name="pBits"/> is non-zero</param>
/// <param name="indices">Receives the selected index of each pixel at position <c>ty * 4 + tx</c></param>
/// <param name="subsetCount">Number of subsets; also selects the partition table group</param>
/// <param name="partition">Partition number used to look up the subset of every pixel</param>
/// <param name="indexBitCount">Number of bits per index, forwarded to the interpolation</param>
/// <param name="indexCount">Number of palette entries per subset</param>
/// <param name="colorDepth">Color bit depth forwarded to <c>Quantize</c></param>
/// <param name="alphaDepth">Alpha bit depth forwarded to <c>Quantize</c>; when zero, alpha 255 is ORed into the palette end points</param>
/// <param name="pBits">Number of p-bits (zero when the end points have none)</param>
/// <param name="alphaMask">Bits ORed into every tile pixel and into the end points before comparison</param>
/// <returns>Sum of the best (smallest) squared differences of all processed pixels</returns>
private static int SelectIndicesFallback(
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    ReadOnlySpan<uint> endPoints0,
    ReadOnlySpan<uint> endPoints1,
    ReadOnlySpan<int> pBitValues,
    Span<byte> indices,
    int subsetCount,
    int partition,
    int indexBitCount,
    int indexCount,
    int colorDepth,
    int alphaDepth,
    int pBits,
    uint alphaMask)
{
    uint alphaMaskForPalette = alphaMask;

    if (alphaDepth == 0)
    {
        alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32();
    }

    Span<uint> palette = stackalloc uint[subsetCount * indexCount];

    for (int subset = 0; subset < subsetCount; subset++)
    {
        int palBase = subset * indexCount;

        // A negative p-bit tells Quantize that no p-bit is in use.
        int pBit0 = -1, pBit1 = -1;

        if (pBits == subsetCount)
        {
            // One p-bit per subset, shared by both end points.
            pBit0 = pBit1 = pBitValues[subset];
        }
        else if (pBits != 0)
        {
            // One p-bit per end point.
            pBit0 = pBitValues[subset * 2];
            pBit1 = pBitValues[subset * 2 + 1];
        }

        RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0);
        RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1);

        Unsafe.As<RgbaColor8, uint>(ref c0) |= alphaMaskForPalette;
        Unsafe.As<RgbaColor8, uint>(ref c1) |= alphaMaskForPalette;

        // The end points themselves are the first and last palette entries.
        palette[palBase + 0] = c0.ToUInt32();
        palette[palBase + indexCount - 1] = c1.ToUInt32();

        for (int j = 1; j < indexCount - 1; j++)
        {
            palette[palBase + j] = Interpolate(c0, c1, j, indexBitCount).ToUInt32();
        }
    }

    // The partition row is the same for every pixel, so it is looked up once.
    var partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition];

    int errorSum = 0;
    int i = 0;

    for (int ty = 0; ty < h; ty++)
    {
        for (int tx = 0; tx < w; tx++)
        {
            int tileOffset = ty * 4 + tx;
            int palBase = partitionTable[tileOffset] * indexCount;

            // Converted once per pixel rather than once per palette entry.
            RgbaColor32 color = RgbaColor8.FromUInt32(tile[i++] | alphaMask).GetColor32();

            int bestMatchScore = int.MaxValue;
            int bestMatchIndex = 0;

            for (int j = 0; j < indexCount; j++)
            {
                int score = SquaredDifference(color, RgbaColor8.FromUInt32(palette[palBase + j]).GetColor32());

                // Strict comparison: on ties the lowest index is kept.
                if (score < bestMatchScore)
                {
                    bestMatchScore = score;
                    bestMatchIndex = j;
                }
            }

            indices[tileOffset] = (byte)bestMatchIndex;
            errorSum += bestMatchScore;
        }
    }

    return errorSum;
}
/// <summary>
/// Measures how far apart two colors are by taking the difference of the colors and passing it
/// to <c>RgbaColor32.Dot</c> with itself (presumably the sum of the squared channel differences).
/// </summary>
/// <param name="color1">First color</param>
/// <param name="color2">Second color</param>
/// <returns>Result of <c>RgbaColor32.Dot</c> applied to <c>color1 - color2</c> and itself</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SquaredDifference(RgbaColor32 color1, RgbaColor32 color2)
{
    RgbaColor32 difference = color1 - color2;

    return RgbaColor32.Dot(difference, difference);
}
/// <summary>
/// Interpolates between two 8-bit colors by converting them to <c>RgbaColor32</c>, blending them with
/// the weight derived from the index, and converting the result back to <c>RgbaColor8</c>.
/// </summary>
/// <param name="color1">Color used with the inverse weight</param>
/// <param name="color2">Color used with the weight</param>
/// <param name="weightIndex">Index from which the interpolation weight is derived</param>
/// <param name="indexBitCount">Number of bits of the index</param>
/// <returns>The interpolated color</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static RgbaColor8 Interpolate(RgbaColor8 color1, RgbaColor8 color2, int weightIndex, int indexBitCount)
{
    RgbaColor32 wide1 = color1.GetColor32();
    RgbaColor32 wide2 = color2.GetColor32();
    RgbaColor32 blended = Interpolate(wide1, wide2, weightIndex, indexBitCount);

    return blended.GetColor8();
}
/// <summary>
/// Blends two colors with a 6-bit fixed point weight computed from the index:
/// <c>(color1 * (64 - weight) + color2 * weight + 32) >> 6</c>.
/// </summary>
/// <param name="color1">Color used with the inverse weight</param>
/// <param name="color2">Color used with the weight</param>
/// <param name="weightIndex">Index from which the weight is derived</param>
/// <param name="indexBitCount">Number of bits of the index; asserted to be between 2 and 4</param>
/// <returns>The interpolated color</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static RgbaColor32 Interpolate(RgbaColor32 color1, RgbaColor32 color2, int weightIndex, int indexBitCount)
{
    Debug.Assert(indexBitCount >= 2 && indexBitCount <= 4);

    // weightIndex * 64 / maxIndex, computed with one extra fractional bit so that the
    // final "+ 1, >> 1" rounds to nearest.
    int maxIndex = (1 << indexBitCount) - 1;
    int doubledWeight = (weightIndex << 7) / maxIndex;
    int weight = (doubledWeight + 1) >> 1;

    RgbaColor32 weightV = new RgbaColor32(weight);
    RgbaColor32 invWeightV = new RgbaColor32(64 - weight);
    RgbaColor32 weightedSum = color1 * invWeightV + color2 * weightV;

    return (weightedSum + new RgbaColor32(32)) >> 6;
}
/// <summary>
/// Blends two colors using separate weights for the alpha component and for the other components,
/// both read from <c>BC67Tables.Weights</c>:
/// <c>(color1 * (64 - weight) + color2 * weight + 32) >> 6</c>.
/// </summary>
/// <param name="color1">Color used with the inverse weights</param>
/// <param name="color2">Color used with the weights</param>
/// <param name="colorWeightIndex">Index of the weight applied to the non-alpha components</param>
/// <param name="alphaWeightIndex">Index of the weight applied to the alpha component</param>
/// <param name="colorIndexBitCount">Number of bits of the color index; asserted to be between 2 and 4</param>
/// <param name="alphaIndexBitCount">Number of bits of the alpha index; asserted to be between 2 and 4</param>
/// <returns>The interpolated color</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static RgbaColor32 Interpolate(
    RgbaColor32 color1,
    RgbaColor32 color2,
    int colorWeightIndex,
    int alphaWeightIndex,
    int colorIndexBitCount,
    int alphaIndexBitCount)
{
    Debug.Assert(colorIndexBitCount >= 2 && colorIndexBitCount <= 4);
    Debug.Assert(alphaIndexBitCount >= 2 && alphaIndexBitCount <= 4);

    // The weight tables are indexed by (bit count - 2).
    int colorWeight = BC67Tables.Weights[colorIndexBitCount - 2][colorWeightIndex];
    int alphaWeight = BC67Tables.Weights[alphaIndexBitCount - 2][alphaWeightIndex];

    // Start from the color weight in every component, then override the alpha component.
    RgbaColor32 weightV = new RgbaColor32(colorWeight) { A = alphaWeight };
    RgbaColor32 invWeightV = new RgbaColor32(64) - weightV;
    RgbaColor32 weightedSum = color1 * invWeightV + color2 * weightV;

    return (weightedSum + new RgbaColor32(32)) >> 6;
}
/// <summary>
/// Quantizes a color to the given bit depths and expands it back to 8 bits per component.
/// When <paramref name="alphaBits"/> is zero, the R, G and B components go through the quantization
/// lookup tables and alpha is passed through unchanged; otherwise the work is delegated to
/// <c>QuantizeFallback</c>.
/// </summary>
/// <param name="color">Color to quantize</param>
/// <param name="colorBits">Number of bits of the quantized R, G and B components</param>
/// <param name="alphaBits">Number of bits of the quantized alpha component, or zero to keep alpha as is</param>
/// <param name="pBit">P-bit value (0 or 1), or a negative value when no p-bit is used</param>
/// <returns>The quantized color expanded to 8 bits per component</returns>
public static RgbaColor8 Quantize(RgbaColor8 color, int colorBits, int alphaBits, int pBit = -1)
{
    if (alphaBits != 0)
    {
        return QuantizeFallback(color, colorBits, alphaBits, pBit);
    }

    int colorShift = 8 - colorBits;
    uint packed;

    if (pBit < 0)
    {
        byte[] lut = _quantizationLutNoPBit[colorBits - 4];

        // Low bits of each of the three color bytes that are left empty after the shift.
        uint fillMask = (0xffu >> colorBits) * 0x10101;

        packed = lut[color.R] | ((uint)lut[color.G] << 8) | ((uint)lut[color.B] << 16);

        // Move the quantized values to the top of each byte and replicate their high bits below.
        packed <<= colorShift;
        packed |= (packed >> colorBits) & fillMask;
    }
    else
    {
        byte[] lut = _quantizationLut[colorBits - 4];

        Debug.Assert(pBit <= 1);

        // The p-bit selects the second half of the table.
        int high = pBit << 8;

        // One bit fewer than without p-bit: the bit right below the value is taken by the p-bit.
        uint fillMask = (0xffu >> (colorBits + 1)) * 0x10101;

        packed = lut[color.R | high] | ((uint)lut[color.G | high] << 8) | ((uint)lut[color.B | high] << 16);

        packed <<= colorShift;
        packed |= (packed >> (colorBits + 1)) & fillMask;
        packed |= ((uint)pBit * 0x10101) << (colorShift - 1);
    }

    packed |= (uint)color.A << 24;

    return RgbaColor8.FromUInt32(packed);
}
/// <summary>
/// Quantizes and then unquantizes every component individually. The alpha component is kept
/// unchanged when <paramref name="alphaBits"/> is zero.
/// </summary>
/// <param name="color">Color to quantize</param>
/// <param name="colorBits">Number of bits of the quantized R, G and B components</param>
/// <param name="alphaBits">Number of bits of the quantized alpha component, or zero to keep alpha as is</param>
/// <param name="pBit">P-bit value (0 or 1), or a negative value when no p-bit is used</param>
/// <returns>The quantized color expanded to 8 bits per component</returns>
private static RgbaColor8 QuantizeFallback(RgbaColor8 color, int colorBits, int alphaBits, int pBit = -1)
{
    byte red = UnquantizeComponent(QuantizeComponent(color.R, colorBits, pBit), colorBits, pBit);
    byte green = UnquantizeComponent(QuantizeComponent(color.G, colorBits, pBit), colorBits, pBit);
    byte blue = UnquantizeComponent(QuantizeComponent(color.B, colorBits, pBit), colorBits, pBit);
    byte alpha = color.A;

    if (alphaBits != 0)
    {
        alpha = UnquantizeComponent(QuantizeComponent(alpha, alphaBits, pBit), alphaBits, pBit);
    }

    return new RgbaColor8(red, green, blue, alpha);
}
/// <summary>
/// Looks up the quantized value of a single component in the precomputed tables.
/// </summary>
/// <param name="component">8-bit component value</param>
/// <param name="bits">Number of bits of the quantized value; the tables are indexed by <c>bits - 4</c></param>
/// <param name="pBit">P-bit value (0 or 1), or a negative value when no p-bit is used</param>
/// <returns>The quantized component</returns>
public static byte QuantizeComponent(byte component, int bits, int pBit = -1)
{
    int table = bits - 4;

    if (pBit < 0)
    {
        return _quantizationLutNoPBit[table][component];
    }

    // With a p-bit, the p-bit is placed above the 8 component bits to form the table index.
    return _quantizationLut[table][component | (pBit << 8)];
}
/// <summary>
/// Computes the quantized value of a component by comparing three candidates: the truncated value
/// and its two neighbours (clamped to the valid range), each expanded back to 8 bits with an
/// estimate of the low "fill" bits. Presumably used to populate the quantization lookup tables.
/// </summary>
/// <param name="component">8-bit component value</param>
/// <param name="bits">Number of bits of the quantized value</param>
/// <param name="pBit">P-bit value (0 or 1), or a negative value when no p-bit is used</param>
/// <returns>The selected candidate</returns>
private static byte QuantizeComponentForLut(byte component, int bits, int pBit = -1)
{
    int shift = 8 - bits;

    // Bits that end up below the quantized value once it is expanded back to 8 bits.
    int lowBits = component >> bits;

    if (pBit >= 0)
    {
        Debug.Assert(pBit <= 1);

        // The p-bit takes the position right below the quantized value.
        lowBits = (lowBits >> 1) | (pBit << (shift - 1));
    }

    int truncated = component >> shift;
    int below = Math.Max(truncated - 1, 0);
    int above = Math.Min(truncated + 1, (1 << bits) - 1);

    int errorTruncated = FastAbs(((truncated << shift) | lowBits) - component);
    int errorBelow = component - ((below << shift) | lowBits);
    int errorAbove = ((above << shift) | lowBits) - component;

    // The truncated value must be strictly better than both neighbours to be chosen.
    bool truncatedWins = errorTruncated < errorBelow && errorTruncated < errorAbove;
    int selected = truncatedWins
        ? truncated
        : (errorBelow < errorAbove ? below : above);

    return (byte)selected;
}
/// <summary>
/// Branch-free absolute value of a 32-bit integer.
/// </summary>
/// <param name="x">Value to take the absolute value of</param>
/// <returns>The absolute value of <paramref name="x"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int FastAbs(int x)
{
    // All ones (-1) when x is negative, zero otherwise.
    int signMask = x >> 31;

    // For negative x: (x - 1) with all bits flipped equals -x. For non-negative x nothing changes.
    int adjusted = x + signMask;

    return adjusted ^ signMask;
}
/// <summary>
/// Expands a quantized component back to 8 bits: the value is moved to the top bits and the bits
/// below it are filled with the p-bit (when present) followed by a copy of the value's high bits.
/// </summary>
/// <param name="component">Quantized component value</param>
/// <param name="bits">Number of bits of the quantized value</param>
/// <param name="pBit">P-bit value (0 or 1), or a negative value when no p-bit is used</param>
/// <returns>The expanded component, truncated to 8 bits</returns>
private static byte UnquantizeComponent(byte component, int bits, int pBit)
{
    int shift = 8 - bits;
    int expanded = component << shift;

    if (pBit < 0)
    {
        return (byte)(expanded | (expanded >> bits));
    }

    Debug.Assert(pBit <= 1);

    // The replicated bits start one position lower to leave room for the p-bit.
    expanded |= expanded >> (bits + 1);
    expanded |= pBit << (shift - 1);

    return (byte)expanded;
}
- }
- }
|