|
|
@@ -1,483 +1,1140 @@
|
|
|
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
|
|
+using System;
|
|
|
using System.Collections.Generic;
|
|
|
+using System.Linq;
|
|
|
|
|
|
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
|
|
-using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
|
|
|
|
|
|
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|
|
{
|
|
|
static class GlobalToStorage
|
|
|
{
|
|
|
- private struct SearchResult
|
|
|
+ private const int DriverReservedCb = 0;
|
|
|
+
|
|
|
/// <summary>
/// Kind of on-chip memory that a tracked store targets.
/// Used as part of the key that identifies a memory location holding a storage buffer base address.
/// </summary>
private enum LsMemoryType
{
    /// <summary>Location written by a <c>StoreLocal</c> instruction.</summary>
    Local,

    /// <summary>Location written by a <c>StoreShared</c> instruction.</summary>
    Shared,
}
|
|
|
|
|
|
- public SearchResult(int sbCbSlot, int sbCbOffset)
|
|
|
/// <summary>
/// State shared by a single run of the pass: the helper functions generated so far
/// (so that identical ones are reused) and the constant buffer locations whose
/// contents were seen being stored into local or shared memory.
/// </summary>
private class GtsContext
{
    /// <summary>
    /// Describes one generated helper function and the parameters it was generated for.
    /// </summary>
    private readonly struct Entry
    {
        public readonly int FunctionId;
        public readonly Instruction Inst;
        public readonly StorageKind StorageKind;
        public readonly bool IsMultiTarget;
        public readonly IReadOnlyList<uint> TargetCbs;

        public Entry(
            int functionId,
            Instruction inst,
            StorageKind storageKind,
            bool isMultiTarget,
            IReadOnlyList<uint> targetCbs)
        {
            FunctionId = functionId;
            Inst = inst;
            StorageKind = storageKind;
            IsMultiTarget = isMultiTarget;
            TargetCbs = targetCbs;
        }
    }

    /// <summary>
    /// Identifies a local/shared memory location by base offset operand, constant offset and memory type.
    /// </summary>
    private readonly struct LsKey : IEquatable<LsKey>
    {
        public readonly Operand BaseOffset;
        public readonly int ConstOffset;
        public readonly LsMemoryType Type;

        public LsKey(Operand baseOffset, int constOffset, LsMemoryType type)
        {
            BaseOffset = baseOffset;
            ConstOffset = constOffset;
            Type = type;
        }

        public override int GetHashCode() => HashCode.Combine(BaseOffset, ConstOffset, Type);

        public override bool Equals(object obj) => obj is LsKey other && Equals(other);

        public bool Equals(LsKey other)
        {
            return other.BaseOffset == BaseOffset &&
                   other.ConstOffset == ConstOffset &&
                   other.Type == Type;
        }
    }

    private readonly List<Entry> _entries;
    private readonly Dictionary<LsKey, Dictionary<uint, SearchResult>> _sharedEntries;
    private readonly HelperFunctionManager _hfm;

    /// <summary>
    /// Creates an empty context.
    /// </summary>
    /// <param name="hfm">Manager that newly generated helper functions are registered with</param>
    public GtsContext(HelperFunctionManager hfm)
    {
        _hfm = hfm;
        _entries = new List<Entry>();
        _sharedEntries = new Dictionary<LsKey, Dictionary<uint, SearchResult>>();
    }

    /// <summary>
    /// Registers a generated helper function and remembers what it was generated for.
    /// </summary>
    /// <param name="baseOp">Operation the function implements (its instruction and storage kind are recorded)</param>
    /// <param name="isMultiTarget">Whether the function selects between several storage buffers</param>
    /// <param name="targetCbs">Packed constant buffer slot/offset pairs the function was generated for</param>
    /// <param name="function">The generated function</param>
    /// <returns>Id returned by the helper function manager</returns>
    public int AddFunction(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs, Function function)
    {
        int id = _hfm.AddFunction(function);

        _entries.Add(new Entry(id, baseOp.Inst, baseOp.StorageKind, isMultiTarget, targetCbs));

        return id;
    }

    /// <summary>
    /// Looks for a previously registered function generated for the same instruction, storage kind,
    /// multi-target flag and exact list of targets.
    /// </summary>
    /// <param name="baseOp">Operation that needs a helper function</param>
    /// <param name="isMultiTarget">Whether a multi-target function is wanted</param>
    /// <param name="targetCbs">Packed constant buffer slot/offset pairs, compared element by element in order</param>
    /// <param name="functionId">Id of the matching function, or -1 when there is none</param>
    /// <returns>True if a matching function exists</returns>
    public bool TryGetFunctionId(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs, out int functionId)
    {
        foreach (Entry candidate in _entries)
        {
            bool sameSignature =
                candidate.Inst == baseOp.Inst &&
                candidate.StorageKind == baseOp.StorageKind &&
                candidate.IsMultiTarget == isMultiTarget &&
                candidate.TargetCbs.Count == targetCbs.Count;

            if (!sameSignature)
            {
                continue;
            }

            // Advance while the targets agree; reaching the end means every element matched.
            int matched = 0;

            while (matched < targetCbs.Count && targetCbs[matched] == candidate.TargetCbs[matched])
            {
                matched++;
            }

            if (matched == targetCbs.Count)
            {
                functionId = candidate.FunctionId;

                return true;
            }
        }

        functionId = -1;

        return false;
    }

    /// <summary>
    /// Records that the value found at a constant buffer location was stored at the given
    /// local/shared memory location.
    /// </summary>
    /// <param name="type">Memory type of the store</param>
    /// <param name="baseOffset">Base offset operand of the store</param>
    /// <param name="constOffset">Constant offset of the store</param>
    /// <param name="targetCb">Packed constant buffer slot/offset the stored value comes from</param>
    /// <param name="result">Search result describing that constant buffer location</param>
    public void AddMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, uint targetCb, SearchResult result)
    {
        LsKey key = new LsKey(baseOffset, constOffset, type);

        if (!_sharedEntries.TryGetValue(key, out Dictionary<uint, SearchResult> perCb))
        {
            // First store seen for this memory location.
            perCb = new Dictionary<uint, SearchResult>();
            perCb.Add(targetCb, result);

            _sharedEntries.Add(key, perCb);

            return;
        }

        if (!perCb.TryGetValue(targetCb, out SearchResult known))
        {
            // The memory location is known, but not with this constant buffer region as the source.
            perCb.Add(targetCb, result);

            return;
        }

        // Same location and same constant buffer region, but a different offset:
        // replace the entry by one with a null offset to signal that several offsets are possible.
        // Multi-target accesses do not need the offset, so such an entry is still useful to them.
        bool offsetStillKnown = known.Offset != null;
        bool offsetDiffers = known.Offset != result.Offset || known.ConstOffset != result.ConstOffset;

        if (offsetStillKnown && offsetDiffers)
        {
            perCb[targetCb] = new SearchResult(result.SbCbSlot, result.SbCbOffset);
        }
    }

    /// <summary>
    /// Gets the constant buffer location recorded for a local/shared memory location,
    /// provided exactly one was recorded and it is a found result.
    /// </summary>
    /// <param name="type">Memory type of the location</param>
    /// <param name="baseOffset">Base offset operand of the location</param>
    /// <param name="constOffset">Constant offset of the location</param>
    /// <param name="result">The recorded result, or the default value on failure</param>
    /// <returns>True if a unique, found result exists for the location</returns>
    public bool TryGetMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, out SearchResult result)
    {
        result = default;

        LsKey key = new LsKey(baseOffset, constOffset, type);

        if (!_sharedEntries.TryGetValue(key, out Dictionary<uint, SearchResult> perCb) || perCb.Count != 1)
        {
            return false;
        }

        SearchResult onlyResult = perCb.Values.First();

        if (!onlyResult.Found)
        {
            return false;
        }

        result = onlyResult;

        return true;
    }
}
|
|
|
+
|
|
|
/// <summary>
/// Outcome of a search for the constant buffer location holding a storage buffer base address.
/// </summary>
private struct SearchResult
{
    /// <summary>Result representing a failed search (slot -1).</summary>
    public static SearchResult NotFound => new SearchResult(-1, 0);

    /// <summary>True unless the slot is -1.</summary>
    public bool Found => SbCbSlot != -1;

    /// <summary>Constant buffer slot of the base address, or -1 when not found.</summary>
    public int SbCbSlot { get; }

    /// <summary>Constant buffer offset of the base address.</summary>
    public int SbCbOffset { get; }

    /// <summary>Offset operand of the access; null when this result was created without one.</summary>
    public Operand Offset { get; }

    /// <summary>Constant added on top of <see cref="Offset"/>.</summary>
    public int ConstOffset { get; }

    /// <summary>
    /// Creates a result that carries only the constant buffer location (null offset, zero constant offset).
    /// </summary>
    /// <param name="sbCbSlot">Constant buffer slot</param>
    /// <param name="sbCbOffset">Constant buffer offset</param>
    public SearchResult(int sbCbSlot, int sbCbOffset) : this(sbCbSlot, sbCbOffset, null, 0)
    {
    }

    /// <summary>
    /// Creates a result that carries the constant buffer location and the access offset.
    /// </summary>
    /// <param name="sbCbSlot">Constant buffer slot</param>
    /// <param name="sbCbOffset">Constant buffer offset</param>
    /// <param name="offset">Offset operand of the access</param>
    /// <param name="constOffset">Constant part of the offset</param>
    public SearchResult(int sbCbSlot, int sbCbOffset, Operand offset, int constOffset = 0)
    {
        SbCbSlot = sbCbSlot;
        SbCbOffset = sbCbOffset;
        Offset = offset;
        ConstOffset = constOffset;
    }
}
|
|
|
+
|
|
|
/// <summary>
/// Walks every operation of every block, replacing global memory operations by storage buffer
/// accesses and recording which constant buffer values are stored into local or shared memory.
/// </summary>
/// <remarks>
/// When a global memory operation cannot be replaced, a message is logged and the operation
/// is neutralized: it becomes a copy of constant 0 if it has a destination, otherwise it is deleted.
/// </remarks>
/// <param name="hfm">Manager that generated helper functions are registered with</param>
/// <param name="blocks">Blocks to process, in order</param>
/// <param name="config">Shader configuration, used for logging and by the replacement helpers</param>
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
{
    GtsContext gtsContext = new GtsContext(hfm);

    foreach (BasicBlock block in blocks)
    {
        LinkedListNode<INode> node = block.Operations.First;

        while (node != null)
        {
            // Read the successor before the list is modified. The failure path below may delete
            // "node"; a LinkedListNode that is no longer in a list reports a null Next, which
            // would end the walk early and leave the rest of the block unprocessed.
            // (Nodes inserted by the helpers go before "node", so this successor stays valid.)
            LinkedListNode<INode> followingNode = node.Next;

            if (node.Value is Operation operation)
            {
                if (IsGlobalMemory(operation.StorageKind))
                {
                    LinkedListNode<INode> replacementNode = ReplaceGlobalMemoryWithStorage(gtsContext, config, block, node);

                    if (replacementNode == null)
                    {
                        // The returned value being null means that the global memory replacement failed,
                        // so we just make loads read 0 and stores do nothing.

                        config.GpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\".");

                        if (operation.Dest != null)
                        {
                            operation.TurnIntoCopy(Const(0));
                        }
                        else
                        {
                            Utils.DeleteNode(node, operation);
                        }
                    }
                    else
                    {
                        // Resume right after the node that now stands in for the original operation.
                        followingNode = replacementNode.Next;
                    }
                }
                else if (operation.Inst == Instruction.StoreShared || operation.Inst == Instruction.StoreLocal)
                {
                    // The NVIDIA compiler can sometimes use shared or local memory as temporary
                    // storage to place the base address and size on, so we need
                    // to be able to find such information stored in memory too.

                    if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand baseOffset, out int constOffset))
                    {
                        // The stored value is the last source of the store.
                        Operand value = operation.GetSource(operation.SourcesCount - 1);

                        var result = FindUniqueBaseAddressCb(gtsContext, block, value, needsOffset: false);

                        if (result.Found)
                        {
                            uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset);

                            gtsContext.AddMemoryTargetCb(type, baseOffset, constOffset, targetCb, result);
                        }
                    }
                }
            }

            node = followingNode;
        }
    }
}
|
|
|
|
|
|
- config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
|
|
|
/// <summary>
/// Checks whether a storage kind is one of the global memory kinds (32-bit or 8/16-bit signed/unsigned).
/// </summary>
/// <param name="storageKind">Storage kind to check</param>
/// <returns>True for any of the five global memory storage kinds, false otherwise</returns>
private static bool IsGlobalMemory(StorageKind storageKind)
{
    switch (storageKind)
    {
        case StorageKind.GlobalMemory:
        case StorageKind.GlobalMemoryS8:
        case StorageKind.GlobalMemoryS16:
        case StorageKind.GlobalMemoryU8:
        case StorageKind.GlobalMemoryU16:
            return true;

        default:
            return false;
    }
}
|
|
|
|
|
|
- private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
|
|
|
/// <summary>
/// Checks whether a storage kind is one of the 8-bit or 16-bit global memory kinds.
/// </summary>
/// <param name="storageKind">Storage kind to check</param>
/// <returns>True for the S8, S16, U8 and U16 global memory kinds, false otherwise</returns>
private static bool IsSmallInt(StorageKind storageKind)
{
    switch (storageKind)
    {
        case StorageKind.GlobalMemoryS8:
        case StorageKind.GlobalMemoryS16:
        case StorageKind.GlobalMemoryU8:
        case StorageKind.GlobalMemoryU16:
            return true;

        default:
            return false;
    }
}
|
|
|
|
|
|
- bool isAtomic = operation.Inst.IsAtomic();
|
|
|
- bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
|
|
|
- bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
|
|
|
/// <summary>
/// Replaces one global memory operation by a storage buffer access, either inline or through
/// a call to a generated helper function.
/// </summary>
/// <param name="gtsContext">Pass context used to look up and register helper functions</param>
/// <param name="config">Shader configuration</param>
/// <param name="block">Block that contains the operation</param>
/// <param name="node">List node of the global memory operation</param>
/// <returns>The node from which iteration should continue, or null if the replacement failed</returns>
private static LinkedListNode<INode> ReplaceGlobalMemoryWithStorage(
    GtsContext gtsContext,
    ShaderConfig config,
    BasicBlock block,
    LinkedListNode<INode> node)
{
    Operation operation = node.Value as Operation;
    Operand globalAddress = operation.GetSource(0);
    SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, globalAddress, needsOffset: true);

    if (!result.Found)
    {
        // No single storage buffer could be identified from the address.
        // Fall back to a helper function that walks the Phi chains, collects every constant buffer
        // location the base address may come from, and picks the right storage buffer at run time.
        if (!TryGenerateMultiTargetStorageOp(gtsContext, config, block, operation, out int multiTargetFunctionId))
        {
            return null;
        }

        return GenerateCallStorageOp(node, operation, null, multiTargetFunctionId);
    }

    // A single storage buffer was identified. Simple operations are emitted inline,
    // everything else goes through a (possibly newly generated) helper function.
    Operand offset = result.Offset;

    if (config.GpuAccessor.QueryHasUnalignedStorageBuffer())
    {
        // Recompute the offset as: global address - (base address & -alignment).
        Operand baseAddress = Cbuf(result.SbCbSlot, result.SbCbOffset);

        Operand baseAddressMasked = Local();
        Operand hostOffset = Local();

        int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment();

        Operation maskOp = new Operation(Instruction.BitwiseAnd, baseAddressMasked, new[] { baseAddress, Const(-alignment) });
        Operation subOp = new Operation(Instruction.Subtract, hostOffset, new[] { globalAddress, baseAddressMasked });

        node.List.AddBefore(node, maskOp);
        node.List.AddBefore(node, subOp);

        offset = hostOffset;
    }
    else if (result.ConstOffset != 0)
    {
        // Fold the constant part of the offset into a new local.
        Operand offsetWithConst = Local();

        Operation addOp = new Operation(Instruction.Add, offsetWithConst, new[] { offset, Const(result.ConstOffset) });

        node.List.AddBefore(node, addOp);

        offset = offsetWithConst;
    }

    if (CanUseInlineStorageOp(operation, config.Options.TargetLanguage))
    {
        return GenerateInlineStorageOp(config, node, operation, offset, result);
    }

    if (!TryGenerateSingleTargetStorageOp(gtsContext, config, operation, result, out int functionId))
    {
        return null;
    }

    return GenerateCallStorageOp(node, operation, offset, functionId);
}
|
|
|
|
|
|
- if (isAtomic)
|
|
|
/// <summary>
/// Decides whether an operation can be emitted as an inline storage buffer access
/// instead of a helper function call.
/// </summary>
/// <param name="operation">Global memory operation being replaced</param>
/// <param name="targetLanguage">Language the shader is being translated to</param>
/// <returns>
/// False for any storage kind other than <c>GlobalMemory</c>. Otherwise true, except for
/// <c>AtomicMaxS32</c> and <c>AtomicMinS32</c>, which are only inlined when targeting SPIR-V.
/// </returns>
private static bool CanUseInlineStorageOp(Operation operation, TargetLanguage targetLanguage)
{
    if (operation.StorageKind == StorageKind.GlobalMemory)
    {
        bool isSignedMinMax =
            operation.Inst == Instruction.AtomicMaxS32 ||
            operation.Inst == Instruction.AtomicMinS32;

        return !isSignedMinMax || targetLanguage == TargetLanguage.Spirv;
    }

    return false;
}
|
|
|
+
|
|
|
/// <summary>
/// Emits the storage buffer operation directly in place of the global memory operation.
/// </summary>
/// <param name="config">Shader configuration, used to obtain the storage buffer binding</param>
/// <param name="node">List node of the global memory operation</param>
/// <param name="operation">The global memory operation</param>
/// <param name="offset">Operand that is shifted right by 2 to form the third source of the new operation</param>
/// <param name="result">Constant buffer location of the storage buffer base address</param>
/// <returns>Node of the new storage operation, or null if no binding could be obtained</returns>
private static LinkedListNode<INode> GenerateInlineStorageOp(
    ShaderConfig config,
    LinkedListNode<INode> node,
    Operation operation,
    Operand offset,
    SearchResult result)
{
    Instruction inst = operation.Inst;

    // Atomics are passed to the resource manager as stores as well.
    bool isStore = inst == Instruction.Store || inst.IsAtomic();

    if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
    {
        return null;
    }

    Operand wordOffset = Local();

    // Every variant starts with: binding, constant 0, word offset.
    List<Operand> sourceList = new List<Operand> { Const(binding), Const(0), wordOffset };

    int lastSourceIndex = operation.SourcesCount - 1;

    if (inst == Instruction.AtomicCompareAndSwap)
    {
        // Compare-and-swap forwards the last two sources of the original operation.
        sourceList.Add(operation.GetSource(lastSourceIndex - 1));
        sourceList.Add(operation.GetSource(lastSourceIndex));
    }
    else if (isStore)
    {
        // Stores and the remaining atomics forward only the last source.
        sourceList.Add(operation.GetSource(lastSourceIndex));
    }

    Operand[] sources = sourceList.ToArray();

    Operation shiftOp = new Operation(Instruction.ShiftRightU32, wordOffset, new[] { offset, Const(2) });
    Operation storageOp = new Operation(inst, StorageKind.StorageBuffer, operation.Dest, sources);

    node.List.AddBefore(node, shiftOp);
    LinkedListNode<INode> storageNode = node.List.AddBefore(node, storageOp);

    Utils.DeleteNode(node, operation);

    return storageNode;
}
|
|
|
|
|
|
- private static Operand GetStorageOffset(
|
|
|
- BasicBlock block,
|
|
|
- LinkedListNode<INode> node,
|
|
|
- ShaderConfig config,
|
|
|
- int storageIndex,
|
|
|
- Operand addrLow,
|
|
|
- bool isStg16Or8)
|
|
|
/// <summary>
/// Replaces the operation by a call to the helper function that performs the storage buffer access.
/// </summary>
/// <param name="node">List node of the global memory operation</param>
/// <param name="operation">The global memory operation</param>
/// <param name="offset">
/// Offset passed in place of the first two sources of the operation, or null to pass all of the
/// operation's sources unchanged
/// </param>
/// <param name="functionId">Id of the helper function to call</param>
/// <returns>
/// <paramref name="node"/> itself when the operation has a destination (it becomes a copy of the
/// call result), otherwise the node of the inserted call
/// </returns>
private static LinkedListNode<INode> GenerateCallStorageOp(LinkedListNode<INode> node, Operation operation, Operand offset, int functionId)
{
    bool hasOffset = offset != null;
    int operationSourcesCount = operation.SourcesCount;

    // Slot 0 holds the function id. With an offset, the offset takes the place of the two address
    // sources (same total count); without it, every source is shifted up by one.
    Operand[] callSources = new Operand[hasOffset ? operationSourcesCount : operationSourcesCount + 1];

    callSources[0] = Const(functionId);

    int firstForwardedSource = 0;
    int destinationShift = 1;

    if (hasOffset)
    {
        callSources[1] = offset;

        firstForwardedSource = 2;
        destinationShift = 0;
    }

    // Without an offset this forwards the 64-bit global address as two 32-bit arguments,
    // followed by the remaining sources.
    for (int index = firstForwardedSource; index < operationSourcesCount; index++)
    {
        callSources[index + destinationShift] = operation.GetSource(index);
    }

    bool returnsValue = operation.Dest != null;
    Operand callResult = returnsValue ? Local() : null;

    Operation callOp = new Operation(Instruction.Call, callResult, callSources);

    LinkedListNode<INode> callNode = node.List.AddBefore(node, callOp);

    if (!returnsValue)
    {
        Utils.DeleteNode(node, operation);

        return callNode;
    }

    // Keep the original node so its destination stays defined, now as a copy of the call result.
    operation.TurnIntoCopy(callResult);

    return node;
}
|
|
|
|
|
|
- node.List.AddBefore(node, andOp);
|
|
|
/// <summary>
/// Gets the helper function that performs the operation on one known storage buffer,
/// generating and registering it if no equivalent function exists yet.
/// </summary>
/// <param name="gtsContext">Pass context used to look up and register helper functions</param>
/// <param name="config">Shader configuration</param>
/// <param name="operation">Global memory operation being replaced</param>
/// <param name="result">Constant buffer location of the storage buffer base address</param>
/// <param name="functionId">Id of the helper function; 0 when generation fails</param>
/// <returns>True if a function is available, false if the storage operation could not be generated</returns>
private static bool TryGenerateSingleTargetStorageOp(
    GtsContext gtsContext,
    ShaderConfig config,
    Operation operation,
    SearchResult result,
    out int functionId)
{
    List<uint> targetCbs = new List<uint>() { PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset) };

    if (gtsContext.TryGetFunctionId(operation, isMultiTarget: false, targetCbs, out functionId))
    {
        return true;
    }

    // Argument layout: offset [, compare], [value].
    bool isCompareAndSwap = operation.Inst == Instruction.AtomicCompareAndSwap;
    bool takesValue = isCompareAndSwap || operation.Inst == Instruction.Store || operation.Inst.IsAtomic();

    int inArgumentsCount = isCompareAndSwap ? 3 : (takesValue ? 2 : 1);

    EmitterContext context = new EmitterContext();

    Operand offset = Argument(0);
    Operand compare = null;
    Operand value = null;

    if (isCompareAndSwap)
    {
        compare = Argument(1);
        value = Argument(2);
    }
    else if (takesValue)
    {
        value = Argument(1);
    }

    bool generated = TryGenerateStorageOp(
        config,
        context,
        operation.Inst,
        operation.StorageKind,
        offset,
        compare,
        value,
        result,
        out Operand resultValue);

    if (!generated)
    {
        functionId = 0;

        return false;
    }

    bool returnsValue = resultValue != null;

    if (returnsValue)
    {
        context.Return(resultValue);
    }
    else
    {
        context.Return();
    }

    string functionName = GetFunctionName(operation, isMultiTarget: false, targetCbs);

    BasicBlock[] functionBlocks = ControlFlowGraph.Create(context.GetOperations()).Blocks;

    Function function = new Function(functionBlocks, functionName, returnsValue, inArgumentsCount, 0);

    functionId = gtsContext.AddFunction(operation, isMultiTarget: false, targetCbs, function);

    return true;
}
|
|
|
|
|
|
- private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
|
|
|
/// <summary>
/// Gets the helper function that performs the operation when the storage buffer is not known
/// statically. The function receives the full global address, tests it against every candidate
/// storage buffer and performs the access on the first one whose range contains the address.
/// </summary>
/// <remarks>
/// Candidates are collected by following Phi nodes from the address operand. When none is found a
/// message is logged and the function is still produced; it then only returns (0 if a value is expected).
/// </remarks>
/// <param name="gtsContext">Pass context used to look up and register helper functions</param>
/// <param name="config">Shader configuration</param>
/// <param name="block">Block that contains the operation</param>
/// <param name="operation">Global memory operation being replaced</param>
/// <param name="functionId">Id of the helper function; 0 when generation fails</param>
/// <returns>True if a function is available, false if a storage operation could not be generated</returns>
private static bool TryGenerateMultiTargetStorageOp(
    GtsContext gtsContext,
    ShaderConfig config,
    BasicBlock block,
    Operation operation,
    out int functionId)
{
    Queue<PhiNode> pendingPhis = new Queue<PhiNode>();
    HashSet<PhiNode> seenPhis = new HashSet<PhiNode>();
    List<uint> targetCbs = new List<uint>();

    Operand address = operation.GetSource(0);

    // If the address is "local + constant" (in either order), continue from the local.
    if (address.AsgOp is Operation addOp && addOp.Inst == Instruction.Add)
    {
        Operand lhs = addOp.GetSource(0);
        Operand rhs = addOp.GetSource(1);

        if (lhs.Type == OperandType.Constant && rhs.Type == OperandType.LocalVariable)
        {
            address = rhs;
        }
        else if (lhs.Type == OperandType.LocalVariable && rhs.Type == OperandType.Constant)
        {
            address = lhs;
        }
    }

    if (address.AsgOp is PhiNode rootPhi && seenPhis.Add(rootPhi))
    {
        pendingPhis.Enqueue(rootPhi);
    }
    else
    {
        // Not a Phi: search directly from the original (unstripped) address operand.
        SearchResult directResult = FindUniqueBaseAddressCb(gtsContext, block, operation.GetSource(0), needsOffset: false);

        if (directResult.Found)
        {
            targetCbs.Add(PackCbSlotAndOffset(directResult.SbCbSlot, directResult.SbCbOffset));
        }
    }

    // Breadth-first walk over the Phi graph; seenPhis keeps cycles from being revisited.
    while (pendingPhis.Count != 0)
    {
        PhiNode phi = pendingPhis.Dequeue();

        for (int srcIndex = 0; srcIndex < phi.SourcesCount; srcIndex++)
        {
            BasicBlock phiBlock = phi.GetBlock(srcIndex);
            Operand phiSource = phi.GetSource(srcIndex);

            SearchResult phiResult = FindUniqueBaseAddressCb(gtsContext, phiBlock, phiSource, needsOffset: false);

            if (phiResult.Found)
            {
                uint packedCb = PackCbSlotAndOffset(phiResult.SbCbSlot, phiResult.SbCbOffset);

                if (!targetCbs.Contains(packedCb))
                {
                    targetCbs.Add(packedCb);
                }
            }
            else if (phiSource.AsgOp is PhiNode nestedPhi && seenPhis.Add(nestedPhi))
            {
                pendingPhis.Enqueue(nestedPhi);
            }
        }
    }

    // Sorted so that the same set of targets always yields the same list (and function).
    targetCbs.Sort();

    if (targetCbs.Count == 0)
    {
        config.GpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\".");
    }

    if (gtsContext.TryGetFunctionId(operation, isMultiTarget: true, targetCbs, out functionId))
    {
        return true;
    }

    // Argument layout: address low, address high [, compare], [value].
    bool isCompareAndSwap = operation.Inst == Instruction.AtomicCompareAndSwap;
    bool takesValue = isCompareAndSwap || operation.Inst == Instruction.Store || operation.Inst.IsAtomic();

    int inArgumentsCount = isCompareAndSwap ? 4 : (takesValue ? 3 : 2);

    EmitterContext context = new EmitterContext();

    Operand globalAddressLow = Argument(0);
    Operand globalAddressHigh = Argument(1);

    foreach (uint targetCb in targetCbs)
    {
        (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb);

        // Three consecutive constant buffer words: base address low, base address high, size.
        Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset);
        Operand baseAddrHigh = Cbuf(sbCbSlot, sbCbOffset + 1);
        Operand size = Cbuf(sbCbSlot, sbCbOffset + 2);

        Operand offset = context.ISubtract(globalAddressLow, baseAddrLow);
        Operand borrow = context.ICompareLessUnsigned(globalAddressLow, baseAddrLow);

        Operand inRangeLow = context.ICompareLessUnsigned(offset, size);

        // The comparison result is added to the high word before comparing it with the base
        // (presumably the compare yields -1 when true, which makes this a borrow - confirm).
        Operand addrHighBorrowed = context.IAdd(globalAddressHigh, borrow);

        Operand inRangeHigh = context.ICompareEqual(addrHighBorrowed, baseAddrHigh);

        Operand inRange = context.BitwiseAnd(inRangeLow, inRangeHigh);

        // Skip this candidate when the address is outside of its range.
        Operand lblSkip = Label();
        context.BranchIfFalse(lblSkip, inRange);

        Operand compare = null;
        Operand value = null;

        if (isCompareAndSwap)
        {
            compare = Argument(2);
            value = Argument(3);
        }
        else if (takesValue)
        {
            value = Argument(2);
        }

        SearchResult candidate = new SearchResult(sbCbSlot, sbCbOffset);

        int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment();

        // Offset passed to the storage operation: address low - (base address low & -alignment).
        Operand baseAddressMasked = context.BitwiseAnd(baseAddrLow, Const(-alignment));
        Operand hostOffset = context.ISubtract(globalAddressLow, baseAddressMasked);

        bool generated = TryGenerateStorageOp(
            config,
            context,
            operation.Inst,
            operation.StorageKind,
            hostOffset,
            compare,
            value,
            candidate,
            out Operand resultValue);

        if (!generated)
        {
            functionId = 0;

            return false;
        }

        if (resultValue != null)
        {
            context.Return(resultValue);
        }
        else
        {
            context.Return();
        }

        context.MarkLabel(lblSkip);
    }

    // Reached when no candidate contains the address: return 0, or nothing for value-less operations.
    bool returnsValue = operation.Dest != null;

    if (returnsValue)
    {
        context.Return(Const(0));
    }
    else
    {
        context.Return();
    }

    string functionName = GetFunctionName(operation, isMultiTarget: true, targetCbs);

    BasicBlock[] functionBlocks = ControlFlowGraph.Create(context.GetOperations()).Blocks;

    Function function = new Function(functionBlocks, functionName, returnsValue, inArgumentsCount, 0);

    functionId = gtsContext.AddFunction(operation, isMultiTarget: true, targetCbs, function);

    return true;
}
|
|
|
|
|
|
- private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
|
|
|
+ private static uint PackCbSlotAndOffset(int cbSlot, int cbOffset)
|
|
|
{
|
|
|
- if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add)
|
|
|
+ return (uint)((ushort)cbSlot | ((ushort)cbOffset << 16));
|
|
|
+ }
|
|
|
+
|
|
|
+ private static (int, int) UnpackCbSlotAndOffset(uint packed)
|
|
|
+ {
|
|
|
+ return ((ushort)packed, (ushort)(packed >> 16));
|
|
|
+ }
|
|
|
+
|
|
|
+ private static string GetFunctionName(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs)
|
|
|
+ {
|
|
|
+ string name = baseOp.Inst.ToString();
|
|
|
+
|
|
|
+ name += baseOp.StorageKind switch
|
|
|
+ {
|
|
|
+ StorageKind.GlobalMemoryS8 => "S8",
|
|
|
+ StorageKind.GlobalMemoryS16 => "S16",
|
|
|
+ StorageKind.GlobalMemoryU8 => "U8",
|
|
|
+ StorageKind.GlobalMemoryU16 => "U16",
|
|
|
+ _ => string.Empty
|
|
|
+ };
|
|
|
+
|
|
|
+ if (isMultiTarget)
|
|
|
{
|
|
|
- return (address, 0);
|
|
|
+ name += "Multi";
|
|
|
}
|
|
|
|
|
|
- Operand src1 = offsetAdd.GetSource(0);
|
|
|
- Operand src2 = offsetAdd.GetSource(1);
|
|
|
-
|
|
|
- if (src2.Type != OperandType.Constant)
|
|
|
+ foreach (uint targetCb in targetCbs)
|
|
|
{
|
|
|
- return (address, 0);
|
|
|
+ (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb);
|
|
|
+
|
|
|
+ name += $"_c{sbCbSlot}o{sbCbOffset}";
|
|
|
}
|
|
|
|
|
|
- return (src1, src2.Value);
|
|
|
+ return name;
|
|
|
}
|
|
|
|
|
|
- private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
|
|
|
+ private static bool TryGenerateStorageOp(
|
|
|
+ ShaderConfig config,
|
|
|
+ EmitterContext context,
|
|
|
+ Instruction inst,
|
|
|
+ StorageKind storageKind,
|
|
|
+ Operand offset,
|
|
|
+ Operand compare,
|
|
|
+ Operand value,
|
|
|
+ SearchResult result,
|
|
|
+ out Operand resultValue)
|
|
|
{
|
|
|
- Operation operation = (Operation)node.Value;
|
|
|
+ resultValue = null;
|
|
|
+ bool isStore = inst.IsAtomic() || inst == Instruction.Store;
|
|
|
|
|
|
- Operand GetCbufOffset()
|
|
|
+ if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
|
|
|
{
|
|
|
- Operand addrLow = operation.GetSource(0);
|
|
|
-
|
|
|
- Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
|
|
|
- Operand baseAddrTrunc = Local();
|
|
|
+ Operand wordOffset = context.ShiftRightU32(offset, Const(2));
|
|
|
|
|
|
- Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
|
|
|
+ if (inst.IsAtomic())
|
|
|
+ {
|
|
|
+ if (IsSmallInt(storageKind))
|
|
|
+ {
|
|
|
+ throw new NotImplementedException();
|
|
|
+ }
|
|
|
|
|
|
- Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
|
|
|
+ switch (inst)
|
|
|
+ {
|
|
|
+ case Instruction.AtomicAdd:
|
|
|
+ resultValue = context.AtomicAdd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicAnd:
|
|
|
+ resultValue = context.AtomicAnd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicCompareAndSwap:
|
|
|
+ resultValue = context.AtomicCompareAndSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, compare, value);
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicMaxS32:
|
|
|
+ if (config.Options.TargetLanguage == TargetLanguage.Spirv)
|
|
|
+ {
|
|
|
+ resultValue = context.AtomicMaxS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) =>
|
|
|
+ {
|
|
|
+ return context.IMaximumS32(memValue, value);
|
|
|
+ });
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicMaxU32:
|
|
|
+ resultValue = context.AtomicMaxU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicMinS32:
|
|
|
+ if (config.Options.TargetLanguage == TargetLanguage.Spirv)
|
|
|
+ {
|
|
|
+ resultValue = context.AtomicMinS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) =>
|
|
|
+ {
|
|
|
+ return context.IMinimumS32(memValue, value);
|
|
|
+ });
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicMinU32:
|
|
|
+ resultValue = context.AtomicMinU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicOr:
|
|
|
+ resultValue = context.AtomicOr(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicSwap:
|
|
|
+ resultValue = context.AtomicSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ break;
|
|
|
+ case Instruction.AtomicXor:
|
|
|
+ resultValue = context.AtomicXor(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else if (inst == Instruction.Store)
|
|
|
+ {
|
|
|
+ int bitSize = storageKind switch
|
|
|
+ {
|
|
|
+ StorageKind.GlobalMemoryS8 or
|
|
|
+ StorageKind.GlobalMemoryU8 => 8,
|
|
|
+ StorageKind.GlobalMemoryS16 or
|
|
|
+ StorageKind.GlobalMemoryU16 => 16,
|
|
|
+ _ => 32
|
|
|
+ };
|
|
|
|
|
|
- node.List.AddBefore(node, andOp);
|
|
|
+ if (bitSize < 32)
|
|
|
+ {
|
|
|
+ Operand bitOffset = GetBitOffset(context, offset);
|
|
|
|
|
|
- Operand byteOffset = Local();
|
|
|
- Operand wordOffset = Local();
|
|
|
+ GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) =>
|
|
|
+ {
|
|
|
+ return context.BitfieldInsert(memValue, value, bitOffset, Const(bitSize));
|
|
|
+ });
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ context.Store(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ value = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset);
|
|
|
|
|
|
- Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
|
|
|
- Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
|
|
|
+ if (IsSmallInt(storageKind))
|
|
|
+ {
|
|
|
+ Operand bitOffset = GetBitOffset(context, offset);
|
|
|
|
|
|
- node.List.AddBefore(node, subOp);
|
|
|
- node.List.AddBefore(node, shrOp);
|
|
|
+ switch (storageKind)
|
|
|
+ {
|
|
|
+ case StorageKind.GlobalMemoryS8:
|
|
|
+ value = context.ShiftRightS32(value, bitOffset);
|
|
|
+ value = context.BitfieldExtractS32(value, Const(0), Const(8));
|
|
|
+ break;
|
|
|
+ case StorageKind.GlobalMemoryS16:
|
|
|
+ value = context.ShiftRightS32(value, bitOffset);
|
|
|
+ value = context.BitfieldExtractS32(value, Const(0), Const(16));
|
|
|
+ break;
|
|
|
+ case StorageKind.GlobalMemoryU8:
|
|
|
+ value = context.ShiftRightU32(value, bitOffset);
|
|
|
+ value = context.BitwiseAnd(value, Const(byte.MaxValue));
|
|
|
+ break;
|
|
|
+ case StorageKind.GlobalMemoryU16:
|
|
|
+ value = context.ShiftRightU32(value, bitOffset);
|
|
|
+ value = context.BitwiseAnd(value, Const(ushort.MaxValue));
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- return wordOffset;
|
|
|
+ resultValue = value;
|
|
|
}
|
|
|
|
|
|
- Operand cbufOffset = GetCbufOffset();
|
|
|
- Operand vecIndex = Local();
|
|
|
- Operand elemIndex = Local();
|
|
|
-
|
|
|
- node.List.AddBefore(node, new Operation(Instruction.ShiftRightU32, 0, vecIndex, cbufOffset, Const(2)));
|
|
|
- node.List.AddBefore(node, new Operation(Instruction.BitwiseAnd, 0, elemIndex, cbufOffset, Const(3)));
|
|
|
+ return true;
|
|
|
+ }
|
|
|
|
|
|
- Operand[] sources = new Operand[4];
|
|
|
+ private static Operand GetBitOffset(EmitterContext context, Operand offset)
|
|
|
+ {
|
|
|
+ return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3));
|
|
|
+ }
|
|
|
|
|
|
- int cbSlot = UbeFirstCbuf + storageIndex;
|
|
|
+ private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func<Operand, Operand> opCallback)
|
|
|
+ {
|
|
|
+ Operand lblLoopHead = Label();
|
|
|
|
|
|
- sources[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot));
|
|
|
- sources[1] = Const(0);
|
|
|
- sources[2] = vecIndex;
|
|
|
- sources[3] = elemIndex;
|
|
|
+ context.MarkLabel(lblLoopHead);
|
|
|
|
|
|
- Operation ldcOp = new Operation(Instruction.Load, StorageKind.ConstantBuffer, operation.Dest, sources);
|
|
|
+ Operand oldValue = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset);
|
|
|
+ Operand newValue = opCallback(oldValue);
|
|
|
|
|
|
- for (int index = 0; index < operation.SourcesCount; index++)
|
|
|
- {
|
|
|
- operation.SetSource(index, null);
|
|
|
- }
|
|
|
+ Operand casResult = context.AtomicCompareAndSwap(
|
|
|
+ StorageKind.StorageBuffer,
|
|
|
+ binding,
|
|
|
+ Const(0),
|
|
|
+ wordOffset,
|
|
|
+ oldValue,
|
|
|
+ newValue);
|
|
|
|
|
|
- LinkedListNode<INode> oldNode = node;
|
|
|
+ Operand casFail = context.ICompareNotEqual(casResult, oldValue);
|
|
|
|
|
|
- node = node.List.AddBefore(node, ldcOp);
|
|
|
+ context.BranchIfTrue(lblLoopHead, casFail);
|
|
|
|
|
|
- node.List.Remove(oldNode);
|
|
|
-
|
|
|
- return node;
|
|
|
+ return oldValue;
|
|
|
}
|
|
|
|
|
|
- private static SearchResult SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress)
|
|
|
+ private static SearchResult FindUniqueBaseAddressCb(GtsContext gtsContext, BasicBlock block, Operand globalAddress, bool needsOffset)
|
|
|
{
|
|
|
globalAddress = Utils.FindLastOperation(globalAddress, block);
|
|
|
|
|
|
if (globalAddress.Type == OperandType.ConstantBuffer)
|
|
|
{
|
|
|
- return GetStorageIndex(config, globalAddress);
|
|
|
+ return GetBaseAddressCbWithOffset(globalAddress, Const(0), 0);
|
|
|
}
|
|
|
|
|
|
Operation operation = globalAddress.AsgOp as Operation;
|
|
|
|
|
|
if (operation == null || operation.Inst != Instruction.Add)
|
|
|
{
|
|
|
- return SearchResult.NotFound;
|
|
|
+ return FindBaseAddressCbFromMemory(gtsContext, operation, 0, needsOffset);
|
|
|
}
|
|
|
|
|
|
Operand src1 = operation.GetSource(0);
|
|
|
Operand src2 = operation.GetSource(1);
|
|
|
|
|
|
+ int constOffset = 0;
|
|
|
+
|
|
|
if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) ||
|
|
|
(src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant))
|
|
|
{
|
|
|
Operand baseAddr;
|
|
|
+ Operand offset;
|
|
|
|
|
|
if (src1.Type == OperandType.LocalVariable)
|
|
|
{
|
|
|
baseAddr = Utils.FindLastOperation(src1, block);
|
|
|
+ offset = src2;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
baseAddr = Utils.FindLastOperation(src2, block);
|
|
|
+ offset = src1;
|
|
|
}
|
|
|
|
|
|
- var result = GetStorageIndex(config, baseAddr);
|
|
|
+ var result = GetBaseAddressCbWithOffset(baseAddr, offset, 0);
|
|
|
if (result.Found)
|
|
|
{
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
+ constOffset = offset.Value;
|
|
|
operation = baseAddr.AsgOp as Operation;
|
|
|
|
|
|
if (operation == null || operation.Inst != Instruction.Add)
|
|
|
{
|
|
|
- return SearchResult.NotFound;
|
|
|
+ return FindBaseAddressCbFromMemory(gtsContext, operation, constOffset, needsOffset);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- var selectedResult = SearchResult.NotFound;
|
|
|
+ src1 = operation.GetSource(0);
|
|
|
+ src2 = operation.GetSource(1);
|
|
|
+
|
|
|
+ // If we have two possible results, we give preference to the ones from
|
|
|
+ // the driver reserved constant buffer, as those are the ones that
|
|
|
+            // contain the base address.
|
|
|
|
|
|
- for (int index = 0; index < operation.SourcesCount; index++)
|
|
|
+            // If both are constant buffers, give preference to the second operand,
|
|
|
+            // because constant buffers are always encoded as the second operand,
|
|
|
+ // so the second operand will always be the one from the last instruction.
|
|
|
+
|
|
|
+ if (src1.Type != OperandType.ConstantBuffer ||
|
|
|
+ (src1.Type == OperandType.ConstantBuffer && src2.Type == OperandType.ConstantBuffer) ||
|
|
|
+ (src2.Type == OperandType.ConstantBuffer && src2.GetCbufSlot() == DriverReservedCb))
|
|
|
{
|
|
|
- Operand source = operation.GetSource(index);
|
|
|
+ return GetBaseAddressCbWithOffset(src2, src1, constOffset);
|
|
|
+ }
|
|
|
|
|
|
- var result = GetStorageIndex(config, source);
|
|
|
+ return GetBaseAddressCbWithOffset(src1, src2, constOffset);
|
|
|
+ }
|
|
|
|
|
|
- // If we already have a result, we give preference to the ones from
|
|
|
- // the driver reserved constant buffer, as those are the ones that
|
|
|
- // contains the base address.
|
|
|
- if (result.Found && (!selectedResult.Found || result.SbCbSlot == GlobalMemory.DriverReservedCb))
|
|
|
+ private static SearchResult FindBaseAddressCbFromMemory(GtsContext gtsContext, Operation operation, int constOffset, bool needsOffset)
|
|
|
+ {
|
|
|
+ if (operation != null)
|
|
|
+ {
|
|
|
+ if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand bo, out int co) &&
|
|
|
+ gtsContext.TryGetMemoryTargetCb(type, bo, co, out SearchResult result) &&
|
|
|
+ (result.Offset != null || !needsOffset))
|
|
|
{
|
|
|
- selectedResult = result;
|
|
|
+ if (constOffset != 0)
|
|
|
+ {
|
|
|
+ return new SearchResult(
|
|
|
+ result.SbCbSlot,
|
|
|
+ result.SbCbOffset,
|
|
|
+ result.Offset,
|
|
|
+ result.ConstOffset + constOffset);
|
|
|
+ }
|
|
|
+
|
|
|
+ return result;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- return selectedResult;
|
|
|
+ return SearchResult.NotFound;
|
|
|
}
|
|
|
|
|
|
- private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand)
|
|
|
+ private static SearchResult GetBaseAddressCbWithOffset(Operand baseAddress, Operand offset, int constOffset)
|
|
|
{
|
|
|
- if (operand.Type == OperandType.ConstantBuffer)
|
|
|
+ if (baseAddress.Type == OperandType.ConstantBuffer)
|
|
|
{
|
|
|
- int slot = operand.GetCbufSlot();
|
|
|
- int offset = operand.GetCbufOffset();
|
|
|
+ int sbCbSlot = baseAddress.GetCbufSlot();
|
|
|
+ int sbCbOffset = baseAddress.GetCbufOffset();
|
|
|
|
|
|
- if ((offset & 3) == 0)
|
|
|
+ // We require the offset to be aligned to 1 word (64 bits),
|
|
|
+ // since the address size is 64-bit and the GPU only supports aligned memory access.
|
|
|
+ if ((sbCbOffset & 1) == 0)
|
|
|
{
|
|
|
- return new SearchResult(slot, offset);
|
|
|
+ return new SearchResult(sbCbSlot, sbCbOffset, offset, constOffset);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
return SearchResult.NotFound;
|
|
|
}
|
|
|
|
|
|
- private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
|
|
|
+ private static bool TryGetMemoryOffsets(Operation operation, out LsMemoryType type, out Operand baseOffset, out int constOffset)
|
|
|
{
|
|
|
- if (operand.Type == OperandType.ConstantBuffer)
|
|
|
+ baseOffset = null;
|
|
|
+
|
|
|
+ if (operation.Inst == Instruction.LoadShared || operation.Inst == Instruction.StoreShared)
|
|
|
+ {
|
|
|
+ type = LsMemoryType.Shared;
|
|
|
+ return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset);
|
|
|
+ }
|
|
|
+ else if (operation.Inst == Instruction.LoadLocal || operation.Inst == Instruction.StoreLocal)
|
|
|
{
|
|
|
- int slot = operand.GetCbufSlot();
|
|
|
- int offset = operand.GetCbufOffset();
|
|
|
+ type = LsMemoryType.Local;
|
|
|
+ return TryGetLocalMemoryOffset(operation, out constOffset);
|
|
|
+ }
|
|
|
|
|
|
- if (slot == 0 && offset >= sbStart && offset < sbEnd)
|
|
|
- {
|
|
|
- int storageIndex = (offset - sbStart) / StorageDescSize;
|
|
|
+ type = default;
|
|
|
+ constOffset = 0;
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static bool TryGetSharedMemoryOffsets(Operation operation, out Operand baseOffset, out int constOffset)
|
|
|
+ {
|
|
|
+ baseOffset = null;
|
|
|
+ constOffset = 0;
|
|
|
+
|
|
|
+ // The byte offset is right shifted by 2 to get the 32-bit word offset,
|
|
|
+ // so we want to get the byte offset back, since each one of those word
|
|
|
+            // offsets is a new "local variable" which will not match.
|
|
|
|
|
|
- return storageIndex;
|
|
|
+ if (operation.GetSource(0).AsgOp is Operation shiftRightOp &&
|
|
|
+ shiftRightOp.Inst == Instruction.ShiftRightU32 &&
|
|
|
+ shiftRightOp.GetSource(1).Type == OperandType.Constant &&
|
|
|
+ shiftRightOp.GetSource(1).Value == 2)
|
|
|
+ {
|
|
|
+ baseOffset = shiftRightOp.GetSource(0);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Check if we have a constant offset being added to the base offset.
|
|
|
+
|
|
|
+ if (baseOffset?.AsgOp is Operation addOp && addOp.Inst == Instruction.Add)
|
|
|
+ {
|
|
|
+ Operand src1 = addOp.GetSource(0);
|
|
|
+ Operand src2 = addOp.GetSource(1);
|
|
|
+
|
|
|
+ if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable)
|
|
|
+ {
|
|
|
+ constOffset = src1.Value;
|
|
|
+ baseOffset = src2;
|
|
|
}
|
|
|
+ else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant)
|
|
|
+ {
|
|
|
+ baseOffset = src1;
|
|
|
+ constOffset = src2.Value;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return baseOffset != null && baseOffset.Type == OperandType.LocalVariable;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static bool TryGetLocalMemoryOffset(Operation operation, out int constOffset)
|
|
|
+ {
|
|
|
+ if (operation.GetSource(0).Type == OperandType.Constant)
|
|
|
+ {
|
|
|
+ constOffset = operation.GetSource(0).Value;
|
|
|
+ return true;
|
|
|
}
|
|
|
|
|
|
- return -1;
|
|
|
+ constOffset = 0;
|
|
|
+ return false;
|
|
|
}
|
|
|
}
|
|
|
}
|