NoWxCache.cs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. using ARMeilleure.Memory;
  2. using Ryujinx.Common;
  3. using Ryujinx.Memory;
  4. using System;
  5. using System.Collections.Generic;
  6. using System.Diagnostics;
  7. using System.Threading;
namespace Ryujinx.Cpu.LightningJit.Cache
{
    /// <summary>
    /// JIT translation cache that never holds a mapping writable and executable at the same time:
    /// code is copied into a region while it is RW, and the region is only later reprotected to RX
    /// (see <see cref="MemoryCache.ReprotectAsRx"/>). The RX reprotection path is implemented for
    /// macOS/iOS only (via <see cref="JitSupportDarwin"/>); other platforms throw
    /// <see cref="PlatformNotSupportedException"/> there.
    /// Translated functions first live in a per-thread local cache and are promoted to the shared
    /// cache (made guest-visible) once complete pages can be reprotected as RX.
    /// </summary>
    class NoWxCache : IDisposable
    {
        private const int CodeAlignment = 4; // Bytes.

        // Capacities of the shared (cross-thread) region and the per-thread local region.
        private const int SharedCacheSize = 2047 * 1024 * 1024;
        private const int LocalCacheSize = 256 * 1024 * 1024;

        // How many calls to the same function we allow until we pad the shared cache to force the function to become available there
        // and allow the guest to take the fast path.
        private const int MinCallsForPad = 8;

        /// <summary>
        /// A reserved memory region paired with an offset allocator over it, plus helpers to flip
        /// page protections between RW (writable for copying code) and RX (executable).
        /// </summary>
        private class MemoryCache : IDisposable
        {
            private readonly ReservedRegion _region;
            private readonly CacheMemoryAllocator _cacheAllocator;

            public CacheMemoryAllocator Allocator => _cacheAllocator;
            public nint Pointer => _region.Block.Pointer;

            public MemoryCache(IJitMemoryAllocator allocator, ulong size)
            {
                _region = new(allocator, size);
                _cacheAllocator = new((int)size);
            }

            /// <summary>
            /// Allocates a code-aligned chunk, growing the backing region if needed.
            /// </summary>
            /// <param name="codeSize">Requested size in bytes; rounded up to <see cref="CodeAlignment"/></param>
            /// <returns>Offset of the allocation inside the region</returns>
            /// <exception cref="OutOfMemoryException">The allocator has no free range large enough</exception>
            public int Allocate(int codeSize)
            {
                codeSize = AlignCodeSize(codeSize);

                int allocOffset = _cacheAllocator.Allocate(codeSize);

                if (allocOffset < 0)
                {
                    throw new OutOfMemoryException("JIT Cache exhausted.");
                }

                _region.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);

                return allocOffset;
            }

            /// <summary>
            /// Returns a previously allocated range to the allocator.
            /// </summary>
            /// <param name="offset">Offset passed to/returned by <see cref="Allocate"/></param>
            /// <param name="size">Size of the range being freed</param>
            public void Free(int offset, int size)
            {
                _cacheAllocator.Free(offset, size);
            }

            /// <summary>
            /// Remaps a page-aligned range as read-write so new code can be copied into it.
            /// </summary>
            public void ReprotectAsRw(int offset, int size)
            {
                // Both offset and size must be page-aligned; protections operate on whole pages.
                Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
                Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

                _region.Block.MapAsRw((ulong)offset, (ulong)size);
            }

            /// <summary>
            /// Remaps a page-aligned range as read-execute and invalidates the instruction cache
            /// for it. Only implemented on macOS/iOS; any other platform throws.
            /// </summary>
            /// <exception cref="PlatformNotSupportedException">Not running on macOS or iOS</exception>
            public void ReprotectAsRx(int offset, int size)
            {
                Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
                Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

                _region.Block.MapAsRx((ulong)offset, (ulong)size);

                if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
                {
                    // Freshly written code must be flushed from the icache before execution.
                    JitSupportDarwin.SysIcacheInvalidate(_region.Block.Pointer + offset, size);
                }
                else
                {
                    throw new PlatformNotSupportedException();
                }
            }

            // Rounds codeSize up to the next multiple of CodeAlignment.
            // 'checked' guards the addition against int overflow for pathological sizes.
            private static int AlignCodeSize(int codeSize)
            {
                return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
            }

            protected virtual void Dispose(bool disposing)
            {
                if (disposing)
                {
                    _region.Dispose();
                    _cacheAllocator.Clear();
                }
            }

            public void Dispose()
            {
                // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
                Dispose(disposing: true);
                GC.SuppressFinalize(this);
            }
        }

        private readonly IStackWalker _stackWalker;
        private readonly Translator _translator;
        private readonly MemoryCache _sharedCache;
        private readonly MemoryCache _localCache;
        private readonly PageAlignedRangeList _pendingMap;
        private readonly Lock _lock = new();

        /// <summary>
        /// Per-guest-address entry of the thread-local cache; tracks how many times the
        /// function was requested so hot functions can be promoted to the shared cache.
        /// </summary>
        class ThreadLocalCacheEntry
        {
            public readonly int Offset;   // Offset inside the local cache region.
            public readonly int Size;     // Unaligned code size in bytes.
            public readonly nint FuncPtr; // Host pointer to the copied code.
            private int _useCount;

            public ThreadLocalCacheEntry(int offset, int size, nint funcPtr)
            {
                Offset = offset;
                Size = size;
                FuncPtr = funcPtr;
                _useCount = 0;
            }

            /// <summary>
            /// Increments and returns the use count. No synchronization needed: entries live in
            /// a [ThreadStatic] dictionary, so only the owning thread touches them.
            /// </summary>
            public int IncrementUseCount()
            {
                return ++_useCount;
            }
        }

        // One cache per thread; null until the thread first maps a function.
        [ThreadStatic]
        private static Dictionary<ulong, ThreadLocalCacheEntry> _threadLocalCache;

        public NoWxCache(IJitMemoryAllocator allocator, IStackWalker stackWalker, Translator translator)
        {
            _stackWalker = stackWalker;
            _translator = translator;
            _sharedCache = new(allocator, SharedCacheSize);
            _localCache = new(allocator, LocalCacheSize);
            // When a pending range completes a page, it is reprotected as RX and its functions
            // are registered with the translator (see RegisterFunction).
            _pendingMap = new(_sharedCache.ReprotectAsRx, RegisterFunction);
        }

        /// <summary>
        /// Maps translated code for <paramref name="guestAddress"/> and returns a callable host
        /// pointer. The code is queued into the shared cache (pending RX reprotection) if not
        /// already there, but the pointer returned is always a fresh thread-local copy.
        /// </summary>
        /// <param name="framePointer">Current frame pointer, used to walk the stack when trimming the thread-local cache</param>
        /// <param name="code">Host machine code to map</param>
        /// <param name="guestAddress">Guest address the code was translated from</param>
        /// <param name="guestSize">Size of the guest code region</param>
        /// <returns>Pointer to executable code in the thread-local cache</returns>
        public unsafe nint Map(nint framePointer, ReadOnlySpan<byte> code, ulong guestAddress, ulong guestSize)
        {
            if (TryGetThreadLocalFunction(guestAddress, out nint funcPtr))
            {
                return funcPtr;
            }

            lock (_lock)
            {
                // Only add to the shared cache once per guest address.
                if (!_pendingMap.Has(guestAddress) && !_translator.Functions.ContainsKey(guestAddress))
                {
                    int funcOffset = _sharedCache.Allocate(code.Length);

                    funcPtr = _sharedCache.Pointer + funcOffset;
                    code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

                    TranslatedFunction function = new(funcPtr, guestSize);

                    _pendingMap.Add(funcOffset, code.Length, guestAddress, function);
                }

                ClearThreadLocalCache(framePointer);

                return AddThreadLocalFunction(code, guestAddress);
            }
        }

        /// <summary>
        /// Maps code directly into the shared cache on its own page-aligned allocation and
        /// reprotects it as RX immediately (no thread-local copy, no pending state).
        /// </summary>
        /// <param name="code">Host machine code to map</param>
        /// <returns>Pointer to the executable code in the shared cache</returns>
        public unsafe nint MapPageAligned(ReadOnlySpan<byte> code)
        {
            lock (_lock)
            {
                // Ensure we will get an aligned offset from the allocator.
                _pendingMap.Pad(_sharedCache.Allocator);

                int sizeAligned = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
                int funcOffset = _sharedCache.Allocate(sizeAligned);

                Debug.Assert((funcOffset & ((int)MemoryBlock.GetPageSize() - 1)) == 0);

                nint funcPtr = _sharedCache.Pointer + funcOffset;

                code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

                _sharedCache.ReprotectAsRx(funcOffset, sizeAligned);

                return funcPtr;
            }
        }

        /// <summary>
        /// Looks up <paramref name="guestAddress"/> in the calling thread's local cache.
        /// On a hit, also bumps the entry's use count; hot entries trigger padding of the
        /// pending shared-cache ranges so the function becomes guest-visible sooner.
        /// </summary>
        /// <returns>True with the cached pointer on a hit; false with <see cref="nint.Zero"/> otherwise</returns>
        private bool TryGetThreadLocalFunction(ulong guestAddress, out nint funcPtr)
        {
            if ((_threadLocalCache ??= new()).TryGetValue(guestAddress, out ThreadLocalCacheEntry entry))
            {
                if (entry.IncrementUseCount() >= MinCallsForPad)
                {
                    // Function is being called often, let's make it available in the shared cache so that the guest code
                    // can take the fast path and stop calling the emulator to get the function from the thread local cache.
                    // To do that we pad all "pending" function until they complete a page of memory, allowing us to reprotect them as RX.
                    lock (_lock)
                    {
                        _pendingMap.Pad(_sharedCache.Allocator);
                    }
                }

                funcPtr = entry.FuncPtr;

                return true;
            }

            funcPtr = nint.Zero;

            return false;
        }

        /// <summary>
        /// Frees thread-local copies of functions that are already available in the shared cache
        /// and are provably not on the current thread's call stack.
        /// </summary>
        /// <param name="framePointer">Frame pointer used to walk the current call stack</param>
        private void ClearThreadLocalCache(nint framePointer)
        {
            // Try to delete functions that are already on the shared cache
            // and no longer being executed.
            if (_threadLocalCache == null)
            {
                return;
            }

            IEnumerable<ulong> callStack = _stackWalker.GetCallStack(
                framePointer,
                _localCache.Pointer,
                LocalCacheSize,
                _sharedCache.Pointer,
                SharedCacheSize);

            List<(ulong, ThreadLocalCacheEntry)> toDelete = [];

            foreach ((ulong address, ThreadLocalCacheEntry entry) in _threadLocalCache)
            {
                // We only want to delete if the function is already on the shared cache,
                // otherwise we will keep translating the same function over and over again.
                bool canDelete = !_pendingMap.Has(address);
                if (!canDelete)
                {
                    continue;
                }

                // We can only delete if the function is not part of the current thread call stack,
                // otherwise we will crash the program when the thread returns to it.
                foreach (ulong funcAddress in callStack)
                {
                    if (funcAddress >= (ulong)entry.FuncPtr && funcAddress < (ulong)entry.FuncPtr + (ulong)entry.Size)
                    {
                        canDelete = false;
                        break;
                    }
                }

                if (canDelete)
                {
                    toDelete.Add((address, entry));
                }
            }

            int pageSize = (int)MemoryBlock.GetPageSize();

            foreach ((ulong address, ThreadLocalCacheEntry entry) in toDelete)
            {
                _threadLocalCache.Remove(address);

                // Entries were allocated page-aligned (see AddThreadLocalFunction), so free and
                // reprotect the full aligned span back to RW for reuse.
                int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

                _localCache.Free(entry.Offset, sizeAligned);
                _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
            }
        }

        /// <summary>
        /// Frees every entry of the calling thread's local cache and drops the cache itself.
        /// Intended to be called when the thread exits (no stack-walk safety check is done here).
        /// </summary>
        public void ClearEntireThreadLocalCache()
        {
            // Thread is exiting, delete everything.
            if (_threadLocalCache == null)
            {
                return;
            }

            int pageSize = (int)MemoryBlock.GetPageSize();

            foreach ((_, ThreadLocalCacheEntry entry) in _threadLocalCache)
            {
                int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

                _localCache.Free(entry.Offset, sizeAligned);
                _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
            }

            _threadLocalCache.Clear();
            _threadLocalCache = null;
        }

        /// <summary>
        /// Copies <paramref name="code"/> into a page-aligned allocation of the local cache,
        /// records it in the thread-local dictionary, and reprotects it as RX.
        /// </summary>
        /// <returns>Pointer to the executable thread-local copy</returns>
        private unsafe nint AddThreadLocalFunction(ReadOnlySpan<byte> code, ulong guestAddress)
        {
            int alignedSize = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
            int funcOffset = _localCache.Allocate(alignedSize);

            Debug.Assert((funcOffset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

            nint funcPtr = _localCache.Pointer + funcOffset;
            code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

            (_threadLocalCache ??= new()).Add(guestAddress, new(funcOffset, code.Length, funcPtr));

            // Write first, reprotect after: the range is never writable and executable at once.
            _localCache.ReprotectAsRx(funcOffset, alignedSize);

            return funcPtr;
        }

        /// <summary>
        /// Callback invoked by <see cref="_pendingMap"/> when a function's shared-cache range
        /// becomes executable; publishes the function to the translator.
        /// </summary>
        private void RegisterFunction(ulong address, TranslatedFunction func)
        {
            TranslatedFunction oldFunc = _translator.Functions.GetOrAdd(address, func.GuestSize, func);

            // Map() only adds a pending entry when the address is absent from Functions,
            // so the add is expected to always win here.
            Debug.Assert(oldFunc == func);

            _translator.RegisterFunction(address, func);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                _localCache.Dispose();
                _sharedCache.Dispose();
            }
        }

        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}