Translator.cs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. using ARMeilleure.Common;
  2. using ARMeilleure.Decoders;
  3. using ARMeilleure.Diagnostics;
  4. using ARMeilleure.Instructions;
  5. using ARMeilleure.IntermediateRepresentation;
  6. using ARMeilleure.Memory;
  7. using ARMeilleure.Signal;
  8. using ARMeilleure.State;
  9. using ARMeilleure.Translation.Cache;
  10. using ARMeilleure.Translation.PTC;
  11. using Ryujinx.Common;
  12. using System;
  13. using System.Collections.Concurrent;
  14. using System.Collections.Generic;
  15. using System.Diagnostics;
  16. using System.Runtime;
  17. using System.Threading;
  18. using static ARMeilleure.Common.BitMapPool;
  19. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  20. using static ARMeilleure.IntermediateRepresentation.OperationHelper;
  21. namespace ARMeilleure.Translation
  22. {
  23. public class Translator
  24. {
  // Currently not referenced anywhere else in this class.
  private const int CountTableCapacity = 4 * 1024 * 1024;

  private readonly IJitMemoryAllocator _allocator;
  private readonly IMemoryManager _memory;

  // Translated functions, keyed by guest address.
  private readonly ConcurrentDictionary<ulong, TranslatedFunction> _funcs = new();

  // Functions that were replaced by a rejit; they are only unmapped when the JIT cache is cleared.
  private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs = new();

  // Guest addresses with a pending rejit request. Used as a set: the stored value is always null.
  private readonly ConcurrentDictionary<ulong, object> _backgroundSet = new();

  // Pending rejit requests, consumed by the background translator threads.
  private readonly ConcurrentStack<RejitRequest> _backgroundStack = new();

  // Signalled when a rejit request is pushed, and when the translator is shutting down.
  private readonly AutoResetEvent _backgroundTranslatorEvent = new(false);

  // Background translators hold this as readers while translating; ClearRejitQueue takes it as a writer.
  private readonly ReaderWriterLock _backgroundTranslatorLock = new();

  // Created by the first thread entering Execute and disposed by the last one leaving it.
  private JumpTable _jumpTable;

  internal JumpTable JumpTable => _jumpTable;

  internal EntryTable<uint> CountTable { get; } = new();

  // Number of threads currently inside Execute.
  private volatile int _threadCount;

  // FIXME: Remove this once the emulator's init logic has been redone.
  public static readonly ManualResetEvent IsReadyForTranslation = new(false);

  /// <summary>
  /// Creates a translator bound to the given JIT memory allocator and guest memory manager,
  /// and initializes the JIT cache and the direct call stubs.
  /// </summary>
  /// <param name="allocator">Allocator handed to the JIT cache and, later, to the jump table.</param>
  /// <param name="memory">Guest memory manager used when translating and executing code.</param>
  public Translator(IJitMemoryAllocator allocator, IMemoryManager memory)
  {
      _allocator = allocator;
      _memory = memory;

      JitCache.Initialize(allocator);
      DirectCallStubs.InitializeStubs();

      // The native signal handler is only installed for host mapped memory managers.
      if (memory.Type.IsHostMapped())
      {
          NativeSignalHandler.InitializeSignalHandler();
      }
  }
  /// <summary>
  /// Body of a background translator thread. While at least one thread is inside
  /// <see cref="Execute"/>, pops rejit requests and replaces the matching function
  /// with a high quality translation; sleeps on the translator event when idle.
  /// </summary>
  private void TranslateStackedSubs()
  {
      while (_threadCount != 0)
      {
          bool handledRequest = false;

          // Held as a reader for the whole translation so that ClearRejitQueue (which takes the
          // writer lock) waits for in-flight rejits to finish.
          _backgroundTranslatorLock.AcquireReaderLock(Timeout.Infinite);

          try
          {
              if (_backgroundStack.TryPop(out RejitRequest request) &&
                  _backgroundSet.TryRemove(request.Address, out _))
              {
                  TranslatedFunction func = Translate(
                      _memory,
                      _jumpTable,
                      CountTable,
                      request.Address,
                      request.Mode,
                      highCq: true);

                  // The function being replaced is queued for deferred deletion rather than unmapped here.
                  _funcs.AddOrUpdate(request.Address, func, (key, oldFunc) =>
                  {
                      EnqueueForDeletion(key, oldFunc);
                      return func;
                  });

                  _jumpTable.RegisterFunction(request.Address, func);

                  if (PtcProfiler.Enabled)
                  {
                      PtcProfiler.UpdateEntry(request.Address, request.Mode, highCq: true);
                  }

                  handledRequest = true;
              }
          }
          finally
          {
              // Always release the reader lock, even if translation threw, so that a pending
              // writer is never blocked forever.
              _backgroundTranslatorLock.ReleaseReaderLock();
          }

          if (!handledRequest)
          {
              // Nothing to do: wait (outside of the lock) until more work is queued or shutdown is signalled.
              _backgroundTranslatorEvent.WaitOne();
          }
      }

      // Wake up any other background translator threads, to encourage them to exit.
      _backgroundTranslatorEvent.Set();
  }
  /// <summary>
  /// Runs guest code on the calling thread, starting at <paramref name="address"/>, until the
  /// context stops running or a translated function returns address 0.
  /// </summary>
  /// <remarks>
  /// The first thread to enter creates the jump table, loads or builds PTC translations when PTC
  /// is enabled, and starts the background translator threads. The last thread to leave clears
  /// the JIT cache and disposes the pools, the jump table and the count table.
  /// </remarks>
  /// <param name="context">Execution context of the guest thread.</param>
  /// <param name="address">Guest address where execution starts.</param>
  public void Execute(State.ExecutionContext context, ulong address)
  {
      bool isFirstThread = Interlocked.Increment(ref _threadCount) == 1;

      if (isFirstThread)
      {
          IsReadyForTranslation.WaitOne();

          Debug.Assert(_jumpTable == null);

          _jumpTable = new JumpTable(_allocator);

          if (Ptc.State == PtcState.Enabled)
          {
              Debug.Assert(_funcs.Count == 0);

              Ptc.LoadTranslations(_funcs, _memory, _jumpTable, CountTable);
              Ptc.MakeAndSaveTranslations(_funcs, _memory, _jumpTable, CountTable);
          }

          PtcProfiler.Start();

          Ptc.Disable();

          // Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core + ht
          // etc). All threads are normal priority except from the last, which just fills as much of the last core
          // as the os lets it with a low priority. If we only have one rejit thread, it should be normal priority
          // as highCq code is performance critical.
          //
          // TODO: Use physical cores rather than logical. This only really makes sense for processors with
          // hyperthreading. Requires OS specific code.
          int uncappedWorkerCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
          int workerCount = Math.Min(4, uncappedWorkerCount);

          for (int workerIndex = 0; workerIndex < workerCount; workerIndex++)
          {
              // The comparison is made against the uncapped count, so when the cap of 4 applies
              // no worker matches and all of them run at normal priority.
              bool lowPriority = workerIndex != 0 && workerIndex == uncappedWorkerCount - 1;

              Thread worker = new Thread(TranslateStackedSubs);

              worker.Name = "CPU.BackgroundTranslatorThread." + workerIndex;
              worker.Priority = lowPriority ? ThreadPriority.Lowest : ThreadPriority.Normal;

              worker.Start();
          }
      }

      Statistics.InitializeTimer();

      NativeInterface.RegisterThread(context, _memory, this);

      // Each iteration runs one translated function, which returns the next guest address to run.
      do
      {
          address = ExecuteSingle(context, address);
      }
      while (context.Running && address != 0);

      NativeInterface.UnregisterThread();

      bool isLastThread = Interlocked.Decrement(ref _threadCount) == 0;

      if (isLastThread)
      {
          // Wake a background translator so that it can observe the zero thread count and exit.
          _backgroundTranslatorEvent.Set();

          ClearJitCache();

          DisposePools();

          _jumpTable.Dispose();
          _jumpTable = null;

          CountTable.Dispose();

          GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
      }
  }
  /// <summary>
  /// Executes the translated function for <paramref name="address"/>, translating it first if needed,
  /// and times the call through <c>Statistics</c>.
  /// </summary>
  /// <param name="context">Execution context of the guest thread.</param>
  /// <param name="address">Guest address of the function to run.</param>
  /// <returns>The address returned by the translated function.</returns>
  public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
  {
      TranslatedFunction target = GetOrTranslate(address, context.ExecutionMode);

      Statistics.StartTimer();

      ulong nextAddress = target.Execute(context);

      Statistics.StopTimer(address);

      return nextAddress;
  }
  /// <summary>
  /// Returns the translated function registered for <paramref name="address"/>, creating a
  /// low quality translation when none is registered yet.
  /// </summary>
  /// <param name="address">Guest address of the function.</param>
  /// <param name="mode">Execution mode used when the function has to be translated.</param>
  /// <returns>The function stored in the function table for this address.</returns>
  internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
  {
      if (_funcs.TryGetValue(address, out TranslatedFunction existing))
      {
          return existing;
      }

      TranslatedFunction translated = Translate(_memory, _jumpTable, CountTable, address, mode, highCq: false);

      TranslatedFunction registered = _funcs.GetOrAdd(address, translated);

      if (registered != translated)
      {
          // Another thread registered a function for this address first: drop our copy and use theirs.
          JitCache.Unmap(translated.FuncPtr);
      }

      if (PtcProfiler.Enabled)
      {
          PtcProfiler.AddEntry(address, mode, highCq: false);
      }

      return registered;
  }
  /// <summary>
  /// Translates the guest function at <paramref name="address"/>: decodes it, emits the
  /// intermediate representation, runs the register usage pass and compiles the result.
  /// </summary>
  /// <param name="memory">Guest memory manager the code is decoded from.</param>
  /// <param name="jumpTable">Jump table handed to the emitter context.</param>
  /// <param name="countTable">Count table handed to the emitter context.</param>
  /// <param name="address">Guest address of the function entry point.</param>
  /// <param name="mode">Execution mode used for decoding and for the register usage pass.</param>
  /// <param name="highCq">True to compile with high code quality options, false otherwise.</param>
  /// <returns>The compiled function, with its call counter (if any), guest size and quality flag.</returns>
  internal static TranslatedFunction Translate(
      IMemoryManager memory,
      JumpTable jumpTable,
      EntryTable<uint> countTable,
      ulong address,
      ExecutionMode mode,
      bool highCq)
  {
      // High and low quality translations use separate pool groups.
      int poolGroupId = highCq ? 1 : 0;

      var context = new ArmEmitterContext(memory, jumpTable, countTable, address, highCq, Aarch32Mode.User);

      Logger.StartPass(PassName.Decoding);

      Block[] blocks = Decoder.Decode(memory, address, mode, highCq, singleBlock: false);

      Logger.EndPass(PassName.Decoding);

      PreparePool(poolGroupId);

      Logger.StartPass(PassName.Translation);

      EmitSynchronization(context);

      // When the first decoded block is not the entry point, start by branching to the entry point.
      if (blocks[0].Address != address)
      {
          context.Branch(context.GetLabel(address));
      }

      ControlFlowGraph cfg = EmitAndGetCFG(context, blocks, out Range funcRange, out Counter<uint> counter);

      ulong funcSize = funcRange.End - funcRange.Start;

      Logger.EndPass(PassName.Translation);

      Logger.StartPass(PassName.RegisterUsage);

      RegisterUsage.RunPass(cfg, mode);

      Logger.EndPass(PassName.RegisterUsage);

      OperandType[] argTypes = { OperandType.I64 };

      CompilerOptions options = highCq ? CompilerOptions.HighCq : CompilerOptions.None;

      GuestFunction compiled;

      if (Ptc.State == PtcState.Disabled)
      {
          compiled = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options);

          ResetPool(poolGroupId);
      }
      else
      {
          // PTC is active: also collect and record the information needed to cache this translation.
          using (PtcInfo ptcInfo = new PtcInfo())
          {
              compiled = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options, ptcInfo);

              ResetPool(poolGroupId);

              Hash128 hash = Ptc.ComputeHash(memory, address, funcSize);

              Ptc.WriteInfoCodeRelocUnwindInfo(address, funcSize, hash, highCq, ptcInfo);
          }
      }

      return new TranslatedFunction(compiled, counter, funcSize, highCq);
  }
  /// <summary>
  /// Prepares the operand pool and then the operation pool of the given group.
  /// </summary>
  /// <param name="groupId">Pool group to prepare. Defaults to group 0.</param>
  internal static void PreparePool(int groupId = 0)
  {
      PrepareOperandPool(groupId);

      PrepareOperationPool(groupId);
  }
  /// <summary>
  /// Resets the operation pool and then the operand pool of the given group.
  /// </summary>
  /// <param name="groupId">Pool group to reset. Defaults to group 0.</param>
  internal static void ResetPool(int groupId = 0)
  {
      ResetOperationPool(groupId);

      ResetOperandPool(groupId);
  }
  /// <summary>
  /// Disposes the operand pools, the operation pools and the bit map pools, in that order.
  /// </summary>
  internal static void DisposePools()
  {
      DisposeOperandPools();

      DisposeOperationPools();

      DisposeBitMapPools();
  }
  /// <summary>
  /// Immutable pair of guest addresses delimiting a translated function.
  /// </summary>
  /// <remarks>
  /// Declared <c>readonly</c> because both members are get-only; this states the immutability
  /// explicitly and lets the compiler enforce it.
  /// </remarks>
  private readonly struct Range
  {
      /// <summary>Lowest address covered by the range.</summary>
      public ulong Start { get; }

      /// <summary>Highest end address covered by the range.</summary>
      public ulong End { get; }

      /// <summary>
      /// Creates a range from its two bounds. No ordering check is performed.
      /// </summary>
      /// <param name="start">Value stored in <see cref="Start"/>.</param>
      /// <param name="end">Value stored in <see cref="End"/>.</param>
      public Range(ulong start, ulong end)
      {
          Start = start;
          End = end;
      }
  }
  /// <summary>
  /// Emits the intermediate representation for every decoded block and returns the resulting
  /// control flow graph.
  /// </summary>
  /// <param name="context">Emitter context receiving the generated operations.</param>
  /// <param name="blocks">Decoded blocks of the function, emitted in array order.</param>
  /// <param name="range">
  /// Lowest start address and highest end address among the non-exit blocks. If there is no
  /// such block, it is left at (<see cref="ulong.MaxValue"/>, 0).
  /// </param>
  /// <param name="counter">
  /// Call counter created by the rejit check, or null when no rejit check was emitted
  /// (high quality translation, or no block at the entry address).
  /// </param>
  /// <returns>The control flow graph built by the emitter context.</returns>
  /// <exception cref="InvalidOperationException">An instruction has no emitter.</exception>
  private static ControlFlowGraph EmitAndGetCFG(
      ArmEmitterContext context,
      Block[] blocks,
      out Range range,
      out Counter<uint> counter)
  {
      counter = null;

      ulong lowestAddress = ulong.MaxValue;
      ulong highestAddress = 0;

      foreach (Block block in blocks)
      {
          // Exit blocks do not contribute to the function range.
          if (!block.Exit)
          {
              lowestAddress = Math.Min(lowestAddress, block.Address);
              highestAddress = Math.Max(highestAddress, block.EndAddress);
          }

          // Only low quality code counts its calls to request a rejit.
          if (!context.HighCq && block.Address == context.EntryAddress)
          {
              EmitRejitCheck(context, out counter);
          }

          context.CurrBlock = block;

          context.MarkLabel(context.GetLabel(block.Address));

          if (block.Exit)
          {
              InstEmitFlowHelper.EmitTailContinue(context, Const(block.Address));

              continue;
          }

          for (int index = 0; index < block.OpCodes.Count; index++)
          {
              OpCode opCode = block.OpCodes[index];

              context.CurrOp = opCode;

              bool isLastOp = index + 1 == block.OpCodes.Count;

              // Emit a synchronization point before a branch that targets this block or an earlier one.
              if (isLastOp && block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
              {
                  EmitSynchronization(context);
              }

              Operand skipLabel = null;

              // Conditional 32-bit instructions are jumped over when their condition does not hold.
              if (opCode is OpCode32 op32 && op32.Cond < Condition.Al)
              {
                  skipLabel = Label();

                  InstEmitFlowHelper.EmitCondBranch(context, skipLabel, op32.Cond.Invert());
              }

              if (opCode.Instruction.Emitter == null)
              {
                  throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
              }

              opCode.Instruction.Emitter(context);

              if (skipLabel != null)
              {
                  context.MarkLabel(skipLabel);
              }
          }
      }

      range = new Range(lowestAddress, highestAddress);

      return context.GetControlFlowGraph();
  }
  /// <summary>
  /// Emits code that increments a per-function call counter and calls
  /// <c>NativeInterface.EnqueueForRejit</c> when the value read before the increment
  /// equals the rejit threshold.
  /// </summary>
  /// <param name="context">Emitter context receiving the generated operations.</param>
  /// <param name="counter">Counter allocated from the context's count table for this function.</param>
  internal static void EmitRejitCheck(ArmEmitterContext context, out Counter<uint> counter)
  {
      const int RejitCallThreshold = 100;

      counter = new Counter<uint>(context.CountTable);

      Operand lblSkip = Label();

      Operand counterAddress = Const(ref counter.Value, Ptc.CountTableIndex);

      Operand previousCount = context.Load(OperandType.I32, counterAddress);
      Operand updatedCount = context.Add(previousCount, Const(1));

      context.Store(counterAddress, updatedCount);

      // The rejit request is made only when the previous count is exactly the threshold.
      context.BranchIf(lblSkip, previousCount, Const(RejitCallThreshold), Comparison.NotEqual, BasicBlockFrequency.Cold);

      var enqueueMethod = typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit));

      context.Call(enqueueMethod, Const(context.EntryAddress));

      context.MarkLabel(lblSkip);
  }
  /// <summary>
  /// Emits a synchronization point based on the counter stored in the native context.
  /// </summary>
  /// <remarks>
  /// The emitted code loads the counter. When it is non-zero, the counter is decremented and
  /// execution continues. When it is zero, <c>NativeInterface.CheckSynchronization</c> is called:
  /// execution continues if it returns true, otherwise the function returns 0.
  /// </remarks>
  /// <param name="context">Emitter context receiving the generated operations.</param>
  internal static void EmitSynchronization(EmitterContext context)
  {
      long counterOffset = NativeContext.GetCounterOffset();

      Operand lblDecrement = Label();
      Operand lblContinue = Label();

      // Argument 0 is used as the base address that the counter offset is added to.
      Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0);
      Operand counterAddress = context.Add(nativeContextPtr, Const(counterOffset));

      Operand counterValue = context.Load(OperandType.I32, counterAddress);

      context.BranchIfTrue(lblDecrement, counterValue, BasicBlockFrequency.Cold);

      var checkMethod = typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization));

      Operand stillRunning = context.Call(checkMethod);

      context.BranchIfTrue(lblContinue, stillRunning, BasicBlockFrequency.Cold);

      context.Return(Const(0L));

      context.MarkLabel(lblDecrement);

      counterValue = context.Subtract(counterValue, Const(1));

      context.Store(counterAddress, counterValue);

      context.MarkLabel(lblContinue);
  }
  /// <summary>
  /// Reacts to the invalidation of a guest code region by dropping every pending rejit request.
  /// </summary>
  /// <remarks>
  /// Both parameters are currently unused: the whole rejit queue is cleared regardless of the
  /// region, and already translated functions are left in the cache.
  /// </remarks>
  /// <param name="address">Start address of the invalidated region.</param>
  /// <param name="size">Size of the invalidated region.</param>
  public void InvalidateJitCacheRegion(ulong address, ulong size)
  {
      // A queued rejit may target a function inside the invalidated region, so drop the pending
      // requests. Requeueing stays allowed so that those functions can be requested again later.
      ClearRejitQueue(allowRequeue: true);

      // TODO: Completely remove functions overlapping the specified range from the cache.
  }
  /// <summary>
  /// Queues a rejit request for the function at <paramref name="guestAddress"/>, unless one
  /// is already pending, and wakes a background translator thread.
  /// </summary>
  /// <param name="guestAddress">Guest address of the function to rejit.</param>
  /// <param name="mode">Execution mode recorded in the request.</param>
  internal void EnqueueForRejit(ulong guestAddress, ExecutionMode mode)
  {
      bool alreadyPending = !_backgroundSet.TryAdd(guestAddress, null);

      if (alreadyPending)
      {
          return;
      }

      RejitRequest request = new RejitRequest(guestAddress, mode);

      _backgroundStack.Push(request);

      _backgroundTranslatorEvent.Set();
  }
  /// <summary>
  /// Records a replaced function so that it can be unmapped when the JIT cache is cleared.
  /// </summary>
  /// <param name="guestAddress">Guest address the function was registered under.</param>
  /// <param name="func">The function that was replaced.</param>
  private void EnqueueForDeletion(ulong guestAddress, TranslatedFunction func) =>
      _oldFuncs.Enqueue(new KeyValuePair<ulong, TranslatedFunction>(guestAddress, func));
  /// <summary>
  /// Unmaps every translated function, both registered ones and ones replaced by a rejit,
  /// and disposes their call counters.
  /// </summary>
  private void ClearJitCache()
  {
      // Ensure no attempt will be made to compile new functions due to rejit.
      ClearRejitQueue(allowRequeue: false);

      // Frees the code of a function and its call counter, when it has one.
      void Release(TranslatedFunction function)
      {
          JitCache.Unmap(function.FuncPtr);

          function.CallCounter?.Dispose();
      }

      foreach (TranslatedFunction registered in _funcs.Values)
      {
          Release(registered);
      }

      _funcs.Clear();

      while (_oldFuncs.TryDequeue(out KeyValuePair<ulong, TranslatedFunction> replaced))
      {
          Release(replaced.Value);
      }
  }
  /// <summary>
  /// Removes every pending rejit request while holding the background translator lock as a writer.
  /// </summary>
  /// <param name="allowRequeue">
  /// True to let the dropped functions be requested for rejit again: their call counter is reset
  /// to 0 and their address is removed from the pending set. False to only empty the request stack.
  /// </param>
  private void ClearRejitQueue(bool allowRequeue)
  {
      // Background translators hold this lock as readers while translating, so taking it as a
      // writer waits for them to finish their current request.
      _backgroundTranslatorLock.AcquireWriterLock(Timeout.Infinite);

      try
      {
          if (allowRequeue)
          {
              while (_backgroundStack.TryPop(out var request))
              {
                  // Resetting the counter lets the emitted rejit check reach its threshold again.
                  if (_funcs.TryGetValue(request.Address, out var func) && func.CallCounter != null)
                  {
                      Volatile.Write(ref func.CallCounter.Value, 0);
                  }

                  // Forget the address so that EnqueueForRejit accepts it again.
                  _backgroundSet.TryRemove(request.Address, out _);
              }
          }
          else
          {
              _backgroundStack.Clear();
          }
      }
      finally
      {
          // Release in a finally block so that an exception cannot leave the writer lock held,
          // which would block every background translator thread.
          _backgroundTranslatorLock.ReleaseWriterLock();
      }
  }
  394. }
  395. }