// Translator.cs
  1. using ARMeilleure.Common;
  2. using ARMeilleure.Decoders;
  3. using ARMeilleure.Diagnostics;
  4. using ARMeilleure.Instructions;
  5. using ARMeilleure.IntermediateRepresentation;
  6. using ARMeilleure.Memory;
  7. using ARMeilleure.State;
  8. using ARMeilleure.Translation.Cache;
  9. using ARMeilleure.Translation.PTC;
  10. using Ryujinx.Common;
  11. using System;
  12. using System.Collections.Concurrent;
  13. using System.Collections.Generic;
  14. using System.Diagnostics;
  15. using System.Runtime;
  16. using System.Threading;
  17. using static ARMeilleure.Common.BitMapPool;
  18. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  19. using static ARMeilleure.IntermediateRepresentation.OperationHelper;
  20. namespace ARMeilleure.Translation
  21. {
  22. public class Translator
  23. {
  // Capacity constant for the call-count table.
  // NOTE(review): not referenced anywhere in this class as shown; confirm whether it is still needed.
  private const int CountTableCapacity = 4 * 1024 * 1024;

  // FIXME: Remove this once the init logic of the emulator will be redone.
  // Execute() blocks on this event before the first guest thread sets up translation state.
  public static readonly ManualResetEvent IsReadyForTranslation = new(false);

  // Collaborators supplied through the constructor.
  private readonly IJitMemoryAllocator _allocator;
  private readonly IMemoryManager _memory;

  // Current translations keyed by guest address, and superseded translations that are
  // kept until ClearJitCache() unmaps them.
  private readonly ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
  private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;

  // Pending rejit work: the set de-duplicates addresses, the stack holds the requests.
  private readonly ConcurrentDictionary<ulong, object> _backgroundSet;
  private readonly ConcurrentStack<RejitRequest> _backgroundStack;

  // Signalled when a request is pushed or when the background threads should re-check
  // whether they have to exit.
  private readonly AutoResetEvent _backgroundTranslatorEvent;

  // Background translators take the reader side; ClearRejitQueue() takes the writer side.
  private readonly ReaderWriterLock _backgroundTranslatorLock;

  // Created by the first thread entering Execute() and disposed by the last one leaving it.
  private JumpTable _jumpTable;

  // Number of threads currently inside Execute().
  private volatile int _threadCount;

  internal JumpTable JumpTable => _jumpTable;

  internal EntryTable<uint> CountTable { get; }
  /// <summary>
  /// Creates a translator bound to the given JIT memory allocator and guest memory manager.
  /// </summary>
  /// <param name="allocator">Allocator handed to the JIT cache and, later, to the jump table.</param>
  /// <param name="memory">Guest memory manager used when translating and executing code.</param>
  public Translator(IJitMemoryAllocator allocator, IMemoryManager memory)
  {
      _allocator = allocator;
      _memory = memory;

      // Translation bookkeeping.
      _funcs = new();
      _oldFuncs = new();

      // Background rejit machinery.
      _backgroundSet = new();
      _backgroundStack = new();
      _backgroundTranslatorEvent = new(false);
      _backgroundTranslatorLock = new();

      CountTable = new();

      JitCache.Initialize(allocator);
      DirectCallStubs.InitializeStubs();
  }
  /// <summary>
  /// Body of a background translator thread. While at least one thread is inside
  /// <see cref="Execute"/>, it pops rejit requests and replaces the registered translation
  /// of each requested address with a high code-quality one.
  /// </summary>
  private void TranslateStackedSubs()
  {
      while (_threadCount != 0)
      {
          bool translated = false;

          _backgroundTranslatorLock.AcquireReaderLock(Timeout.Infinite);

          try
          {
              // A request is only honoured if its address is still in the pending set.
              if (_backgroundStack.TryPop(out RejitRequest request) &&
                  _backgroundSet.TryRemove(request.Address, out _))
              {
                  TranslatedFunction func = Translate(
                      _memory,
                      _jumpTable,
                      CountTable,
                      request.Address,
                      request.Mode,
                      highCq: true);

                  // The replaced translation is queued rather than released immediately.
                  _funcs.AddOrUpdate(request.Address, func, (key, oldFunc) =>
                  {
                      EnqueueForDeletion(key, oldFunc);
                      return func;
                  });

                  _jumpTable.RegisterFunction(request.Address, func);

                  if (PtcProfiler.Enabled)
                  {
                      PtcProfiler.UpdateEntry(request.Address, request.Mode, highCq: true);
                  }

                  translated = true;
              }
          }
          finally
          {
              // Always give the reader lock back, even if translation throws, so that
              // ClearRejitQueue() can never be blocked by a lock that is never released.
              _backgroundTranslatorLock.ReleaseReaderLock();
          }

          if (!translated)
          {
              // Nothing to do: sleep until a request is enqueued or shutdown is signalled.
              _backgroundTranslatorEvent.WaitOne();
          }
      }

      // Wake up any other background translator threads, to encourage them to exit.
      _backgroundTranslatorEvent.Set();
  }
  /// <summary>
  /// Runs guest code on the calling thread, starting at <paramref name="address"/>, until
  /// the context stops running or a translated function returns address 0.
  /// The first thread to enter sets up the jump table, the PTC state and the background
  /// translator threads; the last thread to leave tears the shared state down.
  /// </summary>
  /// <param name="context">Execution context of the guest thread.</param>
  /// <param name="address">Guest address at which execution starts.</param>
  public void Execute(State.ExecutionContext context, ulong address)
  {
      bool isFirstThread = Interlocked.Increment(ref _threadCount) == 1;

      if (isFirstThread)
      {
          IsReadyForTranslation.WaitOne();

          Debug.Assert(_jumpTable == null);
          _jumpTable = new JumpTable(_allocator);

          if (Ptc.State == PtcState.Enabled)
          {
              Debug.Assert(_funcs.Count == 0);

              Ptc.LoadTranslations(_funcs, _memory, _jumpTable, CountTable);
              Ptc.MakeAndSaveTranslations(_funcs, _memory, _jumpTable, CountTable);
          }

          PtcProfiler.Start();
          Ptc.Disable();

          StartBackgroundTranslators();
      }

      Statistics.InitializeTimer();
      NativeInterface.RegisterThread(context, _memory, this);

      do
      {
          address = ExecuteSingle(context, address);
      }
      while (context.Running && address != 0);

      NativeInterface.UnregisterThread();

      bool isLastThread = Interlocked.Decrement(ref _threadCount) == 0;

      if (isLastThread)
      {
          // Wake a background translator so it can observe the zero thread count and exit.
          _backgroundTranslatorEvent.Set();

          ClearJitCache();
          DisposePools();

          _jumpTable.Dispose();
          _jumpTable = null;

          CountTable.Dispose();

          GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
      }
  }

  /// <summary>
  /// Starts the background translator threads running <see cref="TranslateStackedSubs"/>.
  /// </summary>
  private void StartBackgroundTranslators()
  {
      // Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core + ht
      // etc). All threads are normal priority except the last, which just fills as much of the last core
      // as the OS lets it with a low priority. If we only have one rejit thread, it should be normal priority
      // as highCq code is performance critical.
      //
      // TODO: Use physical cores rather than logical. This only really makes sense for processors with
      // hyperthreading. Requires OS specific code.
      int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
      int threadCount = Math.Min(4, unboundedThreadCount);

      for (int index = 0; index < threadCount; index++)
      {
          // Compared against the uncapped count, so no thread is lowered once the cap of 4 applies.
          bool isLast = index != 0 && index == unboundedThreadCount - 1;

          var worker = new Thread(TranslateStackedSubs);

          worker.Name = "CPU.BackgroundTranslatorThread." + index;
          worker.Priority = isLast ? ThreadPriority.Lowest : ThreadPriority.Normal;

          worker.Start();
      }
  }
  /// <summary>
  /// Executes the translated function for <paramref name="address"/>, translating it first
  /// if needed, and times the call through <c>Statistics</c>.
  /// </summary>
  /// <param name="context">Execution context passed to the translated function.</param>
  /// <param name="address">Guest address of the function to run.</param>
  /// <returns>The address returned by the translated function.</returns>
  public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
  {
      var function = GetOrTranslate(address, context.ExecutionMode);

      Statistics.StartTimer();
      ulong nextAddress = function.Execute(context);
      Statistics.StopTimer(address);

      return nextAddress;
  }
  /// <summary>
  /// Returns the translation registered for <paramref name="address"/>, creating a low
  /// code-quality one if none exists yet.
  /// </summary>
  /// <param name="address">Guest address of the function.</param>
  /// <param name="mode">Execution mode used when decoding the function.</param>
  /// <returns>The translation stored in the function table for the address.</returns>
  internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
  {
      if (_funcs.TryGetValue(address, out TranslatedFunction func))
      {
          return func;
      }

      func = Translate(_memory, _jumpTable, CountTable, address, mode, highCq: false);

      TranslatedFunction published = _funcs.GetOrAdd(address, func);

      if (published != func)
      {
          // Another thread registered a translation first. Ours was never published, so
          // release both its code and its call counter (as ClearJitCache does) instead of
          // leaving the counter allocated.
          JitCache.Unmap(func.FuncPtr);
          func.CallCounter?.Dispose();

          func = published;
      }

      if (PtcProfiler.Enabled)
      {
          PtcProfiler.AddEntry(address, mode, highCq: false);
      }

      return func;
  }
  /// <summary>
  /// Decodes the guest function at <paramref name="address"/>, emits its intermediate
  /// representation and compiles it into a <see cref="TranslatedFunction"/>.
  /// </summary>
  /// <param name="memory">Guest memory the code is decoded from.</param>
  /// <param name="jumpTable">Jump table handed to the emitter context.</param>
  /// <param name="countTable">Table handed to the emitter context for call counters.</param>
  /// <param name="address">Guest entry address of the function.</param>
  /// <param name="mode">Execution mode used for decoding and register usage analysis.</param>
  /// <param name="highCq">Whether to compile with the high code-quality options.</param>
  /// <returns>The compiled function with its call counter (if any), size and quality flag.</returns>
  internal static TranslatedFunction Translate(
      IMemoryManager memory,
      JumpTable jumpTable,
      EntryTable<uint> countTable,
      ulong address,
      ExecutionMode mode,
      bool highCq)
  {
      // Pool group 1 is used for high-quality translations, group 0 for the others.
      int poolGroup = highCq ? 1 : 0;

      var context = new ArmEmitterContext(memory, jumpTable, countTable, address, highCq, Aarch32Mode.User);

      Logger.StartPass(PassName.Decoding);
      Block[] blocks = Decoder.Decode(memory, address, mode, highCq, singleBlock: false);
      Logger.EndPass(PassName.Decoding);

      PreparePool(poolGroup);

      Logger.StartPass(PassName.Translation);

      EmitSynchronization(context);

      // If the first decoded block is not the entry point, jump to the entry label first.
      if (blocks[0].Address != address)
      {
          context.Branch(context.GetLabel(address));
      }

      ControlFlowGraph cfg = EmitAndGetCFG(context, blocks, out Range funcRange, out Counter<uint> counter);
      ulong funcSize = funcRange.End - funcRange.Start;

      Logger.EndPass(PassName.Translation);

      Logger.StartPass(PassName.RegisterUsage);
      RegisterUsage.RunPass(cfg, mode);
      Logger.EndPass(PassName.RegisterUsage);

      OperandType[] argTypes = { OperandType.I64 };
      CompilerOptions options = highCq ? CompilerOptions.HighCq : CompilerOptions.None;

      GuestFunction func;

      if (Ptc.State == PtcState.Disabled)
      {
          func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options);

          ResetPool(poolGroup);
      }
      else
      {
          // While PTC is active, the compiler output is also recorded into the PTC.
          using (var ptcInfo = new PtcInfo())
          {
              func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options, ptcInfo);

              ResetPool(poolGroup);

              Hash128 hash = Ptc.ComputeHash(memory, address, funcSize);
              Ptc.WriteInfoCodeRelocUnwindInfo(address, funcSize, hash, highCq, ptcInfo);
          }
      }

      return new TranslatedFunction(func, counter, funcSize, highCq);
  }
  /// <summary>
  /// Prepares the operand pool and then the operation pool of the given group.
  /// </summary>
  /// <param name="groupId">Pool group; <see cref="Translate"/> passes 1 for highCq and 0 otherwise.</param>
  internal static void PreparePool(int groupId = 0)
  {
      PrepareOperandPool(groupId);

      PrepareOperationPool(groupId);
  }
  /// <summary>
  /// Resets the operation pool and then the operand pool of the given group.
  /// </summary>
  /// <param name="groupId">Pool group; <see cref="Translate"/> passes 1 for highCq and 0 otherwise.</param>
  internal static void ResetPool(int groupId = 0)
  {
      ResetOperationPool(groupId);

      ResetOperandPool(groupId);
  }
  /// <summary>
  /// Disposes the operand, operation and bit map pools, in that order.
  /// </summary>
  internal static void DisposePools()
  {
      DisposeOperandPools();

      DisposeOperationPools();

      DisposeBitMapPools();
  }
  /// <summary>
  /// Immutable pair of guest addresses delimiting the code covered by a translation.
  /// </summary>
  private readonly struct Range
  {
      /// <summary>Lowest address of the range.</summary>
      public ulong Start { get; }

      /// <summary>Highest end address of the range.</summary>
      public ulong End { get; }

      /// <summary>
      /// Creates a range from its two bounds; the values are stored as given.
      /// </summary>
      public Range(ulong start, ulong end)
      {
          Start = start;
          End = end;
      }
  }
  /// <summary>
  /// Emits the intermediate representation for every decoded block and returns the
  /// resulting control flow graph.
  /// </summary>
  /// <param name="context">Emitter context that receives the generated operations.</param>
  /// <param name="blocks">Decoded blocks of the function.</param>
  /// <param name="range">Lowest address and highest end address over all non-exit blocks.</param>
  /// <param name="counter">
  /// Call counter created for the entry block when the context is not high quality; otherwise null.
  /// </param>
  /// <returns>The control flow graph built by <paramref name="context"/>.</returns>
  private static ControlFlowGraph EmitAndGetCFG(
      ArmEmitterContext context,
      Block[] blocks,
      out Range range,
      out Counter<uint> counter)
  {
      counter = null;

      ulong lowest = ulong.MaxValue;
      ulong highest = 0;

      foreach (Block block in blocks)
      {
          // Exit blocks do not contribute to the function's address range.
          if (!block.Exit)
          {
              lowest = Math.Min(lowest, block.Address);
              highest = Math.Max(highest, block.EndAddress);
          }

          // Only low quality code carries the rejit check, placed at the entry block.
          if (block.Address == context.EntryAddress && !context.HighCq)
          {
              EmitRejitCheck(context, out counter);
          }

          context.CurrBlock = block;
          context.MarkLabel(context.GetLabel(block.Address));

          if (block.Exit)
          {
              InstEmitFlowHelper.EmitTailContinue(context, Const(block.Address));
          }
          else
          {
              EmitBlockOpCodes(context, block);
          }
      }

      range = new Range(lowest, highest);

      return context.GetControlFlowGraph();
  }

  /// <summary>
  /// Emits every instruction of a non-exit block, in order.
  /// </summary>
  private static void EmitBlockOpCodes(ArmEmitterContext context, Block block)
  {
      for (int index = 0; index < block.OpCodes.Count; index++)
      {
          OpCode opCode = block.OpCodes[index];

          context.CurrOp = opCode;

          bool isLastOp = index == block.OpCodes.Count - 1;

          // A block whose non-exit branch targets its own address or a lower one gets a
          // synchronization check before its final instruction.
          bool branchesBackward = block.Branch != null &&
                                  !block.Branch.Exit &&
                                  block.Branch.Address <= block.Address;

          if (isLastOp && branchesBackward)
          {
              EmitSynchronization(context);
          }

          Operand lblPredicateSkip = null;

          // Conditional 32-bit instructions are skipped when the inverted condition holds.
          if (opCode is OpCode32 op && op.Cond < Condition.Al)
          {
              lblPredicateSkip = Label();

              InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
          }

          var emitter = opCode.Instruction.Emitter;

          if (emitter == null)
          {
              throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
          }

          emitter(context);

          if (lblPredicateSkip != null)
          {
              context.MarkLabel(lblPredicateSkip);
          }
      }
  }
  /// <summary>
  /// Emits code that increments the function's call counter and calls
  /// <c>NativeInterface.EnqueueForRejit</c> when the value read before the increment
  /// equals the rejit threshold.
  /// </summary>
  /// <param name="context">Emitter context that receives the generated operations.</param>
  /// <param name="counter">Counter allocated from the context's count table.</param>
  internal static void EmitRejitCheck(ArmEmitterContext context, out Counter<uint> counter)
  {
      const int RejitThreshold = 100;

      counter = new Counter<uint>(context.CountTable);

      Operand lblSkip = Label();

      // Load the current count, then store it back incremented by one.
      Operand counterAddress = Const(ref counter.Value, Ptc.CountTableIndex);
      Operand oldCount = context.Load(OperandType.I32, counterAddress);
      Operand newCount = context.Add(oldCount, Const(1));
      context.Store(counterAddress, newCount);

      // Skip the rejit request unless the previous count equals the threshold.
      context.BranchIf(lblSkip, oldCount, Const(RejitThreshold), Comparison.NotEqual, BasicBlockFrequency.Cold);

      var enqueueForRejit = typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit));
      context.Call(enqueueForRejit, Const(context.EntryAddress));

      context.MarkLabel(lblSkip);
  }
  /// <summary>
  /// Emits a synchronization check driven by the counter stored in the native context.
  /// A non-zero counter is decremented. A zero counter triggers a call to
  /// <c>NativeInterface.CheckSynchronization</c>, and the function returns 0 when that
  /// call reports false.
  /// </summary>
  /// <param name="context">Emitter context that receives the generated operations.</param>
  internal static void EmitSynchronization(EmitterContext context)
  {
      long counterOffset = NativeContext.GetCounterOffset();

      Operand lblNonZero = Label();
      Operand lblExit = Label();

      // The counter lives at a fixed offset from the function's first argument.
      Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
      Operand counterAddress = context.Add(nativeContext, Const(counterOffset));
      Operand counterValue = context.Load(OperandType.I32, counterAddress);

      context.BranchIfTrue(lblNonZero, counterValue, BasicBlockFrequency.Cold);

      // Counter is zero: ask the native side whether execution should continue.
      var checkSynchronization = typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization));
      Operand running = context.Call(checkSynchronization);

      context.BranchIfTrue(lblExit, running, BasicBlockFrequency.Cold);

      context.Return(Const(0L));

      context.MarkLabel(lblNonZero);

      Operand decremented = context.Subtract(counterValue, Const(1));
      context.Store(counterAddress, decremented);

      context.MarkLabel(lblExit);
  }
  /// <summary>
  /// Reacts to the invalidation of a guest code region. Currently this only clears the
  /// pending rejit requests; <paramref name="address"/> and <paramref name="size"/> are not
  /// used yet.
  /// </summary>
  /// <param name="address">Start address of the invalidated region.</param>
  /// <param name="size">Size of the invalidated region.</param>
  public void InvalidateJitCacheRegion(ulong address, ulong size)
  {
      // If rejit is running, stop it as it may be trying to rejit a function on the invalidated region.
      // Requeueing stays allowed, so the cleared functions can be requested again.
      ClearRejitQueue(allowRequeue: true);

      // TODO: Completely remove functions overlapping the specified range from the cache.
  }
  /// <summary>
  /// Queues a function for background rejit, unless its address is already pending.
  /// </summary>
  /// <param name="guestAddress">Guest address of the function.</param>
  /// <param name="mode">Execution mode recorded in the request.</param>
  internal void EnqueueForRejit(ulong guestAddress, ExecutionMode mode)
  {
      bool isNewRequest = _backgroundSet.TryAdd(guestAddress, null);

      if (!isNewRequest)
      {
          return;
      }

      _backgroundStack.Push(new RejitRequest(guestAddress, mode));

      // Wake a background translator thread waiting for work.
      _backgroundTranslatorEvent.Set();
  }
  /// <summary>
  /// Queues a replaced translation so that <see cref="ClearJitCache"/> releases it later.
  /// </summary>
  /// <param name="guestAddress">Guest address the translation belonged to.</param>
  /// <param name="func">The replaced translation.</param>
  private void EnqueueForDeletion(ulong guestAddress, TranslatedFunction func)
  {
      var entry = new KeyValuePair<ulong, TranslatedFunction>(guestAddress, func);

      _oldFuncs.Enqueue(entry);
  }
  /// <summary>
  /// Unmaps every current and replaced translation, disposes their call counters and
  /// empties the function table.
  /// </summary>
  private void ClearJitCache()
  {
      // Unmaps one translation's code and disposes its call counter, if it has one.
      static void Release(TranslatedFunction function)
      {
          JitCache.Unmap(function.FuncPtr);
          function.CallCounter?.Dispose();
      }

      // Ensure no attempt will be made to compile new functions due to rejit.
      ClearRejitQueue(allowRequeue: false);

      foreach (TranslatedFunction current in _funcs.Values)
      {
          Release(current);
      }

      _funcs.Clear();

      while (_oldFuncs.TryDequeue(out KeyValuePair<ulong, TranslatedFunction> replaced))
      {
          Release(replaced.Value);
      }
  }
  /// <summary>
  /// Empties the pending rejit stack while holding the writer lock, which excludes the
  /// background translators that hold the reader lock while they work.
  /// </summary>
  /// <param name="allowRequeue">
  /// When true, each dropped request has its function's call counter reset to 0 and its
  /// address removed from the pending set, so the function can be requested again. When
  /// false, the stack is simply cleared and the pending set is left untouched.
  /// </param>
  private void ClearRejitQueue(bool allowRequeue)
  {
      _backgroundTranslatorLock.AcquireWriterLock(Timeout.Infinite);

      try
      {
          if (!allowRequeue)
          {
              _backgroundStack.Clear();

              return;
          }

          while (_backgroundStack.TryPop(out RejitRequest request))
          {
              if (_funcs.TryGetValue(request.Address, out TranslatedFunction func) && func.CallCounter != null)
              {
                  Volatile.Write(ref func.CallCounter.Value, 0);
              }

              _backgroundSet.TryRemove(request.Address, out _);
          }
      }
      finally
      {
          // Release in a finally block so a failure above cannot leave the writer lock held
          // and stall the background translators.
          _backgroundTranslatorLock.ReleaseWriterLock();
      }
  }
  389. }
  390. }