ASoftFloat.cs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. using System;
  2. namespace ChocolArm64.Instruction
  3. {
  4. static class ASoftFloat
  5. {
  // Lookup tables consumed by InvSqrtEstimate and RecipEstimate.
  private static readonly byte[] InvSqrtEstimateTable;
  private static readonly byte[] RecipEstimateTable;

  // Builds both estimate tables once, when the type is first initialized.
  static ASoftFloat()
  {
      RecipEstimateTable = BuildRecipEstimateTable();
      InvSqrtEstimateTable = BuildInvSqrtEstimateTable();
  }
  /// <summary>
  /// Builds the 512-entry lookup table used by the inverse square root estimate.
  /// Entries 0..127 are left at zero; entries 128..511 hold an 8-bit estimate.
  /// </summary>
  /// <returns>A new 512-byte table.</returns>
  private static byte[] BuildInvSqrtEstimateTable()
  {
      const ulong Limit = 1ul << 28;

      byte[] estimates = new byte[512];

      for (ulong entry = 128; entry < 512; entry++)
      {
          // Lower half of the populated range uses 2*entry + 1, upper half uses (entry | 1) * 2.
          ulong operand = entry < 256 ? (entry << 1) + 1 : (entry | 1) << 1;

          // Largest root (starting from 256) for which operand * (root + 1)^2 stays below 2^28.
          ulong root = 256;
          while (operand * (root + 1) * (root + 1) < Limit)
          {
              root++;
          }

          // Round to nearest after halving, then keep the low 8 bits.
          ulong rounded = (root + 1) >> 1;
          estimates[entry] = (byte)(rounded & 0xFF);
      }

      return estimates;
  }
  /// <summary>
  /// Builds the 256-entry lookup table used by the reciprocal estimate.
  /// </summary>
  /// <returns>A new 256-byte table of 8-bit estimates.</returns>
  private static byte[] BuildRecipEstimateTable()
  {
      byte[] estimates = new byte[256];

      for (ulong entry = 0; entry < 256; entry++)
      {
          // Prefix the index with a leading one bit, then form 2*value + 1.
          ulong operand = ((entry | 0x100) << 1) + 1;

          // Integer quotient of 2^19 by the operand, halved with rounding.
          ulong quotient = 0x80000 / operand;
          ulong rounded = (quotient + 1) >> 1;

          estimates[entry] = (byte)(rounded & 0xFF);
      }

      return estimates;
  }
  /// <summary>
  /// Single-precision overload: widens <paramref name="x"/> to double, evaluates the
  /// double-precision inverse square root estimate and narrows the result back to float.
  /// </summary>
  public static float InvSqrtEstimate(float x) => (float)InvSqrtEstimate((double)x);
  /// <summary>
  /// Computes a table-driven, 8-bit precision estimate of 1/sqrt(x) by operating directly
  /// on the IEEE 754 bit pattern of <paramref name="x"/>.
  /// </summary>
  /// <param name="x">The input value.</param>
  /// <returns>
  /// NaN input: the same bits with the quiet bit (bit 51) set.
  /// Zero: infinity with the sign of the input.
  /// Any other negative input (including negative infinity): the quiet NaN 0x7FF8000000000000.
  /// Positive infinity: positive zero.
  /// Otherwise: the estimate assembled from the halved exponent and a table lookup.
  /// </returns>
  public static double InvSqrtEstimate(double x)
  {
      ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x);
      ulong x_sign = x_bits & 0x8000000000000000;
      long x_exp = (long)((x_bits >> 52) & 0x7FF);
      ulong scaled = x_bits & ((1ul << 52) - 1);

      if (x_exp == 0x7FF && scaled != 0)
      {
          // NaN
          return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000));
      }

      if (x_exp == 0)
      {
          if (scaled == 0)
          {
              // Zero -> Infinity
              return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7FF0000000000000));
          }

          // Denormal: shift the fraction up until its top bit (bit 51) is set, lowering
          // the exponent once per shift. The literal must be 1ul: with an int literal the
          // shift count is reduced modulo 32, so bit 19 would be tested instead, which
          // mis-normalizes the value and can loop forever once all set bits are shifted out.
          while ((scaled & (1ul << 51)) == 0)
          {
              scaled <<= 1;
              x_exp--;
          }

          // Drop the now-implicit leading one out of the 52-bit fraction field.
          scaled <<= 1;
      }

      if (x_sign != 0)
      {
          // Negative -> NaN
          return BitConverter.Int64BitsToDouble((long)0x7FF8000000000000);
      }

      if (x_exp == 0x7ff && scaled == 0)
      {
          // Infinity -> Zero
          return BitConverter.Int64BitsToDouble((long)x_sign);
      }

      if (((ulong)x_exp & 1) == 1)
      {
          // Odd exponent: top 7 fraction bits, table index in 128..255.
          scaled >>= 45;
          scaled &= 0xFF;
          scaled |= 0x80;
      }
      else
      {
          // Even exponent: top 8 fraction bits, table index in 256..511.
          scaled >>= 44;
          scaled &= 0xFF;
          scaled |= 0x100;
      }

      ulong result_exp = ((ulong)(3068 - x_exp) / 2) & 0x7FF;
      ulong estimate = (ulong)InvSqrtEstimateTable[scaled];
      ulong fraction = estimate << 44;

      ulong result = x_sign | (result_exp << 52) | fraction;
      return BitConverter.Int64BitsToDouble((long)result);
  }
  /// <summary>
  /// Single-precision overload: widens <paramref name="x"/> to double, evaluates the
  /// double-precision reciprocal estimate and narrows the result back to float.
  /// </summary>
  public static float RecipEstimate(float x) => (float)RecipEstimate((double)x);
  /// <summary>
  /// Computes a table-driven, 8-bit precision estimate of 1/x by operating directly on
  /// the IEEE 754 bit pattern of <paramref name="x"/>.
  /// </summary>
  /// <param name="x">The input value.</param>
  /// <returns>
  /// NaN input: the same bits with the quiet bit (bit 51) set.
  /// Infinity or a biased exponent of 2045 and above: zero with the sign of the input.
  /// Zero: infinity with the sign of the input.
  /// Otherwise: the estimate assembled from the mirrored exponent and a table lookup.
  /// </returns>
  public static double RecipEstimate(double x)
  {
      const ulong FractionMask = (1ul << 52) - 1;

      ulong bits = (ulong)BitConverter.DoubleToInt64Bits(x);
      ulong sign = bits & 0x8000000000000000;
      ulong exponent = (bits >> 52) & 0x7FF;
      ulong mantissa = bits & FractionMask;

      bool isNaN = exponent == 0x7ff && mantissa != 0;
      if (isNaN)
      {
          // Propagate the NaN, quieted.
          return BitConverter.Int64BitsToDouble((long)(bits | 0x0008000000000000));
      }

      if (exponent >= 2045)
      {
          // Infinity, or magnitude too large: signed zero.
          return BitConverter.Int64BitsToDouble((long)sign);
      }

      if (exponent == 0)
      {
          if (mantissa == 0)
          {
              // Zero maps to signed infinity.
              return BitConverter.Int64BitsToDouble((long)(sign | 0x7FF0000000000000));
          }

          // Denormal input: shift by one or two places depending on the top fraction bit.
          bool topBitClear = (mantissa & (1ul << 51)) == 0;
          if (topBitClear)
          {
              exponent = ~0ul; // acts as -1 in the wrapping subtraction below
              mantissa <<= 2;
          }
          else
          {
              mantissa <<= 1;
          }
      }

      ulong tableIndex = (mantissa >> 44) & 0xFF;
      ulong resultExponent = (2045 - exponent) & 0x7FF;
      ulong resultFraction = (ulong)RecipEstimateTable[tableIndex] << 44;

      // NOTE(review): with the guards above, resultExponent is always in 1..2046, so
      // neither branch below appears reachable - confirm whether the early
      // "exponent >= 2045" return is intended.
      if (resultExponent == 0)
      {
          resultFraction = (resultFraction >> 1) | (1ul << 51);
      }
      else if (resultExponent == 0x7FF)
      {
          resultExponent = 0;
          resultFraction = (resultFraction >> 2) | (1ul << 50);
      }

      ulong resultBits = sign | (resultExponent << 52) | resultFraction;
      return BitConverter.Int64BitsToDouble((long)resultBits);
  }
  /// <summary>
  /// Single-precision overload: widens both operands to double, evaluates the
  /// double-precision reciprocal step and narrows the result back to float.
  /// </summary>
  public static float RecipStep(float op1, float op2) => (float)RecipStep((double)op1, (double)op2);
  /// <summary>
  /// Computes <c>2.0 - op1 * op2</c>, with explicit results when an infinity is involved.
  /// </summary>
  /// <param name="op1">First operand (negated internally before multiplying).</param>
  /// <param name="op2">Second operand.</param>
  /// <returns>
  /// 2.0 when one operand is infinite and the other is zero; an infinity carrying the sign
  /// of <c>-op1 * op2</c> when either operand is infinite; otherwise <c>2.0 + (-op1) * op2</c>.
  /// </returns>
  public static double RecipStep(double op1, double op2)
  {
      const ulong SignMask = 0x8000000000000000;
      const ulong MagnitudeMask = 0x7FFFFFFFFFFFFFFF;
      const ulong InfinityBits = 0x7FF0000000000000;

      double negated = -op1;

      ulong lhs = (ulong)BitConverter.DoubleToInt64Bits(negated);
      ulong rhs = (ulong)BitConverter.DoubleToInt64Bits(op2);

      ulong lhsMagnitude = lhs & MagnitudeMask;
      ulong rhsMagnitude = rhs & MagnitudeMask;

      bool lhsInfinite = lhsMagnitude == InfinityBits;
      bool rhsInfinite = rhsMagnitude == InfinityBits;

      if (lhsInfinite || rhsInfinite)
      {
          bool lhsZero = lhsMagnitude == 0;
          bool rhsZero = rhsMagnitude == 0;

          // Infinity times zero is treated as zero, leaving just the constant 2.0.
          if ((lhsInfinite && rhsZero) || (lhsZero && rhsInfinite))
          {
              return 2.0;
          }

          // Otherwise the product is an infinity whose sign is the XOR of the operand signs.
          ulong productSign = (lhs & SignMask) ^ (rhs & SignMask);
          return BitConverter.Int64BitsToDouble((long)(InfinityBits | productSign));
      }

      return 2.0 + negated * op2;
  }
  /// <summary>
  /// Converts an IEEE 754 half-precision bit pattern to the equivalent single-precision value.
  /// </summary>
  /// <param name="x">The 16-bit half-precision encoding.</param>
  /// <returns>
  /// The single-precision value. Zeros keep their sign, infinities and NaNs keep their
  /// sign and fraction bits (shifted into place), and half denormals are normalized.
  /// </returns>
  public static float ConvertHalfToSingle(ushort x)
  {
      // Half sign bit (bit 15) moved directly to the single sign position (bit 31).
      uint signBit = ((uint)x & 0x8000u) << 16;
      uint halfExponent = ((uint)x >> 10) & 0x001F;
      uint fraction = (uint)x & 0x03FF;

      if (halfExponent == 0x1F)
      {
          // Infinity or NaN: all-ones exponent, fraction carried over.
          return BitConverter.Int32BitsToSingle((int)(signBit | 0x7F800000u | (fraction << 13)));
      }

      bool denormalOrZero = halfExponent == 0;
      if (denormalOrZero && fraction == 0)
      {
          // Signed zero.
          return BitConverter.Int32BitsToSingle((int)signBit);
      }

      int unbiased = (int)halfExponent - 15;

      if (denormalOrZero)
      {
          // Normalize: shift until the implicit leading one reaches bit 10, then drop it.
          fraction <<= 1;
          for (; (fraction & 0x0400) == 0; unbiased--)
          {
              fraction <<= 1;
          }
          fraction &= 0x03FF;
      }

      uint singleExponent = (uint)((unbiased + 127) & 0xFF) << 23;
      uint singleBits = signBit | singleExponent | (fraction << 13);

      return BitConverter.Int32BitsToSingle((int)singleBits);
  }
  /// <summary>
  /// Maximum of two single-precision values where a lone quiet NaN is treated as missing:
  /// when exactly one operand is a quiet NaN it is replaced by negative infinity before
  /// the operands are handed to <c>Max</c>.
  /// </summary>
  public static float MaxNum(float op1, float op2)
  {
      bool firstIsQuiet = IsQNaN((uint)BitConverter.SingleToInt32Bits(op1));
      bool secondIsQuiet = IsQNaN((uint)BitConverter.SingleToInt32Bits(op2));

      // Substitute only when exactly one side is a quiet NaN.
      if (firstIsQuiet != secondIsQuiet)
      {
          if (firstIsQuiet)
          {
              op1 = float.NegativeInfinity;
          }
          else
          {
              op2 = float.NegativeInfinity;
          }
      }

      return Max(op1, op2);
  }
  /// <summary>
  /// Maximum of two double-precision values where a lone quiet NaN is treated as missing:
  /// when exactly one operand is a quiet NaN it is replaced by negative infinity before
  /// the operands are handed to <c>Max</c>.
  /// </summary>
  public static double MaxNum(double op1, double op2)
  {
      bool firstIsQuiet = IsQNaN((ulong)BitConverter.DoubleToInt64Bits(op1));
      bool secondIsQuiet = IsQNaN((ulong)BitConverter.DoubleToInt64Bits(op2));

      // Substitute only when exactly one side is a quiet NaN.
      if (firstIsQuiet != secondIsQuiet)
      {
          if (firstIsQuiet)
          {
              op1 = double.NegativeInfinity;
          }
          else
          {
              op2 = double.NegativeInfinity;
          }
      }

      return Max(op1, op2);
  }
  /// <summary>
  /// Returns the larger of two single-precision values, with explicit handling of NaN
  /// operands and of signed zeros.
  /// </summary>
  /// <returns>
  /// The greater operand; the NaN selected by <c>ProcessNaNs</c> when an operand is NaN;
  /// for two zeros, negative zero only if both are negative, otherwise positive zero.
  /// </returns>
  public static float Max(float op1, float op2)
  {
      // Ordered comparisons settle everything except NaNs and zero-versus-zero.
      if (op1 > op2)
      {
          return op1;
      }

      bool tieOnNonZero = op1 == op2 && op2 != 0;
      if (tieOnNonZero || op1 < op2)
      {
          return op2;
      }

      uint lhs = (uint)BitConverter.SingleToInt32Bits(op1);
      uint rhs = (uint)BitConverter.SingleToInt32Bits(op2);

      if (ProcessNaNs(lhs, rhs, out uint nanBits))
      {
          return BitConverter.Int32BitsToSingle((int)nanBits);
      }

      // Both operands are zeros here; the result is negative only if both are.
      bool bothNegativeZero = (lhs & rhs) == 0x80000000u;
      return bothNegativeZero ? BitConverter.Int32BitsToSingle(int.MinValue) : 0f;
  }
  /// <summary>
  /// Returns the larger of two double-precision values, with explicit handling of NaN
  /// operands and of signed zeros.
  /// </summary>
  /// <returns>
  /// The greater operand; the NaN selected by <c>ProcessNaNs</c> when an operand is NaN;
  /// for two zeros, negative zero only if both are negative, otherwise positive zero.
  /// </returns>
  public static double Max(double op1, double op2)
  {
      // Ordered comparisons settle everything except NaNs and zero-versus-zero.
      if (op1 > op2)
      {
          return op1;
      }

      bool tieOnNonZero = op1 == op2 && op2 != 0;
      if (tieOnNonZero || op1 < op2)
      {
          return op2;
      }

      ulong lhs = (ulong)BitConverter.DoubleToInt64Bits(op1);
      ulong rhs = (ulong)BitConverter.DoubleToInt64Bits(op2);

      if (ProcessNaNs(lhs, rhs, out ulong nanBits))
      {
          return BitConverter.Int64BitsToDouble((long)nanBits);
      }

      // Both operands are zeros here; the result is negative only if both are.
      bool bothNegativeZero = (lhs & rhs) == 0x8000000000000000ul;
      return bothNegativeZero ? BitConverter.Int64BitsToDouble(long.MinValue) : 0.0;
  }
  /// <summary>
  /// Minimum of two single-precision values where a lone quiet NaN is treated as missing:
  /// when exactly one operand is a quiet NaN it is replaced by positive infinity before
  /// the operands are handed to <c>Min</c>.
  /// </summary>
  public static float MinNum(float op1, float op2)
  {
      bool firstIsQuiet = IsQNaN((uint)BitConverter.SingleToInt32Bits(op1));
      bool secondIsQuiet = IsQNaN((uint)BitConverter.SingleToInt32Bits(op2));

      // Substitute only when exactly one side is a quiet NaN.
      if (firstIsQuiet != secondIsQuiet)
      {
          if (firstIsQuiet)
          {
              op1 = float.PositiveInfinity;
          }
          else
          {
              op2 = float.PositiveInfinity;
          }
      }

      return Min(op1, op2);
  }
  /// <summary>
  /// Minimum of two double-precision values where a lone quiet NaN is treated as missing:
  /// when exactly one operand is a quiet NaN it is replaced by positive infinity before
  /// the operands are handed to <c>Min</c>.
  /// </summary>
  public static double MinNum(double op1, double op2)
  {
      bool firstIsQuiet = IsQNaN((ulong)BitConverter.DoubleToInt64Bits(op1));
      bool secondIsQuiet = IsQNaN((ulong)BitConverter.DoubleToInt64Bits(op2));

      // Substitute only when exactly one side is a quiet NaN.
      if (firstIsQuiet != secondIsQuiet)
      {
          if (firstIsQuiet)
          {
              op1 = double.PositiveInfinity;
          }
          else
          {
              op2 = double.PositiveInfinity;
          }
      }

      return Min(op1, op2);
  }
  /// <summary>
  /// Returns the smaller of two single-precision values, with explicit handling of NaN
  /// operands and of signed zeros.
  /// </summary>
  /// <returns>
  /// The lesser operand; the NaN selected by <c>ProcessNaNs</c> when an operand is NaN;
  /// for two zeros, negative zero if either is negative, otherwise positive zero.
  /// </returns>
  public static float Min(float op1, float op2)
  {
      // Ordered comparisons settle everything except NaNs and zero-versus-zero.
      if (op1 < op2)
      {
          return op1;
      }

      bool tieOnNonZero = op1 == op2 && op2 != 0;
      if (tieOnNonZero || op1 > op2)
      {
          return op2;
      }

      uint lhs = (uint)BitConverter.SingleToInt32Bits(op1);
      uint rhs = (uint)BitConverter.SingleToInt32Bits(op2);

      if (ProcessNaNs(lhs, rhs, out uint nanBits))
      {
          return BitConverter.Int32BitsToSingle((int)nanBits);
      }

      // Both operands are zeros here; the result is negative if at least one is.
      bool anyNegativeZero = (lhs | rhs) == 0x80000000u;
      return anyNegativeZero ? BitConverter.Int32BitsToSingle(int.MinValue) : 0f;
  }
  /// <summary>
  /// Returns the smaller of two double-precision values, with explicit handling of NaN
  /// operands and of signed zeros.
  /// </summary>
  /// <returns>
  /// The lesser operand; the NaN selected by <c>ProcessNaNs</c> when an operand is NaN;
  /// for two zeros, negative zero if either is negative, otherwise positive zero.
  /// </returns>
  public static double Min(double op1, double op2)
  {
      // Ordered comparisons settle everything except NaNs and zero-versus-zero.
      if (op1 < op2)
      {
          return op1;
      }

      bool tieOnNonZero = op1 == op2 && op2 != 0;
      if (tieOnNonZero || op1 > op2)
      {
          return op2;
      }

      ulong lhs = (ulong)BitConverter.DoubleToInt64Bits(op1);
      ulong rhs = (ulong)BitConverter.DoubleToInt64Bits(op2);

      if (ProcessNaNs(lhs, rhs, out ulong nanBits))
      {
          return BitConverter.Int64BitsToDouble((long)nanBits);
      }

      // Both operands are zeros here; the result is negative if at least one is.
      bool anyNegativeZero = (lhs | rhs) == 0x8000000000000000ul;
      return anyNegativeZero ? BitConverter.Int64BitsToDouble(long.MinValue) : 0.0;
  }
  /// <summary>
  /// Selects the NaN result for a two-operand single-precision operation.
  /// Priority: signaling NaN in op1, signaling NaN in op2, quiet NaN in op1, quiet NaN in op2.
  /// A signaling NaN is returned with its quiet bit (bit 22) set.
  /// </summary>
  /// <param name="op1_bits">Raw bits of the first operand.</param>
  /// <param name="op2_bits">Raw bits of the second operand.</param>
  /// <param name="op_bits">The selected NaN bits, or 0 when neither operand is a NaN.</param>
  /// <returns><c>true</c> if either operand is a NaN; otherwise <c>false</c>.</returns>
  private static bool ProcessNaNs(uint op1_bits, uint op2_bits, out uint op_bits)
  {
      const uint QuietBit = 1u << 22;

      if (IsSNaN(op1_bits))
      {
          op_bits = op1_bits | QuietBit;
          return true;
      }

      if (IsSNaN(op2_bits))
      {
          op_bits = op2_bits | QuietBit;
          return true;
      }

      if (IsQNaN(op1_bits))
      {
          op_bits = op1_bits;
          return true;
      }

      if (IsQNaN(op2_bits))
      {
          op_bits = op2_bits;
          return true;
      }

      op_bits = 0;
      return false;
  }
  /// <summary>
  /// Selects the NaN result for a two-operand double-precision operation.
  /// Priority: signaling NaN in op1, signaling NaN in op2, quiet NaN in op1, quiet NaN in op2.
  /// A signaling NaN is returned with its quiet bit (bit 51) set.
  /// </summary>
  /// <param name="op1_bits">Raw bits of the first operand.</param>
  /// <param name="op2_bits">Raw bits of the second operand.</param>
  /// <param name="op_bits">The selected NaN bits, or 0 when neither operand is a NaN.</param>
  /// <returns><c>true</c> if either operand is a NaN; otherwise <c>false</c>.</returns>
  private static bool ProcessNaNs(ulong op1_bits, ulong op2_bits, out ulong op_bits)
  {
      const ulong QuietBit = 1ul << 51;

      if (IsSNaN(op1_bits))
      {
          op_bits = op1_bits | QuietBit;
          return true;
      }

      if (IsSNaN(op2_bits))
      {
          op_bits = op2_bits | QuietBit;
          return true;
      }

      if (IsQNaN(op1_bits))
      {
          op_bits = op1_bits;
          return true;
      }

      if (IsQNaN(op2_bits))
      {
          op_bits = op2_bits;
          return true;
      }

      op_bits = 0;
      return false;
  }
  /// <summary>
  /// Tests whether a single-precision bit pattern is a quiet NaN: non-zero fraction,
  /// all-ones exponent and the top fraction bit (bit 22) set. The sign bit is ignored.
  /// </summary>
  private static bool IsQNaN(uint op_bits)
  {
      const uint FractionMask = 0x007FFFFF;
      const uint QuietPattern = 0x7FC00000;

      bool hasFraction = (op_bits & FractionMask) != 0;
      bool quietExponent = (op_bits & QuietPattern) == QuietPattern;

      return hasFraction && quietExponent;
  }
  /// <summary>
  /// Tests whether a double-precision bit pattern is a quiet NaN: non-zero fraction,
  /// all-ones exponent and the top fraction bit (bit 51) set. The sign bit is ignored.
  /// </summary>
  private static bool IsQNaN(ulong op_bits)
  {
      const ulong FractionMask = 0x000FFFFFFFFFFFFF;
      const ulong QuietPattern = 0x7FF8000000000000;

      bool hasFraction = (op_bits & FractionMask) != 0;
      bool quietExponent = (op_bits & QuietPattern) == QuietPattern;

      return hasFraction && quietExponent;
  }
  /// <summary>
  /// Tests whether a single-precision bit pattern is a signaling NaN: non-zero fraction,
  /// all-ones exponent and the top fraction bit (bit 22) clear. The sign bit is ignored.
  /// </summary>
  private static bool IsSNaN(uint op_bits)
  {
      const uint FractionMask = 0x007FFFFF;
      const uint ExponentAndQuietMask = 0x7FC00000;
      const uint SignalingPattern = 0x7F800000;

      bool hasFraction = (op_bits & FractionMask) != 0;
      bool signalingExponent = (op_bits & ExponentAndQuietMask) == SignalingPattern;

      return hasFraction && signalingExponent;
  }
  /// <summary>
  /// Tests whether a double-precision bit pattern is a signaling NaN: non-zero fraction,
  /// all-ones exponent and the top fraction bit (bit 51) clear. The sign bit is ignored.
  /// </summary>
  private static bool IsSNaN(ulong op_bits)
  {
      const ulong FractionMask = 0x000FFFFFFFFFFFFF;
      const ulong ExponentAndQuietMask = 0x7FF8000000000000;
      const ulong SignalingPattern = 0x7FF0000000000000;

      bool hasFraction = (op_bits & FractionMask) != 0;
      bool signalingExponent = (op_bits & ExponentAndQuietMask) == SignalingPattern;

      return hasFraction && signalingExponent;
  }
  449. }
  450. }