|
|
@@ -186,18 +186,101 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Fabs_S(ILEmitterCtx context)
|
|
|
{
|
|
|
- EmitScalarUnaryOpF(context, () =>
|
|
|
+ if (Optimizations.UseSse2)
|
|
|
{
|
|
|
- EmitUnaryMathCall(context, nameof(Math.Abs));
|
|
|
- });
|
|
|
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
|
|
|
+
|
|
|
+ if (op.Size == 0)
|
|
|
+ {
|
|
|
+ Type[] typesSsv = new Type[] { typeof(float) };
|
|
|
+ Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdc_R4(-0f);
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ EmitVectorZero32_128(context, op.Rd);
|
|
|
+ }
|
|
|
+ else /* if (op.Size == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesSsv = new Type[] { typeof(double) };
|
|
|
+ Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ context.EmitLdc_R8(-0d);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitScalarUnaryOpF(context, () =>
|
|
|
+ {
|
|
|
+ EmitUnaryMathCall(context, nameof(Math.Abs));
|
|
|
+ });
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Fabs_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- EmitVectorUnaryOpF(context, () =>
|
|
|
+ if (Optimizations.UseSse2)
|
|
|
{
|
|
|
- EmitUnaryMathCall(context, nameof(Math.Abs));
|
|
|
- });
|
|
|
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
|
|
|
+
|
|
|
+ int sizeF = op.Size & 1;
|
|
|
+
|
|
|
+ if (sizeF == 0)
|
|
|
+ {
|
|
|
+ Type[] typesSav = new Type[] { typeof(float) };
|
|
|
+ Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdc_R4(-0f);
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else /* if (sizeF == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesSav = new Type[] { typeof(double) };
|
|
|
+ Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ context.EmitLdc_R8(-0d);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorUnaryOpF(context, () =>
|
|
|
+ {
|
|
|
+ EmitUnaryMathCall(context, nameof(Math.Abs));
|
|
|
+ });
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Fadd_S(ILEmitterCtx context)
|
|
|
@@ -283,7 +366,7 @@ namespace ChocolArm64.Instructions
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- public static void Fmadd_S(ILEmitterCtx context)
|
|
|
+ public static void Fmadd_S(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
@@ -450,22 +533,118 @@ namespace ChocolArm64.Instructions
|
|
|
});
|
|
|
}
|
|
|
|
|
|
- public static void Fmla_V(ILEmitterCtx context)
|
|
|
+ public static void Fmla_V(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
- EmitVectorTernaryOpF(context, () =>
|
|
|
+ if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
- context.Emit(OpCodes.Mul);
|
|
|
- context.Emit(OpCodes.Add);
|
|
|
- });
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
+
|
|
|
+ int sizeF = op.Size & 1;
|
|
|
+
|
|
|
+ if (sizeF == 0)
|
|
|
+ {
|
|
|
+ Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rd);
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+ context.EmitLdvec(op.Rm);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else /* if (sizeF == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rd);
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rm);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorTernaryOpF(context, () =>
|
|
|
+ {
|
|
|
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd));
|
|
|
+ });
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- public static void Fmla_Ve(ILEmitterCtx context)
|
|
|
+ public static void Fmla_Ve(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
- EmitVectorTernaryOpByElemF(context, () =>
|
|
|
+ if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
- context.Emit(OpCodes.Mul);
|
|
|
- context.Emit(OpCodes.Add);
|
|
|
- });
|
|
|
+ OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp;
|
|
|
+
|
|
|
+ int sizeF = op.Size & 1;
|
|
|
+
|
|
|
+ if (sizeF == 0)
|
|
|
+ {
|
|
|
+ Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
|
|
|
+ Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rd);
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rm);
|
|
|
+ context.Emit(OpCodes.Dup);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else /* if (sizeF == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
|
|
|
+ Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rd);
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rm);
|
|
|
+ context.Emit(OpCodes.Dup);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(op.Index | op.Index << 1);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorTernaryOpByElemF(context, () =>
|
|
|
+ {
|
|
|
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd));
|
|
|
+ });
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Fmls_Se(ILEmitterCtx context)
|
|
|
@@ -477,25 +656,121 @@ namespace ChocolArm64.Instructions
|
|
|
});
|
|
|
}
|
|
|
|
|
|
- public static void Fmls_V(ILEmitterCtx context)
|
|
|
+ public static void Fmls_V(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
- EmitVectorTernaryOpF(context, () =>
|
|
|
+ if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
- context.Emit(OpCodes.Mul);
|
|
|
- context.Emit(OpCodes.Sub);
|
|
|
- });
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
+
|
|
|
+ int sizeF = op.Size & 1;
|
|
|
+
|
|
|
+ if (sizeF == 0)
|
|
|
+ {
|
|
|
+ Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rd);
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+ context.EmitLdvec(op.Rm);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else /* if (sizeF == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rd);
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rm);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorTernaryOpF(context, () =>
|
|
|
+ {
|
|
|
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub));
|
|
|
+ });
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- public static void Fmls_Ve(ILEmitterCtx context)
|
|
|
+ public static void Fmls_Ve(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
- EmitVectorTernaryOpByElemF(context, () =>
|
|
|
+ if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
- context.Emit(OpCodes.Mul);
|
|
|
- context.Emit(OpCodes.Sub);
|
|
|
- });
|
|
|
+ OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp;
|
|
|
+
|
|
|
+ int sizeF = op.Size & 1;
|
|
|
+
|
|
|
+ if (sizeF == 0)
|
|
|
+ {
|
|
|
+ Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
|
|
|
+ Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rd);
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rm);
|
|
|
+ context.Emit(OpCodes.Dup);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else /* if (sizeF == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
|
|
|
+ Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rd);
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rm);
|
|
|
+ context.Emit(OpCodes.Dup);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(op.Index | op.Index << 1);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorTernaryOpByElemF(context, () =>
|
|
|
+ {
|
|
|
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub));
|
|
|
+ });
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- public static void Fmsub_S(ILEmitterCtx context)
|
|
|
+ public static void Fmsub_S(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
@@ -580,7 +855,59 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Fmul_Ve(ILEmitterCtx context)
|
|
|
{
|
|
|
- EmitVectorBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul));
|
|
|
+ if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
+ {
|
|
|
+ OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp;
|
|
|
+
|
|
|
+ int sizeF = op.Size & 1;
|
|
|
+
|
|
|
+ if (sizeF == 0)
|
|
|
+ {
|
|
|
+ Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
|
|
|
+ Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rm);
|
|
|
+ context.Emit(OpCodes.Dup);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else /* if (sizeF == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
|
|
|
+ Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rm);
|
|
|
+ context.Emit(OpCodes.Dup);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(op.Index | op.Index << 1);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorBinaryOpByElemF(context, () =>
|
|
|
+ {
|
|
|
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul));
|
|
|
+ });
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Fmulx_S(ILEmitterCtx context)
|
|
|
@@ -607,22 +934,105 @@ namespace ChocolArm64.Instructions
|
|
|
});
|
|
|
}
|
|
|
|
|
|
- public static void Fmulx_Ve(ILEmitterCtx context)
|
|
|
+ public static void Fmulx_Ve(ILEmitterCtx context)
|
|
|
+ {
|
|
|
+ EmitVectorBinaryOpByElemF(context, () =>
|
|
|
+ {
|
|
|
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX));
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void Fneg_S(ILEmitterCtx context)
|
|
|
+ {
|
|
|
+ if (Optimizations.UseSse2)
|
|
|
+ {
|
|
|
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
|
|
|
+
|
|
|
+ if (op.Size == 0)
|
|
|
+ {
|
|
|
+ Type[] typesSsv = new Type[] { typeof(float) };
|
|
|
+ Type[] typesXor = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdc_R4(-0f);
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ EmitVectorZero32_128(context, op.Rd);
|
|
|
+ }
|
|
|
+ else /* if (op.Size == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesSsv = new Type[] { typeof(double) };
|
|
|
+ Type[] typesXor = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ context.EmitLdc_R8(-0d);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void Fneg_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- EmitVectorBinaryOpByElemF(context, () =>
|
|
|
+ if (Optimizations.UseSse2)
|
|
|
{
|
|
|
- EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX));
|
|
|
- });
|
|
|
- }
|
|
|
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
|
|
|
|
|
|
- public static void Fneg_S(ILEmitterCtx context)
|
|
|
- {
|
|
|
- EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg));
|
|
|
- }
|
|
|
+ int sizeF = op.Size & 1;
|
|
|
|
|
|
- public static void Fneg_V(ILEmitterCtx context)
|
|
|
- {
|
|
|
- EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg));
|
|
|
+ if (sizeF == 0)
|
|
|
+ {
|
|
|
+ Type[] typesSav = new Type[] { typeof(float) };
|
|
|
+ Type[] typesXor = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
|
|
+
|
|
|
+ context.EmitLdc_R4(-0f);
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
|
|
|
+
|
|
|
+ context.EmitLdvec(op.Rn);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor));
|
|
|
+
|
|
|
+ context.EmitStvec(op.Rd);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else /* if (sizeF == 1) */
|
|
|
+ {
|
|
|
+ Type[] typesSav = new Type[] { typeof(double) };
|
|
|
+ Type[] typesXor = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
|
|
+
|
|
|
+ context.EmitLdc_R8(-0d);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
|
|
+
|
|
|
+ EmitLdvecWithCastToDouble(context, op.Rn);
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor));
|
|
|
+
|
|
|
+ EmitStvecWithCastFromDouble(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Fnmadd_S(ILEmitterCtx context)
|
|
|
@@ -689,7 +1099,7 @@ namespace ChocolArm64.Instructions
|
|
|
});
|
|
|
}
|
|
|
|
|
|
- public static void Frecps_S(ILEmitterCtx context)
|
|
|
+ public static void Frecps_S(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
@@ -743,7 +1153,7 @@ namespace ChocolArm64.Instructions
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- public static void Frecps_V(ILEmitterCtx context)
|
|
|
+ public static void Frecps_V(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
@@ -986,7 +1396,7 @@ namespace ChocolArm64.Instructions
|
|
|
});
|
|
|
}
|
|
|
|
|
|
- public static void Frsqrts_S(ILEmitterCtx context)
|
|
|
+ public static void Frsqrts_S(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
@@ -1048,7 +1458,7 @@ namespace ChocolArm64.Instructions
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- public static void Frsqrts_V(ILEmitterCtx context)
|
|
|
+ public static void Frsqrts_V(ILEmitterCtx context) // Fused.
|
|
|
{
|
|
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
|
{
|
|
|
@@ -1310,7 +1720,7 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
|
|
|
|
|
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rm, op.Size);
|
|
|
|
|
|
context.EmitLdc_I4(numBytes);
|
|
|
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
|
|
@@ -1334,7 +1744,38 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Saddw_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add));
|
|
|
+ if (Optimizations.UseSse41)
|
|
|
+ {
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
+
|
|
|
+ Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
|
|
+ Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
|
|
|
+ Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
|
|
|
+ VectorIntTypesPerSizeLog2[op.Size + 1] };
|
|
|
+
|
|
|
+ string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
|
|
|
+ nameof(Sse41.ConvertToVector128Int32),
|
|
|
+ nameof(Sse41.ConvertToVector128Int64) };
|
|
|
+
|
|
|
+ int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
|
|
+
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
|
|
|
+
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rm, op.Size);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(numBytes);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
|
|
|
+
|
|
|
+ EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Shadd_V(ILEmitterCtx context)
|
|
|
@@ -1439,11 +1880,34 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Smax_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- Type[] types = new Type[] { typeof(long), typeof(long) };
|
|
|
+ if (Optimizations.UseSse41)
|
|
|
+ {
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
|
|
|
- MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types);
|
|
|
+ Type[] typesMax = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
|
|
+
|
|
|
+ Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41);
|
|
|
+
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rn, op.Size);
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rm, op.Size);
|
|
|
+
|
|
|
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
|
|
|
+
|
|
|
+ EmitStvecWithSignedCast(context, op.Rd, op.Size);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ Type[] types = new Type[] { typeof(long), typeof(long) };
|
|
|
|
|
|
- EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo));
|
|
|
+ MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types);
|
|
|
+
|
|
|
+ EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Smaxp_V(ILEmitterCtx context)
|
|
|
@@ -1457,11 +1921,34 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Smin_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- Type[] types = new Type[] { typeof(long), typeof(long) };
|
|
|
+ if (Optimizations.UseSse41)
|
|
|
+ {
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
|
|
|
- MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types);
|
|
|
+ Type[] typesMin = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
|
|
+
|
|
|
+ Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41);
|
|
|
+
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rn, op.Size);
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rm, op.Size);
|
|
|
+
|
|
|
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin));
|
|
|
+
|
|
|
+ EmitStvecWithSignedCast(context, op.Rd, op.Size);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ Type[] types = new Type[] { typeof(long), typeof(long) };
|
|
|
|
|
|
- EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo));
|
|
|
+ MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types);
|
|
|
+
|
|
|
+ EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Sminp_V(ILEmitterCtx context)
|
|
|
@@ -1484,7 +1971,7 @@ namespace ChocolArm64.Instructions
|
|
|
Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
|
|
|
VectorIntTypesPerSizeLog2[op.Size + 1] };
|
|
|
|
|
|
- Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
|
|
|
string nameCvt = op.Size == 0
|
|
|
? nameof(Sse41.ConvertToVector128Int16)
|
|
|
@@ -1508,7 +1995,7 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
|
|
|
|
|
- context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd));
|
|
|
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd));
|
|
|
|
|
|
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
|
|
|
|
|
|
@@ -1535,7 +2022,7 @@ namespace ChocolArm64.Instructions
|
|
|
Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
|
|
|
VectorIntTypesPerSizeLog2[op.Size + 1] };
|
|
|
|
|
|
- Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
|
|
|
string nameCvt = op.Size == 0
|
|
|
? nameof(Sse41.ConvertToVector128Int16)
|
|
|
@@ -1559,7 +2046,7 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
|
|
|
|
|
- context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub));
|
|
|
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub));
|
|
|
|
|
|
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
|
|
|
|
|
|
@@ -1735,7 +2222,7 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
|
|
|
|
|
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rm, op.Size);
|
|
|
|
|
|
context.EmitLdc_I4(numBytes);
|
|
|
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
|
|
@@ -1754,7 +2241,38 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Ssubw_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub));
|
|
|
+ if (Optimizations.UseSse41)
|
|
|
+ {
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
+
|
|
|
+ Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
|
|
+ Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
|
|
|
+ Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
|
|
|
+ VectorIntTypesPerSizeLog2[op.Size + 1] };
|
|
|
+
|
|
|
+ string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
|
|
|
+ nameof(Sse41.ConvertToVector128Int32),
|
|
|
+ nameof(Sse41.ConvertToVector128Int64) };
|
|
|
+
|
|
|
+ int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
|
|
+
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
|
|
|
+
|
|
|
+ EmitLdvecWithSignedCast(context, op.Rm, op.Size);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(numBytes);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
|
|
|
+
|
|
|
+ EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Sub_S(ILEmitterCtx context)
|
|
|
@@ -1901,7 +2419,38 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Uaddw_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add));
|
|
|
+ if (Optimizations.UseSse41)
|
|
|
+ {
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
+
|
|
|
+ Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
|
|
+ Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
|
|
|
+ Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1],
|
|
|
+ VectorUIntTypesPerSizeLog2[op.Size + 1] };
|
|
|
+
|
|
|
+ string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
|
|
|
+ nameof(Sse41.ConvertToVector128Int32),
|
|
|
+ nameof(Sse41.ConvertToVector128Int64) };
|
|
|
+
|
|
|
+ int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
|
|
+
|
|
|
+ EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1);
|
|
|
+
|
|
|
+ EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(numBytes);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
|
|
|
+
|
|
|
+ EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Uhadd_V(ILEmitterCtx context)
|
|
|
@@ -1992,11 +2541,34 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Umax_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- Type[] types = new Type[] { typeof(ulong), typeof(ulong) };
|
|
|
+ if (Optimizations.UseSse41)
|
|
|
+ {
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
|
|
|
- MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types);
|
|
|
+ Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
|
|
|
+
|
|
|
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
+
|
|
|
+ EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
|
|
|
+ EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
|
|
|
+
|
|
|
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
|
|
|
|
|
|
- EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo));
|
|
|
+ EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ Type[] types = new Type[] { typeof(ulong), typeof(ulong) };
|
|
|
+
|
|
|
+ MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types);
|
|
|
+
|
|
|
+ EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Umaxp_V(ILEmitterCtx context)
|
|
|
@@ -2010,11 +2582,34 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Umin_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- Type[] types = new Type[] { typeof(ulong), typeof(ulong) };
|
|
|
+ if (Optimizations.UseSse41)
|
|
|
+ {
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
|
|
|
- MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types);
|
|
|
+ Type[] typesMin = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
|
|
|
+
|
|
|
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
+
|
|
|
+ EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
|
|
|
+ EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
|
|
|
+
|
|
|
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin));
|
|
|
+
|
|
|
+ EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
|
|
|
+
|
|
|
+ if (op.RegisterSize == RegisterSize.Simd64)
|
|
|
+ {
|
|
|
+ EmitVectorZeroUpper(context, op.Rd);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ Type[] types = new Type[] { typeof(ulong), typeof(ulong) };
|
|
|
|
|
|
- EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo));
|
|
|
+ MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types);
|
|
|
+
|
|
|
+ EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public static void Uminp_V(ILEmitterCtx context)
|
|
|
@@ -2037,7 +2632,7 @@ namespace ChocolArm64.Instructions
|
|
|
Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1],
|
|
|
VectorIntTypesPerSizeLog2 [op.Size + 1] };
|
|
|
|
|
|
- Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
|
|
|
string nameCvt = op.Size == 0
|
|
|
? nameof(Sse41.ConvertToVector128Int16)
|
|
|
@@ -2061,7 +2656,7 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
|
|
|
|
|
- context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd));
|
|
|
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd));
|
|
|
|
|
|
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
|
|
|
|
|
|
@@ -2088,7 +2683,7 @@ namespace ChocolArm64.Instructions
|
|
|
Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1],
|
|
|
VectorIntTypesPerSizeLog2 [op.Size + 1] };
|
|
|
|
|
|
- Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
|
|
|
|
|
|
string nameCvt = op.Size == 0
|
|
|
? nameof(Sse41.ConvertToVector128Int16)
|
|
|
@@ -2112,7 +2707,7 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
|
|
|
|
|
- context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub));
|
|
|
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub));
|
|
|
|
|
|
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
|
|
|
|
|
|
@@ -2251,7 +2846,38 @@ namespace ChocolArm64.Instructions
|
|
|
|
|
|
public static void Usubw_V(ILEmitterCtx context)
|
|
|
{
|
|
|
- EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub));
|
|
|
+ if (Optimizations.UseSse41)
|
|
|
+ {
|
|
|
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
|
|
+
|
|
|
+ Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
|
|
+ Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
|
|
|
+ Type[] typesSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1],
|
|
|
+ VectorUIntTypesPerSizeLog2[op.Size + 1] };
|
|
|
+
|
|
|
+ string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
|
|
|
+ nameof(Sse41.ConvertToVector128Int32),
|
|
|
+ nameof(Sse41.ConvertToVector128Int64) };
|
|
|
+
|
|
|
+ int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
|
|
+
|
|
|
+ EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1);
|
|
|
+
|
|
|
+ EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
|
|
|
+
|
|
|
+ context.EmitLdc_I4(numBytes);
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
|
|
+
|
|
|
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
|
|
|
+
|
|
|
+ EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
private static void EmitAbs(ILEmitterCtx context)
|