Skip to content

Commit 44f5795

Browse files
committed
add extMul
1 parent 0b27b49 commit 44f5795

File tree

1 file changed

+117
-0
lines changed

1 file changed

+117
-0
lines changed

source/mir/utility.d

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ import std.traits;
2020

2121
import mir.math.common: optmath;
2222

23+
version(LDC)
24+
pragma(LDC_inline_ir) R inlineIR(string s, R, P...)(P) @safe pure nothrow @nogc;
25+
2326
@optmath:
2427

2528
void swapStars(I1, I2)(auto ref I1 i1, auto ref I2 i2)
@@ -469,3 +472,117 @@ version(mir_test) unittest
469472
uint b = 10;
470473
static assert(!is(typeof(max(a, b))));
471474
}
475+
476+
/++
477+
Return type for $(LREF extMul);
478+
+/
479+
struct ExtMulResult(I)
480+
if (isIntegral!I)
481+
{
482+
/// Lower I.sizeof * 8 bits
483+
I low;
484+
/// Higher I.sizeof * 8 bits
485+
I high;
486+
}
487+
488+
/++
489+
Extended unsigned multiplications.
490+
Performs U x U multiplication and returns $(LREF ExtMulResult)!U that contains extended result.
491+
Params:
492+
a = unsigned integer
493+
b = unsigned integer
494+
Returns:
495+
128bit result if U is ulong or 256bit result if U is ucent.
496+
Optimization:
497+
Algorithm is optimized for LDC (LLVM IR, any target) and for DMD (X86_64).
498+
+/
499+
ExtMulResult!U extMul(U)(in U a, in U b) @nogc nothrow pure @safe
500+
if(isUnsigned!U && U.sizeof >= ulong.sizeof)
501+
{
502+
static if (is(U == ulong))
503+
alias H = uint;
504+
else
505+
alias H = ulong;
506+
507+
enum hbc = H.sizeof * 8;
508+
509+
if (!__ctfe)
510+
{
511+
version(LDC)
512+
{
513+
// LLVM IR by n8sh
514+
pragma(inline, true);
515+
static if (is(U == ulong))
516+
{
517+
auto r = inlineIR!(`
518+
%a = zext i64 %0 to i128
519+
%b = zext i64 %1 to i128
520+
%m = mul i128 %a, %b
521+
%n = lshr i128 %m, 64
522+
%h = trunc i128 %n to i64
523+
%l = trunc i128 %m to i64
524+
%agg1 = insertvalue [2 x i64] undef, i64 %l, 0
525+
%agg2 = insertvalue [2 x i64] %agg1, i64 %h, 1
526+
ret [2 x i64] %agg2`, ulong[2])(a, b);
527+
}
528+
else
529+
{
530+
auto r = inlineIR!(`
531+
%a = zext i128 %0 to i256
532+
%b = zext i128 %1 to i256
533+
%m = mul i256 %a, %b
534+
%n = lshr i256 %m, 128
535+
%h = trunc i256 %n to i128
536+
%l = trunc i256 %m to i128
537+
%agg1 = insertvalue [2 x i128] undef, i128 %l, 0
538+
%agg2 = insertvalue [2 x i128] %agg1, i128 %h, 1
539+
ret [2 x i128] %agg2`, ucent[2])(a, b);
540+
}
541+
return ExtMulResult!U(r[0], r[1]);
542+
}
543+
else
544+
version(D_InlineAsm_X86_64)
545+
{
546+
static if (is(U == ulong))
547+
{
548+
return extMul_X86_64(a, b);
549+
}
550+
}
551+
}
552+
553+
U al = cast(H)a;
554+
U ah = a >>> hbc;
555+
U bl = cast(H)b;
556+
U bh = b >>> hbc;
557+
558+
U p0 = al * bl;
559+
U p1 = al * bh;
560+
U p2 = ah * bl;
561+
U p3 = ah * bh;
562+
563+
H cy = cast(H)(((p0 >>> hbc) + cast(H)p1 + cast(H)p2) >>> hbc);
564+
U lo = p0 + (p1 << hbc) + (p2 << hbc);
565+
U hi = p3 + (p1 >>> hbc) + (p2 >>> hbc) + cy;
566+
567+
return typeof(return)(lo, hi);
568+
}
569+
570+
unittest
571+
{
572+
immutable a = 0x93_8d_28_00_0f_50_a5_56;
573+
immutable b = 0x54_c3_2f_e8_cc_a5_97_10;
574+
enum c = extMul(a, b); // Compile time algorithm
575+
assert(extMul(a, b) == c); // Fast runtime algorihtm
576+
}
577+
578+
private ExtMulResult!ulong extMul_X86_64()(ulong a, ulong b)
579+
{
580+
pragma(msg, "EEE");
581+
asm @safe pure nothrow @nogc
582+
{
583+
naked;
584+
mov RAX, RDI;
585+
mul RSI;
586+
ret;
587+
}
588+
}

0 commit comments

Comments
 (0)