@@ -20,6 +20,9 @@ import std.traits;
2020
2121import mir.math.common: optmath;
2222
23+ version (LDC )
24+ pragma (LDC_inline_ir) R inlineIR(string s, R, P... )(P) @safe pure nothrow @nogc ;
25+
2326@optmath:
2427
2528void swapStars (I1 , I2 )(auto ref I1 i1, auto ref I2 i2)
@@ -469,3 +472,117 @@ version(mir_test) unittest
469472 uint b = 10 ;
470473 static assert (! is (typeof (max(a, b))));
471474}
475+
476+ /+ +
477+ Return type for $(LREF extMul);
478+ +/
479+ struct ExtMulResult (I)
480+ if (isIntegral! I)
481+ {
482+ // / Lower I.sizeof * 8 bits
483+ I low;
484+ // / Higher I.sizeof * 8 bits
485+ I high;
486+ }
487+
488+ /+ +
489+ Extended unsigned multiplications.
490+ Performs U x U multiplication and returns $(LREF ExtMulResult)!U that contains extended result.
491+ Params:
492+ a = unsigned integer
493+ b = unsigned integer
494+ Returns:
495+ 128bit result if U is ulong or 256bit result if U is ucent.
496+ Optimization:
497+ Algorithm is optimized for LDC (LLVM IR, any target) and for DMD (X86_64).
498+ +/
499+ ExtMulResult! U extMul (U)(in U a, in U b) @nogc nothrow pure @safe
500+ if (isUnsigned! U && U.sizeof >= ulong .sizeof)
501+ {
502+ static if (is (U == ulong ))
503+ alias H = uint ;
504+ else
505+ alias H = ulong ;
506+
507+ enum hbc = H.sizeof * 8 ;
508+
509+ if (! __ctfe)
510+ {
511+ version (LDC )
512+ {
513+ // LLVM IR by n8sh
514+ pragma (inline, true );
515+ static if (is (U == ulong ))
516+ {
517+ auto r = inlineIR! (`
518+ %a = zext i64 %0 to i128
519+ %b = zext i64 %1 to i128
520+ %m = mul i128 %a, %b
521+ %n = lshr i128 %m, 64
522+ %h = trunc i128 %n to i64
523+ %l = trunc i128 %m to i64
524+ %agg1 = insertvalue [2 x i64] undef, i64 %l, 0
525+ %agg2 = insertvalue [2 x i64] %agg1, i64 %h, 1
526+ ret [2 x i64] %agg2` , ulong [2 ])(a, b);
527+ }
528+ else
529+ {
530+ auto r = inlineIR! (`
531+ %a = zext i128 %0 to i256
532+ %b = zext i128 %1 to i256
533+ %m = mul i256 %a, %b
534+ %n = lshr i256 %m, 128
535+ %h = trunc i256 %n to i128
536+ %l = trunc i256 %m to i128
537+ %agg1 = insertvalue [2 x i128] undef, i128 %l, 0
538+ %agg2 = insertvalue [2 x i128] %agg1, i128 %h, 1
539+ ret [2 x i128] %agg2` , ucent [2 ])(a, b);
540+ }
541+ return ExtMulResult! U(r[0 ], r[1 ]);
542+ }
543+ else
544+ version (D_InlineAsm_X86_64 )
545+ {
546+ static if (is (U == ulong ))
547+ {
548+ return extMul_X86_64 (a, b);
549+ }
550+ }
551+ }
552+
553+ U al = cast (H)a;
554+ U ah = a >>> hbc;
555+ U bl = cast (H)b;
556+ U bh = b >>> hbc;
557+
558+ U p0 = al * bl;
559+ U p1 = al * bh;
560+ U p2 = ah * bl;
561+ U p3 = ah * bh;
562+
563+ H cy = cast (H)(((p0 >>> hbc) + cast (H)p1 + cast (H)p2) >>> hbc);
564+ U lo = p0 + (p1 << hbc) + (p2 << hbc);
565+ U hi = p3 + (p1 >>> hbc) + (p2 >>> hbc) + cy;
566+
567+ return typeof (return )(lo, hi);
568+ }
569+
570+ unittest
571+ {
572+ immutable a = 0x93_8d_28_00_0f_50_a5_56;
573+ immutable b = 0x54_c3_2f_e8_cc_a5_97_10;
574+ enum c = extMul(a, b); // Compile time algorithm
575+ assert (extMul(a, b) == c); // Fast runtime algorihtm
576+ }
577+
578+ private ExtMulResult! ulong extMul_X86_64 ()(ulong a, ulong b)
579+ {
580+ pragma (msg, " EEE" );
581+ asm @safe pure nothrow @nogc
582+ {
583+ naked;
584+ mov RAX , RDI ;
585+ mul RSI ;
586+ ret;
587+ }
588+ }
0 commit comments