17db96d56Sopenharmony_ci
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_ci(* Copyright (c) 2011-2020 Stefan Krah. All rights reserved. *)
47db96d56Sopenharmony_ci
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ci==========================================================================
77db96d56Sopenharmony_ci                Calculate (a * b) % p using special primes
87db96d56Sopenharmony_ci==========================================================================
97db96d56Sopenharmony_ci
107db96d56Sopenharmony_ciA description of the algorithm can be found in the apfloat manual by
117db96d56Sopenharmony_ciTommila [1].
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ci
147db96d56Sopenharmony_ciDefinitions:
157db96d56Sopenharmony_ci------------
167db96d56Sopenharmony_ci
177db96d56Sopenharmony_ciIn the whole document, "==" stands for "is congruent with".
187db96d56Sopenharmony_ci
197db96d56Sopenharmony_ciResult of a * b in terms of high/low words:
207db96d56Sopenharmony_ci
217db96d56Sopenharmony_ci   (1) hi * 2**64 + lo = a * b
227db96d56Sopenharmony_ci
237db96d56Sopenharmony_ciSpecial primes:
247db96d56Sopenharmony_ci
257db96d56Sopenharmony_ci   (2) p = 2**64 - z + 1, where z = 2**n
267db96d56Sopenharmony_ci
277db96d56Sopenharmony_ciSingle step modular reduction:
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ci   (3) R(hi, lo) = hi * z - hi + lo
307db96d56Sopenharmony_ci
317db96d56Sopenharmony_ci
327db96d56Sopenharmony_ciStrategy:
337db96d56Sopenharmony_ci---------
347db96d56Sopenharmony_ci
357db96d56Sopenharmony_ci   a) Set (hi, lo) to the result of a * b.
367db96d56Sopenharmony_ci
377db96d56Sopenharmony_ci   b) Set (hi', lo') to the result of R(hi, lo).
387db96d56Sopenharmony_ci
397db96d56Sopenharmony_ci   c) Repeat step b) until 0 <= hi' * 2**64 + lo' < 2*p.
407db96d56Sopenharmony_ci
417db96d56Sopenharmony_ci   d) If the result is less than p, return lo'. Otherwise return lo' - p.
427db96d56Sopenharmony_ci
437db96d56Sopenharmony_ci
447db96d56Sopenharmony_ciThe reduction step b) preserves congruence:
457db96d56Sopenharmony_ci-------------------------------------------
467db96d56Sopenharmony_ci
477db96d56Sopenharmony_ci    hi * 2**64 + lo == hi * z - hi + lo   (mod p)
487db96d56Sopenharmony_ci
497db96d56Sopenharmony_ci    Proof:
507db96d56Sopenharmony_ci    ~~~~~~
517db96d56Sopenharmony_ci
527db96d56Sopenharmony_ci       hi * 2**64 + lo = (2**64 - z + 1) * hi + z * hi - hi + lo
537db96d56Sopenharmony_ci
547db96d56Sopenharmony_ci                       = p * hi               + z * hi - hi + lo
557db96d56Sopenharmony_ci
567db96d56Sopenharmony_ci                       == z * hi - hi + lo   (mod p)
577db96d56Sopenharmony_ci
587db96d56Sopenharmony_ci
597db96d56Sopenharmony_ciMaximum numbers of step b):
607db96d56Sopenharmony_ci---------------------------
617db96d56Sopenharmony_ci
627db96d56Sopenharmony_ci# To avoid unnecessary formalism, define:
637db96d56Sopenharmony_ci
647db96d56Sopenharmony_cidef R(hi, lo, z):
657db96d56Sopenharmony_ci     return divmod(hi * z - hi + lo, 2**64)
667db96d56Sopenharmony_ci
677db96d56Sopenharmony_ci# For simplicity, assume hi=2**64-1, lo=2**64-1 after the
687db96d56Sopenharmony_ci# initial multiplication a * b. This is of course impossible
697db96d56Sopenharmony_ci# but certainly covers all cases.
707db96d56Sopenharmony_ci
717db96d56Sopenharmony_ci# Then, for p1:
727db96d56Sopenharmony_cihi=2**64-1; lo=2**64-1; z=2**32
737db96d56Sopenharmony_cip1 = 2**64 - z + 1
747db96d56Sopenharmony_ci
757db96d56Sopenharmony_cihi, lo = R(hi, lo, z)    # First reduction
767db96d56Sopenharmony_cihi, lo = R(hi, lo, z)    # Second reduction
777db96d56Sopenharmony_cihi * 2**64 + lo < 2 * p1 # True
787db96d56Sopenharmony_ci
797db96d56Sopenharmony_ci# For p2:
807db96d56Sopenharmony_cihi=2**64-1; lo=2**64-1; z=2**34
817db96d56Sopenharmony_cip2 = 2**64 - z + 1
827db96d56Sopenharmony_ci
837db96d56Sopenharmony_cihi, lo = R(hi, lo, z)    # First reduction
847db96d56Sopenharmony_cihi, lo = R(hi, lo, z)    # Second reduction
857db96d56Sopenharmony_cihi, lo = R(hi, lo, z)    # Third reduction
867db96d56Sopenharmony_cihi * 2**64 + lo < 2 * p2 # True
877db96d56Sopenharmony_ci
887db96d56Sopenharmony_ci# For p3:
897db96d56Sopenharmony_cihi=2**64-1; lo=2**64-1; z=2**40
907db96d56Sopenharmony_cip3 = 2**64 - z + 1
917db96d56Sopenharmony_ci
927db96d56Sopenharmony_cihi, lo = R(hi, lo, z)    # First reduction
937db96d56Sopenharmony_cihi, lo = R(hi, lo, z)    # Second reduction
947db96d56Sopenharmony_cihi, lo = R(hi, lo, z)    # Third reduction
957db96d56Sopenharmony_cihi * 2**64 + lo < 2 * p3 # True
967db96d56Sopenharmony_ci
977db96d56Sopenharmony_ci
987db96d56Sopenharmony_ciStep d) preserves congruence and yields a result < p:
997db96d56Sopenharmony_ci-----------------------------------------------------
1007db96d56Sopenharmony_ci
1017db96d56Sopenharmony_ci   Case hi = 0:
1027db96d56Sopenharmony_ci
1037db96d56Sopenharmony_ci       Case lo < p: trivial.
1047db96d56Sopenharmony_ci
1057db96d56Sopenharmony_ci       Case lo >= p:
1067db96d56Sopenharmony_ci
1077db96d56Sopenharmony_ci          lo == lo - p   (mod p)             # result is congruent
1087db96d56Sopenharmony_ci
1097db96d56Sopenharmony_ci          p <= lo < 2*p  ->  0 <= lo - p < p # result is in the correct range
1107db96d56Sopenharmony_ci
1117db96d56Sopenharmony_ci   Case hi = 1:
1127db96d56Sopenharmony_ci
1137db96d56Sopenharmony_ci       p < 2**64 /\ 2**64 + lo < 2*p  ->  lo < p  # lo is always less than p
1147db96d56Sopenharmony_ci
1157db96d56Sopenharmony_ci       2**64 + lo == 2**64 + (lo - p)   (mod p)   # result is congruent
1167db96d56Sopenharmony_ci
1177db96d56Sopenharmony_ci                  = lo - p   # exactly the same value as the previous RHS
1187db96d56Sopenharmony_ci                             # in uint64_t arithmetic.
1197db96d56Sopenharmony_ci
1207db96d56Sopenharmony_ci       p < 2**64 + lo < 2*p  ->  0 < 2**64 + (lo - p) < p  # correct range
1217db96d56Sopenharmony_ci
1227db96d56Sopenharmony_ci
1237db96d56Sopenharmony_ci
1247db96d56Sopenharmony_ci[1]  http://www.apfloat.org/apfloat/2.40/apfloat.pdf
1257db96d56Sopenharmony_ci
1267db96d56Sopenharmony_ci
1277db96d56Sopenharmony_ci
128