17db96d56Sopenharmony_ci
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_ci(* Copyright (c) 2011-2020 Stefan Krah. All rights reserved. *)
47db96d56Sopenharmony_ci
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ci========================================================================
77db96d56Sopenharmony_ci             Calculate (a * b) % p using the 80-bit x87 FPU
87db96d56Sopenharmony_ci========================================================================
97db96d56Sopenharmony_ci
107db96d56Sopenharmony_ciA description of the algorithm can be found in the apfloat manual by
117db96d56Sopenharmony_ciTommila [1].
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ciThe proof follows an argument made by Granlund/Montgomery in [2].
147db96d56Sopenharmony_ci
157db96d56Sopenharmony_ci
167db96d56Sopenharmony_ciDefinitions and assumptions:
177db96d56Sopenharmony_ci----------------------------
187db96d56Sopenharmony_ci
197db96d56Sopenharmony_ciThe 80-bit extended precision format uses 64 bits for the significand:
207db96d56Sopenharmony_ci
217db96d56Sopenharmony_ci  (1) F = 64
227db96d56Sopenharmony_ci
237db96d56Sopenharmony_ciThe modulus is prime and less than 2**31:
247db96d56Sopenharmony_ci
257db96d56Sopenharmony_ci  (2) 2 <= p < 2**31
267db96d56Sopenharmony_ci
277db96d56Sopenharmony_ciThe factors are less than p:
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ci  (3) 0 <= a < p
307db96d56Sopenharmony_ci  (4) 0 <= b < p
317db96d56Sopenharmony_ci
327db96d56Sopenharmony_ciThe product a * b is less than 2**62 and is thus exact in 64 bits:
337db96d56Sopenharmony_ci
347db96d56Sopenharmony_ci  (5) n = a * b
357db96d56Sopenharmony_ci
367db96d56Sopenharmony_ciThe product can be represented in terms of quotient and remainder:
377db96d56Sopenharmony_ci
387db96d56Sopenharmony_ci  (6) n = q * p + r
397db96d56Sopenharmony_ci
407db96d56Sopenharmony_ciUsing (3), (4) and the fact that p is prime, the remainder is always
417db96d56Sopenharmony_cigreater than zero:
427db96d56Sopenharmony_ci
437db96d56Sopenharmony_ci  (7) 0 <= q < p  /\  1 <= r < p
447db96d56Sopenharmony_ci
457db96d56Sopenharmony_ci
467db96d56Sopenharmony_ciStrategy:
477db96d56Sopenharmony_ci---------
487db96d56Sopenharmony_ci
497db96d56Sopenharmony_ciPrecalculate the 80-bit long double inverse of p, with a maximum
507db96d56Sopenharmony_cirelative error of 2**(1-F):
517db96d56Sopenharmony_ci
527db96d56Sopenharmony_ci  (8) pinv = (long double)1.0 / p
537db96d56Sopenharmony_ci
547db96d56Sopenharmony_ciCalculate an estimate for q = floor(n/p). The multiplication has another
557db96d56Sopenharmony_cimaximum relative error of 2**(1-F):
567db96d56Sopenharmony_ci
577db96d56Sopenharmony_ci  (9) qest = n * pinv
587db96d56Sopenharmony_ci
597db96d56Sopenharmony_ciIf we can show that q < qest < q+1, then trunc(qest) = q. It is then
607db96d56Sopenharmony_cieasy to recover the remainder r. The complete algorithm is:
617db96d56Sopenharmony_ci
627db96d56Sopenharmony_ci  a) Set the control word to 64-bit precision and truncation mode.
637db96d56Sopenharmony_ci
647db96d56Sopenharmony_ci  b) n = a * b              # Calculate exact product.
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ci  c) qest = n * pinv        # Calculate estimate for the quotient.
677db96d56Sopenharmony_ci
687db96d56Sopenharmony_ci  d) q = (qest+2**63)-2**63 # Truncate qest to the exact quotient.
697db96d56Sopenharmony_ci
707db96d56Sopenharmony_ci  f) r = n - q * p          # Calculate remainder.
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci
737db96d56Sopenharmony_ciProof for q < qest < q+1:
747db96d56Sopenharmony_ci-------------------------
757db96d56Sopenharmony_ci
767db96d56Sopenharmony_ciUsing the cumulative error, the error bounds for qest are:
777db96d56Sopenharmony_ci
787db96d56Sopenharmony_ci                n                       n * (1 + 2**(1-F))**2
797db96d56Sopenharmony_ci  (9) --------------------- <= qest <=  ---------------------
807db96d56Sopenharmony_ci      p * (1 + 2**(1-F))**2                       p
817db96d56Sopenharmony_ci
827db96d56Sopenharmony_ci
837db96d56Sopenharmony_ci  Lemma 1:
847db96d56Sopenharmony_ci  --------
857db96d56Sopenharmony_ci                       n                   q * p + r
867db96d56Sopenharmony_ci    (10) q < --------------------- = ---------------------
877db96d56Sopenharmony_ci             p * (1 + 2**(1-F))**2   p * (1 + 2**(1-F))**2
887db96d56Sopenharmony_ci
897db96d56Sopenharmony_ci
907db96d56Sopenharmony_ci    Proof:
917db96d56Sopenharmony_ci    ~~~~~~
927db96d56Sopenharmony_ci
937db96d56Sopenharmony_ci    (I)     q * p * (1 + 2**(1-F))**2 < q * p + r
947db96d56Sopenharmony_ci
957db96d56Sopenharmony_ci    (II)    q * p * 2**(2-F) + q * p * 2**(2-2*F) < r
967db96d56Sopenharmony_ci
977db96d56Sopenharmony_ci    Using (1) and (7), it is sufficient to show that:
987db96d56Sopenharmony_ci
997db96d56Sopenharmony_ci    (III)   q * p * 2**(-62) + q * p * 2**(-126) < 1 <= r
1007db96d56Sopenharmony_ci
1017db96d56Sopenharmony_ci    (III) can easily be verified by substituting the largest possible
1027db96d56Sopenharmony_ci    values p = 2**31-1 and q = 2**31-2.
1037db96d56Sopenharmony_ci
1047db96d56Sopenharmony_ci    The critical cases occur when r = 1, n = m * p + 1. These cases
1057db96d56Sopenharmony_ci    can be exhaustively verified with a test program.
1067db96d56Sopenharmony_ci
1077db96d56Sopenharmony_ci
1087db96d56Sopenharmony_ci  Lemma 2:
1097db96d56Sopenharmony_ci  --------
1107db96d56Sopenharmony_ci
1117db96d56Sopenharmony_ci           n * (1 + 2**(1-F))**2     (q * p + r) * (1 + 2**(1-F))**2
1127db96d56Sopenharmony_ci    (11)   ---------------------  =  -------------------------------  <  q + 1
1137db96d56Sopenharmony_ci                     p                              p
1147db96d56Sopenharmony_ci
1157db96d56Sopenharmony_ci    Proof:
1167db96d56Sopenharmony_ci    ~~~~~~
1177db96d56Sopenharmony_ci
1187db96d56Sopenharmony_ci    (I)  (q * p + r) + (q * p + r) * 2**(2-F) + (q * p + r) * 2**(2-2*F) < q * p + p
1197db96d56Sopenharmony_ci
1207db96d56Sopenharmony_ci    (II)  (q * p + r) * 2**(2-F) + (q * p + r) * 2**(2-2*F) < p - r
1217db96d56Sopenharmony_ci
1227db96d56Sopenharmony_ci    Using (1) and (7), it is sufficient to show that:
1237db96d56Sopenharmony_ci
1247db96d56Sopenharmony_ci    (III) (q * p + r) * 2**(-62) + (q * p + r) * 2**(-126) < 1 <= p - r
1257db96d56Sopenharmony_ci
1267db96d56Sopenharmony_ci    (III) can easily be verified by substituting the largest possible
1277db96d56Sopenharmony_ci    values p = 2**31-1, q = 2**31-2 and r = 2**31-2.
1287db96d56Sopenharmony_ci
1297db96d56Sopenharmony_ci    The critical cases occur when r = (p - 1), n = m * p - 1. These cases
1307db96d56Sopenharmony_ci    can be exhaustively verified with a test program.
1317db96d56Sopenharmony_ci
1327db96d56Sopenharmony_ci
1337db96d56Sopenharmony_ci[1]  http://www.apfloat.org/apfloat/2.40/apfloat.pdf
1347db96d56Sopenharmony_ci[2]  http://gmplib.org/~tege/divcnst-pldi94.pdf
1357db96d56Sopenharmony_ci     [Section 7: "Use of floating point"]
1367db96d56Sopenharmony_ci
1377db96d56Sopenharmony_ci
1387db96d56Sopenharmony_ci
1397db96d56Sopenharmony_ci(* Coq proof for (10) and (11) *)
1407db96d56Sopenharmony_ci
1417db96d56Sopenharmony_ciRequire Import ZArith.
1427db96d56Sopenharmony_ciRequire Import QArith.
1437db96d56Sopenharmony_ciRequire Import Qpower.
1447db96d56Sopenharmony_ciRequire Import Qabs.
1457db96d56Sopenharmony_ciRequire Import Psatz.
1467db96d56Sopenharmony_ci
1477db96d56Sopenharmony_ciOpen Scope Q_scope.
1487db96d56Sopenharmony_ci
1497db96d56Sopenharmony_ci
1507db96d56Sopenharmony_ciLtac qreduce T :=
1517db96d56Sopenharmony_ci  rewrite <- (Qred_correct (T)); simpl (Qred (T)).
1527db96d56Sopenharmony_ci
1537db96d56Sopenharmony_ciTheorem Qlt_move_right :
1547db96d56Sopenharmony_ci  forall x y z:Q, x + z < y <-> x < y - z.
1557db96d56Sopenharmony_ciProof.
1567db96d56Sopenharmony_ci  intros.
1577db96d56Sopenharmony_ci  split.
1587db96d56Sopenharmony_ci    intros.
1597db96d56Sopenharmony_ci    psatzl Q.
1607db96d56Sopenharmony_ci    intros.
1617db96d56Sopenharmony_ci    psatzl Q.
1627db96d56Sopenharmony_ciQed.
1637db96d56Sopenharmony_ci
1647db96d56Sopenharmony_ciTheorem Qlt_mult_by_z :
1657db96d56Sopenharmony_ci  forall x y z:Q, 0 < z -> (x < y <-> x * z < y * z).
1667db96d56Sopenharmony_ciProof.
1677db96d56Sopenharmony_ci  intros.
1687db96d56Sopenharmony_ci  split.
1697db96d56Sopenharmony_ci    intros.
1707db96d56Sopenharmony_ci    apply Qmult_lt_compat_r. trivial. trivial.
1717db96d56Sopenharmony_ci    intros.
1727db96d56Sopenharmony_ci    rewrite <- (Qdiv_mult_l x z). rewrite <- (Qdiv_mult_l y z).
1737db96d56Sopenharmony_ci    apply Qmult_lt_compat_r.
1747db96d56Sopenharmony_ci    apply Qlt_shift_inv_l.
1757db96d56Sopenharmony_ci    trivial. psatzl Q. trivial. psatzl Q. psatzl Q.
1767db96d56Sopenharmony_ciQed.
1777db96d56Sopenharmony_ci
1787db96d56Sopenharmony_ciTheorem Qle_mult_quad :
1797db96d56Sopenharmony_ci  forall (a b c d:Q),
1807db96d56Sopenharmony_ci    0 <= a -> a <= c ->
1817db96d56Sopenharmony_ci    0 <= b -> b <= d ->
1827db96d56Sopenharmony_ci      a * b <= c * d.
1837db96d56Sopenharmony_ci  intros.
1847db96d56Sopenharmony_ci  psatz Q.
1857db96d56Sopenharmony_ciQed.
1867db96d56Sopenharmony_ci
1877db96d56Sopenharmony_ci
1887db96d56Sopenharmony_ciTheorem q_lt_qest:
1897db96d56Sopenharmony_ci  forall (p q r:Q),
1907db96d56Sopenharmony_ci    (0 < p) -> (p <= (2#1)^31 - 1) ->
1917db96d56Sopenharmony_ci    (0 <= q) -> (q <= p - 1) ->
1927db96d56Sopenharmony_ci    (1 <= r) -> (r <= p - 1) ->
1937db96d56Sopenharmony_ci      q < (q * p + r) / (p * (1 + (2#1)^(-63))^2).
1947db96d56Sopenharmony_ciProof.
1957db96d56Sopenharmony_ci  intros.
1967db96d56Sopenharmony_ci  rewrite Qlt_mult_by_z with (z := (p * (1 + (2#1)^(-63))^2)).
1977db96d56Sopenharmony_ci
1987db96d56Sopenharmony_ci  unfold Qdiv.
1997db96d56Sopenharmony_ci  rewrite <- Qmult_assoc.
2007db96d56Sopenharmony_ci  rewrite (Qmult_comm (/ (p * (1 + (2 # 1) ^ (-63)) ^ 2)) (p * (1 + (2 # 1) ^ (-63)) ^ 2)).
2017db96d56Sopenharmony_ci  rewrite Qmult_inv_r.
2027db96d56Sopenharmony_ci  rewrite Qmult_1_r.
2037db96d56Sopenharmony_ci
2047db96d56Sopenharmony_ci  assert (q * (p * (1 + (2 # 1) ^ (-63)) ^ 2) == q * p + (q * p) * ((2 # 1) ^ (-62) + (2 # 1) ^ (-126))).
2057db96d56Sopenharmony_ci  qreduce ((1 + (2 # 1) ^ (-63)) ^ 2).
2067db96d56Sopenharmony_ci  qreduce ((2 # 1) ^ (-62) + (2 # 1) ^ (-126)).
2077db96d56Sopenharmony_ci  ring_simplify.
2087db96d56Sopenharmony_ci  reflexivity.
2097db96d56Sopenharmony_ci  rewrite H5.
2107db96d56Sopenharmony_ci
2117db96d56Sopenharmony_ci  rewrite Qplus_comm.
2127db96d56Sopenharmony_ci  rewrite Qlt_move_right.
2137db96d56Sopenharmony_ci  ring_simplify (q * p + r - q * p).
2147db96d56Sopenharmony_ci  qreduce ((2 # 1) ^ (-62) + (2 # 1) ^ (-126)).
2157db96d56Sopenharmony_ci
2167db96d56Sopenharmony_ci  apply Qlt_le_trans with (y := 1).
2177db96d56Sopenharmony_ci  rewrite Qlt_mult_by_z with (z := 85070591730234615865843651857942052864 # 18446744073709551617).
2187db96d56Sopenharmony_ci  ring_simplify.
2197db96d56Sopenharmony_ci
2207db96d56Sopenharmony_ci  apply Qle_lt_trans with (y := ((2 # 1) ^ 31 - (2#1)) * ((2 # 1) ^ 31 - 1)).
2217db96d56Sopenharmony_ci  apply Qle_mult_quad.
2227db96d56Sopenharmony_ci  assumption. psatzl Q. psatzl Q. psatzl Q. psatzl Q. psatzl Q. assumption. psatzl Q. psatzl Q.
2237db96d56Sopenharmony_ciQed.
2247db96d56Sopenharmony_ci
2257db96d56Sopenharmony_ciTheorem qest_lt_qplus1:
2267db96d56Sopenharmony_ci  forall (p q r:Q),
2277db96d56Sopenharmony_ci    (0 < p) -> (p <= (2#1)^31 - 1) ->
2287db96d56Sopenharmony_ci    (0 <= q) -> (q <= p - 1) ->
2297db96d56Sopenharmony_ci    (1 <= r) -> (r <= p - 1) ->
2307db96d56Sopenharmony_ci      ((q * p + r) * (1 + (2#1)^(-63))^2) / p < q + 1.
2317db96d56Sopenharmony_ciProof.
2327db96d56Sopenharmony_ci  intros.
2337db96d56Sopenharmony_ci  rewrite Qlt_mult_by_z with (z := p).
2347db96d56Sopenharmony_ci
2357db96d56Sopenharmony_ci  unfold Qdiv.
2367db96d56Sopenharmony_ci  rewrite <- Qmult_assoc.
2377db96d56Sopenharmony_ci  rewrite (Qmult_comm (/ p) p).
2387db96d56Sopenharmony_ci  rewrite Qmult_inv_r.
2397db96d56Sopenharmony_ci  rewrite Qmult_1_r.
2407db96d56Sopenharmony_ci
2417db96d56Sopenharmony_ci  assert ((q * p + r) * (1 + (2 # 1) ^ (-63)) ^ 2 == q * p + r + (q * p + r) * ((2 # 1) ^ (-62) + (2 # 1) ^ (-126))).
2427db96d56Sopenharmony_ci  qreduce ((1 + (2 # 1) ^ (-63)) ^ 2).
2437db96d56Sopenharmony_ci  qreduce ((2 # 1) ^ (-62) + (2 # 1) ^ (-126)).
2447db96d56Sopenharmony_ci  ring_simplify. reflexivity.
2457db96d56Sopenharmony_ci  rewrite H5.
2467db96d56Sopenharmony_ci
2477db96d56Sopenharmony_ci  rewrite <- Qplus_assoc. rewrite <- Qplus_comm. rewrite Qlt_move_right.
2487db96d56Sopenharmony_ci  ring_simplify ((q + 1) * p - q * p).
2497db96d56Sopenharmony_ci
2507db96d56Sopenharmony_ci  rewrite <- Qplus_comm. rewrite Qlt_move_right.
2517db96d56Sopenharmony_ci
2527db96d56Sopenharmony_ci  apply Qlt_le_trans with (y := 1).
2537db96d56Sopenharmony_ci  qreduce ((2 # 1) ^ (-62) + (2 # 1) ^ (-126)).
2547db96d56Sopenharmony_ci
2557db96d56Sopenharmony_ci  rewrite Qlt_mult_by_z with (z := 85070591730234615865843651857942052864 # 18446744073709551617).
2567db96d56Sopenharmony_ci  ring_simplify.
2577db96d56Sopenharmony_ci
2587db96d56Sopenharmony_ci  ring_simplify in H0.
2597db96d56Sopenharmony_ci  apply Qle_lt_trans with (y := (2147483646 # 1) * (2147483647 # 1) + (2147483646 # 1)).
2607db96d56Sopenharmony_ci
2617db96d56Sopenharmony_ci  apply Qplus_le_compat.
2627db96d56Sopenharmony_ci  apply Qle_mult_quad.
2637db96d56Sopenharmony_ci  assumption. psatzl Q. auto with qarith. assumption. psatzl Q.
2647db96d56Sopenharmony_ci  auto with qarith. auto with qarith.
2657db96d56Sopenharmony_ci  psatzl Q. psatzl Q. assumption.
2667db96d56Sopenharmony_ciQed.
2677db96d56Sopenharmony_ci
2687db96d56Sopenharmony_ci
2697db96d56Sopenharmony_ci
270