Update contrib.
2 * Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of the License "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
20 #include "algorithms.h"
22 word Add(word *C, const word *A, const word *B, unsigned int N)
26 for (unsigned int i = 0; i < N; i+=2)
28 dword u = (dword) carry + A[i] + B[i];
30 u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1];
37 word Subtract(word *C, const word *A, const word *B, unsigned int N)
41 for (unsigned i = 0; i < N; i+=2)
43 dword u = (dword) A[i] - B[i] - borrow;
45 u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u));
47 borrow = 0-HIGH_WORD(u);
52 int Compare(const word *A, const word *B, unsigned int N)
63 // It is the job of the calling code to ensure that this won't carry.
64 // If you aren't sure, use the next version that will tell you if you need to
66 // Having two of these creates ever so slightly more code but avoids having
67 // ifdefs all over the rest of the code checking the following type stuff which
68 // causes warnings in certain compilers about unused parameters in release
69 // builds. We can't have that can we!
71 Allows avoid this all over bigint.cpp and primes.cpp
73 TUint carry = Increment(Ptr(), Size());
76 Increment(Ptr(), Size())
79 void IncrementNoCarry(word *A, unsigned int N, word B)
86 for (unsigned i=1; i<N; i++)
92 word Increment(word *A, unsigned int N, word B)
99 for (unsigned i=1; i<N; i++)
105 //See commments above about IncrementNoCarry
106 void DecrementNoCarry(word *A, unsigned int N, word B)
113 for (unsigned i=1; i<N; i++)
119 word Decrement(word *A, unsigned int N, word B)
126 for (unsigned i=1; i<N; i++)
132 void TwosComplement(word *A, unsigned int N)
135 for (unsigned i=0; i<N; i++)
139 static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
142 for(unsigned i=0; i<N; i++)
144 dword p = (dword)A[i] * B + carry;
146 carry = HIGH_WORD(p);
151 static void AtomicMultiply(word *C, const word *A, const word *B)
161 d = (dword)(A1-A0)*(B0-B1);
166 d = (dword)s*(word)(B0-B1);
172 d = (word)(A1-A0)*(dword)s;
177 d = (dword)(A0-A1)*(B1-B0);
180 // this segment is the branchless equivalent of above
181 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
182 unsigned int ai = A[1] < A[0];
183 unsigned int bi = B[0] < B[1];
184 unsigned int di = ai & bi;
185 dword d = (dword)D[di]*D[di+2];
187 unsigned int si = ai + !bi;
190 dword A0B0 = (dword)A[0]*B[0];
191 C[0] = LOW_WORD(A0B0);
193 dword A1B1 = (dword)A[1]*B[1];
194 dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1);
197 t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s;
202 static word AtomicMultiplyAdd(word *C, const word *A, const word *B)
204 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
205 unsigned int ai = A[1] < A[0];
206 unsigned int bi = B[0] < B[1];
207 unsigned int di = ai & bi;
208 dword d = (dword)D[di]*D[di+2];
210 unsigned int si = ai + !bi;
213 dword A0B0 = (dword)A[0]*B[0];
214 dword t = A0B0 + C[0];
217 dword A1B1 = (dword)A[1]*B[1];
218 t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1];
221 t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2];
224 t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3];
229 static inline void AtomicMultiplyBottom(word *C, const word *A, const word *B)
231 dword t = (dword)A[0]*B[0];
233 C[1] = HIGH_WORD(t) + A[0]*B[1] + A[1]*B[0];
236 #define MulAcc(x, y) \
237 p = (dword)A[x] * B[y] + c; \
239 p = (dword)d + HIGH_WORD(p); \
243 #define SaveMulAcc(s, x, y) \
245 p = (dword)A[x] * B[y] + d; \
247 p = (dword)e + HIGH_WORD(p); \
251 #define MulAcc1(x, y) \
252 p = (dword)A[x] * A[y] + c; \
254 p = (dword)d + HIGH_WORD(p); \
258 #define SaveMulAcc1(s, x, y) \
260 p = (dword)A[x] * A[y] + d; \
262 p = (dword)e + HIGH_WORD(p); \
266 #define SquAcc(x, y) \
267 p = (dword)A[x] * A[y]; \
270 p = (dword)d + HIGH_WORD(p); \
274 #define SaveSquAcc(s, x, y) \
276 p = (dword)A[x] * A[y]; \
279 p = (dword)e + HIGH_WORD(p); \
283 // VC60 workaround: MSVC 6.0 has an optimization problem that makes
284 // (dword)A*B where either A or B has been cast to a dword before
285 // very expensive. Revisit a CombaSquare4() function when this
288 // WARNING: KeithR. 05/08/03 This routine doesn't work with gcc on hardware
289 // either. I've completely removed it. It may be worth looking into sometime
292 static void CombaSquare4(word *R, const word *A)
297 p = (dword)A[0] * A[0];
316 p = (dword)A[3] * A[3] + d;
318 R[7] = e + HIGH_WORD(p);
322 static void CombaMultiply4(word *R, const word *A, const word *B)
327 p = (dword)A[0] * B[0];
352 p = (dword)A[3] * B[3] + d;
354 R[7] = e + HIGH_WORD(p);
357 static void CombaMultiply8(word *R, const word *A, const word *B)
362 p = (dword)A[0] * B[0];
430 SaveMulAcc(10, 4, 7);
435 SaveMulAcc(11, 5, 7);
439 SaveMulAcc(12, 6, 7);
443 p = (dword)A[7] * B[7] + d;
445 R[15] = e + HIGH_WORD(p);
448 static void CombaMultiplyBottom4(word *R, const word *A, const word *B)
453 p = (dword)A[0] * B[0];
466 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
469 static void CombaMultiplyBottom8(word *R, const word *A, const word *B)
474 p = (dword)A[0] * B[0];
513 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
514 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
519 static void AtomicInverseModPower2(word *C, word A0, word A1)
523 dword A=MAKE_DWORD(A0, A1), R=A0%8;
525 for (unsigned i=3; i<2*WORD_BITS; i*=2)
533 // ********************************************************
550 // R[2*N] - result = A*B
551 // T[2*N] - temporary work space
552 // A[N] --- multiplier
553 // B[N] --- multiplicant
555 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N)
557 assert(N>=2 && N%2==0);
560 AtomicMultiply(R, A, B);
562 CombaMultiply4(R, A, B);
564 CombaMultiply8(R, A, B);
567 const unsigned int N2 = N/2;
570 int aComp = Compare(A0, A1, N2);
571 int bComp = Compare(B0, B1, N2);
573 switch (2*aComp + aComp + bComp)
576 Subtract(R0, A1, A0, N2);
577 Subtract(R1, B0, B1, N2);
578 RecursiveMultiply(T0, T2, R0, R1, N2);
579 Subtract(T1, T1, R0, N2);
583 Subtract(R0, A1, A0, N2);
584 Subtract(R1, B0, B1, N2);
585 RecursiveMultiply(T0, T2, R0, R1, N2);
589 Subtract(R0, A0, A1, N2);
590 Subtract(R1, B1, B0, N2);
591 RecursiveMultiply(T0, T2, R0, R1, N2);
595 Subtract(R0, A1, A0, N2);
596 Subtract(R1, B0, B1, N2);
597 RecursiveMultiply(T0, T2, R0, R1, N2);
598 Subtract(T1, T1, R1, N2);
606 RecursiveMultiply(R0, T2, A0, B0, N2);
607 RecursiveMultiply(R2, T2, A1, B1, N2);
609 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
611 carry += Add(T0, T0, R0, N);
612 carry += Add(T0, T0, R2, N);
613 carry += Add(R1, R1, T0, N);
615 assert (carry >= 0 && carry <= 2);
616 Increment(R3, N2, carry);
620 // R[2*N] - result = A*A
621 // T[2*N] - temporary work space
622 // A[N] --- number to be squared
624 void RecursiveSquare(word *R, word *T, const word *A, unsigned int N)
629 AtomicMultiply(R, A, A);
632 // VC60 workaround: MSVC 6.0 has an optimization problem that makes
633 // (dword)A*B where either A or B has been cast to a dword before
634 // very expensive. Revisit a CombaSquare4() function when this
637 // WARNING: KeithR. 05/08/03 This routine doesn't work with gcc on hardware
638 // either. I've completely removed it. It may be worth looking into sometime
639 // in the future. Therefore, we use the CombaMultiply4 on all targets.
641 CombaMultiply4(R, A, A);
648 const unsigned int N2 = N/2;
650 RecursiveSquare(R0, T2, A0, N2);
651 RecursiveSquare(R2, T2, A1, N2);
652 RecursiveMultiply(T0, T2, A0, A1, N2);
654 word carry = Add(R1, R1, T0, N);
655 carry += Add(R1, R1, T0, N);
656 Increment(R3, N2, carry);
659 // R[N] - bottom half of A*B
660 // T[N] - temporary work space
662 // B[N] - multiplicant
664 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
666 assert(N>=2 && N%2==0);
669 AtomicMultiplyBottom(R, A, B);
671 CombaMultiplyBottom4(R, A, B);
673 CombaMultiplyBottom8(R, A, B);
676 const unsigned int N2 = N/2;
678 RecursiveMultiply(R, T, A0, B0, N2);
679 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
681 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
686 // R[N] --- upper half of A*B
687 // T[2*N] - temporary work space
688 // L[N] --- lower half of A*B
689 // A[N] --- multiplier
690 // B[N] --- multiplicant
692 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
694 assert(N>=2 && N%2==0);
698 AtomicMultiply(T, A, B);
699 ((dword *)R)[0] = ((dword *)T)[1];
703 CombaMultiply4(T, A, B);
704 ((dword *)R)[0] = ((dword *)T)[2];
705 ((dword *)R)[1] = ((dword *)T)[3];
709 const unsigned int N2 = N/2;
712 int aComp = Compare(A0, A1, N2);
713 int bComp = Compare(B0, B1, N2);
715 switch (2*aComp + aComp + bComp)
718 Subtract(R0, A1, A0, N2);
719 Subtract(R1, B0, B1, N2);
720 RecursiveMultiply(T0, T2, R0, R1, N2);
721 Subtract(T1, T1, R0, N2);
725 Subtract(R0, A1, A0, N2);
726 Subtract(R1, B0, B1, N2);
727 RecursiveMultiply(T0, T2, R0, R1, N2);
731 Subtract(R0, A0, A1, N2);
732 Subtract(R1, B1, B0, N2);
733 RecursiveMultiply(T0, T2, R0, R1, N2);
737 Subtract(R0, A1, A0, N2);
738 Subtract(R1, B0, B1, N2);
739 RecursiveMultiply(T0, T2, R0, R1, N2);
740 Subtract(T1, T1, R1, N2);
748 RecursiveMultiply(T2, R0, A1, B1, N2);
750 // now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1
752 CopyWords(R0, L+N2, N2);
753 word c2 = Subtract(R0, R0, L, N2);
754 c2 += Subtract(R0, R0, T0, N2);
755 word t = (Compare(R0, T2, N2) == -1);
758 carry += Increment(R0, N2, c2+t);
759 carry += Add(R0, R0, T1, N2);
760 carry += Add(R0, R0, T3, N2);
762 CopyWords(R1, T3, N2);
763 assert (carry >= 0 && carry <= 2);
764 Increment(R1, N2, carry);
768 // R[NA+NB] - result = A*B
769 // T[NA+NB] - temporary work space
770 // A[NA] ---- multiplier
771 // B[NB] ---- multiplicant
773 void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
778 RecursiveSquare(R, T, A, NA);
780 RecursiveMultiply(R, T, A, B, NA);
793 assert(NB % NA == 0);
794 assert((NB/NA)%2 == 0); // NB is an even multiple of NA
801 SetWords(R, 0, NB+2);
808 R[NB] = LinearMultiply(R, B, A[0], NB);
814 RecursiveMultiply(R, T, A, B, NA);
815 CopyWords(T+2*NA, R+NA, NA);
819 for (i=2*NA; i<NB; i+=2*NA)
820 RecursiveMultiply(T+NA+i, T, A, B+i, NA);
821 for (i=NA; i<NB; i+=2*NA)
822 RecursiveMultiply(R+i, T, A, B+i, NA);
824 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
827 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
828 // T[3*N/2] - temporary work space
829 // A[N] ----- an odd number as input
831 void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N)
834 AtomicInverseModPower2(R, A[0], A[1]);
837 const unsigned int N2 = N/2;
838 RecursiveInverseModPower2(R0, T0, A0, N2);
840 SetWords(T0+1, 0, N2-1);
841 RecursiveMultiplyTop(R1, T1, T0, R0, A0, N2);
842 RecursiveMultiplyBottom(T0, T1, R0, A1, N2);
844 TwosComplement(T0, N2);
845 RecursiveMultiplyBottom(R1, T1, R0, T0, N2);
863 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
864 // T[3*N] - temporary work space
865 // X[2*N] - number to be reduced
867 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
869 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, unsigned int N)
871 RecursiveMultiplyBottom(R, T, X, U, N);
872 RecursiveMultiplyTop(T, T+N, X, R, M, N);
873 if (Subtract(R, X+N, T, N))
876 word carry = Add(R, R, M, N);
884 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
885 static word SubatomicDivide(word *A, word B0, word B1)
887 // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
888 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
893 // estimate the quotient: do a 2 word by 1 word divide
897 Q = word(MAKE_DWORD(A[1], A[2]) / (B1+1));
899 // now subtract Q*B from A
901 u = (dword) A[0] - LOW_WORD(p);
903 u = (dword) A[1] - HIGH_WORD(p) - (word)(0-HIGH_WORD(u)) - (dword)B1*Q;
905 A[2] += HIGH_WORD(u);
907 // Q <= actual quotient, so fix it
908 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
910 u = (dword) A[0] - B0;
912 u = (dword) A[1] - B1 - (word)(0-HIGH_WORD(u));
914 A[2] += HIGH_WORD(u);
916 assert(Q); // shouldn't overflow
922 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
923 static inline void AtomicDivide(word *Q, const word *A, const word *B)
925 if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
933 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
934 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
935 Q[0] = SubatomicDivide(T, B[0], B[1]);
938 // multiply quotient and divisor and add remainder, make sure it equals dividend
939 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
941 AtomicMultiply(P, Q, B);
943 assert(Mem::Compare((TUint8*)P, 4*WORD_SIZE, (TUint8*)A, 4*WORD_SIZE)==0);
948 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
949 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N)
958 AtomicMultiply(T+i, Q, B+i);
960 if (AtomicMultiplyAdd(T+i, Q, B+i))
961 T[i+5] += (++T[i+4]==0);
965 T[N] = LinearMultiply(T, B, Q[0], N);
970 word borrow = Subtract(R, R, T, N+2);
971 assert(!borrow && !R[N+1]);
973 Subtract(R, R, T, N+2);
976 while (R[N] || Compare(R, B, N) >= 0)
978 R[N] -= Subtract(R, R, B, N);
980 assert(Q[0] || Q[1]); // no overflow
984 // R[NB] -------- remainder = A%B
985 // Q[NA-NB+2] --- quotient = A/B
986 // T[NA+2*NB+4] - temp work space
987 // A[NA] -------- dividend
988 // B[NB] -------- divisor
990 void Divide(word *R, word *Q, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
992 assert(NA && NB && NA%2==0 && NB%2==0);
993 assert(B[NB-1] || B[NB-2]);
996 // set up temporary work space
998 word *const TB=T+NA+2;
999 word *const TP=T+NA+2+NB;
1001 // copy B into TB and normalize it so that TB has highest bit set to 1
1002 unsigned shiftWords = (B[NB-1]==0);
1003 TB[0] = TB[NB-1] = 0;
1004 CopyWords(TB+shiftWords, B, NB-shiftWords);
1005 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
1006 assert(shiftBits < WORD_BITS);
1007 ShiftWordsLeftByBits(TB, NB, shiftBits);
1009 // copy A into TA and normalize it
1010 TA[0] = TA[NA] = TA[NA+1] = 0;
1011 CopyWords(TA+shiftWords, A, NA);
1012 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
1014 if (TA[NA+1]==0 && TA[NA] <= 1)
1016 Q[NA-NB+1] = Q[NA-NB] = 0;
1017 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
1019 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
1026 assert(Compare(TA+NA-NB, TB, NB) < 0);
1030 BT[0] = TB[NB-2] + 1;
1031 BT[1] = TB[NB-1] + (BT[0]==0);
1033 // start reducing TA mod TB, 2 words at a time
1034 for (unsigned i=NA-2; i>=NB; i-=2)
1036 AtomicDivide(Q+i-NB, TA+i-2, BT);
1037 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
1040 // copy TA into R, and denormalize it
1041 CopyWords(R, TA+shiftWords, NB);
1042 ShiftWordsRightByBits(R, NB, shiftBits);
1045 static inline unsigned int EvenWordCount(const word *X, unsigned int N)
1047 while (N && X[N-2]==0 && X[N-1]==0)
1053 // R[N] --- result = A^(-1) * 2^k mod M
1054 // T[4*N] - temporary work space
1055 // A[NA] -- number to take inverse of
1058 unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA, const word *M, unsigned int N)
1060 assert(NA<=N && N && N%2==0);
1066 unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
1067 unsigned int k=0, s=0;
1069 SetWords(T, 0, 3*N);
1071 CopyWords(f, A, NA);
1079 if (EvenWordCount(f, fgLen)==0)
1085 ShiftWordsRightByWords(f, fgLen, 1);
1086 if (c[bcLen-1]) bcLen+=2;
1088 ShiftWordsLeftByWords(c, bcLen, 1);
1101 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
1106 Subtract(R, M, b, N);
1110 ShiftWordsRightByBits(f, fgLen, i);
1111 t=ShiftWordsLeftByBits(c, bcLen, i);
1119 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
1122 if (Compare(f, g, fgLen)==-1)
1124 TClassSwap<word*>(f,g);
1125 TClassSwap<word*>(b,c);
1129 Subtract(f, f, g, fgLen);
1131 if (Add(b, b, c, bcLen))
1140 // R[N] - result = A/(2^k) mod M
1144 void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
1151 ShiftWordsRightByBits(R, N, 1);
1154 word carry = Add(R, R, M, N);
1155 ShiftWordsRightByBits(R, N, 1);
1156 R[N-1] += carry<<(WORD_BITS-1);