Symaptic: os/security/crypto/weakcryptospi/source/bigint/algorithms.cpp@260cb5ec6c19

     1 /*

     2 * Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).

     3 * All rights reserved.

     4 * This component and the accompanying materials are made available

     5 * under the terms of the License "Eclipse Public License v1.0"

     6 * which accompanies this distribution, and is available

     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".

8 *

     9 * Initial Contributors:

    10 * Nokia Corporation - initial contribution.

    11 *

    12 * Contributors:

    13 *

    14 * Description:

    15 *

    16 */

    19 #include "words.h"

    20 #include "algorithms.h"

    22 word Add(word *C, const word *A, const word *B, unsigned int N)

    23 {

    24 	assert (N%2 == 0);

    25 	word carry = 0;

    26 	for (unsigned int i = 0; i < N; i+=2)

    27 	{

    28 		dword u = (dword) carry + A[i] + B[i];

    29 		C[i] = LOW_WORD(u);

    30 		u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1];

    31 		C[i+1] = LOW_WORD(u);

    32 		carry = HIGH_WORD(u);

    33 	}

    34 	return carry;

    35 }

    37 word Subtract(word *C, const word *A, const word *B, unsigned int N)

    38 {

    39 	assert (N%2 == 0);

    40 	word borrow=0;

    41 	for (unsigned i = 0; i < N; i+=2)

    42 	{

    43 		dword u = (dword) A[i] - B[i] - borrow;

    44 		C[i] = LOW_WORD(u);

    45 		u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u));

    46 		C[i+1] = LOW_WORD(u);

    47 		borrow = 0-HIGH_WORD(u);

    48 	}

    49 	return borrow;

    50 }

    52 int Compare(const word *A, const word *B, unsigned int N)

    53 {

    54 	while (N--)

    55 		if (A[N] > B[N])

    56 			return 1;

    57 		else if (A[N] < B[N])

    58 			return -1;

    60 	return 0;

    61 }

    63 // It is the job of the calling code to ensure that this won't carry.

    64 // If you aren't sure, use the next version that will tell you if you need to

    65 // grow your integer.

    66 // Having two of these creates ever so slightly more code but avoids having

    67 // ifdefs all over the rest of the code checking the following type stuff which

    68 // causes warnings in certain compilers about unused parameters in release

    69 // builds.  We can't have that can we!

    70 /*

    71 Allows avoid this all over bigint.cpp and primes.cpp

    72 ifdef _DEBUG

    73 	TUint carry = Increment(Ptr(), Size());

    74 	assert(!carry);

    75 else

    76 	Increment(Ptr(), Size())

    77 endif

    78 */

    79 void IncrementNoCarry(word *A, unsigned int N, word B)

    80 {

    81 	assert(N);

    82 	word t = A[0];

    83 	A[0] = t+B;

    84 	if (A[0] >= t)

    85 		return;

    86 	for (unsigned i=1; i<N; i++)

    87 		if (++A[i])

    88 			return;

    89 	assert(0);

    90 }

    92 word Increment(word *A, unsigned int N, word B)

    93 {

    94 	assert(N);

    95 	word t = A[0];

    96 	A[0] = t+B;

    97 	if (A[0] >= t)

    98 		return 0;

    99 	for (unsigned i=1; i<N; i++)

   100 		if (++A[i])

   101 			return 0;

   102 	return 1;

   103 }

   105 //See commments above about IncrementNoCarry

   106 void DecrementNoCarry(word *A, unsigned int N, word B)

   107 {

   108 	assert(N);

   109 	word t = A[0];

   110 	A[0] = t-B;

   111 	if (A[0] <= t)

   112 		return;

   113 	for (unsigned i=1; i<N; i++)

   114 		if (A[i]--)

   115 			return;

   116 	assert(0);

   117 }

   119 word Decrement(word *A, unsigned int N, word B)

   120 {

   121 	assert(N);

   122 	word t = A[0];

   123 	A[0] = t-B;

   124 	if (A[0] <= t)

   125 		return 0;

   126 	for (unsigned i=1; i<N; i++)

   127 		if (A[i]--)

   128 			return 0;

   129 	return 1;

   130 }

   132 void TwosComplement(word *A, unsigned int N)

   133 {

   134 	Decrement(A, N);

   135 	for (unsigned i=0; i<N; i++)

   136 		A[i] = ~A[i];

   137 }

   139 static word LinearMultiply(word *C, const word *A, word B, unsigned int N)

   140 {

   141 	word carry=0;

   142 	for(unsigned i=0; i<N; i++)

   143 	{

   144 		dword p = (dword)A[i] * B + carry;

   145 		C[i] = LOW_WORD(p);

   146 		carry = HIGH_WORD(p);

   147 	}

   148 	return carry;

   149 }

   151 static void AtomicMultiply(word *C, const word *A, const word *B)

   152 {

   153 /*

   154 	word s;

   155 	dword d;

   157 	if (A1 >= A0)

   158 		if (B0 >= B1)

   159 		{

   160 			s = 0;

   161 			d = (dword)(A1-A0)*(B0-B1);

   162 		}

   163 		else

   164 		{

   165 			s = (A1-A0);

   166 			d = (dword)s*(word)(B0-B1);

   167 		}

   168 	else

   169 		if (B0 > B1)

   170 		{

   171 			s = (B0-B1);

   172 			d = (word)(A1-A0)*(dword)s;

   173 		}

   174 		else

   175 		{

   176 			s = 0;

   177 			d = (dword)(A0-A1)*(B1-B0);

   178 		}

   179 */

   180 	// this segment is the branchless equivalent of above

   181 	word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};

   182 	unsigned int ai = A[1] < A[0];

   183 	unsigned int bi = B[0] < B[1];

   184 	unsigned int di = ai & bi;

   185 	dword d = (dword)D[di]*D[di+2];

   186 	D[1] = D[3] = 0;

   187 	unsigned int si = ai + !bi;

   188 	word s = D[si];

   190 	dword A0B0 = (dword)A[0]*B[0];

   191 	C[0] = LOW_WORD(A0B0);

   193 	dword A1B1 = (dword)A[1]*B[1];

   194 	dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1);

   195 	C[1] = LOW_WORD(t);

   197 	t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s;

   198 	C[2] = LOW_WORD(t);

   199 	C[3] = HIGH_WORD(t);

   200 }

   202 static word AtomicMultiplyAdd(word *C, const word *A, const word *B)

   203 {

   204 	word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};

   205 	unsigned int ai = A[1] < A[0];

   206 	unsigned int bi = B[0] < B[1];

   207 	unsigned int di = ai & bi;

   208 	dword d = (dword)D[di]*D[di+2];

   209 	D[1] = D[3] = 0;

   210 	unsigned int si = ai + !bi;

   211 	word s = D[si];

   213 	dword A0B0 = (dword)A[0]*B[0];

   214 	dword t = A0B0 + C[0];

   215 	C[0] = LOW_WORD(t);

   217 	dword A1B1 = (dword)A[1]*B[1];

   218 	t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1];

   219 	C[1] = LOW_WORD(t);

   221 	t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2];

   222 	C[2] = LOW_WORD(t);

   224 	t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3];

   225 	C[3] = LOW_WORD(t);

   226 	return HIGH_WORD(t);

   227 }

   229 static inline void AtomicMultiplyBottom(word *C, const word *A, const word *B)

   230 {

   231 	dword t = (dword)A[0]*B[0];

   232 	C[0] = LOW_WORD(t);

   233 	C[1] = HIGH_WORD(t) + A[0]*B[1] + A[1]*B[0];

   234 }

   236 #define MulAcc(x, y)								\

   237 	p = (dword)A[x] * B[y] + c; 					\

   238 	c = LOW_WORD(p);								\

   239 	p = (dword)d + HIGH_WORD(p);					\

   240 	d = LOW_WORD(p);								\

   241 	e += HIGH_WORD(p);

   243 #define SaveMulAcc(s, x, y) 						\

   244 	R[s] = c;										\

   245 	p = (dword)A[x] * B[y] + d; 					\

   246 	c = LOW_WORD(p);								\

   247 	p = (dword)e + HIGH_WORD(p);					\

   248 	d = LOW_WORD(p);								\

   249 	e = HIGH_WORD(p);

   251 #define MulAcc1(x, y)								\

   252 	p = (dword)A[x] * A[y] + c; 					\

   253 	c = LOW_WORD(p);								\

   254 	p = (dword)d + HIGH_WORD(p);					\

   255 	d = LOW_WORD(p);								\

   256 	e += HIGH_WORD(p);

   258 #define SaveMulAcc1(s, x, y) 						\

   259 	R[s] = c;										\

   260 	p = (dword)A[x] * A[y] + d; 					\

   261 	c = LOW_WORD(p);								\

   262 	p = (dword)e + HIGH_WORD(p);					\

   263 	d = LOW_WORD(p);								\

   264 	e = HIGH_WORD(p);

   266 #define SquAcc(x, y)								\

   267 	p = (dword)A[x] * A[y];	\

   268 	p = p + p + c; 					\

   269 	c = LOW_WORD(p);								\

   270 	p = (dword)d + HIGH_WORD(p);					\

   271 	d = LOW_WORD(p);								\

   272 	e += HIGH_WORD(p);

   274 #define SaveSquAcc(s, x, y) 						\

   275 	R[s] = c;										\

   276 	p = (dword)A[x] * A[y];	\

   277 	p = p + p + d; 					\

   278 	c = LOW_WORD(p);								\

   279 	p = (dword)e + HIGH_WORD(p);					\

   280 	d = LOW_WORD(p);								\

   281 	e = HIGH_WORD(p);

   283 // VC60 workaround: MSVC 6.0 has an optimization problem that makes

   284 // (dword)A*B where either A or B has been cast to a dword before

   285 // very expensive. Revisit a CombaSquare4() function when this

   286 // problem is fixed.

   288 // WARNING: KeithR.  05/08/03 This routine doesn't work with gcc on hardware

   289 // either.  I've completely removed it.  It may be worth looking into sometime

   290 // in the future.

   291 /*#ifndef __WINS__

   292 static void CombaSquare4(word *R, const word *A)

   293 {

   294 	dword p;

   295 	word c, d, e;

   297 	p = (dword)A[0] * A[0];

   298 	R[0] = LOW_WORD(p);

   299 	c = HIGH_WORD(p);

   300 	d = e = 0;

   302 	SquAcc(0, 1);

   304 	SaveSquAcc(1, 2, 0);

   305 	MulAcc1(1, 1);

   307 	SaveSquAcc(2, 0, 3);

   308 	SquAcc(1, 2);

   310 	SaveSquAcc(3, 3, 1);

   311 	MulAcc1(2, 2);

   313 	SaveSquAcc(4, 2, 3);

   315 	R[5] = c;

   316 	p = (dword)A[3] * A[3] + d;

   317 	R[6] = LOW_WORD(p);

   318 	R[7] = e + HIGH_WORD(p);

   319 }

   320 #endif */

   322 static void CombaMultiply4(word *R, const word *A, const word *B)

   323 {

   324 	dword p;

   325 	word c, d, e;

   327 	p = (dword)A[0] * B[0];

   328 	R[0] = LOW_WORD(p);

   329 	c = HIGH_WORD(p);

   330 	d = e = 0;

   332 	MulAcc(0, 1);

   333 	MulAcc(1, 0);

   335 	SaveMulAcc(1, 2, 0);

   336 	MulAcc(1, 1);

   337 	MulAcc(0, 2);

   339 	SaveMulAcc(2, 0, 3);

   340 	MulAcc(1, 2);

   341 	MulAcc(2, 1);

   342 	MulAcc(3, 0);

   344 	SaveMulAcc(3, 3, 1);

   345 	MulAcc(2, 2);

   346 	MulAcc(1, 3);

   348 	SaveMulAcc(4, 2, 3);

   349 	MulAcc(3, 2);

   351 	R[5] = c;

   352 	p = (dword)A[3] * B[3] + d;

   353 	R[6] = LOW_WORD(p);

   354 	R[7] = e + HIGH_WORD(p);

   355 }

   357 static void CombaMultiply8(word *R, const word *A, const word *B)

   358 {

   359 	dword p;

   360 	word c, d, e;

   362 	p = (dword)A[0] * B[0];

   363 	R[0] = LOW_WORD(p);

   364 	c = HIGH_WORD(p);

   365 	d = e = 0;

   367 	MulAcc(0, 1);

   368 	MulAcc(1, 0);

   370 	SaveMulAcc(1, 2, 0);

   371 	MulAcc(1, 1);

   372 	MulAcc(0, 2);

   374 	SaveMulAcc(2, 0, 3);

   375 	MulAcc(1, 2);

   376 	MulAcc(2, 1);

   377 	MulAcc(3, 0);

   379 	SaveMulAcc(3, 0, 4);

   380 	MulAcc(1, 3);

   381 	MulAcc(2, 2);

   382 	MulAcc(3, 1);

   383 	MulAcc(4, 0);

   385 	SaveMulAcc(4, 0, 5);

   386 	MulAcc(1, 4);

   387 	MulAcc(2, 3);

   388 	MulAcc(3, 2);

   389 	MulAcc(4, 1);

   390 	MulAcc(5, 0);

   392 	SaveMulAcc(5, 0, 6);

   393 	MulAcc(1, 5);

   394 	MulAcc(2, 4);

   395 	MulAcc(3, 3);

   396 	MulAcc(4, 2);

   397 	MulAcc(5, 1);

   398 	MulAcc(6, 0);

   400 	SaveMulAcc(6, 0, 7);

   401 	MulAcc(1, 6);

   402 	MulAcc(2, 5);

   403 	MulAcc(3, 4);

   404 	MulAcc(4, 3);

   405 	MulAcc(5, 2);

   406 	MulAcc(6, 1);

   407 	MulAcc(7, 0);

   409 	SaveMulAcc(7, 1, 7);

   410 	MulAcc(2, 6);

   411 	MulAcc(3, 5);

   412 	MulAcc(4, 4);

   413 	MulAcc(5, 3);

   414 	MulAcc(6, 2);

   415 	MulAcc(7, 1);

   417 	SaveMulAcc(8, 2, 7);

   418 	MulAcc(3, 6);

   419 	MulAcc(4, 5);

   420 	MulAcc(5, 4);

   421 	MulAcc(6, 3);

   422 	MulAcc(7, 2);

   424 	SaveMulAcc(9, 3, 7);

   425 	MulAcc(4, 6);

   426 	MulAcc(5, 5);

   427 	MulAcc(6, 4);

   428 	MulAcc(7, 3);

   430 	SaveMulAcc(10, 4, 7);

   431 	MulAcc(5, 6);

   432 	MulAcc(6, 5);

   433 	MulAcc(7, 4);

   435 	SaveMulAcc(11, 5, 7);

   436 	MulAcc(6, 6);

   437 	MulAcc(7, 5);

   439 	SaveMulAcc(12, 6, 7);

   440 	MulAcc(7, 6);

   442 	R[13] = c;

   443 	p = (dword)A[7] * B[7] + d;

   444 	R[14] = LOW_WORD(p);

   445 	R[15] = e + HIGH_WORD(p);

   446 }

   448 static void CombaMultiplyBottom4(word *R, const word *A, const word *B)

   449 {

   450 	dword p;

   451 	word c, d, e;

   453 	p = (dword)A[0] * B[0];

   454 	R[0] = LOW_WORD(p);

   455 	c = HIGH_WORD(p);

   456 	d = e = 0;

   458 	MulAcc(0, 1);

   459 	MulAcc(1, 0);

   461 	SaveMulAcc(1, 2, 0);

   462 	MulAcc(1, 1);

   463 	MulAcc(0, 2);

   465 	R[2] = c;

   466 	R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];

   467 }

   469 static void CombaMultiplyBottom8(word *R, const word *A, const word *B)

   470 {

   471 	dword p;

   472 	word c, d, e;

   474 	p = (dword)A[0] * B[0];

   475 	R[0] = LOW_WORD(p);

   476 	c = HIGH_WORD(p);

   477 	d = e = 0;

   479 	MulAcc(0, 1);

   480 	MulAcc(1, 0);

   482 	SaveMulAcc(1, 2, 0);

   483 	MulAcc(1, 1);

   484 	MulAcc(0, 2);

   486 	SaveMulAcc(2, 0, 3);

   487 	MulAcc(1, 2);

   488 	MulAcc(2, 1);

   489 	MulAcc(3, 0);

   491 	SaveMulAcc(3, 0, 4);

   492 	MulAcc(1, 3);

   493 	MulAcc(2, 2);

   494 	MulAcc(3, 1);

   495 	MulAcc(4, 0);

   497 	SaveMulAcc(4, 0, 5);

   498 	MulAcc(1, 4);

   499 	MulAcc(2, 3);

   500 	MulAcc(3, 2);

   501 	MulAcc(4, 1);

   502 	MulAcc(5, 0);

   504 	SaveMulAcc(5, 0, 6);

   505 	MulAcc(1, 5);

   506 	MulAcc(2, 4);

   507 	MulAcc(3, 3);

   508 	MulAcc(4, 2);

   509 	MulAcc(5, 1);

   510 	MulAcc(6, 0);

   512 	R[6] = c;

   513 	R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +

   514 				A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];

   515 }

   517 #undef MulAcc

   518 #undef SaveMulAcc

   519 static void AtomicInverseModPower2(word *C, word A0, word A1)

   520 {

   521 	assert(A0%2==1);

   523 	dword A=MAKE_DWORD(A0, A1), R=A0%8;

   525 	for (unsigned i=3; i<2*WORD_BITS; i*=2)

   526 		R = R*(2-R*A);

   528 	assert(R*A==1);

   530 	C[0] = LOW_WORD(R);

   531 	C[1] = HIGH_WORD(R);

   532 }

   533 // ********************************************************

   535 #define A0		A

   536 #define A1		(A+N2)

   537 #define B0		B

   538 #define B1		(B+N2)

   540 #define T0		T

   541 #define T1		(T+N2)

   542 #define T2		(T+N)

   543 #define T3		(T+N+N2)

   545 #define R0		R

   546 #define R1		(R+N2)

   547 #define R2		(R+N)

   548 #define R3		(R+N+N2)

   550 // R[2*N] - result = A*B

   551 // T[2*N] - temporary work space

   552 // A[N] --- multiplier

   553 // B[N] --- multiplicant

   555 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N)

   556 {

   557 	assert(N>=2 && N%2==0);

   559 	if (N==2)

   560 		AtomicMultiply(R, A, B);

   561 	else if (N==4)

   562 		CombaMultiply4(R, A, B);

   563 	else if (N==8)

   564 		CombaMultiply8(R, A, B);

   565 	else

   566 	{

   567 		const unsigned int N2 = N/2;

   568 		int carry;

   570 		int aComp = Compare(A0, A1, N2);

   571 		int bComp = Compare(B0, B1, N2);

   573 		switch (2*aComp + aComp + bComp)

   574 		{

   575 		case -4:

   576 			Subtract(R0, A1, A0, N2);

   577 			Subtract(R1, B0, B1, N2);

   578 			RecursiveMultiply(T0, T2, R0, R1, N2);

   579 			Subtract(T1, T1, R0, N2);

   580 			carry = -1;

   581 			break;

   582 		case -2:

   583 			Subtract(R0, A1, A0, N2);

   584 			Subtract(R1, B0, B1, N2);

   585 			RecursiveMultiply(T0, T2, R0, R1, N2);

   586 			carry = 0;

   587 			break;

   588 		case 2:

   589 			Subtract(R0, A0, A1, N2);

   590 			Subtract(R1, B1, B0, N2);

   591 			RecursiveMultiply(T0, T2, R0, R1, N2);

   592 			carry = 0;

   593 			break;

   594 		case 4:

   595 			Subtract(R0, A1, A0, N2);

   596 			Subtract(R1, B0, B1, N2);

   597 			RecursiveMultiply(T0, T2, R0, R1, N2);

   598 			Subtract(T1, T1, R1, N2);

   599 			carry = -1;

   600 			break;

   601 		default:

   602 			SetWords(T0, 0, N);

   603 			carry = 0;

   604 		}

   606 		RecursiveMultiply(R0, T2, A0, B0, N2);

   607 		RecursiveMultiply(R2, T2, A1, B1, N2);

   609 		// now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1

   611 		carry += Add(T0, T0, R0, N);

   612 		carry += Add(T0, T0, R2, N);

   613 		carry += Add(R1, R1, T0, N);

   615 		assert (carry >= 0 && carry <= 2);

   616 		Increment(R3, N2, carry);

   617 	}

   618 }

   620 // R[2*N] - result = A*A

   621 // T[2*N] - temporary work space

   622 // A[N] --- number to be squared

   624 void RecursiveSquare(word *R, word *T, const word *A, unsigned int N)

   625 {

   626 	assert(N && N%2==0);

   628 	if (N==2)

   629 		AtomicMultiply(R, A, A);

   630 	else if (N==4)

   631 	{

   632 		// VC60 workaround: MSVC 6.0 has an optimization problem that makes

   633 		// (dword)A*B where either A or B has been cast to a dword before

   634 		// very expensive. Revisit a CombaSquare4() function when this

   635 		// problem is fixed.

   637 // WARNING: KeithR. 05/08/03 This routine doesn't work with gcc on hardware

   638 // either.  I've completely removed it.  It may be worth looking into sometime

   639 // in the future.  Therefore, we use the CombaMultiply4 on all targets.

   640 //#ifdef __WINS__

   641 		CombaMultiply4(R, A, A);

   642 /*#else

   643 		CombaSquare4(R, A);

   644 #endif*/

   645 	}

   646 	else

   647 	{

   648 		const unsigned int N2 = N/2;

   650 		RecursiveSquare(R0, T2, A0, N2);

   651 		RecursiveSquare(R2, T2, A1, N2);

   652 		RecursiveMultiply(T0, T2, A0, A1, N2);

   654 		word carry = Add(R1, R1, T0, N);

   655 		carry += Add(R1, R1, T0, N);

   656 		Increment(R3, N2, carry);

   657 	}

   658 }

   659 // R[N] - bottom half of A*B

   660 // T[N] - temporary work space

   661 // A[N] - multiplier

   662 // B[N] - multiplicant

   664 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)

   665 {

   666 	assert(N>=2 && N%2==0);

   668 	if (N==2)

   669 		AtomicMultiplyBottom(R, A, B);

   670 	else if (N==4)

   671 		CombaMultiplyBottom4(R, A, B);

   672 	else if (N==8)

   673 		CombaMultiplyBottom8(R, A, B);

   674 	else

   675 	{

   676 		const unsigned int N2 = N/2;

   678 		RecursiveMultiply(R, T, A0, B0, N2);

   679 		RecursiveMultiplyBottom(T0, T1, A1, B0, N2);

   680 		Add(R1, R1, T0, N2);

   681 		RecursiveMultiplyBottom(T0, T1, A0, B1, N2);

   682 		Add(R1, R1, T0, N2);

   683 	}

   684 }

   686 // R[N] --- upper half of A*B

   687 // T[2*N] - temporary work space

   688 // L[N] --- lower half of A*B

   689 // A[N] --- multiplier

   690 // B[N] --- multiplicant

   692 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)

   693 {

   694 	assert(N>=2 && N%2==0);

   696 	if (N==2)

   697 	{

   698 		AtomicMultiply(T, A, B);

   699 		((dword *)R)[0] = ((dword *)T)[1];

   700 	}

   701 	else if (N==4)

   702 	{

   703 		CombaMultiply4(T, A, B);

   704 		((dword *)R)[0] = ((dword *)T)[2];

   705 		((dword *)R)[1] = ((dword *)T)[3];

   706 	}

   707 	else

   708 	{

   709 		const unsigned int N2 = N/2;

   710 		int carry;

   712 		int aComp = Compare(A0, A1, N2);

   713 		int bComp = Compare(B0, B1, N2);

   715 		switch (2*aComp + aComp + bComp)

   716 		{

   717 		case -4:

   718 			Subtract(R0, A1, A0, N2);

   719 			Subtract(R1, B0, B1, N2);

   720 			RecursiveMultiply(T0, T2, R0, R1, N2);

   721 			Subtract(T1, T1, R0, N2);

   722 			carry = -1;

   723 			break;

   724 		case -2:

   725 			Subtract(R0, A1, A0, N2);

   726 			Subtract(R1, B0, B1, N2);

   727 			RecursiveMultiply(T0, T2, R0, R1, N2);

   728 			carry = 0;

   729 			break;

   730 		case 2:

   731 			Subtract(R0, A0, A1, N2);

   732 			Subtract(R1, B1, B0, N2);

   733 			RecursiveMultiply(T0, T2, R0, R1, N2);

   734 			carry = 0;

   735 			break;

   736 		case 4:

   737 			Subtract(R0, A1, A0, N2);

   738 			Subtract(R1, B0, B1, N2);

   739 			RecursiveMultiply(T0, T2, R0, R1, N2);

   740 			Subtract(T1, T1, R1, N2);

   741 			carry = -1;

   742 			break;

   743 		default:

   744 			SetWords(T0, 0, N);

   745 			carry = 0;

   746 		}

   748 		RecursiveMultiply(T2, R0, A1, B1, N2);

   750 		// now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1

   752 		CopyWords(R0, L+N2, N2);

   753 		word c2 = Subtract(R0, R0, L, N2);

   754 		c2 += Subtract(R0, R0, T0, N2);

   755 		word t = (Compare(R0, T2, N2) == -1);

   757 		carry += t;

   758 		carry += Increment(R0, N2, c2+t);

   759 		carry += Add(R0, R0, T1, N2);

   760 		carry += Add(R0, R0, T3, N2);

   762 		CopyWords(R1, T3, N2);

   763 		assert (carry >= 0 && carry <= 2);

   764 		Increment(R1, N2, carry);

   765 	}

   766 }

   768 // R[NA+NB] - result = A*B

   769 // T[NA+NB] - temporary work space

   770 // A[NA] ---- multiplier

   771 // B[NB] ---- multiplicant

   773 void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)

   774 {

   775 	if (NA == NB)

   776 	{

   777 		if (A == B)

   778 			RecursiveSquare(R, T, A, NA);

   779 		else

   780 			RecursiveMultiply(R, T, A, B, NA);

   782 		return;

   783 	}

   785 	if (NA > NB)

   786 	{

   787 		TClassSwap(A, B);

   788 		TClassSwap(NA, NB);

   789 		//std::swap(A, B);

   790 		//std::swap(NA, NB);

   791 	}

   793 	assert(NB % NA == 0);

   794 	assert((NB/NA)%2 == 0); 	// NB is an even multiple of NA

   796 	if (NA==2 && !A[1])

   797 	{

   798 		switch (A[0])

   799 		{

   800 		case 0:

   801 			SetWords(R, 0, NB+2);

   802 			return;

   803 		case 1:

   804 			CopyWords(R, B, NB);

   805 			R[NB] = R[NB+1] = 0;

   806 			return;

   807 		default:

   808 			R[NB] = LinearMultiply(R, B, A[0], NB);

   809 			R[NB+1] = 0;

   810 			return;

   811 		}

   812 	}

   814 	RecursiveMultiply(R, T, A, B, NA);

   815 	CopyWords(T+2*NA, R+NA, NA);

   817 	unsigned i;

   819 	for (i=2*NA; i<NB; i+=2*NA)

   820 		RecursiveMultiply(T+NA+i, T, A, B+i, NA);

   821 	for (i=NA; i<NB; i+=2*NA)

   822 		RecursiveMultiply(R+i, T, A, B+i, NA);

   824 	if (Add(R+NA, R+NA, T+2*NA, NB-NA))

   825 		Increment(R+NB, NA);

   826 }

   827 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)

   828 // T[3*N/2] - temporary work space

   829 // A[N] ----- an odd number as input

   831 void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N)

   832 {

   833 	if (N==2)

   834 		AtomicInverseModPower2(R, A[0], A[1]);

   835 	else

   836 	{

   837 		const unsigned int N2 = N/2;

   838 		RecursiveInverseModPower2(R0, T0, A0, N2);

   839 		T0[0] = 1;

   840 		SetWords(T0+1, 0, N2-1);

   841 		RecursiveMultiplyTop(R1, T1, T0, R0, A0, N2);

   842 		RecursiveMultiplyBottom(T0, T1, R0, A1, N2);

   843 		Add(T0, R1, T0, N2);

   844 		TwosComplement(T0, N2);

   845 		RecursiveMultiplyBottom(R1, T1, R0, T0, N2);

   846 	}

   847 }

   848 #undef A0

   849 #undef A1

   850 #undef B0

   851 #undef B1

   853 #undef T0

   854 #undef T1

   855 #undef T2

   856 #undef T3

   858 #undef R0

   859 #undef R1

   860 #undef R2

   861 #undef R3

   863 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M

   864 // T[3*N] - temporary work space

   865 // X[2*N] - number to be reduced

   866 // M[N] --- modulus

   867 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)

   869 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, unsigned int N)

   870 {

   871 	RecursiveMultiplyBottom(R, T, X, U, N);

   872 	RecursiveMultiplyTop(T, T+N, X, R, M, N);

   873 	if (Subtract(R, X+N, T, N))

   874 	{

   875 #ifdef _DEBUG

   876 		word carry = Add(R, R, M, N);

   877 		assert(carry);

   878 #else

   879 		Add(R, R, M, N);

   880 #endif

   881 	}

   882 }

   884 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A

   885 static word SubatomicDivide(word *A, word B0, word B1)

   886 {

   887 	// assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word

   888 	assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));

   890 	dword p, u;

   891 	word Q;

   893 	// estimate the quotient: do a 2 word by 1 word divide

   894 	if (B1+1 == 0)

   895 		Q = A[2];

   896 	else

   897 		Q = word(MAKE_DWORD(A[1], A[2]) / (B1+1));

   899 	// now subtract Q*B from A

   900 	p = (dword) B0*Q;

   901 	u = (dword) A[0] - LOW_WORD(p);

   902 	A[0] = LOW_WORD(u);

   903 	u = (dword) A[1] - HIGH_WORD(p) - (word)(0-HIGH_WORD(u)) - (dword)B1*Q;

   904 	A[1] = LOW_WORD(u);

   905 	A[2] += HIGH_WORD(u);

   907 	// Q <= actual quotient, so fix it

   908 	while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))

   909 	{

   910 		u = (dword) A[0] - B0;

   911 		A[0] = LOW_WORD(u);

   912 		u = (dword) A[1] - B1 - (word)(0-HIGH_WORD(u));

   913 		A[1] = LOW_WORD(u);

   914 		A[2] += HIGH_WORD(u);

   915 		Q++;

   916 		assert(Q);	// shouldn't overflow

   917 	}

   919 	return Q;

   920 }

   922 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1

   923 static inline void AtomicDivide(word *Q, const word *A, const word *B)

   924 {

   925 	if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)

   926 	{

   927 		Q[0] = A[2];

   928 		Q[1] = A[3];

   929 	}

   930 	else

   931 	{

   932 		word T[4];

   933 		T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];

   934 		Q[1] = SubatomicDivide(T+1, B[0], B[1]);

   935 		Q[0] = SubatomicDivide(T, B[0], B[1]);

   937 #ifdef _DEBUG

   938 		// multiply quotient and divisor and add remainder, make sure it equals dividend

   939 		assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));

   940 		word P[4];

   941 		AtomicMultiply(P, Q, B);

   942 		Add(P, P, T, 4);

   943 		assert(Mem::Compare((TUint8*)P, 4*WORD_SIZE, (TUint8*)A, 4*WORD_SIZE)==0);

   944 #endif

   945 	}

   946 }

   948 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}

   949 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N)

   950 {

   951 	assert(N && N%2==0);

   953 	if (Q[1])

   954 	{

   955 		T[N] = T[N+1] = 0;

   956 		unsigned i;

   957 		for (i=0; i<N; i+=4)

   958 			AtomicMultiply(T+i, Q, B+i);

   959 		for (i=2; i<N; i+=4)

   960 			if (AtomicMultiplyAdd(T+i, Q, B+i))

   961 				T[i+5] += (++T[i+4]==0);

   962 	}

   963 	else

   964 	{

   965 		T[N] = LinearMultiply(T, B, Q[0], N);

   966 		T[N+1] = 0;

   967 	}

   969 #ifdef _DEBUG

   970 	word borrow = Subtract(R, R, T, N+2);

   971 	assert(!borrow && !R[N+1]);

   972 #else

   973 	Subtract(R, R, T, N+2);

   974 #endif

   976 	while (R[N] || Compare(R, B, N) >= 0)

   977 	{

   978 		R[N] -= Subtract(R, R, B, N);

   979 		Q[1] += (++Q[0]==0);

   980 		assert(Q[0] || Q[1]); // no overflow

   981 	}

   982 }

   984 // R[NB] -------- remainder = A%B

   985 // Q[NA-NB+2] --- quotient	= A/B

   986 // T[NA+2*NB+4] - temp work space

   987 // A[NA] -------- dividend

   988 // B[NB] -------- divisor

   990 void Divide(word *R, word *Q, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)

   991 {

   992 	assert(NA && NB && NA%2==0 && NB%2==0);

   993 	assert(B[NB-1] || B[NB-2]);

   994 	assert(NB <= NA);

   996 	// set up temporary work space

   997 	word *const TA=T;

   998 	word *const TB=T+NA+2;

   999 	word *const TP=T+NA+2+NB;

  1001 	// copy B into TB and normalize it so that TB has highest bit set to 1

  1002 	unsigned shiftWords = (B[NB-1]==0);

  1003 	TB[0] = TB[NB-1] = 0;

  1004 	CopyWords(TB+shiftWords, B, NB-shiftWords);

  1005 	unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);

  1006 	assert(shiftBits < WORD_BITS);

  1007 	ShiftWordsLeftByBits(TB, NB, shiftBits);

  1009 	// copy A into TA and normalize it

  1010 	TA[0] = TA[NA] = TA[NA+1] = 0;

  1011 	CopyWords(TA+shiftWords, A, NA);

  1012 	ShiftWordsLeftByBits(TA, NA+2, shiftBits);

  1014 	if (TA[NA+1]==0 && TA[NA] <= 1)

  1015 	{

  1016 		Q[NA-NB+1] = Q[NA-NB] = 0;

  1017 		while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)

  1018 		{

  1019 			TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);

  1020 			++Q[NA-NB];

  1021 		}

  1022 	}

  1023 	else

  1024 	{

  1025 		NA+=2;

  1026 		assert(Compare(TA+NA-NB, TB, NB) < 0);

  1027 	}

  1029 	word BT[2];

  1030 	BT[0] = TB[NB-2] + 1;

  1031 	BT[1] = TB[NB-1] + (BT[0]==0);

  1033 	// start reducing TA mod TB, 2 words at a time

  1034 	for (unsigned i=NA-2; i>=NB; i-=2)

  1035 	{

  1036 		AtomicDivide(Q+i-NB, TA+i-2, BT);

  1037 		CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);

  1038 	}

  1040 	// copy TA into R, and denormalize it

  1041 	CopyWords(R, TA+shiftWords, NB);

  1042 	ShiftWordsRightByBits(R, NB, shiftBits);

  1043 }

  1045 static inline unsigned int EvenWordCount(const word *X, unsigned int N)

  1046 {

  1047 	while (N && X[N-2]==0 && X[N-1]==0)

  1048 		N-=2;

  1049 	return N;

  1050 }

  1052 // return k

  1053 // R[N] --- result = A^(-1) * 2^k mod M

  1054 // T[4*N] - temporary work space

  1055 // A[NA] -- number to take inverse of

  1056 // M[N] --- modulus

  1058 unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA, const word *M, unsigned int N)

  1059 {

  1060 	assert(NA<=N && N && N%2==0);

  1062 	word *b = T;

  1063 	word *c = T+N;

  1064 	word *f = T+2*N;

  1065 	word *g = T+3*N;

  1066 	unsigned int bcLen=2, fgLen=EvenWordCount(M, N);

  1067 	unsigned int k=0, s=0;

  1069 	SetWords(T, 0, 3*N);

  1070 	b[0]=1;

  1071 	CopyWords(f, A, NA);

  1072 	CopyWords(g, M, N);

  1074 	FOREVER

  1075 	{

  1076 		word t=f[0];

  1077 		while (!t)

  1078 		{

  1079 			if (EvenWordCount(f, fgLen)==0)

  1080 			{

  1081 				SetWords(R, 0, N);

  1082 				return 0;

  1083 			}

  1085 			ShiftWordsRightByWords(f, fgLen, 1);

  1086 			if (c[bcLen-1]) bcLen+=2;

  1087 			assert(bcLen <= N);

  1088 			ShiftWordsLeftByWords(c, bcLen, 1);

  1089 			k+=WORD_BITS;

  1090 			t=f[0];

  1091 		}

  1093 		unsigned int i=0;

  1094 		while (t%2 == 0)

  1095 		{

  1096 			t>>=1;

  1097 			i++;

  1098 		}

  1099 		k+=i;

  1101 		if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)

  1102 		{

  1103 			if (s%2==0)

  1104 				CopyWords(R, b, N);

  1105 			else

  1106 				Subtract(R, M, b, N);

  1107 			return k;

  1108 		}

  1110 		ShiftWordsRightByBits(f, fgLen, i);

  1111 		t=ShiftWordsLeftByBits(c, bcLen, i);

  1112 		if (t)

  1113 		{

  1114 			c[bcLen] = t;

  1115 			bcLen+=2;

  1116 			assert(bcLen <= N);

  1117 		}

  1119 		if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)

  1120 			fgLen-=2;

  1122 		if (Compare(f, g, fgLen)==-1)

  1123 		{

  1124 			TClassSwap<word*>(f,g);

  1125 			TClassSwap<word*>(b,c);

  1126 			s++;

  1127 		}

  1129 		Subtract(f, f, g, fgLen);

  1131 		if (Add(b, b, c, bcLen))

  1132 		{

  1133 			b[bcLen] = 1;

  1134 			bcLen+=2;

  1135 			assert(bcLen <= N);

  1136 		}

  1137 	}

  1138 }

  1140 // R[N] - result = A/(2^k) mod M

  1141 // A[N] - input

  1142 // M[N] - modulus

  1144 void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)

  1145 {

  1146 	CopyWords(R, A, N);

  1148 	while (k--)

  1149 	{

  1150 		if (R[0]%2==0)

  1151 			ShiftWordsRightByBits(R, N, 1);

  1152 		else

  1153 		{

  1154 			word carry = Add(R, R, M, N);

  1155 			ShiftWordsRightByBits(R, N, 1);

  1156 			R[N-1] += carry<<(WORD_BITS-1);

  1157 		}

  1158 	}

  1159 }

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--