/* crypto/bn/bn_asm.c */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
 * All rights reserved.
 *
 * This package is an SSL implementation written
 * by Eric Young (eay@cryptsoft.com).
 * The implementation was written so as to conform with Netscapes SSL.
 *
 * This library is free for commercial and non-commercial use as long as
 * the following conditions are aheared to. The following conditions
 * apply to all code found in this distribution, be it the RC4, RSA,
 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
 * included with this distribution is covered by the same copyright terms
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
 *
 * Copyright remains Eric Young's, and as such any Copyright notices in
 * the code are not to be removed.
 * If this package is used in a product, Eric Young should be given attribution
 * as the author of the parts of the library used.
 * This can be in the form of a textual message at program startup or
 * in documentation (online or textual) provided with the package.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 * "This product includes cryptographic software written by
 * Eric Young (eay@cryptsoft.com)"
 * The word 'cryptographic' can be left out if the rouines from the library
 * being used are not cryptographic related :-).
 * 4. If you include any Windows specific code (or a derivative thereof) from
 * the apps directory (application code) you must include an acknowledgement:
 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
 *
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * The licence and distribution terms for any publically available version or
 * derivative of this code cannot be changed. i.e. this code cannot simply be
 * copied and put under another distribution licence
 * [including the GNU Public Licence.]
sl@0: */ sl@0: sl@0: #ifndef BN_DEBUG sl@0: # undef NDEBUG /* avoid conflicting definitions */ sl@0: # define NDEBUG sl@0: #endif sl@0: sl@0: #include sl@0: #include sl@0: #include "cryptlib.h" sl@0: #include "bn_lcl.h" sl@0: sl@0: #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) sl@0: sl@0: EXPORT_C BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) sl@0: { sl@0: BN_ULONG c1=0; sl@0: sl@0: assert(num >= 0); sl@0: if (num <= 0) return(c1); sl@0: sl@0: while (num&~3) sl@0: { sl@0: mul_add(rp[0],ap[0],w,c1); sl@0: mul_add(rp[1],ap[1],w,c1); sl@0: mul_add(rp[2],ap[2],w,c1); sl@0: mul_add(rp[3],ap[3],w,c1); sl@0: ap+=4; rp+=4; num-=4; sl@0: } sl@0: if (num) sl@0: { sl@0: mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; sl@0: mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; sl@0: mul_add(rp[2],ap[2],w,c1); return c1; sl@0: } sl@0: sl@0: return(c1); sl@0: } sl@0: sl@0: EXPORT_C BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) sl@0: { sl@0: BN_ULONG c1=0; sl@0: sl@0: assert(num >= 0); sl@0: if (num <= 0) return(c1); sl@0: sl@0: while (num&~3) sl@0: { sl@0: mul(rp[0],ap[0],w,c1); sl@0: mul(rp[1],ap[1],w,c1); sl@0: mul(rp[2],ap[2],w,c1); sl@0: mul(rp[3],ap[3],w,c1); sl@0: ap+=4; rp+=4; num-=4; sl@0: } sl@0: if (num) sl@0: { sl@0: mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; sl@0: mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; sl@0: mul(rp[2],ap[2],w,c1); sl@0: } sl@0: return(c1); sl@0: } sl@0: sl@0: EXPORT_C void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) sl@0: { sl@0: assert(n >= 0); sl@0: if (n <= 0) return; sl@0: while (n&~3) sl@0: { sl@0: sqr(r[0],r[1],a[0]); sl@0: sqr(r[2],r[3],a[1]); sl@0: sqr(r[4],r[5],a[2]); sl@0: sqr(r[6],r[7],a[3]); sl@0: a+=4; r+=8; n-=4; sl@0: } sl@0: if (n) sl@0: { sl@0: sqr(r[0],r[1],a[0]); if (--n == 0) return; sl@0: sqr(r[2],r[3],a[1]); if (--n == 0) return; sl@0: sqr(r[4],r[5],a[2]); sl@0: } sl@0: } sl@0: sl@0: #else /* !(defined(BN_LLONG) || 
defined(BN_UMULT_HIGH)) */ sl@0: sl@0: EXPORT_C BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) sl@0: { sl@0: BN_ULONG c=0; sl@0: BN_ULONG bl,bh; sl@0: sl@0: assert(num >= 0); sl@0: if (num <= 0) return((BN_ULONG)0); sl@0: sl@0: bl=LBITS(w); sl@0: bh=HBITS(w); sl@0: sl@0: for (;;) sl@0: { sl@0: mul_add(rp[0],ap[0],bl,bh,c); sl@0: if (--num == 0) break; sl@0: mul_add(rp[1],ap[1],bl,bh,c); sl@0: if (--num == 0) break; sl@0: mul_add(rp[2],ap[2],bl,bh,c); sl@0: if (--num == 0) break; sl@0: mul_add(rp[3],ap[3],bl,bh,c); sl@0: if (--num == 0) break; sl@0: ap+=4; sl@0: rp+=4; sl@0: } sl@0: return(c); sl@0: } sl@0: sl@0: EXPORT_C BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) sl@0: { sl@0: BN_ULONG carry=0; sl@0: BN_ULONG bl,bh; sl@0: sl@0: assert(num >= 0); sl@0: if (num <= 0) return((BN_ULONG)0); sl@0: sl@0: bl=LBITS(w); sl@0: bh=HBITS(w); sl@0: sl@0: for (;;) sl@0: { sl@0: mul(rp[0],ap[0],bl,bh,carry); sl@0: if (--num == 0) break; sl@0: mul(rp[1],ap[1],bl,bh,carry); sl@0: if (--num == 0) break; sl@0: mul(rp[2],ap[2],bl,bh,carry); sl@0: if (--num == 0) break; sl@0: mul(rp[3],ap[3],bl,bh,carry); sl@0: if (--num == 0) break; sl@0: ap+=4; sl@0: rp+=4; sl@0: } sl@0: return(carry); sl@0: } sl@0: sl@0: EXPORT_C void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) sl@0: { sl@0: assert(n >= 0); sl@0: if (n <= 0) return; sl@0: for (;;) sl@0: { sl@0: sqr64(r[0],r[1],a[0]); sl@0: if (--n == 0) break; sl@0: sl@0: sqr64(r[2],r[3],a[1]); sl@0: if (--n == 0) break; sl@0: sl@0: sqr64(r[4],r[5],a[2]); sl@0: if (--n == 0) break; sl@0: sl@0: sqr64(r[6],r[7],a[3]); sl@0: if (--n == 0) break; sl@0: sl@0: a+=4; sl@0: r+=8; sl@0: } sl@0: } sl@0: sl@0: #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ sl@0: sl@0: #if defined(BN_LLONG) && defined(BN_DIV2W) sl@0: sl@0: EXPORT_C BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) sl@0: { sl@0: return((BN_ULONG)(((((BN_ULLONG)h)<= d) h-=d; sl@0: sl@0: if (i) sl@0: { 
sl@0: d<<=i; sl@0: h=(h<>(BN_BITS2-i)); sl@0: l<<=i; sl@0: } sl@0: dh=(d&BN_MASK2h)>>BN_BITS4; sl@0: dl=(d&BN_MASK2l); sl@0: for (;;) sl@0: { sl@0: if ((h>>BN_BITS4) == dh) sl@0: q=BN_MASK2l; sl@0: else sl@0: q=h/dh; sl@0: sl@0: th=q*dh; sl@0: tl=dl*q; sl@0: for (;;) sl@0: { sl@0: t=h-th; sl@0: if ((t&BN_MASK2h) || sl@0: ((tl) <= ( sl@0: (t<>BN_BITS4)))) sl@0: break; sl@0: q--; sl@0: th-=dh; sl@0: tl-=dl; sl@0: } sl@0: t=(tl>>BN_BITS4); sl@0: tl=(tl<>BN_BITS4))&BN_MASK2; sl@0: l=(l&BN_MASK2l)<= 0); sl@0: if (n <= 0) return((BN_ULONG)0); sl@0: sl@0: for (;;) sl@0: { sl@0: ll+=(BN_ULLONG)a[0]+b[0]; sl@0: r[0]=(BN_ULONG)ll&BN_MASK2; sl@0: ll>>=BN_BITS2; sl@0: if (--n <= 0) break; sl@0: sl@0: ll+=(BN_ULLONG)a[1]+b[1]; sl@0: r[1]=(BN_ULONG)ll&BN_MASK2; sl@0: ll>>=BN_BITS2; sl@0: if (--n <= 0) break; sl@0: sl@0: ll+=(BN_ULLONG)a[2]+b[2]; sl@0: r[2]=(BN_ULONG)ll&BN_MASK2; sl@0: ll>>=BN_BITS2; sl@0: if (--n <= 0) break; sl@0: sl@0: ll+=(BN_ULLONG)a[3]+b[3]; sl@0: r[3]=(BN_ULONG)ll&BN_MASK2; sl@0: ll>>=BN_BITS2; sl@0: if (--n <= 0) break; sl@0: sl@0: a+=4; sl@0: b+=4; sl@0: r+=4; sl@0: } sl@0: return((BN_ULONG)ll); sl@0: } sl@0: #else /* !BN_LLONG */ sl@0: EXPORT_C BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) sl@0: { sl@0: BN_ULONG c,l,t; sl@0: sl@0: assert(n >= 0); sl@0: if (n <= 0) return((BN_ULONG)0); sl@0: sl@0: c=0; sl@0: for (;;) sl@0: { sl@0: t=a[0]; sl@0: t=(t+c)&BN_MASK2; sl@0: c=(t < c); sl@0: l=(t+b[0])&BN_MASK2; sl@0: c+=(l < t); sl@0: r[0]=l; sl@0: if (--n <= 0) break; sl@0: sl@0: t=a[1]; sl@0: t=(t+c)&BN_MASK2; sl@0: c=(t < c); sl@0: l=(t+b[1])&BN_MASK2; sl@0: c+=(l < t); sl@0: r[1]=l; sl@0: if (--n <= 0) break; sl@0: sl@0: t=a[2]; sl@0: t=(t+c)&BN_MASK2; sl@0: c=(t < c); sl@0: l=(t+b[2])&BN_MASK2; sl@0: c+=(l < t); sl@0: r[2]=l; sl@0: if (--n <= 0) break; sl@0: sl@0: t=a[3]; sl@0: t=(t+c)&BN_MASK2; sl@0: c=(t < c); sl@0: l=(t+b[3])&BN_MASK2; sl@0: c+=(l < t); sl@0: r[3]=l; sl@0: if (--n <= 0) break; sl@0: sl@0: a+=4; sl@0: 
b+=4; sl@0: r+=4; sl@0: } sl@0: return((BN_ULONG)c); sl@0: } sl@0: #endif /* !BN_LLONG */ sl@0: sl@0: EXPORT_C BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) sl@0: { sl@0: BN_ULONG t1,t2; sl@0: int c=0; sl@0: sl@0: assert(n >= 0); sl@0: if (n <= 0) return((BN_ULONG)0); sl@0: sl@0: for (;;) sl@0: { sl@0: t1=a[0]; t2=b[0]; sl@0: r[0]=(t1-t2-c)&BN_MASK2; sl@0: if (t1 != t2) c=(t1 < t2); sl@0: if (--n <= 0) break; sl@0: sl@0: t1=a[1]; t2=b[1]; sl@0: r[1]=(t1-t2-c)&BN_MASK2; sl@0: if (t1 != t2) c=(t1 < t2); sl@0: if (--n <= 0) break; sl@0: sl@0: t1=a[2]; t2=b[2]; sl@0: r[2]=(t1-t2-c)&BN_MASK2; sl@0: if (t1 != t2) c=(t1 < t2); sl@0: if (--n <= 0) break; sl@0: sl@0: t1=a[3]; t2=b[3]; sl@0: r[3]=(t1-t2-c)&BN_MASK2; sl@0: if (t1 != t2) c=(t1 < t2); sl@0: if (--n <= 0) break; sl@0: sl@0: a+=4; sl@0: b+=4; sl@0: r+=4; sl@0: } sl@0: return(c); sl@0: } sl@0: sl@0: #ifdef BN_MUL_COMBA sl@0: sl@0: #undef bn_mul_comba8 sl@0: #undef bn_mul_comba4 sl@0: #undef bn_sqr_comba8 sl@0: #undef bn_sqr_comba4 sl@0: sl@0: /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ sl@0: /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ sl@0: /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ sl@0: /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ sl@0: sl@0: #ifdef BN_LLONG sl@0: #define mul_add_c(a,b,c0,c1,c2) \ sl@0: t=(BN_ULLONG)a*b; \ sl@0: t1=(BN_ULONG)Lw(t); \ sl@0: t2=(BN_ULONG)Hw(t); \ sl@0: c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ sl@0: c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; sl@0: sl@0: #define mul_add_c2(a,b,c0,c1,c2) \ sl@0: t=(BN_ULLONG)a*b; \ sl@0: tt=(t+t)&BN_MASK; \ sl@0: if (tt < t) c2++; \ sl@0: t1=(BN_ULONG)Lw(tt); \ sl@0: t2=(BN_ULONG)Hw(tt); \ sl@0: c0=(c0+t1)&BN_MASK2; \ sl@0: if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ sl@0: c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; sl@0: sl@0: #define sqr_add_c(a,i,c0,c1,c2) \ sl@0: 
t=(BN_ULLONG)a[i]*a[i]; \ sl@0: t1=(BN_ULONG)Lw(t); \ sl@0: t2=(BN_ULONG)Hw(t); \ sl@0: c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ sl@0: c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; sl@0: sl@0: #define sqr_add_c2(a,i,j,c0,c1,c2) \ sl@0: mul_add_c2((a)[i],(a)[j],c0,c1,c2) sl@0: sl@0: #elif defined(BN_UMULT_LOHI) sl@0: sl@0: #define mul_add_c(a,b,c0,c1,c2) { \ sl@0: BN_ULONG ta=(a),tb=(b); \ sl@0: BN_UMULT_LOHI(t1,t2,ta,tb); \ sl@0: c0 += t1; t2 += (c0