os/ossrv/ssl/libcrypto/src/crypto/sha/sha512.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /* crypto/sha/sha512.c */
     2 /* ====================================================================
     3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
     4  * according to the OpenSSL license [found here].
     5  * ====================================================================
     6  */
     7 
     8 /* ====================================================================
     9  * Copyright (c) 1998-2007 The OpenSSL Project.  All rights reserved.
    10  *
    11  * Redistribution and use in source and binary forms, with or without
    12  * modification, are permitted provided that the following conditions
    13  * are met:
    14  *
    15  * 1. Redistributions of source code must retain the above copyright
    16  *    notice, this list of conditions and the following disclaimer. 
    17  *
    18  * 2. Redistributions in binary form must reproduce the above copyright
    19  *    notice, this list of conditions and the following disclaimer in
    20  *    the documentation and/or other materials provided with the
    21  *    distribution.
    22  *
    23  * 3. All advertising materials mentioning features or use of this
    24  *    software must display the following acknowledgment:
    25  *    "This product includes software developed by the OpenSSL Project
    26  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
    27  *
    28  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
    29  *    endorse or promote products derived from this software without
    30  *    prior written permission. For written permission, please contact
    31  *    openssl-core@openssl.org.
    32  *
    33  * 5. Products derived from this software may not be called "OpenSSL"
    34  *    nor may "OpenSSL" appear in their names without prior written
    35  *    permission of the OpenSSL Project.
    36  *
    37  * 6. Redistributions of any form whatsoever must retain the following
    38  *    acknowledgment:
    39  *    "This product includes software developed by the OpenSSL Project
    40  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
    41  *
    42  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
    43  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    45  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
    46  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	
    47  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
    48  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    49  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    51  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    52  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
    53  * OF THE POSSIBILITY OF SUCH DAMAGE.
    54  * ====================================================================
    55  *
    56  * This product includes cryptographic software written by Eric Young
    57  * (eay@cryptsoft.com).  This product includes software written by Tim
    58  * Hudson (tjh@cryptsoft.com).
    59  *
    60  */
    61 
    62 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
    63  * All rights reserved.
    64  *
    65  * This package is an SSL implementation written
    66  * by Eric Young (eay@cryptsoft.com).
    67  * The implementation was written so as to conform with Netscapes SSL.
    68  * 
    69  * This library is free for commercial and non-commercial use as long as
    70  * the following conditions are aheared to.  The following conditions
    71  * apply to all code found in this distribution, be it the RC4, RSA,
    72  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
    73  * included with this distribution is covered by the same copyright terms
    74  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
    75  * 
    76  * Copyright remains Eric Young's, and as such any Copyright notices in
    77  * the code are not to be removed.
    78  * If this package is used in a product, Eric Young should be given attribution
    79  * as the author of the parts of the library used.
    80  * This can be in the form of a textual message at program startup or
    81  * in documentation (online or textual) provided with the package.
    82  * 
    83  * Redistribution and use in source and binary forms, with or without
    84  * modification, are permitted provided that the following conditions
    85  * are met:
    86  * 1. Redistributions of source code must retain the copyright
    87  *    notice, this list of conditions and the following disclaimer.
    88  * 2. Redistributions in binary form must reproduce the above copyright
    89  *    notice, this list of conditions and the following disclaimer in the
    90  *    documentation and/or other materials provided with the distribution.
    91  * 3. All advertising materials mentioning features or use of this software
    92  *    must display the following acknowledgement:
    93  *    "This product includes cryptographic software written by
    94  *     Eric Young (eay@cryptsoft.com)"
    95  *    The word 'cryptographic' can be left out if the rouines from the library
    96  *    being used are not cryptographic related :-).
    97  * 4. If you include any Windows specific code (or a derivative thereof) from 
    98  *    the apps directory (application code) you must include an acknowledgement:
    99  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
   100  * 
   101  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
   102  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   103  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   104  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   105  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   106  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   107  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   108  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   109  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   110  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   111  * SUCH DAMAGE.
   112  * 
   113  * The licence and distribution terms for any publically available version or
   114  * derivative of this code cannot be changed.  i.e. this code cannot simply be
   115  * copied and put under another distribution licence
   116  * [including the GNU Public Licence.]
   117  */
   118 /*
   119  © Portions copyright (c) 2010 Nokia Corporation.  All rights reserved.
   120  */
   121 
   122 #include <openssl/opensslconf.h>
   123 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
   124 /*
   125  * IMPLEMENTATION NOTES.
   126  *
   127  * As you might have noticed 32-bit hash algorithms:
   128  *
   129  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
   130  * - optimized versions implement two transform functions: one operating
   131  *   on [aligned] data in host byte order and one - on data in input
   132  *   stream byte order;
   133  * - share common byte-order neutral collector and padding function
   134  *   implementations, ../md32_common.h;
   135  *
   136  * Neither of the above applies to this SHA-512 implementation. Reasons
   137  * [in reverse order] are:
   138  *
   139  * - it's the only 64-bit hash algorithm for the moment of this writing,
   140  *   there is no need for common collector/padding implementation [yet];
   141  * - by supporting only one transform function [which operates on
   142  *   *aligned* data in input stream byte order, big-endian in this case]
   143  *   we minimize burden of maintenance in two ways: a) collector/padding
   144  *   function is simpler; b) only one transform function to stare at;
   145  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
   146  *   apply a number of optimizations to mitigate potential performance
   147  *   penalties caused by previous design decision;
   148  *
   149  * Caveat lector.
   150  *
   151  * Implementation relies on the fact that "long long" is 64-bit on
   152  * both 32- and 64-bit platforms. If some compiler vendor comes up
   153  * with 128-bit long long, adjustment to sha.h would be required.
   154  * As this implementation relies on 64-bit integer type, it's totally
   155  * inappropriate for platforms which don't support it, most notably
   156  * 16-bit platforms.
   157  *					<appro@fy.chalmers.se>
   158  */
   159 #include <stdlib.h>
   160 #include <string.h>
   161 
   162 #include <openssl/crypto.h>
   163 #include <openssl/sha.h>
   164 #include <openssl/opensslv.h>
   165 
   166 #include "cryptlib.h"
   167 
       /*
        * Version identification string for this module: the literal "SHA-512"
        * concatenated at compile time with OPENSSL_VERSION_PTEXT.
        */
   168 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
   169 
       /*
        * Defined when one of the x86 / x86-64 compiler macros tested below is
        * set.  When it is defined, SHA512_Update() hands caller data to
        * sha512_block() directly and skips its alignment check and the
        * copy through the context buffer.
        */
   170 #if defined(_M_IX86) || defined(_M_AMD64) || defined(__i386) || defined(__x86_64)
   171 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
   172 #endif
   173 
   174 EXPORT_C int SHA384_Init (SHA512_CTX *c)
   175 	{
   176 	c->h[0]=U64(0xcbbb9d5dc1059ed8);
   177 	c->h[1]=U64(0x629a292a367cd507);
   178 	c->h[2]=U64(0x9159015a3070dd17);
   179 	c->h[3]=U64(0x152fecd8f70e5939);
   180 	c->h[4]=U64(0x67332667ffc00b31);
   181 	c->h[5]=U64(0x8eb44a8768581511);
   182 	c->h[6]=U64(0xdb0c2e0d64f98fa7);
   183 	c->h[7]=U64(0x47b5481dbefa4fa4);
   184         c->Nl=0;        c->Nh=0;
   185         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
   186         return 1;
   187 	}
   188 
   189 EXPORT_C int SHA512_Init (SHA512_CTX *c)
   190 	{
   191 	c->h[0]=U64(0x6a09e667f3bcc908);
   192 	c->h[1]=U64(0xbb67ae8584caa73b);
   193 	c->h[2]=U64(0x3c6ef372fe94f82b);
   194 	c->h[3]=U64(0xa54ff53a5f1d36f1);
   195 	c->h[4]=U64(0x510e527fade682d1);
   196 	c->h[5]=U64(0x9b05688c2b3e6c1f);
   197 	c->h[6]=U64(0x1f83d9abfb41bd6b);
   198 	c->h[7]=U64(0x5be0cd19137e2179);
   199         c->Nl=0;        c->Nh=0;
   200         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
   201         return 1;
   202 	}
   203 
       /*
        * Forward declaration of the compression routine: folds `num`
        * consecutive blocks starting at `in` into ctx->h.  It has internal
        * linkage when the C implementation later in this file is used, and
        * external linkage when SHA512_ASM is defined (no C body is compiled
        * in that case).
        */
   204 #ifndef SHA512_ASM
   205 static
   206 #endif
   207 void sha512_block (SHA512_CTX *ctx, const void *in, size_t num);
   208 
   209 EXPORT_C int SHA512_Final (unsigned char *md, SHA512_CTX *c)
   210 	{
   211 	unsigned char *p=(unsigned char *)c->u.p;
   212 	size_t n=c->num;
   213 
   214 	p[n]=0x80;	/* There always is a room for one */
   215 	n++;
   216 	if (n > (sizeof(c->u)-16))
   217 		memset (p+n,0,sizeof(c->u)-n), n=0,
   218 		sha512_block (c,p,1);
   219 
   220 	memset (p+n,0,sizeof(c->u)-16-n);
   221 #ifdef	B_ENDIAN
   222 	c->u.d[SHA_LBLOCK-2] = c->Nh;
   223 	c->u.d[SHA_LBLOCK-1] = c->Nl;
   224 #else
   225 	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
   226 	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
   227 	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
   228 	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
   229 	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
   230 	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
   231 	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
   232 	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
   233 	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
   234 	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
   235 	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
   236 	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
   237 	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
   238 	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
   239 	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
   240 	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
   241 #endif
   242 
   243 	sha512_block (c,p,1);
   244 
   245 	if (md==0) return 0;
   246 
   247 	switch (c->md_len)
   248 		{
   249 		/* Let compiler decide if it's appropriate to unroll... */
   250 		case SHA384_DIGEST_LENGTH:
   251 			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
   252 				{
   253 				SHA_LONG64 t = c->h[n];
   254 
   255 				*(md++)	= (unsigned char)(t>>56);
   256 				*(md++)	= (unsigned char)(t>>48);
   257 				*(md++)	= (unsigned char)(t>>40);
   258 				*(md++)	= (unsigned char)(t>>32);
   259 				*(md++)	= (unsigned char)(t>>24);
   260 				*(md++)	= (unsigned char)(t>>16);
   261 				*(md++)	= (unsigned char)(t>>8);
   262 				*(md++)	= (unsigned char)(t);
   263 				}
   264 			break;
   265 		case SHA512_DIGEST_LENGTH:
   266 			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
   267 				{
   268 				SHA_LONG64 t = c->h[n];
   269 
   270 				*(md++)	= (unsigned char)(t>>56);
   271 				*(md++)	= (unsigned char)(t>>48);
   272 				*(md++)	= (unsigned char)(t>>40);
   273 				*(md++)	= (unsigned char)(t>>32);
   274 				*(md++)	= (unsigned char)(t>>24);
   275 				*(md++)	= (unsigned char)(t>>16);
   276 				*(md++)	= (unsigned char)(t>>8);
   277 				*(md++)	= (unsigned char)(t);
   278 				}
   279 			break;
   280 		/* ... as well as make sure md_len is not abused. */
   281 		default:	return 0;
   282 		}
   283 
   284 	return 1;
   285 	}
   286 
   287 EXPORT_C int SHA384_Final (unsigned char *md,SHA512_CTX *c)
   288 {   return SHA512_Final (md,c);   }
   289 
   290 EXPORT_C int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
   291 	{
   292 	SHA_LONG64	l;
   293 	unsigned char  *p=c->u.p;
   294 	const unsigned char *data=(const unsigned char *)_data;
   295 
   296 	if (len==0) return  1;
   297 
   298 	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
   299 	if (l < c->Nl)		c->Nh++;
   300 	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
   301 	c->Nl=l;
   302 
   303 	if (c->num != 0)
   304 		{
   305 		size_t n = sizeof(c->u) - c->num;
   306 
   307 		if (len < n)
   308 			{
   309 			memcpy (p+c->num,data,len), c->num += len;
   310 			return 1;
   311 			}
   312 		else	{
   313 			memcpy (p+c->num,data,n), c->num = 0;
   314 			len-=n, data+=n;
   315 			sha512_block (c,p,1);
   316 			}
   317 		}
   318 
   319 	if (len >= sizeof(c->u))
   320 		{
   321 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
   322 		if ((size_t)data%sizeof(c->u.d[0]) != 0)
   323 			while (len >= sizeof(c->u))
   324 				memcpy (p,data,sizeof(c->u)),
   325 				sha512_block (c,p,1),
   326 				len  -= sizeof(c->u),
   327 				data += sizeof(c->u);
   328 		else
   329 #endif
   330 			sha512_block (c,data,len/sizeof(c->u)),
   331 			data += len,
   332 			len  %= sizeof(c->u),
   333 			data -= len;
   334 		}
   335 
   336 	if (len != 0)	memcpy (p,data,len), c->num = (int)len;
   337 
   338 	return 1;
   339 	}
   340 
   341 EXPORT_C int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
   342 {   return SHA512_Update (c,data,len);   }
   343 
   344 EXPORT_C void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
   345 {   sha512_block (c,data,1);  }
   346 
   347 EXPORT_C unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
   348 	{
   349 	SHA512_CTX c;
   350 	static unsigned char m[SHA384_DIGEST_LENGTH];
   351 
   352 	if (md == NULL) md=m;
   353 	SHA384_Init(&c);
   354 	SHA512_Update(&c,d,n);
   355 	SHA512_Final(md,&c);
   356 	OPENSSL_cleanse(&c,sizeof(c));
   357 	return(md);
   358 	}
   359 
   360 EXPORT_C unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
   361 	{
   362 	SHA512_CTX c;
   363 	static unsigned char m[SHA512_DIGEST_LENGTH];
   364 
   365 	if (md == NULL) md=m;
   366 	SHA512_Init(&c);
   367 	SHA512_Update(&c,d,n);
   368 	SHA512_Final(md,&c);
   369 	OPENSSL_cleanse(&c,sizeof(c));
   370 	return(md);
   371 	}
   372 
   373 #ifndef SHA512_ASM
   374 static const SHA_LONG64 K512[80] = {
   375         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
   376         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
   377         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
   378         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
   379         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
   380         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
   381         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
   382         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
   383         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
   384         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
   385         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
   386         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
   387         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
   388         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
   389         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
   390         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
   391         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
   392         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
   393         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
   394         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
   395         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
   396         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
   397         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
   398         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
   399         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
   400         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
   401         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
   402         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
   403         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
   404         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
   405         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
   406         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
   407         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
   408         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
   409         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
   410         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
   411         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
   412         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
   413         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
   414         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
   415 
   416 #ifndef PEDANTIC
   417 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
   418 #  if defined(__x86_64) || defined(__x86_64__)
   419 #   define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
   420 				asm ("bswapq	%0"		\
   421 				: "=r"(ret)			\
   422 				: "0"(ret)); ret;		})
   423 #  endif
   424 # endif
   425 #endif
   426 
   427 #ifndef PULL64
   428 #define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
   429 #define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
   430 #endif
   431 
   432 #ifndef PEDANTIC
   433 # if defined(_MSC_VER)
   434 #  if defined(_WIN64)	/* applies to both IA-64 and AMD64 */
   435 #   define ROTR(a,n)	_rotr64((a),n)
   436 #  endif
   437 # elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
   438 #  if defined(__x86_64) || defined(__x86_64__)
   439 #   define ROTR(a,n)	({ unsigned long ret;		\
   440 				asm ("rorq %1,%0"	\
   441 				: "=r"(ret)		\
   442 				: "J"(n),"0"(a)		\
   443 				: "cc"); ret;		})
   444 #  elif defined(_ARCH_PPC) && defined(__64BIT__)
   445 #   define ROTR(a,n)	({ unsigned long ret;		\
   446 				asm ("rotrdi %0,%1,%2"	\
   447 				: "=r"(ret)		\
   448 				: "r"(a),"K"(n)); ret;	})
   449 #  endif
   450 # endif
   451 #endif
   452 
   453 #ifndef ROTR
   454 #define ROTR(x,s)	(((x)>>s) | (x)<<(64-s))
   455 #endif
   456 
   457 #define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
   458 #define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
   459 #define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
   460 #define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
   461 
   462 #define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
   463 #define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
   464 
   465 #if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
   466 #define	GO_FOR_SSE2(ctx,in,num)		do {		\
   467 	void	sha512_block_sse2(void *,const void *,size_t);	\
   468 	if (!(OPENSSL_ia32cap_P & (1<<26))) break;	\
   469 	sha512_block_sse2(ctx->h,in,num); return;	\
   470 					} while (0)
   471 #endif
   472 
   473 #ifdef OPENSSL_SMALL_FOOTPRINT
   474 
   475 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
   476 	{
   477 	const SHA_LONG64 *W=in;
   478 	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
   479 	SHA_LONG64	X[16];
   480 	int i;
   481 
   482 #ifdef GO_FOR_SSE2
   483 	GO_FOR_SSE2(ctx,in,num);
   484 #endif
   485 
   486 			while (num--) {
   487 
   488 	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
   489 	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
   490 
   491 	for (i=0;i<16;i++)
   492 		{
   493 #ifdef B_ENDIAN
   494 		T1 = X[i] = W[i];
   495 #else
   496 		T1 = X[i] = PULL64(W[i]);
   497 #endif
   498 		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
   499 		T2 = Sigma0(a) + Maj(a,b,c);
   500 		h = g;	g = f;	f = e;	e = d + T1;
   501 		d = c;	c = b;	b = a;	a = T1 + T2;
   502 		}
   503 
   504 	for (;i<80;i++)
   505 		{
   506 		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
   507 		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
   508 
   509 		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
   510 		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
   511 		T2 = Sigma0(a) + Maj(a,b,c);
   512 		h = g;	g = f;	f = e;	e = d + T1;
   513 		d = c;	c = b;	b = a;	a = T1 + T2;
   514 		}
   515 
   516 	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
   517 	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
   518 
   519 			W+=SHA_LBLOCK;
   520 			}
   521 	}
   522 
   523 #else
   524 
   525 #define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
   526 	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
   527 	h = Sigma0(a) + Maj(a,b,c);			\
   528 	d += T1;	h += T1;		} while (0)
   529 
   530 #define	ROUND_16_80(i,a,b,c,d,e,f,g,h,X)	do {	\
   531 	s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);	\
   532 	s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);	\
   533 	T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];	\
   534 	ROUND_00_15(i,a,b,c,d,e,f,g,h);		} while (0)
   535 
   536 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
   537 	{
   538 	const SHA_LONG64 *W=in;
   539 	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
   540 	SHA_LONG64	X[16];
   541 	int i;
   542 
   543 #ifdef GO_FOR_SSE2
   544 	GO_FOR_SSE2(ctx,in,num);
   545 #endif
   546 
   547 			while (num--) {
   548 
   549 	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
   550 	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
   551 
   552 #ifdef B_ENDIAN
   553 	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
   554 	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
   555 	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
   556 	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
   557 	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
   558 	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
   559 	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
   560 	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
   561 	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
   562 	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
   563 	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
   564 	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
   565 	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
   566 	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
   567 	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
   568 	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
   569 #else
   570 	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
   571 	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
   572 	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
   573 	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
   574 	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
   575 	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
   576 	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
   577 	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
   578 	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
   579 	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
   580 	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
   581 	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
   582 	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
   583 	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
   584 	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
   585 	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
   586 #endif
   587 
   588 	for (i=16;i<80;i+=8)
   589 		{
   590 		ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
   591 		ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
   592 		ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
   593 		ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
   594 		ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
   595 		ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
   596 		ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
   597 		ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
   598 		}
   599 
   600 	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
   601 	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
   602 
   603 			W+=SHA_LBLOCK;
   604 			}
   605 	}
   606 
   607 #endif
   608 
   609 #endif /* SHA512_ASM */
   610 
   611 #endif /* OPENSSL_NO_SHA512 */