os/ossrv/ssl/libcrypto/src/crypto/engine/eng_padlock.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
/*
 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
 * Written by Michal Ludvig <michal@logix.cz>
 *            http://www.logix.cz/michal
 *
 * Big thanks to Andy Polyakov for his help with optimization,
 * assembler fixes, the port to MS Windows and a lot of other
 * valuable work on this engine!
 */

/* ====================================================================
 * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    licensing@OpenSSL.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com).  This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 *
 */


#include <stdio.h>
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/crypto.h>
#include <openssl/dso.h>
#include <openssl/engine.h>
#include <openssl/evp.h>
#ifndef OPENSSL_NO_AES
#include <openssl/aes.h>
#endif
#include <openssl/rand.h>
#include <openssl/err.h>

#ifndef OPENSSL_NO_HW
#ifndef OPENSSL_NO_HW_PADLOCK

/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
#if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
#  ifndef OPENSSL_NO_DYNAMIC_ENGINE
#    define DYNAMIC_ENGINE
#  endif
#elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
#  ifdef ENGINE_DYNAMIC_SUPPORT
#    define DYNAMIC_ENGINE
#  endif
#else
#  error "Only OpenSSL >= 0.9.7 is supported"
#endif

/* VIA PadLock AES is available *ONLY* on some x86 CPUs.
   Not only does it not exist elsewhere, it cannot even be
   compiled on other platforms!

   In addition, because of the heavy use of inline assembler,
   compiler choice is limited to GCC and Microsoft C. */
#undef COMPILE_HW_PADLOCK
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
     (defined(_MSC_VER) && defined(_M_IX86))
#  define COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock (void);
# endif
#endif

EXPORT_C void ENGINE_load_padlock (void)
{
/* On non-x86 CPUs it just returns. */
#ifdef COMPILE_HW_PADLOCK
	ENGINE *toadd = ENGINE_padlock ();
	if (!toadd) return;
	ENGINE_add (toadd);
	ENGINE_free (toadd);
	ERR_clear_error ();
#endif
}
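
/*
 * Example (an illustrative sketch, not part of the original source): an
 * application would typically activate the engine through the standard
 * ENGINE API after loading it, e.g.:
 *
 *	ENGINE_load_padlock();
 *	ENGINE *e = ENGINE_by_id("padlock");
 *	if (e && ENGINE_init(e)) {
 *		ENGINE_set_default(e, ENGINE_METHOD_ALL);
 *		ENGINE_finish(e);
 *	}
 *	if (e) ENGINE_free(e);
 */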

#ifdef COMPILE_HW_PADLOCK
/* We do these includes here to avoid header problems on platforms that
   do not have the VIA padlock anyway... */
#ifdef _MSC_VER
# include <malloc.h>
# define alloca _alloca
#else
# include <stdlib.h>
#endif

/* Function for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
#ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
#endif

/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features */
static int padlock_use_ace = 0;	/* Advanced Cryptography Engine */
static int padlock_use_rng = 0;	/* Random Number Generator */
#ifndef OPENSSL_NO_AES
static int padlock_aes_align_required = 1;
#endif

/* ===== Engine "management" functions ===== */

/* Prepare the ENGINE structure for registration */
static int
padlock_bind_helper(ENGINE *e)
{
	/* Check available features */
	padlock_available();

#if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
	padlock_use_rng=0;
#endif

	/* Generate a nice engine name with available features */
	BIO_snprintf(padlock_name, sizeof(padlock_name),
		"VIA PadLock (%s, %s)",
		 padlock_use_rng ? "RNG" : "no-RNG",
		 padlock_use_ace ? "ACE" : "no-ACE");

	/* Register everything or return with an error */
	if (!ENGINE_set_id(e, padlock_id) ||
	    !ENGINE_set_name(e, padlock_name) ||
	    !ENGINE_set_init_function(e, padlock_init) ||
#ifndef OPENSSL_NO_AES
	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
#endif
	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
		return 0;
	}

	/* Everything looks good */
	return 1;
}

/* Constructor */
static ENGINE *
ENGINE_padlock(void)
{
	ENGINE *eng = ENGINE_new();

	if (!eng) {
		return NULL;
	}

	if (!padlock_bind_helper(eng)) {
		ENGINE_free(eng);
		return NULL;
	}

	return eng;
}

/* Check availability of the engine */
static int
padlock_init(ENGINE *e)
{
	return (padlock_use_rng || padlock_use_ace);
}

/* This stuff is needed if this ENGINE is being compiled into a
 * self-contained shared library.
 */
#ifdef DYNAMIC_ENGINE
static int
padlock_bind_fn(ENGINE *e, const char *id)
{
	if (id && (strcmp(id, padlock_id) != 0)) {
		return 0;
	}

	if (!padlock_bind_helper(e))  {
		return 0;
	}

	return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN ();
IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn);
#endif /* DYNAMIC_ENGINE */

/* ===== Here comes the "real" engine ===== */

#ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#define AES_BLOCK_SIZE		16
#define AES_KEY_SIZE_128	16
#define AES_KEY_SIZE_192	24
#define AES_KEY_SIZE_256	32

/* Here we store the status information relevant to the
   current context. */
/* BIG FAT WARNING:
 * 	Inline assembler in PADLOCK_XCRYPT_ASM()
 * 	depends on the order of items in this structure.
 * 	Don't blindly modify, reorder, etc!
 */
struct padlock_cipher_data
{
	unsigned char iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];
		struct {
			int rounds:4;
			int dgst:1;	/* n/a in C3 */
			int align:1;	/* n/a in C3 */
			int ciphr:1;	/* n/a in C3 */
			unsigned int keygen:1;
			int interm:1;
			unsigned int encdec:1;
			int ksize:2;
		} b;
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
};
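
/*
 * Layout note (it follows from the structure above and the assembler
 * template below): 'iv' sits at offset 0, 'cword' at offset 16 and 'ks'
 * at offset 32, which is exactly what the "leal 16(%0)" / "leal 32(%0)"
 * pair in PADLOCK_XCRYPT_ASM() relies on.
 */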

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * a few bogus key reloads [if any at all on a single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
#endif

/*
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that first __fastcall
 * argument is passed in %ecx and second - in %edx.
 * =======================================================
 */
#if defined(__GNUC__) && __GNUC__>=2
/*
 * As for the excessive "push %ebx"/"pop %ebx" found all over:
 * when generating position-independent code GCC won't let
 * us use "b" in assembler templates nor even respect "ebx"
 * in the "clobber description." Therefore the trouble...
 */

/* Helper function - check if a CPUID instruction
   is available on this CPU */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if bit #21 of EFLAGS
	   can be toggled. If yes, CPUID is available. */
	asm volatile (
		"pushf\n"
		"popl %%eax\n"
		"xorl $0x200000, %%eax\n"
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"
		"pushl %%eax\n"
		"popf\n"
		"pushf\n"
		"popl %%eax\n"
		"andl $0x200000, %%eax\n"
		"xorl %%eax, %%ecx\n"
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	return (result == 0);
}
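
/*
 * On newer toolchains an equivalent check can be done in plain C (an
 * illustrative sketch; GCC's <cpuid.h> did not exist when this engine
 * was written):
 *
 *	#include <cpuid.h>
 *	unsigned int a, b, c, d;
 *	int have_cpuid = __get_cpuid(0, &a, &b, &c, &d);
 */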

/* Load supported features of the CPU to see if
   the PadLock is available. */
static int
padlock_available(void)
{
	char vendor_string[16];
	unsigned int eax, edx;

	/* First check if the CPUID instruction is available at all... */
	if (! padlock_insn_cpuid_available())
		return 0;

	/* Are we running on the Centaur (VIA) CPU? */
	eax = 0x00000000;
	vendor_string[12] = 0;
	asm volatile (
		"pushl	%%ebx\n"
		"cpuid\n"
		"movl	%%ebx,(%%edi)\n"
		"movl	%%edx,4(%%edi)\n"
		"movl	%%ecx,8(%%edi)\n"
		"popl	%%ebx"
		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
	if (strcmp(vendor_string, "CentaurHauls") != 0)
		return 0;

	/* Check for Centaur Extended Feature Flags presence */
	eax = 0xC0000000;
	asm volatile ("pushl %%ebx; cpuid; popl	%%ebx"
		: "+a"(eax) : : "ecx", "edx");
	if (eax < 0xC0000001)
		return 0;

	/* Read the Centaur Extended Feature Flags */
	eax = 0xC0000001;
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax), "=d"(edx) : : "ecx");

	/* Fill up some flags */
	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));

	return padlock_use_ace + padlock_use_rng;
}
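
/*
 * Bit layout implied by the masks above (and by the 'bt edx,N' sequence
 * in the Microsoft C variant below): CPUID leaf 0xC0000001 reports
 * "present"/"enabled" bit pairs in EDX -- bits 2/3 for the RNG and bits
 * 6/7 for ACE.  (0x3<<6) therefore demands that ACE be both present and
 * enabled, and (0x3<<2) does the same for the RNG.
 */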

#ifndef OPENSSL_NO_AES
/* Our own htonl()/ntohl() */
static inline void
padlock_bswapl(AES_KEY *ks)
{
	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
	unsigned int *key = ks->rd_key;

	while (i--) {
		asm volatile ("bswapl %0" : "+r"(*key));
		key++;
	}
}
#endif

/* Force key reload from memory to the CPU microcode.
   Loading EFLAGS from the stack clears EFLAGS[30]
   which does the trick. */
static inline void
padlock_reload_key(void)
{
	asm volatile ("pushfl; popfl");
}

#ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. The point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	asm volatile (
	"pushfl\n"
"	btl	$30,(%%esp)\n"
"	jnc	1f\n"
"	cmpl	%2,%1\n"
"	je	1f\n"
"	popfl\n"
"	subl	$4,%%esp\n"
"1:	addl	$4,%%esp\n"
"	movl	%2,%0"
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}
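
/*
 * In C terms the assembler above roughly reads (a sketch):
 *
 *	if (EFLAGS[30] is set && padlock_saved_context != cdata)
 *		clear EFLAGS[30];	// the "popfl" path, forcing a key
 *					// reload, cf. padlock_reload_key()
 *	padlock_saved_context = cdata;
 */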

/* Template for padlock_xcrypt_* modes */
/* BIG FAT WARNING:
 * 	The offsets used with 'leal' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp) 		\
{	void *iv; 				\
	asm volatile ( "pushl	%%ebx\n"	\
		"	leal	16(%0),%%edx\n"	\
		"	leal	32(%0),%%ebx\n"	\
			rep_xcrypt "\n"		\
		"	popl	%%ebx"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
		: "edx", "cc", "memory");	\
	return iv;				\
}

/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
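
/*
 * Each expansion above yields a function of the form (a sketch):
 *
 *	static inline void *padlock_xcrypt_ecb(size_t cnt,
 *		struct padlock_cipher_data *cdata,
 *		void *out, const void *inp);
 *
 * where 'cnt' counts AES blocks rather than bytes, and the returned
 * pointer is the %eax output of the instruction -- the callers below
 * use it as the updated IV in CBC and CFB modes.
 */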
#endif

/* The RNG call itself */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
	    : "=a"(eax_out),"=m"(*(unsigned *)addr)
	    : "D"(addr), "d" (edx_in)
	    );

	return eax_out;
}

/* Why not inline 'rep movsd'? I failed to find information on what
 * value in the Direction Flag one can expect and consequently have to
 * apply the "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder, the 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long       *d=dst;
	const long *s=src;

	n /= sizeof(*d);
	do { *d++ = *s++; } while (--n);

	return dst;
}

#elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to the __fastcall calling convention so
 * that the first two arguments are passed through %ecx and %edx.
 * Which suits very well, as the instructions in question use
 * both %ecx and %edx as input:-)
 */
#define REP_XCRYPT(code)		\
	_asm _emit 0xf3			\
	_asm _emit 0x0f _asm _emit 0xa7	\
	_asm _emit code

/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	void *outp, const void *inp)	\
{	_asm	mov	eax,edx		\
	_asm	lea	edx,[eax+16]	\
	_asm	lea	ebx,[eax+32]	\
	_asm	mov	edi,outp	\
	_asm	mov	esi,inp		\
	REP_XCRYPT(code)		\
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{	_asm	mov	edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

static void __fastcall
padlock_reload_key(void)
{	_asm pushfd _asm popfd		}

static void __fastcall
padlock_verify_context(void *cdata)
{	_asm	{
		pushfd
		bt	DWORD PTR[esp],30
		jnc	skip
		cmp	ecx,padlock_saved_context
		je	skip
		popfd
		sub	esp,4
	skip:	add	esp,4
		mov	padlock_saved_context,ecx
		}
}

static int
padlock_available(void)
{	_asm	{
		pushfd
		pop	eax
		mov	ecx,eax
		xor	eax,1<<21
		push	eax
		popfd
		pushfd
		pop	eax
		xor	eax,ecx
		bt	eax,21
		jnc	noluck
		mov	eax,0
		cpuid
		xor	eax,eax
		cmp	ebx,'tneC'
		jne	noluck
		cmp	edx,'Hrua'
		jne	noluck
		cmp	ecx,'slua'
		jne	noluck
		mov	eax,0xC0000000
		cpuid
		mov	edx,eax
		xor	eax,eax
		cmp	edx,0xC0000001
		jb	noluck
		mov	eax,0xC0000001
		cpuid
		xor	eax,eax
		bt	edx,6
		jnc	skip_a
		bt	edx,7
		jnc	skip_a
		mov	padlock_use_ace,1
		inc	eax
	skip_a:	bt	edx,2
		jnc	skip_r
		bt	edx,3
		jnc	skip_r
		mov	padlock_use_rng,1
		inc	eax
	skip_r:
	noluck:
		}
}

static void __fastcall
padlock_bswapl(void *key)
{	_asm	{
		pushfd
		cld
		mov	esi,ecx
		mov	edi,ecx
		mov	ecx,60
	up:	lodsd
		bswap	eax
		stosd
		loop	up
		popfd
		}
}

/* MS actually specifies the status of the Direction Flag and the
 * compiler even manages to compile the following as 'rep movsd' all
 * by itself...
 */
#define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#endif

/* ===== AES encryption/decryption ===== */
#ifndef OPENSSL_NO_AES

#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#define NID_aes_128_cfb	NID_aes_128_cfb128
#endif

#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#define NID_aes_128_ofb	NID_aes_128_ofb128
#endif

#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#define NID_aes_192_cfb	NID_aes_192_cfb128
#endif

#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#define NID_aes_192_ofb	NID_aes_192_ofb128
#endif

#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#define NID_aes_256_cfb	NID_aes_256_cfb128
#endif

#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#define NID_aes_256_ofb	NID_aes_256_ofb128
#endif

/* List of supported ciphers. */
static int padlock_cipher_nids[] = {
	NID_aes_128_ecb,
	NID_aes_128_cbc,
	NID_aes_128_cfb,
	NID_aes_128_ofb,

	NID_aes_192_ecb,
	NID_aes_192_cbc,
#if 0
	NID_aes_192_cfb,	/* FIXME: AES192/256 CFB/OFB don't work. */
	NID_aes_192_ofb,
#endif

	NID_aes_256_ecb,
	NID_aes_256_cbc,
#if 0
	NID_aes_256_cfb,
	NID_aes_256_ofb,
#endif
};
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
				const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			      const unsigned char *in, size_t nbytes);

#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +		\
	( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )	)
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED(ctx->cipher_data))
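
/*
 * E.g. (simple arithmetic on the macro above): NEAREST_ALIGNED rounds
 * 0x1003 up to 0x1010, since (0x10 - (0x1003 & 0x0F)) & 0x0F == 0x0D,
 * while an already 16-byte-aligned pointer is left untouched because
 * the inner term becomes 0x10 and the outer mask zeroes it.
 */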

#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_CFB	1
/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
#define	DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,		\
	EVP_CIPHER_block_size_##umode,	\
	AES_KEY_SIZE_##ksize,		\
	AES_BLOCK_SIZE,			\
	0 | EVP_CIPH_##umode##_MODE,	\
	padlock_aes_init_key,		\
	padlock_aes_cipher,		\
	NULL,				\
	sizeof(struct padlock_cipher_data) + 16,	\
	EVP_CIPHER_set_asn1_iv,		\
	EVP_CIPHER_get_asn1_iv,		\
	NULL,				\
	NULL				\
}

DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
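
/*
 * For instance, DECLARE_AES_EVP(128,cbc,CBC) expands to roughly:
 *
 *	static const EVP_CIPHER padlock_aes_128_cbc = {
 *		NID_aes_128_cbc, 16, 16, 16,
 *		0 | EVP_CIPH_CBC_MODE,
 *		padlock_aes_init_key, padlock_aes_cipher, NULL,
 *		sizeof(struct padlock_cipher_data) + 16,
 *		EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
 *		NULL, NULL
 *	};
 */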

static int
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
{
	/* No specific cipher => return a list of supported nids ... */
	if (!cipher) {
		*nids = padlock_cipher_nids;
		return padlock_cipher_nids_num;
	}

	/* ... or the requested "cipher" otherwise */
	switch (nid) {
	  case NID_aes_128_ecb:
	    *cipher = &padlock_aes_128_ecb;
	    break;
	  case NID_aes_128_cbc:
	    *cipher = &padlock_aes_128_cbc;
	    break;
	  case NID_aes_128_cfb:
	    *cipher = &padlock_aes_128_cfb;
	    break;
	  case NID_aes_128_ofb:
	    *cipher = &padlock_aes_128_ofb;
	    break;

	  case NID_aes_192_ecb:
	    *cipher = &padlock_aes_192_ecb;
	    break;
	  case NID_aes_192_cbc:
	    *cipher = &padlock_aes_192_cbc;
	    break;
	  case NID_aes_192_cfb:
	    *cipher = &padlock_aes_192_cfb;
	    break;
	  case NID_aes_192_ofb:
	    *cipher = &padlock_aes_192_ofb;
	    break;

	  case NID_aes_256_ecb:
	    *cipher = &padlock_aes_256_ecb;
	    break;
	  case NID_aes_256_cbc:
	    *cipher = &padlock_aes_256_cbc;
	    break;
	  case NID_aes_256_cfb:
	    *cipher = &padlock_aes_256_cfb;
	    break;
	  case NID_aes_256_ofb:
	    *cipher = &padlock_aes_256_ofb;
	    break;

	  default:
	    /* Sorry, we don't support this NID */
	    *cipher = NULL;
	    return 0;
	}

	return 1;
}
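
/*
 * Illustrative EVP-level use of the engine (a sketch, not part of the
 * original file; key/iv/in/out buffers and the initialized ENGINE *e
 * are assumed): once ENGINE_init() has succeeded, the padlock ciphers
 * are picked up like any other EVP implementation, e.g.:
 *
 *	EVP_CIPHER_CTX ctx;
 *	EVP_CIPHER_CTX_init(&ctx);
 *	EVP_EncryptInit_ex(&ctx, EVP_aes_128_cbc(), e, key, iv);
 *	EVP_EncryptUpdate(&ctx, out, &outl, in, inl);
 *	EVP_EncryptFinal_ex(&ctx, out + outl, &tmplen);
 *	EVP_CIPHER_CTX_cleanup(&ctx);
 */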

/* Prepare the encryption key for PadLock usage */
static int
padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
		      const unsigned char *iv, int enc)
{
	struct padlock_cipher_data *cdata;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

	if (key==NULL) return 0;	/* ERROR */

	cdata = ALIGNED_CIPHER_DATA(ctx);
	memset(cdata, 0, sizeof(struct padlock_cipher_data));

	/* Prepare Control word. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
		cdata->cword.b.encdec = 0;
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
	cdata->cword.b.ksize = (key_len - 128) / 64;
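	/* E.g., per the two formulas above: 128-bit keys give rounds=10
	   and ksize=0, 192-bit keys 12 and 1, 256-bit keys 14 and 2. */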

	switch(key_len) {
		case 128:
			/* PadLock can generate an extended key for
			   AES128 in hardware */
			memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
			cdata->cword.b.keygen = 0;
			break;

		case 192:
		case 256:
			/* Generate an extended AES key in software.
			   Needed for AES192/AES256 */
			/* Well, the above applies to Stepping 8 CPUs
			   and is listed as hardware errata. They most
			   likely will fix it at some point and then
			   a check for stepping would be due here. */
			if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
			    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
			    enc)
				AES_set_encrypt_key(key, key_len, &cdata->ks);
			else
				AES_set_decrypt_key(key, key_len, &cdata->ks);
#ifndef AES_ASM
			/* OpenSSL C functions use byte-swapped extended key. */
			padlock_bswapl(&cdata->ks);
#endif
			cdata->cword.b.keygen = 1;
			break;

		default:
			/* ERROR */
			return 0;
	}

	/*
	 * This is done to cover cases when the user reuses the
	 * context for a new key. The catch is that if we don't do
	 * this, padlock_aes_cipher might proceed with the old key...
	 */
	padlock_reload_key ();

	return 1;
}

/*
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses, or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	void  *iv;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}

#ifndef  PADLOCK_CHUNK
# define PADLOCK_CHUNK	512	/* Must be a power of 2 larger than 16 */
#endif
#if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
# error "insane PADLOCK_CHUNK..."
#endif
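
/*
 * The preprocessor test above uses the classic identity that
 * x & (x-1) == 0 exactly when x is a power of two; e.g. 512 & 511 == 0,
 * whereas 513 & 512 != 0.
 */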

/* Re-align the arguments to 16-byte boundaries and run the
   encryption function itself. This function is not AES-specific. */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		   const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	const  void *inp;
	unsigned char  *out;
	void  *iv;
	int    inp_misaligned, out_misaligned, realign_in_loop;
	size_t chunk, allocated=0;

	/* ctx->num is maintained in byte-oriented modes,
	   such as CFB and OFB... */
	if ((chunk = ctx->num)) { /* borrow chunk variable */
		unsigned char *ivp=ctx->iv;

		switch (EVP_CIPHER_CTX_mode(ctx)) {
		case EVP_CIPH_CFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			if (ctx->encrypt)
				while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
					chunk++, nbytes--;
				}
			else	while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ ivp[chunk];
					ivp[chunk++] = c, nbytes--;
				}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		case EVP_CIPH_OFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
				*(out_arg++) = *(in_arg++) ^ ivp[chunk];
				chunk++, nbytes--;
			}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		}
	}

	if (nbytes == 0)
		return 1;
#if 0
	if (nbytes % AES_BLOCK_SIZE)
		return 0; /* are we expected to do tail processing? */
#else
	/* nbytes is always a multiple of AES_BLOCK_SIZE in ECB and CBC
	   modes and an arbitrary value in byte-oriented modes, such as
	   CFB and OFB... */
#endif

	/* VIA promises CPUs that won't require alignment in the future.
	   For now padlock_aes_align_required is initialized to 1 and
	   the condition is never met... */
	/* The C7 core is capable of managing unaligned input in non-ECB[!]
	   mode, but the performance penalties appear to be approximately
	   the same as for the software alignment below, or ~3x. They
	   promise to improve it in the future, but for now we can just as
	   well pretend that it can only handle aligned input... */
	if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	inp_misaligned = (((size_t)in_arg) & 0x0F);
	out_misaligned = (((size_t)out_arg) & 0x0F);

	/* Note that even if output is aligned and input not,
	 * I still prefer to loop instead of copying the whole
	 * input and then encrypting in one stroke. This is done
	 * in order to improve L1 cache utilization... */
	realign_in_loop = out_misaligned|inp_misaligned;

	if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	/* this takes one "if" out of the loops */
	chunk  = nbytes;
	chunk %= PADLOCK_CHUNK;
	if (chunk==0) chunk = PADLOCK_CHUNK;

	if (out_misaligned) {
		/* optimize for small input */
		allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
		out = alloca(0x10 + allocated);
		out = NEAREST_ALIGNED(out);
	}
	else
		out = out_arg;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		goto cbc_shortcut;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cbc_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

		} while (nbytes -= chunk);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk)	goto cfb_shortcut;
		else		goto cfb_skiploop;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cfb_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
		} while (nbytes >= AES_BLOCK_SIZE);

		cfb_skiploop:
		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			if (iv != ivp) {
				memcpy(ivp, iv, AES_BLOCK_SIZE);
				iv = ivp;
			}
			ctx->num = nbytes;
			if (cdata->cword.b.encdec) {
				cdata->cword.b.encdec=0;
				padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				cdata->cword.b.encdec=1;
				padlock_reload_key();
				while(nbytes) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ *ivp;
					*(ivp++) = c, nbytes--;
				}
			}
			else {	padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				padlock_reload_key();
				while (nbytes) {
					*ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
					ivp++, nbytes--;
				}
			}
		}
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk) do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes >= AES_BLOCK_SIZE);

		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			ctx->num = nbytes;
			padlock_reload_key();	/* empirically found */
			padlock_xcrypt_ecb(1,cdata,ivp,ivp);
			padlock_reload_key();	/* empirically found */
			while (nbytes) {
				*(out_arg++) = *(in_arg++) ^ *ivp;
				ivp++, nbytes--;
			}
		}
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Clean the realign buffer if it was used */
	if (out_misaligned) {
		volatile unsigned long *p=(void *)out;
		size_t   n = allocated/sizeof(*p);
		while (n--) *p++=0;
	}

	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}

#endif /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */
/* Wrapper that provides an interface between the API and
   the raw PadLock RNG */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int eax, buf;

	while (count >= 8) {
		eax = padlock_xstore(output, 0);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=8)	return 0; /* fatal failure...  */
		output += 8;
		count  -= 8;
	}
	while (count > 0) {
		eax = padlock_xstore(&buf, 3);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=1)	return 0; /* fatal failure...  */
		*output++ = (unsigned char)buf;
		count--;
	}
	*(volatile unsigned int *)&buf=0;

	return 1;
}
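
/*
 * As the checks above imply, xstore reports its status in EAX: bit 6
 * signals that the RNG is enabled, bits 10..14 flag the DC-bias,
 * raw-bits and string-filter conditions mentioned in the comments, and
 * the low five bits count the bytes actually stored (8 per call with
 * EDX=0, 1 per call with EDX=3).
 */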

/* Dummy but necessary function */
static int
padlock_rand_status(void)
{
	return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
	NULL,			/* seed */
	padlock_rand_bytes,	/* bytes */
	NULL,			/* cleanup */
	NULL,			/* add */
	padlock_rand_bytes,	/* pseudorand */
	padlock_rand_status,	/* rand status */
};

#endif /* COMPILE_HW_PADLOCK */

#endif /* !OPENSSL_NO_HW_PADLOCK */
#endif /* !OPENSSL_NO_HW */