os/ossrv/genericopenlibs/liboil/src/i386/abs_i386.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
 * LIBOIL - Library of Optimized Inner Loops
sl@0
     3
 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
sl@0
     4
 * All rights reserved.
sl@0
     5
 *
sl@0
     6
 * Redistribution and use in source and binary forms, with or without
sl@0
     7
 * modification, are permitted provided that the following conditions
sl@0
     8
 * are met:
sl@0
     9
 * 1. Redistributions of source code must retain the above copyright
sl@0
    10
 *    notice, this list of conditions and the following disclaimer.
sl@0
    11
 * 2. Redistributions in binary form must reproduce the above copyright
sl@0
    12
 *    notice, this list of conditions and the following disclaimer in the
sl@0
    13
 *    documentation and/or other materials provided with the distribution.
sl@0
    14
 * 
sl@0
    15
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
sl@0
    16
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
sl@0
    17
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
sl@0
    18
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
sl@0
    19
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
sl@0
    20
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
sl@0
    21
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
sl@0
    22
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
sl@0
    23
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
sl@0
    24
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
sl@0
    25
 * POSSIBILITY OF SUCH DAMAGE.
sl@0
    26
 */
sl@0
    27
//Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
sl@0
    28
sl@0
    29
#ifdef HAVE_CONFIG_H
sl@0
    30
#include "config.h"
sl@0
    31
#endif
sl@0
    32
sl@0
    33
#include <liboil/liboilfunction.h>
sl@0
    34
#include "liboil/simdpack/simdpack.h"
sl@0
    35
sl@0
    36
/* Absolute value of x.  The argument is expanded more than once, so it must
 * not have side effects. */
#define ABS(x) (!((x) > 0) ? -(x) : (x))
sl@0
    37
sl@0
    38
#if 0
/*
 * Compiled-out CMOV variant of abs_u16_s16.
 *
 * Each pass loads one sign-extended 16-bit value from src, negates a copy,
 * keeps the negated copy when the value is <= -1, and stores the low 16 bits
 * through dest.
 *
 * NOTE(review): both pointers advance by a fixed 2 bytes and dstr/sstr are
 * never referenced; the counter is only tested after the first pass, so the
 * body runs at least once.  The asm also stores to memory without a
 * "memory" clobber.  These would need fixing before re-enabling.
 */
static void
abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  __asm__ __volatile__ ("\n"
      "	.p2align 4,,15			\n"
      "1:  movswl	(%0), %%eax		\n"
      "    addl	$2, %0			\n"
      "    movl	%%eax, %%edx		\n"
      "    negl	%%edx			\n"
      "    cmpl	$-1, %%eax		\n"
      "    cmovle	%%edx, %%eax		\n"
      "    movw	%%ax, (%1)		\n"
      "    addl	$2, %1			\n"
      "    decl	%2			\n"
      "    testl	%2, %2			\n"
      "    jg	1b			\n"
      : "+r" (src), "+r" (dest), "+r" (n)
      : /* no pure inputs */
      : "eax", "edx");
}

OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
#endif
sl@0
    60
sl@0
    61
#if 0
/*
 * Compiled-out variant: the CMOV loop of abs_u16_s16_i386asm after being
 * rescheduled by uberopt.  The trailing "UBER" comments are the scheduler's
 * own annotations and are kept verbatim.
 *
 * NOTE(review): as written the loop steps both pointers by a fixed 2 bytes
 * (dstr/sstr are unused) and tests the counter only after the first pass.
 */
static void
abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src,
    int sstr, int n)
{
  __asm__ __volatile__ ("\n"
      "	.p2align 4,,15			\n"
      "1:                               \n"
      "    movswl	(%0), %%eax	\n" /* UBER 0:     */
      "    addl	$2, %0			\n" /* UBER 1: 0   */
      "    movl	%%eax, %%edx		\n" /* UBER 2: 0   */
      "    decl	%2			\n" /* UBER 7:     */
      "    negl	%%edx			\n" /* UBER 3: 2   */
      "    cmpl	$-1, %%eax ; cmovle %%edx, %%eax \n" /* UBER 4: 3 */
      "    movw	%%ax, (%1)		\n" /* UBER 5: 4   */
      "    addl	$2, %1			\n" /* UBER 6: 5   */
      "    testl	%2, %2		\n"
      "    jg	1b			\n"
      : "+r" (src), "+r" (dest), "+r" (n)
      : /* no pure inputs */
      : "eax", "edx");
}

OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm_uber4, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
#endif
sl@0
    85
sl@0
    86
#if 0
/*
 * Compiled-out CMOV variant that pins its operands to fixed registers:
 * src in edi, dest in eax, n in esi.  The destination pointer is moved into
 * ebp (saved and restored with pushl/popl around the loop) so that eax can
 * be reused as the working register.
 *
 * NOTE(review): eax is declared as the in/out operand for dest but is
 * overwritten with data inside the loop, so dest is not meaningful
 * afterwards.  Strides are fixed at 2 bytes; dstr/sstr are unused.
 */
static void
abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  __asm__ __volatile__ ("\n"
      "	pushl	%%ebp			\n"
      "	movl	%%eax, %%ebp		\n"
      "	.p2align 4,,15			\n"
      "1:	movswl	(%%edi), %%eax		\n"
      "	addl	$2, %%edi		\n"
      "	movl	%%eax, %%edx		\n"
      "	negl	%%edx			\n"
      "	cmpl	$-1, %%eax		\n"
      "	cmovle	%%edx, %%eax		\n"
      "	movw	%%ax, (%%ebp)		\n"
      "	addl	$2, %%ebp		\n"
      "	decl	%2			\n"
      "	testl	%2, %2			\n"
      "	jg	1b			\n"
      "	popl	%%ebp			\n"
      : "+D" (src), "+a" (dest), "+S" (n)
      : /* no pure inputs */
      : "ecx", "edx");
}

OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm2, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
#endif
sl@0
   110
sl@0
   111
/*
 * abs_u16_s16_i386asm3:
 * @dest: where the unsigned 16-bit results are stored
 * @dstr: distance in bytes between consecutive @dest elements
 * @src:  signed 16-bit input values
 * @sstr: distance in bytes between consecutive @src elements
 * @n:    number of elements to process; nothing is done when @n <= 0
 *
 * Branch-free scalar absolute value.  For each element x the loop builds a
 * sign mask, computes x - 2 * (x & mask) and stores the low 16 bits, so
 * -32768 is stored as 32768.
 *
 * When __WINSCW__ or __WINS__ is defined the body is compiled out and the
 * function does nothing.
 */
static void
abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* Pointer-width copies of the strides, so the "add" instructions below
   * read memory operands of the same size as the pointer registers. */
  long dest_step = dstr;
  long src_step = sstr;

  /* The loop decrements before testing for zero; entering it with nothing
   * to do would make it run until the counter wraps around. */
  if (n <= 0)
    return;

  __asm__ __volatile__ ("\n"
      "	.p2align 4,,15			\n"
      "1:  movswl (%1), %%eax           \n"   /* eax = sign-extended *src   */
      "    add %4, %1                   \n"   /* src  += src_step           */
      "    mov %%eax, %%edx             \n"   /* edx = x                    */
      "    sar $0xf, %%ax               \n"   /* eax = (x < 0) ? ~0 : 0     */
      "    and %%edx, %%eax             \n"   /* eax = (x < 0) ? x : 0      */
      "    add %%eax, %%eax             \n"   /* eax = (x < 0) ? 2x : 0     */
      "    sub %%eax, %%edx             \n"   /* edx = (x < 0) ? -x : x     */
      "    mov %%dx, (%0)               \n"   /* *dest = low 16 bits of edx */
      "    add %3, %0                   \n"   /* dest += dest_step          */
      "    decl %2                      \n"
      "    jne 1b                       \n"
      : "+r" (dest), "+r" (src), "+m" (n)
      : "m" (dest_step), "m" (src_step)
      : "eax", "edx", "cc", "memory");
#endif
}
sl@0
   133
/* Registers abs_u16_s16_i386asm3 as an implementation of the abs_u16_s16
 * function class.  NOTE(review): presumably the _ASM form adds no CPU
 * feature requirement; confirm against the macro in liboilfunction.h. */
OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16);
sl@0
   134
sl@0
   135
sl@0
   136
sl@0
   137
/*
 * abs_u16_s16_mmx:
 * @dest: where the unsigned 16-bit results are stored
 * @dstr: stride applied to @dest after every element (via OIL_INCREMENT)
 * @src:  signed 16-bit input values
 * @sstr: stride applied to @src after every element (via OIL_INCREMENT)
 * @n:    number of elements to process; nothing is done when @n <= 0
 *
 * Absolute value using MMX saturating arithmetic.  Elements are handled one
 * at a time until the remaining count is a multiple of four; after that,
 * groups of four are gathered into a small buffer, processed with MMX and
 * scattered back out, so arbitrary strides are honoured.
 *
 * With mm2 = four copies of -32768 and mm3 = four copies of 32767, a
 * saturating add followed by a saturating subtract of the same constant
 * clamps the value: mm0 becomes max(x, 0) and mm1 becomes min(x, 0).  Their
 * wrapping difference is |x|, so -32768 is stored as 32768.
 *
 * When __WINSCW__ or __WINS__ is defined the body is compiled out and the
 * function does nothing.
 */
static void
abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* Row 0 is loaded into mm2, row 1 into mm3. */
  static const int16_t p[][4] = {
    { -32768, -32768, -32768, -32768 },
    { 32767, 32767, 32767, 32767 }
  };
  int16_t tmp[4];

  /* A negative count would otherwise keep the loops below running until
   * the counter wraps. */
  if (n <= 0)
    return;

  /* Scalar prologue: bring the remaining count down to a multiple of 4. */
  while (n & 3) {
    *dest = ABS (*src);
    OIL_INCREMENT (dest, dstr);
    OIL_INCREMENT (src, sstr);
    n--;
  }
  n /= 4;

  /* Load the clamp constants once; they stay in mm2/mm3 for the whole loop. */
  __asm__ __volatile__ ("\n"
      "	movq	(%0), %%mm2		\n"
      "	movq	8(%0), %%mm3		\n"
      :: "r" (p));

  while (n--) {
    /* Gather four strided inputs into contiguous storage. */
    tmp[0] = *src;
    OIL_INCREMENT (src, sstr);
    tmp[1] = *src;
    OIL_INCREMENT (src, sstr);
    tmp[2] = *src;
    OIL_INCREMENT (src, sstr);
    tmp[3] = *src;
    OIL_INCREMENT (src, sstr);

    /* tmp is read and rewritten in place; the "memory" clobber tells the
     * compiler about that. */
    __asm__ __volatile__ ("\n"
        "	movq	(%0), %%mm1		\n"
        "	movq	%%mm1, %%mm0		\n"
        "	paddsw	%%mm2, %%mm0		\n"
        "	paddsw	%%mm3, %%mm1		\n"
        "	psubsw	%%mm2, %%mm0		\n"
        "	psubsw	%%mm3, %%mm1		\n"
        "	psubw	%%mm1, %%mm0		\n"
        "	movq	%%mm0, (%0)		\n"
        : : "r" (tmp)
        : "memory" );

    /* Scatter the four results to the strided destination. */
    *dest = tmp[0];
    OIL_INCREMENT (dest, dstr);
    *dest = tmp[1];
    OIL_INCREMENT (dest, dstr);
    *dest = tmp[2];
    OIL_INCREMENT (dest, dstr);
    *dest = tmp[3];
    OIL_INCREMENT (dest, dstr);
  }

  /* Leave MMX state.  Spelled __asm__ like the rest of the file, because
   * the plain "asm" keyword is not accepted in strict ISO C modes. */
  __asm__ __volatile__ ("emms");
#endif
}
sl@0
   192
sl@0
   193
/* Registers abs_u16_s16_mmx as an implementation of the abs_u16_s16 function
 * class, tagged with OIL_IMPL_FLAG_MMX.  NOTE(review): presumably the flag
 * restricts selection to CPUs that report MMX; confirm in liboil. */
OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx, abs_u16_s16, OIL_IMPL_FLAG_MMX);
sl@0
   194
sl@0
   195
#if 0
/*
 * Compiled-out MMX variant that handles eight contiguous elements (two
 * 8-byte loads and stores) per loop pass, using the same saturating
 * add/subtract clamp as abs_u16_s16_mmx.  A scalar loop first reduces the
 * count to a multiple of eight.
 *
 * Register use is fixed: src in edi, dest in eax, n in esi, constants in ecx.
 *
 * NOTE(review): the vector loop advances both pointers by 16 bytes and
 * never looks at dstr/sstr, and its counter is tested only after the first
 * pass, so it runs once even when fewer than eight elements remain.
 */
static void
abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  short p[] = { -32768, -32768, -32768, -32768,
    32767, 32767, 32767, 32767
  };

  for (; n & 7; n--) {
    *dest = ABS (*src);
    OIL_INCREMENT (dest, dstr);
    OIL_INCREMENT (src, sstr);
  }
  n /= 8;

  __asm__ __volatile__ ("\n"
      "	movq	(%3), %%mm2		\n"
      "	movq	8(%3), %%mm3		\n"
      "	.p2align 4,,15			\n"
      "1:	movq	(%%edi), %%mm0		\n"
      "	movq	(%%edi), %%mm1		\n"
      "	paddsw	%%mm2, %%mm0		\n"
      "	paddsw	%%mm3, %%mm1		\n"
      "	psubsw	%%mm2, %%mm0		\n"
      "	psubsw	%%mm3, %%mm1		\n"
      "	psubw	%%mm1, %%mm0		\n"
      "	movq	%%mm0, (%%eax)		\n"
      "	 movq	8(%%edi), %%mm4		\n"
      "	 movq	8(%%edi), %%mm5		\n"
      "	 addl	$16, %%edi		\n"
      "	 paddsw	%%mm2, %%mm4		\n"
      "	 paddsw	%%mm3, %%mm5		\n"
      "	 psubsw	%%mm2, %%mm4		\n"
      "	 psubsw	%%mm3, %%mm5		\n"
      "	 psubw	%%mm5, %%mm4		\n"
      "	 movq	%%mm4, 8(%%eax)		\n"
      "	 addl	$16, %%eax		\n"
      "	decl	%2			\n"
      "	testl	%2, %2			\n"
      "	jg	1b			\n"
      : "+D" (src), "+a" (dest), "+S" (n)
      : "c" (p));
  asm volatile ("emms");
}

OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmxx, abs_u16_s16, OIL_IMPL_FLAG_MMX);
#endif
sl@0
   240
sl@0
   241
#ifdef ENABLE_BROKEN_IMPLS
/*
 * abs_u16_s16 using pmaxsw: each result is max(x, 0 - x), computed on two
 * 8-byte groups (eight elements) per loop pass.  A scalar loop first reduces
 * the count to a multiple of eight.  Only built when ENABLE_BROKEN_IMPLS is
 * defined.
 *
 * Register use is fixed: src in edi, n in esi; dest arrives in eax and is
 * moved to ebp, which is saved and restored around the loop.
 *
 * NOTE(review): the vector loop advances both pointers by 16 bytes
 * regardless of dstr/sstr and tests its counter only after the first pass;
 * this looks like the reason it is filed under "broken" -- confirm before
 * enabling.
 */
static void
abs_u16_s16_mmx2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  for (; n & 7; n--) {
    *dest = ABS (*src);
    OIL_INCREMENT (dest, dstr);
    OIL_INCREMENT (src, sstr);
  }
  n /= 8;

  __asm__ __volatile__ ("\n"
      "	pushl	%%ebp			\n"
      "	movl	%%eax, %%ebp		\n"
      "	.p2align 4,,15			\n"
      "1:	movq	(%%edi), %%mm0		\n"
      "	pxor	%%mm1, %%mm1		\n"
      "	 movq	8(%%edi), %%mm2		\n"
      "	 addl	$16, %%edi		\n"
      "	psubw	%%mm0, %%mm1		\n"
      "	 pxor	%%mm3, %%mm3		\n"
      "	pmaxsw	%%mm0, %%mm1		\n"
      "	 psubw	%%mm2, %%mm3		\n"
      "	movq	%%mm1, (%%ebp)		\n"
      "	 pmaxsw	%%mm2, %%mm3		\n"
      "	 movq	%%mm3, 8(%%ebp)		\n"
      "	 addl	$16, %%ebp		\n"
      "	decl	%2			\n"
      "	testl	%2, %2			\n"
      "	jg	1b			\n"
      "	popl	%%ebp			\n"
      : "+D" (src), "+a" (dest), "+S" (n)
      : /* no pure inputs */
      : "ecx", "edx");
  asm volatile ("emms");
}

OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx2, abs_u16_s16, OIL_IMPL_FLAG_MMXEXT);
#endif
sl@0
   277
sl@0
   278
#ifdef ENABLE_BROKEN_IMPLS
/*
 * abs_u16_s16 using XMM registers: each result is max(x, 0 - x) via
 * psubw/pmaxsw.  A scalar loop first reduces the count to a multiple of
 * eight.  Only built when ENABLE_BROKEN_IMPLS is defined.
 *
 * Register use is fixed: src in edi, n in esi; dest arrives in eax and is
 * moved to ebp, which is saved and restored around the loop.
 *
 * NOTE(review): movq transfers 8 bytes per load/store while both pointers
 * advance by 16, so it looks like only half of each group of eight elements
 * is processed; dstr/sstr are also ignored by the vector loop.  Confirm
 * before enabling.
 */
static void
abs_u16_s16_sse2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  for (; n & 7; n--) {
    *dest = ABS (*src);
    OIL_INCREMENT (dest, dstr);
    OIL_INCREMENT (src, sstr);
  }
  n /= 8;

  __asm__ __volatile__ ("\n"
      "	pushl	%%ebp			\n"
      "	movl	%%eax, %%ebp		\n"
      "	.p2align 4,,15			\n"
      "1:	movq	(%%edi), %%xmm0		\n"
      "	addl	$16, %%edi		\n"
      "	pxor	%%xmm1, %%xmm1		\n"
      "	psubw	%%xmm0, %%xmm1		\n"
      "	pmaxsw	%%xmm0, %%xmm1		\n"
      "	movq	%%xmm1, (%%ebp)		\n"
      "	addl	$16, %%ebp		\n"
      "	decl	%2			\n"
      "	testl	%2, %2			\n"
      "	jg	1b			\n"
      "	popl	%%ebp			\n"
      : "+D" (src), "+a" (dest), "+S" (n)
      : /* no pure inputs */
      : "ecx", "edx");
}

OIL_DEFINE_IMPL_FULL (abs_u16_s16_sse2, abs_u16_s16, OIL_IMPL_FLAG_SSE2);
#endif
sl@0
   308
sl@0
   309
sl@0
   310
sl@0
   311
/*
 * Symbian accessor for the abs_u16_s16_i386asm implementation record.
 *
 * The generated text used the whole registration argument list
 * ("abs_u16_s16_i386asm, abs_u16_s16") as the identifier, which is not valid
 * C.  The name now follows the pattern of the abs_u16_s16_i386asm3 accessor
 * in this file.  The implementation itself is compiled out with "#if 0", so
 * the accessor is disabled too; otherwise it would refer to an object that
 * is never defined.
 */
#if 0
#ifdef	__SYMBIAN32__
OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm() {
		return &_oil_function_impl_abs_u16_s16_i386asm;
}
#endif
#endif
sl@0
   317
sl@0
   318
/*
 * Symbian accessor for the abs_u16_s16_i386asm_uber4 implementation record.
 *
 * The generated text used the whole registration argument list as the
 * identifier, which is not valid C; the name now follows the pattern of the
 * abs_u16_s16_i386asm3 accessor in this file.  The implementation is
 * compiled out with "#if 0", so the accessor is disabled as well.
 */
#if 0
#ifdef	__SYMBIAN32__
OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm_uber4() {
		return &_oil_function_impl_abs_u16_s16_i386asm_uber4;
}
#endif
#endif
sl@0
   324
sl@0
   325
/*
 * Symbian accessor for the abs_u16_s16_i386asm2 implementation record.
 *
 * The generated text used the whole registration argument list as the
 * identifier, which is not valid C; the name now follows the pattern of the
 * abs_u16_s16_i386asm3 accessor in this file.  The implementation is
 * compiled out with "#if 0", so the accessor is disabled as well.
 */
#if 0
#ifdef	__SYMBIAN32__
OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm2() {
		return &_oil_function_impl_abs_u16_s16_i386asm2;
}
#endif
#endif
sl@0
   331
sl@0
   332
#ifdef	__SYMBIAN32__
/*
 * Symbian accessor: returns the address of the implementation record that
 * the OIL_DEFINE_IMPL_FULL registration of abs_u16_s16_mmx creates.
 *
 * The generated text used the whole registration argument list
 * ("abs_u16_s16_mmx, abs_u16_s16") as the identifier, which is not valid C;
 * the name now follows the pattern of the abs_u16_s16_i386asm3 accessor in
 * this file.
 */
OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmx() {
		return &_oil_function_impl_abs_u16_s16_mmx;
}
#endif
sl@0
   338
sl@0
   339
/*
 * Symbian accessor for the abs_u16_s16_mmxx implementation record.
 *
 * The generated text used the whole registration argument list as the
 * identifier, which is not valid C; the name now follows the pattern of the
 * abs_u16_s16_i386asm3 accessor in this file.  The implementation is
 * compiled out with "#if 0", so the accessor is disabled as well.
 */
#if 0
#ifdef	__SYMBIAN32__
OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmxx() {
		return &_oil_function_impl_abs_u16_s16_mmxx;
}
#endif
#endif
sl@0
   345
sl@0
   346
/*
 * Symbian accessor for the abs_u16_s16_mmx2 implementation record.
 *
 * The generated text used the whole registration argument list as the
 * identifier, which is not valid C; the name now follows the pattern of the
 * abs_u16_s16_i386asm3 accessor in this file.  The implementation only
 * exists when ENABLE_BROKEN_IMPLS is defined, so the accessor is gated on
 * the same macro.
 */
#if defined(__SYMBIAN32__) && defined(ENABLE_BROKEN_IMPLS)
OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmx2() {
		return &_oil_function_impl_abs_u16_s16_mmx2;
}
#endif
sl@0
   352
sl@0
   353
/*
 * Symbian accessor for the abs_u16_s16_sse2 implementation record.
 *
 * The generated text used the whole registration argument list as the
 * identifier, which is not valid C; the name now follows the pattern of the
 * abs_u16_s16_i386asm3 accessor in this file.  The implementation only
 * exists when ENABLE_BROKEN_IMPLS is defined, so the accessor is gated on
 * the same macro.
 */
#if defined(__SYMBIAN32__) && defined(ENABLE_BROKEN_IMPLS)
OilFunctionImpl* __oil_function_impl_abs_u16_s16_sse2() {
		return &_oil_function_impl_abs_u16_s16_sse2;
}
#endif
sl@0
   359
sl@0
   360
sl@0
   361
sl@0
   362
#ifdef	__SYMBIAN32__
/*
 * Symbian accessor: returns the address of the implementation record
 * _oil_function_impl_abs_u16_s16_i386asm3.
 */
OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm3() {
  return &_oil_function_impl_abs_u16_s16_i386asm3;
}
#endif
sl@0
   368