os/ossrv/genericopenlibs/liboil/src/i386/abs_i386.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
     1 /*
     2  * LIBOIL - Library of Optimized Inner Loops
     3  * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
     4  * All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions
     8  * are met:
     9  * 1. Redistributions of source code must retain the above copyright
    10  *    notice, this list of conditions and the following disclaimer.
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  *    notice, this list of conditions and the following disclaimer in the
    13  *    documentation and/or other materials provided with the distribution.
    14  * 
    15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
    19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
    24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    25  * POSSIBILITY OF SUCH DAMAGE.
    26  */
    27 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    28 
    29 #ifdef HAVE_CONFIG_H
    30 #include "config.h"
    31 #endif
    32 
    33 #include <liboil/liboilfunction.h>
    34 #include "liboil/simdpack/simdpack.h"
    35 
/* Absolute value of x.  NOTE: function-like macro that evaluates its
 * argument twice -- never pass an expression with side effects
 * (e.g. ABS(*src++)). */
#define ABS(x) ((x)>0 ? (x) : -(x))
    37 
#if 0
/* Disabled (#if 0): scalar i386 abs_u16_s16 using a conditional move.
 * Computes dest[i] = |src[i]| for n elements.
 * NOTE(review): the asm ignores the dstr/sstr stride parameters and
 * hard-codes a packed 2-byte stride -- presumably why it is disabled;
 * confirm before re-enabling.  Also assumes n > 0. */
static void
abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  __asm__ __volatile__ ("\n"
      "	.p2align 4,,15			\n"
      "1:  movswl	(%0), %%eax		\n" /* eax = sign-extended *src */
      "    addl	$2, %0			\n" /* src++ (packed stride) */
      "    movl	%%eax, %%edx		\n"
      "    negl	%%edx			\n" /* edx = -x */
      "    cmpl	$-1, %%eax		\n"
      "    cmovle	%%edx, %%eax		\n" /* if (x <= -1) eax = -x */
      "    movw	%%ax, (%1)		\n" /* *dest = |x| */
      "    addl	$2, %1			\n" /* dest++ (packed stride) */
      "    decl	%2			\n"
      "    testl	%2, %2			\n"
      "    jg	1b			\n":"+r" (src), "+r" (dest), "+r" (n)
      ::"eax", "edx");
}

OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
#endif
    60 
#if 0
/* Disabled (#if 0): abs_u16_s16_i386asm after manual instruction
 * scheduling with "uberopt"; the UBER n: m comments record each
 * instruction's id and the ids it depends on.  Same algorithm and the
 * same limitation: strides are ignored (packed 2-byte data assumed). */
/* The previous function after running through uberopt */
static void
abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src,
    int sstr, int n)
{
  __asm__ __volatile__ ("\n"
      "	.p2align 4,,15			\n"
      "1:                               \n"
      "    movswl	(%0), %%eax	\n" /* UBER 0:     */
      "    addl	$2, %0			\n" /* UBER 1: 0   */
      "    movl	%%eax, %%edx		\n" /* UBER 2: 0   */
      "    decl	%2			\n" /* UBER 7:     */
      "    negl	%%edx			\n" /* UBER 3: 2   */
      "    cmpl	$-1, %%eax ; cmovle %%edx, %%eax \n" /* UBER 4: 3 */
      "    movw	%%ax, (%1)		\n" /* UBER 5: 4   */
      "    addl	$2, %1			\n" /* UBER 6: 5   */
      "    testl	%2, %2		\n"
      "    jg	1b			\n"
      :"+r" (src), "+r" (dest), "+r" (n)
      ::"eax", "edx");
}
OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm_uber4, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
#endif
    85 
#if 0
/* Disabled (#if 0): variant of the cmov implementation pinned to fixed
 * registers (src in edi, dest staged through ebp, count in esi).  ebp is
 * saved/restored manually around the loop.  Same stride limitation as the
 * other disabled scalar variants: dstr/sstr are ignored. */
static void
abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  __asm__ __volatile__ ("\n"
      "	pushl	%%ebp			\n"
      "	movl	%%eax, %%ebp		\n" /* ebp = dest */
      "	.p2align 4,,15			\n"
      "1:	movswl	(%%edi), %%eax		\n"
      "	addl	$2, %%edi		\n"
      "	movl	%%eax, %%edx		\n"
      "	negl	%%edx			\n"
      "	cmpl	$-1, %%eax		\n"
      "	cmovle	%%edx, %%eax		\n" /* eax = |x| */
      "	movw	%%ax, (%%ebp)		\n"
      "	addl	$2, %%ebp		\n"
      "	decl	%2			\n"
      "	testl	%2, %2			\n"
      "	jg	1b			\n"
      "	popl	%%ebp			\n":"+D" (src), "+a" (dest), "+S" (n)
      ::"ecx", "edx");
}
OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm2, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
#endif
   110 
/* Scalar i386 abs_u16_s16 using a branch-free sign trick:
 *   mask = x >> 15 (arithmetic)        -> all-ones if x < 0, else 0
 *   |x|  = x - 2*(x & mask)
 * This variant honors the dstr/sstr stride arguments (unlike the disabled
 * implementations above).
 * NOTE(review): the decl/jne loop executes at least once and only stops on
 * reaching zero, so it assumes n > 0 -- confirm liboil guarantees that.
 * On Symbian emulator builds (WINSCW/WINS) the body compiles to an empty
 * stub and the function does nothing. */
static void
abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)      
  __asm__ __volatile__ ("\n"
      "	.p2align 4,,15			\n"
      "1:  movswl (%1), %%eax           \n" /* eax = sign-extended *src */
      "    add %3, %1                   \n" /* src += sstr */
      "    mov %%eax, %%edx             \n"
      "    sar $0xf, %%ax               \n" /* ax = sign mask (eax already sign-extended) */
      "    and %%edx, %%eax             \n" /* eax = x if x < 0, else 0 */
      "    add %%eax, %%eax             \n" /* eax = 2*(x & mask) */
      "    sub %%eax, %%edx             \n" /* edx = |x| */
      "    mov %%dx, (%0)               \n"
      "    add %4, %0                   \n" /* dest += dstr */
      "    decl %2                      \n"
      "    jne 1b                       \n"
      : "+r" (dest), "+r" (src), "+m" (n)
      : "m" (dstr), "m" (sstr)
      : "eax", "edx");
#endif
}
OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16);
   134 
   135 
   136 
/* abs_u16_s16 using plain MMX saturating arithmetic.
 * With mm2 = {INT16_MIN x4} and mm3 = {INT16_MAX x4}, for each vector x:
 *   mm0 = satsub(satadd(x, -32768), -32768)  ==  max(x, 0)
 *   mm1 = satsub(satadd(x,  32767),  32767)  ==  min(x, 0)
 *   mm0 - mm1 == |x|
 * Elements are staged through tmp[] so arbitrary dstr/sstr strides work.
 * NOTE(review): mm2/mm3 are loaded in one asm statement and used in
 * another; nothing tells the compiler those MMX registers stay live in
 * between -- looks fragile, confirm it holds for supported compilers.
 * On Symbian emulator builds (WINSCW/WINS) this is an empty stub. */
static void
abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)      
  /* Clamping constants: one quad of INT16_MIN, one quad of INT16_MAX. */
  static const int16_t p[][4] = {
    { -32768, -32768, -32768, -32768 },
    { 32767, 32767, 32767, 32767 }
  };
  int16_t tmp[4];

  /* Scalar peel until n is a multiple of 4 (honors the strides). */
  while (n & 3) {
    *dest = ABS (*src);
    OIL_INCREMENT (dest, dstr);
    OIL_INCREMENT (src, sstr);
    n--;
  }
  n /= 4;
 
  /* Load the saturation constants into mm2/mm3 once. */
  __asm__ __volatile__ ("\n"
      "	movq	(%0), %%mm2		\n"
      "	movq	8(%0), %%mm3		\n"
      :: "r" (p));

  while (n--) {
    /* Gather four strided source elements into a contiguous buffer. */
    tmp[0] = *src;
    OIL_INCREMENT (src, sstr);
    tmp[1] = *src;
    OIL_INCREMENT (src, sstr);
    tmp[2] = *src;
    OIL_INCREMENT (src, sstr);
    tmp[3] = *src;
    OIL_INCREMENT (src, sstr);
    __asm__ __volatile__ ("\n"
        "	movq	(%0), %%mm1		\n"
        "	movq	%%mm1, %%mm0		\n"
        "	paddsw	%%mm2, %%mm0		\n"
        "	paddsw	%%mm3, %%mm1		\n"
        "	psubsw	%%mm2, %%mm0		\n" /* mm0 = max(x, 0) */
        "	psubsw	%%mm3, %%mm1		\n" /* mm1 = min(x, 0) */
        "	psubw	%%mm1, %%mm0		\n" /* mm0 = |x| */
        "	movq	%%mm0, (%0)		\n"
        : : "r" (tmp)
        : "memory" );
    /* Scatter the four absolute values back out with the dest stride. */
    *dest = tmp[0];
    OIL_INCREMENT (dest, dstr);
    *dest = tmp[1];
    OIL_INCREMENT (dest, dstr);
    *dest = tmp[2];
    OIL_INCREMENT (dest, dstr);
    *dest = tmp[3];
    OIL_INCREMENT (dest, dstr);
  }
  /* Clear MMX state so subsequent x87 floating point works. */
  asm volatile ("emms");
#endif  
}

OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx, abs_u16_s16, OIL_IMPL_FLAG_MMX);
   194 
#if 0
/* Disabled (#if 0): unrolled-by-2 MMX variant of the saturation trick
 * (see abs_u16_s16_mmx): 16 elements per loop iteration, two quads in
 * flight.  NOTE(review): the scalar peel loop honors dstr/sstr, but the
 * asm loop advances edi/eax by raw 16-byte steps, i.e. it assumes packed
 * stride-2 data -- presumably why it is disabled.  The loop body also
 * executes at least once, so n must be >= 8 on entry to the asm. */
static void
abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  /* Clamping constants: {INT16_MIN x4} then {INT16_MAX x4}. */
  short p[] = { -32768, -32768, -32768, -32768,
    32767, 32767, 32767, 32767
  };

  /* Scalar peel until n is a multiple of 8. */
  while (n & 7) {
    *dest = ABS (*src);
    OIL_INCREMENT (dest, dstr);
    OIL_INCREMENT (src, sstr);
    n--;
  }
  n /= 8;
  __asm__ __volatile__ ("\n"
      "	movq	(%3), %%mm2		\n"
      "	movq	8(%3), %%mm3		\n"
      "	.p2align 4,,15			\n"
      "1:	movq	(%%edi), %%mm0		\n"
      "	movq	(%%edi), %%mm1		\n"
      "	paddsw	%%mm2, %%mm0		\n"
      "	paddsw	%%mm3, %%mm1		\n"
      "	psubsw	%%mm2, %%mm0		\n"
      "	psubsw	%%mm3, %%mm1		\n"
      "	psubw	%%mm1, %%mm0		\n"
      "	movq	%%mm0, (%%eax)		\n"
      "	 movq	8(%%edi), %%mm4		\n"
      "	 movq	8(%%edi), %%mm5		\n"
      "	 addl	$16, %%edi		\n"
      "	 paddsw	%%mm2, %%mm4		\n"
      "	 paddsw	%%mm3, %%mm5		\n"
      "	 psubsw	%%mm2, %%mm4		\n"
      "	 psubsw	%%mm3, %%mm5		\n"
      "	 psubw	%%mm5, %%mm4		\n"
      "	 movq	%%mm4, 8(%%eax)		\n"
      "	 addl	$16, %%eax		\n"
      "	decl	%2			\n"
      "	testl	%2, %2			\n"
      "	jg	1b			\n":"+D" (src), "+a" (dest), "+S" (n)
      :"c" (p));
  asm volatile ("emms");
}
OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmxx, abs_u16_s16, OIL_IMPL_FLAG_MMX);
#endif
   240 
#ifdef ENABLE_BROKEN_IMPLS
/* MMXEXT implementation using pmaxsw: |x| = max(x, -x) per 16-bit lane
 * (0 - (-32768) wraps back to 0x8000, which reads as 32768 as u16, so
 * even INT16_MIN comes out right).  Two quads (8 elements) per iteration.
 * Kept under ENABLE_BROKEN_IMPLS: NOTE(review) the scalar peel honors
 * dstr/sstr, but the asm loop advances edi/ebp by raw 16-byte steps
 * (assumes packed stride 2), and the loop body runs at least once even
 * when n/8 == 0 -- both look like the reasons it is flagged broken. */
static void
abs_u16_s16_mmx2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
  /* Scalar peel until n is a multiple of 8. */
  while (n & 7) {
    *dest = ABS (*src);
    OIL_INCREMENT (dest, dstr);
    OIL_INCREMENT (src, sstr);
    n--;
  }
  n /= 8;
  __asm__ __volatile__ ("\n"
      "	pushl	%%ebp			\n"
      "	movl	%%eax, %%ebp		\n" /* ebp = dest */
      "	.p2align 4,,15			\n"
      "1:	movq	(%%edi), %%mm0		\n"
      "	pxor	%%mm1, %%mm1		\n"
      "	 movq	8(%%edi), %%mm2		\n"
      "	 addl	$16, %%edi		\n"
      "	psubw	%%mm0, %%mm1		\n" /* mm1 = -x (low quad) */
      "	 pxor	%%mm3, %%mm3		\n"
      "	pmaxsw	%%mm0, %%mm1		\n" /* mm1 = max(x, -x) = |x| */
      "	 psubw	%%mm2, %%mm3		\n"
      "	movq	%%mm1, (%%ebp)		\n"
      "	 pmaxsw	%%mm2, %%mm3		\n" /* same for the high quad */
      "	 movq	%%mm3, 8(%%ebp)		\n"
      "	 addl	$16, %%ebp		\n"
      "	decl	%2			\n"
      "	testl	%2, %2			\n"
      "	jg	1b			\n"
      "	popl	%%ebp			\n":"+D" (src), "+a" (dest), "+S" (n)
      ::"ecx", "edx");
  asm volatile ("emms");
}
OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx2, abs_u16_s16, OIL_IMPL_FLAG_MMXEXT);
#endif
   277 
   278 #ifdef ENABLE_BROKEN_IMPLS
   279 static void
   280 abs_u16_s16_sse2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
   281 {
   282   while (n & 7) {
   283     *dest = ABS (*src);
   284     OIL_INCREMENT (dest, dstr);
   285     OIL_INCREMENT (src, sstr);
   286     n--;
   287   }
   288   n /= 8;
   289   __asm__ __volatile__ ("\n"
   290       "	pushl	%%ebp			\n"
   291       "	movl	%%eax, %%ebp		\n"
   292       "	.p2align 4,,15			\n"
   293       "1:	movq	(%%edi), %%xmm0		\n"
   294       "	addl	$16, %%edi		\n"
   295       "	pxor	%%xmm1, %%xmm1		\n"
   296       "	psubw	%%xmm0, %%xmm1		\n"
   297       "	pmaxsw	%%xmm0, %%xmm1		\n"
   298       "	movq	%%xmm1, (%%ebp)		\n"
   299       "	addl	$16, %%ebp		\n"
   300       "	decl	%2			\n"
   301       "	testl	%2, %2			\n"
   302       "	jg	1b			\n"
   303       "	popl	%%ebp			\n":"+D" (src), "+a" (dest), "+S" (n)
   304       ::"ecx", "edx");
   305 }
   306 OIL_DEFINE_IMPL_FULL (abs_u16_s16_sse2, abs_u16_s16, OIL_IMPL_FLAG_SSE2);
   307 #endif
   308 
   309 
   310 
   311 #ifdef	__SYMBIAN32__
   312  
   313 OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm, abs_u16_s16() {
   314 		return &_oil_function_impl_abs_u16_s16_i386asm, abs_u16_s16;
   315 }
   316 #endif
   317 
   318 #ifdef	__SYMBIAN32__
   319  
   320 OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm_uber4, abs_u16_s16() {
   321 		return &_oil_function_impl_abs_u16_s16_i386asm_uber4, abs_u16_s16;
   322 }
   323 #endif
   324 
   325 #ifdef	__SYMBIAN32__
   326  
   327 OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm2, abs_u16_s16() {
   328 		return &_oil_function_impl_abs_u16_s16_i386asm2, abs_u16_s16;
   329 }
   330 #endif
   331 
   332 #ifdef	__SYMBIAN32__
   333  
   334 OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmx, abs_u16_s16() {
   335 		return &_oil_function_impl_abs_u16_s16_mmx, abs_u16_s16;
   336 }
   337 #endif
   338 
   339 #ifdef	__SYMBIAN32__
   340  
   341 OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmxx, abs_u16_s16() {
   342 		return &_oil_function_impl_abs_u16_s16_mmxx, abs_u16_s16;
   343 }
   344 #endif
   345 
   346 #ifdef	__SYMBIAN32__
   347  
   348 OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmx2, abs_u16_s16() {
   349 		return &_oil_function_impl_abs_u16_s16_mmx2, abs_u16_s16;
   350 }
   351 #endif
   352 
   353 #ifdef	__SYMBIAN32__
   354  
   355 OilFunctionImpl* __oil_function_impl_abs_u16_s16_sse2, abs_u16_s16() {
   356 		return &_oil_function_impl_abs_u16_s16_sse2, abs_u16_s16;
   357 }
   358 #endif
   359 
   360 
   361 
#ifdef	__SYMBIAN32__
 
/* Symbian DLL export accessor: returns the implementation descriptor for
 * abs_u16_s16_i386asm3 registered above via OIL_DEFINE_IMPL_ASM. */
OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm3() {
		return &_oil_function_impl_abs_u16_s16_i386asm3;
}
#endif
   368