1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/i386/abs_i386.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,368 @@
1.4 +/*
1.5 + * LIBOIL - Library of Optimized Inner Loops
1.6 + * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
1.7 + * All rights reserved.
1.8 + *
1.9 + * Redistribution and use in source and binary forms, with or without
1.10 + * modification, are permitted provided that the following conditions
1.11 + * are met:
1.12 + * 1. Redistributions of source code must retain the above copyright
1.13 + * notice, this list of conditions and the following disclaimer.
1.14 + * 2. Redistributions in binary form must reproduce the above copyright
1.15 + * notice, this list of conditions and the following disclaimer in the
1.16 + * documentation and/or other materials provided with the distribution.
1.17 + *
1.18 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1.19 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1.20 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.21 + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1.22 + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1.23 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1.24 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1.25 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1.26 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1.27 + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1.28 + * POSSIBILITY OF SUCH DAMAGE.
1.29 + */
1.30 +//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
1.31 +
1.32 +#ifdef HAVE_CONFIG_H
1.33 +#include "config.h"
1.34 +#endif
1.35 +
1.36 +#include <liboil/liboilfunction.h>
1.37 +#include "liboil/simdpack/simdpack.h"
1.38 +
1.39 +#define ABS(x) ((x)>0 ? (x) : -(x)) /* Absolute value of x. The argument is evaluated twice, so pass only side-effect-free expressions. */
1.40 +
1.41 +#if 0 /* Disabled: everything up to the matching #endif is compiled out. */
1.42 +static void /* Scalar *dest = |*src| for 16-bit elements, using CMOV to pick the negated value. */
1.43 +abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) /* dstr and sstr are never referenced: the asm steps both pointers by a fixed 2 bytes. */
1.44 +{
1.45 + __asm__ __volatile__ ("\n"
1.46 + " .p2align 4,,15 \n" /* align the loop head */
1.47 + "1: movswl (%0), %%eax \n" /* eax = current source element, sign-extended to 32 bits */
1.48 + " addl $2, %0 \n" /* src += 2 bytes */
1.49 + " movl %%eax, %%edx \n" /* edx = copy of the value ... */
1.50 + " negl %%edx \n" /* ... negated */
1.51 + " cmpl $-1, %%eax \n" /* compare the value against -1 */
1.52 + " cmovle %%edx, %%eax \n" /* value <= -1: replace it with its negation */
1.53 + " movw %%ax, (%1) \n" /* store the low 16 bits of the result at dest */
1.54 + " addl $2, %1 \n" /* dest += 2 bytes */
1.55 + " decl %2 \n" /* n-- */
1.56 + " testl %2, %2 \n"
1.57 + " jg 1b \n":"+r" (src), "+r" (dest), "+r" (n) /* repeat while n > 0; the test sits at the bottom, so the body runs once even if n <= 0 on entry */
1.58 + ::"eax", "edx"); /* no input operands; eax and edx are scratch */
1.59 +}
1.60 +
1.61 +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm, abs_u16_s16, OIL_IMPL_FLAG_CMOV); /* macro defined outside this file; presumably registers the function for class abs_u16_s16 with the CMOV flag */
1.62 +#endif
1.63 +
1.64 +#if 0 /* Disabled: everything up to the matching #endif is compiled out. */
1.65 +/* abs_u16_s16_i386asm after running through uberopt: the same instructions, with the decrement of n moved earlier in the loop body. */
1.66 +static void
1.67 +abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src,
1.68 + int sstr, int n) /* dstr and sstr are never referenced: both pointers step by a fixed 2 bytes */
1.69 +{
1.70 + __asm__ __volatile__ ("\n"
1.71 + " .p2align 4,,15 \n"
1.72 + "1: \n"
1.73 + " movswl (%0), %%eax \n" /* UBER 0: */ /* eax = current source element, sign-extended */
1.74 + " addl $2, %0 \n" /* UBER 1: 0 */ /* src += 2 bytes */
1.75 + " movl %%eax, %%edx \n" /* UBER 2: 0 */ /* edx = copy of the value */
1.76 + " decl %2 \n" /* UBER 7: */ /* n-- */
1.77 + " negl %%edx \n" /* UBER 3: 2 */ /* edx = negated value */
1.78 + " cmpl $-1, %%eax ; cmovle %%edx, %%eax \n" /* UBER 4: 3 */ /* value <= -1: take the negation */
1.79 + " movw %%ax, (%1) \n" /* UBER 5: 4 */ /* store the low 16 bits at dest */
1.80 + " addl $2, %1 \n" /* UBER 6: 5 */ /* dest += 2 bytes */
1.81 + " testl %2, %2 \n" /* re-test n: the flags from decl were overwritten by cmpl/addl */
1.82 + " jg 1b \n" /* repeat while n > 0 (bottom-tested, so the body always runs once) */
1.83 + :"+r" (src), "+r" (dest), "+r" (n)
1.84 + ::"eax", "edx"); /* no input operands; eax and edx are scratch */
1.85 +}
1.86 +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm_uber4, abs_u16_s16, OIL_IMPL_FLAG_CMOV); /* macro defined outside this file; presumably registers the function for class abs_u16_s16 with the CMOV flag */
1.87 +#endif
1.88 +
1.89 +#if 0 /* Disabled: everything up to the matching #endif is compiled out. */
1.90 +static void /* CMOV variant with hard-coded registers: src in edi, n in esi, dest passed in eax and moved to ebp. */
1.91 +abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) /* dstr and sstr are never referenced: both pointers step by a fixed 2 bytes */
1.92 +{
1.93 + __asm__ __volatile__ ("\n"
1.94 + " pushl %%ebp \n" /* save ebp; it serves as the destination pointer below */
1.95 + " movl %%eax, %%ebp \n" /* ebp = dest (dest is bound to eax by the +a constraint) */
1.96 + " .p2align 4,,15 \n"
1.97 + "1: movswl (%%edi), %%eax \n" /* eax = current source element, sign-extended; this overwrites the register bound to dest */
1.98 + " addl $2, %%edi \n" /* src += 2 bytes */
1.99 + " movl %%eax, %%edx \n" /* edx = copy of the value ... */
1.100 + " negl %%edx \n" /* ... negated */
1.101 + " cmpl $-1, %%eax \n"
1.102 + " cmovle %%edx, %%eax \n" /* value <= -1: replace it with its negation */
1.103 + " movw %%ax, (%%ebp) \n" /* store the low 16 bits at the destination */
1.104 + " addl $2, %%ebp \n" /* destination += 2 bytes */
1.105 + " decl %2 \n" /* n-- (operand 2 is n, held in esi) */
1.106 + " testl %2, %2 \n"
1.107 + " jg 1b \n" /* repeat while n > 0 (bottom-tested, so the body always runs once) */
1.108 + " popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n) /* restore ebp; operands: src=edi, dest=eax, n=esi */
1.109 + ::"ecx", "edx"); /* no input operands; ecx and edx declared clobbered */
1.110 +}
1.111 +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm2, abs_u16_s16, OIL_IMPL_FLAG_CMOV); /* macro defined outside this file; presumably registers the function for class abs_u16_s16 with the CMOV flag */
1.112 +#endif
1.113 +/* abs_u16_s16_i386asm3: branch-free absolute value of n signed 16-bit elements read from src, stored as unsigned 16-bit elements at dest. After each element sstr is added to the raw src address and dstr to the raw dest address. Does nothing when n <= 0, or when built for __WINSCW__ / __WINS__. */
1.114 +static void
1.115 +abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
1.116 +{
1.117 +#if !defined(__WINSCW__) && !defined(__WINS__)
1.118 + if (n > 0) __asm__ __volatile__ ("\n" /* the loop is bottom-tested with decl/jne, so it must never be entered with n <= 0 */
1.119 + " .p2align 4,,15 \n"
1.120 + "1: movswl (%1), %%eax \n" /* eax = x, the current source element sign-extended to 32 bits */
1.121 + " add %3, %1 \n" /* src += sstr */
1.122 + " mov %%eax, %%edx \n" /* edx = x */
1.123 + " sar $0xf, %%ax \n" /* ax = 0xffff if x < 0 else 0; with the sign-extended upper half eax is now all ones or all zeros */
1.124 + " and %%edx, %%eax \n" /* eax = x if x < 0, else 0 */
1.125 + " add %%eax, %%eax \n" /* eax = 2x if x < 0, else 0 */
1.126 + " sub %%eax, %%edx \n" /* edx = x - 2x = -x if x < 0, else x */
1.127 + " mov %%dx, (%0) \n" /* store the low 16 bits of the result at dest */
1.128 + " add %4, %0 \n" /* dest += dstr */
1.129 + " decl %2 \n" /* n-- */
1.130 + " jne 1b \n" /* loop until n reaches 0 */
1.131 + : "+r" (dest), "+r" (src), "+m" (n) /* operands 0, 1, 2 */
1.132 + : "m" (sstr), "m" (dstr) /* operand 3 = source stride, operand 4 = destination stride */
1.133 + : "eax", "edx", "memory", "cc"); /* the asm reads and writes memory through src/dest and changes the flags */
1.134 +#endif
1.135 +}
1.136 +OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16);
1.137 +
1.138 +
1.139 +/* abs_u16_s16_mmx: absolute value of n signed 16-bit elements, four per MMX iteration. Leftover elements (n mod 4) are done in plain C first; each group of four is gathered into a local buffer, transformed with MMX and scattered to dest. The body is empty when built for __WINSCW__ / __WINS__. */
1.140 +static void
1.141 +abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
1.142 +{
1.143 +#if !defined(__WINSCW__) && !defined(__WINS__)
1.144 + static const int16_t p[][4] = { /* saturation constants, one 64-bit row each */
1.145 + { -32768, -32768, -32768, -32768 },
1.146 + { 32767, 32767, 32767, 32767 }
1.147 + };
1.148 + int16_t tmp[4]; /* staging buffer: four elements are gathered here, transformed in place, then scattered */
1.149 +
1.150 + while (n & 3) { /* peel elements one at a time until n is a multiple of 4 */
1.151 + *dest = ABS (*src);
1.152 + OIL_INCREMENT (dest, dstr); /* OIL_INCREMENT is defined outside this file; presumably advances the pointer by the given stride -- confirm */
1.153 + OIL_INCREMENT (src, sstr);
1.154 + n--;
1.155 + }
1.156 + n /= 4; /* n now counts groups of four elements */
1.157 +
1.158 + __asm__ __volatile__ ("\n"
1.159 + " movq (%0), %%mm2 \n" /* mm2 = p[0], four lanes of -32768 */
1.160 + " movq 8(%0), %%mm3 \n" /* mm3 = p[1], four lanes of 32767 */
1.161 + :: "r" (p)); /* mm2/mm3 are expected to survive until the asm statement inside the loop below */
1.162 +
1.163 + while (n--) {
1.164 + tmp[0] = *src; /* gather four source elements into tmp */
1.165 + OIL_INCREMENT (src, sstr);
1.166 + tmp[1] = *src;
1.167 + OIL_INCREMENT (src, sstr);
1.168 + tmp[2] = *src;
1.169 + OIL_INCREMENT (src, sstr);
1.170 + tmp[3] = *src;
1.171 + OIL_INCREMENT (src, sstr);
1.172 + __asm__ __volatile__ ("\n"
1.173 + " movq (%0), %%mm1 \n" /* mm1 = the four gathered values x */
1.174 + " movq %%mm1, %%mm0 \n" /* mm0 = second copy of x */
1.175 + " paddsw %%mm2, %%mm0 \n" /* mm0 = saturate(x - 32768): pins every negative x at -32768 */
1.176 + " paddsw %%mm3, %%mm1 \n" /* mm1 = saturate(x + 32767): pins every positive x at 32767 */
1.177 + " psubsw %%mm2, %%mm0 \n" /* undo the offset: mm0 = x for x >= 0, else 0 */
1.178 + " psubsw %%mm3, %%mm1 \n" /* undo the offset: mm1 = x for x <= 0, else 0 */
1.179 + " psubw %%mm1, %%mm0 \n" /* mm0 = positive part minus negative part = |x| (wrapping subtract) */
1.180 + " movq %%mm0, (%0) \n" /* write the four results back into tmp */
1.181 + : : "r" (tmp)
1.182 + : "memory" ); /* tmp is modified through the pointer, hence the memory clobber */
1.183 + *dest = tmp[0]; /* scatter the four results to dest */
1.184 + OIL_INCREMENT (dest, dstr);
1.185 + *dest = tmp[1];
1.186 + OIL_INCREMENT (dest, dstr);
1.187 + *dest = tmp[2];
1.188 + OIL_INCREMENT (dest, dstr);
1.189 + *dest = tmp[3];
1.190 + OIL_INCREMENT (dest, dstr);
1.191 + }
1.192 + asm volatile ("emms"); /* leave MMX state so later x87 floating-point code works */
1.193 +#endif
1.194 +}
1.195 +
1.196 +OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx, abs_u16_s16, OIL_IMPL_FLAG_MMX); /* macro defined outside this file; presumably registers the function for class abs_u16_s16 with the MMX flag */
1.197 +
1.198 +#if 0 /* Disabled: everything up to the matching #endif is compiled out. */
1.199 +static void /* MMX variant handling eight elements (16 bytes) per loop iteration, entirely in asm. */
1.200 +abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) /* the asm loop ignores dstr/sstr and steps both pointers by 16 bytes; only the scalar peel loop uses the strides */
1.201 +{
1.202 + short p[] = { -32768, -32768, -32768, -32768, /* first 8 bytes: four lanes of -32768 */
1.203 + 32767, 32767, 32767, 32767 /* next 8 bytes: four lanes of 32767 */
1.204 + };
1.205 +
1.206 + while (n & 7) { /* peel elements one at a time until n is a multiple of 8 */
1.207 + *dest = ABS (*src);
1.208 + OIL_INCREMENT (dest, dstr); /* OIL_INCREMENT is defined outside this file; presumably advances the pointer by the given stride -- confirm */
1.209 + OIL_INCREMENT (src, sstr);
1.210 + n--;
1.211 + }
1.212 + n /= 8; /* n now counts groups of eight elements */
1.213 + __asm__ __volatile__ ("\n"
1.214 + " movq (%3), %%mm2 \n" /* mm2 = four lanes of -32768 */
1.215 + " movq 8(%3), %%mm3 \n" /* mm3 = four lanes of 32767 */
1.216 + " .p2align 4,,15 \n"
1.217 + "1: movq (%%edi), %%mm0 \n" /* mm0 = first four source values x */
1.218 + " movq (%%edi), %%mm1 \n" /* mm1 = the same four values */
1.219 + " paddsw %%mm2, %%mm0 \n" /* mm0 = saturate(x - 32768) */
1.220 + " paddsw %%mm3, %%mm1 \n" /* mm1 = saturate(x + 32767) */
1.221 + " psubsw %%mm2, %%mm0 \n" /* mm0 = x for x >= 0, else 0 */
1.222 + " psubsw %%mm3, %%mm1 \n" /* mm1 = x for x <= 0, else 0 */
1.223 + " psubw %%mm1, %%mm0 \n" /* mm0 = |x| */
1.224 + " movq %%mm0, (%%eax) \n" /* store the first four results at dest */
1.225 + " movq 8(%%edi), %%mm4 \n" /* same sequence for the second four values, in mm4/mm5 */
1.226 + " movq 8(%%edi), %%mm5 \n"
1.227 + " addl $16, %%edi \n" /* src += 16 bytes */
1.228 + " paddsw %%mm2, %%mm4 \n"
1.229 + " paddsw %%mm3, %%mm5 \n"
1.230 + " psubsw %%mm2, %%mm4 \n"
1.231 + " psubsw %%mm3, %%mm5 \n"
1.232 + " psubw %%mm5, %%mm4 \n"
1.233 + " movq %%mm4, 8(%%eax) \n" /* store the second four results */
1.234 + " addl $16, %%eax \n" /* dest += 16 bytes */
1.235 + " decl %2 \n" /* n-- */
1.236 + " testl %2, %2 \n"
1.237 + " jg 1b \n":"+D" (src), "+a" (dest), "+S" (n) /* repeat while n > 0; bottom-tested, so the body runs once even when n is 0 here */
1.238 + :"c" (p)); /* operand 3 = constant table, in ecx; no clobber list is given */
1.239 + asm volatile ("emms"); /* leave MMX state */
1.240 +}
1.241 +OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmxx, abs_u16_s16, OIL_IMPL_FLAG_MMX); /* macro defined outside this file; presumably registers the function for class abs_u16_s16 with the MMX flag */
1.242 +#endif
1.243 +
1.244 +#ifdef ENABLE_BROKEN_IMPLS /* compiled only when ENABLE_BROKEN_IMPLS is defined */
1.245 +static void /* Extended-MMX variant: computes max(x, 0 - x) per lane, eight elements (16 bytes) per loop iteration. */
1.246 +abs_u16_s16_mmx2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) /* the asm loop ignores dstr/sstr and steps both pointers by 16 bytes; only the scalar peel loop uses the strides */
1.247 +{
1.248 + while (n & 7) { /* peel elements one at a time until n is a multiple of 8 */
1.249 + *dest = ABS (*src);
1.250 + OIL_INCREMENT (dest, dstr); /* OIL_INCREMENT is defined outside this file; presumably advances the pointer by the given stride -- confirm */
1.251 + OIL_INCREMENT (src, sstr);
1.252 + n--;
1.253 + }
1.254 + n /= 8; /* n now counts groups of eight elements */
1.255 + __asm__ __volatile__ ("\n"
1.256 + " pushl %%ebp \n" /* save ebp; it serves as the destination pointer below */
1.257 + " movl %%eax, %%ebp \n" /* ebp = dest (dest is bound to eax by the +a constraint) */
1.258 + " .p2align 4,,15 \n"
1.259 + "1: movq (%%edi), %%mm0 \n" /* mm0 = first four source values x */
1.260 + " pxor %%mm1, %%mm1 \n" /* mm1 = 0 */
1.261 + " movq 8(%%edi), %%mm2 \n" /* mm2 = next four source values y */
1.262 + " addl $16, %%edi \n" /* src += 16 bytes */
1.263 + " psubw %%mm0, %%mm1 \n" /* mm1 = 0 - x */
1.264 + " pxor %%mm3, %%mm3 \n" /* mm3 = 0 */
1.265 + " pmaxsw %%mm0, %%mm1 \n" /* mm1 = signed max(x, 0 - x) */
1.266 + " psubw %%mm2, %%mm3 \n" /* mm3 = 0 - y */
1.267 + " movq %%mm1, (%%ebp) \n" /* store the first four results */
1.268 + " pmaxsw %%mm2, %%mm3 \n" /* mm3 = signed max(y, 0 - y) */
1.269 + " movq %%mm3, 8(%%ebp) \n" /* store the second four results */
1.270 + " addl $16, %%ebp \n" /* destination += 16 bytes */
1.271 + " decl %2 \n" /* n-- (operand 2 is n, held in esi) */
1.272 + " testl %2, %2 \n"
1.273 + " jg 1b \n" /* repeat while n > 0; NOTE(review): bottom-tested, so one group is processed even when n is 0 here */
1.274 + " popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n) /* restore ebp; operands: src=edi, dest=eax, n=esi */
1.275 + ::"ecx", "edx"); /* no input operands; ecx and edx declared clobbered */
1.276 + asm volatile ("emms"); /* leave MMX state */
1.277 +}
1.278 +OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx2, abs_u16_s16, OIL_IMPL_FLAG_MMXEXT); /* macro defined outside this file; presumably registers the function for class abs_u16_s16 with the MMXEXT flag */
1.279 +#endif
1.280 +
1.281 +#ifdef ENABLE_BROKEN_IMPLS /* compiled only when ENABLE_BROKEN_IMPLS is defined */
1.282 +static void /* SSE2 variant: computes max(x, 0 - x) per lane in xmm registers. */
1.283 +abs_u16_s16_sse2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) /* the asm loop ignores dstr/sstr and steps both pointers by 16 bytes; only the scalar peel loop uses the strides */
1.284 +{
1.285 + while (n & 7) { /* peel elements one at a time until n is a multiple of 8 */
1.286 + *dest = ABS (*src);
1.287 + OIL_INCREMENT (dest, dstr); /* OIL_INCREMENT is defined outside this file; presumably advances the pointer by the given stride -- confirm */
1.288 + OIL_INCREMENT (src, sstr);
1.289 + n--;
1.290 + }
1.291 + n /= 8; /* n now counts groups of eight elements */
1.292 + __asm__ __volatile__ ("\n"
1.293 + " pushl %%ebp \n" /* save ebp; it serves as the destination pointer below */
1.294 + " movl %%eax, %%ebp \n" /* ebp = dest (dest is bound to eax by the +a constraint) */
1.295 + " .p2align 4,,15 \n"
1.296 + "1: movq (%%edi), %%xmm0 \n" /* NOTE(review): movq moves 8 bytes (four elements), yet both pointers advance 16 bytes per iteration -- looks like half of each group is skipped; confirm */
1.297 + " addl $16, %%edi \n" /* src += 16 bytes */
1.298 + " pxor %%xmm1, %%xmm1 \n" /* xmm1 = 0 */
1.299 + " psubw %%xmm0, %%xmm1 \n" /* xmm1 = 0 - x */
1.300 + " pmaxsw %%xmm0, %%xmm1 \n" /* xmm1 = signed max(x, 0 - x) */
1.301 + " movq %%xmm1, (%%ebp) \n" /* store 8 bytes of results */
1.302 + " addl $16, %%ebp \n" /* destination += 16 bytes */
1.303 + " decl %2 \n" /* n-- (operand 2 is n, held in esi) */
1.304 + " testl %2, %2 \n"
1.305 + " jg 1b \n" /* repeat while n > 0; NOTE(review): bottom-tested, so one group is processed even when n is 0 here */
1.306 + " popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n) /* restore ebp; operands: src=edi, dest=eax, n=esi */
1.307 + ::"ecx", "edx"); /* no input operands; ecx and edx declared clobbered */
1.308 +}
1.309 +OIL_DEFINE_IMPL_FULL (abs_u16_s16_sse2, abs_u16_s16, OIL_IMPL_FLAG_SSE2); /* macro defined outside this file; presumably registers the function for class abs_u16_s16 with the SSE2 flag */
1.310 +#endif
1.311 +
1.312 +
1.313 +
1.314 +#if defined(__SYMBIAN32__) && 0 /* kept disabled: abs_u16_s16_i386asm sits inside an #if 0 region of this file, so its impl object is never defined */
1.315 +/* Symbian accessor returning the address of the OilFunctionImpl object of abs_u16_s16_i386asm. Only the implementation name belongs in the identifiers; the class name abs_u16_s16 is not part of them. */
1.316 +OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm() {
1.317 + return &_oil_function_impl_abs_u16_s16_i386asm;
1.318 +}
1.319 +#endif
1.320 +
1.321 +#if defined(__SYMBIAN32__) && 0 /* kept disabled: abs_u16_s16_i386asm_uber4 sits inside an #if 0 region of this file, so its impl object is never defined */
1.322 +/* Symbian accessor returning the address of the OilFunctionImpl object of abs_u16_s16_i386asm_uber4. Only the implementation name belongs in the identifiers; the class name abs_u16_s16 is not part of them. */
1.323 +OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm_uber4() {
1.324 + return &_oil_function_impl_abs_u16_s16_i386asm_uber4;
1.325 +}
1.326 +#endif
1.327 +
1.328 +#if defined(__SYMBIAN32__) && 0 /* kept disabled: abs_u16_s16_i386asm2 sits inside an #if 0 region of this file, so its impl object is never defined */
1.329 +/* Symbian accessor returning the address of the OilFunctionImpl object of abs_u16_s16_i386asm2. Only the implementation name belongs in the identifiers; the class name abs_u16_s16 is not part of them. */
1.330 +OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm2() {
1.331 + return &_oil_function_impl_abs_u16_s16_i386asm2;
1.332 +}
1.333 +#endif
1.334 +
1.335 +#ifdef __SYMBIAN32__
1.336 +/* Symbian accessor returning the address of the OilFunctionImpl object of abs_u16_s16_mmx. Only the implementation name belongs in the identifiers; the class name abs_u16_s16 is not part of them. */
1.337 +OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmx() {
1.338 + return &_oil_function_impl_abs_u16_s16_mmx;
1.339 +}
1.340 +#endif
1.341 +
1.342 +#if defined(__SYMBIAN32__) && 0 /* kept disabled: abs_u16_s16_mmxx sits inside an #if 0 region of this file, so its impl object is never defined */
1.343 +/* Symbian accessor returning the address of the OilFunctionImpl object of abs_u16_s16_mmxx. Only the implementation name belongs in the identifiers; the class name abs_u16_s16 is not part of them. */
1.344 +OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmxx() {
1.345 + return &_oil_function_impl_abs_u16_s16_mmxx;
1.346 +}
1.347 +#endif
1.348 +
1.349 +#if defined(__SYMBIAN32__) && defined(ENABLE_BROKEN_IMPLS) /* abs_u16_s16_mmx2 exists only when ENABLE_BROKEN_IMPLS is defined */
1.350 +/* Symbian accessor returning the address of the OilFunctionImpl object of abs_u16_s16_mmx2. Only the implementation name belongs in the identifiers; the class name abs_u16_s16 is not part of them. */
1.351 +OilFunctionImpl* __oil_function_impl_abs_u16_s16_mmx2() {
1.352 + return &_oil_function_impl_abs_u16_s16_mmx2;
1.353 +}
1.354 +#endif
1.355 +
1.356 +#if defined(__SYMBIAN32__) && defined(ENABLE_BROKEN_IMPLS) /* abs_u16_s16_sse2 exists only when ENABLE_BROKEN_IMPLS is defined */
1.357 +/* Symbian accessor returning the address of the OilFunctionImpl object of abs_u16_s16_sse2. Only the implementation name belongs in the identifiers; the class name abs_u16_s16 is not part of them. */
1.358 +OilFunctionImpl* __oil_function_impl_abs_u16_s16_sse2() {
1.359 + return &_oil_function_impl_abs_u16_s16_sse2;
1.360 +}
1.361 +#endif
1.362 +
1.363 +
1.364 +
1.365 +#ifdef __SYMBIAN32__
1.366 +/* Symbian-only accessor: returns the address of _oil_function_impl_abs_u16_s16_i386asm3. */
1.367 +OilFunctionImpl* __oil_function_impl_abs_u16_s16_i386asm3() {
1.368 + return &_oil_function_impl_abs_u16_s16_i386asm3;
1.369 +}
1.370 +#endif
1.371 +