os/ossrv/genericopenlibs/liboil/src/i386/trans8x8_i386.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/i386/trans8x8_i386.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,267 @@
     1.4 +/*
     1.5 + * LIBOIL - Library of Optimized Inner Loops
     1.6 + * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
     1.7 + * All rights reserved.
     1.8 + *
     1.9 + * Redistribution and use in source and binary forms, with or without
    1.10 + * modification, are permitted provided that the following conditions
    1.11 + * are met:
    1.12 + * 1. Redistributions of source code must retain the above copyright
    1.13 + *    notice, this list of conditions and the following disclaimer.
    1.14 + * 2. Redistributions in binary form must reproduce the above copyright
    1.15 + *    notice, this list of conditions and the following disclaimer in the
    1.16 + *    documentation and/or other materials provided with the distribution.
    1.17 + * 
    1.18 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
    1.19 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    1.20 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    1.21 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
    1.22 + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    1.23 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    1.24 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    1.25 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    1.26 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
    1.27 + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    1.28 + * POSSIBILITY OF SUCH DAMAGE.
    1.29 + */
    1.30 +//Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    1.31 +
    1.32 +#ifdef HAVE_CONFIG_H
    1.33 +#include "config.h"
    1.34 +#endif
    1.35 +
    1.36 +#include <liboil/liboilfunction.h>
    1.37 +#include <math.h>
    1.38 +
    1.39 +OIL_DECLARE_CLASS(trans8x8_u16);
    1.40 +
    1.41 +/* this could use additional work. */
    1.42 +static void
    1.43 +trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
    1.44 +{
    1.45 +#if !defined(__WINSCW__) && !defined(__WINS__)      
    1.46 +  asm volatile (
    1.47 +      "  leal (%3,%3,2),%%eax \n"         // UBER 0:
    1.48 +      "  movq (%1), %%mm0 \n"             // UBER 1:
    1.49 +      "  movq (%1,%3,2), %%mm2 \n"        // UBER 2: 
    1.50 +      "  movq %%mm0, %%mm4 \n"            // UBER 3: 1
    1.51 +      "  movq %%mm2, %%mm5 \n"            // UBER 4: 2
    1.52 +      "  punpcklwd (%1,%3), %%mm0 \n"     // UBER 5: 1
    1.53 +      "  punpcklwd (%1,%%eax), %%mm2 \n"  // UBER 6: 0 2
    1.54 +      "  punpckhwd (%1,%3), %%mm4 \n"     // UBER 7: 3
    1.55 +      "  punpckhwd (%1,%%eax), %%mm5 \n"  // UBER 8: 4
    1.56 +      "  movq %%mm0, %%mm1 \n"            // UBER 9: 5
    1.57 +      "  movq %%mm4, %%mm3 \n"            // UBER 10: 7
    1.58 +      "  punpckldq %%mm2, %%mm0 \n"       // UBER 11: 5 6
    1.59 +      "  punpckldq %%mm5, %%mm4 \n"       // UBER 12: 7 8
    1.60 +      "  punpckhdq %%mm2, %%mm1 \n"       // UBER 13: 6 9
    1.61 +      "  punpckhdq %%mm5, %%mm3 \n"       // UBER 14: 9 10
    1.62 +      "  leal (%2,%2,2),%%eax \n"         // UBER 15: 8
    1.63 +      "  movq %%mm0, 0(%0) \n"            // UBER 16: 11
    1.64 +      "  movq %%mm1, (%0,%2) \n"          // UBER 17: 13
    1.65 +      "  movq %%mm4, (%0,%2,2) \n"        // UBER 18: 12
    1.66 +      "  movq %%mm3, (%0,%%eax) \n"       // UBER 19: 14 15
    1.67 +
    1.68 +      "  leal (%3,%3,2),%%eax \n"
    1.69 +      "  movq 8(%1), %%mm0 \n"
    1.70 +      "  movq 8(%1,%3,2), %%mm2 \n"
    1.71 +      "  movq %%mm0, %%mm4 \n"
    1.72 +      "  movq %%mm2, %%mm5 \n"
    1.73 +      "  punpcklwd 8(%1,%3), %%mm0 \n"
    1.74 +      "  punpcklwd 8(%1,%%eax), %%mm2 \n"
    1.75 +      "  punpckhwd 8(%1,%3), %%mm4 \n"
    1.76 +      "  punpckhwd 8(%1,%%eax), %%mm5 \n"
    1.77 +      "  movq %%mm0, %%mm1 \n"
    1.78 +      "  movq %%mm4, %%mm3 \n"
    1.79 +      "  punpckldq %%mm2, %%mm0 \n"
    1.80 +      "  punpckldq %%mm5, %%mm4 \n"
    1.81 +      "  punpckhdq %%mm2, %%mm1 \n"
    1.82 +      "  punpckhdq %%mm5, %%mm3 \n"
    1.83 +      "  leal (%2,%2,2),%%eax \n"
    1.84 +      "  leal (%0,%2,4),%0 \n"
    1.85 +      "  movq %%mm0, 0(%0) \n"
    1.86 +      "  movq %%mm1, (%0,%2) \n"
    1.87 +      "  movq %%mm4, (%0,%2,2) \n"
    1.88 +      "  movq %%mm3, (%0,%%eax) \n"
    1.89 +
    1.90 +      "  leal (%1,%3,4),%1 \n"
    1.91 +      "  leal (%3,%3,2),%%eax \n"
    1.92 +      "  movq 0(%1), %%mm0 \n"
    1.93 +      "  movq 0(%1,%3,2), %%mm2 \n"
    1.94 +      "  movq %%mm0, %%mm4 \n"
    1.95 +      "  movq %%mm2, %%mm5 \n"
    1.96 +      "  punpcklwd 0(%1,%3), %%mm0 \n"
    1.97 +      "  punpcklwd 0(%1,%%eax), %%mm2 \n"
    1.98 +      "  punpckhwd 0(%1,%3), %%mm4 \n"
    1.99 +      "  punpckhwd 0(%1,%%eax), %%mm5 \n"
   1.100 +      "  movq %%mm0, %%mm1 \n"
   1.101 +      "  movq %%mm4, %%mm3 \n"
   1.102 +      "  punpckldq %%mm2, %%mm0 \n"
   1.103 +      "  punpckldq %%mm5, %%mm4 \n"
   1.104 +      "  punpckhdq %%mm2, %%mm1 \n"
   1.105 +      "  punpckhdq %%mm5, %%mm3 \n"
   1.106 +      "  leal (%2,%2,2),%%eax \n"
   1.107 +      "  neg %2 \n"
   1.108 +      "  leal (%0,%2,4),%0 \n"
   1.109 +      "  neg %2 \n"
   1.110 +      "  movq %%mm0, 8(%0) \n"
   1.111 +      "  movq %%mm1, 8(%0,%2) \n"
   1.112 +      "  movq %%mm4, 8(%0,%2,2) \n"
   1.113 +      "  movq %%mm3, 8(%0,%%eax) \n"
   1.114 +
   1.115 +      "  leal (%3,%3,2),%%eax \n"
   1.116 +      "  movq 8(%1), %%mm0 \n"
   1.117 +      "  movq 8(%1,%3,2), %%mm2 \n"
   1.118 +      "  movq %%mm0, %%mm4 \n"
   1.119 +      "  movq %%mm2, %%mm5 \n"
   1.120 +      "  punpcklwd 8(%1,%3), %%mm0 \n"
   1.121 +      "  punpcklwd 8(%1,%%eax), %%mm2 \n"
   1.122 +      "  punpckhwd 8(%1,%3), %%mm4 \n"
   1.123 +      "  punpckhwd 8(%1,%%eax), %%mm5 \n"
   1.124 +      "  movq %%mm0, %%mm1 \n"
   1.125 +      "  movq %%mm4, %%mm3 \n"
   1.126 +      "  punpckldq %%mm2, %%mm0 \n"
   1.127 +      "  punpckldq %%mm5, %%mm4 \n"
   1.128 +      "  punpckhdq %%mm2, %%mm1 \n"
   1.129 +      "  punpckhdq %%mm5, %%mm3 \n"
   1.130 +      "  leal (%2,%2,2),%%eax \n"
   1.131 +      "  leal (%0,%2,4),%0 \n"
   1.132 +      "  movq %%mm0, 8(%0) \n"
   1.133 +      "  movq %%mm1, 8(%0,%2) \n"
   1.134 +      "  movq %%mm4, 8(%0,%2,2) \n"
   1.135 +      "  movq %%mm3, 8(%0,%%eax) \n"
   1.136 +      "  emms \n"
   1.137 +      : "+r" (dest), "+r" (src), "+r" (dstr), "+r" (sstr)
   1.138 +      :
   1.139 +      : "eax");
   1.140 +#endif
   1.141 +}
   1.142 +OIL_DEFINE_IMPL_FULL (trans8x8_u16_mmx, trans8x8_u16, OIL_IMPL_FLAG_MMX);
   1.143 +
   1.144 +static void
   1.145 +trans8x8_u16_asm1 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
   1.146 +{
   1.147 +#if !defined(__WINSCW__) && !defined(__WINS__)      
   1.148 +  int saved_ebx = 0;
   1.149 +  asm (
   1.150 +      "  movl %%ebx, %4 \n"
   1.151 +      "  movl %0, %%ecx \n"
   1.152 +      "  movl %2, %%ebx \n"
   1.153 +      "  movl %1, %%edx \n"
   1.154 +      "  lea (%%ecx,%%edx,8), %%esi \n"
   1.155 +      "  sub %%edx, %%esi\n "
   1.156 +      "  movl $7, %%edi \n"
   1.157 +      "1: \n"
   1.158 +
   1.159 +      "  mov (%%ebx), %%ax \n"
   1.160 +      "  mov %%ax,(%%ecx) \n"
   1.161 +      "  mov 2(%%ebx), %%ax \n"
   1.162 +      "  mov %%ax,(%%ecx,%%edx,1) \n"
   1.163 +      "  mov 4(%%ebx), %%ax \n"
   1.164 +      "  mov %%ax,(%%ecx,%%edx,2) \n"
   1.165 +      "  mov 8(%%ebx), %%ax \n"
   1.166 +      "  mov %%ax,(%%ecx,%%edx,4) \n"
   1.167 +
   1.168 +      "  neg %%edx \n"
   1.169 +
   1.170 +      "  mov 6(%%ebx), %%ax \n"
   1.171 +      "  mov %%ax,(%%esi,%%edx,4) \n"
   1.172 +      "  mov 10(%%ebx), %%ax \n"
   1.173 +      "  mov %%ax,(%%esi,%%edx,2) \n"
   1.174 +      "  mov 12(%%ebx), %%ax \n"
   1.175 +      "  mov %%ax,(%%esi,%%edx,1) \n"
   1.176 +      "  mov 14(%%ebx), %%ax \n"
   1.177 +      "  mov %%ax,(%%esi) \n"
   1.178 +
   1.179 +      "  neg %%edx \n"
   1.180 +      "  add %3, %%ebx \n"
   1.181 +      "  add $2, %%ecx \n"
   1.182 +      "  add $2, %%esi \n"
   1.183 +
   1.184 +      "  dec %%edi \n"
   1.185 +      "  jge 1b \n"
   1.186 +      "  movl %4, %%ebx \n"
   1.187 +      :
   1.188 +      : "m" (dest), "m" (dstr), "m" (src), "m" (sstr), "m" (saved_ebx)
   1.189 +      : "eax", "ecx", "edx", "esi", "edi");
   1.190 +#endif
   1.191 +}
   1.192 +OIL_DEFINE_IMPL (trans8x8_u16_asm1, trans8x8_u16);
   1.193 +
   1.194 +static void
   1.195 +trans8x8_u16_asm2 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
   1.196 +{
   1.197 +#if !defined(__WINSCW__) && !defined(__WINS__)      
   1.198 +  int i;
   1.199 +  int saved_ebx = 0;
   1.200 +  asm (
   1.201 +      "  movl %%ebx, %5 \n"
   1.202 +      "  movl %0, %%ecx \n"
   1.203 +      "  movl %2, %%ebx \n"
   1.204 +      "  movl %1, %%edx \n"
   1.205 +      "  lea (%%ecx,%%edx,8), %%esi \n"
   1.206 +      "  sub %%edx, %%esi\n "
   1.207 +      "  movl $7, %4 \n"
   1.208 +      "  movl %%edx, %%edi \n"
   1.209 +      "  negl %%edi \n"
   1.210 +      "1: \n"
   1.211 +
   1.212 +      "  movl (%%ebx), %%eax \n"
   1.213 +      "  mov %%ax,(%%ecx) \n"
   1.214 +      "  shr $16, %%eax \n"
   1.215 +      "  mov %%ax,(%%ecx,%%edx,1) \n"
   1.216 +
   1.217 +      "  movl 4(%%ebx), %%eax \n"
   1.218 +      "  mov %%ax,(%%ecx,%%edx,2) \n"
   1.219 +      "  shr $16, %%eax \n"
   1.220 +      "  mov %%ax,(%%esi,%%edi,4) \n"
   1.221 +
   1.222 +      "  movl 8(%%ebx), %%eax \n"
   1.223 +      "  mov %%ax,(%%ecx,%%edx,4) \n"
   1.224 +      "  shr $16, %%eax \n"
   1.225 +      "  mov %%ax,(%%esi,%%edi,2) \n"
   1.226 +
   1.227 +      "  movl 12(%%ebx), %%eax \n"
   1.228 +      "  mov %%ax,(%%esi,%%edi,1) \n"
   1.229 +      "  shr $16, %%eax \n"
   1.230 +      "  mov %%ax,(%%esi) \n"
   1.231 +
   1.232 +      "  add %3, %%ebx \n"
   1.233 +      "  add $2, %%ecx \n"
   1.234 +      "  add $2, %%esi \n"
   1.235 +
   1.236 +      "  decl %4 \n"
   1.237 +      "  jge 1b \n"
   1.238 +      "  movl %5, %%ebx \n"
   1.239 +      :
   1.240 +      : "m" (dest), "m" (dstr), "m" (src), "m" (sstr), "m" (i), "m" (saved_ebx)
   1.241 +      : "eax", "ecx", "edx", "esi", "edi");
   1.242 +#endif
   1.243 +}
   1.244 +OIL_DEFINE_IMPL (trans8x8_u16_asm2, trans8x8_u16);
   1.245 +
   1.246 +
   1.247 +
   1.248 +#ifdef	__SYMBIAN32__
   1.249 + 
   1.250 +OilFunctionImpl* __oil_function_impl_trans8x8_u16_mmx, trans8x8_u16() {
   1.251 +		return &_oil_function_impl_trans8x8_u16_mmx, trans8x8_u16;
   1.252 +}
   1.253 +#endif
   1.254 +
   1.255 +
   1.256 +
   1.257 +#ifdef	__SYMBIAN32__
   1.258 + 
   1.259 +OilFunctionImpl* __oil_function_impl_trans8x8_u16_asm1() {
   1.260 +		return &_oil_function_impl_trans8x8_u16_asm1;
   1.261 +}
   1.262 +#endif
   1.263 +
   1.264 +#ifdef	__SYMBIAN32__
   1.265 + 
   1.266 +OilFunctionImpl* __oil_function_impl_trans8x8_u16_asm2() {
   1.267 +		return &_oil_function_impl_trans8x8_u16_asm2;
   1.268 +}
   1.269 +#endif
   1.270 +