1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/i386/composite_i386.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,1147 @@
1.4 +/*
1.5 + * LIBOIL - Library of Optimized Inner Loops
1.6 + * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
1.7 + * All rights reserved.
1.8 + *
1.9 + * Redistribution and use in source and binary forms, with or without
1.10 + * modification, are permitted provided that the following conditions
1.11 + * are met:
1.12 + * 1. Redistributions of source code must retain the above copyright
1.13 + * notice, this list of conditions and the following disclaimer.
1.14 + * 2. Redistributions in binary form must reproduce the above copyright
1.15 + * notice, this list of conditions and the following disclaimer in the
1.16 + * documentation and/or other materials provided with the distribution.
1.17 + *
1.18 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1.19 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1.20 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.21 + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1.22 + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1.23 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1.24 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1.25 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1.26 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1.27 + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1.28 + * POSSIBILITY OF SUCH DAMAGE.
1.29 + */
1.30 +//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
1.31 +
1.32 +#ifdef HAVE_CONFIG_H
1.33 +#include "config.h"
1.34 +#endif
1.35 +
1.36 +#include <liboil/liboil.h>
1.37 +#include <liboil/liboilfunction.h>
1.38 +
1.39 +OIL_DECLARE_CLASS (composite_in_argb); /* function classes that the implementations below are registered against through OIL_DEFINE_IMPL_FULL */
1.40 +OIL_DECLARE_CLASS (composite_in_argb_const_src);
1.41 +OIL_DECLARE_CLASS (composite_in_argb_const_mask);
1.42 +OIL_DECLARE_CLASS (composite_over_argb);
1.43 +OIL_DECLARE_CLASS (composite_over_argb_const_src);
1.44 +OIL_DECLARE_CLASS (composite_add_argb);
1.45 +OIL_DECLARE_CLASS (composite_add_argb_const_src);
1.46 +OIL_DECLARE_CLASS (composite_in_over_argb);
1.47 +OIL_DECLARE_CLASS (composite_in_over_argb_const_src);
1.48 +OIL_DECLARE_CLASS (composite_in_over_argb_const_mask);
1.49 +
1.50 +#if 0 /* compiled out: plain C loop form of the IN operator, kept next to the asm version that reuses the same function name */
1.51 +static void
1.52 +composite_in_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
1.53 +{
1.54 + int i;
1.55 +
1.56 + for(i=0;i<n;i++){ /* one output pixel per iteration */
1.57 + dest[i] = ARGB( /* NOTE(review): ARGB, ARGB_x and COMPOSITE_IN are not defined in the visible part of this file */
1.58 + COMPOSITE_IN(ARGB_A(src[i]), mask[i]),
1.59 + COMPOSITE_IN(ARGB_R(src[i]), mask[i]),
1.60 + COMPOSITE_IN(ARGB_G(src[i]), mask[i]),
1.61 + COMPOSITE_IN(ARGB_B(src[i]), mask[i]));
1.62 + }
1.63 +}
1.64 +OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx, composite_in_argb); /* two arguments here, three in every live registration below */
1.65 +#endif
1.66 +
1.67 +/*
1.68 + * This macro loads the constants (four 16-bit words per register):
1.69 + * mm7 = { 0, 0, 0, 0 }
1.70 + * mm6 = { 128, 128, 128, 128 }
1.71 + * mm5 = { 255, 255, 255, 255 }
1.72 + * It stages the values through %eax, so the enclosing asm must list "eax" as clobbered. */
1.73 +#define MMX_LOAD_CONSTANTS \
1.74 + " pxor %%mm7, %%mm7\n" /* mm7 = 0, used as the zero source for byte->word unpacking */ \
1.75 + " movl $0x80808080, %%eax\n" \
1.76 + " movd %%eax, %%mm6\n" \
1.77 + " punpcklbw %%mm7, %%mm6\n" /* widen the four 0x80 bytes to words */ \
1.78 + " movl $0xffffffff, %%eax\n" \
1.79 + " movd %%eax, %%mm5\n" \
1.80 + " punpcklbw %%mm7, %%mm5\n" /* widen the four 0xff bytes to words */
1.81 +
1.82 +/*
1.83 + * a = muldiv255(a, b): per 16-bit word, t = a*b + 128; a = (t + (t >> 8)) >> 8
1.84 + * a, b are unpacked (one byte value per 16-bit word)
1.85 + * destroys both registers (b is left holding t >> 8)
1.86 + * requires mm6 = { 128, 128, 128, 128 } as set up by MMX_LOAD_CONSTANTS
1.87 + */
1.88 +#define MMX_MULDIV255(a,b) \
1.89 + " pmullw %%" #b ", %%" #a "\n" /* a = low 16 bits of a*b */ \
1.90 + " paddw %%mm6, %%" #a "\n" /* t = a*b + 128 */ \
1.91 + " movq %%" #a ", %%" #b "\n" \
1.92 + " psrlw $8, %%" #b "\n" /* b = t >> 8 */ \
1.93 + " paddw %%" #b ", %%" #a "\n" \
1.94 + " psrlw $8, %%" #a "\n" /* a = (t + (t >> 8)) >> 8 */
1.95 +
1.96 +static void /* IN: every byte of dest[i] becomes muldiv255(same byte of src[i], mask[i]), for n pixels */
1.97 +composite_in_argb_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
1.98 +{
1.99 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.100 + __asm__ __volatile__ (
1.101 + MMX_LOAD_CONSTANTS
1.102 + "1:\n"
1.103 + " movd (%2), %%mm0\n" /* NOTE(review): 32-bit load from mask although only the low byte is used */
1.104 + " punpcklbw %%mm7, %%mm0\n"
1.105 + " pshufw $0x00, %%mm0, %%mm1\n" /* broadcast mask[i] into all four words */
1.106 +
1.107 + " movd (%1), %%mm2\n"
1.108 + " punpcklbw %%mm7, %%mm2\n" /* src pixel widened to four words */
1.109 +
1.110 + MMX_MULDIV255(mm2, mm1)
1.111 +
1.112 + " packuswb %%mm2, %%mm2\n"
1.113 + " movd %%mm2, (%0)\n"
1.114 + " addl $4, %0\n"
1.115 + " addl $4, %1\n"
1.116 + " addl $1, %2\n"
1.117 + " decl %3\n" /* counter is tested after the body: assumes n >= 1 */
1.118 + " jnz 1b\n"
1.119 + " emms\n"
1.120 + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
1.121 + :
1.122 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.123 +#endif
1.124 +}
1.125 +OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx, composite_in_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.126 +
1.127 +/*
1.128 + * This is a different style than the others. Should be moved elsewhere.
1.129 + * Same IN operation as composite_in_argb_mmx, with shortcuts for mask 0 and mask 0xff. */
1.130 +static void
1.131 +composite_in_argb_mmx2 (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
1.132 +{
1.133 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.134 + __asm__ __volatile__ (
1.135 + MMX_LOAD_CONSTANTS
1.136 + "1:\n"
1.137 + " movl (%2), %%eax\n" /* NOTE(review): 32-bit load from mask although only the low byte is used */
1.138 + /* if alpha == 0, write a 0 */
1.139 + " testl $0x000000ff, %%eax\n"
1.140 + " je 2f\n"
1.141 + /* if alpha == 0xff, write src value */
1.142 + " cmp $0xff, %%al\n"
1.143 + " je 3f\n"
1.144 +
1.145 + " movd %%eax, %%mm0\n"
1.146 + " punpcklbw %%mm7, %%mm0\n"
1.147 + " pshufw $0x00, %%mm0, %%mm1\n" /* broadcast the mask byte into all four words */
1.148 +
1.149 + " movd (%1), %%mm2\n"
1.150 + " punpcklbw %%mm7, %%mm2\n"
1.151 +
1.152 + MMX_MULDIV255(mm2, mm1)
1.153 +
1.154 + " packuswb %%mm2, %%mm2\n"
1.155 + " movd %%mm2, (%0)\n"
1.156 + " jmp 4f\n"
1.157 + "2:\n"
1.158 + " movl $0, (%0)\n"
1.159 + " jmp 4f\n"
1.160 + "3:\n"
1.161 + " movl (%1), %%eax\n"
1.162 + " movl %%eax, (%0)\n"
1.163 + "4:\n" /* common tail: advance all three pointers */
1.164 + " addl $4, %0\n"
1.165 + " addl $4, %1\n"
1.166 + " addl $1, %2\n"
1.167 + " decl %3\n" /* counter is tested after the body: assumes n >= 1 */
1.168 + " jnz 1b\n"
1.169 + " emms\n"
1.170 + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
1.171 + :
1.172 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.173 +#endif
1.174 +}
1.175 +OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx2, composite_in_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.176 +
1.177 +static void /* IN with a constant source: dest[i] = muldiv255(src[0], mask[i]) per byte, for n pixels */
1.178 +composite_in_argb_const_src_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
1.179 +{
1.180 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.181 + __asm__ __volatile__ (
1.182 + MMX_LOAD_CONSTANTS
1.183 + " movd (%1), %%mm3\n" /* the single source pixel is loaded once, outside the loop */
1.184 + " punpcklbw %%mm7, %%mm3\n"
1.185 + "1:\n"
1.186 + " movd (%2), %%mm0\n" /* NOTE(review): 32-bit load from mask although only the low byte is used */
1.187 + " punpcklbw %%mm7, %%mm0\n"
1.188 + " pshufw $0x00, %%mm0, %%mm1\n" /* broadcast mask[i] into all four words */
1.189 +
1.190 + " movq %%mm3, %%mm2\n" /* fresh copy: MMX_MULDIV255 destroys its operands */
1.191 +
1.192 + MMX_MULDIV255(mm2, mm1)
1.193 +
1.194 + " packuswb %%mm2, %%mm2\n"
1.195 + " movd %%mm2, (%0)\n"
1.196 + " addl $4, %0\n"
1.197 + " addl $1, %2\n"
1.198 + " decl %3\n" /* counter is tested after the body: assumes n >= 1 */
1.199 + " jnz 1b\n"
1.200 + " emms\n"
1.201 + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
1.202 + :
1.203 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.204 +#endif
1.205 +}
1.206 +OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_mmx, composite_in_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.207 +
1.208 +static void /* IN with a constant mask: dest[i] = muldiv255(src[i], mask[0]) per byte, for n pixels */
1.209 +composite_in_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
1.210 +{
1.211 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.212 + __asm__ __volatile__ (
1.213 + MMX_LOAD_CONSTANTS
1.214 + " movd (%2), %%mm0\n" /* NOTE(review): 32-bit load from mask although only the low byte is used */
1.215 + " punpcklbw %%mm7, %%mm0\n"
1.216 + " pshufw $0x00, %%mm0, %%mm3\n" /* mm3 = mask byte broadcast, kept for the whole loop */
1.217 + "1:\n"
1.218 + " movq %%mm3, %%mm1\n" /* fresh copy: MMX_MULDIV255 destroys its operands */
1.219 + " movd (%1), %%mm2\n"
1.220 + " punpcklbw %%mm7, %%mm2\n"
1.221 +
1.222 + MMX_MULDIV255(mm2, mm1)
1.223 +
1.224 + " packuswb %%mm2, %%mm2\n"
1.225 + " movd %%mm2, (%0)\n"
1.226 + " addl $4, %0\n"
1.227 + " addl $4, %1\n"
1.228 + " decl %3\n" /* counter is tested after the body: assumes n >= 1 */
1.229 + " jnz 1b\n"
1.230 + " emms\n"
1.231 + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
1.232 + :
1.233 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.234 +#endif
1.235 +}
1.236 +OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_mmx, composite_in_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.237 +
1.238 +static void /* OVER: dest[i] = src[i] + muldiv255(dest[i], 255 - top byte of src[i]) per byte, saturated, for n pixels */
1.239 +composite_over_argb_mmx (uint32_t *dest, uint32_t *src, int n)
1.240 +{
1.241 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.242 + __asm__ __volatile__ (
1.243 + MMX_LOAD_CONSTANTS
1.244 + "1:\n"
1.245 + " movl (%1), %%eax\n"
1.246 + " testl $0xff000000, %%eax\n" /* top byte of src zero: leave dest untouched */
1.247 + " jz 2f\n"
1.248 +
1.249 + " movd %%eax, %%mm0\n"
1.250 + " punpcklbw %%mm7, %%mm0\n"
1.251 + " pshufw $0xff, %%mm0, %%mm1\n" /* broadcast word 3 (the top byte of src) */
1.252 + " pxor %%mm5, %%mm1\n" /* 255 ^ a, i.e. 255 - a for a in 0..255 */
1.253 +
1.254 + " movd (%0), %%mm2\n"
1.255 + " punpcklbw %%mm7, %%mm2\n"
1.256 +
1.257 + MMX_MULDIV255(mm2, mm1)
1.258 +
1.259 + " paddw %%mm0, %%mm2\n"
1.260 + " packuswb %%mm2, %%mm2\n" /* pack back to bytes with unsigned saturation */
1.261 +
1.262 + " movd %%mm2, (%0)\n"
1.263 + "2:\n"
1.264 + " addl $4, %0\n"
1.265 + " addl $4, %1\n"
1.266 + " decl %2\n" /* counter is tested after the body: assumes n >= 1 */
1.267 + " jnz 1b\n"
1.268 + " emms\n"
1.269 + :"+r" (dest), "+r" (src), "+r" (n)
1.270 + :
1.271 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.272 +#endif
1.273 +}
1.274 +OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.275 +
1.276 +/* unroll 2: OVER operator; one leading pixel when n is odd, then two pixels per iteration with the two computations interleaved */
1.277 +static void
1.278 +composite_over_argb_mmx_2 (uint32_t *dest, uint32_t *src, int n)
1.279 +{
1.280 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.281 + __asm__ __volatile__ (
1.282 + MMX_LOAD_CONSTANTS
1.283 +
1.284 + " testl $0x1, %2\n" /* odd count: handle one pixel first */
1.285 + " jz 2f\n"
1.286 +
1.287 + " movl (%1), %%eax\n"
1.288 + " testl $0xff000000, %%eax\n" /* top byte of src zero: leave dest untouched */
1.289 + " jz 1f\n"
1.290 +
1.291 + " movd %%eax, %%mm0\n"
1.292 + " punpcklbw %%mm7, %%mm0\n"
1.293 + " pshufw $0xff, %%mm0, %%mm1\n"
1.294 + " pxor %%mm5, %%mm1\n" /* 255 - top byte of src, in every word */
1.295 +
1.296 + " movd (%0), %%mm2\n"
1.297 + " punpcklbw %%mm7, %%mm2\n"
1.298 + " pmullw %%mm1, %%mm2\n" /* same sequence as MMX_MULDIV255(mm2, mm1), written out */
1.299 + " paddw %%mm6, %%mm2\n"
1.300 + " movq %%mm2, %%mm1\n"
1.301 + " psrlw $8, %%mm1\n"
1.302 + " paddw %%mm1, %%mm2\n"
1.303 + " psrlw $8, %%mm2\n"
1.304 +
1.305 + " paddw %%mm0, %%mm2\n"
1.306 + " packuswb %%mm2, %%mm2\n"
1.307 +
1.308 + " movd %%mm2, (%0)\n"
1.309 +
1.310 + "1:\n"
1.311 + " addl $4, %0\n"
1.312 + " addl $4, %1\n"
1.313 +
1.314 + "2:\n"
1.315 + " shr $1, %2\n" /* remaining work counted in pairs; zero pairs skips the loop */
1.316 + " jz 5f\n"
1.317 + "3:\n"
1.318 + " movl (%1), %%eax\n"
1.319 + " orl 4(%1), %%eax\n"
1.320 + " testl $0xff000000, %%eax\n" /* both top bytes zero: skip the pair */
1.321 + " jz 4f\n"
1.322 +
1.323 + " movd (%1), %%mm0\n"
1.324 + " movd (%0), %%mm2\n"
1.325 +
1.326 + " punpcklbw %%mm7, %%mm0\n"
1.327 + " movd 4(%1), %%mm3\n"
1.328 +
1.329 + " pshufw $0xff, %%mm0, %%mm1\n"
1.330 + " punpcklbw %%mm7, %%mm2\n"
1.331 +
1.332 + " pxor %%mm5, %%mm1\n"
1.333 + " movd 4(%0), %%mm4\n"
1.334 +
1.335 + " pmullw %%mm1, %%mm2\n"
1.336 + " punpcklbw %%mm7, %%mm3\n"
1.337 +
1.338 + " paddw %%mm6, %%mm2\n"
1.339 + " punpcklbw %%mm7, %%mm4\n" /* last use of mm7 as zero in this iteration */
1.340 +
1.341 + " movq %%mm2, %%mm1\n"
1.342 + " pshufw $0xff, %%mm3, %%mm7\n" /* from here mm7 is scratch for the second pixel */
1.343 +
1.344 + " psrlw $8, %%mm1\n"
1.345 + " pxor %%mm5, %%mm7\n"
1.346 +
1.347 + " paddw %%mm1, %%mm2\n"
1.348 + " pmullw %%mm7, %%mm4\n"
1.349 +
1.350 + " psrlw $8, %%mm2\n"
1.351 + " paddw %%mm6, %%mm4\n"
1.352 +
1.353 + " paddw %%mm0, %%mm2\n"
1.354 + " movq %%mm4, %%mm7\n"
1.355 +
1.356 + " packuswb %%mm2, %%mm2\n"
1.357 + " psrlw $8, %%mm7\n"
1.358 +
1.359 + " movd %%mm2, (%0)\n"
1.360 + " paddw %%mm7, %%mm4\n"
1.361 +
1.362 + " psrlw $8, %%mm4\n"
1.363 + " paddw %%mm3, %%mm4\n"
1.364 + " packuswb %%mm4, %%mm4\n"
1.365 + " movd %%mm4, 4(%0)\n"
1.366 +
1.367 + " pxor %%mm7, %%mm7\n" /* restore mm7 = 0 for the next iteration */
1.368 + "4:\n"
1.369 + " addl $8, %0\n"
1.370 + " addl $8, %1\n"
1.371 + " decl %2\n"
1.372 + " jnz 3b\n"
1.373 + "5:\n"
1.374 + " emms\n"
1.375 + :"+r" (dest), "+r" (src), "+r" (n)
1.376 + :
1.377 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.378 +#endif
1.379 +}
1.380 +OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_2, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.381 +
1.382 +/* replace pshufw with punpck: OVER operator using only punpck* for the broadcast; registered with OIL_IMPL_FLAG_MMX alone */
1.383 +static void
1.384 +composite_over_argb_mmx_3 (uint32_t *dest, uint32_t *src, int n)
1.385 +{
1.386 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.387 + __asm__ __volatile__ (
1.388 + MMX_LOAD_CONSTANTS
1.389 + "1:\n"
1.390 + " movl (%1), %%eax\n"
1.391 + " testl $0xff000000, %%eax\n" /* top byte of src zero: leave dest untouched */
1.392 + " jz 2f\n"
1.393 +
1.394 + " movd %%eax, %%mm0\n"
1.395 + " punpcklbw %%mm7, %%mm0\n"
1.396 + " movq %%mm0, %%mm1\n"
1.397 + " punpckhwd %%mm1, %%mm1\n" /* words become { w2, w2, w3, w3 } */
1.398 + " punpckhdq %%mm1, %%mm1\n" /* words become { w3, w3, w3, w3 } */
1.399 + " pxor %%mm5, %%mm1\n" /* 255 - top byte of src, in every word */
1.400 +
1.401 + " movd (%0), %%mm2\n"
1.402 + " punpcklbw %%mm7, %%mm2\n"
1.403 + " pmullw %%mm1, %%mm2\n" /* same sequence as MMX_MULDIV255(mm2, mm1), written out */
1.404 + " paddw %%mm6, %%mm2\n"
1.405 + " movq %%mm2, %%mm1\n"
1.406 + " psrlw $8, %%mm1\n"
1.407 + " paddw %%mm1, %%mm2\n"
1.408 + " psrlw $8, %%mm2\n"
1.409 +
1.410 + " paddw %%mm0, %%mm2\n"
1.411 + " packuswb %%mm2, %%mm2\n"
1.412 + " movd %%mm2, (%0)\n"
1.413 +
1.414 + "2:\n"
1.415 + " addl $4, %0\n"
1.416 + " addl $4, %1\n"
1.417 + " decl %2\n" /* counter is tested after the body: assumes n >= 1 */
1.418 + " jnz 1b\n"
1.419 + " emms\n"
1.420 + :"+r" (dest), "+r" (src), "+r" (n)
1.421 + :
1.422 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.423 +#endif
1.424 +}
1.425 +OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_3, composite_over_argb, OIL_IMPL_FLAG_MMX);
1.426 +
1.427 +/* written for gromit: OVER operator where the final two shifts of the divide are replaced by one pmulhuw with 257 */
1.428 +static void
1.429 +composite_over_argb_mmx_4 (uint32_t *dest, uint32_t *src, int n)
1.430 +{
1.431 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.432 + __asm__ __volatile__ (" pxor %%mm7, %%mm7\n" // mm7 = { 0, 0, 0, 0 }
1.433 + " movl $0x80808080, %%eax\n"
1.434 + " movd %%eax, %%mm6\n" // mm6 = { 128, 128, 128, 128 }
1.435 + " punpcklbw %%mm7, %%mm6\n"
1.436 + " movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
1.437 + " movd %%eax, %%mm5\n"
1.438 + " punpcklbw %%mm7, %%mm5\n"
1.439 + " movl $0x02020202, %%eax\n"
1.440 + " movd %%eax, %%mm4\n"
1.441 + " punpcklbw %%mm7, %%mm4\n"
1.442 + " paddw %%mm5, %%mm4\n" // mm4 = { 257, 257, 257, 257 }
1.443 + "1:\n"
1.444 + " movl (%1), %%eax\n"
1.445 + " testl $0xff000000, %%eax\n" /* top byte of src zero: leave dest untouched */
1.446 + " jz 2f\n"
1.447 +
1.448 + " movd %%eax, %%mm0\n"
1.449 + " punpcklbw %%mm7, %%mm0\n"
1.450 + " pshufw $0xff, %%mm0, %%mm1\n"
1.451 + " pxor %%mm5, %%mm1\n" /* 255 - top byte of src, in every word */
1.452 +
1.453 + " movd (%0), %%mm2\n"
1.454 + " punpcklbw %%mm7, %%mm2\n"
1.455 + " pmullw %%mm1, %%mm2\n"
1.456 + " paddw %%mm6, %%mm2\n" /* t = dest * (255 - a) + 128 */
1.457 + " pmulhuw %%mm4, %%mm2\n" /* high 16 bits of t * 257 */
1.458 +
1.459 + " paddw %%mm0, %%mm2\n"
1.460 + " packuswb %%mm2, %%mm2\n"
1.461 +
1.462 + " movd %%mm2, (%0)\n"
1.463 + "2:\n"
1.464 + " addl $4, %0\n"
1.465 + " addl $4, %1\n"
1.466 + " subl $1, %2\n" /* counter is tested after the body: assumes n >= 1 */
1.467 + " jnz 1b\n"
1.468 + " emms\n"
1.469 + :"+r" (dest), "+r" (src), "+r" (n)
1.470 + :
1.471 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.472 +#endif
1.473 +}
1.474 +OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_4, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.475 +
1.476 +static void /* OVER operator; like composite_over_argb_mmx_4 but builds 255 with pcmpeqw/psrlw and tests the top byte with pextrw */
1.477 +composite_over_argb_mmx_5 (uint32_t *dest, uint32_t *src, int n)
1.478 +{
1.479 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.480 + __asm__ __volatile__ (" pxor %%mm7, %%mm7\n" // mm7 = { 0, 0, 0, 0 }
1.481 + " movl $0x80808080, %%eax\n"
1.482 + " movd %%eax, %%mm6\n" // mm6 = { 128, 128, 128, 128 }
1.483 + " punpcklbw %%mm7, %%mm6\n"
1.484 +#if 0
1.485 + " movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
1.486 + " movd %%eax, %%mm5\n"
1.487 + " punpcklbw %%mm7, %%mm5\n"
1.488 +#else
1.489 + " pcmpeqw %%mm5, %%mm5\n" /* all bits set */
1.490 + " psrlw $8, %%mm5\n" // mm5 = { 255, 255, 255, 255 }
1.491 +#endif
1.492 + " movl $0x02020202, %%eax\n"
1.493 + " movd %%eax, %%mm4\n"
1.494 + " punpcklbw %%mm7, %%mm4\n"
1.495 + " paddw %%mm5, %%mm4\n" // mm4 = { 257, 257, 257, 257 }
1.496 + "1:\n"
1.497 + " movd (%1), %%mm0\n"
1.498 + " punpcklbw %%mm7, %%mm0\n"
1.499 + " xor %%eax, %%eax\n"
1.500 + " pextrw $3, %%mm0, %%eax\n" /* word 3 = top byte of src */
1.501 + " test %%eax, %%eax\n" /* zero: leave dest untouched */
1.502 + " jz 2f\n"
1.503 +
1.504 + " pshufw $0xff, %%mm0, %%mm1\n"
1.505 + " pxor %%mm5, %%mm1\n" /* 255 - top byte of src, in every word */
1.506 +
1.507 + " movd (%0), %%mm2\n"
1.508 + " punpcklbw %%mm7, %%mm2\n"
1.509 + " pmullw %%mm1, %%mm2\n"
1.510 + " paddw %%mm6, %%mm2\n" /* t = dest * (255 - a) + 128 */
1.511 + " pmulhuw %%mm4, %%mm2\n" /* high 16 bits of t * 257 */
1.512 +
1.513 + " paddw %%mm0, %%mm2\n"
1.514 + " packuswb %%mm2, %%mm2\n"
1.515 +
1.516 + " movd %%mm2, (%0)\n"
1.517 + "2:\n"
1.518 + " addl $4, %0\n"
1.519 + " addl $4, %1\n"
1.520 + " subl $1, %2\n" /* counter is tested after the body: assumes n >= 1 */
1.521 + " jnz 1b\n"
1.522 + " emms\n"
1.523 + :"+r" (dest), "+r" (src), "+r" (n)
1.524 + :
1.525 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.526 +#endif
1.527 +}
1.528 +OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_5, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.529 +
1.530 +static void /* OVER operator, one pixel per iteration, using the low four words of the xmm registers */
1.531 +composite_over_argb_sse2 (uint32_t *dest, uint32_t *src, int n)
1.532 +{
1.533 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.534 + __asm__ __volatile__ (" pxor %%xmm7, %%xmm7\n" // xmm7 = 0
1.535 + " movl $0x80808080, %%eax\n"
1.536 + " movd %%eax, %%xmm6\n" // xmm6 low words = { 128, 128, 128, 128 }
1.537 + " punpcklbw %%xmm7, %%xmm6\n"
1.538 + " movl $0xffffffff, %%eax\n" // xmm5 low words = { 255, 255, 255, 255 }
1.539 + " movd %%eax, %%xmm5\n"
1.540 + " punpcklbw %%xmm7, %%xmm5\n"
1.541 + " movl $0x02020202, %%eax\n"
1.542 + " movd %%eax, %%xmm4\n"
1.543 + " punpcklbw %%xmm7, %%xmm4\n"
1.544 + " paddw %%xmm5, %%xmm4\n" // xmm4 low words = { 257, 257, 257, 257 }
1.545 + "1:\n"
1.546 + " movl (%1), %%eax\n"
1.547 + " testl $0xff000000, %%eax\n" /* top byte of src zero: leave dest untouched */
1.548 + " jz 2f\n"
1.549 +
1.550 + " movd (%1), %%xmm1\n"
1.551 + " punpcklbw %%xmm7, %%xmm1\n"
1.552 + " pshuflw $0xff, %%xmm1, %%xmm0\n" /* broadcast word 3 across the low four words */
1.553 + " pxor %%xmm5, %%xmm0\n" /* 255 - top byte of src */
1.554 +
1.555 + " movd (%0), %%xmm3\n"
1.556 + " punpcklbw %%xmm7, %%xmm3\n"
1.557 + " pmullw %%xmm0, %%xmm3\n"
1.558 + " paddw %%xmm6, %%xmm3\n" /* t = dest * (255 - a) + 128 */
1.559 + " pmulhuw %%xmm4, %%xmm3\n" /* high 16 bits of t * 257 */
1.560 +
1.561 + " paddw %%xmm1, %%xmm3\n"
1.562 + " packuswb %%xmm3, %%xmm3\n"
1.563 + " movd %%xmm3, (%0)\n"
1.564 + "2:\n"
1.565 + " addl $4, %0\n"
1.566 + " addl $4, %1\n"
1.567 + " decl %2\n" /* counter is tested after the body: assumes n >= 1 */
1.568 + " jnz 1b\n"
1.569 + :"+r" (dest), "+r" (src), "+r" (n)
1.570 + :
1.571 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.572 +#endif
1.573 +}
1.574 +OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2, composite_over_argb, OIL_IMPL_FLAG_SSE2);
1.575 +
1.576 +/* written for shaun: OVER operator; one leading pixel when n is odd, then two pixels per iteration in one xmm register */
1.577 +static void
1.578 +composite_over_argb_sse2_2 (uint32_t *dest, uint32_t *src, int n)
1.579 +{
1.580 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.581 + __asm__ __volatile__ (" pxor %%xmm7, %%xmm7\n" // xmm7 = 0
1.582 + " movl $0x80808080, %%eax\n"
1.583 + " movd %%eax, %%xmm6\n" // xmm6 = 128 in each word
1.584 + " punpcklbw %%xmm7, %%xmm6\n"
1.585 + " punpcklwd %%xmm6, %%xmm6\n" /* spread the four equal low words over all eight */
1.586 + " movl $0xffffffff, %%eax\n" // xmm5 = 255 in each word
1.587 + " movd %%eax, %%xmm5\n"
1.588 + " punpcklbw %%xmm7, %%xmm5\n"
1.589 + " punpcklwd %%xmm5, %%xmm5\n"
1.590 + " movl $0x02020202, %%eax\n"
1.591 + " movd %%eax, %%xmm4\n"
1.592 + " punpcklbw %%xmm7, %%xmm4\n"
1.593 + " paddw %%xmm5, %%xmm4\n" // xmm4 low words = { 257, 257, 257, 257 }
1.594 + " punpcklwd %%xmm4, %%xmm4\n"
1.595 + :
1.596 + :
1.597 + :"eax"); /* NOTE(review): xmm4-xmm7 are expected to survive until the asm statements below, but the compiler is not told so */
1.598 +
1.599 + if (n&1) { /* odd count: handle one pixel first */
1.600 + __asm__ __volatile__ (
1.601 + " movl (%1), %%eax\n"
1.602 + " testl $0xff000000, %%eax\n" /* top byte of src zero: leave dest untouched */
1.603 + " jz 1f\n"
1.604 +
1.605 + " movd (%1), %%xmm1\n"
1.606 + " punpcklbw %%xmm7, %%xmm1\n"
1.607 + " pshuflw $0xff, %%xmm1, %%xmm0\n"
1.608 + " pxor %%xmm5, %%xmm0\n" /* 255 - top byte of src */
1.609 +
1.610 + " movd (%0), %%xmm3\n"
1.611 + " punpcklbw %%xmm7, %%xmm3\n"
1.612 + " pmullw %%xmm0, %%xmm3\n"
1.613 + " paddw %%xmm6, %%xmm3\n"
1.614 + " pmulhuw %%xmm4, %%xmm3\n" /* high 16 bits of t * 257 */
1.615 +
1.616 + " paddw %%xmm1, %%xmm3\n"
1.617 + " packuswb %%xmm3, %%xmm3\n"
1.618 + " movd %%xmm3, (%0)\n"
1.619 +
1.620 + "1:\n"
1.621 + " addl $4, %0\n"
1.622 + " addl $4, %1\n"
1.623 + :"+r" (dest), "+r" (src)
1.624 + :
1.625 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.626 + }
1.627 + n>>=1; /* remaining work counted in pairs */
1.628 +
1.629 + if (n>0){
1.630 + __asm__ __volatile__ ("\n"
1.631 + "3:\n"
1.632 +#if 0
1.633 + " movl (%1), %%eax\n"
1.634 + " orl 4(%1), %%eax\n"
1.635 + " testl $0xff000000, %%eax\n"
1.636 + " jz 4f\n"
1.637 +#endif
1.638 +
1.639 + " movq (%1), %%xmm1\n" /* two source pixels */
1.640 + " punpcklbw %%xmm7, %%xmm1\n"
1.641 + " pshuflw $0xff, %%xmm1, %%xmm0\n" /* broadcast word 3 within the low half */
1.642 + " pshufhw $0xff, %%xmm0, %%xmm0\n" /* broadcast word 7 within the high half */
1.643 + " pxor %%xmm5, %%xmm0\n"
1.644 +
1.645 + " movq (%0), %%xmm3\n"
1.646 + " punpcklbw %%xmm7, %%xmm3\n"
1.647 + " pmullw %%xmm0, %%xmm3\n"
1.648 + " paddw %%xmm6, %%xmm3\n"
1.649 + " pmulhuw %%xmm4, %%xmm3\n"
1.650 + " paddw %%xmm1, %%xmm3\n"
1.651 + " packuswb %%xmm3, %%xmm3\n"
1.652 + " movq %%xmm3, (%0)\n"
1.653 + "4:\n" /* only targeted by the disabled skip test above */
1.654 + " addl $8, %0\n"
1.655 + " addl $8, %1\n"
1.656 + " subl $1, %2\n"
1.657 + " jnz 3b\n"
1.658 + :"+r" (dest), "+r" (src), "+r" (n)
1.659 + :
1.660 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.661 + }
1.662 +#endif
1.663 +}
1.664 +OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_2, composite_over_argb, OIL_IMPL_FLAG_SSE2);
1.665 +
1.666 +/* written for shaun: OVER operator split into single-pixel head, four-pixel aligned middle, single-pixel tail */
1.667 +static void
1.668 +composite_over_argb_sse2_3 (uint32_t *dest, uint32_t *src, int n)
1.669 +{
1.670 + int begin; /* pixels handled one at a time before the four-pixel loop */
1.671 + int middle; /* iterations of the four-pixel loop */
1.672 + int end; /* pixels handled one at a time after it */
1.673 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.674 + __asm__ __volatile__ (" pxor %%xmm7, %%xmm7\n" // xmm7 = 0
1.675 + " movl $0x80808080, %%eax\n"
1.676 + " movd %%eax, %%xmm6\n" // xmm6 = 128 in each word
1.677 + " punpcklbw %%xmm7, %%xmm6\n"
1.678 + " punpcklwd %%xmm6, %%xmm6\n" /* spread the four equal low words over all eight */
1.679 + " movl $0xffffffff, %%eax\n" // xmm5 = 255 in each word
1.680 + " movd %%eax, %%xmm5\n"
1.681 + " punpcklbw %%xmm7, %%xmm5\n"
1.682 + " punpcklwd %%xmm5, %%xmm5\n"
1.683 + " movl $0x02020202, %%eax\n"
1.684 + " movd %%eax, %%xmm4\n"
1.685 + " punpcklbw %%xmm7, %%xmm4\n"
1.686 + " paddw %%xmm5, %%xmm4\n" // xmm4 low words = { 257, 257, 257, 257 }
1.687 + " punpcklwd %%xmm4, %%xmm4\n"
1.688 + :
1.689 + :
1.690 + :"eax"); /* NOTE(review): xmm4-xmm7 are expected to survive until the asm statements below, but the compiler is not told so */
1.691 +
1.692 + begin = 0x3 & (4 - (((unsigned long)dest & 0xf) >> 2)); /* pixels until dest reaches a 16-byte boundary */
1.693 + if (begin>n) { /* fewer pixels than the head would take: do them all in the head loop */
1.694 + begin = n;
1.695 + middle = 0;
1.696 + end = 0;
1.697 + } else {
1.698 + middle = (n-begin)>>2;
1.699 + end = n - begin - middle*4;
1.700 + }
1.701 +
1.702 + if (begin>0) {
1.703 + __asm__ __volatile__ ("\n"
1.704 + "1:\n"
1.705 + " movl (%1), %%eax\n"
1.706 + " testl $0xff000000, %%eax\n" /* top byte of src zero: leave dest untouched */
1.707 + " jz 2f\n"
1.708 +
1.709 + " movd (%1), %%xmm1\n"
1.710 + " punpcklbw %%xmm7, %%xmm1\n"
1.711 + " pshuflw $0xff, %%xmm1, %%xmm0\n"
1.712 + " pxor %%xmm5, %%xmm0\n" /* 255 - top byte of src */
1.713 +
1.714 + " movd (%0), %%xmm3\n"
1.715 + " punpcklbw %%xmm7, %%xmm3\n"
1.716 + " pmullw %%xmm0, %%xmm3\n"
1.717 + " paddw %%xmm6, %%xmm3\n"
1.718 + " pmulhuw %%xmm4, %%xmm3\n" /* high 16 bits of t * 257 */
1.719 +
1.720 + " paddw %%xmm1, %%xmm3\n"
1.721 + " packuswb %%xmm3, %%xmm3\n"
1.722 + " movd %%xmm3, (%0)\n"
1.723 +
1.724 + "2:\n"
1.725 + " addl $4, %0\n"
1.726 + " addl $4, %1\n"
1.727 + " subl $1, %2\n"
1.728 + " jnz 1b\n"
1.729 + :"+r" (dest), "+r" (src), "+r" (begin)
1.730 + :
1.731 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.732 + }
1.733 +
1.734 + if (middle>0){
1.735 + __asm__ __volatile__ ("\n"
1.736 + "1:\n"
1.737 + " movq (%1), %%xmm1\n" /* source pixels 0-1 */
1.738 + " movq 8(%1), %%xmm0\n" /* source pixels 2-3 */
1.739 + " movl (%1), %%eax\n"
1.740 + " orl 4(%1), %%eax\n"
1.741 + " orl 8(%1), %%eax\n"
1.742 + " orl 12(%1), %%eax\n"
1.743 + " test $0xff000000, %%eax\n" /* all four top bytes zero: skip the group */
1.744 + " jz 2f\n"
1.745 + " punpcklbw %%xmm7, %%xmm1\n"
1.746 + " punpcklbw %%xmm7, %%xmm0\n"
1.747 + " pshuflw $0xff, %%xmm1, %%xmm1\n"
1.748 + " pshuflw $0xff, %%xmm0, %%xmm0\n"
1.749 + " pshufhw $0xff, %%xmm1, %%xmm1\n"
1.750 + " pshufhw $0xff, %%xmm0, %%xmm0\n" /* each pixel's top byte now fills its four words */
1.751 +
1.752 + " pxor %%xmm5, %%xmm1\n"
1.753 + " pxor %%xmm5, %%xmm0\n"
1.754 +
1.755 + " movq (%0), %%xmm3\n"
1.756 + " movq 8(%0), %%xmm2\n"
1.757 + " punpcklbw %%xmm7, %%xmm3\n"
1.758 + " punpcklbw %%xmm7, %%xmm2\n"
1.759 +
1.760 + " pmullw %%xmm1, %%xmm3\n"
1.761 + " paddw %%xmm6, %%xmm3\n"
1.762 + " pmulhuw %%xmm4, %%xmm3\n"
1.763 + " pmullw %%xmm0, %%xmm2\n"
1.764 + " paddw %%xmm6, %%xmm2\n"
1.765 + " pmulhuw %%xmm4, %%xmm2\n"
1.766 + " packuswb %%xmm2, %%xmm3\n" /* four scaled dest pixels back in byte form */
1.767 +
1.768 + " movdqu (%1), %%xmm1\n" /* unaligned load: src alignment is not established above */
1.769 + " paddb %%xmm1, %%xmm3\n" /* byte add of src, without saturation (the one-pixel loops add in words and saturate on pack) */
1.770 + " movdqa %%xmm3, (%0)\n" /* aligned store: relies on the head loop having aligned dest */
1.771 + "2:\n"
1.772 + " addl $16, %0\n"
1.773 + " addl $16, %1\n"
1.774 + " subl $1, %2\n"
1.775 + " jnz 1b\n"
1.776 + :"+r" (dest), "+r" (src), "+r" (middle)
1.777 + :
1.778 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.779 + }
1.780 + if (end>0) {
1.781 + __asm__ __volatile__ ("\n"
1.782 + "1:\n"
1.783 + " movl (%1), %%eax\n"
1.784 + " testl $0xff000000, %%eax\n" /* top byte of src zero: leave dest untouched */
1.785 + " jz 2f\n"
1.786 +
1.787 + " movd (%1), %%xmm1\n"
1.788 + " punpcklbw %%xmm7, %%xmm1\n"
1.789 + " pshuflw $0xff, %%xmm1, %%xmm0\n"
1.790 + " pxor %%xmm5, %%xmm0\n"
1.791 +
1.792 + " movd (%0), %%xmm3\n"
1.793 + " punpcklbw %%xmm7, %%xmm3\n"
1.794 + " pmullw %%xmm0, %%xmm3\n"
1.795 + " paddw %%xmm6, %%xmm3\n"
1.796 + " pmulhuw %%xmm4, %%xmm3\n"
1.797 +
1.798 + " paddw %%xmm1, %%xmm3\n"
1.799 + " packuswb %%xmm3, %%xmm3\n"
1.800 + " movd %%xmm3, (%0)\n"
1.801 +
1.802 + "2:\n"
1.803 + " addl $4, %0\n"
1.804 + " addl $4, %1\n"
1.805 + " subl $1, %2\n"
1.806 + " jnz 1b\n"
1.807 + :"+r" (dest), "+r" (src), "+r" (end)
1.808 + :
1.809 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.810 + }
1.811 +#endif
1.812 +}
1.813 +OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_3, composite_over_argb, OIL_IMPL_FLAG_SSE2);
1.814 +
1.815 +
1.816 +static void /* OVER with a constant source: dest[i] = src[0] + muldiv255(dest[i], 255 - top byte of src[0]), for n pixels */
1.817 +composite_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n)
1.818 +{
1.819 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.820 + __asm__ __volatile__ (
1.821 + MMX_LOAD_CONSTANTS
1.822 + " movl (%1), %%eax\n" /* the single source pixel is loaded once, outside the loop */
1.823 + " movd %%eax, %%mm0\n"
1.824 + " punpcklbw %%mm7, %%mm0\n"
1.825 + " pshufw $0xff, %%mm0, %%mm3\n"
1.826 + " pxor %%mm5, %%mm3\n" /* mm3 = 255 - top byte of src, kept for the whole loop */
1.827 + "1:\n"
1.828 + " movq %%mm3, %%mm1\n" /* fresh copy: MMX_MULDIV255 destroys its operands */
1.829 + " movd (%0), %%mm2\n"
1.830 + " punpcklbw %%mm7, %%mm2\n"
1.831 +
1.832 + MMX_MULDIV255(mm2, mm1)
1.833 +
1.834 + " paddw %%mm0, %%mm2\n"
1.835 + " packuswb %%mm2, %%mm2\n"
1.836 +
1.837 + " movd %%mm2, (%0)\n"
1.838 + " addl $4, %0\n"
1.839 + " decl %2\n" /* counter is tested after the body: assumes n >= 1 */
1.840 + " jnz 1b\n"
1.841 + " emms\n"
1.842 + :"+r" (dest), "+r" (src), "+r" (n)
1.843 + :
1.844 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.845 +#endif
1.846 +}
1.847 +OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_mmx, composite_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.848 +
1.849 +static void /* ADD: dest[i] = dest[i] + src[i] per byte with unsigned saturation, for n pixels */
1.850 +composite_add_argb_mmx (uint32_t *dest, uint32_t *src, int n)
1.851 +{
1.852 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.853 + __asm__ __volatile__ (
1.854 + "1:\n"
1.855 + " movd (%1), %%mm0\n"
1.856 + " movd (%0), %%mm2\n"
1.857 + " paddusb %%mm0, %%mm2\n" /* saturating byte add */
1.858 + " movd %%mm2, (%0)\n"
1.859 + " addl $4, %0\n"
1.860 + " addl $4, %1\n"
1.861 + " decl %2\n" /* counter is tested after the body: assumes n >= 1 */
1.862 + " jnz 1b\n"
1.863 + " emms\n"
1.864 + :"+r" (dest), "+r" (src), "+r" (n)
1.865 + :
1.866 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands; eax itself is not used here */
1.867 +#endif
1.868 +}
1.869 +OIL_DEFINE_IMPL_FULL (composite_add_argb_mmx, composite_add_argb, OIL_IMPL_FLAG_MMX);
1.870 +
1.871 +static void /* ADD with a constant source: dest[i] = dest[i] + src[0] per byte with unsigned saturation, for n pixels */
1.872 +composite_add_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n)
1.873 +{
1.874 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.875 + __asm__ __volatile__ (
1.876 + " movd (%1), %%mm0\n" /* the single source pixel is loaded once, outside the loop */
1.877 + "1:\n"
1.878 + " movd (%0), %%mm2\n"
1.879 + " paddusb %%mm0, %%mm2\n" /* saturating byte add */
1.880 + " movd %%mm2, (%0)\n"
1.881 + " addl $4, %0\n"
1.882 + " decl %2\n" /* counter is tested after the body: assumes n >= 1 */
1.883 + " jnz 1b\n"
1.884 + " emms\n"
1.885 + :"+r" (dest), "+r" (src), "+r" (n)
1.886 + :
1.887 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands; eax itself is not used here */
1.888 +#endif
1.889 +}
1.890 +OIL_DEFINE_IMPL_FULL (composite_add_argb_const_src_mmx, composite_add_argb_const_src, OIL_IMPL_FLAG_MMX);
1.891 +
1.892 +static void /* IN then OVER: s = muldiv255(src[i], mask[i]); dest[i] = s + muldiv255(dest[i], 255 - top byte of s), for n pixels */
1.893 +composite_in_over_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
1.894 +{
1.895 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.896 + __asm__ __volatile__ (
1.897 + MMX_LOAD_CONSTANTS
1.898 + "1:\n"
1.899 + " movd (%2), %%mm0\n" /* NOTE(review): 32-bit load from mask although only the low byte is used */
1.900 + " punpcklbw %%mm7, %%mm0\n"
1.901 + " pshufw $0x00, %%mm0, %%mm1\n" /* broadcast mask[i] into all four words */
1.902 +
1.903 + " movd (%1), %%mm2\n"
1.904 + " punpcklbw %%mm7, %%mm2\n"
1.905 +
1.906 + MMX_MULDIV255(mm2, mm1)
1.907 +
1.908 + " movd (%0), %%mm0\n"
1.909 + " punpcklbw %%mm7, %%mm0\n"
1.910 +
1.911 + " pshufw $0xff, %%mm2, %%mm1\n" /* broadcast the top byte of the masked source */
1.912 + " pxor %%mm5, %%mm1\n" /* 255 - that byte */
1.913 +
1.914 + MMX_MULDIV255(mm0, mm1)
1.915 +
1.916 + " paddw %%mm0, %%mm2\n"
1.917 + " packuswb %%mm2, %%mm2\n"
1.918 +
1.919 + " movd %%mm2, (%0)\n"
1.920 + " addl $4, %0\n"
1.921 + " addl $4, %1\n"
1.922 + " addl $1, %2\n"
1.923 + " decl %3\n" /* counter is tested after the body: assumes n >= 1 */
1.924 + " jnz 1b\n"
1.925 + " emms\n"
1.926 + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
1.927 + :
1.928 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.929 +#endif
1.930 +}
1.931 +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_mmx, composite_in_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.932 +
1.933 +static void /* IN then OVER with a constant source: s = muldiv255(src[0], mask[i]); dest[i] = s + muldiv255(dest[i], 255 - top byte of s) */
1.934 +composite_in_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
1.935 +{
1.936 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.937 + __asm__ __volatile__ (
1.938 + MMX_LOAD_CONSTANTS
1.939 +
1.940 + " movd (%1), %%mm3\n" /* the single source pixel is loaded once, outside the loop */
1.941 + " punpcklbw %%mm7, %%mm3\n"
1.942 + "1:\n"
1.943 + " movd (%2), %%mm0\n" /* NOTE(review): 32-bit load from mask although only the low byte is used */
1.944 + " punpcklbw %%mm7, %%mm0\n"
1.945 + " pshufw $0x00, %%mm0, %%mm1\n" /* broadcast mask[i] into all four words */
1.946 +
1.947 + " movq %%mm3, %%mm2\n" /* fresh copy: MMX_MULDIV255 destroys its operands */
1.948 +
1.949 + MMX_MULDIV255(mm2, mm1)
1.950 +
1.951 + " movd (%0), %%mm0\n"
1.952 + " punpcklbw %%mm7, %%mm0\n"
1.953 +
1.954 + " pshufw $0xff, %%mm2, %%mm1\n" /* broadcast the top byte of the masked source */
1.955 + " pxor %%mm5, %%mm1\n" /* 255 - that byte */
1.956 +
1.957 + MMX_MULDIV255(mm0, mm1)
1.958 +
1.959 + " paddw %%mm0, %%mm2\n"
1.960 + " packuswb %%mm2, %%mm2\n"
1.961 +
1.962 + " movd %%mm2, (%0)\n"
1.963 + " addl $4, %0\n"
1.964 + " addl $1, %2\n"
1.965 + " decl %3\n" /* counter is tested after the body: assumes n >= 1 */
1.966 + " jnz 1b\n"
1.967 + " emms\n"
1.968 + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
1.969 + :
1.970 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.971 +#endif
1.972 +}
1.973 +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.974 +
1.975 +static void /* IN then OVER with a constant mask: s = muldiv255(src[i], mask[0]); dest[i] = s + muldiv255(dest[i], 255 - top byte of s) */
1.976 +composite_in_over_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
1.977 +{
1.978 +#if !defined(__WINSCW__) && !defined(__WINS__) /* on these targets the body is compiled out and the function does nothing */
1.979 + __asm__ __volatile__ (
1.980 + MMX_LOAD_CONSTANTS
1.981 + " movd (%2), %%mm0\n" /* NOTE(review): 32-bit load from mask although only the low byte is used */
1.982 + " punpcklbw %%mm7, %%mm0\n"
1.983 + " pshufw $0x00, %%mm0, %%mm3\n" /* mm3 = mask byte broadcast, kept for the whole loop */
1.984 +
1.985 + "1:\n"
1.986 + " movd (%1), %%mm2\n"
1.987 + " punpcklbw %%mm7, %%mm2\n"
1.988 + " movq %%mm3, %%mm1\n" /* fresh copy: MMX_MULDIV255 destroys its operands */
1.989 +
1.990 + MMX_MULDIV255(mm2, mm1)
1.991 +
1.992 + " movd (%0), %%mm0\n"
1.993 + " punpcklbw %%mm7, %%mm0\n"
1.994 +
1.995 + " pshufw $0xff, %%mm2, %%mm1\n" /* broadcast the top byte of the masked source */
1.996 + " pxor %%mm5, %%mm1\n" /* 255 - that byte */
1.997 +
1.998 + MMX_MULDIV255(mm0, mm1)
1.999 +
1.1000 + " paddw %%mm0, %%mm2\n"
1.1001 + " packuswb %%mm2, %%mm2\n"
1.1002 +
1.1003 + " movd %%mm2, (%0)\n"
1.1004 + " addl $4, %0\n"
1.1005 + " addl $4, %1\n"
1.1006 + " decl %3\n" /* counter is tested after the body: assumes n >= 1 */
1.1007 + " jnz 1b\n"
1.1008 + " emms\n"
1.1009 + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
1.1010 + :
1.1011 + :"eax", "memory"); /* "memory": the asm loads and stores through the pointer operands */
1.1012 +#endif
1.1013 +}
1.1014 +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1.1015 +
1.1016 +
1.1017 +
1.1018 +#ifdef __SYMBIAN32__
1.1019 +
1.1020 +OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx() { /* Symbian-only accessor returning the address of the composite_in_argb_mmx implementation record (presumably the object emitted by OIL_DEFINE_IMPL_FULL - confirm); the second copy, whose name contained ", composite_in_argb", was dropped */
1.1021 + return &_oil_function_impl_composite_in_argb_mmx;
1.1022 +}
1.1023 +#endif
1.1031 +
1.1032 +#ifdef __SYMBIAN32__
1.1033 +
1.1034 +OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx2() { /* Symbian-only accessor returning the address of the composite_in_argb_mmx2 implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1035 + return &_oil_function_impl_composite_in_argb_mmx2;
1.1036 +}
1.1037 +#endif
1.1038 +
1.1039 +#ifdef __SYMBIAN32__
1.1040 +
1.1041 +OilFunctionImpl* __oil_function_impl_composite_in_argb_const_src_mmx() { /* Symbian-only accessor returning the address of the composite_in_argb_const_src_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1042 + return &_oil_function_impl_composite_in_argb_const_src_mmx;
1.1043 +}
1.1044 +#endif
1.1045 +
1.1046 +#ifdef __SYMBIAN32__
1.1047 +
1.1048 +OilFunctionImpl* __oil_function_impl_composite_in_argb_const_mask_mmx() { /* Symbian-only accessor returning the address of the composite_in_argb_const_mask_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1049 + return &_oil_function_impl_composite_in_argb_const_mask_mmx;
1.1050 +}
1.1051 +#endif
1.1052 +
1.1053 +#ifdef __SYMBIAN32__
1.1054 +
1.1055 +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx() { /* Symbian-only accessor returning the address of the composite_over_argb_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1056 + return &_oil_function_impl_composite_over_argb_mmx;
1.1057 +}
1.1058 +#endif
1.1059 +
1.1060 +#ifdef __SYMBIAN32__
1.1061 +
1.1062 +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_2() { /* Symbian-only accessor returning the address of the composite_over_argb_mmx_2 implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1063 + return &_oil_function_impl_composite_over_argb_mmx_2;
1.1064 +}
1.1065 +#endif
1.1066 +
1.1067 +#ifdef __SYMBIAN32__
1.1068 +
1.1069 +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_3() { /* Symbian-only accessor returning the address of the composite_over_argb_mmx_3 implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1070 + return &_oil_function_impl_composite_over_argb_mmx_3;
1.1071 +}
1.1072 +#endif
1.1073 +
1.1074 +#ifdef __SYMBIAN32__
1.1075 +
1.1076 +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_4() { /* Symbian-only accessor returning the address of the composite_over_argb_mmx_4 implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1077 + return &_oil_function_impl_composite_over_argb_mmx_4;
1.1078 +}
1.1079 +#endif
1.1080 +
1.1081 +#ifdef __SYMBIAN32__
1.1082 +
1.1083 +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_5() { /* Symbian-only accessor returning the address of the composite_over_argb_mmx_5 implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1084 + return &_oil_function_impl_composite_over_argb_mmx_5;
1.1085 +}
1.1086 +#endif
1.1087 +
1.1088 +#ifdef __SYMBIAN32__
1.1089 +
1.1090 +OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2() { /* Symbian-only accessor returning the address of the composite_over_argb_sse2 implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1091 + return &_oil_function_impl_composite_over_argb_sse2;
1.1092 +}
1.1093 +#endif
1.1094 +
1.1095 +#ifdef __SYMBIAN32__
1.1096 +
1.1097 +OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_2() { /* Symbian-only accessor returning the address of the composite_over_argb_sse2_2 implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1098 + return &_oil_function_impl_composite_over_argb_sse2_2;
1.1099 +}
1.1100 +#endif
1.1101 +
1.1102 +#ifdef __SYMBIAN32__
1.1103 +
1.1104 +OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_3() { /* Symbian-only accessor returning the address of the composite_over_argb_sse2_3 implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1105 + return &_oil_function_impl_composite_over_argb_sse2_3;
1.1106 +}
1.1107 +#endif
1.1108 +
1.1109 +#ifdef __SYMBIAN32__
1.1110 +
1.1111 +OilFunctionImpl* __oil_function_impl_composite_over_argb_const_src_mmx() { /* Symbian-only accessor returning the address of the composite_over_argb_const_src_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1112 + return &_oil_function_impl_composite_over_argb_const_src_mmx;
1.1113 +}
1.1114 +#endif
1.1115 +
1.1116 +#ifdef __SYMBIAN32__
1.1117 +
1.1118 +OilFunctionImpl* __oil_function_impl_composite_add_argb_mmx() { /* Symbian-only accessor returning the address of the composite_add_argb_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1119 + return &_oil_function_impl_composite_add_argb_mmx;
1.1120 +}
1.1121 +#endif
1.1122 +
1.1123 +#ifdef __SYMBIAN32__
1.1124 +
1.1125 +OilFunctionImpl* __oil_function_impl_composite_add_argb_const_src_mmx() { /* Symbian-only accessor returning the address of the composite_add_argb_const_src_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1126 + return &_oil_function_impl_composite_add_argb_const_src_mmx;
1.1127 +}
1.1128 +#endif
1.1129 +
1.1130 +#ifdef __SYMBIAN32__
1.1131 +
1.1132 +OilFunctionImpl* __oil_function_impl_composite_in_over_argb_mmx() { /* Symbian-only accessor returning the address of the composite_in_over_argb_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1133 + return &_oil_function_impl_composite_in_over_argb_mmx;
1.1134 +}
1.1135 +#endif
1.1136 +
1.1137 +#ifdef __SYMBIAN32__
1.1138 +
1.1139 +OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_src_mmx() { /* Symbian-only accessor returning the address of the composite_in_over_argb_const_src_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1140 + return &_oil_function_impl_composite_in_over_argb_const_src_mmx;
1.1141 +}
1.1142 +#endif
1.1143 +
1.1144 +#ifdef __SYMBIAN32__
1.1145 +
1.1146 +OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_mask_mmx() { /* Symbian-only accessor returning the address of the composite_in_over_argb_const_mask_mmx implementation record (presumably emitted by OIL_DEFINE_IMPL_FULL - confirm) */
1.1147 + return &_oil_function_impl_composite_in_over_argb_const_mask_mmx;
1.1148 +}
1.1149 +#endif
1.1150 +