diff -r 000000000000 -r bde4ae8d615e os/ossrv/genericopenlibs/liboil/src/i386/composite_i386.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/os/ossrv/genericopenlibs/liboil/src/i386/composite_i386.c Fri Jun 15 03:10:57 2012 +0200 @@ -0,0 +1,1147 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2005 David A. Schleef + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +OIL_DECLARE_CLASS (composite_in_argb); +OIL_DECLARE_CLASS (composite_in_argb_const_src); +OIL_DECLARE_CLASS (composite_in_argb_const_mask); +OIL_DECLARE_CLASS (composite_over_argb); +OIL_DECLARE_CLASS (composite_over_argb_const_src); +OIL_DECLARE_CLASS (composite_add_argb); +OIL_DECLARE_CLASS (composite_add_argb_const_src); +OIL_DECLARE_CLASS (composite_in_over_argb); +OIL_DECLARE_CLASS (composite_in_over_argb_const_src); +OIL_DECLARE_CLASS (composite_in_over_argb_const_mask); + +#if 0 +static void +composite_in_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n) +{ + int i; + + for(i=0;i>=1; + + if (n>0){ + __asm__ __volatile__ ("\n" + "3:\n" +#if 0 + " movl (%1), %%eax\n" + " orl 4(%1), %%eax\n" + " testl $0xff000000, %%eax\n" + " jz 4f\n" +#endif + + " movq (%1), %%xmm1\n" + " punpcklbw %%xmm7, %%xmm1\n" + " pshuflw $0xff, %%xmm1, %%xmm0\n" + " pshufhw $0xff, %%xmm0, %%xmm0\n" + " pxor %%xmm5, %%xmm0\n" + + " movq (%0), %%xmm3\n" + " punpcklbw %%xmm7, %%xmm3\n" + " pmullw %%xmm0, %%xmm3\n" + " paddw %%xmm6, %%xmm3\n" + " pmulhuw %%xmm4, %%xmm3\n" + " paddw %%xmm1, %%xmm3\n" + " packuswb %%xmm3, %%xmm3\n" + " movq %%xmm3, (%0)\n" + "4:\n" + " addl $8, %0\n" + " addl $8, %1\n" + " subl $1, %2\n" + " jnz 3b\n" + :"+r" (dest), "+r" (src), "+r" (n) + : + :"eax"); + } +#endif +} +OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_2, composite_over_argb, OIL_IMPL_FLAG_SSE2); + +/* written for shaun */ +static void +composite_over_argb_sse2_3 (uint32_t *dest, uint32_t *src, int n) +{ + int begin; + int middle; + int end; +#if !defined(__WINSCW__) && !defined(__WINS__) + __asm__ __volatile__ (" pxor %%xmm7, %%xmm7\n" // mm7 = { 0, 0, 0, 0 } + " movl $0x80808080, %%eax\n" + " movd %%eax, %%xmm6\n" // mm6 = { 128, 128, 128, 128 } + " punpcklbw %%xmm7, %%xmm6\n" + " punpcklwd %%xmm6, %%xmm6\n" + " movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 } + " movd %%eax, %%xmm5\n" + " punpcklbw %%xmm7, %%xmm5\n" + " punpcklwd %%xmm5, %%xmm5\n" + " movl $0x02020202, %%eax\n" + " movd %%eax, %%xmm4\n" + " punpcklbw %%xmm7, %%xmm4\n" + " paddw %%xmm5, %%xmm4\n" // mm4 = { 257, 257, 257, 257 } + " punpcklwd %%xmm4, %%xmm4\n" + : + : + :"eax"); + + begin = 0x3 & (4 - (((unsigned long)dest & 0xf) >> 2)); + if (begin>n) { + begin = n; + middle = 0; + end = 0; + } else { + middle = (n-begin)>>2; + end = n - begin - middle*4; + } + + if (begin>0) { + __asm__ __volatile__ ("\n" + "1:\n" + " movl (%1), %%eax\n" + " testl $0xff000000, %%eax\n" + " jz 2f\n" + + " movd (%1), %%xmm1\n" + " punpcklbw %%xmm7, %%xmm1\n" + " pshuflw $0xff, %%xmm1, %%xmm0\n" + " pxor %%xmm5, %%xmm0\n" + + " movd (%0), %%xmm3\n" + " punpcklbw %%xmm7, %%xmm3\n" + " pmullw %%xmm0, %%xmm3\n" + " paddw %%xmm6, %%xmm3\n" + " pmulhuw %%xmm4, %%xmm3\n" + + " paddw %%xmm1, %%xmm3\n" + " packuswb %%xmm3, %%xmm3\n" + " movd %%xmm3, (%0)\n" + + "2:\n" + " addl $4, %0\n" + " addl $4, %1\n" + " subl $1, %2\n" + " jnz 1b\n" + :"+r" (dest), "+r" (src), "+r" (begin) + : + :"eax"); + } + + if (middle>0){ + __asm__ __volatile__ ("\n" + "1:\n" + " movq (%1), %%xmm1\n" + " movq 8(%1), %%xmm0\n" + " movl (%1), %%eax\n" + " orl 4(%1), %%eax\n" + " orl 8(%1), %%eax\n" + " orl 12(%1), %%eax\n" + " test $0xff000000, %%eax\n" + " jz 2f\n" + " punpcklbw %%xmm7, %%xmm1\n" + " punpcklbw %%xmm7, %%xmm0\n" + " pshuflw $0xff, %%xmm1, %%xmm1\n" + " pshuflw $0xff, %%xmm0, %%xmm0\n" + " pshufhw $0xff, %%xmm1, %%xmm1\n" + " pshufhw $0xff, %%xmm0, %%xmm0\n" + + " pxor %%xmm5, %%xmm1\n" + " pxor %%xmm5, %%xmm0\n" + + " movq (%0), %%xmm3\n" + " movq 8(%0), %%xmm2\n" + " punpcklbw %%xmm7, %%xmm3\n" + " punpcklbw %%xmm7, %%xmm2\n" + + " pmullw %%xmm1, %%xmm3\n" + " paddw %%xmm6, %%xmm3\n" + " pmulhuw %%xmm4, %%xmm3\n" + " pmullw %%xmm0, %%xmm2\n" + " paddw %%xmm6, %%xmm2\n" + " pmulhuw %%xmm4, %%xmm2\n" + " packuswb %%xmm2, %%xmm3\n" + + " movdqu (%1), %%xmm1\n" + " paddb %%xmm1, %%xmm3\n" + " movdqa %%xmm3, (%0)\n" + "2:\n" + " addl $16, %0\n" + " addl $16, %1\n" + " subl $1, %2\n" + " jnz 1b\n" + :"+r" (dest), "+r" (src), "+r" (middle) + : + :"eax"); + } + if (end>0) { + __asm__ __volatile__ ("\n" + "1:\n" + " movl (%1), %%eax\n" + " testl $0xff000000, %%eax\n" + " jz 2f\n" + + " movd (%1), %%xmm1\n" + " punpcklbw %%xmm7, %%xmm1\n" + " pshuflw $0xff, %%xmm1, %%xmm0\n" + " pxor %%xmm5, %%xmm0\n" + + " movd (%0), %%xmm3\n" + " punpcklbw %%xmm7, %%xmm3\n" + " pmullw %%xmm0, %%xmm3\n" + " paddw %%xmm6, %%xmm3\n" + " pmulhuw %%xmm4, %%xmm3\n" + + " paddw %%xmm1, %%xmm3\n" + " packuswb %%xmm3, %%xmm3\n" + " movd %%xmm3, (%0)\n" + + "2:\n" + " addl $4, %0\n" + " addl $4, %1\n" + " subl $1, %2\n" + " jnz 1b\n" + :"+r" (dest), "+r" (src), "+r" (end) + : + :"eax"); + } +#endif +} +OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_3, composite_over_argb, OIL_IMPL_FLAG_SSE2); + + +static void +composite_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n) +{ +#if !defined(__WINSCW__) && !defined(__WINS__) + __asm__ __volatile__ ( + MMX_LOAD_CONSTANTS + " movl (%1), %%eax\n" + " movd %%eax, %%mm0\n" + " punpcklbw %%mm7, %%mm0\n" + " pshufw $0xff, %%mm0, %%mm3\n" + " pxor %%mm5, %%mm3\n" + "1:\n" + " movq %%mm3, %%mm1\n" + " movd (%0), %%mm2\n" + " punpcklbw %%mm7, %%mm2\n" + + MMX_MULDIV255(mm2, mm1) + + " paddw %%mm0, %%mm2\n" + " packuswb %%mm2, %%mm2\n" + + " movd %%mm2, (%0)\n" + " addl $4, %0\n" + " decl %2\n" + " jnz 1b\n" + " emms\n" + :"+r" (dest), "+r" (src), "+r" (n) + : + :"eax"); +#endif +} +OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_mmx, composite_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); + +static void +composite_add_argb_mmx (uint32_t *dest, uint32_t *src, int n) +{ +#if !defined(__WINSCW__) && !defined(__WINS__) + __asm__ __volatile__ ( + "1:\n" + " movd (%1), %%mm0\n" + " movd (%0), %%mm2\n" + " paddusb %%mm0, %%mm2\n" + " movd %%mm2, (%0)\n" + " addl $4, %0\n" + " addl $4, %1\n" + " decl %2\n" + " jnz 1b\n" + " emms\n" + :"+r" (dest), "+r" (src), "+r" (n) + : + :"eax"); +#endif +} +OIL_DEFINE_IMPL_FULL (composite_add_argb_mmx, composite_add_argb, OIL_IMPL_FLAG_MMX); + +static void +composite_add_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n) +{ +#if !defined(__WINSCW__) && !defined(__WINS__) + __asm__ __volatile__ ( + " movd (%1), %%mm0\n" + "1:\n" + " movd (%0), %%mm2\n" + " paddusb %%mm0, %%mm2\n" + " movd %%mm2, (%0)\n" + " addl $4, %0\n" + " decl %2\n" + " jnz 1b\n" + " emms\n" + :"+r" (dest), "+r" (src), "+r" (n) + : + :"eax"); +#endif +} +OIL_DEFINE_IMPL_FULL (composite_add_argb_const_src_mmx, composite_add_argb_const_src, OIL_IMPL_FLAG_MMX); + +static void +composite_in_over_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n) +{ +#if !defined(__WINSCW__) && !defined(__WINS__) + __asm__ __volatile__ ( + MMX_LOAD_CONSTANTS + "1:\n" + " movd (%2), %%mm0\n" + " punpcklbw %%mm7, %%mm0\n" + " pshufw $0x00, %%mm0, %%mm1\n" + + " movd (%1), %%mm2\n" + " punpcklbw %%mm7, %%mm2\n" + + MMX_MULDIV255(mm2, mm1) + + " movd (%0), %%mm0\n" + " punpcklbw %%mm7, %%mm0\n" + + " pshufw $0xff, %%mm2, %%mm1\n" + " pxor %%mm5, %%mm1\n" + + MMX_MULDIV255(mm0, mm1) + + " paddw %%mm0, %%mm2\n" + " packuswb %%mm2, %%mm2\n" + + " movd %%mm2, (%0)\n" + " addl $4, %0\n" + " addl $4, %1\n" + " addl $1, %2\n" + " decl %3\n" + " jnz 1b\n" + " emms\n" + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n) + : + :"eax"); +#endif +} +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_mmx, composite_in_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); + +static void +composite_in_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n) +{ +#if !defined(__WINSCW__) && !defined(__WINS__) + __asm__ __volatile__ ( + MMX_LOAD_CONSTANTS + + " movd (%1), %%mm3\n" + " punpcklbw %%mm7, %%mm3\n" + "1:\n" + " movd (%2), %%mm0\n" + " punpcklbw %%mm7, %%mm0\n" + " pshufw $0x00, %%mm0, %%mm1\n" + + " movq %%mm3, %%mm2\n" + + MMX_MULDIV255(mm2, mm1) + + " movd (%0), %%mm0\n" + " punpcklbw %%mm7, %%mm0\n" + + " pshufw $0xff, %%mm2, %%mm1\n" + " pxor %%mm5, %%mm1\n" + + MMX_MULDIV255(mm0, mm1) + + " paddw %%mm0, %%mm2\n" + " packuswb %%mm2, %%mm2\n" + + " movd %%mm2, (%0)\n" + " addl $4, %0\n" + " addl $1, %2\n" + " decl %3\n" + " jnz 1b\n" + " emms\n" + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n) + : + :"eax"); +#endif +} +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); + +static void +composite_in_over_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n) +{ +#if !defined(__WINSCW__) && !defined(__WINS__) + __asm__ __volatile__ ( + MMX_LOAD_CONSTANTS + " movd (%2), %%mm0\n" + " punpcklbw %%mm7, %%mm0\n" + " pshufw $0x00, %%mm0, %%mm3\n" + + "1:\n" + " movd (%1), %%mm2\n" + " punpcklbw %%mm7, %%mm2\n" + " movq %%mm3, %%mm1\n" + + MMX_MULDIV255(mm2, mm1) + + " movd (%0), %%mm0\n" + " punpcklbw %%mm7, %%mm0\n" + + " pshufw $0xff, %%mm2, %%mm1\n" + " pxor %%mm5, %%mm1\n" + + MMX_MULDIV255(mm0, mm1) + + " paddw %%mm0, %%mm2\n" + " packuswb %%mm2, %%mm2\n" + + " movd %%mm2, (%0)\n" + " addl $4, %0\n" + " addl $4, %1\n" + " decl %3\n" + " jnz 1b\n" + " emms\n" + :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n) + : + :"eax"); +#endif +} +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); + + + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx() { + return &_oil_function_impl_composite_in_argb_mmx; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx, composite_in_argb() { + return &_oil_function_impl_composite_in_argb_mmx, composite_in_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx2, composite_in_argb() { + return &_oil_function_impl_composite_in_argb_mmx2, composite_in_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_in_argb_const_src_mmx, composite_in_argb_const_src() { + return &_oil_function_impl_composite_in_argb_const_src_mmx, composite_in_argb_const_src; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_in_argb_const_mask_mmx, composite_in_argb_const_mask() { + return &_oil_function_impl_composite_in_argb_const_mask_mmx, composite_in_argb_const_mask; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx, composite_over_argb() { + return &_oil_function_impl_composite_over_argb_mmx, composite_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_2, composite_over_argb() { + return &_oil_function_impl_composite_over_argb_mmx_2, composite_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_3, composite_over_argb() { + return &_oil_function_impl_composite_over_argb_mmx_3, composite_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_4, composite_over_argb() { + return &_oil_function_impl_composite_over_argb_mmx_4, composite_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_5, composite_over_argb() { + return &_oil_function_impl_composite_over_argb_mmx_5, composite_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2, composite_over_argb() { + return &_oil_function_impl_composite_over_argb_sse2, composite_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_2, composite_over_argb() { + return &_oil_function_impl_composite_over_argb_sse2_2, composite_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_3, composite_over_argb() { + return &_oil_function_impl_composite_over_argb_sse2_3, composite_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_over_argb_const_src_mmx, composite_over_argb_const_src() { + return &_oil_function_impl_composite_over_argb_const_src_mmx, composite_over_argb_const_src; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_add_argb_mmx, composite_add_argb() { + return &_oil_function_impl_composite_add_argb_mmx, composite_add_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_add_argb_const_src_mmx, composite_add_argb_const_src() { + return &_oil_function_impl_composite_add_argb_const_src_mmx, composite_add_argb_const_src; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_in_over_argb_mmx, composite_in_over_argb() { + return &_oil_function_impl_composite_in_over_argb_mmx, composite_in_over_argb; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src() { + return &_oil_function_impl_composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src; +} +#endif + +#ifdef __SYMBIAN32__ + +OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask() { + return &_oil_function_impl_composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask; +} +#endif +