/* First public contribution.
2 * Copyright © 2004 Red Hat, Inc.
3 * Copyright © 2004 Nicholas Miell
4 * Copyright © 2005 Trolltech AS
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Red Hat not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. Red Hat makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
17 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
18 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
21 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
22 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
25 * Author: Søren Sandmann (sandmann@redhat.com)
26 * Minor Improvements: Nicholas Miell (nmiell@gmail.com)
27 * MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com)
29 * Based on work by Owen Taylor
31 // Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
37 #include <liboil/liboil.h>
38 #include <liboil/liboilfunction.h>
40 #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
/* Compatibility aliases for the X server's historical integer type names
 * (CARD32 et al.), defined via <stdint.h> fixed-width types so the ported
 * fbmmx code paths compile outside the X tree. */
typedef uint32_t CARD32;
typedef uint16_t CARD16;
typedef int16_t INT16;
typedef uint8_t CARD8;
typedef uint64_t ullong; /* 64-bit scratch type used when casting __m64 values through integers */
/* NOTE(review): in the real X server these are pointers to structs, not raw
 * pixel pointers -- this port reduces them to CARD32*; confirm no struct
 * field access survives in the code that uses them. */
typedef CARD32* PicturePtr;
typedef CARD32* FbBits;
/* Declare the liboil function classes that this file supplies MMX
 * implementations for; each implementation is registered against its
 * class further down via OIL_DEFINE_IMPL_FULL. */
OIL_DECLARE_CLASS (composite_in_argb);
OIL_DECLARE_CLASS (composite_in_argb_const_src);
OIL_DECLARE_CLASS (composite_in_argb_const_mask);
OIL_DECLARE_CLASS (composite_over_argb);
OIL_DECLARE_CLASS (composite_over_argb_const_src);
OIL_DECLARE_CLASS (composite_add_argb);
OIL_DECLARE_CLASS (composite_add_argb_const_src);
OIL_DECLARE_CLASS (composite_in_over_argb);
OIL_DECLARE_CLASS (composite_in_over_argb_const_src);
OIL_DECLARE_CLASS (composite_in_over_argb_const_mask);
OIL_DECLARE_CLASS (composite_over_u8);
OIL_DECLARE_CLASS (composite_add_u8);
71 /* --------------- MMX code paths for fbcompose.c --------------------- */
75 mmxCombineMaskU (uint32_t *dest, const uint32_t *src, const uint8_t *mask, int width)
77 const __m64 mmx_0 = _mm_setzero_si64();
78 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
80 const uint32_t *end = mask + width;
82 __m64 a = MmxTo(*mask);
83 __m64 s = MmxTo(*src);
95 #ifdef ENABLE_BROKEN_IMPLS
97 mmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
99 const __m64 mmx_0 = _mm_setzero_si64();
100 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
101 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
103 const uint32_t *end = dest + width;
118 OIL_DEFINE_IMPL_FULL(mmxCombineOverU, composite_over_argb, OIL_IMPL_FLAG_MMX);
123 mmxCombineOverReverseU (CARD32 *dest, const CARD32 *src, int width)
125 const __m64 mmx_0 = _mm_setzero_si64();
126 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
127 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
129 const CARD32 *end = dest + width;
148 mmxCombineInU (CARD32 *dest, const CARD32 *src, int width)
150 const __m64 mmx_0 = _mm_setzero_si64();
151 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
153 const CARD32 *end = dest + width;
171 mmxCombineInReverseU (CARD32 *dest, const CARD32 *src, int width)
173 const __m64 mmx_0 = _mm_setzero_si64();
174 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
176 const CARD32 *end = dest + width;
194 mmxCombineOutU (CARD32 *dest, const CARD32 *src, int width)
196 const __m64 mmx_0 = _mm_setzero_si64();
197 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
198 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
200 const CARD32 *end = dest + width;
219 mmxCombineOutReverseU (CARD32 *dest, const CARD32 *src, int width)
221 const __m64 mmx_0 = _mm_setzero_si64();
222 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
223 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
225 const CARD32 *end = dest + width;
242 mmxCombineAtopU (CARD32 *dest, const CARD32 *src, int width)
244 const __m64 mmx_0 = _mm_setzero_si64();
245 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
246 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
248 const CARD32 *end = dest + width;
255 sia = MmxNegate(sia);
257 MmxAddMul(s, da, d, sia);
266 mmxCombineAtopReverseU (CARD32 *dest, const CARD32 *src, int width)
268 const __m64 mmx_0 = _mm_setzero_si64();
269 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
270 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
282 dia = MmxNegate(dia);
283 MmxAddMul(s, dia, d, sa);
292 mmxCombineXorU (CARD32 *dest, const CARD32 *src, int width)
294 const __m64 mmx_0 = _mm_setzero_si64();
295 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
296 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
298 const CARD32 *end = dest + width;
301 __m64 s, dia, d, sia;
306 sia = MmxNegate(sia);
307 dia = MmxNegate(dia);
308 MmxAddMul(s, dia, d, sia);
318 mmxCombineAddU (uint32_t *dest, const uint32_t *src, int width)
320 const __m64 mmx_0 = _mm_setzero_si64();
322 const uint32_t *end = dest + width;
334 OIL_DEFINE_IMPL_FULL(mmxCombineAddU, composite_add_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_SSE);
338 mmxCombineSaturateU (CARD32 *dest, const CARD32 *src, int width)
340 const __m64 mmx_0 = _mm_setzero_si64();
341 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
343 const CARD32 *end = dest + width;
350 CARD32 da = ~d >> 24;
353 __m64 msa = MmxTo(FbIntDiv(da, sa));
367 mmxCombineSrcC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
369 const __m64 mmx_0 = _mm_setzero_si64();
370 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
372 const CARD32 *end = src + width;
374 __m64 a = MmxTo(*mask);
375 __m64 s = MmxTo(*src);
386 mmxCombineOverC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
388 const __m64 mmx_0 = _mm_setzero_si64();
389 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
390 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
392 const CARD32 *end = src + width;
394 __m64 a = MmxTo(*mask);
395 __m64 s = MmxTo(*src);
396 __m64 d = MmxTo(*dest);
397 __m64 sa = MmxAlpha(s);
411 mmxCombineOverReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
413 const __m64 mmx_0 = _mm_setzero_si64();
414 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
415 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
417 const CARD32 *end = src + width;
419 __m64 a = MmxTo(*mask);
420 __m64 s = MmxTo(*src);
421 __m64 d = MmxTo(*dest);
422 __m64 da = MmxAlpha(d);
436 mmxCombineInC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
438 const __m64 mmx_0 = _mm_setzero_si64();
439 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
441 const CARD32 *end = src + width;
443 __m64 a = MmxTo(*mask);
444 __m64 s = MmxTo(*src);
445 __m64 d = MmxTo(*dest);
446 __m64 da = MmxAlpha(d);
458 mmxCombineInReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
460 const __m64 mmx_0 = _mm_setzero_si64();
461 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
463 const CARD32 *end = src + width;
465 __m64 a = MmxTo(*mask);
466 __m64 s = MmxTo(*src);
467 __m64 d = MmxTo(*dest);
468 __m64 sa = MmxAlpha(s);
480 mmxCombineOutC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
482 const __m64 mmx_0 = _mm_setzero_si64();
483 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
484 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
486 const CARD32 *end = src + width;
488 __m64 a = MmxTo(*mask);
489 __m64 s = MmxTo(*src);
490 __m64 d = MmxTo(*dest);
491 __m64 da = MmxAlpha(d);
504 mmxCombineOutReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
506 const __m64 mmx_0 = _mm_setzero_si64();
507 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
508 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
510 const CARD32 *end = src + width;
512 __m64 a = MmxTo(*mask);
513 __m64 s = MmxTo(*src);
514 __m64 d = MmxTo(*dest);
515 __m64 sa = MmxAlpha(s);
528 mmxCombineAtopC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
530 const __m64 mmx_0 = _mm_setzero_si64();
531 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
532 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
534 const CARD32 *end = src + width;
536 __m64 a = MmxTo(*mask);
537 __m64 s = MmxTo(*src);
538 __m64 d = MmxTo(*dest);
539 __m64 da = MmxAlpha(d);
540 __m64 sa = MmxAlpha(s);
544 MmxAddMul(d, a, s, da);
554 mmxCombineAtopReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
556 const __m64 mmx_0 = _mm_setzero_si64();
557 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
558 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
560 const CARD32 *end = src + width;
562 __m64 a = MmxTo(*mask);
563 __m64 s = MmxTo(*src);
564 __m64 d = MmxTo(*dest);
565 __m64 da = MmxAlpha(d);
566 __m64 sa = MmxAlpha(s)
570 MmxAddMul(d, a, s, da);
580 mmxCombineXorC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
582 const __m64 mmx_0 = _mm_setzero_si64();
583 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
584 const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
586 const CARD32 *end = src + width;
588 __m64 a = MmxTo(*mask);
589 __m64 s = MmxTo(*src);
590 __m64 d = MmxTo(*dest);
591 __m64 da = MmxAlpha(d);
592 __m64 sa = MmxAlpha(s);
597 MmxAddMul(d, a, s, da);
607 mmxCombineAddC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
609 const __m64 mmx_0 = _mm_setzero_si64();
610 const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
612 const CARD32 *end = src + width;
614 __m64 a = MmxTo(*mask);
615 __m64 s = MmxTo(*src);
616 __m64 d = MmxTo(*dest);
627 extern FbComposeFunctions composeFunctions;
629 void fbComposeSetupMMX(void)
631 /* check if we have MMX support and initialize accordingly */
633 composeFunctions.combineU[PictOpOver] = mmxCombineOverU;
634 composeFunctions.combineU[PictOpOverReverse] = mmxCombineOverReverseU;
635 composeFunctions.combineU[PictOpIn] = mmxCombineInU;
636 composeFunctions.combineU[PictOpInReverse] = mmxCombineInReverseU;
637 composeFunctions.combineU[PictOpOut] = mmxCombineOutU;
638 composeFunctions.combineU[PictOpOutReverse] = mmxCombineOutReverseU;
639 composeFunctions.combineU[PictOpAtop] = mmxCombineAtopU;
640 composeFunctions.combineU[PictOpAtopReverse] = mmxCombineAtopReverseU;
641 composeFunctions.combineU[PictOpXor] = mmxCombineXorU;
642 composeFunctions.combineU[PictOpAdd] = mmxCombineAddU;
643 composeFunctions.combineU[PictOpSaturate] = mmxCombineSaturateU;
645 composeFunctions.combineC[PictOpSrc] = mmxCombineSrcC;
646 composeFunctions.combineC[PictOpOver] = mmxCombineOverC;
647 composeFunctions.combineC[PictOpOverReverse] = mmxCombineOverReverseC;
648 composeFunctions.combineC[PictOpIn] = mmxCombineInC;
649 composeFunctions.combineC[PictOpInReverse] = mmxCombineInReverseC;
650 composeFunctions.combineC[PictOpOut] = mmxCombineOutC;
651 composeFunctions.combineC[PictOpOutReverse] = mmxCombineOutReverseC;
652 composeFunctions.combineC[PictOpAtop] = mmxCombineAtopC;
653 composeFunctions.combineC[PictOpAtopReverse] = mmxCombineAtopReverseC;
654 composeFunctions.combineC[PictOpXor] = mmxCombineXorC;
655 composeFunctions.combineC[PictOpAdd] = mmxCombineAddC;
657 composeFunctions.combineMaskU = mmxCombineMaskU;
663 /* ------------------ MMX code paths called from fbpict.c ----------------------- */
675 m64_ull mmx_565_unpack_multiplier;
683 m64_ull mmx_full_alpha;
684 m64_ull mmx_ffff0000ffff0000;
685 m64_ull mmx_0000ffff00000000;
686 m64_ull mmx_000000000000ffff;
689 static const MMXData c =
691 .mmx_4x00ff.ull = 0x00ff00ff00ff00ffULL,
692 .mmx_4x0080.ull = 0x0080008000800080ULL,
693 .mmx_565_rgb.ull = 0x000001f0003f001fULL,
694 .mmx_565_r.ull = 0x000000f800000000ULL,
695 .mmx_565_g.ull = 0x0000000000fc0000ULL,
696 .mmx_565_b.ull = 0x00000000000000f8ULL,
697 .mmx_mask_0.ull = 0xffffffffffff0000ULL,
698 .mmx_mask_1.ull = 0xffffffff0000ffffULL,
699 .mmx_mask_2.ull = 0xffff0000ffffffffULL,
700 .mmx_mask_3.ull = 0x0000ffffffffffffULL,
701 .mmx_full_alpha.ull = 0x00ff000000000000ULL,
702 .mmx_565_unpack_multiplier.ull = 0x0000008404100840ULL,
703 .mmx_ffff0000ffff0000.ull = 0xffff0000ffff0000ULL,
704 .mmx_0000ffff00000000.ull = 0x0000ffff00000000ULL,
705 .mmx_000000000000ffff.ull = 0x000000000000ffffULL,
708 #define MC(x) ((__m64) c.mmx_##x.m64)
710 static __inline__ __m64
711 shift (__m64 v, int s)
714 return _mm_slli_si64 (v, s);
716 return _mm_srli_si64 (v, -s);
721 static __inline__ __m64
724 return _mm_xor_si64 (mask, MC(4x00ff));
727 static __inline__ __m64
728 pix_multiply (__m64 a, __m64 b)
732 res = _mm_mullo_pi16 (a, b);
733 res = _mm_adds_pu16 (res, MC(4x0080));
734 res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
735 res = _mm_srli_pi16 (res, 8);
740 static __inline__ __m64
741 expand_alpha (__m64 pixel)
743 return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 3, 3, 3));
746 static __inline__ __m64
747 expand_alpha_rev (__m64 pixel)
749 return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(0, 0, 0, 0));
752 static __inline__ __m64
753 invert_colors (__m64 pixel)
755 return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 0, 1, 2));
758 /* Notes about writing mmx code
760 * give memory operands as the second operand. If you give it as the
761 * first, gcc will first load it into a register, then use that
766 * _mm_mullo_pi16 (x, mmx_constant);
770 * _mm_mullo_pi16 (mmx_constant, x);
772 * Also try to minimize dependencies. i.e. when you need a value, try
773 * to calculate it from a value that was calculated as early as
777 static __inline__ __m64
778 over (__m64 src, __m64 srca, __m64 dest)
780 return _mm_adds_pu8 (src, pix_multiply(dest, negate(srca)));
783 static __inline__ __m64
784 over_rev_non_pre (__m64 src, __m64 dest)
786 __m64 srca = expand_alpha (src);
787 __m64 srcfaaa = _mm_or_si64 (srca, MC(full_alpha));
789 return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest);
792 static __inline__ __m64
796 return pix_multiply (src, mask);
799 static __inline__ __m64
805 return over(in(src, mask), pix_multiply(srca, mask), dest);
808 static __inline__ __m64
811 return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64());
814 static __inline__ __m64
815 pack8888 (__m64 lo, __m64 hi)
818 r = _mm_packs_pu16 (lo, hi);
822 static __inline__ CARD32
825 return _mm_cvtsi64_si32(pack8888(v, _mm_setzero_si64()));
828 /* Expand 16 bits positioned at @pos (0-3) of a mmx register into
832 * --- Expanding 565 in the low word ---
834 * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
835 * m = m & (01f0003f001f);
836 * m = m * (008404100840);
839 * Note the trick here - the top word is shifted by another nibble to
840 * avoid it bumping into the middle word
842 static __inline__ __m64
843 expand565 (__m64 pixel, int pos)
848 /* move pixel to low 16 bit and zero the rest */
849 p = shift (shift (p, (3 - pos) * 16), -48);
851 t1 = shift (p, 36 - 11);
852 t2 = shift (p, 16 - 5);
854 p = _mm_or_si64 (t1, p);
855 p = _mm_or_si64 (t2, p);
856 p = _mm_and_si64 (p, MC(565_rgb));
858 pixel = _mm_mullo_pi16 (p, MC(565_unpack_multiplier));
859 return _mm_srli_pi16 (pixel, 8);
862 static __inline__ __m64
863 expand8888 (__m64 in, int pos)
866 return _mm_unpacklo_pi8 (in, _mm_setzero_si64());
868 return _mm_unpackhi_pi8 (in, _mm_setzero_si64());
871 static __inline__ __m64
872 pack565 (__m64 pixel, __m64 target, int pos)
878 r = _mm_and_si64 (p, MC(565_r));
879 g = _mm_and_si64 (p, MC(565_g));
880 b = _mm_and_si64 (p, MC(565_b));
882 r = shift (r, - (32 - 8) + pos * 16);
883 g = shift (g, - (16 - 3) + pos * 16);
884 b = shift (b, - (0 + 3) + pos * 16);
887 t = _mm_and_si64 (t, MC(mask_0));
889 t = _mm_and_si64 (t, MC(mask_1));
891 t = _mm_and_si64 (t, MC(mask_2));
893 t = _mm_and_si64 (t, MC(mask_3));
895 p = _mm_or_si64 (r, t);
896 p = _mm_or_si64 (g, p);
898 return _mm_or_si64 (b, p);
901 #ifdef ENABLE_BROKEN_IMPLS
902 /* broken. See Debian bug #340932 */
904 fbCompositeSolid_nx8888mmx (uint32_t *dst, uint32_t *src, int w)
908 vsrc = load8888 (*src);
909 vsrca = expand_alpha (vsrc);
911 while (w && (unsigned long)dst & 7)
913 *dst = store8888(over(vsrc, vsrca, load8888(*dst)));
924 vdest = *(__m64 *)dst;
926 dest0 = over(vsrc, vsrca, expand8888(vdest, 0));
927 dest1 = over(vsrc, vsrca, expand8888(vdest, 1));
929 *(__m64 *)dst = pack8888(dest0, dest1);
937 *dst = store8888(over(vsrc, vsrca, load8888(*dst)));
945 OIL_DEFINE_IMPL_FULL(fbCompositeSolid_nx8888mmx, composite_over_argb_const_src,
946 OIL_IMPL_FLAG_MMX| OIL_IMPL_FLAG_MMXEXT);
951 fbCompositeSolid_nx0565mmx (CARD8 op,
965 CARD16 *dstLine, *dst;
972 fbComposeGetSolid(pSrc, src, pDst->format);
977 fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
979 vsrc = load8888 (src);
980 vsrca = expand_alpha (vsrc);
985 dstLine += dstStride;
990 while (w && (unsigned long)dst & 7)
993 __m64 vdest = expand565 ((__m64)d, 0);
994 vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
995 *dst = (ullong)vdest;
1005 vdest = *(__m64 *)dst;
1007 vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0);
1008 vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1);
1009 vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2);
1010 vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3);
1012 *(__m64 *)dst = vdest;
1023 __m64 vdest = expand565 ((__m64)d, 0);
1024 vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
1025 *dst = (ullong)vdest;
1038 fbCompositeSolidMask_nx8888x8888Cmmx (uint32_t *dst, uint32_t *src, uint8_t *mask, int w)
1043 FbStride dstStride, maskStride;
1047 while (twidth && (unsigned long)q & 7)
1049 CARD32 m = *(CARD32 *)p;
1053 __m64 vdest = load8888(*q);
1054 vdest = in_over(vsrc, vsrca, load8888(m), vdest);
1055 *q = (ullong)pack8888(vdest, _mm_setzero_si64());
1072 __m64 vdest = *(__m64 *)q;
1074 dest0 = in_over(vsrc, vsrca, load8888(m0),
1075 expand8888 (vdest, 0));
1076 dest1 = in_over(vsrc, vsrca, load8888(m1),
1077 expand8888 (vdest, 1));
1079 *(__m64 *)q = pack8888(dest0, dest1);
1089 CARD32 m = *(CARD32 *)p;
1093 __m64 vdest = load8888(*q);
1094 vdest = in_over(vsrc, vsrca, load8888(m), vdest);
1095 *q = (ullong)pack8888(vdest, _mm_setzero_si64());
1109 fbCompositeSrc_8888x8x8888mmx (uint32_t *dest, uint32_t *src, uint8_t *mask,
1113 mask = *maskLine << 24 | *maskLine << 16 | *maskLine << 8 | *maskLine;
1114 vmask = load8888 (mask);
1120 dstLine += dstStride;
1122 srcLine += srcStride;
1125 while (w && (unsigned long)dst & 7)
1127 __m64 s = load8888 (*src);
1128 __m64 d = load8888 (*dst);
1130 *dst = (ullong)pack8888 (in_over (s, srca, vmask, d), (__m64)_mm_setzero_si64());
1139 __m64 vd0 = *(__m64 *)(dst + 0);
1140 __m64 vd1 = *(__m64 *)(dst + 2);
1141 __m64 vd2 = *(__m64 *)(dst + 4);
1142 __m64 vd3 = *(__m64 *)(dst + 6);
1143 __m64 vd4 = *(__m64 *)(dst + 8);
1144 __m64 vd5 = *(__m64 *)(dst + 10);
1145 __m64 vd6 = *(__m64 *)(dst + 12);
1146 __m64 vd7 = *(__m64 *)(dst + 14);
1148 __m64 vs0 = *(__m64 *)(src + 0);
1149 __m64 vs1 = *(__m64 *)(src + 2);
1150 __m64 vs2 = *(__m64 *)(src + 4);
1151 __m64 vs3 = *(__m64 *)(src + 6);
1152 __m64 vs4 = *(__m64 *)(src + 8);
1153 __m64 vs5 = *(__m64 *)(src + 10);
1154 __m64 vs6 = *(__m64 *)(src + 12);
1155 __m64 vs7 = *(__m64 *)(src + 14);
1157 vd0 = (__m64)pack8888 (
1158 in_over (expand8888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
1159 in_over (expand8888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
1161 vd1 = (__m64)pack8888 (
1162 in_over (expand8888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
1163 in_over (expand8888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
1165 vd2 = (__m64)pack8888 (
1166 in_over (expand8888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
1167 in_over (expand8888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
1169 vd3 = (__m64)pack8888 (
1170 in_over (expand8888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
1171 in_over (expand8888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
1173 vd4 = (__m64)pack8888 (
1174 in_over (expand8888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
1175 in_over (expand8888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
1177 vd5 = (__m64)pack8888 (
1178 in_over (expand8888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
1179 in_over (expand8888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
1181 vd6 = (__m64)pack8888 (
1182 in_over (expand8888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
1183 in_over (expand8888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));
1185 vd7 = (__m64)pack8888 (
1186 in_over (expand8888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
1187 in_over (expand8888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));
1189 *(__m64 *)(dst + 0) = vd0;
1190 *(__m64 *)(dst + 2) = vd1;
1191 *(__m64 *)(dst + 4) = vd2;
1192 *(__m64 *)(dst + 6) = vd3;
1193 *(__m64 *)(dst + 8) = vd4;
1194 *(__m64 *)(dst + 10) = vd5;
1195 *(__m64 *)(dst + 12) = vd6;
1196 *(__m64 *)(dst + 14) = vd7;
1205 __m64 s = load8888 (*src);
1206 __m64 d = load8888 (*dst);
1208 *dst = (ullong)pack8888 (in_over (s, srca, vmask, d), (__m64)_mm_setzero_si64());
1220 fbCompositeSrc_8888x8888mmx (CARD8 op,
1233 CARD32 *dstLine, *dst;
1234 CARD32 *srcLine, *src;
1235 FbStride dstStride, srcStride;
1241 fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
1242 fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);
1249 dstLine += dstStride;
1251 srcLine += srcStride;
1254 while (w && (unsigned long)dst & 7)
1256 __m64 s = load8888 (*src);
1257 __m64 d = load8888 (*dst);
1259 *dst = (ullong)pack8888 (over (s, expand_alpha (s), d), (__m64)_mm_setzero_si64());
1268 __m64 vd = *(__m64 *)(dst + 0);
1269 __m64 vs = *(__m64 *)(src + 0);
1270 __m64 vs0 = expand8888 (vs, 0);
1271 __m64 vs1 = expand8888 (vs, 1);
1273 *(__m64 *)dst = (__m64)pack8888 (
1274 over (vs0, expand_alpha (vs0), expand8888 (vd, 0)),
1275 over (vs1, expand_alpha (vs1), expand8888 (vd, 1)));
1284 __m64 s = load8888 (*src);
1285 __m64 d = load8888 (*dst);
1287 *dst = (ullong)pack8888 (over (s, expand_alpha (s), d),
1288 (__m64)_mm_setzero_si64());
1300 fbCompositeSolidMask_nx8x8888mmx (CARD8 op,
1314 CARD32 *dstLine, *dst;
1315 CARD8 *maskLine, *mask;
1316 FbStride dstStride, maskStride;
1323 fbComposeGetSolid(pSrc, src, pDst->format);
1329 srcsrc = (unsigned long long)src << 32 | src;
1331 fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
1332 fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
1334 vsrc = load8888 (src);
1335 vsrca = expand_alpha (vsrc);
1340 dstLine += dstStride;
1342 maskLine += maskStride;
1347 while (w && (unsigned long)dst & 7)
1353 __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), load8888(*dst));
1354 *dst = (ullong)pack8888(vdest, _mm_setzero_si64());
1370 if (srca == 0xff && (m0 & m1) == 0xff)
1372 *(unsigned long long *)dst = srcsrc;
1379 vdest = *(__m64 *)dst;
1381 dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m0), expand8888(vdest, 0));
1382 dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m1), expand8888(vdest, 1));
1384 *(__m64 *)dst = pack8888(dest0, dest1);
1400 __m64 vdest = load8888(*dst);
1401 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), vdest);
1402 *dst = (ullong)pack8888(vdest, _mm_setzero_si64());
1416 fbCompositeSolidMask_nx8x0565mmx (CARD8 op,
1430 CARD16 *dstLine, *dst;
1431 CARD8 *maskLine, *mask;
1432 FbStride dstStride, maskStride;
1435 unsigned long long srcsrcsrcsrc, src16;
1439 fbComposeGetSolid(pSrc, src, pDst->format);
1445 fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
1446 fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
1448 vsrc = load8888 (src);
1449 vsrca = expand_alpha (vsrc);
1451 src16 = (ullong)pack565(vsrc, _mm_setzero_si64(), 0);
1453 srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 |
1454 (ullong)src16 << 16 | (ullong)src16;
1459 dstLine += dstStride;
1461 maskLine += maskStride;
1466 while (w && (unsigned long)dst & 7)
1473 __m64 vd = (__m64)d;
1474 __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0));
1475 *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
1487 ullong m0, m1, m2, m3;
1493 if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
1495 *(unsigned long long *)dst = srcsrcsrcsrc;
1497 else if (m0 | m1 | m2 | m3)
1500 __m64 vm0, vm1, vm2, vm3;
1502 vdest = *(__m64 *)dst;
1505 vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0);
1507 vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 1)), vdest, 1);
1509 vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2);
1511 vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3);
1513 *(__m64 *)dst = vdest;
1530 __m64 vd = (__m64)d;
1531 __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0));
1532 *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
1545 fbCompositeSrc_8888RevNPx0565mmx (CARD8 op,
1558 CARD16 *dstLine, *dst;
1559 CARD32 *srcLine, *src;
1560 FbStride dstStride, srcStride;
1565 fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
1566 fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);
1568 assert (pSrc->pDrawable == pMask->pDrawable);
1573 dstLine += dstStride;
1575 srcLine += srcStride;
1580 while (w && (unsigned long)dst & 7)
1582 __m64 vsrc = load8888 (*src);
1584 __m64 vdest = expand565 ((__m64)d, 0);
1586 vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
1588 *dst = (ullong)vdest;
1599 CARD32 s0, s1, s2, s3;
1600 unsigned char a0, a1, a2, a3;
1612 if ((a0 & a1 & a2 & a3) == 0xFF)
1615 vdest = pack565(invert_colors(load8888(s0)), _mm_setzero_si64(), 0);
1616 vdest = pack565(invert_colors(load8888(s1)), vdest, 1);
1617 vdest = pack565(invert_colors(load8888(s2)), vdest, 2);
1618 vdest = pack565(invert_colors(load8888(s3)), vdest, 3);
1620 *(__m64 *)dst = vdest;
1622 else if (a0 | a1 | a2 | a3)
1624 __m64 vdest = *(__m64 *)dst;
1626 vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0);
1627 vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1);
1628 vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2);
1629 vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3);
1631 *(__m64 *)dst = vdest;
1643 __m64 vsrc = load8888 (*src);
1645 __m64 vdest = expand565 ((__m64)d, 0);
1647 vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
1649 *dst = (ullong)vdest;
1660 /* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */
1663 fbCompositeSrc_8888RevNPx8888mmx (CARD8 op,
1676 CARD32 *dstLine, *dst;
1677 CARD32 *srcLine, *src;
1678 FbStride dstStride, srcStride;
1683 fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
1684 fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);
1686 assert (pSrc->pDrawable == pMask->pDrawable);
1691 dstLine += dstStride;
1693 srcLine += srcStride;
1696 while (w && (unsigned long)dst & 7)
1698 __m64 s = load8888 (*src);
1699 __m64 d = load8888 (*dst);
1701 *dst = (ullong)pack8888 (over_rev_non_pre (s, d), _mm_setzero_si64());
1711 unsigned char a0, a1;
1720 if ((a0 & a1) == 0xFF)
1722 d0 = invert_colors(load8888(s0));
1723 d1 = invert_colors(load8888(s1));
1725 *(__m64 *)dst = pack8888 (d0, d1);
1729 __m64 vdest = *(__m64 *)dst;
1731 d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0));
1732 d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1));
1734 *(__m64 *)dst = pack8888 (d0, d1);
1744 __m64 s = load8888 (*src);
1745 __m64 d = load8888 (*dst);
1747 *dst = (ullong)pack8888 (over_rev_non_pre (s, d), _mm_setzero_si64());
1759 fbCompositeSolidMask_nx8888x0565Cmmx (CARD8 op,
1775 FbStride dstStride, maskStride;
1780 fbComposeGetSolid(pSrc, src, pDst->format);
1786 fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
1787 fbComposeGetStart (pMask, xMask, yMask, CARD32, maskStride, maskLine, 1);
1789 vsrc = load8888 (src);
1790 vsrca = expand_alpha (vsrc);
1795 CARD32 *p = (CARD32 *)maskLine;
1796 CARD16 *q = (CARD16 *)dstLine;
1798 while (twidth && ((unsigned long)q & 7))
1800 CARD32 m = *(CARD32 *)p;
1805 __m64 vdest = expand565 ((__m64)d, 0);
1806 vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
1817 CARD32 m0, m1, m2, m3;
1824 if ((m0 | m1 | m2 | m3))
1826 __m64 vdest = *(__m64 *)q;
1828 vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0);
1829 vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1);
1830 vdest = pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2);
1831 vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3);
1833 *(__m64 *)q = vdest;
1848 __m64 vdest = expand565((__m64)d, 0);
1849 vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0);
1858 maskLine += maskStride;
1859 dstLine += dstStride;
1867 fbCompositeSrcAdd_8000x8000mmx (uint8_t *dst, uint8_t *src, int w)
1873 while (w && (unsigned long)dst & 7)
1878 s = t | (0 - (t >> 8));
1888 *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
1899 s = t | (0 - (t >> 8));
1909 OIL_DEFINE_IMPL_FULL (fbCompositeSrcAdd_8000x8000mmx, composite_add_u8, OIL_IMPL_FLAG_MMX);
1912 fbCompositeSrcAdd_8888x8888mmx (uint32_t *dst, uint32_t *src, int w)
1914 while (w && (unsigned long)dst & 7)
1916 *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
1917 _mm_cvtsi32_si64(*dst)));
1925 *(__m64 *)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
1933 *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
1934 _mm_cvtsi32_si64(*dst)));
1940 OIL_DEFINE_IMPL_FULL (fbCompositeSrcAdd_8888x8888mmx, composite_add_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_SSE);
1943 #define GetStart(drw,x,y,type,stride,line,bpp) {\
1945 FbStride __stride__; \
1946 int __xoff__,__yoff__; \
1948 fbGetDrawable((drw),__bits__,__stride__,bpp,__xoff__,__yoff__); \
1949 (stride) = __stride__ * sizeof (FbBits) / sizeof (type); \
1950 (line) = ((type *) __bits__) + (stride) * ((y) - __yoff__) + ((x) - __xoff__); \
1954 fbSolidFillmmx (DrawablePtr pDraw,
1972 fbGetDrawable(pDraw, bits, stride, bpp, xoff, yoff);
1974 if (bpp == 16 && (xor >> 16 != (xor & 0xffff)))
1977 if (bpp != 16 && bpp != 32)
1982 stride = stride * sizeof (FbBits) / 2;
1983 byte_line = (CARD8 *)(((CARD16 *)bits) + stride * (y - yoff) + (x - xoff));
1984 byte_width = 2 * width;
1989 stride = stride * sizeof (FbBits) / 4;
1990 byte_line = (CARD8 *)(((CARD32 *)bits) + stride * (y - yoff) + (x - xoff));
1991 byte_width = 4 * width;
1995 fill = ((ullong)xor << 32) | xor;
1996 vfill = (__m64)fill;
2001 CARD8 *d = byte_line;
2002 byte_line += stride;
2005 while (w >= 2 && ((unsigned long)d & 3))
2012 while (w >= 4 && ((unsigned long)d & 7))
2022 *(__m64*) (d + 0) = vfill;
2023 *(__m64*) (d + 8) = vfill;
2024 *(__m64*) (d + 16) = vfill;
2025 *(__m64*) (d + 24) = vfill;
2026 *(__m64*) (d + 32) = vfill;
2027 *(__m64*) (d + 40) = vfill;
2028 *(__m64*) (d + 48) = vfill;
2029 *(__m64*) (d + 56) = vfill;
2054 fbCopyAreammx (DrawablePtr pSrc,
2064 FbStride src_stride;
2070 FbStride dst_stride;
2079 fbGetDrawable(pSrc, src_bits, src_stride, src_bpp, src_xoff, src_yoff);
2080 fbGetDrawable(pDst, dst_bits, dst_stride, dst_bpp, dst_xoff, dst_yoff);
2082 if (src_bpp != 16 && src_bpp != 32)
2085 if (dst_bpp != 16 && dst_bpp != 32)
2088 if (src_bpp != dst_bpp)
2095 src_stride = src_stride * sizeof (FbBits) / 2;
2096 dst_stride = dst_stride * sizeof (FbBits) / 2;
2097 src_bytes = (CARD8 *)(((CARD16 *)src_bits) + src_stride * (src_y - src_yoff) + (src_x - src_xoff));
2098 dst_bytes = (CARD8 *)(((CARD16 *)dst_bits) + dst_stride * (dst_y - dst_yoff) + (dst_x - dst_xoff));
2099 byte_width = 2 * width;
2105 src_stride = src_stride * sizeof (FbBits) / 4;
2106 dst_stride = dst_stride * sizeof (FbBits) / 4;
2107 src_bytes = (CARD8 *)(((CARD32 *)src_bits) + src_stride * (src_y - src_yoff) + (src_x - src_xoff));
2108 dst_bytes = (CARD8 *)(((CARD32 *)dst_bits) + dst_stride * (dst_y - dst_yoff) + (dst_x - dst_xoff));
2109 byte_width = 4 * width;
2117 CARD8 *s = src_bytes;
2118 CARD8 *d = dst_bytes;
2119 src_bytes += src_stride;
2120 dst_bytes += dst_stride;
2123 while (w >= 2 && ((unsigned long)d & 3))
2125 *(CARD16 *)d = *(CARD16 *)s;
2131 while (w >= 4 && ((unsigned long)d & 7))
2133 *(CARD32 *)d = *(CARD32 *)s;
2142 *(__m64 *)(d + 0) = *(__m64 *)(s + 0);
2143 *(__m64 *)(d + 8) = *(__m64 *)(s + 8);
2144 *(__m64 *)(d + 16) = *(__m64 *)(s + 16);
2145 *(__m64 *)(d + 24) = *(__m64 *)(s + 24);
2146 *(__m64 *)(d + 32) = *(__m64 *)(s + 32);
2147 *(__m64 *)(d + 40) = *(__m64 *)(s + 40);
2148 *(__m64 *)(d + 48) = *(__m64 *)(s + 48);
2149 *(__m64 *)(d + 56) = *(__m64 *)(s + 56);
2156 *(CARD32 *)d = *(CARD32 *)s;
2164 *(CARD16 *)d = *(CARD16 *)s;
2176 fbCompositeCopyAreammx (CARD8 op,
2189 fbCopyAreammx (pSrc->pDrawable,
2196 #if !defined(__amd64__) && !defined(__x86_64__)
2201 MMX_Extensions = 0x2,
2207 static unsigned int detectCPUFeatures(void) {
2208 unsigned int result;
2212 /* see p. 118 of amd64 instruction set manual Vol3 */
2213 __asm__ ("push %%ebx\n"
2216 "mov %%eax, %%ebx\n"
2217 "xor $0x00200000, %%eax\n"
2223 "xor %%ebx, %%eax\n"
2226 "mov $0x00000000, %%eax\n"
2231 "mov $0x00000001, %%eax\n"
2241 : "%eax", "%ecx", "%edx"
2244 unsigned int features = 0;
2246 /* result now contains the standard feature bits */
2247 if (result & (1 << 15))
2249 if (result & (1 << 23))
2251 if (result & (1 << 25))
2253 if (result & (1 << 26))
2255 if ((result & MMX) && !(result & SSE) && (strcmp(vendor, "AuthenticAMD") == 0)) {
2256 /* check for AMD MMX extensions */
2258 unsigned int result;
2259 __asm__("push %%ebx\n"
2260 "mov $0x80000000, %%eax\n"
2262 "xor %%edx, %%edx\n"
2265 "mov $0x80000001, %%eax\n"
2272 : "%eax", "%ecx", "%edx"
2274 if (result & (1<<22))
2275 features |= MMX_Extensions;
2284 static Bool initialized = FALSE;
2285 static Bool mmx_present;
2289 unsigned int features = detectCPUFeatures();
2290 mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions);
2296 #endif /* __amd64__ */
2302 #ifdef __SYMBIAN32__
2304 OilFunctionImpl* __oil_function_impl_mmxCombineOverU, composite_over_argb() {
2305 return &_oil_function_impl_mmxCombineOverU, composite_over_argb;
2309 #ifdef __SYMBIAN32__
2311 OilFunctionImpl* __oil_function_impl_mmxCombineAddU, composite_add_argb() {
2312 return &_oil_function_impl_mmxCombineAddU, composite_add_argb;
2316 #ifdef __SYMBIAN32__
2318 OilFunctionImpl* __oil_function_impl_fbCompositeSolid_nx8888mmx, composite_over_argb_const_src() {
2319 return &_oil_function_impl_fbCompositeSolid_nx8888mmx, composite_over_argb_const_src;
2323 #ifdef __SYMBIAN32__
2325 OilFunctionImpl* __oil_function_impl_fbCompositeSrcAdd_8000x8000mmx, composite_add_u8() {
2326 return &_oil_function_impl_fbCompositeSrcAdd_8000x8000mmx, composite_add_u8;
2330 #ifdef __SYMBIAN32__
2332 OilFunctionImpl* __oil_function_impl_fbCompositeSrcAdd_8888x8888mmx, composite_add_argb() {
2333 return &_oil_function_impl_fbCompositeSrcAdd_8888x8888mmx, composite_add_argb;