/*
 * LIBOIL - Library of Optimized Inner Loops
 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
// Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
#include <liboil/liboil.h>
#include <liboil/liboilfunction.h>

OIL_DECLARE_CLASS (composite_in_argb);
OIL_DECLARE_CLASS (composite_in_argb_const_src);
OIL_DECLARE_CLASS (composite_in_argb_const_mask);
OIL_DECLARE_CLASS (composite_over_argb);
OIL_DECLARE_CLASS (composite_over_argb_const_src);
OIL_DECLARE_CLASS (composite_add_argb);
OIL_DECLARE_CLASS (composite_add_argb_const_src);
OIL_DECLARE_CLASS (composite_in_over_argb);
OIL_DECLARE_CLASS (composite_in_over_argb_const_src);
OIL_DECLARE_CLASS (composite_in_over_argb_const_mask);
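
/*
 * The classes declared above are liboil's Porter-Duff compositing
 * operations on 8-bit ARGB pixels (the arithmetic below treats the source
 * as alpha-premultiplied for OVER):
 *
 *   IN:   dest = src * mask / 255                     (all four channels)
 *   OVER: dest = src + dest * (255 - src_alpha) / 255
 *   ADD:  dest = saturate(dest + src)                 (per channel)
 *
 * The scalar sketch below is illustrative only (it is not registered as an
 * implementation, and the sketch_* names are ours); it shows the per-pixel
 * arithmetic that the MMX/SSE2 code in this file computes.
 */
static unsigned int
sketch_muldiv255 (unsigned int a, unsigned int b)
{
  unsigned int t = a * b + 128;
  return (t + (t >> 8)) >> 8;            /* rounded (a * b) / 255 */
}

static uint32_t
sketch_in (uint32_t src, uint8_t mask)
{
  uint32_t out = 0;
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    out |= sketch_muldiv255 ((src >> shift) & 0xff, mask) << shift;
  }
  return out;
}

static uint32_t
sketch_over (uint32_t dest, uint32_t src)
{
  uint32_t a = src >> 24;                /* ARGB: alpha in the top byte */
  uint32_t out = 0;
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    unsigned int c = ((src >> shift) & 0xff)
        + sketch_muldiv255 ((dest >> shift) & 0xff, 255 - a);
    out |= (c > 255 ? 255 : c) << shift; /* packuswb-style saturation */
  }
  return out;
}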
composite_in_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
        COMPOSITE_IN(ARGB_A(src[i]), mask[i]),
        COMPOSITE_IN(ARGB_R(src[i]), mask[i]),
        COMPOSITE_IN(ARGB_G(src[i]), mask[i]),
        COMPOSITE_IN(ARGB_B(src[i]), mask[i]));
OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx, composite_in_argb);
/*
 * This macro loads the constants:
 *   mm7 = { 0, 0, 0, 0 }
 *   mm6 = { 128, 128, 128, 128 }
 *   mm5 = { 255, 255, 255, 255 }
 */
#define MMX_LOAD_CONSTANTS \
  " pxor %%mm7, %%mm7\n" \
  " movl $0x80808080, %%eax\n" \
  " movd %%eax, %%mm6\n" \
  " punpcklbw %%mm7, %%mm6\n" \
  " movl $0xffffffff, %%eax\n" \
  " movd %%eax, %%mm5\n" \
  " punpcklbw %%mm7, %%mm5\n"
/*
 * MMX_MULDIV255(a, b): per 16-bit lane, a = (a * b) / 255, rounded
 * (a scalar sketch of the same arithmetic follows the macro)
 * destroys both registers
 * requires mm6 set up as above
 */
#define MMX_MULDIV255(a,b) \
  " pmullw %%" #b ", %%" #a "\n" \
  " paddw %%mm6, %%" #a "\n" \
  " movq %%" #a ", %%" #b "\n" \
  " psrlw $8, %%" #b "\n" \
  " paddw %%" #b ", %%" #a "\n" \
  " psrlw $8, %%" #a "\n"
composite_in_argb_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movd (%2), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0x00, %%mm0, %%mm1\n"
      " movd (%1), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      MMX_MULDIV255(mm2, mm1)
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx, composite_in_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
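
/*
 * None of the composite_*_mmx functions in this file are called directly:
 * OIL_DEFINE_IMPL_FULL registers them with their class, and liboil selects
 * one implementation per class when oil_init() runs.  A caller-side sketch
 * (illustrative only; it assumes the usual liboil prototype generated for
 * the composite_in_argb class):
 */
static void
sketch_caller (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
{
  oil_init ();                            /* safe to call more than once */
  oil_composite_in_argb (dest, src, mask, n);
}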
/*
 * This is a different style than the others.  Should be moved elsewhere.
 */
composite_in_argb_mmx2 (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movl (%2), %%eax\n"
      /* if alpha == 0, write a 0 */
      " testl $0x000000ff, %%eax\n"
      /* if alpha == 0xff, write src value */
      " movd %%eax, %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0x00, %%mm0, %%mm1\n"
      " movd (%1), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      MMX_MULDIV255(mm2, mm1)
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"

      " movl (%1), %%eax\n"
      " movl %%eax, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx2, composite_in_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
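
/*
 * The _mmx2 variant adds per-pixel fast paths: when the mask byte is 0 the
 * destination pixel is simply cleared, and when it is 0xff the source
 * pixel is copied through, so the multiply is only done for intermediate
 * mask values.  Scalar sketch of the same control flow (illustrative only;
 * the sketch_* names are ours):
 */
static void
sketch_in_with_fast_paths (uint32_t *dest, const uint32_t *src,
    const uint8_t *mask, int n)
{
  int i;

  for (i = 0; i < n; i++) {
    if (mask[i] == 0) {
      dest[i] = 0;                        /* "if alpha == 0, write a 0" */
    } else if (mask[i] == 0xff) {
      dest[i] = src[i];                   /* "if alpha == 0xff, write src" */
    } else {
      dest[i] = sketch_in (src[i], mask[i]);
    }
  }
}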
composite_in_argb_const_src_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movd (%1), %%mm3\n"
      " punpcklbw %%mm7, %%mm3\n"
      " movd (%2), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0x00, %%mm0, %%mm1\n"
      " movq %%mm3, %%mm2\n"
      MMX_MULDIV255(mm2, mm1)
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_mmx, composite_in_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
composite_in_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movd (%2), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0x00, %%mm0, %%mm3\n"
      " movq %%mm3, %%mm1\n"
      " movd (%1), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      MMX_MULDIV255(mm2, mm1)
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_mmx, composite_in_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
composite_over_argb_mmx (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movl (%1), %%eax\n"
      " testl $0xff000000, %%eax\n"
      " movd %%eax, %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0xff, %%mm0, %%mm1\n"
      " pxor %%mm5, %%mm1\n"
      " movd (%0), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      MMX_MULDIV255(mm2, mm1)
      " paddw %%mm0, %%mm2\n"
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
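
/*
 * Per-pixel flow of composite_over_argb_mmx in scalar form (illustrative
 * only): the "testl $0xff000000" above skips pixels whose source alpha is
 * zero, since OVER leaves the destination unchanged in that case.
 */
static void
sketch_over_loop (uint32_t *dest, const uint32_t *src, int n)
{
  int i;

  for (i = 0; i < n; i++) {
    if ((src[i] & 0xff000000) == 0)
      continue;                           /* fully transparent source */
    dest[i] = sketch_over (dest[i], src[i]);
  }
}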
composite_over_argb_mmx_2 (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movl (%1), %%eax\n"
      " testl $0xff000000, %%eax\n"
      " movd %%eax, %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0xff, %%mm0, %%mm1\n"
      " pxor %%mm5, %%mm1\n"
      " movd (%0), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      " pmullw %%mm1, %%mm2\n"
      " paddw %%mm6, %%mm2\n"
      " movq %%mm2, %%mm1\n"
      " paddw %%mm1, %%mm2\n"
      " paddw %%mm0, %%mm2\n"
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"

      " movl (%1), %%eax\n"
      " orl 4(%1), %%eax\n"
      " testl $0xff000000, %%eax\n"
      " movd (%1), %%mm0\n"
      " movd (%0), %%mm2\n"
      " punpcklbw %%mm7, %%mm0\n"
      " movd 4(%1), %%mm3\n"
      " pshufw $0xff, %%mm0, %%mm1\n"
      " punpcklbw %%mm7, %%mm2\n"
      " pxor %%mm5, %%mm1\n"
      " movd 4(%0), %%mm4\n"
      " pmullw %%mm1, %%mm2\n"
      " punpcklbw %%mm7, %%mm3\n"
      " paddw %%mm6, %%mm2\n"
      " punpcklbw %%mm7, %%mm4\n"
      " movq %%mm2, %%mm1\n"
      " pshufw $0xff, %%mm3, %%mm7\n"
      " pxor %%mm5, %%mm7\n"
      " paddw %%mm1, %%mm2\n"
      " pmullw %%mm7, %%mm4\n"
      " paddw %%mm6, %%mm4\n"
      " paddw %%mm0, %%mm2\n"
      " movq %%mm4, %%mm7\n"
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      " paddw %%mm7, %%mm4\n"
      " paddw %%mm3, %%mm4\n"
      " packuswb %%mm4, %%mm4\n"
      " movd %%mm4, 4(%0)\n"
      " pxor %%mm7, %%mm7\n"
      :"+r" (dest), "+r" (src), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_2, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
/* replace pshufw with punpck */
composite_over_argb_mmx_3 (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movl (%1), %%eax\n"
      " testl $0xff000000, %%eax\n"
      " movd %%eax, %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " movq %%mm0, %%mm1\n"
      " punpckhwd %%mm1, %%mm1\n"
      " punpckhdq %%mm1, %%mm1\n"
      " pxor %%mm5, %%mm1\n"
      " movd (%0), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      " pmullw %%mm1, %%mm2\n"
      " paddw %%mm6, %%mm2\n"
      " movq %%mm2, %%mm1\n"
      " paddw %%mm1, %%mm2\n"
      " paddw %%mm0, %%mm2\n"
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_3, composite_over_argb, OIL_IMPL_FLAG_MMX);
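
/*
 * composite_over_argb_mmx_3 is registered with OIL_IMPL_FLAG_MMX only
 * because it avoids pshufw, which needs the MMX extensions; the alpha
 * broadcast is done with plain-MMX punpckhwd + punpckhdq instead.  Scalar
 * sketch of that broadcast (illustrative only; the name is ours):
 */
static void
sketch_broadcast_high_word (uint16_t lane[4])
{
  /* punpckhwd x,x -> { l2, l2, l3, l3 }; punpckhdq x,x -> { l3, l3, l3, l3 } */
  uint16_t a = lane[3];

  lane[0] = a;
  lane[1] = a;
  lane[2] = a;
}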
/* written for gromit */
composite_over_argb_mmx_4 (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (" pxor %%mm7, %%mm7\n" // mm7 = { 0, 0, 0, 0 }
      " movl $0x80808080, %%eax\n"
      " movd %%eax, %%mm6\n" // mm6 = { 128, 128, 128, 128 }
      " punpcklbw %%mm7, %%mm6\n"
      " movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
      " movd %%eax, %%mm5\n"
      " punpcklbw %%mm7, %%mm5\n"
      " movl $0x02020202, %%eax\n"
      " movd %%eax, %%mm4\n"
      " punpcklbw %%mm7, %%mm4\n"
439 " paddw %%mm5, %%mm4\n" // mm5 = { 257, 257, 257, 257 }
441 " movl (%1), %%eax\n"
442 " testl $0xff000000, %%eax\n"
445 " movd %%eax, %%mm0\n"
446 " punpcklbw %%mm7, %%mm0\n"
447 " pshufw $0xff, %%mm0, %%mm1\n"
448 " pxor %%mm5, %%mm1\n"
450 " movd (%0), %%mm2\n"
451 " punpcklbw %%mm7, %%mm2\n"
452 " pmullw %%mm1, %%mm2\n"
453 " paddw %%mm6, %%mm2\n"
454 " pmulhuw %%mm4, %%mm2\n"
456 " paddw %%mm0, %%mm2\n"
457 " packuswb %%mm2, %%mm2\n"
459 " movd %%mm2, (%0)\n"
466 :"+r" (dest), "+r" (src), "+r" (n)
471 OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_4, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
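
/*
 * The _mmx_4 variant folds MMX_MULDIV255's shift/add step into a single
 * pmulhuw against 257: with t = a * b + 128 held in a 16-bit lane,
 *
 *   (t * 257) >> 16  ==  (t + (t >> 8)) >> 8
 *
 * since t * 257 = (t << 8) + t.  Scalar rendering of that variant
 * (illustrative only; the name is ours):
 */
static unsigned int
sketch_div255_via_257 (unsigned int a, unsigned int b)
{
  unsigned int t = a * b + 128;           /* pmullw ; paddw %%mm6         */
  return (t * 257) >> 16;                 /* pmulhuw against { 257, ... } */
}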
composite_over_argb_mmx_5 (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (" pxor %%mm7, %%mm7\n" // mm7 = { 0, 0, 0, 0 }
      " movl $0x80808080, %%eax\n"
      " movd %%eax, %%mm6\n" // mm6 = { 128, 128, 128, 128 }
      " punpcklbw %%mm7, %%mm6\n"
      " movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
      " movd %%eax, %%mm5\n"
      " punpcklbw %%mm7, %%mm5\n"
      " pcmpeqw %%mm5, %%mm5\n"
      " psrlw $8, %%mm5\n" // mm5 = { 255, 255, 255, 255 }
      " movl $0x02020202, %%eax\n"
      " movd %%eax, %%mm4\n"
      " punpcklbw %%mm7, %%mm4\n"
492 " paddw %%mm5, %%mm4\n" // mm5 = { 257, 257, 257, 257 }
494 " movd (%1), %%mm0\n"
495 " punpcklbw %%mm7, %%mm0\n"
496 " xor %%eax, %%eax\n"
497 " pextrw $3, %%mm0, %%eax\n"
498 " test %%eax, %%eax\n"
501 " pshufw $0xff, %%mm0, %%mm1\n"
502 " pxor %%mm5, %%mm1\n"
504 " movd (%0), %%mm2\n"
505 " punpcklbw %%mm7, %%mm2\n"
506 " pmullw %%mm1, %%mm2\n"
507 " paddw %%mm6, %%mm2\n"
508 " pmulhuw %%mm4, %%mm2\n"
510 " paddw %%mm0, %%mm2\n"
511 " packuswb %%mm2, %%mm2\n"
513 " movd %%mm2, (%0)\n"
520 :"+r" (dest), "+r" (src), "+r" (n)
525 OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_5, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
composite_over_argb_sse2 (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (" pxor %%xmm7, %%xmm7\n" // mm7 = { 0, 0, 0, 0 }
      " movl $0x80808080, %%eax\n"
      " movd %%eax, %%xmm6\n" // mm6 = { 128, 128, 128, 128 }
      " punpcklbw %%xmm7, %%xmm6\n"
      " movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
      " movd %%eax, %%xmm5\n"
      " punpcklbw %%xmm7, %%xmm5\n"
      " movl $0x02020202, %%eax\n"
      " movd %%eax, %%xmm4\n"
      " punpcklbw %%xmm7, %%xmm4\n"
541 " paddw %%xmm5, %%xmm4\n" // mm4 = { 255, 255, 255, 255 }
543 " movl (%1), %%eax\n"
544 " testl $0xff000000, %%eax\n"
547 " movd (%1), %%xmm1\n"
548 " punpcklbw %%xmm7, %%xmm1\n"
549 " pshuflw $0xff, %%xmm1, %%xmm0\n"
550 " pxor %%xmm5, %%xmm0\n"
552 " movd (%0), %%xmm3\n"
553 " punpcklbw %%xmm7, %%xmm3\n"
554 " pmullw %%xmm0, %%xmm3\n"
555 " paddw %%xmm6, %%xmm3\n"
556 " pmulhuw %%xmm4, %%xmm3\n"
558 " paddw %%xmm1, %%xmm3\n"
559 " packuswb %%xmm3, %%xmm3\n"
560 " movd %%xmm3, (%0)\n"
566 :"+r" (dest), "+r" (src), "+r" (n)
571 OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2, composite_over_argb, OIL_IMPL_FLAG_SSE2);
/* written for shaun */
composite_over_argb_sse2_2 (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (" pxor %%xmm7, %%xmm7\n" // mm7 = { 0, 0, 0, 0 }
      " movl $0x80808080, %%eax\n"
      " movd %%eax, %%xmm6\n" // mm6 = { 128, 128, 128, 128 }
      " punpcklbw %%xmm7, %%xmm6\n"
      " punpcklwd %%xmm6, %%xmm6\n"
      " movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
      " movd %%eax, %%xmm5\n"
      " punpcklbw %%xmm7, %%xmm5\n"
      " punpcklwd %%xmm5, %%xmm5\n"
      " movl $0x02020202, %%eax\n"
      " movd %%eax, %%xmm4\n"
      " punpcklbw %%xmm7, %%xmm4\n"
      " paddw %%xmm5, %%xmm4\n" // mm4 = { 257, 257, 257, 257 }
      " punpcklwd %%xmm4, %%xmm4\n"

  __asm__ __volatile__ (
      " movl (%1), %%eax\n"
      " testl $0xff000000, %%eax\n"
      " movd (%1), %%xmm1\n"
      " punpcklbw %%xmm7, %%xmm1\n"
      " pshuflw $0xff, %%xmm1, %%xmm0\n"
      " pxor %%xmm5, %%xmm0\n"
      " movd (%0), %%xmm3\n"
      " punpcklbw %%xmm7, %%xmm3\n"
      " pmullw %%xmm0, %%xmm3\n"
      " paddw %%xmm6, %%xmm3\n"
      " pmulhuw %%xmm4, %%xmm3\n"
      " paddw %%xmm1, %%xmm3\n"
      " packuswb %%xmm3, %%xmm3\n"
      " movd %%xmm3, (%0)\n"
      :"+r" (dest), "+r" (src)

  __asm__ __volatile__ ("\n"
      " movl (%1), %%eax\n"
      " orl 4(%1), %%eax\n"
      " testl $0xff000000, %%eax\n"
      " movq (%1), %%xmm1\n"
      " punpcklbw %%xmm7, %%xmm1\n"
      " pshuflw $0xff, %%xmm1, %%xmm0\n"
      " pshufhw $0xff, %%xmm0, %%xmm0\n"
      " pxor %%xmm5, %%xmm0\n"
      " movq (%0), %%xmm3\n"
      " punpcklbw %%xmm7, %%xmm3\n"
      " pmullw %%xmm0, %%xmm3\n"
      " paddw %%xmm6, %%xmm3\n"
      " pmulhuw %%xmm4, %%xmm3\n"
      " paddw %%xmm1, %%xmm3\n"
      " packuswb %%xmm3, %%xmm3\n"
      " movq %%xmm3, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_2, composite_over_argb, OIL_IMPL_FLAG_SSE2);
/* written for shaun */
composite_over_argb_sse2_3 (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (" pxor %%xmm7, %%xmm7\n" // mm7 = { 0, 0, 0, 0 }
      " movl $0x80808080, %%eax\n"
      " movd %%eax, %%xmm6\n" // mm6 = { 128, 128, 128, 128 }
      " punpcklbw %%xmm7, %%xmm6\n"
      " punpcklwd %%xmm6, %%xmm6\n"
      " movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
      " movd %%eax, %%xmm5\n"
      " punpcklbw %%xmm7, %%xmm5\n"
      " punpcklwd %%xmm5, %%xmm5\n"
      " movl $0x02020202, %%eax\n"
      " movd %%eax, %%xmm4\n"
      " punpcklbw %%xmm7, %%xmm4\n"
      " paddw %%xmm5, %%xmm4\n" // mm4 = { 257, 257, 257, 257 }
      " punpcklwd %%xmm4, %%xmm4\n"

  begin = 0x3 & (4 - (((unsigned long)dest & 0xf) >> 2));
  middle = (n-begin)>>2;
  end = n - begin - middle*4;

  __asm__ __volatile__ ("\n"
      " movl (%1), %%eax\n"
      " testl $0xff000000, %%eax\n"
      " movd (%1), %%xmm1\n"
      " punpcklbw %%xmm7, %%xmm1\n"
      " pshuflw $0xff, %%xmm1, %%xmm0\n"
      " pxor %%xmm5, %%xmm0\n"
      " movd (%0), %%xmm3\n"
      " punpcklbw %%xmm7, %%xmm3\n"
      " pmullw %%xmm0, %%xmm3\n"
      " paddw %%xmm6, %%xmm3\n"
      " pmulhuw %%xmm4, %%xmm3\n"
      " paddw %%xmm1, %%xmm3\n"
      " packuswb %%xmm3, %%xmm3\n"
      " movd %%xmm3, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (begin)

  __asm__ __volatile__ ("\n"
      " movq (%1), %%xmm1\n"
      " movq 8(%1), %%xmm0\n"
      " movl (%1), %%eax\n"
      " orl 4(%1), %%eax\n"
      " orl 8(%1), %%eax\n"
      " orl 12(%1), %%eax\n"
      " test $0xff000000, %%eax\n"
      " punpcklbw %%xmm7, %%xmm1\n"
      " punpcklbw %%xmm7, %%xmm0\n"
      " pshuflw $0xff, %%xmm1, %%xmm1\n"
      " pshuflw $0xff, %%xmm0, %%xmm0\n"
      " pshufhw $0xff, %%xmm1, %%xmm1\n"
      " pshufhw $0xff, %%xmm0, %%xmm0\n"
      " pxor %%xmm5, %%xmm1\n"
      " pxor %%xmm5, %%xmm0\n"
      " movq (%0), %%xmm3\n"
      " movq 8(%0), %%xmm2\n"
      " punpcklbw %%xmm7, %%xmm3\n"
      " punpcklbw %%xmm7, %%xmm2\n"
      " pmullw %%xmm1, %%xmm3\n"
      " paddw %%xmm6, %%xmm3\n"
      " pmulhuw %%xmm4, %%xmm3\n"
      " pmullw %%xmm0, %%xmm2\n"
      " paddw %%xmm6, %%xmm2\n"
      " pmulhuw %%xmm4, %%xmm2\n"
      " packuswb %%xmm2, %%xmm3\n"
      " movdqu (%1), %%xmm1\n"
      " paddb %%xmm1, %%xmm3\n"
      " movdqa %%xmm3, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (middle)

  __asm__ __volatile__ ("\n"
      " movl (%1), %%eax\n"
      " testl $0xff000000, %%eax\n"
      " movd (%1), %%xmm1\n"
      " punpcklbw %%xmm7, %%xmm1\n"
      " pshuflw $0xff, %%xmm1, %%xmm0\n"
      " pxor %%xmm5, %%xmm0\n"
      " movd (%0), %%xmm3\n"
      " punpcklbw %%xmm7, %%xmm3\n"
      " pmullw %%xmm0, %%xmm3\n"
      " paddw %%xmm6, %%xmm3\n"
      " pmulhuw %%xmm4, %%xmm3\n"
      " paddw %%xmm1, %%xmm3\n"
      " packuswb %%xmm3, %%xmm3\n"
      " movd %%xmm3, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (end)
OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_3, composite_over_argb, OIL_IMPL_FLAG_SSE2);
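
/*
 * composite_over_argb_sse2_3 splits the span into a head, an aligned body
 * and a tail so the 4-pixel loop can use an aligned 16-byte store (movdqa)
 * on dest:
 *
 *   begin  = leading pixels until dest is 16-byte aligned
 *   middle = number of 4-pixel chunks
 *   end    = trailing pixels
 *
 * Scalar sketch of the same driver (illustrative only; the guard for very
 * short spans is our assumption, and sketch_over() is defined above):
 */
static void
sketch_head_body_tail (uint32_t *dest, const uint32_t *src, int n)
{
  int begin = 0x3 & (4 - (((unsigned long)dest & 0xf) >> 2));
  int middle, end, i;

  if (begin > n)
    begin = n;                            /* assumed guard, not in the asm */
  middle = (n - begin) >> 2;
  end = n - begin - middle * 4;

  for (i = 0; i < begin; i++)             /* head: one pixel at a time    */
    dest[i] = sketch_over (dest[i], src[i]);
  for (; i < begin + middle * 4; i++)     /* body: 4-pixel aligned chunks */
    dest[i] = sketch_over (dest[i], src[i]);
  for (; i < begin + middle * 4 + end; i++) /* tail: leftover pixels      */
    dest[i] = sketch_over (dest[i], src[i]);
}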
composite_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movl (%1), %%eax\n"
      " movd %%eax, %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0xff, %%mm0, %%mm3\n"
      " pxor %%mm5, %%mm3\n"
      " movq %%mm3, %%mm1\n"
      " movd (%0), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      MMX_MULDIV255(mm2, mm1)
      " paddw %%mm0, %%mm2\n"
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_mmx, composite_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
composite_add_argb_mmx (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movd (%1), %%mm0\n"
      " movd (%0), %%mm2\n"
      " paddusb %%mm0, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_add_argb_mmx, composite_add_argb, OIL_IMPL_FLAG_MMX);
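
/*
 * Scalar sketch of composite_add_argb (illustrative only): paddusb is a
 * per-byte saturating add, so every channel clamps at 255.
 */
static uint32_t
sketch_add_saturate (uint32_t dest, uint32_t src)
{
  uint32_t out = 0;
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    unsigned int c = ((dest >> shift) & 0xff) + ((src >> shift) & 0xff);
    out |= (c > 255 ? 255 : c) << shift;
  }
  return out;
}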
composite_add_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movd (%1), %%mm0\n"
      " movd (%0), %%mm2\n"
      " paddusb %%mm0, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_add_argb_const_src_mmx, composite_add_argb_const_src, OIL_IMPL_FLAG_MMX);
composite_in_over_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movd (%2), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0x00, %%mm0, %%mm1\n"
      " movd (%1), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      MMX_MULDIV255(mm2, mm1)
      " movd (%0), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0xff, %%mm2, %%mm1\n"
      " pxor %%mm5, %%mm1\n"
      MMX_MULDIV255(mm0, mm1)
      " paddw %%mm0, %%mm2\n"
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_in_over_argb_mmx, composite_in_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
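
/*
 * composite_in_over_argb chains the two operators: the source pixel is
 * first scaled by the 8-bit mask (IN), and the result is then composited
 * OVER the destination using the scaled alpha.  Scalar sketch built from
 * the helpers above (illustrative only):
 */
static uint32_t
sketch_in_over (uint32_t dest, uint32_t src, uint8_t mask)
{
  uint32_t tmp = sketch_in (src, mask);   /* src IN mask   */
  return sketch_over (dest, tmp);         /* tmp OVER dest */
}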
composite_in_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movd (%1), %%mm3\n"
      " punpcklbw %%mm7, %%mm3\n"
      " movd (%2), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0x00, %%mm0, %%mm1\n"
      " movq %%mm3, %%mm2\n"
      MMX_MULDIV255(mm2, mm1)
      " movd (%0), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0xff, %%mm2, %%mm1\n"
      " pxor %%mm5, %%mm1\n"
      MMX_MULDIV255(mm0, mm1)
      " paddw %%mm0, %%mm2\n"
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
composite_in_over_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
#if !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      " movd (%2), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0x00, %%mm0, %%mm3\n"
      " movd (%1), %%mm2\n"
      " punpcklbw %%mm7, %%mm2\n"
      " movq %%mm3, %%mm1\n"
      MMX_MULDIV255(mm2, mm1)
      " movd (%0), %%mm0\n"
      " punpcklbw %%mm7, %%mm0\n"
      " pshufw $0xff, %%mm2, %%mm1\n"
      " pxor %%mm5, %%mm1\n"
      MMX_MULDIV255(mm0, mm1)
      " paddw %%mm0, %%mm2\n"
      " packuswb %%mm2, %%mm2\n"
      " movd %%mm2, (%0)\n"
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
#ifdef __SYMBIAN32__
/* Symbian DLL export stubs: one per OilFunctionImpl registered above. */

OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx() {
  return &_oil_function_impl_composite_in_argb_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx2() {
  return &_oil_function_impl_composite_in_argb_mmx2;
}

OilFunctionImpl* __oil_function_impl_composite_in_argb_const_src_mmx() {
  return &_oil_function_impl_composite_in_argb_const_src_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_in_argb_const_mask_mmx() {
  return &_oil_function_impl_composite_in_argb_const_mask_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx() {
  return &_oil_function_impl_composite_over_argb_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_2() {
  return &_oil_function_impl_composite_over_argb_mmx_2;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_3() {
  return &_oil_function_impl_composite_over_argb_mmx_3;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_4() {
  return &_oil_function_impl_composite_over_argb_mmx_4;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_5() {
  return &_oil_function_impl_composite_over_argb_mmx_5;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2() {
  return &_oil_function_impl_composite_over_argb_sse2;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_2() {
  return &_oil_function_impl_composite_over_argb_sse2_2;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_3() {
  return &_oil_function_impl_composite_over_argb_sse2_3;
}

OilFunctionImpl* __oil_function_impl_composite_over_argb_const_src_mmx() {
  return &_oil_function_impl_composite_over_argb_const_src_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_add_argb_mmx() {
  return &_oil_function_impl_composite_add_argb_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_add_argb_const_src_mmx() {
  return &_oil_function_impl_composite_add_argb_const_src_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_in_over_argb_mmx() {
  return &_oil_function_impl_composite_in_over_argb_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_src_mmx() {
  return &_oil_function_impl_composite_in_over_argb_const_src_mmx;
}

OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_mask_mmx() {
  return &_oil_function_impl_composite_in_over_argb_const_mask_mmx;
}

#endif