First public contribution.
1 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
3 Copyright 2002,2003,2004,2005 David A. Schleef <ds@schleef.org>
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
28 #include <liboil/liboilfunction.h>
29 #include <liboil/liboilclasses.h>
33 split_53_nomix (int16_t *d_2xn, int16_t *s_2xn, int n)
39 for(i=1;i<n*2-2;i+=2){
40 d_2xn[i] = s_2xn[i] - ((s_2xn[i-1] + s_2xn[i+1]) >> 1);
42 d_2xn[n*2-1] = s_2xn[n*2-1] - s_2xn[n*2-2];
45 d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
47 d_2xn[i] = s_2xn[i] + ((d_2xn[i-1] + d_2xn[i+1]) >> 2);
50 OIL_DEFINE_IMPL (split_53_nomix, split_53);
54 synth_53_nomix (int16_t *d_2xn, int16_t *s_2xn, int n)
59 i_n[0] -= i_n[1] >> 1;
61 i_n[i] -= (i_n[i-1] + i_n[i+1]) >> 2;
65 for(i=1;i<n*2-2;i+=2){
66 i_n[i] += (i_n[i+1] + i_n[i-1]) >> 1;
68 i_n[n*2-1] += i_n[n*2-2];
74 split_53_c (int16_t *d_2xn, int16_t *s_2xn, int n)
80 d_2xn[1] = s_2xn[1] - s_2xn[0];
81 d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
83 d_2xn[1] = s_2xn[1] - ((s_2xn[0] + s_2xn[2]) >> 1);
84 d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
87 for(i=0;i<(n*2-4)/2;i++){
88 d_2xn[1] = s_2xn[1] - ((s_2xn[0] + s_2xn[2]) >> 1);
89 d_2xn[0] = s_2xn[0] + ((d_2xn[-1] + d_2xn[1]) >> 2);
93 d_2xn[1] = s_2xn[1] - s_2xn[0];
94 d_2xn[0] = s_2xn[0] + ((d_2xn[-1] + d_2xn[1]) >> 2);
97 OIL_DEFINE_IMPL (split_53_c, split_53);
100 synth_53_c (int16_t *d_2xn, int16_t *s_2xn, int n)
106 d_2xn[0] = s_2xn[0] - (s_2xn[1] >> 1);
107 d_2xn[1] = s_2xn[1] + d_2xn[0];
109 d_2xn[0] = s_2xn[0] - (s_2xn[1] >> 1);
110 for(i=2;i<n*2-2;i+=2){
111 d_2xn[i] = s_2xn[i] - ((s_2xn[i-1] + s_2xn[i+1]) >> 2);
112 d_2xn[i-1] = s_2xn[i-1] + ((d_2xn[i] + d_2xn[i-2]) >> 1);
114 d_2xn[n*2-2] = s_2xn[n*2-2] - ((s_2xn[n*2-3] + s_2xn[n*2-1]) >> 2);
115 d_2xn[n*2-3] = s_2xn[n*2-3] + ((d_2xn[n*2-2] + d_2xn[n*2-4]) >> 1);
116 d_2xn[n*2-1] = s_2xn[n*2-1] + d_2xn[n*2-2];
119 OIL_DEFINE_IMPL (synth_53_c, synth_53);
122 deinterleave2_c_1 (int16_t *d1, int16_t *d2, int16_t *s_2xn, int n)
128 d2[i] = s_2xn[2*i + 1];
131 OIL_DEFINE_IMPL (deinterleave2_c_1, deinterleave2_s16);
134 deinterleave2_asm (int16_t *d1, int16_t *d2, int16_t *s_2xn, int n)
151 " movw (%1,%%ecx,4), %%ax\n"
152 " movw %%ax, (%0,%%ecx,2)\n"
153 " movw 2(%1,%%ecx,4), %%ax\n"
154 " movw %%ax, (%2,%%ecx,2)\n"
155 " movw 4(%1,%%ecx,4), %%ax\n"
156 " movw %%ax, 2(%0,%%ecx,2)\n"
157 " movw 6(%1,%%ecx,4), %%ax\n"
158 " movw %%ax, 2(%2,%%ecx,2)\n"
161 : "+r" (d1), "+r" (s_2xn), "+r" (d2)
165 OIL_DEFINE_IMPL (deinterleave2_asm, deinterleave2_s16);
168 deinterleave2_mmx (int16_t *d1, int16_t *d2, int16_t *s_2xn, int n)
181 " xor %%ecx, %%ecx\n"
183 " movq (%1,%%ecx,4), %%mm0\n"
184 " movq 8(%1,%%ecx,4), %%mm1\n"
185 " pslld $16, %%mm0\n"
186 " pslld $16, %%mm1\n"
187 " psrad $16, %%mm0\n"
188 " psrad $16, %%mm1\n"
189 " packssdw %%mm1, %%mm0\n"
190 " movq %%mm0, (%0,%%ecx,2)\n"
191 " movq (%1,%%ecx,4), %%mm0\n"
192 " movq 8(%1,%%ecx,4), %%mm1\n"
193 " psrad $16, %%mm0\n"
194 " psrad $16, %%mm1\n"
195 " packssdw %%mm1, %%mm0\n"
196 " movq %%mm0, (%2,%%ecx,2)\n"
201 : "+r" (d1), "+r" (s_2xn), "+r" (d2)
205 OIL_DEFINE_IMPL_FULL (deinterleave2_mmx, deinterleave2_s16, OIL_IMPL_FLAG_MMX);
208 deinterleave2_mmx_2 (int16_t *d1, int16_t *d2, int16_t *s_2xn, int n)
221 " xor %%ecx, %%ecx\n"
223 " pshufw $0xd8, (%1,%%ecx,4), %%mm0\n"
224 " movd %%mm0, (%0,%%ecx,2)\n"
225 " pshufw $0x8d, (%1,%%ecx,4), %%mm0\n"
226 " movd %%mm0, (%2,%%ecx,2)\n"
231 : "+r" (d1), "+r" (s_2xn), "+r" (d2)
235 OIL_DEFINE_IMPL_FULL (deinterleave2_mmx_2, deinterleave2_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
238 deinterleave2_mmx_3 (int16_t *d1, int16_t *d2, int16_t *s_2xn, int n)
251 " xor %%ecx, %%ecx\n"
253 " movq (%1,%%ecx,4), %%mm1\n"
254 " movq (%1,%%ecx,4), %%mm2\n"
255 " movq 8(%1,%%ecx,4), %%mm0\n"
256 " punpcklwd %%mm0, %%mm1\n"
257 " punpckhwd %%mm0, %%mm2\n"
258 " movq %%mm1, %%mm0\n"
259 " punpcklwd %%mm2, %%mm0\n"
260 " punpckhwd %%mm2, %%mm1\n"
261 " movq %%mm0, (%0,%%ecx,2)\n"
262 " movq %%mm1, (%2,%%ecx,2)\n"
267 : "+r" (d1), "+r" (s_2xn), "+r" (d2)
271 OIL_DEFINE_IMPL_FULL (deinterleave2_mmx_3, deinterleave2_s16, OIL_IMPL_FLAG_MMX);
274 deinterleave2_mmx_4 (int16_t *d1, int16_t *d2, int16_t *s_2xn, int n)
287 " xor %%ecx, %%ecx\n"
289 " movq (%1,%%ecx,4), %%mm1\n"
290 " movq %%mm1, %%mm2\n"
291 " movq 8(%1,%%ecx,4), %%mm0\n"
292 " movq 16(%1,%%ecx,4), %%mm5\n"
293 " punpcklwd %%mm0, %%mm1\n"
294 " movq %%mm5, %%mm6\n"
295 " punpckhwd %%mm0, %%mm2\n"
296 " movq 24(%1,%%ecx,4), %%mm4\n"
297 " movq %%mm1, %%mm0\n"
298 " punpcklwd %%mm4, %%mm5\n"
299 " punpcklwd %%mm2, %%mm0\n"
300 " punpckhwd %%mm4, %%mm6\n"
301 " punpckhwd %%mm2, %%mm1\n"
302 " movq %%mm5, %%mm4\n"
303 " movq %%mm0, (%0,%%ecx,2)\n"
304 " punpcklwd %%mm6, %%mm4\n"
305 " movq %%mm1, (%2,%%ecx,2)\n"
306 " punpckhwd %%mm6, %%mm5\n"
307 " movq %%mm4, 8(%0,%%ecx,2)\n"
308 " movq %%mm5, 8(%2,%%ecx,2)\n"
313 : "+r" (d1), "+r" (s_2xn), "+r" (d2)
317 OIL_DEFINE_IMPL_FULL (deinterleave2_mmx_4, deinterleave2_s16, OIL_IMPL_FLAG_MMX);
321 lift_add_mult_shift12_i386_mmx (int16_t *d, int16_t *s1, int16_t *s2,
322 int16_t *s3, int16_t *s4, int n)
327 d[0] = s1[0] + ((s4[0]*(s2[0] + s3[0]))>>12);
336 val = ((*(uint16_t *)s4)<<16) | (*(uint16_t *)s4);
340 " movd %%ecx, %%mm7\n"
341 " punpcklwd %%mm7, %%mm7\n"
344 " movq 0(%2), %%mm0\n"
345 " paddsw 0(%3), %%mm0\n"
346 " movq %%mm0, %%mm1\n"
347 " pmullw %%mm7, %%mm0\n"
348 " pmulhw %%mm7, %%mm1\n"
349 " psrlw $12, %%mm0\n"
351 " por %%mm1, %%mm0\n"
352 " paddsw 0(%1), %%mm0\n"
353 " movq %%mm0, 0(%0)\n"
361 : "+r" (d), "+r" (s1), "+r" (s2), "+r" (s3)
365 OIL_DEFINE_IMPL_FULL (lift_add_mult_shift12_i386_mmx, lift_add_mult_shift12, OIL_IMPL_FLAG_MMX);
368 interleave2_mmx (int16_t *d_2xn, int16_t *s1, int16_t *s2, int n)
381 " xor %%ecx, %%ecx\n"
383 " movq (%1,%%ecx,2), %%mm0\n"
384 " movq (%2,%%ecx,2), %%mm1\n"
385 " movq %%mm0, %%mm2\n"
386 " punpckhwd %%mm1, %%mm0\n"
387 " punpcklwd %%mm1, %%mm2\n"
388 " movq %%mm2, (%0,%%ecx,4)\n"
389 " movq %%mm0, 8(%0,%%ecx,4)\n"
394 : "+r" (d_2xn), "+r" (s1), "+r" (s2)
398 OIL_DEFINE_IMPL_FULL (interleave2_mmx, interleave2_s16, OIL_IMPL_FLAG_MMX);
401 lift_add_shift1_mmx (int16_t *d, int16_t *s1, int16_t *s2, int16_t *s3, int n)
404 d[0] = s1[0] + ((s2[0] + s3[0])>>1);
414 " xor %%ecx, %%ecx\n"
416 " movq (%2,%%ecx,2), %%mm1\n"
417 " movq (%3,%%ecx,2), %%mm2\n"
418 " paddw %%mm2, %%mm1\n"
420 " paddw (%1,%%ecx,2), %%mm1\n"
421 " movq %%mm1, (%0,%%ecx,2)\n"
426 : "+r" (d), "+r" (s1), "+r" (s2), "+r" (s3)
430 OIL_DEFINE_IMPL_FULL (lift_add_shift1_mmx, lift_add_shift1, OIL_IMPL_FLAG_MMX);
433 lift_sub_shift1_mmx (int16_t *d, int16_t *s1, int16_t *s2, int16_t *s3, int n)
436 d[0] = s1[0] - ((s2[0] + s3[0])>>1);
446 " xor %%ecx, %%ecx\n"
448 " movq (%2,%%ecx,2), %%mm1\n"
449 " movq (%3,%%ecx,2), %%mm2\n"
450 " movq (%1,%%ecx,2), %%mm0\n"
451 " paddw %%mm2, %%mm1\n"
453 " psubw %%mm1, %%mm0\n"
454 " movq %%mm0, (%0,%%ecx,2)\n"
459 : "+r" (d), "+r" (s1), "+r" (s2), "+r" (s3)
463 OIL_DEFINE_IMPL_FULL (lift_sub_shift1_mmx, lift_sub_shift1, OIL_IMPL_FLAG_MMX);
466 lift_add_shift2_mmx (int16_t *d, int16_t *s1, int16_t *s2, int16_t *s3, int n)
469 d[0] = s1[0] + ((s2[0] + s3[0])>>2);
479 " xor %%ecx, %%ecx\n"
481 " movq (%2,%%ecx,2), %%mm1\n"
482 " movq (%3,%%ecx,2), %%mm2\n"
483 " paddw %%mm2, %%mm1\n"
485 " paddw (%1,%%ecx,2), %%mm1\n"
486 " movq %%mm1, (%0,%%ecx,2)\n"
491 : "+r" (d), "+r" (s1), "+r" (s2), "+r" (s3)
495 OIL_DEFINE_IMPL_FULL (lift_add_shift2_mmx, lift_add_shift2, OIL_IMPL_FLAG_MMX);
498 lift_sub_shift2_mmx (int16_t *d, int16_t *s1, int16_t *s2, int16_t *s3, int n)
501 d[0] = s1[0] - ((s2[0] + s3[0])>>2);
511 " xor %%ecx, %%ecx\n"
513 " movq (%2,%%ecx,2), %%mm1\n"
514 " movq (%3,%%ecx,2), %%mm2\n"
515 " movq (%1,%%ecx,2), %%mm0\n"
516 " paddw %%mm2, %%mm1\n"
518 " psubw %%mm1, %%mm0\n"
519 " movq %%mm0, (%0,%%ecx,2)\n"
524 : "+r" (d), "+r" (s1), "+r" (s2), "+r" (s3)
528 OIL_DEFINE_IMPL_FULL (lift_sub_shift2_mmx, lift_sub_shift2, OIL_IMPL_FLAG_MMX);
530 #ifdef ENABLE_BROKEN_IMPLS
532 synth_53_mmx (int16_t *d_2xn, int16_t *s_2xn, int n)
538 d_2xn[0] = s_2xn[0] - (s_2xn[1] >> 1);
539 d_2xn[1] = s_2xn[1] + d_2xn[0];
543 d_2xn[0] = s_2xn[0] - (s_2xn[1] >> 1);
549 " xor %%ecx, %%ecx\n"
550 " movw 2(%1), %%ecx\n"
551 " movd %%ecx, %%mm7\n"
552 " movw 0(%0), %%ecx\n"
553 " movd %%ecx, %%mm6\n"
554 " movw 0(%1), %%ecx\n"
555 " movd %%ecx, %%mm5\n"
557 " xor %%ecx, %%ecx\n"
559 " movq 4(%1,%%ecx,4), %%mm1\n" // mm1 = s5 s4 s3 s2
560 " movq %%mm1, %%mm2\n" // mm2 = s5 s4 s3 s2
561 " movq 12(%1,%%ecx,4), %%mm0\n" // mm0 = s9 s8 s7 s6
562 " punpcklwd %%mm0, %%mm1\n" // mm1 = s7 s3 s6 s2
563 " punpckhwd %%mm0, %%mm2\n" // mm2 = s9 s5 s8 s4
564 " movq %%mm1, %%mm0\n" // mm0 = s7 s3 s6 s2
565 " punpcklwd %%mm2, %%mm0\n" // mm0 = s8 s6 s4 s2
566 " punpckhwd %%mm2, %%mm1\n" // mm1 = s9 s7 s5 s3
567 //" movq %%mm0, %%mm3\n" // mm0 = s8 s6 s4 s2
569 " movq %%mm1, %%mm2\n" // mm2 = s9 s7 s5 s3
570 " psllq $16, %%mm2\n" // mm2 = s7 s5 s3 00
571 " por %%mm7, %%mm2\n" // mm2 = s7 s5 s3 s1
572 " movq %%mm2, %%mm4\n" // mm4 = s7 s5 s3 s1
573 " paddw %%mm1, %%mm2\n" // mm2 = s9+s7 ...
574 " psraw $2, %%mm2\n" // mm2 = (s9+s7)>>2 ...
575 " movq %%mm1, %%mm7\n" // mm7 = s9 s7 s5 s3
576 " psrlq $48, %%mm7\n" // mm7 = 00 00 00 s9
577 " psubw %%mm2, %%mm0\n" // mm0 = d8 d6 d4 d2
579 " movq %%mm0, %%mm1\n" // mm1 = d8 d6 d4 d2
580 " movq %%mm0, %%mm3\n" // mm1 = d8 d6 d4 d2
581 " psllq $16, %%mm0\n" // mm0 = d6 d4 d2 00
582 " por %%mm6, %%mm0\n" // mm0 = d6 d4 d2 d0
583 " psrlq $48, %%mm1\n" // mm1 = 00 00 00 d8
584 " movq %%mm1, %%mm6\n" // mm6 = 00 00 00 d8
586 " movq %%mm0, %%mm1\n"
587 " paddw %%mm3, %%mm1\n" // mm0 = d8+d6 ...
588 " psraw $1, %%mm1\n" // mm1 = (d8+d6)>>1 ...
589 " paddw %%mm4, %%mm1\n" // mm1 = d7 d5 d3 d1
591 " movq %%mm1, %%mm2\n"
593 " movq %%mm0, %%mm1\n"
594 " punpcklwd %%mm2, %%mm0\n"
595 " punpckhwd %%mm2, %%mm1\n"
597 " movq %%mm0, (%0, %%ecx, 4)\n"
598 " movq %%mm1, 8(%0, %%ecx, 4)\n"
604 : "+r" (d_2xn), "+r" (s_2xn), "+ecx" (i)
609 d_2xn[i] = s_2xn[i] - ((s_2xn[i-1] + s_2xn[i+1]) >> 2);
615 d_2xn[i] = s_2xn[i] - ((s_2xn[i-1] + s_2xn[i+1]) >> 2);
616 d_2xn[i-1] = s_2xn[i-1] + ((d_2xn[i] + d_2xn[i-2]) >> 1);
618 d_2xn[n*2-2] = s_2xn[n*2-2] - ((s_2xn[n*2-3] + s_2xn[n*2-1]) >> 2);
619 d_2xn[n*2-3] = s_2xn[n*2-3] + ((d_2xn[n*2-2] + d_2xn[n*2-4]) >> 1);
620 d_2xn[n*2-1] = s_2xn[n*2-1] + d_2xn[n*2-2];
623 OIL_DEFINE_IMPL_FULL (synth_53_mmx, synth_53, OIL_IMPL_FLAG_MMX);
628 mas2_add_s16_mmx (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
629 int16_t *s4_2, int n)
636 x = s4_2[0] + s2[0]*s3_2[0] + s2[1]*s3_2[1];
649 " movzwl 0(%0), %%ecx\n"
650 " movd %%ecx, %%mm7\n"
651 " pshufw $0x00, %%mm7, %%mm7\n"
652 " movzwl 2(%0), %%ecx\n"
653 " movd %%ecx, %%mm6\n"
654 " pshufw $0x00, %%mm6, %%mm6\n"
655 " movzwl 0(%1), %%ecx\n"
656 " movd %%ecx, %%mm5\n"
657 " pshufw $0x44, %%mm5, %%mm5\n"
658 :: "r" (s3_2), "r" (s4_2)
663 " movq 0(%2), %%mm0\n" // mm0 = s0, s1, s2, s3
664 " movq 0(%2), %%mm1\n" // mm1 = s0, s1, s2, s3
665 " pmullw %%mm7, %%mm0\n" // mm0 = lo(s0*a0), lo(s1*a0), ...
666 " pmulhw %%mm7, %%mm1\n" // mm1 = hi(s0*a0), hi(s1*a0), ...
667 " movq %%mm0, %%mm2\n" // mm2 = lo(s0*a0), lo(s1*a0), ...
668 " punpcklwd %%mm1, %%mm0\n" // mm0 = s0*a0, s1*a0
669 " punpckhwd %%mm1, %%mm2\n" // mm2 = s2*a0, s3*a0
670 " movq %%mm2, %%mm1\n" // mm1 = s2*a0, s3*a0
672 " movq 2(%2), %%mm2\n"
673 " movq 2(%2), %%mm3\n"
674 " pmullw %%mm6, %%mm2\n"
675 " pmulhw %%mm6, %%mm3\n"
676 " movq %%mm2, %%mm4\n"
677 " punpcklwd %%mm3, %%mm2\n" // mm2 = s1*a1, s2*a1
678 " punpckhwd %%mm3, %%mm4\n" // mm4 = s3*a1, s4*a1
679 " movq %%mm4, %%mm3\n" // mm3 = s3*a1, s4*a1
681 " paddd %%mm3, %%mm1\n" // mm1 = s2*a0 + s3*a1, ...
682 " paddd %%mm2, %%mm0\n" // mm0 = s0*a0 + s1*a1, ...
684 " paddd %%mm5, %%mm1\n" // mm1 = s2*a0 + s3*a1 + offset, ...
685 " paddd %%mm5, %%mm0\n" // mm0 = s0*a0 + s1*a1 + offset, ...
688 " psrad %%mm4, %%mm1\n" // mm1 = (s2*a0 + s3*a1 + offset)>>shift, ...
689 " psrad %%mm4, %%mm0\n" // mm0 = (s0*a0 + s1*a1 + offset)>>shift, ...
691 " packssdw %%mm1, %%mm0\n"
692 " paddw 0(%1), %%mm0\n"
693 " movq %%mm0, 0(%0)\n"
700 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
704 OIL_DEFINE_IMPL_FULL (mas2_add_s16_mmx, mas2_add_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
708 mas2_add_s16_lim_mmx (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
709 int16_t *s4_2, int n)
716 x = s4_2[0] + s2[0]*s3_2[0] + s2[1]*s3_2[1];
729 " movzwl 0(%0), %%ecx\n"
730 " movd %%ecx, %%mm7\n"
731 " pshufw $0x00, %%mm7, %%mm7\n"
732 " movzwl 2(%0), %%ecx\n"
733 " movd %%ecx, %%mm6\n"
734 " pshufw $0x00, %%mm6, %%mm6\n"
735 " movzwl 0(%1), %%ecx\n"
736 " movd %%ecx, %%mm5\n"
737 " pshufw $0x44, %%mm5, %%mm5\n"
738 :: "r" (s3_2), "r" (s4_2)
743 " movq 0(%2), %%mm0\n"
744 " paddq 2(%2), %%mm0\n"
747 " psraw %%mm4, %%mm0\n"
749 " paddw 0(%1), %%mm0\n"
750 " movq %%mm0, 0(%0)\n"
757 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
761 OIL_DEFINE_IMPL_FULL (mas2_add_s16_lim_mmx, mas2_add_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
765 mas4_add_s16_mmx (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_4,
766 int16_t *s4_2, int n)
794 " movq 0(%0), %%mm7\n"
795 " movzwl 0(%1), %%ecx\n"
796 " movd %%ecx, %%mm5\n"
797 " pshufw $0x44, %%mm5, %%mm5\n"
798 :: "r" (s3_4), "r" (s4_2)
803 " movq 0(%2), %%mm0\n" // mm0 = s0, s1, s2, s3
804 " movq 0(%2), %%mm1\n" // mm1 = s0, s1, s2, s3
805 " pshufw $0x00, %%mm7, %%mm6\n"
806 " pmullw %%mm6, %%mm0\n" // mm0 = lo(s0*a0), lo(s1*a0), ...
807 " pmulhw %%mm6, %%mm1\n" // mm1 = hi(s0*a0), hi(s1*a0), ...
808 " movq %%mm0, %%mm2\n" // mm2 = lo(s0*a0), lo(s1*a0), ...
809 " punpcklwd %%mm1, %%mm0\n" // mm0 = s0*a0, s1*a0
810 " punpckhwd %%mm1, %%mm2\n" // mm2 = s2*a0, s3*a0
811 " movq %%mm2, %%mm1\n" // mm1 = s2*a0, s3*a0
813 " movq 2(%2), %%mm2\n"
814 " movq 2(%2), %%mm3\n"
815 " pshufw $0x55, %%mm7, %%mm6\n"
816 " pmullw %%mm6, %%mm2\n"
817 " pmulhw %%mm6, %%mm3\n"
818 " movq %%mm2, %%mm4\n"
819 " punpcklwd %%mm3, %%mm2\n" // mm2 = s1*a1, s2*a1
820 " punpckhwd %%mm3, %%mm4\n" // mm4 = s3*a1, s4*a1
821 " movq %%mm4, %%mm3\n" // mm3 = s3*a1, s4*a1
822 " paddd %%mm3, %%mm1\n" // mm1 = s2*a0 + s3*a1, ...
823 " paddd %%mm2, %%mm0\n" // mm0 = s0*a0 + s1*a1, ...
825 " movq 4(%2), %%mm2\n"
826 " movq 4(%2), %%mm3\n"
827 " pshufw $0xaa, %%mm7, %%mm6\n"
828 " pmullw %%mm6, %%mm2\n"
829 " pmulhw %%mm6, %%mm3\n"
830 " movq %%mm2, %%mm4\n"
831 " punpcklwd %%mm3, %%mm2\n"
832 " punpckhwd %%mm3, %%mm4\n"
833 " movq %%mm4, %%mm3\n"
834 " paddd %%mm3, %%mm1\n"
835 " paddd %%mm2, %%mm0\n"
837 " movq 6(%2), %%mm2\n"
838 " movq 6(%2), %%mm3\n"
839 " pshufw $0xff, %%mm7, %%mm6\n"
840 " pmullw %%mm6, %%mm2\n"
841 " pmulhw %%mm6, %%mm3\n"
842 " movq %%mm2, %%mm4\n"
843 " punpcklwd %%mm3, %%mm2\n"
844 " punpckhwd %%mm3, %%mm4\n"
845 " movq %%mm4, %%mm3\n"
846 " paddd %%mm3, %%mm1\n"
847 " paddd %%mm2, %%mm0\n"
849 " paddd %%mm5, %%mm1\n"
850 " paddd %%mm5, %%mm0\n"
853 " psrad %%mm4, %%mm1\n"
854 " psrad %%mm4, %%mm0\n"
856 " packssdw %%mm1, %%mm0\n"
857 " paddw 0(%1), %%mm0\n"
858 " movq %%mm0, 0(%0)\n"
865 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
887 OIL_DEFINE_IMPL_FULL (mas4_add_s16_mmx, mas4_add_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
890 /* This only does 16-bit intermediates, whereas the ref specifies 32-bit */
892 mas2_add_s16_mmx (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
893 int16_t *s4_2, int n)
898 x = s4_2[0] + s2[0]*s3_2[0] + s2[1]*s3_2[1];
911 " movzwl 0(%0), %%ecx\n"
912 " movd %%ecx, %%mm7\n"
913 " pshufw $0x00, %%mm7, %%mm7\n"
914 " movzwl 2(%0), %%ecx\n"
915 " movd %%ecx, %%mm6\n"
916 " pshufw $0x00, %%mm6, %%mm6\n"
917 " movzwl 0(%1), %%ecx\n"
918 " movd %%ecx, %%mm5\n"
919 " pshufw $0x00, %%mm5, %%mm5\n"
920 " movzwl 2(%1), %%ecx\n"
921 " movd %%ecx, %%mm4\n"
922 :: "r" (s3_2), "r" (s4_2)
927 " movq 0(%2), %%mm0\n"
928 " pmullw %%mm7, %%mm0\n"
929 " movq 2(%2), %%mm1\n"
930 " pmullw %%mm6, %%mm1\n"
931 " paddw %%mm1, %%mm0\n"
932 " paddw %%mm5, %%mm0\n"
933 " psraw %%mm4, %%mm0\n"
934 " paddw 0(%1), %%mm0\n"
935 " movq %%mm0, 0(%0)\n"
942 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
945 OIL_DEFINE_IMPL_FULL (mas2_add_s16_mmx, mas2_add_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
950 /* This only does 16-bit intermediates, whereas the ref specifies 32-bit */
952 mas4_add_s16_mmx (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
953 int16_t *s4_2, int n)
958 x = s4_2[0] + s2[0]*s3_2[0] + s2[1]*s3_2[1] +
959 s2[2]*s3_2[2] + s2[2]*s3_2[2];
972 " movzwl 0(%0), %%ecx\n"
973 " movd %%ecx, %%mm7\n"
974 " pshufw $0x00, %%mm7, %%mm7\n"
975 " movzwl 2(%0), %%ecx\n"
976 " movd %%ecx, %%mm6\n"
977 " pshufw $0x00, %%mm6, %%mm6\n"
978 " movzwl 2(%0), %%ecx\n"
979 " movd %%ecx, %%mm5\n"
980 " pshufw $0x00, %%mm5, %%mm5\n"
981 " movzwl 2(%0), %%ecx\n"
982 " movd %%ecx, %%mm4\n"
983 " pshufw $0x00, %%mm4, %%mm4\n"
984 " movzwl 0(%1), %%ecx\n"
985 " movd %%ecx, %%mm3\n"
986 " pshufw $0x00, %%mm3, %%mm3\n"
987 " movzwl 2(%1), %%ecx\n"
988 " movd %%ecx, %%mm2\n"
989 :: "r" (s3_2), "r" (s4_2)
994 " movq 0(%2), %%mm0\n"
995 " pmullw %%mm7, %%mm0\n"
996 " movq 2(%2), %%mm1\n"
997 " pmullw %%mm6, %%mm1\n"
998 " paddw %%mm1, %%mm0\n"
999 " movq 4(%2), %%mm1\n"
1000 " pmullw %%mm5, %%mm1\n"
1001 " paddw %%mm1, %%mm0\n"
1002 " movq 6(%2), %%mm1\n"
1003 " pmullw %%mm4, %%mm1\n"
1004 " paddw %%mm1, %%mm0\n"
1005 " paddw %%mm3, %%mm0\n"
1006 " psraw %%mm2, %%mm0\n"
1007 " paddw 0(%1), %%mm0\n"
1008 " movq %%mm0, 0(%0)\n"
1015 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
1018 OIL_DEFINE_IMPL_FULL (mas4_add_s16_mmx, mas4_add_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1023 /* This only does 16-bit intermediates, whereas the ref specifies 32-bit */
1025 mas8_add_s16_mmx (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
1026 int16_t *s4_2, int n)
1048 " movq 0(%0), %%mm6\n"
1049 " movq 8(%0), %%mm7\n"
1050 " movzwl 0(%1), %%ecx\n"
1051 " movd %%ecx, %%mm3\n"
1052 " pshufw $0x00, %%mm3, %%mm3\n"
1053 " pxor %%mm4, %%mm4\n"
1054 " movzwl 2(%1), %%ecx\n"
1055 " movd %%ecx, %%mm4\n"
1056 :: "r" (s3_2), "r" (s4_2)
1061 " pshufw $0x00, %%mm6, %%mm1\n"
1062 " movq 0(%2), %%mm0\n"
1063 " pmullw %%mm1, %%mm0\n"
1064 " pshufw $0x55, %%mm6, %%mm2\n"
1065 " movq 2(%2), %%mm1\n"
1066 " pmullw %%mm2, %%mm1\n"
1067 " paddw %%mm1, %%mm0\n"
1068 " pshufw $0xaa, %%mm6, %%mm2\n"
1069 " movq 4(%2), %%mm1\n"
1070 " pmullw %%mm2, %%mm1\n"
1071 " paddw %%mm1, %%mm0\n"
1072 " pshufw $0xff, %%mm6, %%mm2\n"
1073 " movq 6(%2), %%mm1\n"
1074 " pmullw %%mm2, %%mm1\n"
1075 " paddw %%mm1, %%mm0\n"
1077 " pshufw $0x00, %%mm7, %%mm2\n"
1078 " movq 8(%2), %%mm1\n"
1079 " pmullw %%mm2, %%mm1\n"
1080 " paddw %%mm1, %%mm0\n"
1081 " pshufw $0x55, %%mm7, %%mm2\n"
1082 " movq 10(%2), %%mm1\n"
1083 " pmullw %%mm2, %%mm1\n"
1084 " paddw %%mm1, %%mm0\n"
1085 " pshufw $0xaa, %%mm7, %%mm2\n"
1086 " movq 12(%2), %%mm1\n"
1087 " pmullw %%mm2, %%mm1\n"
1088 " paddw %%mm1, %%mm0\n"
1089 " pshufw $0xff, %%mm7, %%mm2\n"
1090 " movq 14(%2), %%mm1\n"
1091 " pmullw %%mm2, %%mm1\n"
1092 " paddw %%mm1, %%mm0\n"
1094 " paddw %%mm3, %%mm0\n"
1095 " psraw %%mm4, %%mm0\n"
1096 " paddw 0(%1), %%mm0\n"
1097 " movq %%mm0, 0(%0)\n"
1104 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
1107 OIL_DEFINE_IMPL_FULL (mas8_add_s16_mmx, mas8_add_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1112 mas4_add_s16_pmaddwd (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
1113 int16_t *s4_2, int n)
1117 " movq 0(%0), %%mm6\n"
1118 " movzwl 0(%1), %%ecx\n"
1119 " movd %%ecx, %%mm3\n"
1120 " movzwl 2(%1), %%ecx\n"
1121 " movd %%ecx, %%mm4\n"
1122 :: "r" (s3_2), "r" (s4_2)
1127 " movq 0(%2), %%mm0\n"
1128 " pmaddwd %%mm6, %%mm0\n"
1129 " pshufw $0xee, %%mm0, %%mm1\n" // 11 10 11 10
1130 " paddd %%mm1, %%mm0\n"
1131 " paddd %%mm3, %%mm0\n"
1132 " psrad %%mm4, %%mm0\n"
1133 " movd %%mm0, %%eax\n"
1134 " addw 0(%1), %%ax\n"
1135 " movw %%ax, 0(%0)\n"
1142 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
1147 OIL_DEFINE_IMPL_FULL (mas4_add_s16_pmaddwd, mas4_add_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1150 mas4_add_s16_pmaddwd_2 (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
1151 int16_t *s4_2, int n)
1155 " movq 0(%0), %%mm6\n"
1156 " movzwl 0(%1), %%ecx\n"
1157 " movd %%ecx, %%mm3\n"
1158 " pshufw $0x44, %%mm3, %%mm3\n" // 01 00 01 00
1159 " movzwl 2(%1), %%ecx\n"
1160 " movd %%ecx, %%mm4\n"
1161 :: "r" (s3_2), "r" (s4_2)
1166 " movq 0(%2), %%mm0\n"
1167 " pmaddwd %%mm6, %%mm0\n"
1168 " pshufw $0xee, %%mm0, %%mm1\n" // 11 10 11 10
1169 " paddd %%mm1, %%mm0\n"
1170 " paddd %%mm3, %%mm0\n"
1171 " psrad %%mm4, %%mm0\n"
1172 " movd %%mm0, %%eax\n"
1173 " addw 0(%1), %%ax\n"
1174 " movw %%ax, 0(%0)\n"
1179 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
1187 " movq 0(%2), %%mm0\n"
1188 " pmaddwd %%mm6, %%mm0\n"
1189 " movq 2(%2), %%mm2\n"
1190 " pmaddwd %%mm6, %%mm2\n"
1192 " movq %%mm0, %%mm1\n"
1193 " punpckhdq %%mm2, %%mm0\n"
1194 " punpckldq %%mm2, %%mm1\n"
1196 " paddd %%mm1, %%mm0\n"
1197 " paddd %%mm3, %%mm0\n"
1198 " psrad %%mm4, %%mm0\n"
1199 " pshufw $0xd8, %%mm0, %%mm0\n" // 11 01 10 00
1201 " paddw 0(%1), %%mm0\n"
1202 " movd %%mm0, 0(%0)\n"
1209 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
1214 OIL_DEFINE_IMPL_FULL (mas4_add_s16_pmaddwd_2, mas4_add_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1217 mas8_add_s16_pmaddwd (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
1218 int16_t *s4_2, int n)
1222 " movq 0(%0), %%mm6\n"
1223 " movq 8(%0), %%mm7\n"
1224 " movzwl 0(%1), %%ecx\n"
1225 " movd %%ecx, %%mm3\n"
1226 " movzwl 2(%1), %%ecx\n"
1227 " movd %%ecx, %%mm4\n"
1228 :: "r" (s3_2), "r" (s4_2)
1233 " movq 0(%2), %%mm0\n"
1234 " pmaddwd %%mm6, %%mm0\n"
1235 " movq 8(%2), %%mm1\n"
1236 " pmaddwd %%mm7, %%mm1\n"
1237 " paddd %%mm1, %%mm0\n"
1238 " pshufw $0xee, %%mm0, %%mm1\n"
1239 " paddd %%mm1, %%mm0\n"
1240 " paddd %%mm3, %%mm0\n"
1241 " psrad %%mm4, %%mm0\n"
1242 " movd %%mm0, %%eax\n"
1243 " addw 0(%1), %%ax\n"
1244 " movw %%ax, 0(%0)\n"
1251 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
1256 OIL_DEFINE_IMPL_FULL (mas8_add_s16_pmaddwd, mas8_add_s16, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
1262 mas8_add_s16_pmaddwd2 (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
1263 int16_t *s4_2, int n)
1285 " movq 0(%0), %%mm6\n"
1286 " movq 8(%0), %%mm7\n"
1287 " movzwl 0(%1), %%ecx\n"
1288 " movd %%ecx, %%mm5\n"
1289 " pshufw $0x00, %%mm5, %%mm5\n"
1290 " pxor %%mm4, %%mm4\n"
1291 " movzwl 2(%1), %%ecx\n"
1292 " movd %%ecx, %%mm4\n"
1293 :: "r" (s3_2), "r" (s4_2)
1298 " movq 0(%2), %%mm0\n"
1299 " pmaddwd %%mm6, %%mm0\n"
1300 " movq 8(%2), %%mm1\n"
1301 " pmaddwd %%mm7, %%mm1\n"
1302 " paddd %%mm1, %%mm0\n"
1303 " pshufw $0xee, %%mm0, %%mm1\n"
1304 " paddw %%mm1, %%mm0\n"
1306 " movq 2(%2), %%mm2\n"
1307 " pmaddwd %%mm6, %%mm2\n"
1308 " movq 10(%2), %%mm3\n"
1309 " pmaddwd %%mm7, %%mm3\n"
1310 " paddd %%mm3, %%mm2\n"
1311 " pshufw $0xee, %%mm2, %%mm3\n"
1312 " paddw %%mm3, %%mm2\n"
1313 " pextrw $0, %%mm2, %%eax\n"
1314 " pinsrw $1, %%eax, %%mm0\n"
1316 " movq 4(%2), %%mm2\n"
1317 " pmaddwd %%mm6, %%mm2\n"
1318 " movq 12(%2), %%mm3\n"
1319 " pmaddwd %%mm7, %%mm3\n"
1320 " paddd %%mm3, %%mm2\n"
1321 " pshufw $0xee, %%mm2, %%mm3\n"
1322 " paddw %%mm3, %%mm2\n"
1323 " pextrw $0, %%mm2, %%eax\n"
1324 " pinsrw $2, %%eax, %%mm0\n"
1326 " movq 6(%2), %%mm2\n"
1327 " pmaddwd %%mm6, %%mm2\n"
1328 " movq 14(%2), %%mm3\n"
1329 " pmaddwd %%mm7, %%mm3\n"
1330 " paddd %%mm3, %%mm2\n"
1331 " pshufw $0xee, %%mm2, %%mm3\n"
1332 " paddw %%mm3, %%mm2\n"
1333 " pextrw $0, %%mm2, %%eax\n"
1334 " pinsrw $3, %%eax, %%mm0\n"
1336 " paddw %%mm5, %%mm0\n"
1337 " psraw %%mm4, %%mm0\n"
1338 " paddw 0(%1), %%mm0\n"
1339 " movq %%mm0, 0(%0)\n"
1346 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
1351 OIL_DEFINE_IMPL_FULL (mas8_add_s16_pmaddwd2, mas8_add_s16, OIL_IMPL_FLAG_SSE);
1355 /* This only does 16-bit intermediates, whereas the ref specifies 32-bit */
1357 mas8_add_s16_sse2 (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
1358 int16_t *s4_2, int n)
1361 " movq 0(%0), %%mm6\n"
1362 " movq 8(%0), %%mm7\n"
1363 " movzwl 0(%1), %%ecx\n"
1364 " movd %%ecx, %%mm3\n"
1365 " pshufw $0x00, %%mm3, %%mm3\n"
1366 " pxor %%mm4, %%mm4\n"
1367 " movzwl 2(%1), %%ecx\n"
1368 " movd %%ecx, %%mm4\n"
1369 :: "r" (s3_2), "r" (s4_2)
1374 " movq 0(%2), %%mm0\n"
1375 " pmullw %%mm6, %%mm0\n"
1376 " movq 8(%2), %%mm1\n"
1377 " pmullw %%mm7, %%mm1\n"
1378 " paddw %%mm1, %%mm0\n"
1379 " pshufw $0xee, %%mm0, %%mm1\n"
1380 " paddw %%mm1, %%mm0\n"
1381 " pshufw $0x01, %%mm0, %%mm1\n"
1382 " paddw %%mm1, %%mm0\n"
1383 " paddw %%mm3, %%mm0\n"
1384 " psraw %%mm4, %%mm0\n"
1385 " movd %%mm0, %%eax\n"
1386 " addw 0(%1), %%ax\n"
1387 " movw %%ax, 0(%0)\n"
1394 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (n)
1399 OIL_DEFINE_IMPL_FULL (mas8_add_s16_sse2, mas8_add_s16, OIL_IMPL_FLAG_SSE);
1403 mas2_across_add_s16_mmx (int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3,
1404 int16_t *s4_2, int16_t *s5_2, int n)
1406 int shift = s5_2[1];
1411 x = s5_2[0] + s2[0]*s4_2[0] + s3[0]*s4_2[1];
1426 " movzwl 0(%0), %%ecx\n"
1427 " movd %%ecx, %%mm7\n"
1428 " pshufw $0x00, %%mm7, %%mm7\n"
1429 " movzwl 2(%0), %%ecx\n"
1430 " movd %%ecx, %%mm6\n"
1431 " pshufw $0x00, %%mm6, %%mm6\n"
1432 " movzwl 0(%1), %%ecx\n"
1433 " movd %%ecx, %%mm5\n"
1434 " pshufw $0x44, %%mm5, %%mm5\n"
1435 :: "r" (s4_2), "r" (s5_2)
1440 " movq 0(%2), %%mm0\n" // mm0 = s0, s1, s2, s3
1441 " movq 0(%2), %%mm1\n" // mm1 = s0, s1, s2, s3
1442 " pmullw %%mm7, %%mm0\n" // mm0 = lo(s0*a0), lo(s1*a0), ...
1443 " pmulhw %%mm7, %%mm1\n" // mm1 = hi(s0*a0), hi(s1*a0), ...
1444 " movq %%mm0, %%mm2\n" // mm2 = lo(s0*a0), lo(s1*a0), ...
1445 " punpcklwd %%mm1, %%mm0\n" // mm0 = s0*a0, s1*a0
1446 " punpckhwd %%mm1, %%mm2\n" // mm2 = s2*a0, s3*a0
1447 " movq %%mm2, %%mm1\n" // mm1 = s2*a0, s3*a0
1449 " movq 0(%3), %%mm2\n"
1450 " movq 0(%3), %%mm3\n"
1451 " pmullw %%mm6, %%mm2\n"
1452 " pmulhw %%mm6, %%mm3\n"
1453 " movq %%mm2, %%mm4\n"
1454 " punpcklwd %%mm3, %%mm2\n" // mm2 = s1*a1, s2*a1
1455 " punpckhwd %%mm3, %%mm4\n" // mm4 = s3*a1, s4*a1
1456 " movq %%mm4, %%mm3\n" // mm3 = s3*a1, s4*a1
1458 " paddd %%mm3, %%mm1\n" // mm1 = s2*a0 + s3*a1, ...
1459 " paddd %%mm2, %%mm0\n" // mm0 = s0*a0 + s1*a1, ...
1461 " paddd %%mm5, %%mm1\n" // mm1 = s2*a0 + s3*a1 + offset, ...
1462 " paddd %%mm5, %%mm0\n" // mm0 = s0*a0 + s1*a1 + offset, ...
1465 " psrad %%mm4, %%mm1\n" // mm1 = (s2*a0 + s3*a1 + offset)>>shift, ...
1466 " psrad %%mm4, %%mm0\n" // mm0 = (s0*a0 + s1*a1 + offset)>>shift, ...
1468 " packssdw %%mm1, %%mm0\n"
1469 " paddw 0(%1), %%mm0\n"
1470 " movq %%mm0, 0(%0)\n"
1478 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (s3), "+m" (n)
1482 OIL_DEFINE_IMPL_FULL (mas2_across_add_s16_mmx, mas2_across_add_s16,
1483 OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1486 add_const_rshift_s16_mmx(int16_t *d1, int16_t *s1, int16_t *s2_2, int n)
1489 d1[0] = (s1[0] + s2_2[0])>>s2_2[1];
1497 " movzwl 0(%2), %%ecx\n"
1498 " movd %%ecx, %%mm7\n"
1499 " pshufw $0x00, %%mm7, %%mm7\n"
1500 " movzwl 2(%2), %%ecx\n"
1501 " movd %%ecx, %%mm6\n"
1503 " movq 0(%1), %%mm0\n"
1504 " paddsw %%mm7, %%mm0\n"
1505 " psraw %%mm6, %%mm0\n"
1506 " movq %%mm0, 0(%0)\n"
1512 : "+r" (d1), "+r" (s1), "+r" (s2_2), "+r" (n)
1518 OIL_DEFINE_IMPL_FULL (add_const_rshift_s16_mmx, add_const_rshift_s16,
1519 OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
1522 multiply_and_add_s16_mmx(int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3, int n)
1525 d1[0] = s1[0] + s2[0]*s3[0];
1536 " movq 0(%2), %%mm0\n"
1537 " pmullw 0(%3), %%mm0\n"
1538 " paddw 0(%1), %%mm0\n"
1539 " movq %%mm0, 0(%0)\n"
1547 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (s3), "+r" (n)
1551 OIL_DEFINE_IMPL_FULL (multiply_and_add_s16_mmx, multiply_and_add_s16,
1555 multiply_and_add_s16_u8_mmx(int16_t *d1, int16_t *s1, int16_t *s2,
1559 d1[0] = s1[0] + s2[0]*s3[0];
1569 " pxor %%mm7, %%mm7\n"
1571 " movd 0(%3), %%mm0\n"
1572 " punpcklbw %%mm7, %%mm0\n"
1573 " pmullw 0(%2), %%mm0\n"
1574 " paddw 0(%1), %%mm0\n"
1575 " movq %%mm0, 0(%0)\n"
1583 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (s3), "+r" (n)
1587 OIL_DEFINE_IMPL_FULL (multiply_and_add_s16_u8_mmx, multiply_and_add_s16_u8,
1591 multiply_and_add_s16_u8_mmx_2(int16_t *d1, int16_t *s1, int16_t *s2,
1595 d1[0] = s1[0] + s2[0]*s3[0];
1605 " pxor %%mm7, %%mm7\n"
1607 " movd 0(%3), %%mm0\n"
1608 " punpcklbw %%mm7, %%mm0\n"
1609 " movd 4(%3), %%mm1\n"
1610 " pmullw 0(%2), %%mm0\n"
1611 " punpcklbw %%mm7, %%mm1\n"
1612 " paddw 0(%1), %%mm0\n"
1613 " pmullw 8(%2), %%mm1\n"
1614 " movq %%mm0, 0(%0)\n"
1615 " paddw 8(%1), %%mm1\n"
1616 " movq %%mm1, 8(%0)\n"
1625 : "+r" (d1), "+r" (s1), "+r" (s2), "+r" (s3), "+r" (n)
1629 OIL_DEFINE_IMPL_FULL (multiply_and_add_s16_u8_mmx_2, multiply_and_add_s16_u8,
1633 multiply_and_acc_12xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
1634 int ss1, uint8_t *s2, int ss2, int n)
1637 __asm__ __volatile__ ("\n"
1638 " pxor %%mm7, %%mm7\n"
1640 " movd 0(%2), %%mm0\n"
1641 " punpcklbw %%mm7, %%mm0\n"
1642 " pmullw 0(%1), %%mm0\n"
1643 " paddw 0(%0), %%mm0\n"
1644 " movq %%mm0, 0(%0)\n"
1645 " movd 4(%2), %%mm1\n"
1646 " punpcklbw %%mm7, %%mm1\n"
1647 " pmullw 8(%1), %%mm1\n"
1648 " paddw 8(%0), %%mm1\n"
1649 " movq %%mm1, 8(%0)\n"
1650 " movd 8(%2), %%mm2\n"
1651 " punpcklbw %%mm7, %%mm2\n"
1652 " pmullw 16(%1), %%mm2\n"
1653 " paddw 16(%0), %%mm2\n"
1654 " movq %%mm2, 16(%0)\n"
1662 : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
1663 : "m" (is1), "m" (ss1), "m" (ss2)
1666 OIL_DEFINE_IMPL_FULL (multiply_and_acc_12xn_s16_u8_mmx,
1667 multiply_and_acc_12xn_s16_u8, OIL_IMPL_FLAG_MMX);
1669 #ifdef ENABLE_BROKEN_IMPLS
/* mas4_across_add_s16 (MMX/MMXEXT, 16-bit accumulation):
 * d[i] = s1[i] + (( sum_j s2_nx4[j*sstr2 + i]*s3_4[j] + s4_2[0] ) >> s4_2[1])
 * computed 4 samples at a time with saturating 16-bit adds (paddsw).
 * NOTE(review): guarded by ENABLE_BROKEN_IMPLS — the intermediate sums
 * saturate at 16 bits, which presumably differs from the reference
 * implementation; confirm before enabling.
 * NOTE(review): original line numbers jump here — the scalar reference
 * loop and parts of the asm operand lists are not visible in this view. */
1671 mas4_across_add_s16_mmx (int16_t *d, int16_t *s1, int16_t *s2_nx4, int sstr2,
1672 int16_t *s3_4, int16_t *s4_2, int n)
1674 int16_t *s2_nx4_off;
/* Scalar tap accumulation (fragment of the reference/remainder loop). */
1681 x += OIL_GET(s2_nx4, j*sstr2, int16_t)*s3_4[j];
/* Pointer to the 4th row (rows 0..2 are reached via [sstr] addressing,
 * which only scales by 1 or 2, so row 3 needs its own pointer). */
1693 s2_nx4_off = OIL_OFFSET(s2_nx4, 3*sstr2);
/* Setup: broadcast the four 16-bit taps s3_4[0..3] into mm0..mm3,
 * the rounding offset s4_2[0] into mm7, and the shift count s4_2[1]
 * into mm6. */
1696 __asm__ __volatile__ ("\n"
1697 "  movq 0(%[s3_4]), %%mm0\n"
1698 "  pshufw $0x55, %%mm0, %%mm1\n"
1699 "  pshufw $0xaa, %%mm0, %%mm2\n"
1700 "  pshufw $0xff, %%mm0, %%mm3\n"
1701 "  pshufw $0x00, %%mm0, %%mm0\n"
1702 "  movzwl 0(%[s4_2]), %%ecx\n"
1703 "  movd %%ecx, %%mm7\n"
1704 "  pshufw $0x00, %%mm7, %%mm7\n"
1705 "  movzwl 2(%[s4_2]), %%ecx\n"
1706 "  movd %%ecx, %%mm6\n"
1708 : [s3_4] "r" (s3_4),
/* Main loop: multiply each of the 4 rows by its tap, accumulate with
 * saturating adds, add offset, arithmetic-shift right, add s1, store. */
1713 __asm__ __volatile__ ("\n"
1715 "  movq 0(%[s2_nx4]), %%mm4\n"
1716 "  pmullw %%mm0, %%mm4\n"
1717 "  movq (%[s2_nx4],%[sstr]), %%mm5\n"
1718 "  pmullw %%mm1, %%mm5\n"
1719 "  paddsw %%mm5,%%mm4\n"
1720 "  movq (%[s2_nx4],%[sstr],2), %%mm5\n"
1721 "  pmullw %%mm2, %%mm5\n"
1722 "  paddsw %%mm5,%%mm4\n"
1723 "  movq (%[s2_nx4_off]), %%mm5\n"
1724 "  pmullw %%mm3, %%mm5\n"
1725 "  paddsw %%mm5,%%mm4\n"
1726 "  paddsw %%mm7, %%mm4\n"
1727 "  psraw %%mm6, %%mm4\n"
1728 "  paddsw (%[s1]),%%mm4\n"
1729 "  movq %%mm4, 0(%[d])\n"
/* Advance both row pointers by 4 samples (8 bytes) per iteration. */
1731 "  addl $8, %[s2_nx4]\n"
1732 "  addl $8, %[s2_nx4_off]\n"
1738 : [s2_nx4] "+r" (s2_nx4),
1740 [s2_nx4_off] "+r" (s2_nx4_off),
1743 : [sstr] "r" (sstr2)
/* pshufw is an MMXEXT instruction, hence the extra MMXEXT flag. */
1746 OIL_DEFINE_IMPL_FULL (mas4_across_add_s16_mmx, mas4_across_add_s16,
1747 OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
/* mas4_across_add_s16 (MMX/MMXEXT, 32-bit accumulation):
 * same contract as the implementation above, but each tap product is
 * widened to 32 bits (pmullw for the low half, pmulhw for the high half,
 * interleaved with punpcklwd/punpckhwd) and accumulated with paddd, so
 * intermediate sums do not saturate; the final packssdw saturates only
 * the end result back to int16.
 * NOTE(review): original line numbers jump — the reference loop, the
 * loop-control asm and parts of the operand lists are not visible here. */
1751 mas4_across_add_s16_mmx (int16_t *d, int16_t *s1, int16_t *s2_nx4, int sstr2,
1752 int16_t *s3_4, int16_t *s4_2, int n)
1754 int16_t *s2_nx4_off;
/* Scalar tap accumulation (fragment of the reference/remainder loop). */
1761 x += OIL_GET(s2_nx4, j*sstr2, int16_t)*s3_4[j];
/* Row 3 pointer; rows 0..2 use [sstr] scaled addressing. */
1773 s2_nx4_off = OIL_OFFSET(s2_nx4, 3*sstr2);
/* Setup: mm0 = the four taps s3_4[0..3]; mm5 = s4_2[0] (offset) and
 * s4_2[1] (shift) packed in its low dword. */
1776 __asm__ __volatile__ ("\n"
1777 "  movq 0(%[s3_4]), %%mm0\n"
1778 "  pxor %%mm5, %%mm5\n"
1779 "  movd 0(%[s4_2]), %%mm5\n"
1781 : [s3_4] "r" (s3_4),
/* Main loop: tap 0 — 32-bit products into mm6 (low pair) / mm7 (high pair). */
1785 __asm__ __volatile__ ("\n"
1787 "  pshufw $0x00, %%mm0, %%mm6\n"
1788 "  pmullw 0(%[s2_nx4]), %%mm6\n"
1789 "  pshufw $0x00, %%mm0, %%mm3\n"
1790 "  pmulhw 0(%[s2_nx4]), %%mm3\n"
1791 "  movq %%mm6, %%mm7\n"
1792 "  punpcklwd %%mm3, %%mm6\n"
1793 "  punpckhwd %%mm3, %%mm7\n"
/* Tap 1 (row at +sstr). */
1795 "  pshufw $0x55, %%mm0, %%mm2\n"
1796 "  pmullw 0(%[s2_nx4],%[sstr]), %%mm2\n"
1797 "  pshufw $0x55, %%mm0, %%mm3\n"
1798 "  pmulhw 0(%[s2_nx4],%[sstr]), %%mm3\n"
1799 "  movq %%mm2, %%mm4\n"
1800 "  punpcklwd %%mm3, %%mm2\n"
1801 "  punpckhwd %%mm3, %%mm4\n"
1802 "  paddd %%mm2, %%mm6\n"
1803 "  paddd %%mm4, %%mm7\n"
/* Tap 2 (row at +2*sstr). */
1805 "  pshufw $0xaa, %%mm0, %%mm2\n"
1806 "  pmullw 0(%[s2_nx4],%[sstr],2), %%mm2\n"
1807 "  pshufw $0xaa, %%mm0, %%mm3\n"
1808 "  pmulhw 0(%[s2_nx4],%[sstr],2), %%mm3\n"
1809 "  movq %%mm2, %%mm4\n"
1810 "  punpcklwd %%mm3, %%mm2\n"
1811 "  punpckhwd %%mm3, %%mm4\n"
1812 "  paddd %%mm2, %%mm6\n"
1813 "  paddd %%mm4, %%mm7\n"
/* Tap 3 (row via the dedicated +3*sstr pointer). */
1815 "  pshufw $0xff, %%mm0, %%mm2\n"
1816 "  pmullw 0(%[s2_nx4_off]), %%mm2\n"
1817 "  pshufw $0xff, %%mm0, %%mm3\n"
1818 "  pmulhw 0(%[s2_nx4_off]), %%mm3\n"
1819 "  movq %%mm2, %%mm4\n"
1820 "  punpcklwd %%mm3, %%mm2\n"
1821 "  punpckhwd %%mm3, %%mm4\n"
1822 "  paddd %%mm2, %%mm6\n"
1823 "  paddd %%mm4, %%mm7\n"
/* Broadcast the 32-bit offset (s4_2[0]) to both dword lanes and add. */
1825 "  pshufw $0xcc, %%mm5, %%mm1\n"
1826 "  paddd %%mm1, %%mm6\n"
1827 "  paddd %%mm1, %%mm7\n"
/* Extract the shift count (s4_2[1]) and arithmetic-shift the sums. */
1829 "  pshufw $0xfd, %%mm5, %%mm1\n"
1830 "  psrad %%mm1, %%mm6\n"
1831 "  psrad %%mm1, %%mm7\n"
/* Saturate back to int16, add s1, store 4 outputs. */
1832 "  packssdw %%mm7, %%mm6\n"
1834 "  paddsw (%[s1]),%%mm6\n"
1835 "  movq %%mm6, 0(%[d])\n"
/* Advance both row pointers by 4 samples (8 bytes) per iteration. */
1837 "  addl $8, %[s2_nx4]\n"
1838 "  addl $8, %[s2_nx4_off]\n"
1844 : [s2_nx4] "+r" (s2_nx4),
1846 [s2_nx4_off] "+r" (s2_nx4_off),
1849 : [sstr] "r" (sstr2)
/* pshufw is an MMXEXT instruction, hence the extra MMXEXT flag. */
1852 OIL_DEFINE_IMPL_FULL (mas4_across_add_s16_mmx, mas4_across_add_s16,
1853 OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
/* mas8_across_add_s16 (MMX/MMXEXT): 8-tap version of the routine above.
 * d[i] = s1[i] + (( sum_{j=0..7} s2_nx8[j*sstr2 + i]*s3_8[j] + s4_2[0] )
 *                 >> s4_2[1]), 4 samples per iteration, with 32-bit
 * intermediate accumulation (pmullw/pmulhw + punpck + paddd).
 * Because 8 rows cannot all be reached with scaled addressing, a [tmp]
 * register walks down the rows by repeatedly adding [sstr].
 * NOTE(review): original line numbers jump — the reference loop, loop
 * control and parts of the operand lists are not visible in this view. */
1856 mas8_across_add_s16_mmx (int16_t *d, int16_t *s1, int16_t *s2_nx8, int sstr2,
1857 int16_t *s3_8, int16_t *s4_2, int n)
1859 int16_t *s2_nx8_off;
/* Scalar tap accumulation (fragment of the reference/remainder loop). */
1867 x += OIL_GET(s2_nx8, j*sstr2, int16_t)*s3_8[j];
/* Pointer to the last (8th) row. */
1879 s2_nx8_off = OIL_OFFSET(s2_nx8, 7*sstr2);
/* Setup: mm5 = s4_2[0] (offset) and s4_2[1] (shift) in its low dword. */
1882 __asm__ __volatile__ ("\n"
1883 "  movq 0(%[s3_8]), %%mm0\n"
1884 "  pxor %%mm5, %%mm5\n"
1885 "  movd 0(%[s4_2]), %%mm5\n"
1887 : [s3_8] "r" (s3_8),
/* Main loop: reset the row cursor, reload taps 0..3 into mm0. */
1891 __asm__ __volatile__ ("\n"
1893 "  movl %[s2_nx8], %[tmp]\n"
1894 "  movq 0(%[s3_8]), %%mm0\n"
/* Tap 0: 32-bit products into mm6 (low pair) / mm7 (high pair). */
1896 "  pshufw $0x00, %%mm0, %%mm6\n"
1897 "  pmullw 0(%[tmp]), %%mm6\n"
1898 "  pshufw $0x00, %%mm0, %%mm3\n"
1899 "  pmulhw 0(%[tmp]), %%mm3\n"
1900 "  movq %%mm6, %%mm7\n"
1901 "  punpcklwd %%mm3, %%mm6\n"
1902 "  punpckhwd %%mm3, %%mm7\n"
/* Tap 1. */
1904 "  addl %[sstr], %[tmp]\n"
1905 "  pshufw $0x55, %%mm0, %%mm2\n"
1906 "  pmullw 0(%[tmp]), %%mm2\n"
1907 "  pshufw $0x55, %%mm0, %%mm3\n"
1908 "  pmulhw 0(%[tmp]), %%mm3\n"
1909 "  movq %%mm2, %%mm4\n"
1910 "  punpcklwd %%mm3, %%mm2\n"
1911 "  punpckhwd %%mm3, %%mm4\n"
1912 "  paddd %%mm2, %%mm6\n"
1913 "  paddd %%mm4, %%mm7\n"
/* Tap 2. */
1915 "  addl %[sstr], %[tmp]\n"
1916 "  pshufw $0xaa, %%mm0, %%mm2\n"
1917 "  pmullw 0(%[tmp]), %%mm2\n"
1918 "  pshufw $0xaa, %%mm0, %%mm3\n"
1919 "  pmulhw 0(%[tmp]), %%mm3\n"
1920 "  movq %%mm2, %%mm4\n"
1921 "  punpcklwd %%mm3, %%mm2\n"
1922 "  punpckhwd %%mm3, %%mm4\n"
1923 "  paddd %%mm2, %%mm6\n"
1924 "  paddd %%mm4, %%mm7\n"
/* Tap 3. */
1926 "  addl %[sstr], %[tmp]\n"
1927 "  pshufw $0xff, %%mm0, %%mm2\n"
1928 "  pmullw 0(%[tmp]), %%mm2\n"
1929 "  pshufw $0xff, %%mm0, %%mm3\n"
1930 "  pmulhw 0(%[tmp]), %%mm3\n"
1931 "  movq %%mm2, %%mm4\n"
1932 "  punpcklwd %%mm3, %%mm2\n"
1933 "  punpckhwd %%mm3, %%mm4\n"
1934 "  paddd %%mm2, %%mm6\n"
1935 "  paddd %%mm4, %%mm7\n"
/* Reload taps 4..7 into mm0 and continue down the rows. */
1937 "  movq 8(%[s3_8]), %%mm0\n"
/* Tap 4. */
1939 "  addl %[sstr], %[tmp]\n"
1940 "  pshufw $0x00, %%mm0, %%mm2\n"
1941 "  pmullw 0(%[tmp]), %%mm2\n"
1942 "  pshufw $0x00, %%mm0, %%mm3\n"
1943 "  pmulhw 0(%[tmp]), %%mm3\n"
1944 "  movq %%mm2, %%mm4\n"
1945 "  punpcklwd %%mm3, %%mm2\n"
1946 "  punpckhwd %%mm3, %%mm4\n"
1947 "  paddd %%mm2, %%mm6\n"
1948 "  paddd %%mm4, %%mm7\n"
/* Tap 5. */
1950 "  addl %[sstr], %[tmp]\n"
1951 "  pshufw $0x55, %%mm0, %%mm2\n"
1952 "  pmullw 0(%[tmp]), %%mm2\n"
1953 "  pshufw $0x55, %%mm0, %%mm3\n"
1954 "  pmulhw 0(%[tmp]), %%mm3\n"
1955 "  movq %%mm2, %%mm4\n"
1956 "  punpcklwd %%mm3, %%mm2\n"
1957 "  punpckhwd %%mm3, %%mm4\n"
1958 "  paddd %%mm2, %%mm6\n"
1959 "  paddd %%mm4, %%mm7\n"
/* Tap 6. */
1961 "  addl %[sstr], %[tmp]\n"
1962 "  pshufw $0xaa, %%mm0, %%mm2\n"
1963 "  pmullw 0(%[tmp]), %%mm2\n"
1964 "  pshufw $0xaa, %%mm0, %%mm3\n"
1965 "  pmulhw 0(%[tmp]), %%mm3\n"
1966 "  movq %%mm2, %%mm4\n"
1967 "  punpcklwd %%mm3, %%mm2\n"
1968 "  punpckhwd %%mm3, %%mm4\n"
1969 "  paddd %%mm2, %%mm6\n"
1970 "  paddd %%mm4, %%mm7\n"
/* Tap 7. */
1972 "  addl %[sstr], %[tmp]\n"
1973 "  pshufw $0xff, %%mm0, %%mm2\n"
1974 "  pmullw 0(%[tmp]), %%mm2\n"
1975 "  pshufw $0xff, %%mm0, %%mm3\n"
1976 "  pmulhw 0(%[tmp]), %%mm3\n"
1977 "  movq %%mm2, %%mm4\n"
1978 "  punpcklwd %%mm3, %%mm2\n"
1979 "  punpckhwd %%mm3, %%mm4\n"
1980 "  paddd %%mm2, %%mm6\n"
1981 "  paddd %%mm4, %%mm7\n"
/* Add the broadcast offset, arithmetic-shift, saturate to int16. */
1983 "  pshufw $0xcc, %%mm5, %%mm1\n"
1984 "  paddd %%mm1, %%mm6\n"
1985 "  paddd %%mm1, %%mm7\n"
1987 "  pshufw $0xfd, %%mm5, %%mm1\n"
1988 "  psrad %%mm1, %%mm6\n"
1989 "  psrad %%mm1, %%mm7\n"
1990 "  packssdw %%mm7, %%mm6\n"
/* Add s1 and store 4 outputs. */
1992 "  paddsw (%[s1]),%%mm6\n"
1993 "  movq %%mm6, 0(%[d])\n"
/* Advance the base row pointer by 4 samples (8 bytes). */
1995 "  addl $8, %[s2_nx8]\n"
2001 : [s2_nx8] "+r" (s2_nx8),
2007 : [sstr] "m" (sstr2)
/* pshufw is an MMXEXT instruction, hence the extra MMXEXT flag. */
2010 OIL_DEFINE_IMPL_FULL (mas8_across_add_s16_mmx, mas8_across_add_s16,
2011 OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
/* lshift_s16 (MMX): d1[i] = s1[i] << s3_1[0].
 * Processes 4 int16 samples per iteration with psllw; the shift count
 * is loaded once into mm1.
 * NOTE(review): original line numbers jump — the scalar remainder loop,
 * the asm loop control and the operand lists are not visible here. */
2014 lshift_s16_mmx(int16_t *d1, int16_t *s1, int16_t *s3_1, int n)
/* Scalar fallback/remainder: one sample at a time. */
2017 d1[0] = s1[0]<<s3_1[0];
2023 __asm__ __volatile__ ("\n"
/* Load the (single) shift count into mm1 via a zero-extended word. */
2024 "  movzwl 0(%[s3_1]), %%ecx\n"
2025 "  movd %%ecx, %%mm1\n"
/* Shift 4 samples (8 bytes) and store. */
2027 "  movq 0(%[s1]), %%mm0\n"
2028 "  psllw %%mm1, %%mm0\n"
2029 "  movq %%mm0, 0(%[d1])\n"
2041 OIL_DEFINE_IMPL_FULL (lshift_s16_mmx, lshift_s16, OIL_IMPL_FLAG_MMX);
/* lshift_s16 (MMX, unrolled x2): same contract as lshift_s16_mmx but
 * shifts 8 int16 samples (two movq/psllw pairs) per iteration.
 * NOTE(review): original line numbers jump — the scalar remainder loop,
 * the asm loop control and the operand lists are not visible here. */
2044 lshift_s16_mmx_2(int16_t *d1, int16_t *s1, int16_t *s3_1, int n)
/* Scalar fallback/remainder: one sample at a time. */
2047 d1[0] = s1[0]<<s3_1[0];
2054 __asm__ __volatile__ ("\n"
/* Load the (single) shift count into mm1 via a zero-extended word. */
2055 "  movzwl 0(%[s3_1]), %%ecx\n"
2056 "  movd %%ecx, %%mm1\n"
/* First group of 4 samples. */
2058 "  movq 0(%[s1]), %%mm0\n"
2059 "  psllw %%mm1, %%mm0\n"
2060 "  movq %%mm0, 0(%[d1])\n"
/* Second group of 4 samples (bytes 8..15). */
2061 "  movq 8(%[s1]), %%mm0\n"
2062 "  psllw %%mm1, %%mm0\n"
2063 "  movq %%mm0, 8(%[d1])\n"
2075 OIL_DEFINE_IMPL_FULL (lshift_s16_mmx_2, lshift_s16, OIL_IMPL_FLAG_MMX);
/* Symbian DLL export stubs (auto-generated for the __SYMBIAN32__ port):
 * each getter returns the address of the corresponding implementation
 * descriptor so the function table can be populated across the DLL
 * boundary.  The "name, class()" declarator and "return &x, y;" comma
 * expressions are the generator's idiom — do not hand-edit.
 * NOTE(review): the closing braces and #endif lines for each stub exist
 * in the original file but are not visible in this sampled view. */
2080 #ifdef __SYMBIAN32__
2082 OilFunctionImpl* __oil_function_impl_deinterleave2_mmx, deinterleave2_s16() {
2083 return &_oil_function_impl_deinterleave2_mmx, deinterleave2_s16;
2087 #ifdef __SYMBIAN32__
2089 OilFunctionImpl* __oil_function_impl_deinterleave2_mmx_2, deinterleave2_s16() {
2090 return &_oil_function_impl_deinterleave2_mmx_2, deinterleave2_s16;
2094 #ifdef __SYMBIAN32__
2096 OilFunctionImpl* __oil_function_impl_deinterleave2_mmx_3, deinterleave2_s16() {
2097 return &_oil_function_impl_deinterleave2_mmx_3, deinterleave2_s16;
2101 #ifdef __SYMBIAN32__
2103 OilFunctionImpl* __oil_function_impl_deinterleave2_mmx_4, deinterleave2_s16() {
2104 return &_oil_function_impl_deinterleave2_mmx_4, deinterleave2_s16;
2108 #ifdef __SYMBIAN32__
2110 OilFunctionImpl* __oil_function_impl_lift_add_mult_shift12_i386_mmx, lift_add_mult_shift12() {
2111 return &_oil_function_impl_lift_add_mult_shift12_i386_mmx, lift_add_mult_shift12;
2115 #ifdef __SYMBIAN32__
2117 OilFunctionImpl* __oil_function_impl_interleave2_mmx, interleave2_s16() {
2118 return &_oil_function_impl_interleave2_mmx, interleave2_s16;
2122 #ifdef __SYMBIAN32__
2124 OilFunctionImpl* __oil_function_impl_lift_add_shift1_mmx, lift_add_shift1() {
2125 return &_oil_function_impl_lift_add_shift1_mmx, lift_add_shift1;
2129 #ifdef __SYMBIAN32__
2131 OilFunctionImpl* __oil_function_impl_lift_sub_shift1_mmx, lift_sub_shift1() {
2132 return &_oil_function_impl_lift_sub_shift1_mmx, lift_sub_shift1;
2136 #ifdef __SYMBIAN32__
2138 OilFunctionImpl* __oil_function_impl_lift_add_shift2_mmx, lift_add_shift2() {
2139 return &_oil_function_impl_lift_add_shift2_mmx, lift_add_shift2;
2143 #ifdef __SYMBIAN32__
2145 OilFunctionImpl* __oil_function_impl_lift_sub_shift2_mmx, lift_sub_shift2() {
2146 return &_oil_function_impl_lift_sub_shift2_mmx, lift_sub_shift2;
2150 #ifdef __SYMBIAN32__
2152 OilFunctionImpl* __oil_function_impl_synth_53_mmx, synth_53() {
2153 return &_oil_function_impl_synth_53_mmx, synth_53;
2157 #ifdef __SYMBIAN32__
2159 OilFunctionImpl* __oil_function_impl_mas2_add_s16_mmx, mas2_add_s16() {
2160 return &_oil_function_impl_mas2_add_s16_mmx, mas2_add_s16;
2164 #ifdef __SYMBIAN32__
2166 OilFunctionImpl* __oil_function_impl_mas2_add_s16_lim_mmx, mas2_add_s16() {
2167 return &_oil_function_impl_mas2_add_s16_lim_mmx, mas2_add_s16;
2171 #ifdef __SYMBIAN32__
2173 OilFunctionImpl* __oil_function_impl_mas4_add_s16_mmx, mas4_add_s16() {
2174 return &_oil_function_impl_mas4_add_s16_mmx, mas4_add_s16;
/* NOTE(review): the mas2_add_s16_mmx and mas4_add_s16_mmx getters below
 * repeat the two above — presumably one per OIL_DEFINE_IMPL in the file;
 * duplicate symbol names would collide if all are compiled in. Verify
 * against the generator. */
2178 #ifdef __SYMBIAN32__
2180 OilFunctionImpl* __oil_function_impl_mas2_add_s16_mmx, mas2_add_s16() {
2181 return &_oil_function_impl_mas2_add_s16_mmx, mas2_add_s16;
2185 #ifdef __SYMBIAN32__
2187 OilFunctionImpl* __oil_function_impl_mas4_add_s16_mmx, mas4_add_s16() {
2188 return &_oil_function_impl_mas4_add_s16_mmx, mas4_add_s16;
2192 #ifdef __SYMBIAN32__
2194 OilFunctionImpl* __oil_function_impl_mas8_add_s16_mmx, mas8_add_s16() {
2195 return &_oil_function_impl_mas8_add_s16_mmx, mas8_add_s16;
2199 #ifdef __SYMBIAN32__
2201 OilFunctionImpl* __oil_function_impl_mas4_add_s16_pmaddwd, mas4_add_s16() {
2202 return &_oil_function_impl_mas4_add_s16_pmaddwd, mas4_add_s16;
2206 #ifdef __SYMBIAN32__
2208 OilFunctionImpl* __oil_function_impl_mas4_add_s16_pmaddwd_2, mas4_add_s16() {
2209 return &_oil_function_impl_mas4_add_s16_pmaddwd_2, mas4_add_s16;
2213 #ifdef __SYMBIAN32__
2215 OilFunctionImpl* __oil_function_impl_mas8_add_s16_pmaddwd, mas8_add_s16() {
2216 return &_oil_function_impl_mas8_add_s16_pmaddwd, mas8_add_s16;
2220 #ifdef __SYMBIAN32__
2222 OilFunctionImpl* __oil_function_impl_mas8_add_s16_pmaddwd2, mas8_add_s16() {
2223 return &_oil_function_impl_mas8_add_s16_pmaddwd2, mas8_add_s16;
2227 #ifdef __SYMBIAN32__
2229 OilFunctionImpl* __oil_function_impl_mas8_add_s16_sse2, mas8_add_s16() {
2230 return &_oil_function_impl_mas8_add_s16_sse2, mas8_add_s16;
2234 #ifdef __SYMBIAN32__
2236 OilFunctionImpl* __oil_function_impl_mas2_across_add_s16_mmx, mas2_across_add_s16() {
2237 return &_oil_function_impl_mas2_across_add_s16_mmx, mas2_across_add_s16;
2241 #ifdef __SYMBIAN32__
2243 OilFunctionImpl* __oil_function_impl_add_const_rshift_s16_mmx, add_const_rshift_s16() {
2244 return &_oil_function_impl_add_const_rshift_s16_mmx, add_const_rshift_s16;
2248 #ifdef __SYMBIAN32__
2250 OilFunctionImpl* __oil_function_impl_multiply_and_add_s16_mmx, multiply_and_add_s16() {
2251 return &_oil_function_impl_multiply_and_add_s16_mmx, multiply_and_add_s16;
2255 #ifdef __SYMBIAN32__
2257 OilFunctionImpl* __oil_function_impl_multiply_and_add_s16_u8_mmx, multiply_and_add_s16_u8() {
2258 return &_oil_function_impl_multiply_and_add_s16_u8_mmx, multiply_and_add_s16_u8;
2262 #ifdef __SYMBIAN32__
2264 OilFunctionImpl* __oil_function_impl_multiply_and_add_s16_u8_mmx_2, multiply_and_add_s16_u8() {
2265 return &_oil_function_impl_multiply_and_add_s16_u8_mmx_2, multiply_and_add_s16_u8;
2269 #ifdef __SYMBIAN32__
2271 OilFunctionImpl* __oil_function_impl_multiply_and_acc_12xn_s16_u8_mmx() {
2272 return &_oil_function_impl_multiply_and_acc_12xn_s16_u8_mmx;
2276 #ifdef __SYMBIAN32__
2278 OilFunctionImpl* __oil_function_impl_mas4_across_add_s16_mmx, mas4_across_add_s16() {
2279 return &_oil_function_impl_mas4_across_add_s16_mmx, mas4_across_add_s16;
2283 #ifdef __SYMBIAN32__
2285 OilFunctionImpl* __oil_function_impl_mas4_across_add_s16_mmx, mas4_across_add_s16() {
2286 return &_oil_function_impl_mas4_across_add_s16_mmx, mas4_across_add_s16;
2290 #ifdef __SYMBIAN32__
2292 OilFunctionImpl* __oil_function_impl_mas8_across_add_s16_mmx, mas8_across_add_s16() {
2293 return &_oil_function_impl_mas8_across_add_s16_mmx, mas8_across_add_s16;
2297 #ifdef __SYMBIAN32__
2299 OilFunctionImpl* __oil_function_impl_lshift_s16_mmx, lshift_s16() {
2300 return &_oil_function_impl_lshift_s16_mmx, lshift_s16;
2304 #ifdef __SYMBIAN32__
2306 OilFunctionImpl* __oil_function_impl_lshift_s16_mmx_2, lshift_s16() {
2307 return &_oil_function_impl_lshift_s16_mmx_2, lshift_s16;
2313 #ifdef __SYMBIAN32__
2315 OilFunctionImpl* __oil_function_impl_split_53_nomix() {
2316 return &_oil_function_impl_split_53_nomix;
2320 #ifdef __SYMBIAN32__
2322 OilFunctionImpl* __oil_function_impl_split_53_c() {
2323 return &_oil_function_impl_split_53_c;
2327 #ifdef __SYMBIAN32__
2329 OilFunctionImpl* __oil_function_impl_synth_53_c() {
2330 return &_oil_function_impl_synth_53_c;
2334 #ifdef __SYMBIAN32__
2336 OilFunctionImpl* __oil_function_impl_deinterleave2_c_1() {
2337 return &_oil_function_impl_deinterleave2_c_1;
2341 #ifdef __SYMBIAN32__
2343 OilFunctionImpl* __oil_function_impl_deinterleave2_asm() {
2344 return &_oil_function_impl_deinterleave2_asm;