First public contribution.
2 * LIBOIL - Library of Optimized Inner Loops
3 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
27 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
33 #include <liboil/liboilfunction.h>
/* Names the trans8x8_u16 function class. Every OIL_DEFINE_IMPL* line in this
 * file passes trans8x8_u16 as its second argument. The macro itself is not
 * defined in this file (presumably it comes from liboil/liboilfunction.h). */
36 OIL_DECLARE_CLASS(trans8x8_u16);
38 /* this could use additional work. */
/*
 * trans8x8_u16_mmx: MMX inline-assembly routine working on uint16_t data
 * addressed through dest/src with strides dstr/sstr.
 *
 * Operand mapping (from the constraint line at the end of this block):
 *   %0 = dest, %1 = src, %2 = dstr, %3 = sstr
 * Note that this is NOT the C parameter order (dest, dstr, src, sstr).
 * The strides are added to the pointers unscaled, so they are byte counts.
 * Each movq moves 8 bytes, i.e. four uint16_t values.
 *
 * The visible body is four passes of the same 4x4 word shuffle:
 *   - load four 8-byte row pieces (at row offsets 0, 1, 2 and 3 strides),
 *   - punpcklwd/punpckhwd interleave the words of rows 0/1 and rows 2/3,
 *   - punpckldq/punpckhdq combine those pairs so that
 *       mm0 = word 0 of each row, mm1 = word 1 of each row,
 *       mm4 = word 2 of each row, mm3 = word 3 of each row,
 *   - store mm0, mm1, mm4, mm3 to four consecutive destination rows.
 *
 * NOTE(review): the embedded line numbers in this listing have gaps (39, 41,
 * 43, 104, 106, 133, 135-138, ...). The return type, braces, the asm
 * statement opener/closer, any clobber list and possibly some instructions
 * are therefore not visible here. Confirm against the complete file.
 */
40 trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
/* The assembly is compiled only when neither __WINSCW__ nor __WINS__ is
 * defined. */
42 #if !defined(__WINSCW__) && !defined(__WINS__)
/* Pass 1: bytes 0-7 of source rows 0-3 -> bytes 0-7 of dest rows 0-3.
 * eax = 3 * sstr, used to address the fourth source row. */
44 " leal (%3,%3,2),%%eax \n" // UBER 0:
45 " movq (%1), %%mm0 \n" // UBER 1:
46 " movq (%1,%3,2), %%mm2 \n" // UBER 2:
47 " movq %%mm0, %%mm4 \n" // UBER 3: 1
48 " movq %%mm2, %%mm5 \n" // UBER 4: 2
49 " punpcklwd (%1,%3), %%mm0 \n" // UBER 5: 1
50 " punpcklwd (%1,%%eax), %%mm2 \n" // UBER 6: 0 2
51 " punpckhwd (%1,%3), %%mm4 \n" // UBER 7: 3
52 " punpckhwd (%1,%%eax), %%mm5 \n" // UBER 8: 4
53 " movq %%mm0, %%mm1 \n" // UBER 9: 5
54 " movq %%mm4, %%mm3 \n" // UBER 10: 7
55 " punpckldq %%mm2, %%mm0 \n" // UBER 11: 5 6
56 " punpckldq %%mm5, %%mm4 \n" // UBER 12: 7 8
57 " punpckhdq %%mm2, %%mm1 \n" // UBER 13: 6 9
58 " punpckhdq %%mm5, %%mm3 \n" // UBER 14: 9 10
/* eax is reused: now 3 * dstr, for the fourth destination row. */
59 " leal (%2,%2,2),%%eax \n" // UBER 15: 8
60 " movq %%mm0, 0(%0) \n" // UBER 16: 11
61 " movq %%mm1, (%0,%2) \n" // UBER 17: 13
62 " movq %%mm4, (%0,%2,2) \n" // UBER 18: 12
63 " movq %%mm3, (%0,%%eax) \n" // UBER 19: 14 15
/* Pass 2: bytes 8-15 of the same four source rows. %0 is advanced by
 * 4 * dstr before the stores, so the results land at byte offset 0 of the
 * next four destination rows. */
65 " leal (%3,%3,2),%%eax \n"
66 " movq 8(%1), %%mm0 \n"
67 " movq 8(%1,%3,2), %%mm2 \n"
68 " movq %%mm0, %%mm4 \n"
69 " movq %%mm2, %%mm5 \n"
70 " punpcklwd 8(%1,%3), %%mm0 \n"
71 " punpcklwd 8(%1,%%eax), %%mm2 \n"
72 " punpckhwd 8(%1,%3), %%mm4 \n"
73 " punpckhwd 8(%1,%%eax), %%mm5 \n"
74 " movq %%mm0, %%mm1 \n"
75 " movq %%mm4, %%mm3 \n"
76 " punpckldq %%mm2, %%mm0 \n"
77 " punpckldq %%mm5, %%mm4 \n"
78 " punpckhdq %%mm2, %%mm1 \n"
79 " punpckhdq %%mm5, %%mm3 \n"
80 " leal (%2,%2,2),%%eax \n"
81 " leal (%0,%2,4),%0 \n"
82 " movq %%mm0, 0(%0) \n"
83 " movq %%mm1, (%0,%2) \n"
84 " movq %%mm4, (%0,%2,2) \n"
85 " movq %%mm3, (%0,%%eax) \n"
/* Pass 3: %1 is advanced by 4 * sstr, then bytes 0-7 of the next four source
 * rows are shuffled and stored at byte offset 8 of four destination rows. */
87 " leal (%1,%3,4),%1 \n"
88 " leal (%3,%3,2),%%eax \n"
89 " movq 0(%1), %%mm0 \n"
90 " movq 0(%1,%3,2), %%mm2 \n"
91 " movq %%mm0, %%mm4 \n"
92 " movq %%mm2, %%mm5 \n"
93 " punpcklwd 0(%1,%3), %%mm0 \n"
94 " punpcklwd 0(%1,%%eax), %%mm2 \n"
95 " punpckhwd 0(%1,%3), %%mm4 \n"
96 " punpckhwd 0(%1,%%eax), %%mm5 \n"
97 " movq %%mm0, %%mm1 \n"
98 " movq %%mm4, %%mm3 \n"
99 " punpckldq %%mm2, %%mm0 \n"
100 " punpckldq %%mm5, %%mm4 \n"
101 " punpckhdq %%mm2, %%mm1 \n"
102 " punpckhdq %%mm5, %%mm3 \n"
103 " leal (%2,%2,2),%%eax \n"
/* NOTE(review): embedded numbers 104 and 106 are missing on either side of
 * the next instruction. As shown, %0 moves forward by another 4 * dstr and
 * would end up 8 rows past the original dest. For an 8x8 transpose these
 * stores would have to land in dest rows 0-3, so the elided lines presumably
 * adjust %2 or %0 -- confirm against the full source. */
105 " leal (%0,%2,4),%0 \n"
107 " movq %%mm0, 8(%0) \n"
108 " movq %%mm1, 8(%0,%2) \n"
109 " movq %%mm4, 8(%0,%2,2) \n"
110 " movq %%mm3, 8(%0,%%eax) \n"
/* Pass 4: bytes 8-15 of the same four source rows as pass 3. %0 is advanced
 * by 4 * dstr and the results are stored at byte offset 8. */
112 " leal (%3,%3,2),%%eax \n"
113 " movq 8(%1), %%mm0 \n"
114 " movq 8(%1,%3,2), %%mm2 \n"
115 " movq %%mm0, %%mm4 \n"
116 " movq %%mm2, %%mm5 \n"
117 " punpcklwd 8(%1,%3), %%mm0 \n"
118 " punpcklwd 8(%1,%%eax), %%mm2 \n"
119 " punpckhwd 8(%1,%3), %%mm4 \n"
120 " punpckhwd 8(%1,%%eax), %%mm5 \n"
121 " movq %%mm0, %%mm1 \n"
122 " movq %%mm4, %%mm3 \n"
123 " punpckldq %%mm2, %%mm0 \n"
124 " punpckldq %%mm5, %%mm4 \n"
125 " punpckhdq %%mm2, %%mm1 \n"
126 " punpckhdq %%mm5, %%mm3 \n"
127 " leal (%2,%2,2),%%eax \n"
128 " leal (%0,%2,4),%0 \n"
129 " movq %%mm0, 8(%0) \n"
130 " movq %%mm1, 8(%0,%2) \n"
131 " movq %%mm4, 8(%0,%2,2) \n"
132 " movq %%mm3, 8(%0,%%eax) \n"
/* All four operands use "+r": they live in registers and are marked
 * read-write. The asm modifies %0 and %1 with lea above. Any input or
 * clobber lists that follow are not visible in this listing. */
134 : "+r" (dest), "+r" (src), "+r" (dstr), "+r" (sstr)
/* Associates trans8x8_u16_mmx with the trans8x8_u16 class and passes
 * OIL_IMPL_FLAG_MMX as the flags argument. The macro is defined outside this
 * file; presumably the flag restricts use to CPUs with MMX -- confirm. */
139 OIL_DEFINE_IMPL_FULL (trans8x8_u16_mmx, trans8x8_u16, OIL_IMPL_FLAG_MMX);
/*
 * trans8x8_u16_asm1: scalar x86 inline-assembly routine that moves 16-bit
 * words one at a time through %ax.
 *
 * The visible part reads eight words at byte offsets 0..14 from (%ebx) and
 * writes each one to a different address formed from %ecx/%esi and multiples
 * of %edx.
 *
 * NOTE(review): the embedded line numbers have gaps (143, 145-150, 153-155,
 * 164-166, 175-184). The instructions that load the registers, any loop
 * label/counter, and the pointer updates are not visible here. Presumably
 * ebx = src, ecx = dest, edx = dstr -- confirm against the full source.
 */
142 trans8x8_u16_asm1 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
/* The assembly is compiled only when neither __WINSCW__ nor __WINS__ is
 * defined. */
144 #if !defined(__WINSCW__) && !defined(__WINS__)
/* esi = ecx + 8 * edx, then minus edx: esi = ecx + 7 * edx. */
151 " lea (%%ecx,%%edx,8), %%esi \n"
152 " sub %%edx, %%esi\n "
/* Words at byte offsets 0, 2, 4, 8 (word indices 0, 1, 2, 4) go to
 * ecx + {0, 1, 2, 4} * edx. */
156 " mov (%%ebx), %%ax \n"
157 " mov %%ax,(%%ecx) \n"
158 " mov 2(%%ebx), %%ax \n"
159 " mov %%ax,(%%ecx,%%edx,1) \n"
160 " mov 4(%%ebx), %%ax \n"
161 " mov %%ax,(%%ecx,%%edx,2) \n"
162 " mov 8(%%ebx), %%ax \n"
163 " mov %%ax,(%%ecx,%%edx,4) \n"
/* Words at byte offsets 6, 10, 12, 14 (word indices 3, 5, 6, 7) go to
 * esi + {4, 2, 1, 0} * edx.
 * NOTE(review): embedded numbers 164-166 are missing just before this group.
 * These addresses equal ecx + {3, 5, 6, 7} * (original edx) only if edx has
 * been negated in the elided lines -- confirm. */
167 " mov 6(%%ebx), %%ax \n"
168 " mov %%ax,(%%esi,%%edx,4) \n"
169 " mov 10(%%ebx), %%ax \n"
170 " mov %%ax,(%%esi,%%edx,2) \n"
171 " mov 12(%%ebx), %%ax \n"
172 " mov %%ax,(%%esi,%%edx,1) \n"
173 " mov 14(%%ebx), %%ax \n"
174 " mov %%ax,(%%esi) \n"
/* Operands, in this order: dest, dstr, src, sstr, saved_ebx. All use a plain
 * "m" constraint (memory, no "=" or "+" modifier). */
185 : "m" (dest), "m" (dstr), "m" (src), "m" (sstr), "m" (saved_ebx)
/* Clobbers. ebx is used above but is not listed; the saved_ebx operand
 * suggests it is saved and restored by hand in the elided lines -- confirm.
 * NOTE(review): the asm stores to memory through ecx/esi, but "memory" is
 * not in this clobber list. */
186 : "eax", "ecx", "edx", "esi", "edi");
/* Associates trans8x8_u16_asm1 with the trans8x8_u16 class. No flags
 * argument is passed here, unlike the OIL_DEFINE_IMPL_FULL line used for the
 * MMX variant. The macro is defined outside this file. */
189 OIL_DEFINE_IMPL (trans8x8_u16_asm1, trans8x8_u16);
/*
 * trans8x8_u16_asm2: scalar x86 inline-assembly routine similar to
 * trans8x8_u16_asm1, but it reads the source 32 bits at a time into %eax
 * and stores 16-bit halves via %ax.
 *
 * NOTE(review): the embedded line numbers have gaps (193, 195-201, 204,
 * 206-208, 211, 213, 216, 218, 221, 223, 226, 228-236). The register setup,
 * any loop control, and the instruction between each pair of stores are not
 * visible here. Presumably ebx = src, ecx = dest, edx = dstr -- confirm.
 */
192 trans8x8_u16_asm2 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
/* The assembly is compiled only when neither __WINSCW__ nor __WINS__ is
 * defined. */
194 #if !defined(__WINSCW__) && !defined(__WINS__)
/* esi = ecx + 8 * edx, then minus edx: esi = ecx + 7 * edx. */
202 " lea (%%ecx,%%edx,8), %%esi \n"
203 " sub %%edx, %%esi\n "
/* edi starts as a copy of edx. Presumably it is negated in the elided lines
 * (206-208) so that esi + k * edi steps backwards from esi -- confirm. */
205 " movl %%edx, %%edi \n"
/* Each group below loads 4 bytes (two words) from ebx and issues two 16-bit
 * stores from ax.
 * NOTE(review): as shown, the same ax value would be stored twice. An
 * embedded number is missing between every pair of stores, so presumably a
 * shift moves the upper half of eax into ax in between -- confirm. */
209 " movl (%%ebx), %%eax \n"
210 " mov %%ax,(%%ecx) \n"
212 " mov %%ax,(%%ecx,%%edx,1) \n"
214 " movl 4(%%ebx), %%eax \n"
215 " mov %%ax,(%%ecx,%%edx,2) \n"
217 " mov %%ax,(%%esi,%%edi,4) \n"
219 " movl 8(%%ebx), %%eax \n"
220 " mov %%ax,(%%ecx,%%edx,4) \n"
222 " mov %%ax,(%%esi,%%edi,2) \n"
224 " movl 12(%%ebx), %%eax \n"
225 " mov %%ax,(%%esi,%%edi,1) \n"
227 " mov %%ax,(%%esi) \n"
/* Operands, in this order: dest, dstr, src, sstr, i, saved_ebx. All use a
 * plain "m" constraint (memory, no "=" or "+" modifier). */
237 : "m" (dest), "m" (dstr), "m" (src), "m" (sstr), "m" (i), "m" (saved_ebx)
/* Clobbers. ebx is used above but is not listed; the saved_ebx operand
 * suggests it is saved and restored by hand in the elided lines -- confirm.
 * NOTE(review): the asm stores to memory through ecx/esi, but "memory" is
 * not in this clobber list. */
238 : "eax", "ecx", "edx", "esi", "edi");
/* Associates trans8x8_u16_asm2 with the trans8x8_u16 class. No flags
 * argument is passed. The macro is defined outside this file. */
241 OIL_DEFINE_IMPL (trans8x8_u16_asm2, trans8x8_u16);
/*
 * Accessor returning the address of the implementation record for
 * trans8x8_u16_mmx, in the same form as the asm1/asm2 accessors that follow.
 *
 * The function name and the returned symbol must each be a single
 * identifier built from the implementation name only. With the text
 * ", trans8x8_u16" embedded in them, the header is not a valid function
 * definition and the return statement becomes a comma expression that
 * yields trans8x8_u16 instead of the record's address.
 *
 * NOTE(review): the record _oil_function_impl_trans8x8_u16_mmx is presumably
 * created by the OIL_DEFINE_IMPL_FULL line for trans8x8_u16_mmx -- confirm.
 * The closing brace and any surrounding preprocessor guard are not visible
 * in this listing.
 */
247 OilFunctionImpl* __oil_function_impl_trans8x8_u16_mmx() {
248 return &_oil_function_impl_trans8x8_u16_mmx;
/* Accessor returning the address of _oil_function_impl_trans8x8_u16_asm1.
 * That object is not defined in the visible text; presumably it is created
 * by the OIL_DEFINE_IMPL line for trans8x8_u16_asm1 -- confirm. The closing
 * brace is not visible in this listing. */
256 OilFunctionImpl* __oil_function_impl_trans8x8_u16_asm1() {
257 return &_oil_function_impl_trans8x8_u16_asm1;
/* Accessor returning the address of _oil_function_impl_trans8x8_u16_asm2.
 * That object is not defined in the visible text; presumably it is created
 * by the OIL_DEFINE_IMPL line for trans8x8_u16_asm2 -- confirm. The closing
 * brace is not visible in this listing. */
263 OilFunctionImpl* __oil_function_impl_trans8x8_u16_asm2() {
264 return &_oil_function_impl_trans8x8_u16_asm2;