Update contrib.
2 * LIBOIL - Library of Optimized Inner Loops
3 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
27 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
33 #include <liboil/liboilfunction.h>
/*
 * Function classes referenced by this file. Each class name below appears
 * as the second argument of an OIL_DEFINE_IMPL_FULL invocation further down,
 * which ties an MMX/MMXEXT implementation to that class.
 * NOTE(review): the macro definitions live in liboil headers not shown here.
 */
35 OIL_DECLARE_CLASS (err_intra8x8_u8);
36 OIL_DECLARE_CLASS (err_inter8x8_u8);
37 OIL_DECLARE_CLASS (err_inter8x8_u8_avg);
/*
 * MMX implementation for the err_intra8x8_u8 class.
 *
 * Accumulates two quantities over the bytes read from src1: the sum of the
 * samples and the sum of their squares. The value written to *dest is
 * (xxsum << 6) - xsum * xsum, i.e. 64 * sum(x^2) - (sum(x))^2.
 *
 * dest: receives the single 32-bit metric.
 * src1: start of the source data; 8 bytes are read per iteration.
 * ss1:  presumably the stride in bytes added to the source pointer after
 *       each iteration (asm operand %3) -- TODO confirm against the
 *       constraint list.
 *
 * NOTE(review): the local declarations, the loop setup/branch and the asm
 * operand constraints are not present in this listing; the comments below
 * describe only the instructions that are visible.
 */
40 err_intra8x8_u8_mmx (uint32_t *dest, uint8_t *src1, int ss1)
/* The inline assembly is compiled only when neither __WINSCW__ nor __WINS__ is defined. */
42 #if !defined(__WINSCW__) && !defined(__WINS__)
46 __asm__ __volatile__ (
/* Zero the registers: mm5 = running sum (16-bit lanes), mm6 = zero used
   for byte-to-word unpacking, mm7 = running sum of squares (32-bit lanes). */
47 " pxor %%mm5, %%mm5 \n\t"
48 " pxor %%mm6, %%mm6 \n\t"
49 " pxor %%mm7, %%mm7 \n\t"
52 " movq (%2), %%mm0 \n\t" /* take 8 bytes */
53 " movq %%mm0, %%mm2 \n\t"
/* Widen the 8 bytes to 16-bit words: low four in mm0, high four in mm2. */
55 " punpcklbw %%mm6, %%mm0 \n\t"
56 " punpckhbw %%mm6, %%mm2 \n\t"
/* Add the widened samples into the sum accumulator. */
58 " paddw %%mm0, %%mm5 \n\t"
59 " paddw %%mm2, %%mm5 \n\t"
/* pmaddwd of a register with itself squares each word and adds adjacent
   pairs, giving two 32-bit partial sums of squares per register. */
61 " pmaddwd %%mm0, %%mm0 \n\t"
62 " pmaddwd %%mm2, %%mm2 \n\t"
/* Add the partial sums of squares into the squares accumulator. */
64 " paddd %%mm0, %%mm7 \n\t"
65 " paddd %%mm2, %%mm7 \n\t"
67 " add %3, %2 \n\t" /* Inc pointer into src data */
/* Horizontal add of the four 16-bit lanes of mm5; the low word is then
   sign-extended to 32 bits through %edi and stored to operand %0
   (presumably xsum -- TODO confirm against the constraint list). */
72 " movq %%mm5, %%mm0 \n\t"
73 " psrlq $32, %%mm5 \n\t"
74 " paddw %%mm0, %%mm5 \n\t"
75 " movq %%mm5, %%mm0 \n\t"
76 " psrlq $16, %%mm5 \n\t"
77 " paddw %%mm0, %%mm5 \n\t"
78 " movd %%mm5, %%edi \n\t"
79 " movswl %%di, %%edi \n\t"
80 " movl %%edi, %0 \n\t"
/* Horizontal add of the two 32-bit lanes of mm7; the low dword is stored
   to operand %1 (presumably xxsum -- TODO confirm). */
82 " movq %%mm7, %%mm0 \n\t"
83 " psrlq $32, %%mm7 \n\t"
84 " paddd %%mm0, %%mm7 \n\t"
85 " movd %%mm7, %1 \n\t"
95 /* Compute population variance as mis-match metric. */
/* NOTE(review): the expression is N*sum(x^2) - sum(x)^2 with N = 64 (the
   shift by 6); for 64 samples, as the 8x8 name implies, that is the
   population variance scaled by 64*64 rather than the variance itself. */
96 *dest = (((xxsum<<6) - xsum*xsum));
/* Ties this function to the err_intra8x8_u8 class with the MMX flag
   (macro semantics are defined in liboil headers, not shown here). */
99 OIL_DEFINE_IMPL_FULL (err_intra8x8_u8_mmx, err_intra8x8_u8, OIL_IMPL_FLAG_MMX);
/*
 * MMX implementation for the err_inter8x8_u8 class.
 *
 * For each group of 8 bytes read from src1 and src2 it forms the per-sample
 * difference (src1 - src2), and accumulates the sum of the differences and
 * the sum of their squares. The value written to *dest is
 * (xxsum << 6) - xsum * xsum, i.e. 64 * sum(d^2) - (sum(d))^2.
 *
 * dest: receives the single 32-bit metric.
 * src1: first input; 8 bytes are read per iteration.
 * ss1:  presumably the byte stride applied to src1 (asm operand %4) --
 *       TODO confirm against the constraint list.
 * src2: second input; 8 bytes are read per iteration.
 * ss2:  presumably the byte stride applied to src2 (asm operand %5) --
 *       TODO confirm.
 *
 * NOTE(review): the local declarations, the loop label/branch and the asm
 * operand constraints are not present in this listing; the comments below
 * describe only the instructions that are visible.
 */
102 err_inter8x8_u8_mmx (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2)
/* The inline assembly is compiled only when neither __WINSCW__ nor __WINS__ is defined. */
106 #if !defined(__WINSCW__) && !defined(__WINS__)
107 __asm__ __volatile__ (
/* Zero the registers: mm5 = running sum of differences (16-bit lanes),
   mm6 = zero used for unpacking, mm7 = running sum of squares (32-bit). */
108 " pxor %%mm5, %%mm5 \n\t"
109 " pxor %%mm6, %%mm6 \n\t"
110 " pxor %%mm7, %%mm7 \n\t"
/* %edi is loaded with 8, presumably the iteration count; the decrement
   and branch are not visible here. %edi is reused as scratch later. */
111 " mov $8, %%edi \n\t"
113 " movq (%2), %%mm0 \n\t" /* take 8 bytes */
114 " movq (%3), %%mm1 \n\t"
115 " movq %%mm0, %%mm2 \n\t"
116 " movq %%mm1, %%mm3 \n\t"
/* Widen both inputs to 16-bit words: low halves in mm0/mm1, high halves
   in mm2/mm3. */
118 " punpcklbw %%mm6, %%mm0 \n\t"
119 " punpcklbw %%mm6, %%mm1 \n\t"
120 " punpckhbw %%mm6, %%mm2 \n\t"
121 " punpckhbw %%mm6, %%mm3 \n\t"
/* Signed (saturating) per-word difference: first input minus second. */
123 " psubsw %%mm1, %%mm0 \n\t"
124 " psubsw %%mm3, %%mm2 \n\t"
/* Add the differences into the sum accumulator. */
126 " paddw %%mm0, %%mm5 \n\t"
127 " paddw %%mm2, %%mm5 \n\t"
/* Square each difference and add adjacent pairs into 32-bit lanes. */
129 " pmaddwd %%mm0, %%mm0 \n\t"
130 " pmaddwd %%mm2, %%mm2 \n\t"
/* Add the partial sums of squares into the squares accumulator. */
132 " paddd %%mm0, %%mm7 \n\t"
133 " paddd %%mm2, %%mm7 \n\t"
135 " add %4, %2 \n\t" /* Inc pointer into src data */
136 " add %5, %3 \n\t" /* Inc pointer into ref data */
/* Horizontal add of the four 16-bit lanes of mm5; the low word is then
   sign-extended to 32 bits through %edi and stored to operand %0
   (presumably xsum -- TODO confirm against the constraint list). */
141 " movq %%mm5, %%mm0 \n\t"
142 " psrlq $32, %%mm5 \n\t"
143 " paddw %%mm0, %%mm5 \n\t"
144 " movq %%mm5, %%mm0 \n\t"
145 " psrlq $16, %%mm5 \n\t"
146 " paddw %%mm0, %%mm5 \n\t"
147 " movd %%mm5, %%edi \n\t"
148 " movswl %%di, %%edi \n\t"
149 " movl %%edi, %0 \n\t"
/* Horizontal add of the two 32-bit lanes of mm7; the low dword is stored
   to operand %1 (presumably xxsum -- TODO confirm). */
151 " movq %%mm7, %%mm0 \n\t"
152 " psrlq $32, %%mm7 \n\t"
153 " paddd %%mm0, %%mm7 \n\t"
154 " movd %%mm7, %1 \n\t"
166 /* Compute and return population variance as mis-match metric. */
/* NOTE(review): the value is stored through dest, not returned, and it is
   N*sum(d^2) - sum(d)^2 with N = 64 (the shift by 6), i.e. the population
   variance scaled by 64*64 when 64 samples are processed. */
167 *dest = (((xxsum<<6) - xsum*xsum));
/* Ties this function to the err_inter8x8_u8 class with the MMX flag
   (macro semantics are defined in liboil headers, not shown here). */
170 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_mmx, err_inter8x8_u8, OIL_IMPL_FLAG_MMX);
/*
 * MMX implementation for the err_inter8x8_u8_avg class.
 *
 * For each group of 8 bytes it first forms the per-byte truncating average
 * of src2 and src3, then the per-sample difference (src1 - average), and
 * accumulates the sum of the differences and the sum of their squares.
 * The value written to *dest is (xxsum << 6) - xsum * xsum.
 *
 * dest: receives the single 32-bit metric.
 * src1: input compared against the averaged reference; 8 bytes per iteration.
 * ss1:  presumably the byte stride applied to src1 (asm operand %5) --
 *       TODO confirm against the constraint list.
 * src2, src3: the two references that are averaged; 8 bytes per iteration.
 * ss2:  presumably the byte stride applied to both src2 and src3 (asm
 *       operand %6 is added to both pointers) -- TODO confirm.
 *
 * NOTE(review): the local declarations, the loop label/branch and the asm
 * operand constraints are not present in this listing; the comments below
 * describe only the instructions that are visible.
 */
173 err_inter8x8_u8_avg_mmx (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, uint8_t *src3, int ss2)
/* The inline assembly is compiled only when neither __WINSCW__ nor __WINS__ is defined. */
175 #if !defined(__WINSCW__) && !defined(__WINS__)
179 __asm__ __volatile__ (
/* All-ones, then each byte doubled (0xff + 0xff wraps to 0xfe): a mask
   that clears the low bit of every byte. */
180 " pcmpeqd %%mm4, %%mm4 \n\t" /* fefefefefefefefe in mm4 */
181 " paddb %%mm4, %%mm4 \n\t"
/* Zero the registers: mm5 = running sum of differences (16-bit lanes),
   mm6 = zero used for unpacking, mm7 = running sum of squares (32-bit). */
182 " pxor %%mm5, %%mm5 \n\t"
183 " pxor %%mm6, %%mm6 \n\t"
184 " pxor %%mm7, %%mm7 \n\t"
/* %edi is loaded with 8, presumably the iteration count; the decrement
   and branch are not visible here. %edi is reused as scratch later. */
185 " mov $8, %%edi \n\t"
187 " movq (%2), %%mm0 \n\t" /* take 8 bytes */
189 " movq (%3), %%mm2 \n\t"
190 " movq (%4), %%mm3 \n\t" /* take average of mm2 and mm3 */
/* Per-byte average without overflow: (a & b) + (((a ^ b) & 0xfe) >> 1).
   Masking with 0xfe first keeps the 64-bit shift from moving bits across
   byte boundaries. The result (rounded down) ends up in mm1. */
191 " movq %%mm2, %%mm1 \n\t"
192 " pand %%mm3, %%mm1 \n\t"
193 " pxor %%mm2, %%mm3 \n\t"
194 " pand %%mm4, %%mm3 \n\t"
195 " psrlq $1, %%mm3 \n\t"
196 " paddb %%mm3, %%mm1 \n\t"
198 " movq %%mm0, %%mm2 \n\t"
199 " movq %%mm1, %%mm3 \n\t"
/* Widen source and averaged reference to 16-bit words: low halves in
   mm0/mm1, high halves in mm2/mm3. */
201 " punpcklbw %%mm6, %%mm0 \n\t"
202 " punpcklbw %%mm6, %%mm1 \n\t"
203 " punpckhbw %%mm6, %%mm2 \n\t"
204 " punpckhbw %%mm6, %%mm3 \n\t"
/* Signed (saturating) per-word difference: source minus averaged reference. */
206 " psubsw %%mm1, %%mm0 \n\t"
207 " psubsw %%mm3, %%mm2 \n\t"
/* Add the differences into the sum accumulator. */
209 " paddw %%mm0, %%mm5 \n\t"
210 " paddw %%mm2, %%mm5 \n\t"
/* Square each difference and add adjacent pairs into 32-bit lanes. */
212 " pmaddwd %%mm0, %%mm0 \n\t"
213 " pmaddwd %%mm2, %%mm2 \n\t"
/* Add the partial sums of squares into the squares accumulator. */
215 " paddd %%mm0, %%mm7 \n\t"
216 " paddd %%mm2, %%mm7 \n\t"
218 " add %5, %2 \n\t" /* Inc pointer into src data */
219 " add %6, %3 \n\t" /* Inc pointer into ref data */
220 " add %6, %4 \n\t" /* Inc pointer into ref data */
/* Horizontal add of the four 16-bit lanes of mm5; the low word is then
   sign-extended to 32 bits through %edi and stored to operand %0
   (presumably xsum -- TODO confirm against the constraint list). */
225 " movq %%mm5, %%mm0 \n\t"
226 " psrlq $32, %%mm5 \n\t"
227 " paddw %%mm0, %%mm5 \n\t"
228 " movq %%mm5, %%mm0 \n\t"
229 " psrlq $16, %%mm5 \n\t"
230 " paddw %%mm0, %%mm5 \n\t"
231 " movd %%mm5, %%edi \n\t"
232 " movswl %%di, %%edi \n\t"
233 " movl %%edi, %0 \n\t"
/* Horizontal add of the two 32-bit lanes of mm7; the low dword is stored
   to operand %1 (presumably xxsum -- TODO confirm). */
235 " movq %%mm7, %%mm0 \n\t"
236 " psrlq $32, %%mm7 \n\t"
237 " paddd %%mm0, %%mm7 \n\t"
238 " movd %%mm7, %1 \n\t"
251 /* Compute and return population variance as mis-match metric. */
/* NOTE(review): the value is stored through dest, not returned, and it is
   N*sum(d^2) - sum(d)^2 with N = 64 (the shift by 6), i.e. the population
   variance scaled by 64*64 when 64 samples are processed. */
252 *dest = (((xxsum<<6) - xsum*xsum));
/* Ties this function to the err_inter8x8_u8_avg class with the MMX flag
   (macro semantics are defined in liboil headers, not shown here). */
256 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_avg_mmx, err_inter8x8_u8_avg, OIL_IMPL_FLAG_MMX);
/*
 * MMXEXT variant for the err_inter8x8_u8_avg class, compiled only when
 * ENABLE_BROKEN_IMPLS is defined.
 * NOTE(review): the guard name suggests this variant is considered broken;
 * the reason is not stated in this listing.
 *
 * Same computation shape as the plain MMX variant: average src2 and src3
 * per byte, subtract the average from src1, accumulate the sum of the
 * differences and the sum of their squares, then write
 * (xxsum << 6) - xsum * xsum to *dest. The average here is built from
 * pavgb followed by a rounding correction.
 *
 * dest: receives the single 32-bit metric.
 * src1: input compared against the averaged reference; 8 bytes per iteration.
 * ss1:  presumably the byte stride applied to src1 (asm operand %5) --
 *       TODO confirm against the constraint list.
 * src2, src3: the two references that are averaged; 8 bytes per iteration.
 * ss2:  presumably the byte stride applied to both src2 and src3 (asm
 *       operand %6 is added to both pointers) -- TODO confirm.
 *
 * NOTE(review): the local declarations, the loop label/branch, the asm
 * operand constraints and the matching #endif are not present in this
 * listing; the comments below describe only what is visible.
 */
258 #ifdef ENABLE_BROKEN_IMPLS
260 err_inter8x8_u8_avg_mmxext (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, uint8_t *src3, int ss2)
265 __asm__ __volatile__ (
/* mm4 = zero used for unpacking, mm5 = running sum of differences. */
266 " pxor %%mm4, %%mm4 \n\t"
267 " pxor %%mm5, %%mm5 \n\t"
/* Build 0x01 in every byte of mm6: load 0x01010101 into the low dword,
   then interleave the low four bytes with themselves. */
268 " mov $0x01010101, %%edi \n\t"
269 " movd %%edi, %%mm6 \n\t"
270 " punpcklbw %%mm6, %%mm6 \n\t"
/* mm7 = running sum of squares (32-bit lanes). */
271 " pxor %%mm7, %%mm7 \n\t"
/* %edi is loaded with 8, presumably the iteration count; the decrement
   and branch are not visible here. %edi is reused as scratch later. */
272 " mov $8, %%edi \n\t"
274 " movq (%2), %%mm0 \n\t" /* take 8 bytes */
276 " movq (%3), %%mm2 \n\t"
277 " movq (%4), %%mm1 \n\t" /* take average of mm2 and mm1 */
/* pavgb rounds up; subtracting ((a ^ b) & 1) per byte turns the result
   into the rounded-down average, left in mm1. */
278 " movq %%mm1, %%mm3 \n\t"
279 " pavgb %%mm2, %%mm1 \n\t"
280 " pxor %%mm2, %%mm3 \n\t"
281 " pand %%mm6, %%mm3 \n\t"
282 " psubb %%mm3, %%mm1 \n\t"
284 " movq %%mm0, %%mm2 \n\t"
285 " movq %%mm1, %%mm3 \n\t"
/* Widen source and averaged reference to 16-bit words: low halves in
   mm0/mm1, high halves in mm2/mm3. */
287 " punpcklbw %%mm4, %%mm0 \n\t"
288 " punpcklbw %%mm4, %%mm1 \n\t"
289 " punpckhbw %%mm4, %%mm2 \n\t"
290 " punpckhbw %%mm4, %%mm3 \n\t"
/* Signed (saturating) per-word difference: source minus averaged reference. */
292 " psubsw %%mm1, %%mm0 \n\t"
293 " psubsw %%mm3, %%mm2 \n\t"
/* Add the differences into the sum accumulator. */
295 " paddw %%mm0, %%mm5 \n\t"
296 " paddw %%mm2, %%mm5 \n\t"
/* Square each difference and add adjacent pairs into 32-bit lanes. */
298 " pmaddwd %%mm0, %%mm0 \n\t"
299 " pmaddwd %%mm2, %%mm2 \n\t"
/* Add the partial sums of squares into the squares accumulator. */
301 " paddd %%mm0, %%mm7 \n\t"
302 " paddd %%mm2, %%mm7 \n\t"
304 " add %5, %2 \n\t" /* Inc pointer into src data */
305 " add %6, %3 \n\t" /* Inc pointer into ref data */
306 " add %6, %4 \n\t" /* Inc pointer into ref data */
/* Horizontal add of the four 16-bit lanes of mm5; the low word is then
   sign-extended to 32 bits through %edi and stored to operand %0
   (presumably xsum -- TODO confirm against the constraint list). */
311 " movq %%mm5, %%mm0 \n\t"
312 " psrlq $32, %%mm5 \n\t"
313 " paddw %%mm0, %%mm5 \n\t"
314 " movq %%mm5, %%mm0 \n\t"
315 " psrlq $16, %%mm5 \n\t"
316 " paddw %%mm0, %%mm5 \n\t"
317 " movd %%mm5, %%edi \n\t"
318 " movswl %%di, %%edi \n\t"
319 " movl %%edi, %0 \n\t"
/* Horizontal add of the two 32-bit lanes of mm7; the low dword is stored
   to operand %1 (presumably xxsum -- TODO confirm). */
321 " movq %%mm7, %%mm0 \n\t"
322 " psrlq $32, %%mm7 \n\t"
323 " paddd %%mm0, %%mm7 \n\t"
324 " movd %%mm7, %1 \n\t"
337 /* Compute and return population variance as mis-match metric. */
/* NOTE(review): the value is stored through dest, not returned, and it is
   N*sum(d^2) - sum(d)^2 with N = 64 (the shift by 6), i.e. the population
   variance scaled by 64*64 when 64 samples are processed. */
338 *dest = (((xxsum<<6) - xsum*xsum));
/* Ties this function to the err_inter8x8_u8_avg class with the MMX and
   MMXEXT flags (macro semantics are defined in liboil headers, not shown). */
341 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_avg_mmxext, err_inter8x8_u8_avg, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
348 OilFunctionImpl* __oil_function_impl_err_intra8x8_u8_mmx, err_intra8x8_u8() {
349 return &_oil_function_impl_err_intra8x8_u8_mmx, err_intra8x8_u8;
355 OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_mmx, err_inter8x8_u8() {
356 return &_oil_function_impl_err_inter8x8_u8_mmx, err_inter8x8_u8;
362 OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_avg_mmx, err_inter8x8_u8_avg() {
363 return &_oil_function_impl_err_inter8x8_u8_avg_mmx, err_inter8x8_u8_avg;
369 OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_avg_mmxext, err_inter8x8_u8_avg() {
370 return &_oil_function_impl_err_inter8x8_u8_avg_mmxext, err_inter8x8_u8_avg;