Update contrib.
2 * LIBOIL - Library of Optimized Inner Loops
3 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
27 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
33 #include <liboil/liboilfunction.h>
34 #include "liboil/simdpack/simdpack.h"
46 * Calculates the sum of absolute differences between elements in @s1_8x8
47 * and @s2_8x8, and places the result in @d_1.
49 OIL_DEFINE_CLASS (sad8x8_u8,
50 "uint32_t *d_1, uint8_t *s1_8x8, int ss1, uint8_t *s2_8x8, int ss2");
59 * Calculates the sum of absolute differences between elements in @s1_8x8
60 * and @s2_8x8, and places the result in @d_1.
62 OIL_DEFINE_CLASS (sad8x8_s16_2,
63 "uint32_t *d_1, int16_t *s1_8x8, int ss1, int16_t *s2_8x8, int ss2");
72 * Calculates the sum of absolute differences between elements in @s1_8x8
73 * and @s2_8x8, and places the result in @d_1.
75 OIL_DEFINE_CLASS (sad8x8_f64_2,
76 "double *d_1, double *s1_8x8, int ss1, double *s2_8x8, int ss2");
86 * Calculates the sum of absolute differences between elements in @s1_8x8
87 * and the 8x8 block of array elements starting at row i in @s2_8xnp7, and
88 * places the result in @d_n.
90 OIL_DEFINE_CLASS (sad8x8_8xn_u8,
91 "uint32_t *d_n, uint8_t *s1_8x8, int ss1, uint8_t *s2_8xnp7, int ss2, int n");
101 * Calculates the sum of absolute differences between elements in @s1_12x12
102 * and @s2_12x12, and places the result in @d_1.
104 OIL_DEFINE_CLASS (sad12x12_u8,
105 "uint32_t *d_1, uint8_t *s1_12x12, int ss1, uint8_t *s2_12x12, int ss2");
107 * oil_sad12x12_12xn_u8:
115 * Calculates the sum of absolute differences between elements in @s1_12x12
116 * and the 12x12 block of array elements starting at row i in @s2_12xnp11, and
117 * places the result in @d_n.
119 OIL_DEFINE_CLASS (sad12x12_12xn_u8,
120 "uint32_t *d_n, uint8_t *s1_12x12, int ss1, uint8_t *s2_12xnp11, int ss2, int n");
129 * Calculates the sum of absolute differences between elements in @s1_16x16
130 * and @s2_16x16, and places the result in @d_1.
132 OIL_DEFINE_CLASS (sad16x16_u8,
133 "uint32_t *d_1, uint8_t *s1_16x16, int ss1, uint8_t *s2_16x16, int ss2");
135 * oil_sad16x16_16xn_u8:
143 * Calculates the sum of absolute differences between elements in @s1_16x16
144 * and the 16x16 block of array elements starting at row i in @s2_16xnp15, and
145 * places the result in @d_n.
147 OIL_DEFINE_CLASS (sad16x16_16xn_u8,
148 "uint32_t *d_n, uint8_t *s1_16x16, int ss1, uint8_t *s2_16xnp15, int ss2, int n");
152 sad8x8_f64_2_ref (double *dest, double *src1, int sstr1, double *src2,
159 for (i = 0; i < 8; i++) {
160 for (j = 0; j < 8; j++) {
161 sum += fabs (OIL_GET (src1, sstr1 * i + j * sizeof (double), double) -
162 OIL_GET (src2, sstr2 * i + j * sizeof (double), double));
168 OIL_DEFINE_IMPL_REF (sad8x8_f64_2_ref, sad8x8_f64_2);
171 sad8x8_s16_2_ref (uint32_t * dest, int16_t * src1, int sstr1, int16_t * src2,
179 for (i = 0; i < 8; i++) {
180 for (j = 0; j < 8; j++) {
181 d = ((int) OIL_GET (src1, sstr1 * i + j * sizeof (int16_t), int16_t)) -
182 ((int) OIL_GET (src2, sstr2 * i + j * sizeof (int16_t), int16_t));
183 sum += (d < 0) ? -d : d;
188 OIL_DEFINE_IMPL_REF (sad8x8_s16_2_ref, sad8x8_s16_2);
191 sad8x8_u8_ref (uint32_t * dest, uint8_t * src1, int sstr1, uint8_t * src2,
199 for (i = 0; i < 8; i++) {
200 for (j = 0; j < 8; j++) {
201 d = ((int) OIL_GET (src1, sstr1 * i + j * sizeof (uint8_t), uint8_t)) -
202 ((int) OIL_GET (src2, sstr2 * i + j * sizeof (uint8_t), uint8_t));
203 sum += (d < 0) ? -d : d;
208 OIL_DEFINE_IMPL_REF (sad8x8_u8_ref, sad8x8_u8);
211 sad8x8_8xn_u8_ref (uint32_t * dest, uint8_t * src1, int sstr1, uint8_t * src2,
220 for (j = 0; j < 8; j++) {
221 for (k = 0; k < 8; k++) {
222 d = ((int) OIL_GET (src1, sstr1 * j + k * sizeof (uint8_t), uint8_t)) -
223 ((int) OIL_GET (src2, sstr2 * (i+j) + k * sizeof (uint8_t), uint8_t));
224 sum += (d < 0) ? -d : d;
230 OIL_DEFINE_IMPL_REF (sad8x8_8xn_u8_ref, sad8x8_8xn_u8);
234 sad12x12_u8_ref (uint32_t * dest, uint8_t * src1, int sstr1, uint8_t * src2,
242 for (i = 0; i < 12; i++) {
243 for (j = 0; j < 12; j++) {
244 d = ((int) OIL_GET (src1, sstr1 * i + j * sizeof (uint8_t), uint8_t)) -
245 ((int) OIL_GET (src2, sstr2 * i + j * sizeof (uint8_t), uint8_t));
246 sum += (d < 0) ? -d : d;
251 OIL_DEFINE_IMPL_REF (sad12x12_u8_ref, sad12x12_u8);
254 sad12x12_12xn_u8_ref (uint32_t * dest, uint8_t * src1, int sstr1, uint8_t * src2,
263 for (j = 0; j < 12; j++) {
264 for (k = 0; k < 12; k++) {
265 d = ((int) OIL_GET (src1, sstr1 * j + k * sizeof (uint8_t), uint8_t)) -
266 ((int) OIL_GET (src2, sstr2 * (i+j) + k * sizeof (uint8_t), uint8_t));
267 sum += (d < 0) ? -d : d;
273 OIL_DEFINE_IMPL_REF (sad12x12_12xn_u8_ref, sad12x12_12xn_u8);
277 sad16x16_u8_ref (uint32_t * dest, uint8_t * src1, int sstr1, uint8_t * src2,
285 for (i = 0; i < 16; i++) {
286 for (j = 0; j < 16; j++) {
287 d = ((int) OIL_GET (src1, sstr1 * i + j * sizeof (uint8_t), uint8_t)) -
288 ((int) OIL_GET (src2, sstr2 * i + j * sizeof (uint8_t), uint8_t));
289 sum += (d < 0) ? -d : d;
294 OIL_DEFINE_IMPL_REF (sad16x16_u8_ref, sad16x16_u8);
297 sad16x16_16xn_u8_ref (uint32_t * dest, uint8_t * src1, int sstr1, uint8_t * src2,
306 for (j = 0; j < 16; j++) {
307 for (k = 0; k < 16; k++) {
308 d = ((int) OIL_GET (src1, sstr1 * j + k * sizeof (uint8_t), uint8_t)) -
309 ((int) OIL_GET (src2, sstr2 * (i+j) + k * sizeof (uint8_t), uint8_t));
310 sum += (d < 0) ? -d : d;
316 OIL_DEFINE_IMPL_REF (sad16x16_16xn_u8_ref, sad16x16_16xn_u8);
322 OilFunctionClass* __oil_function_class_sad8x8_u8() {
323 return &_oil_function_class_sad8x8_u8;
329 OilFunctionClass* __oil_function_class_sad8x8_s16_2() {
330 return &_oil_function_class_sad8x8_s16_2;
336 OilFunctionClass* __oil_function_class_sad8x8_f64_2() {
337 return &_oil_function_class_sad8x8_f64_2;
343 OilFunctionClass* __oil_function_class_sad8x8_8xn_u8() {
344 return &_oil_function_class_sad8x8_8xn_u8;
350 OilFunctionClass* __oil_function_class_sad12x12_u8() {
351 return &_oil_function_class_sad12x12_u8;
357 OilFunctionClass* __oil_function_class_sad12x12_12xn_u8() {
358 return &_oil_function_class_sad12x12_12xn_u8;
364 OilFunctionClass* __oil_function_class_sad16x16_u8() {
365 return &_oil_function_class_sad16x16_u8;
371 OilFunctionClass* __oil_function_class_sad16x16_16xn_u8() {
372 return &_oil_function_class_sad16x16_16xn_u8;
380 OilFunctionImpl* __oil_function_impl_sad8x8_f64_2_ref() {
381 return &_oil_function_impl_sad8x8_f64_2_ref;
387 OilFunctionImpl* __oil_function_impl_sad8x8_s16_2_ref() {
388 return &_oil_function_impl_sad8x8_s16_2_ref;
394 OilFunctionImpl* __oil_function_impl_sad8x8_u8_ref() {
395 return &_oil_function_impl_sad8x8_u8_ref;
401 OilFunctionImpl* __oil_function_impl_sad8x8_8xn_u8_ref() {
402 return &_oil_function_impl_sad8x8_8xn_u8_ref;
408 OilFunctionImpl* __oil_function_impl_sad12x12_u8_ref() {
409 return &_oil_function_impl_sad12x12_u8_ref;
415 OilFunctionImpl* __oil_function_impl_sad12x12_12xn_u8_ref() {
416 return &_oil_function_impl_sad12x12_12xn_u8_ref;
422 OilFunctionImpl* __oil_function_impl_sad16x16_u8_ref() {
423 return &_oil_function_impl_sad16x16_u8_ref;
429 OilFunctionImpl* __oil_function_impl_sad16x16_16xn_u8_ref() {
430 return &_oil_function_impl_sad16x16_16xn_u8_ref;
438 EXPORT_C void** _oil_function_class_ptr_sad8x8_u8 () {
439 oil_function_class_ptr_sad8x8_u8 = __oil_function_class_sad8x8_u8();
440 return &oil_function_class_ptr_sad8x8_u8->func;
446 EXPORT_C void** _oil_function_class_ptr_sad8x8_s16_2 () {
447 oil_function_class_ptr_sad8x8_s16_2 = __oil_function_class_sad8x8_s16_2();
448 return &oil_function_class_ptr_sad8x8_s16_2->func;
454 EXPORT_C void** _oil_function_class_ptr_sad8x8_f64_2 () {
455 oil_function_class_ptr_sad8x8_f64_2 = __oil_function_class_sad8x8_f64_2();
456 return &oil_function_class_ptr_sad8x8_f64_2->func;
462 EXPORT_C void** _oil_function_class_ptr_sad8x8_8xn_u8 () {
463 oil_function_class_ptr_sad8x8_8xn_u8 = __oil_function_class_sad8x8_8xn_u8();
464 return &oil_function_class_ptr_sad8x8_8xn_u8->func;
470 EXPORT_C void** _oil_function_class_ptr_sad12x12_u8 () {
471 oil_function_class_ptr_sad12x12_u8 = __oil_function_class_sad12x12_u8();
472 return &oil_function_class_ptr_sad12x12_u8->func;
478 EXPORT_C void** _oil_function_class_ptr_sad12x12_12xn_u8 () {
479 oil_function_class_ptr_sad12x12_12xn_u8 = __oil_function_class_sad12x12_12xn_u8();
480 return &oil_function_class_ptr_sad12x12_12xn_u8->func;
486 EXPORT_C void** _oil_function_class_ptr_sad16x16_u8 () {
487 oil_function_class_ptr_sad16x16_u8 = __oil_function_class_sad16x16_u8();
488 return &oil_function_class_ptr_sad16x16_u8->func;
494 EXPORT_C void** _oil_function_class_ptr_sad16x16_16xn_u8 () {
495 oil_function_class_ptr_sad16x16_16xn_u8 = __oil_function_class_sad16x16_16xn_u8();
496 return &oil_function_class_ptr_sad16x16_16xn_u8->func;