Update contrib.
2 * LIBOIL - Library of Optimized Inner Loops
3 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
27 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
35 #include <liboil/liboil.h>
36 #include <liboil/liboilfunction.h>
37 #include <liboil/liboiltest.h>
40 * SECTION:liboilfuncs-math
41 * @title: Simple Arithmetic
42 * @short_description: Arithmetic operations
47 * SECTION:liboilfuncs-math8x8
48 * @title: Simple Arithmetic on Blocks
49 * @short_description: Arithmetic operations on 8x8 blocks
58 * @n: number of elements
60 * Adds elements in @s2 and @s1 and places the result in @d.
/* add_s16 class: prototype has an int16_t destination d, int16_t sources
 * src1 and src2, and an element count n. */
62 OIL_DEFINE_CLASS (add_s16, "int16_t *d, int16_t *src1, int16_t *src2, int n");
68 * @n: number of elements
70 * Subtracts elements in @s2 from @s1 and places the result in @d.
/* subtract_s16 class: prototype has an int16_t destination d, int16_t sources
 * src1 and src2, and an element count n. */
72 OIL_DEFINE_CLASS (subtract_s16, "int16_t *d, int16_t *src1, int16_t *src2, int n");
78 * @n: number of elements
80 * Adds elements in @s2 and @s1 and places the result in @d.
/* add_s16_u8 class: mixed-width prototype with an int16_t destination d,
 * an int16_t source src1, a uint8_t source src2, and an element count n. */
82 OIL_DEFINE_CLASS (add_s16_u8, "int16_t *d, int16_t *src1, uint8_t *src2, int n");
84 * oil_subtract_s16_u8:
88 * @n: number of elements
90 * Subtracts elements in @s2 from @s1 and places the result in @d.
/* subtract_s16_u8 class: mixed-width prototype with an int16_t destination d,
 * an int16_t source src1, a uint8_t source src2, and an element count n. */
92 OIL_DEFINE_CLASS (subtract_s16_u8, "int16_t *d, int16_t *src1, uint8_t *src2, int n");
99 * @n: number of elements
101 * Adds elements in @s2 and @s1 and places the result in @d.
/* add_f32 class: float destination d, float sources s1 and s2, element count n. */
103 OIL_DEFINE_CLASS (add_f32, "float *d, float *s1, float *s2, int n");
110 * @n: number of elements
112 * Adds elements in @s2 and @s1 and places the result in @d.
/* add_f64 class: double destination d, double sources s1 and s2, element count n. */
114 OIL_DEFINE_CLASS (add_f64, "double *d, double *s1, double *s2, int n");
123 * @n: number of elements
125 * Subtracts elements in @s2 from @s1 and places the result in @d.
/* subtract_f32 class: float destination d, float sources s1 and s2, element count n. */
127 OIL_DEFINE_CLASS (subtract_f32, "float *d, float *s1, float *s2, int n");
133 * @n: number of elements
135 * Subtracts elements in @s2 from @s1 and places the result in @d.
/* subtract_f64 class: double destination d, double sources s1 and s2, element count n. */
137 OIL_DEFINE_CLASS (subtract_f64, "double *d, double *s1, double *s2, int n");
143 * @n: number of elements
145 * Multiplies elements in @s1 and @s2 and places the result in @d.
/* multiply_f32 class: float destination d, float sources s1 and s2, element count n. */
147 OIL_DEFINE_CLASS (multiply_f32, "float *d, float *s1, float *s2, int n");
153 * @n: number of elements
155 * Multiplies elements in @s1 and @s2 and places the result in @d.
/* multiply_f64 class: double destination d, double sources s1 and s2, element count n. */
157 OIL_DEFINE_CLASS (multiply_f64, "double *d, double *s1, double *s2, int n");
163 * @n: number of elements
165 * Divides elements in @s1 by @s2 and places the result in @d.
/* divide_f32 class: float destination d, float dividend s1, float divisor s2,
 * element count n. */
167 OIL_DEFINE_CLASS (divide_f32, "float *d, float *s1, float *s2, int n");
173 * @n: number of elements
175 * Divides elements in @s1 by @s2 and places the result in @d.
/* divide_f64 class: double destination d, double dividend s1, double divisor s2,
 * element count n. */
177 OIL_DEFINE_CLASS (divide_f64, "double *d, double *s1, double *s2, int n");
183 * @n: number of elements
185 * Places the lesser of @s1 and @s2 in @d.
/* minimum_f32 class: float destination d, float sources s1 and s2, element count n. */
187 OIL_DEFINE_CLASS (minimum_f32, "float *d, float *s1, float *s2, int n");
193 * @n: number of elements
195 * Places the lesser of @s1 and @s2 in @d.
/* minimum_f64 class.
 * NOTE(review): the class name says f64, yet the prototype string uses float
 * pointers, and minimum_f64_ref in this file takes float * as well. The other
 * *_f64 classes here use double. Confirm whether double was intended; if so,
 * the prototype and the reference implementation must change together. */
197 OIL_DEFINE_CLASS (minimum_f64, "float *d, float *s1, float *s2, int n");
203 * @n: number of elements
205 * Places the greater of @s1 and @s2 in @d.
/* maximum_f32 class: float destination d, float sources s1 and s2, element count n. */
207 OIL_DEFINE_CLASS (maximum_f32, "float *d, float *s1, float *s2, int n");
213 * @n: number of elements
215 * Places the greater of @s1 and @s2 in @d.
/* maximum_f64 class.
 * NOTE(review): the class name says f64, yet the prototype string uses float
 * pointers, and maximum_f64_ref in this file takes float * as well. The other
 * *_f64 classes here use double. Confirm whether double was intended; if so,
 * the prototype and the reference implementation must change together. */
217 OIL_DEFINE_CLASS (maximum_f64, "float *d, float *s1, float *s2, int n");
223 * @n: number of elements
225 * Negates each element in @s and places the result in @d.
/* negative_f32 class: unary prototype with float destination d, float source s,
 * element count n. */
227 OIL_DEFINE_CLASS (negative_f32, "float *d, float *s, int n");
232 * @n: number of elements
234 * Calculates the multiplicative inverse of each element in @s and
235 * places the result in @d.
/* inverse_f32 class: unary prototype with float destination d, float source s,
 * element count n. */
237 OIL_DEFINE_CLASS (inverse_f32, "float *d, float *s, int n");
242 * @n: number of elements
244 * Calculates the sign of each element in @s and
245 * places the result in @d.
/* sign_f32 class: unary prototype with float destination d, float source s,
 * element count n. */
247 OIL_DEFINE_CLASS (sign_f32, "float *d, float *s, int n");
252 * @n: number of elements
254 * Calculates the greatest integer less than or equal to each element
255 * in @s and places the result in @d.
/* floor_f32 class: unary prototype with float destination d, float source s,
 * element count n. */
257 OIL_DEFINE_CLASS (floor_f32, "float *d, float *s, int n");
260 * oil_scalaradd_f32_ns:
264 * @n: number of elements
266 * Adds the constant value @s2_1 to each source element and places
/* scalaradd_f32_ns class: float destination d, float source array s1, and
 * s2_1, a pointer to the constant added to every element; element count n. */
269 OIL_DEFINE_CLASS (scalaradd_f32_ns, "float *d, float *s1, float *s2_1, int n");
271 * oil_scalarmultiply_f32_ns:
275 * @n: number of elements
277 * Multiplies the constant value @s2_1 and each source element and places
/* scalarmultiply_f32_ns class: float destination d, float source array s1, and
 * s2_1, a pointer to the constant multiplier; element count n. */
280 OIL_DEFINE_CLASS (scalarmultiply_f32_ns, "float *d, float *s1, float *s2_1, int n");
283 * oil_scalarmultiply_f64_ns:
287 * @n: number of elements
289 * Multiplies the constant value @s2_1 and each source element and places
/* scalarmultiply_f64_ns class: double destination d, double source array s1, and
 * s2_1, a pointer to the constant multiplier; element count n. */
292 OIL_DEFINE_CLASS (scalarmultiply_f64_ns, "double *d, double *s1, double *s2_1, int n");
/* Implementation tied to the add_s16 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] + src2[i] in d[i]. The sum is computed in int and narrowed
 * to int16_t on store, so out-of-range sums are not saturated.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
295 add_s16_ref (int16_t *d, int16_t *src1, int16_t *src2, int n)
299 d[i] = src1[i] + src2[i];
302 OIL_DEFINE_IMPL_REF (add_s16_ref, add_s16);
/* Implementation tied to the subtract_s16 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] - src2[i] in d[i]. The difference is computed in int and
 * narrowed to int16_t on store, so out-of-range results are not saturated.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
305 subtract_s16_ref (int16_t *d, int16_t *src1, int16_t *src2, int n)
309 d[i] = src1[i] - src2[i];
312 OIL_DEFINE_IMPL_REF (subtract_s16_ref, subtract_s16);
/* Implementation tied to the add_s16_u8 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] + src2[i] in d[i], where src2 holds unsigned 8-bit values;
 * the int sum is narrowed to int16_t on store.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
315 add_s16_u8_ref (int16_t *d, int16_t *src1, uint8_t *src2, int n)
319 d[i] = src1[i] + src2[i];
322 OIL_DEFINE_IMPL_REF (add_s16_u8_ref, add_s16_u8);
/* Implementation tied to the subtract_s16_u8 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] - src2[i] in d[i], where src2 holds unsigned 8-bit values;
 * the int difference is narrowed to int16_t on store.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
325 subtract_s16_u8_ref (int16_t *d, int16_t *src1, uint8_t *src2, int n)
329 d[i] = src1[i] - src2[i];
332 OIL_DEFINE_IMPL_REF (subtract_s16_u8_ref, subtract_s16_u8);
/* Implementation tied to the add_f32 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] + src2[i] in dest[i] (single precision).
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
335 add_f32_ref (float *dest, float *src1, float *src2, int n)
340 dest[i] = src1[i] + src2[i];
343 OIL_DEFINE_IMPL_REF (add_f32_ref, add_f32);
/* Implementation tied to the add_f64 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] + src2[i] in dest[i] (double precision).
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
346 add_f64_ref (double *dest, double *src1, double *src2, int n)
351 dest[i] = src1[i] + src2[i];
354 OIL_DEFINE_IMPL_REF (add_f64_ref, add_f64);
/* Implementation tied to the subtract_f32 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] - src2[i] in dest[i] (single precision).
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
357 subtract_f32_ref (float *dest, float *src1, float *src2, int n)
362 dest[i] = src1[i] - src2[i];
365 OIL_DEFINE_IMPL_REF (subtract_f32_ref, subtract_f32);
/* Implementation tied to the subtract_f64 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] - src2[i] in dest[i] (double precision).
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
368 subtract_f64_ref (double *dest, double *src1, double *src2, int n)
373 dest[i] = src1[i] - src2[i];
376 OIL_DEFINE_IMPL_REF (subtract_f64_ref, subtract_f64);
/* Implementation tied to the multiply_f32 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] * src2[i] in dest[i] (single precision).
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
379 multiply_f32_ref (float *dest, float *src1, float *src2, int n)
384 dest[i] = src1[i] * src2[i];
387 OIL_DEFINE_IMPL_REF (multiply_f32_ref, multiply_f32);
/* Implementation tied to the multiply_f64 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] * src2[i] in dest[i] (double precision).
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
390 multiply_f64_ref (double *dest, double *src1, double *src2, int n)
395 dest[i] = src1[i] * src2[i];
398 OIL_DEFINE_IMPL_REF (multiply_f64_ref, multiply_f64);
/* Implementation tied to the divide_f32 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] / src2[i] in dest[i] (single precision). No check for a
 * zero divisor is visible in this view.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
401 divide_f32_ref (float *dest, float *src1, float *src2, int n)
406 dest[i] = src1[i] / src2[i];
409 OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);
/* Implementation tied to the divide_f64 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] / src2[i] in dest[i] (double precision). No check for a
 * zero divisor is visible in this view.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
412 divide_f64_ref (double *dest, double *src1, double *src2, int n)
417 dest[i] = src1[i] / src2[i];
420 OIL_DEFINE_IMPL_REF (divide_f64_ref, divide_f64);
/* Implementation tied to the minimum_f32 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] when src1[i] < src2[i], otherwise src2[i]; so src2[i] is
 * chosen whenever the comparison is false (equal values or a NaN operand).
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
423 minimum_f32_ref (float *dest, float *src1, float *src2, int n)
428 dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
431 OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);
/* Implementation tied to the maximum_f32 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] when src1[i] > src2[i], otherwise src2[i]; so src2[i] is
 * chosen whenever the comparison is false (equal values or a NaN operand).
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
434 maximum_f32_ref (float *dest, float *src1, float *src2, int n)
439 dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
442 OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);
/* Implementation tied to the minimum_f64 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] when src1[i] < src2[i], otherwise src2[i].
 * NOTE(review): despite the _f64 name, every pointer here is float *, matching
 * the float prototype string given to OIL_DEFINE_CLASS (minimum_f64, ...).
 * Confirm whether double was intended; changing only one of the two would
 * make the implementation disagree with its class prototype.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
445 minimum_f64_ref (float *dest, float *src1, float *src2, int n)
450 dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
453 OIL_DEFINE_IMPL_REF (minimum_f64_ref, minimum_f64);
/* Implementation tied to the maximum_f64 class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] when src1[i] > src2[i], otherwise src2[i].
 * NOTE(review): despite the _f64 name, every pointer here is float *, matching
 * the float prototype string given to OIL_DEFINE_CLASS (maximum_f64, ...).
 * Confirm whether double was intended; changing only one of the two would
 * make the implementation disagree with its class prototype.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
456 maximum_f64_ref (float *dest, float *src1, float *src2, int n)
461 dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
464 OIL_DEFINE_IMPL_REF (maximum_f64_ref, maximum_f64);
/* Implementation tied to the negative_f32 class via OIL_DEFINE_IMPL_REF.
 * NOTE(review): the body statement is not visible in this view; per the class
 * description it presumably stores the negation of src1[i] in dest[i].
 * TODO confirm against the full file. */
467 negative_f32_ref (float *dest, float *src1, int n)
475 OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);
/* Implementation tied to the inverse_f32 class via OIL_DEFINE_IMPL_REF.
 * Stores the reciprocal of src1[i] in dest[i]. The literal 1.0 is a double,
 * so the division is carried out in double and narrowed to float on store.
 * No check for a zero input is visible in this view.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
478 inverse_f32_ref (float *dest, float *src1, int n)
483 dest[i] = 1.0/src1[i];
486 OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);
/*
 * Reference implementation of the sign_f32 class.
 *
 * For each of the n elements of src1, stores that element's sign in dest:
 * -1.0f for negative values, 1.0f for positive values, and 0.0f otherwise
 * (a zero of either sign, or NaN, for which both comparisons are false).
 *
 * The earlier body, (src1[i] < 0) ? -src1[i] : src1[i], produced the absolute
 * value instead of the sign that the class name and description call for.
 */
static void
sign_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for (i = 0; i < n; i++) {
    /* Each comparison yields 0 or 1, so the difference is -1, 0 or +1. */
    dest[i] = (float) ((src1[i] > 0) - (src1[i] < 0));
  }
}
/* Ties sign_f32_ref to the sign_f32 class; OIL_DEFINE_IMPL_REF is defined
 * outside this file. */
497 OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);
/* Implementation tied to the floor_f32 class via OIL_DEFINE_IMPL_REF.
 * Stores floor(src1[i]) in dest[i]. floor() is the double-precision routine,
 * so the float input is widened for the call and the result narrowed on store.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
500 floor_f32_ref (float *dest, float *src1, int n)
505 dest[i] = floor(src1[i]);
508 OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);
/* Implementation tied to the scalaradd_f32_ns class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] + src2[0] in dest[i]; only the first element of src2 is read,
 * serving as the constant addend.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
513 scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
518 dest[i] = src1[i] + src2[0];
521 OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);
/* Implementation tied to the scalarmultiply_f32_ns class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] * src2[0] in dest[i]; only the first element of src2 is read,
 * serving as the constant multiplier.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
524 scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
529 dest[i] = src1[i] * src2[0];
532 OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);
/* Implementation tied to the scalarmultiply_f64_ns class via OIL_DEFINE_IMPL_REF.
 * Stores src1[i] * src2[0] in dest[i] (double precision); only the first
 * element of src2 is read, serving as the constant multiplier.
 * (Loop bounds are not visible in this view; presumably i = 0..n-1.) */
535 scalarmultiply_f64_ns_ref (double *dest, double *src1, double *src2, int n)
540 dest[i] = src1[i] * src2[0];
543 OIL_DEFINE_IMPL_REF (scalarmultiply_f64_ns_ref, scalarmultiply_f64_ns);
/* Class-object accessors, one per class used in this file.
 * Each __oil_function_class_<name>() takes no arguments and returns the
 * address of the file-level object _oil_function_class_<name>.
 * NOTE(review): those objects are not declared in the visible lines; they are
 * presumably produced by the OIL_DEFINE_CLASS invocations — TODO confirm.
 * Closing braces and any surrounding preprocessor guards are not visible in
 * this view. */
549 OilFunctionClass* __oil_function_class_add_s16() {
550 return &_oil_function_class_add_s16;
556 OilFunctionClass* __oil_function_class_subtract_s16() {
557 return &_oil_function_class_subtract_s16;
563 OilFunctionClass* __oil_function_class_add_s16_u8() {
564 return &_oil_function_class_add_s16_u8;
570 OilFunctionClass* __oil_function_class_subtract_s16_u8() {
571 return &_oil_function_class_subtract_s16_u8;
577 OilFunctionClass* __oil_function_class_add_f32() {
578 return &_oil_function_class_add_f32;
584 OilFunctionClass* __oil_function_class_add_f64() {
585 return &_oil_function_class_add_f64;
591 OilFunctionClass* __oil_function_class_subtract_f32() {
592 return &_oil_function_class_subtract_f32;
598 OilFunctionClass* __oil_function_class_subtract_f64() {
599 return &_oil_function_class_subtract_f64;
605 OilFunctionClass* __oil_function_class_multiply_f32() {
606 return &_oil_function_class_multiply_f32;
612 OilFunctionClass* __oil_function_class_multiply_f64() {
613 return &_oil_function_class_multiply_f64;
619 OilFunctionClass* __oil_function_class_divide_f32() {
620 return &_oil_function_class_divide_f32;
626 OilFunctionClass* __oil_function_class_divide_f64() {
627 return &_oil_function_class_divide_f64;
633 OilFunctionClass* __oil_function_class_minimum_f32() {
634 return &_oil_function_class_minimum_f32;
640 OilFunctionClass* __oil_function_class_minimum_f64() {
641 return &_oil_function_class_minimum_f64;
647 OilFunctionClass* __oil_function_class_maximum_f32() {
648 return &_oil_function_class_maximum_f32;
654 OilFunctionClass* __oil_function_class_maximum_f64() {
655 return &_oil_function_class_maximum_f64;
661 OilFunctionClass* __oil_function_class_negative_f32() {
662 return &_oil_function_class_negative_f32;
668 OilFunctionClass* __oil_function_class_inverse_f32() {
669 return &_oil_function_class_inverse_f32;
675 OilFunctionClass* __oil_function_class_sign_f32() {
676 return &_oil_function_class_sign_f32;
682 OilFunctionClass* __oil_function_class_floor_f32() {
683 return &_oil_function_class_floor_f32;
689 OilFunctionClass* __oil_function_class_scalaradd_f32_ns() {
690 return &_oil_function_class_scalaradd_f32_ns;
696 OilFunctionClass* __oil_function_class_scalarmultiply_f32_ns() {
697 return &_oil_function_class_scalarmultiply_f32_ns;
703 OilFunctionClass* __oil_function_class_scalarmultiply_f64_ns() {
704 return &_oil_function_class_scalarmultiply_f64_ns;
/* Implementation-object accessors, one per *_ref function in this file.
 * Each __oil_function_impl_<name>_ref() takes no arguments and returns the
 * address of the file-level object _oil_function_impl_<name>_ref.
 * NOTE(review): those objects are not declared in the visible lines; they are
 * presumably produced by the OIL_DEFINE_IMPL_REF invocations — TODO confirm.
 * Closing braces and any surrounding preprocessor guards are not visible in
 * this view. */
712 OilFunctionImpl* __oil_function_impl_add_s16_ref() {
713 return &_oil_function_impl_add_s16_ref;
719 OilFunctionImpl* __oil_function_impl_subtract_s16_ref() {
720 return &_oil_function_impl_subtract_s16_ref;
726 OilFunctionImpl* __oil_function_impl_add_s16_u8_ref() {
727 return &_oil_function_impl_add_s16_u8_ref;
733 OilFunctionImpl* __oil_function_impl_subtract_s16_u8_ref() {
734 return &_oil_function_impl_subtract_s16_u8_ref;
740 OilFunctionImpl* __oil_function_impl_add_f32_ref() {
741 return &_oil_function_impl_add_f32_ref;
747 OilFunctionImpl* __oil_function_impl_add_f64_ref() {
748 return &_oil_function_impl_add_f64_ref;
754 OilFunctionImpl* __oil_function_impl_subtract_f32_ref() {
755 return &_oil_function_impl_subtract_f32_ref;
761 OilFunctionImpl* __oil_function_impl_subtract_f64_ref() {
762 return &_oil_function_impl_subtract_f64_ref;
768 OilFunctionImpl* __oil_function_impl_multiply_f32_ref() {
769 return &_oil_function_impl_multiply_f32_ref;
775 OilFunctionImpl* __oil_function_impl_multiply_f64_ref() {
776 return &_oil_function_impl_multiply_f64_ref;
782 OilFunctionImpl* __oil_function_impl_divide_f32_ref() {
783 return &_oil_function_impl_divide_f32_ref;
789 OilFunctionImpl* __oil_function_impl_divide_f64_ref() {
790 return &_oil_function_impl_divide_f64_ref;
796 OilFunctionImpl* __oil_function_impl_minimum_f32_ref() {
797 return &_oil_function_impl_minimum_f32_ref;
803 OilFunctionImpl* __oil_function_impl_maximum_f32_ref() {
804 return &_oil_function_impl_maximum_f32_ref;
810 OilFunctionImpl* __oil_function_impl_minimum_f64_ref() {
811 return &_oil_function_impl_minimum_f64_ref;
817 OilFunctionImpl* __oil_function_impl_maximum_f64_ref() {
818 return &_oil_function_impl_maximum_f64_ref;
824 OilFunctionImpl* __oil_function_impl_negative_f32_ref() {
825 return &_oil_function_impl_negative_f32_ref;
831 OilFunctionImpl* __oil_function_impl_inverse_f32_ref() {
832 return &_oil_function_impl_inverse_f32_ref;
838 OilFunctionImpl* __oil_function_impl_sign_f32_ref() {
839 return &_oil_function_impl_sign_f32_ref;
845 OilFunctionImpl* __oil_function_impl_floor_f32_ref() {
846 return &_oil_function_impl_floor_f32_ref;
852 OilFunctionImpl* __oil_function_impl_scalaradd_f32_ns_ref() {
853 return &_oil_function_impl_scalaradd_f32_ns_ref;
859 OilFunctionImpl* __oil_function_impl_scalarmultiply_f32_ns_ref() {
860 return &_oil_function_impl_scalarmultiply_f32_ns_ref;
866 OilFunctionImpl* __oil_function_impl_scalarmultiply_f64_ns_ref() {
867 return &_oil_function_impl_scalarmultiply_f64_ns_ref;
/* Exported function-pointer accessors, one per class.
 * Each _oil_function_class_ptr_<name>() does two things on every call:
 *   1. stores the result of __oil_function_class_<name>() in the variable
 *      oil_function_class_ptr_<name> (declared outside the visible lines);
 *   2. returns the address of that class object's `func` member as void **.
 * Some accessors below are preceded by a visible #ifdef __SYMBIAN32__ line;
 * the matching #endif lines, the closing braces, and the guards of the
 * earlier accessors are not visible in this view. */
875 EXPORT_C void** _oil_function_class_ptr_add_s16 () {
876 oil_function_class_ptr_add_s16 = __oil_function_class_add_s16();
877 return &oil_function_class_ptr_add_s16->func;
883 EXPORT_C void** _oil_function_class_ptr_subtract_s16 () {
884 oil_function_class_ptr_subtract_s16 = __oil_function_class_subtract_s16();
885 return &oil_function_class_ptr_subtract_s16->func;
891 EXPORT_C void** _oil_function_class_ptr_add_s16_u8 () {
892 oil_function_class_ptr_add_s16_u8 = __oil_function_class_add_s16_u8();
893 return &oil_function_class_ptr_add_s16_u8->func;
899 EXPORT_C void** _oil_function_class_ptr_subtract_s16_u8 () {
900 oil_function_class_ptr_subtract_s16_u8 = __oil_function_class_subtract_s16_u8();
901 return &oil_function_class_ptr_subtract_s16_u8->func;
907 EXPORT_C void** _oil_function_class_ptr_add_f32 () {
908 oil_function_class_ptr_add_f32 = __oil_function_class_add_f32();
909 return &oil_function_class_ptr_add_f32->func;
915 EXPORT_C void** _oil_function_class_ptr_add_f64 () {
916 oil_function_class_ptr_add_f64 = __oil_function_class_add_f64();
917 return &oil_function_class_ptr_add_f64->func;
923 EXPORT_C void** _oil_function_class_ptr_subtract_f32 () {
924 oil_function_class_ptr_subtract_f32 = __oil_function_class_subtract_f32();
925 return &oil_function_class_ptr_subtract_f32->func;
931 EXPORT_C void** _oil_function_class_ptr_subtract_f64 () {
932 oil_function_class_ptr_subtract_f64 = __oil_function_class_subtract_f64();
933 return &oil_function_class_ptr_subtract_f64->func;
939 EXPORT_C void** _oil_function_class_ptr_multiply_f32 () {
940 oil_function_class_ptr_multiply_f32 = __oil_function_class_multiply_f32();
941 return &oil_function_class_ptr_multiply_f32->func;
947 EXPORT_C void** _oil_function_class_ptr_multiply_f64 () {
948 oil_function_class_ptr_multiply_f64 = __oil_function_class_multiply_f64();
949 return &oil_function_class_ptr_multiply_f64->func;
955 EXPORT_C void** _oil_function_class_ptr_divide_f32 () {
956 oil_function_class_ptr_divide_f32 = __oil_function_class_divide_f32();
957 return &oil_function_class_ptr_divide_f32->func;
963 EXPORT_C void** _oil_function_class_ptr_divide_f64 () {
964 oil_function_class_ptr_divide_f64 = __oil_function_class_divide_f64();
965 return &oil_function_class_ptr_divide_f64->func;
971 EXPORT_C void** _oil_function_class_ptr_minimum_f32 () {
972 oil_function_class_ptr_minimum_f32 = __oil_function_class_minimum_f32();
973 return &oil_function_class_ptr_minimum_f32->func;
979 EXPORT_C void** _oil_function_class_ptr_minimum_f64 () {
980 oil_function_class_ptr_minimum_f64 = __oil_function_class_minimum_f64();
981 return &oil_function_class_ptr_minimum_f64->func;
987 EXPORT_C void** _oil_function_class_ptr_maximum_f32 () {
988 oil_function_class_ptr_maximum_f32 = __oil_function_class_maximum_f32();
989 return &oil_function_class_ptr_maximum_f32->func;
995 EXPORT_C void** _oil_function_class_ptr_maximum_f64 () {
996 oil_function_class_ptr_maximum_f64 = __oil_function_class_maximum_f64();
997 return &oil_function_class_ptr_maximum_f64->func;
1001 #ifdef __SYMBIAN32__
1003 EXPORT_C void** _oil_function_class_ptr_negative_f32 () {
1004 oil_function_class_ptr_negative_f32 = __oil_function_class_negative_f32();
1005 return &oil_function_class_ptr_negative_f32->func;
1009 #ifdef __SYMBIAN32__
1011 EXPORT_C void** _oil_function_class_ptr_inverse_f32 () {
1012 oil_function_class_ptr_inverse_f32 = __oil_function_class_inverse_f32();
1013 return &oil_function_class_ptr_inverse_f32->func;
1017 #ifdef __SYMBIAN32__
1019 EXPORT_C void** _oil_function_class_ptr_sign_f32 () {
1020 oil_function_class_ptr_sign_f32 = __oil_function_class_sign_f32();
1021 return &oil_function_class_ptr_sign_f32->func;
1025 #ifdef __SYMBIAN32__
1027 EXPORT_C void** _oil_function_class_ptr_floor_f32 () {
1028 oil_function_class_ptr_floor_f32 = __oil_function_class_floor_f32();
1029 return &oil_function_class_ptr_floor_f32->func;
1033 #ifdef __SYMBIAN32__
1035 EXPORT_C void** _oil_function_class_ptr_scalaradd_f32_ns () {
1036 oil_function_class_ptr_scalaradd_f32_ns = __oil_function_class_scalaradd_f32_ns();
1037 return &oil_function_class_ptr_scalaradd_f32_ns->func;
1041 #ifdef __SYMBIAN32__
1043 EXPORT_C void** _oil_function_class_ptr_scalarmultiply_f32_ns () {
1044 oil_function_class_ptr_scalarmultiply_f32_ns = __oil_function_class_scalarmultiply_f32_ns();
1045 return &oil_function_class_ptr_scalarmultiply_f32_ns->func;
1049 #ifdef __SYMBIAN32__
1051 EXPORT_C void** _oil_function_class_ptr_scalarmultiply_f64_ns () {
1052 oil_function_class_ptr_scalarmultiply_f64_ns = __oil_function_class_scalarmultiply_f64_ns();
1053 return &oil_function_class_ptr_scalarmultiply_f64_ns->func;