Update contrib.
/*
 * LIBOIL - Library of Optimized Inner Loops
 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
// Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
35 #include <liboil/liboil.h>
36 #include <liboil/liboilfunction.h>
37 #include <liboil/liboilclasses.h>
/*
 * Element-wise sum of two float arrays, four elements per loop pass.
 *
 * dest: output array; dest[k] receives src1[k] + src2[k] for 0 <= k < n
 * src1: first input array
 * src2: second input array
 * n:    number of elements to process; n == 0 writes nothing
 */
static void
add_f32_unroll4 (float *dest, float *src1, float *src2, int n)
{
  int i;

  /* n & ~0x3 clears the two low bits of n, so this loop only covers
   * complete groups of four elements. */
  for (i = 0; i < (n & (~0x3)); i += 4) {
    dest[i + 0] = src1[i + 0] + src2[i + 0];
    dest[i + 1] = src1[i + 1] + src2[i + 1];
    dest[i + 2] = src1[i + 2] + src2[i + 2];
    dest[i + 3] = src1[i + 3] + src2[i + 3];
  }
  /* Handle the remaining 0-3 elements one at a time. */
  for (; i < n; i++) {
    dest[i] = src1[i] + src2[i];
  }
}
/* Associates add_f32_unroll4 with the add_f32 function class.
 * NOTE(review): presumably this is what defines the
 * _oil_function_impl_add_f32_unroll4 descriptor returned by the accessor
 * further down in this file -- confirm against liboilfunction.h. */
OIL_DEFINE_IMPL (add_f32_unroll4, add_f32);
/*
 * Element-wise sum of two float arrays, four elements per loop pass,
 * written with post-incremented pointers instead of array indexing.
 *
 * dest: output array; element k receives src1[k] + src2[k] for 0 <= k < n
 * src1: first input array
 * src2: second input array
 * n:    number of elements to process; n == 0 writes nothing
 *
 * The pointer parameters are advanced locally; the caller's pointers are
 * unaffected because they are passed by value.
 */
static void
add_f32_unroll4b (float *dest, float *src1, float *src2, int n)
{
  int i;

  /* i only counts elements here; the data is reached through the moving
   * pointers. n & ~0x3 limits this loop to complete groups of four. */
  for (i = 0; i < (n & (~0x3)); i += 4) {
    *dest++ = *src1++ + *src2++;
    *dest++ = *src1++ + *src2++;
    *dest++ = *src1++ + *src2++;
    *dest++ = *src1++ + *src2++;
  }
  /* Handle the remaining 0-3 elements one at a time. */
  for (; i < n; i++) {
    *dest++ = *src1++ + *src2++;
  }
}
/* Associates add_f32_unroll4b with the add_f32 function class.
 * NOTE(review): presumably this is what defines the
 * _oil_function_impl_add_f32_unroll4b descriptor returned by the accessor
 * further down in this file -- confirm against liboilfunction.h. */
OIL_DEFINE_IMPL (add_f32_unroll4b, add_f32);
/*
 * Element-wise product of two float arrays, four elements per loop pass.
 *
 * dest: output array; dest[k] receives src1[k] * src2[k] for 0 <= k < n
 * src1: first input array
 * src2: second input array
 * n:    number of elements to process; n == 0 writes nothing
 */
static void
multiply_f32_unroll4 (float *dest, float *src1, float *src2, int n)
{
  int i;

  /* n & ~0x3 clears the two low bits of n, so this loop only covers
   * complete groups of four elements. */
  for (i = 0; i < (n & (~0x3)); i += 4) {
    dest[i + 0] = src1[i + 0] * src2[i + 0];
    dest[i + 1] = src1[i + 1] * src2[i + 1];
    dest[i + 2] = src1[i + 2] * src2[i + 2];
    dest[i + 3] = src1[i + 3] * src2[i + 3];
  }
  /* Handle the remaining 0-3 elements one at a time. */
  for (; i < n; i++) {
    dest[i] = src1[i] * src2[i];
  }
}
/* Associates multiply_f32_unroll4 with the multiply_f32 function class.
 * NOTE(review): presumably this is what defines the
 * _oil_function_impl_multiply_f32_unroll4 descriptor returned by the
 * accessor further down in this file -- confirm against liboilfunction.h. */
OIL_DEFINE_IMPL (multiply_f32_unroll4, multiply_f32);
/*
 * Adds one scalar to every element of a float array, four elements per
 * loop pass.
 *
 * dest: output array; dest[k] receives src1[k] + src2[0] for 0 <= k < n
 * src1: input array
 * src2: pointer to the scalar addend; only src2[0] is read
 * n:    number of elements to process; n == 0 writes nothing
 *
 * src2[0] is deliberately re-read for every element rather than cached in
 * a local, so the result is unchanged if dest happens to overlap src2.
 */
static void
scalaradd_f32_ns_unroll4 (float *dest, float *src1, float *src2, int n)
{
  int i;

  /* n & ~0x3 clears the two low bits of n, so this loop only covers
   * complete groups of four elements. */
  for (i = 0; i < (n & (~0x3)); i += 4) {
    dest[i + 0] = src1[i + 0] + src2[0];
    dest[i + 1] = src1[i + 1] + src2[0];
    dest[i + 2] = src1[i + 2] + src2[0];
    dest[i + 3] = src1[i + 3] + src2[0];
  }
  /* Handle the remaining 0-3 elements one at a time. */
  for (; i < n; i++) {
    dest[i] = src1[i] + src2[0];
  }
}
/* Associates scalaradd_f32_ns_unroll4 with the scalaradd_f32_ns function
 * class.
 * NOTE(review): presumably this is what defines the
 * _oil_function_impl_scalaradd_f32_ns_unroll4 descriptor returned by the
 * accessor further down in this file -- confirm against liboilfunction.h. */
OIL_DEFINE_IMPL (scalaradd_f32_ns_unroll4, scalaradd_f32_ns);
/*
 * Multiplies every element of a float array by one scalar, four elements
 * per loop pass.
 *
 * dest: output array; dest[k] receives src1[k] * src2[0] for 0 <= k < n
 * src1: input array
 * src2: pointer to the scalar factor; only src2[0] is read
 * n:    number of elements to process; n == 0 writes nothing
 *
 * src2[0] is deliberately re-read for every element rather than cached in
 * a local, so the result is unchanged if dest happens to overlap src2.
 */
static void
scalarmultiply_f32_ns_unroll4 (float *dest, float *src1, float *src2, int n)
{
  int i;

  /* n & ~0x3 clears the two low bits of n, so this loop only covers
   * complete groups of four elements. */
  for (i = 0; i < (n & (~0x3)); i += 4) {
    dest[i + 0] = src1[i + 0] * src2[0];
    dest[i + 1] = src1[i + 1] * src2[0];
    dest[i + 2] = src1[i + 2] * src2[0];
    dest[i + 3] = src1[i + 3] * src2[0];
  }
  /* Handle the remaining 0-3 elements one at a time. */
  for (; i < n; i++) {
    dest[i] = src1[i] * src2[0];
  }
}
/* Associates scalarmultiply_f32_ns_unroll4 with the scalarmultiply_f32_ns
 * function class.
 * NOTE(review): presumably this is what defines the
 * _oil_function_impl_scalarmultiply_f32_ns_unroll4 descriptor returned by
 * the accessor further down in this file -- confirm against
 * liboilfunction.h. */
OIL_DEFINE_IMPL (scalarmultiply_f32_ns_unroll4, scalarmultiply_f32_ns);
/*
 * Multiplies every element of a double array by one scalar, four elements
 * per loop pass.
 *
 * dest: output array; dest[k] receives src1[k] * src2[0] for 0 <= k < n
 * src1: input array
 * src2: pointer to the scalar factor; only src2[0] is read
 * n:    number of elements to process; n == 0 writes nothing
 *
 * src2[0] is deliberately re-read for every element rather than cached in
 * a local, so the result is unchanged if dest happens to overlap src2.
 */
static void
scalarmultiply_f64_ns_unroll4 (double *dest, double *src1, double *src2, int n)
{
  int i;

  /* n & ~0x3 clears the two low bits of n, so this loop only covers
   * complete groups of four elements. */
  for (i = 0; i < (n & (~0x3)); i += 4) {
    dest[i + 0] = src1[i + 0] * src2[0];
    dest[i + 1] = src1[i + 1] * src2[0];
    dest[i + 2] = src1[i + 2] * src2[0];
    dest[i + 3] = src1[i + 3] * src2[0];
  }
  /* Handle the remaining 0-3 elements one at a time. */
  for (; i < n; i++) {
    dest[i] = src1[i] * src2[0];
  }
}
/* Associates scalarmultiply_f64_ns_unroll4 with the scalarmultiply_f64_ns
 * function class.
 * NOTE(review): presumably this is what defines the
 * _oil_function_impl_scalarmultiply_f64_ns_unroll4 descriptor returned by
 * the accessor further down in this file -- confirm against
 * liboilfunction.h. */
OIL_DEFINE_IMPL (scalarmultiply_f64_ns_unroll4, scalarmultiply_f64_ns);
153 OilFunctionImpl* __oil_function_impl_add_f32_unroll4() {
154 return &_oil_function_impl_add_f32_unroll4;
160 OilFunctionImpl* __oil_function_impl_add_f32_unroll4b() {
161 return &_oil_function_impl_add_f32_unroll4b;
167 OilFunctionImpl* __oil_function_impl_multiply_f32_unroll4() {
168 return &_oil_function_impl_multiply_f32_unroll4;
175 OilFunctionImpl* __oil_function_impl_scalaradd_f32_ns_unroll4() {
176 return &_oil_function_impl_scalaradd_f32_ns_unroll4;
182 OilFunctionImpl* __oil_function_impl_scalarmultiply_f32_ns_unroll4() {
183 return &_oil_function_impl_scalarmultiply_f32_ns_unroll4;
189 OilFunctionImpl* __oil_function_impl_scalarmultiply_f64_ns_unroll4() {
190 return &_oil_function_impl_scalarmultiply_f64_ns_unroll4;