Update contrib.
7 /* This file is autogenerated. Do not edit. */
9 * LIBOIL - Library of Optimized Inner Loops
10 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
11 * All rights reserved.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
26 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
30 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
31 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
41 #include <liboil/liboil.h>
42 #include <liboil/liboilclasses.h>
51 my $precision = shift;
52 my $type = "oil_type_$precision";
57 ${kernel}_${precision}_pointer (${type} *dest, ${type} *src1, ${type} *src2, int n)
60 *dest = *src1 ${operator} *src2;
67 OIL_DEFINE_IMPL (${kernel}_${precision}_pointer, ${kernel}_${precision});
76 my $precision = shift;
77 my $type = "oil_type_$precision";
82 ${kernel}_${precision}_unroll2 (${type} *dest, ${type} *src1, ${type} *src2, int n)
87 dest[0] = src1[0] ${operator} src2[0];
94 dest[i] = src1[i] ${operator} src2[i];
95 dest[i+1] = src1[i+1] ${operator} src2[i+1];
98 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll2, ${kernel}_${precision});
107 my $precision = shift;
108 my $type = "oil_type_$precision";
109 my $operator = shift;
113 ${kernel}_${precision}_unroll4a (${type} *dest, ${type} *src1, ${type} *src2, int n)
118 dest[0] = src1[0] ${operator} src2[0];
125 dest[i] = src1[i] ${operator} src2[i];
126 dest[i+1] = src1[i+1] ${operator} src2[i+1];
127 dest[i+2] = src1[i+2] ${operator} src2[i+2];
128 dest[i+3] = src1[i+3] ${operator} src2[i+3];
131 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4a, ${kernel}_${precision});
140 my $precision = shift;
141 my $type = "oil_type_$precision";
142 my $operator = shift;
146 ${kernel}_${precision}_unroll4b (${type} *dest, ${type} *src1, ${type} *src2, int n)
150 for(i=0;i<(n&(~0x3));i+=4){
151 dest[i+0] = src1[i+0] ${operator} src2[i+0];
152 dest[i+1] = src1[i+1] ${operator} src2[i+1];
153 dest[i+2] = src1[i+2] ${operator} src2[i+2];
154 dest[i+3] = src1[i+3] ${operator} src2[i+3];
157 dest[i] = src1[i] ${operator} src2[i];
160 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4b, ${kernel}_${precision});
169 my $precision = shift;
170 my $type = "oil_type_$precision";
171 my $operator = shift;
175 ${kernel}_${precision}_unroll4c (${type} *dest, ${type} *src1, ${type} *src2, int n)
179 for(i=0;i<(n&(~0x3));i+=4){
180 *dest++ = *src1++ ${operator} *src2++;
181 *dest++ = *src1++ ${operator} *src2++;
182 *dest++ = *src1++ ${operator} *src2++;
183 *dest++ = *src1++ ${operator} *src2++;
186 *dest++ = *src1++ ${operator} *src2++;
189 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4c, ${kernel}_${precision});
195 my %binary_operators = (
202 my @types = ( "f32", "f64" );
204 while ( ($name, $op) = each %binary_operators ) {
205 foreach $prec (@types) {
206 binary_pointer($name, $prec, $op);
207 binary_unroll2($name, $prec, $op);
208 binary_unroll4a($name, $prec, $op);
209 binary_unroll4b($name, $prec, $op);
210 binary_unroll4c($name, $prec, $op);
# Invoke all five generator subs for every (operation, precision) pair.
# The order is fixed: every f32 operation first (subtract, add, multiply,
# divide), then the same four operations for f64.  Within each pair the
# subs run as pointer, unroll2, unroll4a, unroll4b, unroll4c.
# An ordered list is used (rather than a hash) so the call sequence is the
# same on every run.
{
    my @generators = (
        \&binary_pointer,
        \&binary_unroll2,
        \&binary_unroll4a,
        \&binary_unroll4b,
        \&binary_unroll4c,
    );

    # [ kernel name, C operator token ]
    my @operations = (
        [ "subtract", "-" ],
        [ "add",      "+" ],
        [ "multiply", "*" ],
        [ "divide",   "/" ],
    );

    for my $width ("f32", "f64") {
        for my $operation (@operations) {
            my ($label, $symbol) = @{$operation};
            for my $generate (@generators) {
                $generate->($label, $width, $symbol);
            }
        }
    }
}
266 subtract_f32_ref (float *dest, float *src1, float *src2, int n)
271 dest[i] = src1[i] - src2[i];
274 OIL_DEFINE_IMPL (subtract_f32_ref, subtract_f32);
277 multiply_f32_ref (float *dest, float *src1, float *src2, int n)
282 dest[i] = src1[i] * src2[i];
285 OIL_DEFINE_IMPL (multiply_f32_ref, multiply_f32);
288 divide_f32_ref (float *dest, float *src1, float *src2, int n)
293 dest[i] = src1[i] / src2[i];
296 OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);
299 minimum_f32_ref (float *dest, float *src1, float *src2, int n)
304 dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
307 OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);
310 maximum_f32_ref (float *dest, float *src1, float *src2, int n)
315 dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
318 OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);
321 negative_f32_ref (float *dest, float *src1, int n)
329 OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);
332 inverse_f32_ref (float *dest, float *src1, int n)
337 dest[i] = 1.0/src1[i];
340 OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);
343 sign_f32_ref (float *dest, float *src1, int n)
348 dest[i] = (src1[i] < 0) ? -src1[i] : src1[i];
351 OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);
354 floor_f32_ref (float *dest, float *src1, int n)
359 dest[i] = floor(src1[i]);
362 OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);
367 scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
372 dest[i] = src1[i] + src2[0];
375 OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);
378 scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
383 dest[i] = src1[i] * src2[0];
386 OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);