1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/math/generate_math.pl Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,389 @@
1.4 +#!/usr/bin/perl
1.5 +# Prints autogenerated C implementations of liboil binary math kernels to STDOUT.
1.6 +# The file header is emitted through a single-quoted here-doc: it needs no
1.7 +# interpolation, and a double-quoted one treats '@schleef' in the e-mail
1.8 +# address as an (empty) array, so the address came out as <ds.org>.
1.9 +print <<'EOF';
1.10 +/* This file is autogenerated. Do not edit. */
1.11 +/*
1.12 + * LIBOIL - Library of Optimized Inner Loops
1.13 + * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
1.14 + * All rights reserved.
1.15 + *
1.16 + * Redistribution and use in source and binary forms, with or without
1.17 + * modification, are permitted provided that the following conditions
1.18 + * are met:
1.19 + * 1. Redistributions of source code must retain the above copyright
1.20 + * notice, this list of conditions and the following disclaimer.
1.21 + * 2. Redistributions in binary form must reproduce the above copyright
1.22 + * notice, this list of conditions and the following disclaimer in the
1.23 + * documentation and/or other materials provided with the distribution.
1.24 + *
1.25 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1.26 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1.27 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.28 + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1.29 + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1.30 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1.31 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1.32 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1.33 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1.34 + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1.35 + * POSSIBILITY OF SUCH DAMAGE.
1.36 + */
1.37 +
1.38 +#ifdef HAVE_CONFIG_H
1.39 +#include "config.h"
1.40 +#endif
1.41 +
1.42 +#include <math.h>
1.43 +
1.44 +#include <liboil/liboil.h>
1.45 +#include <liboil/liboilclasses.h>
1.46 +
1.47 +EOF
1.48 +
1.49 +
1.50 +
1.51 +# Print the C "pointer" implementation of a binary kernel: one element per
1.52 +# loop pass, advancing dest/src1/src2.  Args: kernel name, precision, C operator.
1.53 +sub binary_pointer {
1.54 +  my ($kernel, $precision, $operator) = @_;
1.55 +  my $type = 'oil_type_' . $precision;
1.56 +
1.57 +  # The here-doc interpolates the values above into the C template.
1.58 +  print <<"EOF";
1.59 +static void
1.60 +${kernel}_${precision}_pointer (${type} *dest, ${type} *src1, ${type} *src2, int n)
1.61 +{
1.62 + while (n) {
1.63 + *dest = *src1 ${operator} *src2;
1.64 + dest++;
1.65 + src1++;
1.66 + src2++;
1.67 + n--;
1.68 + }
1.69 +}
1.70 +OIL_DEFINE_IMPL (${kernel}_${precision}_pointer, ${kernel}_${precision});
1.71 +
1.72 +EOF
1.73 +
1.74 +}
1.75 +
1.76 +# Print the C "unroll2" implementation of a binary kernel: one leading element
1.77 +# when n is odd, then two per loop pass.  Args: kernel name, precision, C operator.
1.78 +sub binary_unroll2 {
1.79 +  my ($kernel, $precision, $operator) = @_;
1.80 +  my $type = 'oil_type_' . $precision;
1.81 +
1.82 +  # The here-doc interpolates the values above into the C template.
1.83 +  print <<"EOF";
1.84 +static void
1.85 +${kernel}_${precision}_unroll2 (${type} *dest, ${type} *src1, ${type} *src2, int n)
1.86 +{
1.87 + int i;
1.88 +
1.89 + if (n & 1) {
1.90 + dest[0] = src1[0] ${operator} src2[0];
1.91 + dest++;
1.92 + src1++;
1.93 + src2++;
1.94 + n--;
1.95 + }
1.96 + for(i=0;i<n;i+=2){
1.97 + dest[i] = src1[i] ${operator} src2[i];
1.98 + dest[i+1] = src1[i+1] ${operator} src2[i+1];
1.99 + }
1.100 +}
1.101 +OIL_DEFINE_IMPL (${kernel}_${precision}_unroll2, ${kernel}_${precision});
1.102 +
1.103 +EOF
1.104 +
1.105 +}
1.106 +
1.107 +# Print the C "unroll4a" implementation of a binary kernel: single elements
1.108 +# while n is not a multiple of 4, then four per loop pass.  Args as for siblings.
1.109 +sub binary_unroll4a {
1.110 +  my ($kernel, $precision, $operator) = @_;
1.111 +  my $type = 'oil_type_' . $precision;
1.112 +
1.113 +  # The here-doc interpolates the values above into the C template.
1.114 +  print <<"EOF";
1.115 +static void
1.116 +${kernel}_${precision}_unroll4a (${type} *dest, ${type} *src1, ${type} *src2, int n)
1.117 +{
1.118 + int i;
1.119 +
1.120 + while (n & 3) {
1.121 + dest[0] = src1[0] ${operator} src2[0];
1.122 + dest++;
1.123 + src1++;
1.124 + src2++;
1.125 + n--;
1.126 + }
1.127 + for(i=0;i<n;i+=4){
1.128 + dest[i] = src1[i] ${operator} src2[i];
1.129 + dest[i+1] = src1[i+1] ${operator} src2[i+1];
1.130 + dest[i+2] = src1[i+2] ${operator} src2[i+2];
1.131 + dest[i+3] = src1[i+3] ${operator} src2[i+3];
1.132 + }
1.133 +}
1.134 +OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4a, ${kernel}_${precision});
1.135 +
1.136 +EOF
1.137 +
1.138 +}
1.139 +
1.140 +# Print the C "unroll4b" implementation of a binary kernel: four indexed elements
1.141 +# per pass up to n&~3, then the remainder one by one.  Args as for siblings.
1.142 +sub binary_unroll4b {
1.143 +  my ($kernel, $precision, $operator) = @_;
1.144 +  my $type = 'oil_type_' . $precision;
1.145 +
1.146 +  # The here-doc interpolates the values above into the C template.
1.147 +  print <<"EOF";
1.148 +static void
1.149 +${kernel}_${precision}_unroll4b (${type} *dest, ${type} *src1, ${type} *src2, int n)
1.150 +{
1.151 + int i;
1.152 +
1.153 + for(i=0;i<(n&(~0x3));i+=4){
1.154 + dest[i+0] = src1[i+0] ${operator} src2[i+0];
1.155 + dest[i+1] = src1[i+1] ${operator} src2[i+1];
1.156 + dest[i+2] = src1[i+2] ${operator} src2[i+2];
1.157 + dest[i+3] = src1[i+3] ${operator} src2[i+3];
1.158 + }
1.159 + for(;i<n;i++){
1.160 + dest[i] = src1[i] ${operator} src2[i];
1.161 + }
1.162 +}
1.163 +OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4b, ${kernel}_${precision});
1.164 +
1.165 +EOF
1.166 +
1.167 +}
1.168 +
1.169 +# Print the C "unroll4c" implementation of a binary kernel: like unroll4b's loop
1.170 +# shape, but stepping post-incremented pointers.  Args as for siblings.
1.171 +sub binary_unroll4c {
1.172 +  my ($kernel, $precision, $operator) = @_;
1.173 +  my $type = 'oil_type_' . $precision;
1.174 +
1.175 +  # The here-doc interpolates the values above into the C template.
1.176 +  print <<"EOF";
1.177 +static void
1.178 +${kernel}_${precision}_unroll4c (${type} *dest, ${type} *src1, ${type} *src2, int n)
1.179 +{
1.180 + int i;
1.181 +
1.182 + for(i=0;i<(n&(~0x3));i+=4){
1.183 + *dest++ = *src1++ ${operator} *src2++;
1.184 + *dest++ = *src1++ ${operator} *src2++;
1.185 + *dest++ = *src1++ ${operator} *src2++;
1.186 + *dest++ = *src1++ ${operator} *src2++;
1.187 + }
1.188 + for(;i<n;i++){
1.189 + *dest++ = *src1++ ${operator} *src2++;
1.190 + }
1.191 +}
1.192 +OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4c, ${kernel}_${precision});
1.193 +
1.194 +EOF
1.195 +
1.196 +}
1.197 +
1.198 +# Kernel name => C infix operator substituted into the templates.
1.199 +my %binary_operators = (
1.200 +  "add" => "+", "subtract" => "-",
1.201 +  "multiply" => "*", "divide" => "/",
1.202 +);
1.203 +my @types = ( "f32", "f64" );
1.204 +# Walk the kernels in sorted-name order: each() returns hash entries in an
1.205 +# unspecified order, so the layout of the generated file was not reproducible.
1.206 +for my $name (sort keys %binary_operators) {
1.207 +  my $op = $binary_operators{$name};
1.208 +  for my $prec (@types) {
1.209 +    binary_pointer($name, $prec, $op);
1.210 +    binary_unroll2($name, $prec, $op);
1.211 +    binary_unroll4a($name, $prec, $op);
1.212 +    binary_unroll4b($name, $prec, $op);
1.213 +    binary_unroll4c($name, $prec, $op);
1.214 +  }
1.215 +}
1.216 +# Generation is complete; nothing after this statement is executed.
1.217 +exit 0;
1.218 +# Unreachable: 'exit 0;' above ends the script; the loop already makes these 40 calls.
1.219 +binary_pointer("subtract", "f32", "-");
1.220 +binary_unroll2("subtract", "f32", "-");
1.221 +binary_unroll4a("subtract", "f32", "-");
1.222 +binary_unroll4b("subtract", "f32", "-");
1.223 +binary_unroll4c("subtract", "f32", "-");
1.224 +
1.225 +binary_pointer("add", "f32", "+");
1.226 +binary_unroll2("add", "f32", "+");
1.227 +binary_unroll4a("add", "f32", "+");
1.228 +binary_unroll4b("add", "f32", "+");
1.229 +binary_unroll4c("add", "f32", "+");
1.230 +
1.231 +binary_pointer("multiply", "f32", "*");
1.232 +binary_unroll2("multiply", "f32", "*");
1.233 +binary_unroll4a("multiply", "f32", "*");
1.234 +binary_unroll4b("multiply", "f32", "*");
1.235 +binary_unroll4c("multiply", "f32", "*");
1.236 +
1.237 +binary_pointer("divide", "f32", "/");
1.238 +binary_unroll2("divide", "f32", "/");
1.239 +binary_unroll4a("divide", "f32", "/");
1.240 +binary_unroll4b("divide", "f32", "/");
1.241 +binary_unroll4c("divide", "f32", "/");
1.242 +
1.243 +binary_pointer("subtract", "f64", "-");
1.244 +binary_unroll2("subtract", "f64", "-");
1.245 +binary_unroll4a("subtract", "f64", "-");
1.246 +binary_unroll4b("subtract", "f64", "-");
1.247 +binary_unroll4c("subtract", "f64", "-");
1.248 +
1.249 +binary_pointer("add", "f64", "+");
1.250 +binary_unroll2("add", "f64", "+");
1.251 +binary_unroll4a("add", "f64", "+");
1.252 +binary_unroll4b("add", "f64", "+");
1.253 +binary_unroll4c("add", "f64", "+");
1.254 +
1.255 +binary_pointer("multiply", "f64", "*");
1.256 +binary_unroll2("multiply", "f64", "*");
1.257 +binary_unroll4a("multiply", "f64", "*");
1.258 +binary_unroll4b("multiply", "f64", "*");
1.259 +binary_unroll4c("multiply", "f64", "*");
1.260 +
1.261 +binary_pointer("divide", "f64", "/");
1.262 +binary_unroll2("divide", "f64", "/");
1.263 +binary_unroll4a("divide", "f64", "/");
1.264 +binary_unroll4b("divide", "f64", "/");
1.265 +binary_unroll4c("divide", "f64", "/");
1.266 +# Unreachable (after 'exit 0;') and never read in this script: C reference loops kept as text.
1.267 +$blah = "
1.268 +static void
1.269 +subtract_f32_ref (float *dest, float *src1, float *src2, int n)
1.270 +{
1.271 + int i;
1.272 +
1.273 + for(i=0;i<n;i++){
1.274 + dest[i] = src1[i] - src2[i];
1.275 + }
1.276 +}
1.277 +OIL_DEFINE_IMPL (subtract_f32_ref, subtract_f32);
1.278 +
1.279 +static void
1.280 +multiply_f32_ref (float *dest, float *src1, float *src2, int n)
1.281 +{
1.282 + int i;
1.283 +
1.284 + for(i=0;i<n;i++){
1.285 + dest[i] = src1[i] * src2[i];
1.286 + }
1.287 +}
1.288 +OIL_DEFINE_IMPL (multiply_f32_ref, multiply_f32);
1.289 +
1.290 +static void
1.291 +divide_f32_ref (float *dest, float *src1, float *src2, int n)
1.292 +{
1.293 + int i;
1.294 +
1.295 + for(i=0;i<n;i++){
1.296 + dest[i] = src1[i] / src2[i];
1.297 + }
1.298 +}
1.299 +OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);
1.300 +
1.301 +static void
1.302 +minimum_f32_ref (float *dest, float *src1, float *src2, int n)
1.303 +{
1.304 + int i;
1.305 +
1.306 + for(i=0;i<n;i++){
1.307 + dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
1.308 + }
1.309 +}
1.310 +OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);
1.311 +
1.312 +static void
1.313 +maximum_f32_ref (float *dest, float *src1, float *src2, int n)
1.314 +{
1.315 + int i;
1.316 +
1.317 + for(i=0;i<n;i++){
1.318 + dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
1.319 + }
1.320 +}
1.321 +OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);
1.322 +
1.323 +static void
1.324 +negative_f32_ref (float *dest, float *src1, int n)
1.325 +{
1.326 + int i;
1.327 +
1.328 + for(i=0;i<n;i++){
1.329 + dest[i] = -src1[i];
1.330 + }
1.331 +}
1.332 +OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);
1.333 +
1.334 +static void
1.335 +inverse_f32_ref (float *dest, float *src1, int n)
1.336 +{
1.337 + int i;
1.338 +
1.339 + for(i=0;i<n;i++){
1.340 + dest[i] = 1.0/src1[i];
1.341 + }
1.342 +}
1.343 +OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);
1.344 +
1.345 +static void
1.346 +sign_f32_ref (float *dest, float *src1, int n)
1.347 +{
1.348 + int i;
1.349 +
1.350 + for(i=0;i<n;i++){
1.351 + dest[i] = (src1[i] < 0) ? -src1[i] : src1[i];
1.352 + }
1.353 +}
1.354 +OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);
1.355 +
1.356 +static void
1.357 +floor_f32_ref (float *dest, float *src1, int n)
1.358 +{
1.359 + int i;
1.360 +
1.361 + for(i=0;i<n;i++){
1.362 + dest[i] = floor(src1[i]);
1.363 + }
1.364 +}
1.365 +OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);
1.366 +
1.367 +
1.368 +
1.369 +static void
1.370 +scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
1.371 +{
1.372 + int i;
1.373 +
1.374 + for(i=0;i<n;i++){
1.375 + dest[i] = src1[i] + src2[0];
1.376 + }
1.377 +}
1.378 +OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);
1.379 +
1.380 +static void
1.381 +scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
1.382 +{
1.383 + int i;
1.384 +
1.385 + for(i=0;i<n;i++){
1.386 + dest[i] = src1[i] * src2[0];
1.387 + }
1.388 +}
1.389 +OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);
1.390 +
1.391 +
1.392 +";