os/ossrv/genericopenlibs/liboil/src/math/generate_math.pl
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 #!/usr/bin/perl
     2 #
     3 
     4 
     5 
     6 print <<EOF
     7 /* This file is autogenerated.  Do not edit. */
     8 /*
     9  * LIBOIL - Library of Optimized Inner Loops
    10  * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
    11  * All rights reserved.
    12  *
    13  * Redistribution and use in source and binary forms, with or without
    14  * modification, are permitted provided that the following conditions
    15  * are met:
    16  * 1. Redistributions of source code must retain the above copyright
    17  *    notice, this list of conditions and the following disclaimer.
    18  * 2. Redistributions in binary form must reproduce the above copyright
    19  *    notice, this list of conditions and the following disclaimer in the
    20  *    documentation and/or other materials provided with the distribution.
    21  * 
    22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
    23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    24  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
    26  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    27  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    28  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    30  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
    31  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    32  * POSSIBILITY OF SUCH DAMAGE.
    33  */
    34 
    35 #ifdef HAVE_CONFIG_H
    36 #include "config.h"
    37 #endif
    38 
    39 #include <math.h>
    40 
    41 #include <liboil/liboil.h>
    42 #include <liboil/liboilclasses.h>
    43 
    44 EOF
    45 ;
    46 
    47 
    48 sub binary_pointer
    49 {
    50 	my $kernel = shift;
    51 	my $precision = shift;
    52 	my $type = "oil_type_$precision";
    53 	my $operator = shift;
    54 
    55 	print <<EOF
    56 static void
    57 ${kernel}_${precision}_pointer (${type} *dest, ${type} *src1, ${type} *src2, int n)
    58 {
    59   while (n) {
    60     *dest = *src1 ${operator} *src2;
    61     dest++;
    62     src1++;
    63     src2++;
    64     n--;
    65   }
    66 }
    67 OIL_DEFINE_IMPL (${kernel}_${precision}_pointer, ${kernel}_${precision});
    68 
    69 EOF
    70 ;
    71 }
    72 
    73 sub binary_unroll2
    74 {
    75 	my $kernel = shift;
    76 	my $precision = shift;
    77 	my $type = "oil_type_$precision";
    78 	my $operator = shift;
    79 
    80 	print <<EOF
    81 static void
    82 ${kernel}_${precision}_unroll2 (${type} *dest, ${type} *src1, ${type} *src2, int n)
    83 {
    84   int i;
    85 
    86   if (n & 1) {
    87     dest[0] = src1[0] ${operator} src2[0];
    88     dest++;
    89     src1++;
    90     src2++;
    91     n--;
    92   }
    93   for(i=0;i<n;i+=2){
    94     dest[i] = src1[i] ${operator} src2[i];
    95     dest[i+1] = src1[i+1] ${operator} src2[i+1];
    96   }
    97 }
    98 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll2, ${kernel}_${precision});
    99 
   100 EOF
   101 ;
   102 }
   103 
   104 sub binary_unroll4a
   105 {
   106 	my $kernel = shift;
   107 	my $precision = shift;
   108 	my $type = "oil_type_$precision";
   109 	my $operator = shift;
   110 
   111 	print <<EOF
   112 static void
   113 ${kernel}_${precision}_unroll4a (${type} *dest, ${type} *src1, ${type} *src2, int n)
   114 {
   115   int i;
   116 
   117   while (n & 3) {
   118     dest[0] = src1[0] ${operator} src2[0];
   119     dest++;
   120     src1++;
   121     src2++;
   122     n--;
   123   }
   124   for(i=0;i<n;i+=4){
   125     dest[i] = src1[i] ${operator} src2[i];
   126     dest[i+1] = src1[i+1] ${operator} src2[i+1];
   127     dest[i+2] = src1[i+2] ${operator} src2[i+2];
   128     dest[i+3] = src1[i+3] ${operator} src2[i+3];
   129   }
   130 }
   131 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4a, ${kernel}_${precision});
   132 
   133 EOF
   134 ;
   135 }
   136 
   137 sub binary_unroll4b
   138 {
   139 	my $kernel = shift;
   140 	my $precision = shift;
   141 	my $type = "oil_type_$precision";
   142 	my $operator = shift;
   143 
   144 	print <<EOF
   145 static void
   146 ${kernel}_${precision}_unroll4b (${type} *dest, ${type} *src1, ${type} *src2, int n)
   147 {
   148   int i;
   149 
   150   for(i=0;i<(n&(~0x3));i+=4){
   151     dest[i+0] = src1[i+0] ${operator} src2[i+0];
   152     dest[i+1] = src1[i+1] ${operator} src2[i+1];
   153     dest[i+2] = src1[i+2] ${operator} src2[i+2];
   154     dest[i+3] = src1[i+3] ${operator} src2[i+3];
   155   }
   156   for(;i<n;i++){
   157     dest[i] = src1[i] ${operator} src2[i];
   158   }
   159 }
   160 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4b, ${kernel}_${precision});
   161 
   162 EOF
   163 ;
   164 }
   165 
   166 sub binary_unroll4c
   167 {
   168 	my $kernel = shift;
   169 	my $precision = shift;
   170 	my $type = "oil_type_$precision";
   171 	my $operator = shift;
   172 
   173 	print <<EOF
   174 static void
   175 ${kernel}_${precision}_unroll4c (${type} *dest, ${type} *src1, ${type} *src2, int n)
   176 {
   177   int i;
   178 
   179   for(i=0;i<(n&(~0x3));i+=4){
   180     *dest++ = *src1++ ${operator} *src2++;
   181     *dest++ = *src1++ ${operator} *src2++;
   182     *dest++ = *src1++ ${operator} *src2++;
   183     *dest++ = *src1++ ${operator} *src2++;
   184   }
   185   for(;i<n;i++){
   186     *dest++ = *src1++ ${operator} *src2++;
   187   }
   188 }
   189 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4c, ${kernel}_${precision});
   190 
   191 EOF
   192 ;
   193 }
   194 
   195 my %binary_operators = (
   196  "add" => "+",
   197  "subtract" => "-",
   198  "multiply" => "*",
   199  "divide" => "/"
   200 );
   201 
   202 my @types = ( "f32", "f64" );
   203 
   204 while ( ($name, $op) = each %binary_operators ) {
   205   foreach $prec (@types) {
   206     binary_pointer($name, $prec, $op);
   207     binary_unroll2($name, $prec, $op);
   208     binary_unroll4a($name, $prec, $op);
   209     binary_unroll4b($name, $prec, $op);
   210     binary_unroll4c($name, $prec, $op);
   211   }
   212 }
   213 
   214 exit 0;
   215 
   216 binary_pointer("subtract", "f32", "-");
   217 binary_unroll2("subtract", "f32", "-");
   218 binary_unroll4a("subtract", "f32", "-");
   219 binary_unroll4b("subtract", "f32", "-");
   220 binary_unroll4c("subtract", "f32", "-");
   221 
   222 binary_pointer("add", "f32", "+");
   223 binary_unroll2("add", "f32", "+");
   224 binary_unroll4a("add", "f32", "+");
   225 binary_unroll4b("add", "f32", "+");
   226 binary_unroll4c("add", "f32", "+");
   227 
   228 binary_pointer("multiply", "f32", "*");
   229 binary_unroll2("multiply", "f32", "*");
   230 binary_unroll4a("multiply", "f32", "*");
   231 binary_unroll4b("multiply", "f32", "*");
   232 binary_unroll4c("multiply", "f32", "*");
   233 
   234 binary_pointer("divide", "f32", "/");
   235 binary_unroll2("divide", "f32", "/");
   236 binary_unroll4a("divide", "f32", "/");
   237 binary_unroll4b("divide", "f32", "/");
   238 binary_unroll4c("divide", "f32", "/");
   239 
   240 binary_pointer("subtract", "f64", "-");
   241 binary_unroll2("subtract", "f64", "-");
   242 binary_unroll4a("subtract", "f64", "-");
   243 binary_unroll4b("subtract", "f64", "-");
   244 binary_unroll4c("subtract", "f64", "-");
   245 
   246 binary_pointer("add", "f64", "+");
   247 binary_unroll2("add", "f64", "+");
   248 binary_unroll4a("add", "f64", "+");
   249 binary_unroll4b("add", "f64", "+");
   250 binary_unroll4c("add", "f64", "+");
   251 
   252 binary_pointer("multiply", "f64", "*");
   253 binary_unroll2("multiply", "f64", "*");
   254 binary_unroll4a("multiply", "f64", "*");
   255 binary_unroll4b("multiply", "f64", "*");
   256 binary_unroll4c("multiply", "f64", "*");
   257 
   258 binary_pointer("divide", "f64", "/");
   259 binary_unroll2("divide", "f64", "/");
   260 binary_unroll4a("divide", "f64", "/");
   261 binary_unroll4b("divide", "f64", "/");
   262 binary_unroll4c("divide", "f64", "/");
   263 
   264 $blah = "
   265 static void
   266 subtract_f32_ref (float *dest, float *src1, float *src2, int n)
   267 {
   268   int i;
   269 
   270   for(i=0;i<n;i++){
   271     dest[i] = src1[i] - src2[i];
   272   }
   273 }
   274 OIL_DEFINE_IMPL (subtract_f32_ref, subtract_f32);
   275 
   276 static void
   277 multiply_f32_ref (float *dest, float *src1, float *src2, int n)
   278 {
   279   int i;
   280 
   281   for(i=0;i<n;i++){
   282     dest[i] = src1[i] * src2[i];
   283   }
   284 }
   285 OIL_DEFINE_IMPL (multiply_f32_ref, multiply_f32);
   286 
   287 static void
   288 divide_f32_ref (float *dest, float *src1, float *src2, int n)
   289 {
   290   int i;
   291 
   292   for(i=0;i<n;i++){
   293     dest[i] = src1[i] / src2[i];
   294   }
   295 }
   296 OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);
   297 
   298 static void
   299 minimum_f32_ref (float *dest, float *src1, float *src2, int n)
   300 {
   301   int i;
   302 
   303   for(i=0;i<n;i++){
   304     dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
   305   }
   306 }
   307 OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);
   308 
   309 static void
   310 maximum_f32_ref (float *dest, float *src1, float *src2, int n)
   311 {
   312   int i;
   313 
   314   for(i=0;i<n;i++){
   315     dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
   316   }
   317 }
   318 OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);
   319 
   320 static void
   321 negative_f32_ref (float *dest, float *src1, int n)
   322 {
   323   int i;
   324 
   325   for(i=0;i<n;i++){
   326     dest[i] = -src1[i];
   327   }
   328 }
   329 OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);
   330 
   331 static void
   332 inverse_f32_ref (float *dest, float *src1, int n)
   333 {
   334   int i;
   335 
   336   for(i=0;i<n;i++){
   337     dest[i] = 1.0/src1[i];
   338   }
   339 }
   340 OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);
   341 
   342 static void
   343 sign_f32_ref (float *dest, float *src1, int n)
   344 {
   345   int i;
   346 
   347   for(i=0;i<n;i++){
   348     dest[i] = (src1[i] < 0) ? -src1[i] : src1[i];
   349   }
   350 }
   351 OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);
   352 
   353 static void
   354 floor_f32_ref (float *dest, float *src1, int n)
   355 {
   356   int i;
   357 
   358   for(i=0;i<n;i++){
   359     dest[i] = floor(src1[i]);
   360   }
   361 }
   362 OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);
   363 
   364 
   365 
   366 static void
   367 scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
   368 {
   369   int i;
   370 
   371   for(i=0;i<n;i++){
   372     dest[i] = src1[i] + src2[0];
   373   }
   374 }
   375 OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);
   376 
   377 static void
   378 scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
   379 {
   380   int i;
   381 
   382   for(i=0;i<n;i++){
   383     dest[i] = src1[i] * src2[0];
   384   }
   385 }
   386 OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);
   387 
   388 
   389 ";