#!/usr/bin/perl
#
# os/ossrv/genericopenlibs/liboil/src/math/generate_math.pl
# author sl
# Tue, 10 Jun 2014 14:32:02 +0200
# changeset 1 260cb5ec6c19
# permissions -rw-r--r--
# Update contrib.

# Emit the fixed preamble of the generated C file on STDOUT: the
# "autogenerated" banner, the licence text and the #include lines.
#
# The heredoc is single-quoted on purpose.  With an interpolating heredoc
# Perl treats "@schleef" in the e-mail address as an array, so the address
# is silently dropped from the output (and the script does not compile at
# all under "use strict").  Nothing in this text needs interpolation.
print <<'EOF';
/* This file is autogenerated.  Do not edit. */
/*
 * LIBOIL - Library of Optimized Inner Loops
 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <math.h>

#include <liboil/liboil.h>
#include <liboil/liboilclasses.h>

EOF

# binary_pointer(KERNEL, PRECISION, OPERATOR)
#
# Prints to STDOUT the C source of a function named
# KERNEL_PRECISION_pointer that applies the infix OPERATOR element by
# element, advancing the dest/src1/src2 pointers and counting n down,
# followed by an OIL_DEFINE_IMPL line that pairs the function with
# KERNEL_PRECISION.
#
#   KERNEL    - kernel name used in the C identifiers (e.g. "add")
#   PRECISION - type suffix; the C element type is "oil_type_PRECISION"
#   OPERATOR  - C infix operator inserted between the operands (e.g. "+")
#
# Returns whatever print returns.
#
# NOTE(review): the emitted loop is "while (n)", so it only terminates
# sensibly for n >= 0 -- presumably callers never pass a negative count;
# confirm.
sub binary_pointer
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_pointer (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  while (n) {
    *dest = *src1 ${operator} *src2;
    dest++;
    src1++;
    src2++;
    n--;
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_pointer, ${kernel}_${precision});

EOF
}

# binary_unroll2(KERNEL, PRECISION, OPERATOR)
#
# Prints to STDOUT the C source of a function named
# KERNEL_PRECISION_unroll2.  The emitted function first handles a single
# element when n is odd, then walks the remaining elements two per loop
# iteration.  An OIL_DEFINE_IMPL line pairing the function with
# KERNEL_PRECISION follows.
#
#   KERNEL    - kernel name used in the C identifiers (e.g. "add")
#   PRECISION - type suffix; the C element type is "oil_type_PRECISION"
#   OPERATOR  - C infix operator inserted between the operands (e.g. "+")
#
# Returns whatever print returns.
sub binary_unroll2
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_unroll2 (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  if (n & 1) {
    dest[0] = src1[0] ${operator} src2[0];
    dest++;
    src1++;
    src2++;
    n--;
  }
  for(i=0;i<n;i+=2){
    dest[i] = src1[i] ${operator} src2[i];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_unroll2, ${kernel}_${precision});

EOF
}

# binary_unroll4a(KERNEL, PRECISION, OPERATOR)
#
# Prints to STDOUT the C source of a function named
# KERNEL_PRECISION_unroll4a.  The emitted function handles elements one at
# a time while (n & 3) is non-zero, then walks the remaining elements four
# per loop iteration.  An OIL_DEFINE_IMPL line pairing the function with
# KERNEL_PRECISION follows.
#
#   KERNEL    - kernel name used in the C identifiers (e.g. "add")
#   PRECISION - type suffix; the C element type is "oil_type_PRECISION"
#   OPERATOR  - C infix operator inserted between the operands (e.g. "+")
#
# Returns whatever print returns.
sub binary_unroll4a
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_unroll4a (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  while (n & 3) {
    dest[0] = src1[0] ${operator} src2[0];
    dest++;
    src1++;
    src2++;
    n--;
  }
  for(i=0;i<n;i+=4){
    dest[i] = src1[i] ${operator} src2[i];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
    dest[i+2] = src1[i+2] ${operator} src2[i+2];
    dest[i+3] = src1[i+3] ${operator} src2[i+3];
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4a, ${kernel}_${precision});

EOF
}

# binary_unroll4b(KERNEL, PRECISION, OPERATOR)
#
# Prints to STDOUT the C source of a function named
# KERNEL_PRECISION_unroll4b.  The emitted function indexes the arrays four
# elements per iteration while i < (n & ~0x3), then finishes the remaining
# elements one at a time.  An OIL_DEFINE_IMPL line pairing the function
# with KERNEL_PRECISION follows.
#
#   KERNEL    - kernel name used in the C identifiers (e.g. "add")
#   PRECISION - type suffix; the C element type is "oil_type_PRECISION"
#   OPERATOR  - C infix operator inserted between the operands (e.g. "+")
#
# Returns whatever print returns.
sub binary_unroll4b
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_unroll4b (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  for(i=0;i<(n&(~0x3));i+=4){
    dest[i+0] = src1[i+0] ${operator} src2[i+0];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
    dest[i+2] = src1[i+2] ${operator} src2[i+2];
    dest[i+3] = src1[i+3] ${operator} src2[i+3];
  }
  for(;i<n;i++){
    dest[i] = src1[i] ${operator} src2[i];
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4b, ${kernel}_${precision});

EOF
}

# binary_unroll4c(KERNEL, PRECISION, OPERATOR)
#
# Prints to STDOUT the C source of a function named
# KERNEL_PRECISION_unroll4c.  The emitted function has the same loop
# bounds as the unroll4b variant (four elements per iteration while
# i < (n & ~0x3), then a one-at-a-time tail) but accesses the data through
# post-incremented pointers instead of array indexing.  An OIL_DEFINE_IMPL
# line pairing the function with KERNEL_PRECISION follows.
#
#   KERNEL    - kernel name used in the C identifiers (e.g. "add")
#   PRECISION - type suffix; the C element type is "oil_type_PRECISION"
#   OPERATOR  - C infix operator inserted between the operands (e.g. "+")
#
# Returns whatever print returns.
sub binary_unroll4c
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_unroll4c (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  for(i=0;i<(n&(~0x3));i+=4){
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
  }
  for(;i<n;i++){
    *dest++ = *src1++ ${operator} *src2++;
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4c, ${kernel}_${precision});

EOF
}

# Kernel name => C infix operator that the kernel applies element-wise.
my %binary_operators = (
    "add"      => "+",
    "subtract" => "-",
    "multiply" => "*",
    "divide"   => "/",
);

# Precision suffixes; each becomes part of the C identifiers and of the
# "oil_type_<suffix>" element type.
my @types = ( "f32", "f64" );

# Emit every implementation variant for each (kernel, precision) pair.
#
# The kernel names are visited in sorted order rather than with each():
# hash iteration order is unspecified and differs between runs, which
# would reorder the functions in the generated file each time the script
# is run and produce spurious diffs.
foreach my $name (sort keys %binary_operators) {
    my $op = $binary_operators{$name};

    foreach my $prec (@types) {
        binary_pointer($name, $prec, $op);
        binary_unroll2($name, $prec, $op);
        binary_unroll4a($name, $prec, $op);
        binary_unroll4b($name, $prec, $op);
        binary_unroll4c($name, $prec, $op);
    }
}

exit 0;

__END__

# Nothing from here on is compiled or run.  It sat after an unconditional
# "exit 0;" and so was already unreachable; it is fenced off with __END__
# to make that explicit while keeping the text for reference.
#
# It consists of (1) the hand-written call list that the operator/type
# loop replaced, and (2) a string of reference implementations that was
# assigned to $blah and never used.
#
# NOTE(review): sign_f32_ref below returns -x for x < 0 and x otherwise,
# i.e. an absolute value rather than a sign -- confirm before reusing.
# NOTE(review): subtract_f32_ref and multiply_f32_ref use OIL_DEFINE_IMPL
# while the other *_ref functions use OIL_DEFINE_IMPL_REF -- confirm which
# is intended before reusing.

binary_pointer("subtract", "f32", "-");
binary_unroll2("subtract", "f32", "-");
binary_unroll4a("subtract", "f32", "-");
binary_unroll4b("subtract", "f32", "-");
binary_unroll4c("subtract", "f32", "-");

binary_pointer("add", "f32", "+");
binary_unroll2("add", "f32", "+");
binary_unroll4a("add", "f32", "+");
binary_unroll4b("add", "f32", "+");
binary_unroll4c("add", "f32", "+");

binary_pointer("multiply", "f32", "*");
binary_unroll2("multiply", "f32", "*");
binary_unroll4a("multiply", "f32", "*");
binary_unroll4b("multiply", "f32", "*");
binary_unroll4c("multiply", "f32", "*");

binary_pointer("divide", "f32", "/");
binary_unroll2("divide", "f32", "/");
binary_unroll4a("divide", "f32", "/");
binary_unroll4b("divide", "f32", "/");
binary_unroll4c("divide", "f32", "/");

binary_pointer("subtract", "f64", "-");
binary_unroll2("subtract", "f64", "-");
binary_unroll4a("subtract", "f64", "-");
binary_unroll4b("subtract", "f64", "-");
binary_unroll4c("subtract", "f64", "-");

binary_pointer("add", "f64", "+");
binary_unroll2("add", "f64", "+");
binary_unroll4a("add", "f64", "+");
binary_unroll4b("add", "f64", "+");
binary_unroll4c("add", "f64", "+");

binary_pointer("multiply", "f64", "*");
binary_unroll2("multiply", "f64", "*");
binary_unroll4a("multiply", "f64", "*");
binary_unroll4b("multiply", "f64", "*");
binary_unroll4c("multiply", "f64", "*");

binary_pointer("divide", "f64", "/");
binary_unroll2("divide", "f64", "/");
binary_unroll4a("divide", "f64", "/");
binary_unroll4b("divide", "f64", "/");
binary_unroll4c("divide", "f64", "/");

$blah = "
static void
subtract_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] - src2[i];
  }
}
OIL_DEFINE_IMPL (subtract_f32_ref, subtract_f32);

static void
multiply_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] * src2[i];
  }
}
OIL_DEFINE_IMPL (multiply_f32_ref, multiply_f32);

static void
divide_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] / src2[i];
  }
}
OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);

static void
minimum_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
  }
}
OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);

static void
maximum_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
  }
}
OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);

static void
negative_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = -src1[i];
  }
}
OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);

static void
inverse_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = 1.0/src1[i];
  }
}
OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);

static void
sign_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] < 0) ? -src1[i] : src1[i];
  }
}
OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);

static void
floor_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = floor(src1[i]);
  }
}
OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);



static void
scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] + src2[0];
  }
}
OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);

static void
scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] * src2[0];
  }
}
OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);


";