os/ossrv/genericopenlibs/liboil/src/math_sse_unroll2.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
 * Copyright (c) 2005
sl@0
     3
 *	Eric Anholt.  All rights reserved.
sl@0
     4
 *
sl@0
     5
 * Redistribution and use in source and binary forms, with or without
sl@0
     6
 * modification, are permitted provided that the following conditions
sl@0
     7
 * are met:
sl@0
     8
 * 1. Redistributions of source code must retain the above copyright
sl@0
     9
 *    notice, this list of conditions and the following disclaimer.
sl@0
    10
 * 2. Redistributions in binary form must reproduce the above copyright
sl@0
    11
 *    notice, this list of conditions and the following disclaimer in the
sl@0
    12
 *    documentation and/or other materials provided with the distribution.
sl@0
    13
 *
sl@0
    14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
sl@0
    15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
sl@0
    16
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
sl@0
    17
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
sl@0
    18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
sl@0
    19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
sl@0
    20
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
sl@0
    21
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
sl@0
    22
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
sl@0
    23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
sl@0
    24
 * SUCH DAMAGE.
sl@0
    25
 */
sl@0
    26
//Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
sl@0
    27
sl@0
    28
#ifdef HAVE_CONFIG_H
sl@0
    29
#include "config.h"
sl@0
    30
#endif
sl@0
    31
#include <liboil/liboilclasses.h>
sl@0
    32
#include <liboil/liboilfunction.h>
sl@0
    33
#include <emmintrin.h>
sl@0
    34
#include <xmmintrin.h>
sl@0
    35
sl@0
    36
#define SSE_FUNCTION __attribute__((force_align_arg_pointer))
sl@0
    37
sl@0
    38
/*
 * Element-wise sum: dest[k] = src1[k] + src2[k] for k in [0, n).
 *
 * Work is split in three phases: a scalar head that runs until the
 * destination address is a multiple of 16 bytes, a vector body that
 * handles 8 floats per pass (two 4-float halves, unaligned loads and
 * aligned stores), and a scalar tail for the leftovers.
 * A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
add_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n)
{
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    dest[i] = src1[i] + src2[i];
    i++;
  }

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 8) {
    __m128 lo, hi;

    lo = _mm_add_ps(_mm_loadu_ps(src1 + i), _mm_loadu_ps(src2 + i));
    _mm_store_ps(dest + i, lo);
    hi = _mm_add_ps(_mm_loadu_ps(src1 + i + 4), _mm_loadu_ps(src2 + i + 4));
    _mm_store_ps(dest + i + 4, hi);
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    dest[i] = src1[i] + src2[i];
  }
}
sl@0
    63
/* Hands add_f32_sse_unroll2 to liboil as a candidate for the add_f32 class.
 * The OIL_IMPL_FLAG_SSE tag presumably limits selection to SSE-capable CPUs
 * (the macro is defined outside this file -- confirm in liboilfunction.h). */
OIL_DEFINE_IMPL_FULL (add_f32_sse_unroll2, add_f32, OIL_IMPL_FLAG_SSE);
sl@0
    64
sl@0
    65
/*
 * Element-wise difference: dest[k] = src1[k] - src2[k] for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass (unaligned
 * loads, aligned stores), and a scalar tail finishes the remainder.
 * A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
subtract_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n)
{
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    dest[i] = src1[i] - src2[i];
    i++;
  }

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 8) {
    __m128 lo, hi;

    lo = _mm_sub_ps(_mm_loadu_ps(src1 + i), _mm_loadu_ps(src2 + i));
    _mm_store_ps(dest + i, lo);
    hi = _mm_sub_ps(_mm_loadu_ps(src1 + i + 4), _mm_loadu_ps(src2 + i + 4));
    _mm_store_ps(dest + i + 4, hi);
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    dest[i] = src1[i] - src2[i];
  }
}
sl@0
    90
/* Hands subtract_f32_sse_unroll2 to liboil as a candidate for the
 * subtract_f32 class. The OIL_IMPL_FLAG_SSE tag presumably limits selection
 * to SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (subtract_f32_sse_unroll2, subtract_f32, OIL_IMPL_FLAG_SSE);
sl@0
    91
sl@0
    92
/*
 * Element-wise product: dest[k] = src1[k] * src2[k] for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass (unaligned
 * loads, aligned stores), and a scalar tail finishes the remainder.
 * A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
multiply_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n)
{
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    dest[i] = src1[i] * src2[i];
    i++;
  }

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 8) {
    __m128 lo, hi;

    lo = _mm_mul_ps(_mm_loadu_ps(src1 + i), _mm_loadu_ps(src2 + i));
    _mm_store_ps(dest + i, lo);
    hi = _mm_mul_ps(_mm_loadu_ps(src1 + i + 4), _mm_loadu_ps(src2 + i + 4));
    _mm_store_ps(dest + i + 4, hi);
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    dest[i] = src1[i] * src2[i];
  }
}
sl@0
   117
/* Hands multiply_f32_sse_unroll2 to liboil as a candidate for the
 * multiply_f32 class. The OIL_IMPL_FLAG_SSE tag presumably limits selection
 * to SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (multiply_f32_sse_unroll2, multiply_f32, OIL_IMPL_FLAG_SSE);
sl@0
   118
sl@0
   119
/*
 * Element-wise quotient: dest[k] = src1[k] / src2[k] for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass (unaligned
 * loads, aligned stores), and a scalar tail finishes the remainder.
 * A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
divide_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n)
{
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    dest[i] = src1[i] / src2[i];
    i++;
  }

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 8) {
    __m128 lo, hi;

    lo = _mm_div_ps(_mm_loadu_ps(src1 + i), _mm_loadu_ps(src2 + i));
    _mm_store_ps(dest + i, lo);
    hi = _mm_div_ps(_mm_loadu_ps(src1 + i + 4), _mm_loadu_ps(src2 + i + 4));
    _mm_store_ps(dest + i + 4, hi);
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    dest[i] = src1[i] / src2[i];
  }
}
sl@0
   144
/* Hands divide_f32_sse_unroll2 to liboil as a candidate for the divide_f32
 * class. The OIL_IMPL_FLAG_SSE tag presumably limits selection to
 * SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (divide_f32_sse_unroll2, divide_f32, OIL_IMPL_FLAG_SSE);
sl@0
   145
sl@0
   146
/*
 * Element-wise minimum: dest[k] = (src1[k] < src2[k]) ? src1[k] : src2[k]
 * for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass with
 * _mm_min_ps (unaligned loads, aligned stores), and a scalar tail
 * finishes the remainder. A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
minimum_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n)
{
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    const float a = src1[i];
    const float b = src2[i];

    dest[i] = (a < b) ? a : b;
    i++;
  }

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 8) {
    __m128 lo, hi;

    lo = _mm_min_ps(_mm_loadu_ps(src1 + i), _mm_loadu_ps(src2 + i));
    _mm_store_ps(dest + i, lo);
    hi = _mm_min_ps(_mm_loadu_ps(src1 + i + 4), _mm_loadu_ps(src2 + i + 4));
    _mm_store_ps(dest + i + 4, hi);
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    const float a = src1[i];
    const float b = src2[i];

    dest[i] = (a < b) ? a : b;
  }
}
sl@0
   175
/* Hands minimum_f32_sse_unroll2 to liboil as a candidate for the minimum_f32
 * class. The OIL_IMPL_FLAG_SSE tag presumably limits selection to
 * SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (minimum_f32_sse_unroll2, minimum_f32, OIL_IMPL_FLAG_SSE);
sl@0
   176
sl@0
   177
/*
 * Element-wise maximum: dest[k] = (src1[k] > src2[k]) ? src1[k] : src2[k]
 * for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass with
 * _mm_max_ps (unaligned loads, aligned stores), and a scalar tail
 * finishes the remainder. A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
maximum_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n)
{
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    const float a = src1[i];
    const float b = src2[i];

    dest[i] = (a > b) ? a : b;
    i++;
  }

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 8) {
    __m128 lo, hi;

    lo = _mm_max_ps(_mm_loadu_ps(src1 + i), _mm_loadu_ps(src2 + i));
    _mm_store_ps(dest + i, lo);
    hi = _mm_max_ps(_mm_loadu_ps(src1 + i + 4), _mm_loadu_ps(src2 + i + 4));
    _mm_store_ps(dest + i + 4, hi);
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    const float a = src1[i];
    const float b = src2[i];

    dest[i] = (a > b) ? a : b;
  }
}
sl@0
   206
/* Hands maximum_f32_sse_unroll2 to liboil as a candidate for the maximum_f32
 * class. The OIL_IMPL_FLAG_SSE tag presumably limits selection to
 * SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (maximum_f32_sse_unroll2, maximum_f32, OIL_IMPL_FLAG_SSE);
sl@0
   207
sl@0
   208
/*
 * Element-wise reciprocal: dest[k] = 1.0 / src1[k] for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass (unaligned
 * loads, aligned stores), and a scalar tail finishes the remainder.
 * A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
inverse_f32_sse_unroll2 (float *dest, float *src1, int n)
{
  /* Numerator for the vector path: 1.0 in all four lanes. */
  const __m128 ones = _mm_set_ps1(1.0);
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    dest[i] = 1.0 / src1[i];
    i++;
  }

  /* Vector body.  A true division is used on purpose: _mm_rcp_ps gives
   * results that differ noticeably from the 1.0 / src1 reference.
   */
  while (n - i >= 8) {
    _mm_store_ps(dest + i, _mm_div_ps(ones, _mm_loadu_ps(src1 + i)));
    _mm_store_ps(dest + i + 4, _mm_div_ps(ones, _mm_loadu_ps(src1 + i + 4)));
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    dest[i] = 1.0 / src1[i];
  }
}
sl@0
   235
/* Hands inverse_f32_sse_unroll2 to liboil as a candidate for the inverse_f32
 * class. The OIL_IMPL_FLAG_SSE tag presumably limits selection to
 * SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (inverse_f32_sse_unroll2, inverse_f32, OIL_IMPL_FLAG_SSE);
sl@0
   236
sl@0
   237
/*
 * Element-wise negation: dest[k] = -src1[k] for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass (unaligned
 * loads, aligned stores), and a scalar tail finishes the remainder.
 * A non-positive n leaves dest untouched.
 *
 * The vector body flips the sign bit with XOR instead of computing
 * 0 - x.  Subtraction from zero maps +0.0 to +0.0 (and leaves the sign
 * of a NaN alone), whereas the scalar head and tail use unary minus,
 * which yields -0.0.  With subtraction the result for a given element
 * would depend on where it falls relative to the aligned region; the
 * XOR makes all three phases agree bit-for-bit.
 */
SSE_FUNCTION static void
negative_f32_sse_unroll2 (float *dest, float *src1, int n)
{
  /* -0.0f has only the sign bit set, so it serves as the XOR mask. */
  const __m128 sign_mask = _mm_set1_ps(-0.0f);

  /* Initial operations to align the destination pointer */
  for (; ((long)dest & 15) && (n > 0); n--) {
    *dest++ = -(*src1++);
  }
  for (; n >= 8; n -= 8) {
    __m128 xmm0;
    xmm0 = _mm_xor_ps(_mm_loadu_ps(src1), sign_mask);
    _mm_store_ps(dest, xmm0);
    xmm0 = _mm_xor_ps(_mm_loadu_ps(src1 + 4), sign_mask);
    _mm_store_ps(dest + 4, xmm0);
    dest += 8;
    src1 += 8;
  }
  for (; n > 0; n--) {
    *dest++ = -(*src1++);
  }
}
sl@0
   261
/* Hands negative_f32_sse_unroll2 to liboil as a candidate for the
 * negative_f32 class. The OIL_IMPL_FLAG_SSE tag presumably limits selection
 * to SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (negative_f32_sse_unroll2, negative_f32, OIL_IMPL_FLAG_SSE);
sl@0
   262
sl@0
   263
/*
 * Adds one scalar to every element: dest[k] = src1[k] + *val
 * for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass (unaligned
 * loads, aligned stores), and a scalar tail finishes the remainder.
 * *val is broadcast into a vector once, after the head has finished;
 * the scalar loops read *val afresh on every element.
 * A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
scalaradd_f32_ns_sse_unroll2 (float *dest, float *src1, float *val, int n)
{
  __m128 addend;
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    dest[i] = src1[i] + *val;
    i++;
  }

  /* Broadcast the scalar to all four lanes for the vector body. */
  addend = _mm_load_ps1(val);

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 8) {
    _mm_store_ps(dest + i, _mm_add_ps(_mm_loadu_ps(src1 + i), addend));
    _mm_store_ps(dest + i + 4, _mm_add_ps(_mm_loadu_ps(src1 + i + 4), addend));
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    dest[i] = src1[i] + *val;
  }
}
sl@0
   288
/* Hands scalaradd_f32_ns_sse_unroll2 to liboil as a candidate for the
 * scalaradd_f32_ns class. The OIL_IMPL_FLAG_SSE tag presumably limits
 * selection to SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (scalaradd_f32_ns_sse_unroll2, scalaradd_f32_ns, OIL_IMPL_FLAG_SSE);
sl@0
   289
sl@0
   290
/*
 * Scales every element by one scalar: dest[k] = src1[k] * *val
 * for k in [0, n).
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 8 floats per pass (unaligned
 * loads, aligned stores), and a scalar tail finishes the remainder.
 * *val is broadcast into a vector once, after the head has finished;
 * the scalar loops read *val afresh on every element.
 * A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
scalarmultiply_f32_ns_sse_unroll2 (float *dest, float *src1, float *val, int n)
{
  __m128 factor;
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    dest[i] = src1[i] * *val;
    i++;
  }

  /* Broadcast the scalar to all four lanes for the vector body. */
  factor = _mm_load_ps1(val);

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 8) {
    _mm_store_ps(dest + i, _mm_mul_ps(_mm_loadu_ps(src1 + i), factor));
    _mm_store_ps(dest + i + 4, _mm_mul_ps(_mm_loadu_ps(src1 + i + 4), factor));
    i += 8;
  }

  /* Scalar tail: fewer than 8 elements remain. */
  for (; i < n; i++) {
    dest[i] = src1[i] * *val;
  }
}
sl@0
   315
/* Hands scalarmultiply_f32_ns_sse_unroll2 to liboil as a candidate for the
 * scalarmultiply_f32_ns class. The OIL_IMPL_FLAG_SSE tag presumably limits
 * selection to SSE-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (scalarmultiply_f32_ns_sse_unroll2, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE);
sl@0
   316
sl@0
   317
/*
 * Scales every element by one scalar: dest[k] = src1[k] * *val
 * for k in [0, n), on doubles.
 *
 * A scalar head runs until the destination address is a multiple of
 * 16 bytes, a vector body then handles 4 doubles per pass (two
 * 2-double halves, unaligned loads and aligned stores), and a scalar
 * tail finishes the remainder.
 * *val is broadcast into a vector once, after the head has finished;
 * the scalar loops read *val afresh on every element.
 * A non-positive n leaves dest untouched.
 */
SSE_FUNCTION static void
scalarmultiply_f64_ns_sse2_unroll2 (double *dest, double *src1, double *val, int n)
{
  __m128d factor;
  int i = 0;

  /* Scalar head: advance until dest + i sits on a 16-byte boundary. */
  while (i < n && ((long)(dest + i) & 15) != 0) {
    dest[i] = src1[i] * *val;
    i++;
  }

  /* Broadcast the scalar to both lanes for the vector body. */
  factor = _mm_load_pd1(val);

  /* Vector body: each half is stored before the next half is loaded. */
  while (n - i >= 4) {
    _mm_store_pd(dest + i, _mm_mul_pd(_mm_loadu_pd(src1 + i), factor));
    _mm_store_pd(dest + i + 2, _mm_mul_pd(_mm_loadu_pd(src1 + i + 2), factor));
    i += 4;
  }

  /* Scalar tail: fewer than 4 elements remain. */
  for (; i < n; i++) {
    dest[i] = src1[i] * *val;
  }
}
sl@0
   342
/* Hands scalarmultiply_f64_ns_sse2_unroll2 to liboil as a candidate for the
 * scalarmultiply_f64_ns class. The OIL_IMPL_FLAG_SSE2 tag presumably limits
 * selection to SSE2-capable CPUs (macro defined outside this file -- confirm). */
OIL_DEFINE_IMPL_FULL (scalarmultiply_f64_ns_sse2_unroll2, scalarmultiply_f64_ns, OIL_IMPL_FLAG_SSE2);
sl@0
   343
sl@0
   344
sl@0
   345
sl@0
   346
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for add_f32_sse_unroll2.  The record _oil_function_impl_add_f32_sse_unroll2
 * is presumably the object emitted by the OIL_DEFINE_IMPL_FULL line for this
 * function -- confirm against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, add_f32"), which is not a valid function definition.
 */
OilFunctionImpl* __oil_function_impl_add_f32_sse_unroll2() {
  return &_oil_function_impl_add_f32_sse_unroll2;
}
#endif
sl@0
   352
sl@0
   353
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for subtract_f32_sse_unroll2.  The record
 * _oil_function_impl_subtract_f32_sse_unroll2 is presumably the object
 * emitted by the OIL_DEFINE_IMPL_FULL line for this function -- confirm
 * against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, subtract_f32"), which is not a valid function definition.
 */
OilFunctionImpl* __oil_function_impl_subtract_f32_sse_unroll2() {
  return &_oil_function_impl_subtract_f32_sse_unroll2;
}
#endif
sl@0
   359
sl@0
   360
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for multiply_f32_sse_unroll2.  The record
 * _oil_function_impl_multiply_f32_sse_unroll2 is presumably the object
 * emitted by the OIL_DEFINE_IMPL_FULL line for this function -- confirm
 * against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, multiply_f32"), which is not a valid function definition.
 */
OilFunctionImpl* __oil_function_impl_multiply_f32_sse_unroll2() {
  return &_oil_function_impl_multiply_f32_sse_unroll2;
}
#endif
sl@0
   366
sl@0
   367
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for divide_f32_sse_unroll2.  The record
 * _oil_function_impl_divide_f32_sse_unroll2 is presumably the object emitted
 * by the OIL_DEFINE_IMPL_FULL line for this function -- confirm against
 * liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, divide_f32"), which is not a valid function definition.
 */
OilFunctionImpl* __oil_function_impl_divide_f32_sse_unroll2() {
  return &_oil_function_impl_divide_f32_sse_unroll2;
}
#endif
sl@0
   373
sl@0
   374
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for minimum_f32_sse_unroll2.  The record
 * _oil_function_impl_minimum_f32_sse_unroll2 is presumably the object
 * emitted by the OIL_DEFINE_IMPL_FULL line for this function -- confirm
 * against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, minimum_f32"), which is not a valid function definition.
 */
OilFunctionImpl* __oil_function_impl_minimum_f32_sse_unroll2() {
  return &_oil_function_impl_minimum_f32_sse_unroll2;
}
#endif
sl@0
   380
sl@0
   381
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for maximum_f32_sse_unroll2.  The record
 * _oil_function_impl_maximum_f32_sse_unroll2 is presumably the object
 * emitted by the OIL_DEFINE_IMPL_FULL line for this function -- confirm
 * against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, maximum_f32"), which is not a valid function definition.
 */
OilFunctionImpl* __oil_function_impl_maximum_f32_sse_unroll2() {
  return &_oil_function_impl_maximum_f32_sse_unroll2;
}
#endif
sl@0
   387
sl@0
   388
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for inverse_f32_sse_unroll2.  The record
 * _oil_function_impl_inverse_f32_sse_unroll2 is presumably the object
 * emitted by the OIL_DEFINE_IMPL_FULL line for this function -- confirm
 * against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, inverse_f32"), which is not a valid function definition.
 */
OilFunctionImpl* __oil_function_impl_inverse_f32_sse_unroll2() {
  return &_oil_function_impl_inverse_f32_sse_unroll2;
}
#endif
sl@0
   394
sl@0
   395
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for negative_f32_sse_unroll2.  The record
 * _oil_function_impl_negative_f32_sse_unroll2 is presumably the object
 * emitted by the OIL_DEFINE_IMPL_FULL line for this function -- confirm
 * against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, negative_f32"), which is not a valid function definition.
 */
OilFunctionImpl* __oil_function_impl_negative_f32_sse_unroll2() {
  return &_oil_function_impl_negative_f32_sse_unroll2;
}
#endif
sl@0
   401
sl@0
   402
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for scalaradd_f32_ns_sse_unroll2.  The record
 * _oil_function_impl_scalaradd_f32_ns_sse_unroll2 is presumably the object
 * emitted by the OIL_DEFINE_IMPL_FULL line for this function -- confirm
 * against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, scalaradd_f32_ns"), which is not a valid function
 * definition.
 */
OilFunctionImpl* __oil_function_impl_scalaradd_f32_ns_sse_unroll2() {
  return &_oil_function_impl_scalaradd_f32_ns_sse_unroll2;
}
#endif
sl@0
   408
sl@0
   409
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for scalarmultiply_f32_ns_sse_unroll2.  The record
 * _oil_function_impl_scalarmultiply_f32_ns_sse_unroll2 is presumably the
 * object emitted by the OIL_DEFINE_IMPL_FULL line for this function --
 * confirm against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, scalarmultiply_f32_ns"), which is not a valid function
 * definition.
 */
OilFunctionImpl* __oil_function_impl_scalarmultiply_f32_ns_sse_unroll2() {
  return &_oil_function_impl_scalarmultiply_f32_ns_sse_unroll2;
}
#endif
sl@0
   415
sl@0
   416
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for scalarmultiply_f64_ns_sse2_unroll2.  The record
 * _oil_function_impl_scalarmultiply_f64_ns_sse2_unroll2 is presumably the
 * object emitted by the OIL_DEFINE_IMPL_FULL line for this function --
 * confirm against liboilfunction.h.
 *
 * The previous text had the class name pasted into the identifier
 * ("..._unroll2, scalarmultiply_f64_ns"), which is not a valid function
 * definition.
 */
OilFunctionImpl* __oil_function_impl_scalarmultiply_f64_ns_sse2_unroll2() {
  return &_oil_function_impl_scalarmultiply_f64_ns_sse2_unroll2;
}
#endif
sl@0
   422