os/ossrv/genericopenlibs/liboil/src/multsum_sse.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
#ifdef HAVE_CONFIG_H
sl@0
    19
#include "config.h"
sl@0
    20
#endif
sl@0
    21
#include <liboil/liboilclasses.h>
sl@0
    22
#include <liboil/liboilfunction.h>
sl@0
    23
#include <emmintrin.h>
sl@0
    24
sl@0
    25
#define SSE_FUNCTION __attribute__((force_align_arg_pointer))
sl@0
    26
sl@0
    27
/*
 * Accumulate one pair of products into sum.reg.
 *
 * Loads two adjacent doubles from src1 and two from src2 at offset (i) with
 * _mm_load_pd, multiplies them lane-wise and adds the result to sum.reg.
 * _mm_load_pd requires both addresses to be 16-byte aligned.
 *
 * Expects t1, t2 (__m128d), sum (with a .reg member), src1 and src2 to be in
 * scope at the expansion site.  Wrapped in do { } while (0) so that it acts
 * as a single statement, e.g. inside an unbraced if/else.
 */
#define MULTSUM_SSE2_NSTRIDED(i) do { \
  t1 = _mm_load_pd(&OIL_GET(src1, (i), double)); \
  t2 = _mm_load_pd(&OIL_GET(src2, (i), double)); \
  t1 = _mm_mul_pd(t1,t2); \
  sum.reg = _mm_add_pd(sum.reg,t1); \
} while (0)
sl@0
    33
/*
 * Accumulate one pair of products into sum.reg, with src1 read as a packed
 * pair and src2 gathered element by element.
 *
 * src1: two adjacent doubles are loaded with _mm_load_pd at offset
 *       (i)*sstr1; that address must be 16-byte aligned.
 * src2: the low lane comes from offset (i)*sstr2 and the high lane from
 *       offset ((i)+1)*sstr2.
 *
 * Expects t1, t2 (__m128d), sum (with a .reg member), src1, src2, sstr1 and
 * sstr2 to be in scope at the expansion site.  t2 is also an input to
 * _mm_loadl_pd, so it should hold a defined value before the first use.
 * The argument is parenthesized so expressions such as `k + 2` scale
 * correctly, and the body is a do { } while (0) single statement.
 */
#define MULTSUM_SSE2_NSTRIDEDP(i) do { \
  t1 = _mm_load_pd(&OIL_GET(src1, (i)*sstr1, double)); \
  t2 = _mm_loadl_pd(t2, &OIL_GET(src2, (i)*sstr2, double)); \
  t2 = _mm_loadh_pd(t2, &OIL_GET(src2, ((i)+1)*sstr2, double)); \
  t1 = _mm_mul_pd(t1,t2); \
  sum.reg = _mm_add_pd(sum.reg,t1); \
} while (0)
sl@0
    40
/*
 * Accumulate one pair of products into sum.reg, gathering both operands
 * element by element.
 *
 * For each source the low lane is loaded from offset (i)*sstrN and the high
 * lane from offset ((i)+1)*sstrN using _mm_loadl_pd / _mm_loadh_pd; the two
 * vectors are multiplied lane-wise and added to sum.reg.
 *
 * Expects t1, t2 (__m128d), sum (with a .reg member), src1, src2, sstr1 and
 * sstr2 to be in scope at the expansion site.  t1 and t2 are also inputs to
 * _mm_loadl_pd, so they should hold a defined value before the first use.
 * The argument is parenthesized so expressions such as `k + 2` scale
 * correctly, and the body is a do { } while (0) single statement.
 */
#define MULTSUM_SSE2_STRIDED(i) do { \
  t1 = _mm_loadl_pd(t1, &OIL_GET(src1, (i)*sstr1, double)); \
  t1 = _mm_loadh_pd(t1, &OIL_GET(src1, ((i)+1)*sstr1, double)); \
  t2 = _mm_loadl_pd(t2, &OIL_GET(src2, (i)*sstr2, double)); \
  t2 = _mm_loadh_pd(t2, &OIL_GET(src2, ((i)+1)*sstr2, double)); \
  t1 = _mm_mul_pd(t1,t2); \
  sum.reg = _mm_add_pd(sum.reg,t1); \
} while (0)
sl@0
    48
sl@0
    49
sl@0
    50
#ifdef ENABLE_BROKEN_IMPLS
sl@0
    51
SSE_FUNCTION static void
sl@0
    52
multsum_f64_sse2_unroll4(double *dest,
sl@0
    53
     const double *src1, int sstr1,
sl@0
    54
     const double *src2, int sstr2,
sl@0
    55
     int n)
sl@0
    56
{
sl@0
    57
  __m128d t1, t2;
sl@0
    58
  union {
sl@0
    59
    __m128d reg;
sl@0
    60
    double vals[2];
sl@0
    61
  } sum;
sl@0
    62
  int i = 0;
sl@0
    63
sl@0
    64
  sum.reg = _mm_setzero_pd();
sl@0
    65
  while (i < n-3) {
sl@0
    66
    MULTSUM_SSE2_STRIDED(0);
sl@0
    67
    MULTSUM_SSE2_STRIDED(2);
sl@0
    68
sl@0
    69
    OIL_INCREMENT(src1, 4*sstr1);
sl@0
    70
    OIL_INCREMENT(src2, 4*sstr2);
sl@0
    71
    i += 4;
sl@0
    72
  }
sl@0
    73
  while (i < n-1) {
sl@0
    74
    MULTSUM_SSE2_STRIDED(0);
sl@0
    75
sl@0
    76
    OIL_INCREMENT(src1, 2*sstr1);
sl@0
    77
    OIL_INCREMENT(src2, 2*sstr2);
sl@0
    78
    i+=2;
sl@0
    79
  }
sl@0
    80
  *dest = sum.vals[0] + sum.vals[1];
sl@0
    81
  if (i < n) {
sl@0
    82
    *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double));
sl@0
    83
  }
sl@0
    84
}
sl@0
    85
OIL_DEFINE_IMPL_FULL (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2);
sl@0
    86
#endif
sl@0
    87
sl@0
    88
sl@0
    89
sl@0
    90
/* The accessor is only meaningful when the implementation above is compiled
 * in, i.e. when ENABLE_BROKEN_IMPLS is defined as well. */
#if defined(__SYMBIAN32__) && defined(ENABLE_BROKEN_IMPLS)

/*
 * Symbian accessor: returns the address of the implementation record for
 * multsum_f64_sse2_unroll4 (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL registration above -- confirm against
 * liboilfunction.h).
 */
OilFunctionImpl* __oil_function_impl_multsum_f64_sse2_unroll4() {
		return &_oil_function_impl_multsum_f64_sse2_unroll4;
}
#endif
sl@0
    96