os/ossrv/genericopenlibs/liboil/src/multsum_sse.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/multsum_sse.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,96 @@
     1.4 +/*
     1.5 +* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: SSE2 implementation of the liboil multsum_f64 function class.
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +#ifdef HAVE_CONFIG_H
    1.22 +#include "config.h"
    1.23 +#endif
    1.24 +#include <liboil/liboilclasses.h>
    1.25 +#include <liboil/liboilfunction.h>
    1.26 +#include <emmintrin.h>
    1.27 +
    1.28 +#define SSE_FUNCTION __attribute__((force_align_arg_pointer))
    1.29 +
    1.30 +#define MULTSUM_SSE2_NSTRIDED(i) { \
    1.31 +  t1 = _mm_load_pd(&OIL_GET(src1, i, double)); \
    1.32 +  t2 = _mm_load_pd(&OIL_GET(src2, i, double)); \
    1.33 +  t1 = _mm_mul_pd(t1,t2); \
    1.34 +  sum.reg = _mm_add_pd(sum.reg,t1); \
    1.35 +}
    1.36 +#define MULTSUM_SSE2_NSTRIDEDP(i) { \
    1.37 +  t1 = _mm_load_pd(&OIL_GET(src1, i*sstr1, double)); \
    1.38 +  t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
    1.39 +  t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
    1.40 +  t1 = _mm_mul_pd(t1,t2); \
    1.41 +  sum.reg = _mm_add_pd(sum.reg,t1); \
    1.42 +}
    1.43 +#define MULTSUM_SSE2_STRIDED(i) { \
    1.44 +  t1 = _mm_loadl_pd(t1, &OIL_GET(src1, i*sstr1, double)); \
    1.45 +  t1 = _mm_loadh_pd(t1, &OIL_GET(src1, (i+1)*sstr1, double)); \
    1.46 +  t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
    1.47 +  t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
    1.48 +  t1 = _mm_mul_pd(t1,t2); \
    1.49 +  sum.reg = _mm_add_pd(sum.reg,t1); \
    1.50 +}
    1.51 +/* multsum_f64_sse2_unroll4: stores in *dest the sum of src1[k]*src2[k] over n elements, stepping src1 by sstr1 and src2 by sstr2 per element (stride unit is whatever OIL_GET/OIL_INCREMENT use; presumably bytes -- confirm). */
    1.52 +/* Built only when ENABLE_BROKEN_IMPLS is defined; declared through OIL_DEFINE_IMPL_FULL for class multsum_f64 with flag OIL_IMPL_FLAG_SSE2. */
    1.53 +#ifdef ENABLE_BROKEN_IMPLS
    1.54 +SSE_FUNCTION static void
    1.55 +multsum_f64_sse2_unroll4(double *dest,
    1.56 +     const double *src1, int sstr1,
    1.57 +     const double *src2, int sstr2,
    1.58 +     int n)
    1.59 +{
    1.60 +  __m128d t1 = _mm_setzero_pd(), t2 = _mm_setzero_pd(); /* initialised because _mm_loadl_pd/_mm_loadh_pd take the old register value as an operand */
    1.61 +  union {
    1.62 +    __m128d reg;
    1.63 +    double vals[2];
    1.64 +  } sum; /* union gives scalar access to the two accumulator lanes */
    1.65 +  int i = 0;
    1.66 +
    1.67 +  sum.reg = _mm_setzero_pd();
    1.68 +  while (i < n-3) { /* main loop: four elements of each input per pass */
    1.69 +    MULTSUM_SSE2_STRIDED(0);
    1.70 +    MULTSUM_SSE2_STRIDED(2);
    1.71 +    /* step both inputs past the four elements just consumed */
    1.72 +    OIL_INCREMENT(src1, 4*sstr1);
    1.73 +    OIL_INCREMENT(src2, 4*sstr2);
    1.74 +    i += 4;
    1.75 +  }
    1.76 +  while (i < n-1) { /* fewer than four elements remain here; consumes two of them if available */
    1.77 +    MULTSUM_SSE2_STRIDED(0);
    1.78 +    /* step both inputs past the two elements just consumed */
    1.79 +    OIL_INCREMENT(src1, 2*sstr1);
    1.80 +    OIL_INCREMENT(src2, 2*sstr2);
    1.81 +    i+=2;
    1.82 +  }
    1.83 +  *dest = sum.vals[0] + sum.vals[1]; /* fold the two lanes into the scalar result */
    1.84 +  if (i < n) { /* odd n: one element is left and is handled in scalar code */
    1.85 +    *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double));
    1.86 +  }
    1.87 +}
    1.88 +OIL_DEFINE_IMPL_FULL (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2);
    1.89 +#endif
    1.90 +
    1.91 +/* Symbian accessor: returns the address of _oil_function_impl_multsum_f64_sse2_unroll4 (presumably the record emitted by OIL_DEFINE_IMPL_FULL for multsum_f64_sse2_unroll4 -- confirm in liboilfunction.h). */
    1.92 +/* The implementation is only compiled under ENABLE_BROKEN_IMPLS, so the accessor is compiled under the same condition. */
    1.93 +#if defined(__SYMBIAN32__) && defined(ENABLE_BROKEN_IMPLS)
    1.94 + 
    1.95 +OilFunctionImpl* __oil_function_impl_multsum_f64_sse2_unroll4() {
    1.96 +		return &_oil_function_impl_multsum_f64_sse2_unroll4;
    1.97 +}
    1.98 +#endif
    1.99 +