os/ossrv/genericopenlibs/liboil/src/multsum_sse.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
     1 /*
     2 * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 *
    16 */
    17 
    18 #ifdef HAVE_CONFIG_H
    19 #include "config.h"
    20 #endif
    21 #include <liboil/liboilclasses.h>
    22 #include <liboil/liboilfunction.h>
    23 #include <emmintrin.h>
    24 
    25 #define SSE_FUNCTION __attribute__((force_align_arg_pointer))
    26 
    27 #define MULTSUM_SSE2_NSTRIDED(i) { \
    28   t1 = _mm_load_pd(&OIL_GET(src1, i, double)); \
    29   t2 = _mm_load_pd(&OIL_GET(src2, i, double)); \
    30   t1 = _mm_mul_pd(t1,t2); \
    31   sum.reg = _mm_add_pd(sum.reg,t1); \
    32 }
    33 #define MULTSUM_SSE2_NSTRIDEDP(i) { \
    34   t1 = _mm_load_pd(&OIL_GET(src1, i*sstr1, double)); \
    35   t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
    36   t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
    37   t1 = _mm_mul_pd(t1,t2); \
    38   sum.reg = _mm_add_pd(sum.reg,t1); \
    39 }
    40 #define MULTSUM_SSE2_STRIDED(i) { \
    41   t1 = _mm_loadl_pd(t1, &OIL_GET(src1, i*sstr1, double)); \
    42   t1 = _mm_loadh_pd(t1, &OIL_GET(src1, (i+1)*sstr1, double)); \
    43   t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
    44   t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
    45   t1 = _mm_mul_pd(t1,t2); \
    46   sum.reg = _mm_add_pd(sum.reg,t1); \
    47 }
    48 
    49 
    50 #ifdef ENABLE_BROKEN_IMPLS
    51 SSE_FUNCTION static void
    52 multsum_f64_sse2_unroll4(double *dest,
    53      const double *src1, int sstr1,
    54      const double *src2, int sstr2,
    55      int n)
    56 {
    57   __m128d t1, t2;
    58   union {
    59     __m128d reg;
    60     double vals[2];
    61   } sum;
    62   int i = 0;
    63 
    64   sum.reg = _mm_setzero_pd();
    65   while (i < n-3) {
    66     MULTSUM_SSE2_STRIDED(0);
    67     MULTSUM_SSE2_STRIDED(2);
    68 
    69     OIL_INCREMENT(src1, 4*sstr1);
    70     OIL_INCREMENT(src2, 4*sstr2);
    71     i += 4;
    72   }
    73   while (i < n-1) {
    74     MULTSUM_SSE2_STRIDED(0);
    75 
    76     OIL_INCREMENT(src1, 2*sstr1);
    77     OIL_INCREMENT(src2, 2*sstr2);
    78     i+=2;
    79   }
    80   *dest = sum.vals[0] + sum.vals[1];
    81   if (i < n) {
    82     *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double));
    83   }
    84 }
    85 OIL_DEFINE_IMPL_FULL (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2);
    86 #endif
    87 
    88 
    89 
    90 #ifdef	__SYMBIAN32__
    91  
    92 OilFunctionImpl* __oil_function_impl_multsum_f64_sse2_unroll4, multsum_f64() {
    93 		return &_oil_function_impl_multsum_f64_sse2_unroll4, multsum_f64;
    94 }
    95 #endif
    96