1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/multsum_sse.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,96 @@
1.4 +/*
1.5 +* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description: SSE2 implementation of the liboil multsum_f64 function class.
1.18 +*
1.19 +*/
1.20 +
1.21 +#ifdef HAVE_CONFIG_H
1.22 +#include "config.h"
1.23 +#endif
1.24 +#include <liboil/liboilclasses.h>
1.25 +#include <liboil/liboilfunction.h>
1.26 +#include <emmintrin.h>
1.27 +
1.28 +#define SSE_FUNCTION __attribute__((force_align_arg_pointer)) /* GCC x86 attribute: the function prologue realigns the run-time stack if necessary */
1.29 +
1.30 +#define MULTSUM_SSE2_NSTRIDED(i) { \
1.31 + t1 = _mm_load_pd(&OIL_GET(src1, i, double)); \
1.32 + t2 = _mm_load_pd(&OIL_GET(src2, i, double)); \
1.33 + t1 = _mm_mul_pd(t1,t2); \
1.34 + sum.reg = _mm_add_pd(sum.reg,t1); \
1.35 +}
1.36 +#define MULTSUM_SSE2_NSTRIDEDP(i) { \
1.37 + t1 = _mm_load_pd(&OIL_GET(src1, i*sstr1, double)); \
1.38 + t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
1.39 + t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
1.40 + t1 = _mm_mul_pd(t1,t2); \
1.41 + sum.reg = _mm_add_pd(sum.reg,t1); \
1.42 +}
1.43 +#define MULTSUM_SSE2_STRIDED(i) { \
1.44 + t1 = _mm_loadl_pd(t1, &OIL_GET(src1, i*sstr1, double)); \
1.45 + t1 = _mm_loadh_pd(t1, &OIL_GET(src1, (i+1)*sstr1, double)); \
1.46 + t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
1.47 + t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
1.48 + t1 = _mm_mul_pd(t1,t2); \
1.49 + sum.reg = _mm_add_pd(sum.reg,t1); \
1.50 +}
1.51 +
1.52 +
1.53 +#ifdef ENABLE_BROKEN_IMPLS
1.54 +SSE_FUNCTION static void /* stores in *dest the sum of src1[k]*src2[k] over n strided element pairs */
1.55 +multsum_f64_sse2_unroll4(double *dest, /* out: receives the accumulated sum */
1.56 + const double *src1, int sstr1, /* first input and its stride, as consumed by OIL_GET/OIL_INCREMENT */
1.57 + const double *src2, int sstr2, /* second input and its stride, as consumed by OIL_GET/OIL_INCREMENT */
1.58 + int n) /* number of element pairs to multiply and add */
1.59 +{
1.60 + __m128d t1 = _mm_setzero_pd(), t2 = _mm_setzero_pd(); /* zeroed so the first _mm_loadl_pd never reads an indeterminate pass-through operand */
1.61 + union {
1.62 + __m128d reg;
1.63 + double vals[2];
1.64 + } sum; /* two running partial sums; the lanes are read back through vals[] */
1.65 + int i = 0;
1.66 + /* Clear both partial sums, then consume four element pairs per pass. */
1.67 + sum.reg = _mm_setzero_pd();
1.68 + while (i < n-3) {
1.69 + MULTSUM_SSE2_STRIDED(0);
1.70 + MULTSUM_SSE2_STRIDED(2);
1.71 + /* step both inputs past the four elements just consumed */
1.72 + OIL_INCREMENT(src1, 4*sstr1);
1.73 + OIL_INCREMENT(src2, 4*sstr2);
1.74 + i += 4;
1.75 + }
1.76 + while (i < n-1) { /* fewer than four pairs remain, so this runs at most once */
1.77 + MULTSUM_SSE2_STRIDED(0);
1.78 + /* step both inputs past the two elements just consumed */
1.79 + OIL_INCREMENT(src1, 2*sstr1);
1.80 + OIL_INCREMENT(src2, 2*sstr2);
1.81 + i+=2;
1.82 + }
1.83 + *dest = sum.vals[0] + sum.vals[1]; /* fold the two lanes into one scalar */
1.84 + if (i < n) { /* one pair left over: handle it in scalar code */
1.85 + *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double));
1.86 + }
1.87 +}
1.88 +OIL_DEFINE_IMPL_FULL (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2); /* presumably registers the function above as an SSE2-flagged implementation of class multsum_f64 — confirm against liboilfunction.h */
1.89 +#endif
1.90 +
1.91 +
1.92 +
1.93 +#ifdef __SYMBIAN32__
1.94 +
1.95 +OilFunctionImpl* __oil_function_impl_multsum_f64_sse2_unroll4() { /* Symbian-only accessor: returns the address of the implementation descriptor */
1.96 + return &_oil_function_impl_multsum_f64_sse2_unroll4; /* NOTE(review): descriptor presumably comes from OIL_DEFINE_IMPL_FULL, which this file compiles only under ENABLE_BROKEN_IMPLS — confirm build config */
1.97 +}
1.98 +#endif
1.99 +