1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/simdpack/multsum.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,118 @@
1.4 +/*
1.5 + * LIBOIL - Library of Optimized Inner Loops
1.6 + * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
1.7 + * All rights reserved.
1.8 + *
1.9 + * Redistribution and use in source and binary forms, with or without
1.10 + * modification, are permitted provided that the following conditions
1.11 + * are met:
1.12 + * 1. Redistributions of source code must retain the above copyright
1.13 + * notice, this list of conditions and the following disclaimer.
1.14 + * 2. Redistributions in binary form must reproduce the above copyright
1.15 + * notice, this list of conditions and the following disclaimer in the
1.16 + * documentation and/or other materials provided with the distribution.
1.17 + *
1.18 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1.19 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1.20 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.21 + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1.22 + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1.23 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1.24 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1.25 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1.26 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1.27 + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1.28 + * POSSIBILITY OF SUCH DAMAGE.
1.29 + */
1.30 +//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
1.31 +
1.32 +#ifdef HAVE_CONFIG_H
1.33 +#include "config.h"
1.34 +#endif
1.35 +
1.36 +#include <liboil/liboilfunction.h>
1.37 +#include "liboil/simdpack/simdpack.h"
1.38 +#include <math.h>
1.39 +
1.40 +
1.41 +static void multsum_f32_unroll2 (float *dest, float *src1, int sstr1,
1.42 + float *src2, int sstr2, int n)
1.43 +{
1.44 + int i;
1.45 + double sum1 = 0;
1.46 + double sum2 = 0;
1.47 +
1.48 + for(i=0;i<n-1;i+=2){
1.49 + sum1 += OIL_GET(src1,0, float) * OIL_GET(src2,0, float);
1.50 + sum2 += OIL_GET(src1,sstr1, float) * OIL_GET(src2,sstr2, float);
1.51 + OIL_INCREMENT (src1, sstr1*2);
1.52 + OIL_INCREMENT (src2, sstr2*2);
1.53 + }
1.54 + if (i<n) {
1.55 + sum1 += OIL_GET(src1,0, float) * OIL_GET(src2,0, float);
1.56 + }
1.57 +
1.58 + *dest = sum1 + sum2;
1.59 +}
1.60 +OIL_DEFINE_IMPL (multsum_f32_unroll2, multsum_f32);
1.61 +
1.62 +
1.63 +static void multsum_f64_unroll8 (double *dest, double *src1, int sstr1,
1.64 + double *src2, int sstr2, int n)
1.65 +{
1.66 + int i = 0;
1.67 + double sum = 0;
1.68 +
1.69 + while(i<n-7) {
1.70 + sum += (OIL_GET(src1,0, double) * OIL_GET(src2,0, double)) +
1.71 + (OIL_GET(src1,sstr1, double) * OIL_GET(src2,sstr2, double)) +
1.72 + (OIL_GET(src1,2*sstr1, double) * OIL_GET(src2,2*sstr2, double)) +
1.73 + (OIL_GET(src1,3*sstr1, double) * OIL_GET(src2,3*sstr2, double)) +
1.74 + (OIL_GET(src1,4*sstr1, double) * OIL_GET(src2,4*sstr2, double)) +
1.75 + (OIL_GET(src1,5*sstr1, double) * OIL_GET(src2,5*sstr2, double)) +
1.76 + (OIL_GET(src1,6*sstr1, double) * OIL_GET(src2,6*sstr2, double)) +
1.77 + (OIL_GET(src1,7*sstr1, double) * OIL_GET(src2,7*sstr2, double));
1.78 + OIL_INCREMENT (src1, sstr1*8);
1.79 + OIL_INCREMENT (src2, sstr2*8);
1.80 + i+=8;
1.81 + }
1.82 + while(i<n-3) {
1.83 + sum += (OIL_GET(src1,0, double) * OIL_GET(src2,0, double)) +
1.84 + (OIL_GET(src1,sstr1, double) * OIL_GET(src2,sstr2, double)) +
1.85 + (OIL_GET(src1,2*sstr1, double) * OIL_GET(src2,2*sstr2, double)) +
1.86 + (OIL_GET(src1,3*sstr1, double) * OIL_GET(src2,3*sstr2, double));
1.87 + OIL_INCREMENT (src1, sstr1*4);
1.88 + OIL_INCREMENT (src2, sstr2*4);
1.89 + i+=4;
1.90 + }
1.91 + while(i<n-1) {
1.92 + sum += (OIL_GET(src1,0, double) * OIL_GET(src2,0, double)) +
1.93 + (OIL_GET(src1,sstr1, double) * OIL_GET(src2,sstr2, double));
1.94 + OIL_INCREMENT (src1, sstr1*2);
1.95 + OIL_INCREMENT (src2, sstr2*2);
1.96 + i+=2;
1.97 + }
1.98 + if (i<n) {
1.99 + sum += OIL_GET(src1,0, double) * OIL_GET(src2,0, double);
1.100 + }
1.101 +
1.102 + *dest = sum;
1.103 +}
1.104 +OIL_DEFINE_IMPL (multsum_f64_unroll8, multsum_f64);
1.105 +
1.106 +
1.107 +
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_multsum_f32_unroll2 implementation descriptor.
 * Declared with an explicit (void) parameter list so the definition
 * also serves as a prototype and calls with arguments are rejected.
 *
 * NOTE(review): the descriptor object is presumably emitted by the
 * OIL_DEFINE_IMPL line for multsum_f32_unroll2 -- confirm.
 */
OilFunctionImpl* __oil_function_impl_multsum_f32_unroll2(void) {
  return &_oil_function_impl_multsum_f32_unroll2;
}
#endif
1.114 +
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_multsum_f64_unroll8 implementation descriptor.
 * Declared with an explicit (void) parameter list so the definition
 * also serves as a prototype and calls with arguments are rejected.
 *
 * NOTE(review): the descriptor object is presumably emitted by the
 * OIL_DEFINE_IMPL line for multsum_f64_unroll8 -- confirm.
 */
OilFunctionImpl* __oil_function_impl_multsum_f64_unroll8(void) {
  return &_oil_function_impl_multsum_f64_unroll8;
}
#endif
1.121 +