1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/simdpack/diffsquaresum_f64.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,177 @@
1.4 +/*
1.5 + * LIBOIL - Library of Optimized Inner Loops
1.6 + * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
1.7 + * All rights reserved.
1.8 + *
1.9 + * Redistribution and use in source and binary forms, with or without
1.10 + * modification, are permitted provided that the following conditions
1.11 + * are met:
1.12 + * 1. Redistributions of source code must retain the above copyright
1.13 + * notice, this list of conditions and the following disclaimer.
1.14 + * 2. Redistributions in binary form must reproduce the above copyright
1.15 + * notice, this list of conditions and the following disclaimer in the
1.16 + * documentation and/or other materials provided with the distribution.
1.17 + *
1.18 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1.19 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1.20 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.21 + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1.22 + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1.23 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1.24 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1.25 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1.26 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1.27 + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1.28 + * POSSIBILITY OF SUCH DAMAGE.
1.29 + */
1.30 +//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
1.31 +
1.32 +#ifdef HAVE_CONFIG_H
1.33 +#include "config.h"
1.34 +#endif
1.35 +
1.36 +#include <liboil/liboilfunction.h>
1.37 +#include "liboil/simdpack/simdpack.h"
1.38 +#include <math.h>
1.39 +
1.40 +static void
1.41 +diffsquaresum_f64_i10_simple(double *dest, double *src1, int sstr1, double *src2,
1.42 + int sstr2, int n)
1.43 +{
1.44 + double sum = 0;
1.45 + double x;
1.46 + int i;
1.47 +
1.48 + for(i=0;i<n;i++){
1.49 + x = OIL_GET(src1, i*sstr1, double) -
1.50 + OIL_GET(src2, i*sstr2, double);
1.51 + x = x*x;
1.52 + sum += x;
1.53 + }
1.54 +
1.55 + *dest = sum;
1.56 +}
1.57 +OIL_DEFINE_IMPL (diffsquaresum_f64_i10_simple, diffsquaresum_f64);
1.58 +
/*
 * Pointer-walking implementation of diffsquaresum_f64.
 *
 * Reads the current element of each input directly through src1/src2, adds
 * the squared difference to a running total, then moves both pointers on
 * with OIL_INCREMENT.  Repeats while n > 0 and finally stores the total in
 * *dest (0 when n <= 0).
 *
 * dest   where the accumulated sum is written
 * src1   first input; advanced by sstr1 after every element
 * sstr1  step passed to OIL_INCREMENT for src1 (presumably a byte stride --
 *        confirm against the OIL_INCREMENT definition in liboil)
 * src2   second input; advanced by sstr2 after every element
 * sstr2  step passed to OIL_INCREMENT for src2 (same caveat as sstr1)
 * n      number of pairs to process
 */
static void
diffsquaresum_f64_i10_fast(double *dest, double *src1, int sstr1, double *src2,
    int sstr2, int n)
{
  double acc = 0;
  double delta;

  for (; n > 0; n--) {
    delta = *src1 - *src2;
    acc += delta * delta;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
  }

  *dest = acc;
}
/* Hooks this function up to the diffsquaresum_f64 class through liboil's
 * OIL_DEFINE_IMPL macro. */
OIL_DEFINE_IMPL (diffsquaresum_f64_i10_fast, diffsquaresum_f64);
1.77 +
/*
 * Two-way unrolled implementation of diffsquaresum_f64.
 *
 * Computes the sum of (a - b)^2 over n pairs of doubles read through
 * src1/src2 and stores it in *dest.  Two independent accumulators are used
 * in the main loop and added together at the end, so the floating-point
 * summation order differs from a single-accumulator loop.
 *
 * dest   where the accumulated sum is written
 * src1   first input; advanced by sstr1 after every element
 * sstr1  step passed to OIL_INCREMENT for src1 (presumably a byte stride --
 *        confirm against the OIL_INCREMENT definition in liboil)
 * src2   second input; advanced by sstr2 after every element
 * sstr2  step passed to OIL_INCREMENT for src2 (same caveat as sstr1)
 * n      number of pairs to process; n <= 0 yields *dest = 0
 */
static void
diffsquaresum_f64_i10_unroll2(double *dest, double *src1, int sstr1, double *src2,
    int sstr2, int n)
{
  double sum0 = 0;
  double sum1 = 0;
  double x;

  /* Without this guard a negative odd n passes the (n & 1) test below and
   * one element is read from each source, whereas the non-unrolled loops
   * (while n > 0 / for i < n) touch nothing for the same n. */
  if (n <= 0) {
    *dest = 0;
    return;
  }

  /* Peel one element so that the remaining count is even. */
  if (n & 1) {
    x = *src1 - *src2;
    sum0 += x * x;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
    n--;
  }

  /* Main loop: two elements per iteration, one per accumulator. */
  while (n > 0) {
    x = *src1 - *src2;
    sum0 += x * x;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
    x = *src1 - *src2;
    sum1 += x * x;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
    n -= 2;
  }

  *dest = sum0 + sum1;
}
/* Hooks this function up to the diffsquaresum_f64 class through liboil's
 * OIL_DEFINE_IMPL macro. */
OIL_DEFINE_IMPL (diffsquaresum_f64_i10_unroll2, diffsquaresum_f64);
1.108 +
/*
 * Four-way unrolled implementation of diffsquaresum_f64.
 *
 * Computes the sum of (a - b)^2 over n pairs of doubles read through
 * src1/src2 and stores it in *dest.  Four independent accumulators are used
 * in the main loop and added together at the end, so the floating-point
 * summation order differs from a single-accumulator loop.
 *
 * dest   where the accumulated sum is written
 * src1   first input; advanced by sstr1 after every element
 * sstr1  step passed to OIL_INCREMENT for src1 (presumably a byte stride --
 *        confirm against the OIL_INCREMENT definition in liboil)
 * src2   second input; advanced by sstr2 after every element
 * sstr2  step passed to OIL_INCREMENT for src2 (same caveat as sstr1)
 * n      number of pairs to process; n <= 0 yields *dest = 0
 */
static void
diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1,
    double *src2, int sstr2, int n)
{
  double sum0 = 0;
  double sum1 = 0;
  double sum2 = 0;
  double sum3 = 0;
  double x;

  /* Without this guard a negative n that is not a multiple of 4 keeps the
   * (n & 3) peel loop below running (up to three iterations), reading
   * elements that the non-unrolled loops never touch for the same n. */
  if (n <= 0) {
    *dest = 0;
    return;
  }

  /* Peel leading elements until the remaining count is a multiple of 4. */
  while (n & 3) {
    x = *src1 - *src2;
    sum0 += x * x;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
    n--;
  }

  /* Main loop: four elements per iteration, one per accumulator. */
  while (n > 0) {
    x = *src1 - *src2;
    sum0 += x * x;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
    x = *src1 - *src2;
    sum1 += x * x;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
    x = *src1 - *src2;
    sum2 += x * x;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
    x = *src1 - *src2;
    sum3 += x * x;
    OIL_INCREMENT (src1, sstr1);
    OIL_INCREMENT (src2, sstr2);
    n -= 4;
  }

  *dest = sum0 + sum1 + sum2 + sum3;
}
/* Hooks this function up to the diffsquaresum_f64 class through liboil's
 * OIL_DEFINE_IMPL macro. */
OIL_DEFINE_IMPL (diffsquaresum_f64_i10_unroll4, diffsquaresum_f64);
1.149 +
1.150 +
1.151 +
1.152 +
#ifdef __SYMBIAN32__

/* Non-static accessor, built only when __SYMBIAN32__ is defined: returns the
 * address of _oil_function_impl_diffsquaresum_f64_i10_simple (presumably the
 * record emitted by OIL_DEFINE_IMPL for that function -- confirm in the
 * liboil headers). */
OilFunctionImpl* __oil_function_impl_diffsquaresum_f64_i10_simple() {
  OilFunctionImpl *impl = &_oil_function_impl_diffsquaresum_f64_i10_simple;

  return impl;
}
#endif
1.159 +
#ifdef __SYMBIAN32__

/* Non-static accessor, built only when __SYMBIAN32__ is defined: returns the
 * address of _oil_function_impl_diffsquaresum_f64_i10_fast (presumably the
 * record emitted by OIL_DEFINE_IMPL for that function -- confirm in the
 * liboil headers). */
OilFunctionImpl* __oil_function_impl_diffsquaresum_f64_i10_fast() {
  OilFunctionImpl *impl = &_oil_function_impl_diffsquaresum_f64_i10_fast;

  return impl;
}
#endif
1.166 +
#ifdef __SYMBIAN32__

/* Non-static accessor, built only when __SYMBIAN32__ is defined: returns the
 * address of _oil_function_impl_diffsquaresum_f64_i10_unroll2 (presumably the
 * record emitted by OIL_DEFINE_IMPL for that function -- confirm in the
 * liboil headers). */
OilFunctionImpl* __oil_function_impl_diffsquaresum_f64_i10_unroll2() {
  OilFunctionImpl *impl = &_oil_function_impl_diffsquaresum_f64_i10_unroll2;

  return impl;
}
#endif
1.173 +
#ifdef __SYMBIAN32__

/* Non-static accessor, built only when __SYMBIAN32__ is defined: returns the
 * address of _oil_function_impl_diffsquaresum_f64_i10_unroll4 (presumably the
 * record emitted by OIL_DEFINE_IMPL for that function -- confirm in the
 * liboil headers). */
OilFunctionImpl* __oil_function_impl_diffsquaresum_f64_i10_unroll4() {
  OilFunctionImpl *impl = &_oil_function_impl_diffsquaresum_f64_i10_unroll4;

  return impl;
}
#endif
1.180 +