diff -r 000000000000 -r bde4ae8d615e os/ossrv/genericopenlibs/liboil/src/simdpack/squaresum_f64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/os/ossrv/genericopenlibs/liboil/src/simdpack/squaresum_f64.c Fri Jun 15 03:10:57 2012 +0200 @@ -0,0 +1,176 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2003,2004 David A. Schleef + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
+ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include "liboil/simdpack/simdpack.h" +#include + +static void +squaresum_f64_i10_simple(double *dest, double *src, int n) +{ + double sum2 = 0; + int i; + + for(i=0;i /* NOTE(review): the text of this patch appears truncated at this point -- the for statement is incomplete and the conditional closed by the #endif below has no visible opener; restore from the upstream file before applying. */ +static void +squaresum_f64_i10_multsum(double *dest, double *src, int n) +{ + multsum_f64(dest,src,src,n); +} +#endif + /* squaresum_f64_i10_unroll4a: writes to *dest the sum of src[k]*src[k] over the n doubles starting at src. The products are accumulated in four separate partial sums that are added together once at the end. NOTE(review): there is no guard for n < 0 -- confirm callers never pass a negative count. */ +static void +squaresum_f64_i10_unroll4a(double *dest, double *src, int n) +{ + double sum1 = 0; + double sum2 = 0; + double sum3 = 0; + double sum4 = 0; + /* Peel elements one at a time into sum1 until the low two bits of n are clear, so the loop below can step by four. */ + while(n&0x3){ + sum1 += *src * *src; + src++; + n--; + } /* Main loop: four elements per pass, one squared element added to each accumulator. */ + while(n>0){ + sum1 += *src * *src; + src++; + sum2 += *src * *src; + src++; + sum3 += *src * *src; + src++; + sum4 += *src * *src; + src++; + n-=4; + } + /* Combine the four partial sums into the single result. */ + *dest = sum1 + sum2 + sum3 + sum4; +} /* Presumably registers the function above as an implementation of squaresum_f64; the macro is defined outside this file -- confirm. */ +OIL_DEFINE_IMPL (squaresum_f64_i10_unroll4a, squaresum_f64); + +static void +squaresum_f64_i10_unroll4(double *dest, double *src, int n) +{ + double sum1 = 0; + double sum2 = 0; + double sum3 = 0; + double sum4 = 0; + int i; + + while(n&0x3){ + sum1 += src[0]*src[0]; + src++; + n--; + } + for(i=0;i /* NOTE(review): the patch text ends here mid-statement; the rest of this function is not visible. */