1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/simdpack/scalarmult.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,413 @@
1.4 +/*
1.5 + * LIBOIL - Library of Optimized Inner Loops
1.6 + * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
1.7 + * All rights reserved.
1.8 + *
1.9 + * Redistribution and use in source and binary forms, with or without
1.10 + * modification, are permitted provided that the following conditions
1.11 + * are met:
1.12 + * 1. Redistributions of source code must retain the above copyright
1.13 + * notice, this list of conditions and the following disclaimer.
1.14 + * 2. Redistributions in binary form must reproduce the above copyright
1.15 + * notice, this list of conditions and the following disclaimer in the
1.16 + * documentation and/or other materials provided with the distribution.
1.17 + *
1.18 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1.19 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1.20 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.21 + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1.22 + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1.23 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1.24 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1.25 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1.26 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1.27 + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1.28 + * POSSIBILITY OF SUCH DAMAGE.
1.29 + */
1.30 +//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
1.31 +
1.32 +#ifdef HAVE_CONFIG_H
1.33 +#include "config.h"
1.34 +#endif
1.35 +
1.36 +#include <liboil/liboilfunction.h>
1.37 +#include "liboil/simdpack/simdpack.h"
1.38 +/* When both __SYMBIAN32__ and __WINSCW__ are defined, switch off the warn_emptydecl warning -- presumably needed because each SCALARMULT_DEFINE_* invocation in this file adds a ';' after a macro body that already ends in ';', leaving an empty declaration (TODO confirm against the WINSCW compiler docs). */
1.39 +#ifdef __SYMBIAN32__
1.40 +#ifdef __WINSCW__
1.41 +#pragma warn_emptydecl off
1.42 +#endif//__WINSCW__
1.43 +#endif//__SYMBIAN32__
1.44 +/* SCALARMULT_DEFINE_UNROLL2(type): defines static scalarmult_<type>_unroll2(), which stores *src * *val into *dest for each of n elements (one element peeled off when n is odd, then two elements per loop pass), and hands it to OIL_DEFINE_IMPL together with the name scalarmult_<type>. dest and src are stepped with OIL_INCREMENT by dstr / sstr -- presumably byte strides; confirm in liboilfunction.h. NOTE(review): n is assumed non-negative; it is not checked here. */
1.45 +#define SCALARMULT_DEFINE_UNROLL2(type) \
1.46 +static void scalarmult_ ## type ## _unroll2( \
1.47 + oil_type_ ## type *dest, int dstr, \
1.48 + oil_type_ ## type *src, int sstr, \
1.49 + oil_type_ ## type *val, int n) \
1.50 +{ \
1.51 + if(n&1) { /* odd count: handle one element first so the rest is a multiple of 2 */ \
1.52 + *dest = *src * *val; \
1.53 + OIL_INCREMENT(dest, dstr); \
1.54 + OIL_INCREMENT(src, sstr); \
1.55 + } \
1.56 + n /= 2; /* from here on n counts pairs of elements */ \
1.57 + while(n>0){ /* two elements per pass */ \
1.58 + *dest = *src * *val; \
1.59 + OIL_INCREMENT(dest,dstr); \
1.60 + OIL_INCREMENT(src,sstr); \
1.61 + *dest = *src * *val; \
1.62 + OIL_INCREMENT(dest,dstr); \
1.63 + OIL_INCREMENT(src,sstr); \
1.64 + n--; \
1.65 + } \
1.66 +} \
1.67 +OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll2, scalarmult_ ## type);
1.68 +/* Instantiate the unroll2 variant for each of the eight element types below. */
1.69 +SCALARMULT_DEFINE_UNROLL2 (s8);
1.70 +SCALARMULT_DEFINE_UNROLL2 (u8);
1.71 +SCALARMULT_DEFINE_UNROLL2 (s16);
1.72 +SCALARMULT_DEFINE_UNROLL2 (u16);
1.73 +SCALARMULT_DEFINE_UNROLL2 (s32);
1.74 +SCALARMULT_DEFINE_UNROLL2 (u32);
1.75 +SCALARMULT_DEFINE_UNROLL2 (f32);
1.76 +SCALARMULT_DEFINE_UNROLL2 (f64);
1.77 +/* SCALARMULT_DEFINE_UNROLL2x(type): defines static scalarmult_<type>_unroll2x() and hands it to OIL_DEFINE_IMPL together with the name scalarmult_<type>. It computes the same element-wise *src * *val product as the other variants: after peeling one element for odd n it keeps a second src/dest pointer pair obtained with OIL_OFFSET(ptr, stride), doubles both strides, and on pass i accesses both pairs at offset stride*i through OIL_GET. OIL_OFFSET / OIL_GET are defined elsewhere -- presumably pointer-plus-offset and typed access at an offset; confirm in liboilfunction.h. NOTE(review): n is assumed non-negative. */
1.78 +#define SCALARMULT_DEFINE_UNROLL2x(type) \
1.79 +static void scalarmult_ ## type ## _unroll2x( \
1.80 + oil_type_ ## type *dest, int dstr, \
1.81 + oil_type_ ## type *src, int sstr, \
1.82 + oil_type_ ## type *val, int n) \
1.83 +{ \
1.84 + oil_type_ ## type *dest2; \
1.85 + oil_type_ ## type *src2; \
1.86 + int i; \
1.87 + if(n&1) { /* odd count: handle one element first */ \
1.88 + *dest = *src * *val; \
1.89 + OIL_INCREMENT(dest, dstr); \
1.90 + OIL_INCREMENT(src, sstr); \
1.91 + } \
1.92 + src2 = OIL_OFFSET(src, sstr); /* second pointer pair, taken one original stride from the first */ \
1.93 + dest2 = OIL_OFFSET(dest, dstr); \
1.94 + n /= 2; /* n now counts loop passes, each covering both pointer pairs */ \
1.95 + sstr *= 2; /* strides are doubled only after src2/dest2 were derived from the original values */ \
1.96 + dstr *= 2; \
1.97 + for(i=0;i<n;i++){ \
1.98 + OIL_GET(dest,dstr*i,oil_type_ ## type) = OIL_GET(src,sstr*i,oil_type_ ## type) * *val; \
1.99 + OIL_GET(dest2,dstr*i,oil_type_ ## type) = OIL_GET(src2,sstr*i,oil_type_ ## type) * *val; \
1.100 + } \
1.101 +} \
1.102 +OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll2x, scalarmult_ ## type);
1.103 +/* Instantiate the unroll2x variant for each of the eight element types below. */
1.104 +SCALARMULT_DEFINE_UNROLL2x (s8);
1.105 +SCALARMULT_DEFINE_UNROLL2x (u8);
1.106 +SCALARMULT_DEFINE_UNROLL2x (s16);
1.107 +SCALARMULT_DEFINE_UNROLL2x (u16);
1.108 +SCALARMULT_DEFINE_UNROLL2x (s32);
1.109 +SCALARMULT_DEFINE_UNROLL2x (u32);
1.110 +SCALARMULT_DEFINE_UNROLL2x (f32);
1.111 +SCALARMULT_DEFINE_UNROLL2x (f64);
1.112 +/* SCALARMULT_DEFINE_UNROLL4(type): defines static scalarmult_<type>_unroll4(), which stores *src * *val into *dest for each of n elements, and hands it to OIL_DEFINE_IMPL together with the name scalarmult_<type>. The remainder is peeled first (one element if bit 0 of n is set, two more if bit 1 is set); the main loop then handles four elements per pass. dest and src are stepped with OIL_INCREMENT by dstr / sstr -- presumably byte strides; confirm in liboilfunction.h. NOTE(review): n is assumed non-negative. */
1.113 +#define SCALARMULT_DEFINE_UNROLL4(type) \
1.114 +static void scalarmult_ ## type ## _unroll4( \
1.115 + oil_type_ ## type *dest, int dstr, \
1.116 + oil_type_ ## type *src, int sstr, \
1.117 + oil_type_ ## type *val, int n) \
1.118 +{ \
1.119 + if(n&1) { /* bit 0 set: one leftover element */ \
1.120 + *dest = *src * *val; \
1.121 + OIL_INCREMENT(dest, dstr); \
1.122 + OIL_INCREMENT(src, sstr); \
1.123 + } \
1.124 + if(n&2) { /* bit 1 set: two leftover elements */ \
1.125 + *dest = *src * *val; \
1.126 + OIL_INCREMENT(dest, dstr); \
1.127 + OIL_INCREMENT(src, sstr); \
1.128 + *dest = *src * *val; \
1.129 + OIL_INCREMENT(dest, dstr); \
1.130 + OIL_INCREMENT(src, sstr); \
1.131 + } \
1.132 + n /= 4; /* from here on n counts groups of four elements */ \
1.133 + while(n>0){ /* four elements per pass */ \
1.134 + *dest = *src * *val; \
1.135 + OIL_INCREMENT(dest,dstr); \
1.136 + OIL_INCREMENT(src,sstr); \
1.137 + *dest = *src * *val; \
1.138 + OIL_INCREMENT(dest,dstr); \
1.139 + OIL_INCREMENT(src,sstr); \
1.140 + *dest = *src * *val; \
1.141 + OIL_INCREMENT(dest,dstr); \
1.142 + OIL_INCREMENT(src,sstr); \
1.143 + *dest = *src * *val; \
1.144 + OIL_INCREMENT(dest,dstr); \
1.145 + OIL_INCREMENT(src,sstr); \
1.146 + n--; \
1.147 + } \
1.148 +} \
1.149 +OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll4, scalarmult_ ## type);
1.150 +/* Instantiate the unroll4 variant for each of the eight element types below. */
1.151 +SCALARMULT_DEFINE_UNROLL4 (s8);
1.152 +SCALARMULT_DEFINE_UNROLL4 (u8);
1.153 +SCALARMULT_DEFINE_UNROLL4 (s16);
1.154 +SCALARMULT_DEFINE_UNROLL4 (u16);
1.155 +SCALARMULT_DEFINE_UNROLL4 (s32);
1.156 +SCALARMULT_DEFINE_UNROLL4 (u32);
1.157 +SCALARMULT_DEFINE_UNROLL4 (f32);
1.158 +SCALARMULT_DEFINE_UNROLL4 (f64);
1.159 +
1.160 +
1.161 +
1.162 +
1.163 +/* SCALARMULT_DEFINE_X(type): defines static scalarmult_<type>_x() and hands it to OIL_DEFINE_IMPL together with the name scalarmult_<type>. Unlike the pointer-stepping variants it leaves dest/src unmodified and reaches element k through OIL_GET at offset k*stride, handling elements i and i+1 per loop pass; an odd n gets its last element after the loop. OIL_GET is defined elsewhere -- presumably typed access at an offset from the base pointer; confirm in liboilfunction.h. NOTE(review): n is assumed non-negative. */
1.164 +#define SCALARMULT_DEFINE_X(type) \
1.165 +static void scalarmult_ ## type ## _x( \
1.166 + oil_type_ ## type *dest, int dstr, \
1.167 + oil_type_ ## type *src, int sstr, \
1.168 + oil_type_ ## type *val, int n) \
1.169 +{ \
1.170 + int i; \
1.171 + for(i=0;i+1<n;i+=2){ /* runs only while a full pair (i, i+1) remains */ \
1.172 + OIL_GET(dest, i*dstr,oil_type_ ## type) = \
1.173 + OIL_GET(src, i*sstr,oil_type_ ## type) * *val; \
1.174 + OIL_GET(dest,(i+1)*dstr,oil_type_ ## type) = \
1.175 + OIL_GET(src,(i+1)*sstr,oil_type_ ## type) * *val; \
1.176 + } \
1.177 + if (n&1) { /* for odd n >= 1 the loop leaves i == n-1, the one element not yet written */ \
1.178 + OIL_GET(dest,i*dstr,oil_type_ ## type) = \
1.179 + OIL_GET(src,i*sstr,oil_type_ ## type) * *val; \
1.180 + } \
1.181 +} \
1.182 +OIL_DEFINE_IMPL (scalarmult_ ## type ## _x, scalarmult_ ## type);
1.183 +
1.184 +/* Instantiate the _x variant for each of the eight element types below. */
1.185 +SCALARMULT_DEFINE_X (s8);
1.186 +SCALARMULT_DEFINE_X (u8);
1.187 +SCALARMULT_DEFINE_X (s16);
1.188 +SCALARMULT_DEFINE_X (u16);
1.189 +SCALARMULT_DEFINE_X (s32);
1.190 +SCALARMULT_DEFINE_X (u32);
1.191 +SCALARMULT_DEFINE_X (f32);
1.192 +SCALARMULT_DEFINE_X (f64);
1.193 +
1.194 +#ifdef __SYMBIAN32__
1.195 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s8_unroll2 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s8_unroll2 (confirm in liboilfunction.h). */
1.196 +OilFunctionImpl* __oil_function_impl_scalarmult_s8_unroll2() {
1.197 + return &_oil_function_impl_scalarmult_s8_unroll2;
1.198 +}
1.199 +#endif
1.200 +
1.201 +#ifdef __SYMBIAN32__
1.202 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u8_unroll2 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u8_unroll2 (confirm in liboilfunction.h). */
1.203 +OilFunctionImpl* __oil_function_impl_scalarmult_u8_unroll2() {
1.204 + return &_oil_function_impl_scalarmult_u8_unroll2;
1.205 +}
1.206 +#endif
1.207 +
1.208 +#ifdef __SYMBIAN32__
1.209 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s16_unroll2 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s16_unroll2 (confirm in liboilfunction.h). */
1.210 +OilFunctionImpl* __oil_function_impl_scalarmult_s16_unroll2() {
1.211 + return &_oil_function_impl_scalarmult_s16_unroll2;
1.212 +}
1.213 +#endif
1.214 +
1.215 +#ifdef __SYMBIAN32__
1.216 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u16_unroll2 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u16_unroll2 (confirm in liboilfunction.h). */
1.217 +OilFunctionImpl* __oil_function_impl_scalarmult_u16_unroll2() {
1.218 + return &_oil_function_impl_scalarmult_u16_unroll2;
1.219 +}
1.220 +#endif
1.221 +
1.222 +#ifdef __SYMBIAN32__
1.223 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s32_unroll2 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s32_unroll2 (confirm in liboilfunction.h). */
1.224 +OilFunctionImpl* __oil_function_impl_scalarmult_s32_unroll2() {
1.225 + return &_oil_function_impl_scalarmult_s32_unroll2;
1.226 +}
1.227 +#endif
1.228 +
1.229 +#ifdef __SYMBIAN32__
1.230 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u32_unroll2 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u32_unroll2 (confirm in liboilfunction.h). */
1.231 +OilFunctionImpl* __oil_function_impl_scalarmult_u32_unroll2() {
1.232 + return &_oil_function_impl_scalarmult_u32_unroll2;
1.233 +}
1.234 +#endif
1.235 +
1.236 +#ifdef __SYMBIAN32__
1.237 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_f32_unroll2 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_f32_unroll2 (confirm in liboilfunction.h). */
1.238 +OilFunctionImpl* __oil_function_impl_scalarmult_f32_unroll2() {
1.239 + return &_oil_function_impl_scalarmult_f32_unroll2;
1.240 +}
1.241 +#endif
1.242 +
1.243 +#ifdef __SYMBIAN32__
1.244 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_f64_unroll2 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_f64_unroll2 (confirm in liboilfunction.h). */
1.245 +OilFunctionImpl* __oil_function_impl_scalarmult_f64_unroll2() {
1.246 + return &_oil_function_impl_scalarmult_f64_unroll2;
1.247 +}
1.248 +#endif
1.249 +
1.250 +#ifdef __SYMBIAN32__
1.251 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s8_unroll4 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s8_unroll4 (confirm in liboilfunction.h). */
1.252 +OilFunctionImpl* __oil_function_impl_scalarmult_s8_unroll4() {
1.253 + return &_oil_function_impl_scalarmult_s8_unroll4;
1.254 +}
1.255 +#endif
1.256 +
1.257 +#ifdef __SYMBIAN32__
1.258 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u8_unroll4 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u8_unroll4 (confirm in liboilfunction.h). */
1.259 +OilFunctionImpl* __oil_function_impl_scalarmult_u8_unroll4() {
1.260 + return &_oil_function_impl_scalarmult_u8_unroll4;
1.261 +}
1.262 +#endif
1.263 +
1.264 +#ifdef __SYMBIAN32__
1.265 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s16_unroll4 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s16_unroll4 (confirm in liboilfunction.h). */
1.266 +OilFunctionImpl* __oil_function_impl_scalarmult_s16_unroll4() {
1.267 + return &_oil_function_impl_scalarmult_s16_unroll4;
1.268 +}
1.269 +#endif
1.270 +
1.271 +#ifdef __SYMBIAN32__
1.272 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u16_unroll4 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u16_unroll4 (confirm in liboilfunction.h). */
1.273 +OilFunctionImpl* __oil_function_impl_scalarmult_u16_unroll4() {
1.274 + return &_oil_function_impl_scalarmult_u16_unroll4;
1.275 +}
1.276 +#endif
1.277 +
1.278 +#ifdef __SYMBIAN32__
1.279 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s32_unroll4 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s32_unroll4 (confirm in liboilfunction.h). */
1.280 +OilFunctionImpl* __oil_function_impl_scalarmult_s32_unroll4() {
1.281 + return &_oil_function_impl_scalarmult_s32_unroll4;
1.282 +}
1.283 +#endif
1.284 +
1.285 +#ifdef __SYMBIAN32__
1.286 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u32_unroll4 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u32_unroll4 (confirm in liboilfunction.h). */
1.287 +OilFunctionImpl* __oil_function_impl_scalarmult_u32_unroll4() {
1.288 + return &_oil_function_impl_scalarmult_u32_unroll4;
1.289 +}
1.290 +#endif
1.291 +
1.292 +#ifdef __SYMBIAN32__
1.293 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_f32_unroll4 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_f32_unroll4 (confirm in liboilfunction.h). */
1.294 +OilFunctionImpl* __oil_function_impl_scalarmult_f32_unroll4() {
1.295 + return &_oil_function_impl_scalarmult_f32_unroll4;
1.296 +}
1.297 +#endif
1.298 +
1.299 +#ifdef __SYMBIAN32__
1.300 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_f64_unroll4 -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_f64_unroll4 (confirm in liboilfunction.h). */
1.301 +OilFunctionImpl* __oil_function_impl_scalarmult_f64_unroll4() {
1.302 + return &_oil_function_impl_scalarmult_f64_unroll4;
1.303 +}
1.304 +#endif
1.305 +
1.306 +#ifdef __SYMBIAN32__
1.307 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s8_unroll2x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s8_unroll2x (confirm in liboilfunction.h). */
1.308 +OilFunctionImpl* __oil_function_impl_scalarmult_s8_unroll2x() {
1.309 + return &_oil_function_impl_scalarmult_s8_unroll2x;
1.310 +}
1.311 +#endif
1.312 +
1.313 +#ifdef __SYMBIAN32__
1.314 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u8_unroll2x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u8_unroll2x (confirm in liboilfunction.h). */
1.315 +OilFunctionImpl* __oil_function_impl_scalarmult_u8_unroll2x() {
1.316 + return &_oil_function_impl_scalarmult_u8_unroll2x;
1.317 +}
1.318 +#endif
1.319 +
1.320 +#ifdef __SYMBIAN32__
1.321 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s16_unroll2x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s16_unroll2x (confirm in liboilfunction.h). */
1.322 +OilFunctionImpl* __oil_function_impl_scalarmult_s16_unroll2x() {
1.323 + return &_oil_function_impl_scalarmult_s16_unroll2x;
1.324 +}
1.325 +#endif
1.326 +
1.327 +#ifdef __SYMBIAN32__
1.328 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u16_unroll2x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u16_unroll2x (confirm in liboilfunction.h). */
1.329 +OilFunctionImpl* __oil_function_impl_scalarmult_u16_unroll2x() {
1.330 + return &_oil_function_impl_scalarmult_u16_unroll2x;
1.331 +}
1.332 +#endif
1.333 +
1.334 +#ifdef __SYMBIAN32__
1.335 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s32_unroll2x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s32_unroll2x (confirm in liboilfunction.h). */
1.336 +OilFunctionImpl* __oil_function_impl_scalarmult_s32_unroll2x() {
1.337 + return &_oil_function_impl_scalarmult_s32_unroll2x;
1.338 +}
1.339 +#endif
1.340 +
1.341 +#ifdef __SYMBIAN32__
1.342 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u32_unroll2x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u32_unroll2x (confirm in liboilfunction.h). */
1.343 +OilFunctionImpl* __oil_function_impl_scalarmult_u32_unroll2x() {
1.344 + return &_oil_function_impl_scalarmult_u32_unroll2x;
1.345 +}
1.346 +#endif
1.347 +
1.348 +#ifdef __SYMBIAN32__
1.349 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_f32_unroll2x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_f32_unroll2x (confirm in liboilfunction.h). */
1.350 +OilFunctionImpl* __oil_function_impl_scalarmult_f32_unroll2x() {
1.351 + return &_oil_function_impl_scalarmult_f32_unroll2x;
1.352 +}
1.353 +#endif
1.354 +
1.355 +#ifdef __SYMBIAN32__
1.356 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_f64_unroll2x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_f64_unroll2x (confirm in liboilfunction.h). */
1.357 +OilFunctionImpl* __oil_function_impl_scalarmult_f64_unroll2x() {
1.358 + return &_oil_function_impl_scalarmult_f64_unroll2x;
1.359 +}
1.360 +#endif
1.361 +
1.362 +#ifdef __SYMBIAN32__
1.363 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s8_x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s8_x (confirm in liboilfunction.h). */
1.364 +OilFunctionImpl* __oil_function_impl_scalarmult_s8_x() {
1.365 + return &_oil_function_impl_scalarmult_s8_x;
1.366 +}
1.367 +#endif
1.368 +
1.369 +#ifdef __SYMBIAN32__
1.370 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u8_x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u8_x (confirm in liboilfunction.h). */
1.371 +OilFunctionImpl* __oil_function_impl_scalarmult_u8_x() {
1.372 + return &_oil_function_impl_scalarmult_u8_x;
1.373 +}
1.374 +#endif
1.375 +
1.376 +#ifdef __SYMBIAN32__
1.377 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s16_x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s16_x (confirm in liboilfunction.h). */
1.378 +OilFunctionImpl* __oil_function_impl_scalarmult_s16_x() {
1.379 + return &_oil_function_impl_scalarmult_s16_x;
1.380 +}
1.381 +#endif
1.382 +
1.383 +#ifdef __SYMBIAN32__
1.384 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u16_x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u16_x (confirm in liboilfunction.h). */
1.385 +OilFunctionImpl* __oil_function_impl_scalarmult_u16_x() {
1.386 + return &_oil_function_impl_scalarmult_u16_x;
1.387 +}
1.388 +#endif
1.389 +
1.390 +#ifdef __SYMBIAN32__
1.391 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_s32_x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_s32_x (confirm in liboilfunction.h). */
1.392 +OilFunctionImpl* __oil_function_impl_scalarmult_s32_x() {
1.393 + return &_oil_function_impl_scalarmult_s32_x;
1.394 +}
1.395 +#endif
1.396 +
1.397 +#ifdef __SYMBIAN32__
1.398 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_u32_x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_u32_x (confirm in liboilfunction.h). */
1.399 +OilFunctionImpl* __oil_function_impl_scalarmult_u32_x() {
1.400 + return &_oil_function_impl_scalarmult_u32_x;
1.401 +}
1.402 +#endif
1.403 +
1.404 +#ifdef __SYMBIAN32__
1.405 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_f32_x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_f32_x (confirm in liboilfunction.h). */
1.406 +OilFunctionImpl* __oil_function_impl_scalarmult_f32_x() {
1.407 + return &_oil_function_impl_scalarmult_f32_x;
1.408 +}
1.409 +#endif
1.410 +
1.411 +#ifdef __SYMBIAN32__
1.412 +/* Symbian-only accessor: returns the address of _oil_function_impl_scalarmult_f64_x -- presumably the record OIL_DEFINE_IMPL emits for scalarmult_f64_x (confirm in liboilfunction.h). */
1.413 +OilFunctionImpl* __oil_function_impl_scalarmult_f64_x() {
1.414 + return &_oil_function_impl_scalarmult_f64_x;
1.415 +}
1.416 +#endif