Update contrib.
2 * LIBOIL - Library of Optimized Inner Loops
3 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
27 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
33 #include <liboil/liboilfunction.h>
34 #include "liboil/simdpack/simdpack.h"
/*
 * NOTE(review): compiler-specific pragma. Presumably it silences
 * "empty declaration" diagnostics: the SCALARMULT_DEFINE_* macros in this
 * file already end in ';' and every instantiation adds a second ';', which
 * leaves an empty declaration at file scope. Confirm which toolchain honours
 * this pragma and whether it should be guarded by a platform #ifdef.
 */
38 #pragma warn_emptydecl off
/*
 * SCALARMULT_DEFINE_UNROLL2(type)
 *
 * Expands to a static function
 *   scalarmult_<type>_unroll2(dest, dstr, src, sstr, val, n)
 * followed by an OIL_DEFINE_IMPL invocation that names that function and
 * scalarmult_<type>.
 *
 * Every store visible in the body has the form *dest = *src * *val and is
 * followed by OIL_INCREMENT(dest, dstr) and OIL_INCREMENT(src, sstr); val is
 * only dereferenced, never advanced. One such store/advance group appears
 * first, then two consecutive groups.
 *
 * NOTE(review): this listing is missing original lines (the embedded
 * numbering jumps 46 -> 49, 51 -> 55 and 60 -> 64), so the function braces
 * and the control flow that decides how often each group runs are not
 * visible. Presumably the single group handles an odd leftover element and
 * the pair is the body of a 2x-unrolled loop -- confirm against the full file.
 */
42 #define SCALARMULT_DEFINE_UNROLL2(type) \
43 static void scalarmult_ ## type ## _unroll2( \
44 oil_type_ ## type *dest, int dstr, \
45 oil_type_ ## type *src, int sstr, \
46 oil_type_ ## type *val, int n) \
49 *dest = *src * *val; \
50 OIL_INCREMENT(dest, dstr); \
51 OIL_INCREMENT(src, sstr); \
55 *dest = *src * *val; \
56 OIL_INCREMENT(dest,dstr); \
57 OIL_INCREMENT(src,sstr); \
58 *dest = *src * *val; \
59 OIL_INCREMENT(dest,dstr); \
60 OIL_INCREMENT(src,sstr); \
64 OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll2, scalarmult_ ## type);
/*
 * Instantiate the _unroll2 variant once per element type suffix:
 * s8, u8, s16, u16, s32, u32, f32 and f64.
 */
66 SCALARMULT_DEFINE_UNROLL2 (s8);
67 SCALARMULT_DEFINE_UNROLL2 (u8);
68 SCALARMULT_DEFINE_UNROLL2 (s16);
69 SCALARMULT_DEFINE_UNROLL2 (u16);
70 SCALARMULT_DEFINE_UNROLL2 (s32);
71 SCALARMULT_DEFINE_UNROLL2 (u32);
72 SCALARMULT_DEFINE_UNROLL2 (f32);
73 SCALARMULT_DEFINE_UNROLL2 (f64);
/*
 * SCALARMULT_DEFINE_UNROLL2x(type)
 *
 * Expands to a static function
 *   scalarmult_<type>_unroll2x(dest, dstr, src, sstr, val, n)
 * followed by an OIL_DEFINE_IMPL invocation that names that function and
 * scalarmult_<type>.
 *
 * Visible body, in order:
 *   - declarations of two extra pointers, dest2 and src2;
 *   - one store *dest = *src * *val followed by OIL_INCREMENT on dest and src;
 *   - src2 and dest2 are set from OIL_OFFSET(src, sstr) and
 *     OIL_OFFSET(dest, dstr);
 *   - two stores through OIL_GET, one using dest/src and one using
 *     dest2/src2, both at offsets dstr*i and sstr*i, both multiplied by *val.
 *
 * NOTE(review): the declaration of i, the loop that steps it, any change to
 * n or the strides, and all braces are not present in this listing (embedded
 * numbering jumps 79 -> 81, 82 -> 85, 87 -> 89, 90 -> 95, 96 -> 99).
 * Presumably the first store handles an odd leftover element and the OIL_GET
 * pair processes two interleaved elements per iteration -- confirm against
 * the full file.
 */
75 #define SCALARMULT_DEFINE_UNROLL2x(type) \
76 static void scalarmult_ ## type ## _unroll2x( \
77 oil_type_ ## type *dest, int dstr, \
78 oil_type_ ## type *src, int sstr, \
79 oil_type_ ## type *val, int n) \
81 oil_type_ ## type *dest2; \
82 oil_type_ ## type *src2; \
85 *dest = *src * *val; \
86 OIL_INCREMENT(dest, dstr); \
87 OIL_INCREMENT(src, sstr); \
89 src2 = OIL_OFFSET(src, sstr); \
90 dest2 = OIL_OFFSET(dest, dstr); \
95 OIL_GET(dest,dstr*i,oil_type_ ## type) = OIL_GET(src,sstr*i,oil_type_ ## type) * *val; \
96 OIL_GET(dest2,dstr*i,oil_type_ ## type) = OIL_GET(src2,sstr*i,oil_type_ ## type) * *val; \
99 OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll2x, scalarmult_ ## type);
/*
 * Instantiate the _unroll2x variant once per element type suffix:
 * s8, u8, s16, u16, s32, u32, f32 and f64.
 */
101 SCALARMULT_DEFINE_UNROLL2x (s8);
102 SCALARMULT_DEFINE_UNROLL2x (u8);
103 SCALARMULT_DEFINE_UNROLL2x (s16);
104 SCALARMULT_DEFINE_UNROLL2x (u16);
105 SCALARMULT_DEFINE_UNROLL2x (s32);
106 SCALARMULT_DEFINE_UNROLL2x (u32);
107 SCALARMULT_DEFINE_UNROLL2x (f32);
108 SCALARMULT_DEFINE_UNROLL2x (f64);
/*
 * SCALARMULT_DEFINE_UNROLL4(type)
 *
 * Expands to a static function
 *   scalarmult_<type>_unroll4(dest, dstr, src, sstr, val, n)
 * followed by an OIL_DEFINE_IMPL invocation that names that function and
 * scalarmult_<type>.
 *
 * The visible body consists of three runs of the same store/advance group
 * (*dest = *src * *val; OIL_INCREMENT(dest, dstr); OIL_INCREMENT(src, sstr)):
 * a run of one group, a run of two groups and a run of four groups.
 *
 * NOTE(review): the braces and the conditions/loops around these runs are
 * missing from this listing (embedded numbering jumps 114 -> 117, 119 -> 122,
 * 127 -> 131 and 142 -> 146). Presumably the one- and two-group runs consume
 * the n % 4 remainder and the four-group run is the body of the 4x-unrolled
 * loop -- confirm against the full file.
 */
110 #define SCALARMULT_DEFINE_UNROLL4(type) \
111 static void scalarmult_ ## type ## _unroll4( \
112 oil_type_ ## type *dest, int dstr, \
113 oil_type_ ## type *src, int sstr, \
114 oil_type_ ## type *val, int n) \
117 *dest = *src * *val; \
118 OIL_INCREMENT(dest, dstr); \
119 OIL_INCREMENT(src, sstr); \
122 *dest = *src * *val; \
123 OIL_INCREMENT(dest, dstr); \
124 OIL_INCREMENT(src, sstr); \
125 *dest = *src * *val; \
126 OIL_INCREMENT(dest, dstr); \
127 OIL_INCREMENT(src, sstr); \
131 *dest = *src * *val; \
132 OIL_INCREMENT(dest,dstr); \
133 OIL_INCREMENT(src,sstr); \
134 *dest = *src * *val; \
135 OIL_INCREMENT(dest,dstr); \
136 OIL_INCREMENT(src,sstr); \
137 *dest = *src * *val; \
138 OIL_INCREMENT(dest,dstr); \
139 OIL_INCREMENT(src,sstr); \
140 *dest = *src * *val; \
141 OIL_INCREMENT(dest,dstr); \
142 OIL_INCREMENT(src,sstr); \
146 OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll4, scalarmult_ ## type);
/*
 * Instantiate the _unroll4 variant once per element type suffix:
 * s8, u8, s16, u16, s32, u32, f32 and f64.
 */
148 SCALARMULT_DEFINE_UNROLL4 (s8);
149 SCALARMULT_DEFINE_UNROLL4 (u8);
150 SCALARMULT_DEFINE_UNROLL4 (s16);
151 SCALARMULT_DEFINE_UNROLL4 (u16);
152 SCALARMULT_DEFINE_UNROLL4 (s32);
153 SCALARMULT_DEFINE_UNROLL4 (u32);
154 SCALARMULT_DEFINE_UNROLL4 (f32);
155 SCALARMULT_DEFINE_UNROLL4 (f64);
/*
 * SCALARMULT_DEFINE_X(type)
 *
 * Expands to a static function
 *   scalarmult_<type>_x(dest, dstr, src, sstr, val, n)
 * followed by an OIL_DEFINE_IMPL invocation that names that function and
 * scalarmult_<type>.
 *
 * Unlike the other variants in this file, dest and src are not advanced:
 * every element is addressed through OIL_GET with an offset computed from
 * the index i (i*dstr / i*sstr). The visible for-loop runs while i+1 < n,
 * stepping i by 2, and on each pass stores src*(*val) for index i and for
 * index i+1. After the loop there is one more store for index i.
 *
 * NOTE(review): the declaration of i, the function braces and whatever
 * guards the trailing store are missing from this listing (embedded
 * numbering jumps 165 -> 168, 172 -> 175 and 176 -> 179). Presumably the
 * trailing store only runs when one element is left over (i < n) -- confirm
 * against the full file.
 */
161 #define SCALARMULT_DEFINE_X(type) \
162 static void scalarmult_ ## type ## _x( \
163 oil_type_ ## type *dest, int dstr, \
164 oil_type_ ## type *src, int sstr, \
165 oil_type_ ## type *val, int n) \
168 for(i=0;i+1<n;i+=2){ \
169 OIL_GET(dest, i*dstr,oil_type_ ## type) = \
170 OIL_GET(src, i*sstr,oil_type_ ## type) * *val; \
171 OIL_GET(dest,(i+1)*dstr,oil_type_ ## type) = \
172 OIL_GET(src,(i+1)*sstr,oil_type_ ## type) * *val; \
175 OIL_GET(dest,i*dstr,oil_type_ ## type) = \
176 OIL_GET(src,i*sstr,oil_type_ ## type) * *val; \
179 OIL_DEFINE_IMPL (scalarmult_ ## type ## _x, scalarmult_ ## type);
/*
 * Instantiate the _x variant once per element type suffix:
 * s8, u8, s16, u16, s32, u32, f32 and f64.
 */
182 SCALARMULT_DEFINE_X (s8);
183 SCALARMULT_DEFINE_X (u8);
184 SCALARMULT_DEFINE_X (s16);
185 SCALARMULT_DEFINE_X (u16);
186 SCALARMULT_DEFINE_X (s32);
187 SCALARMULT_DEFINE_X (u32);
188 SCALARMULT_DEFINE_X (f32);
189 SCALARMULT_DEFINE_X (f64);
/*
 * Accessor functions for the _unroll2 variant, one per element type suffix
 * (s8, u8, s16, u16, s32, u32, f32, f64). Each takes no arguments and
 * returns the address of the matching
 * _oil_function_impl_scalarmult_<type>_unroll2 object.
 *
 * NOTE(review): the closing brace of every function and the lines between
 * consecutive functions are missing from this listing (embedded numbering
 * skips five lines each time, e.g. 194 -> 200). The referenced objects are
 * not defined in the visible text; presumably OIL_DEFINE_IMPL creates them
 * and these accessors exist for builds that cannot export data symbols
 * directly -- confirm against the full file.
 */
193 OilFunctionImpl* __oil_function_impl_scalarmult_s8_unroll2() {
194 return &_oil_function_impl_scalarmult_s8_unroll2;
200 OilFunctionImpl* __oil_function_impl_scalarmult_u8_unroll2() {
201 return &_oil_function_impl_scalarmult_u8_unroll2;
207 OilFunctionImpl* __oil_function_impl_scalarmult_s16_unroll2() {
208 return &_oil_function_impl_scalarmult_s16_unroll2;
214 OilFunctionImpl* __oil_function_impl_scalarmult_u16_unroll2() {
215 return &_oil_function_impl_scalarmult_u16_unroll2;
221 OilFunctionImpl* __oil_function_impl_scalarmult_s32_unroll2() {
222 return &_oil_function_impl_scalarmult_s32_unroll2;
228 OilFunctionImpl* __oil_function_impl_scalarmult_u32_unroll2() {
229 return &_oil_function_impl_scalarmult_u32_unroll2;
235 OilFunctionImpl* __oil_function_impl_scalarmult_f32_unroll2() {
236 return &_oil_function_impl_scalarmult_f32_unroll2;
242 OilFunctionImpl* __oil_function_impl_scalarmult_f64_unroll2() {
243 return &_oil_function_impl_scalarmult_f64_unroll2;
/*
 * Accessor functions for the _unroll4 variant, one per element type suffix
 * (s8, u8, s16, u16, s32, u32, f32, f64). Each takes no arguments and
 * returns the address of the matching
 * _oil_function_impl_scalarmult_<type>_unroll4 object.
 *
 * NOTE(review): the closing brace of every function and the lines between
 * consecutive functions are missing from this listing (embedded numbering
 * skips five lines each time, e.g. 250 -> 256). The referenced objects are
 * not defined in the visible text; presumably OIL_DEFINE_IMPL creates them
 * -- confirm against the full file.
 */
249 OilFunctionImpl* __oil_function_impl_scalarmult_s8_unroll4() {
250 return &_oil_function_impl_scalarmult_s8_unroll4;
256 OilFunctionImpl* __oil_function_impl_scalarmult_u8_unroll4() {
257 return &_oil_function_impl_scalarmult_u8_unroll4;
263 OilFunctionImpl* __oil_function_impl_scalarmult_s16_unroll4() {
264 return &_oil_function_impl_scalarmult_s16_unroll4;
270 OilFunctionImpl* __oil_function_impl_scalarmult_u16_unroll4() {
271 return &_oil_function_impl_scalarmult_u16_unroll4;
277 OilFunctionImpl* __oil_function_impl_scalarmult_s32_unroll4() {
278 return &_oil_function_impl_scalarmult_s32_unroll4;
284 OilFunctionImpl* __oil_function_impl_scalarmult_u32_unroll4() {
285 return &_oil_function_impl_scalarmult_u32_unroll4;
291 OilFunctionImpl* __oil_function_impl_scalarmult_f32_unroll4() {
292 return &_oil_function_impl_scalarmult_f32_unroll4;
298 OilFunctionImpl* __oil_function_impl_scalarmult_f64_unroll4() {
299 return &_oil_function_impl_scalarmult_f64_unroll4;
/*
 * Accessor functions for the _unroll2x variant, one per element type suffix
 * (s8, u8, s16, u16, s32, u32, f32, f64). Each takes no arguments and
 * returns the address of the matching
 * _oil_function_impl_scalarmult_<type>_unroll2x object.
 *
 * NOTE(review): the closing brace of every function and the lines between
 * consecutive functions are missing from this listing (embedded numbering
 * skips five lines each time, e.g. 306 -> 312). The referenced objects are
 * not defined in the visible text; presumably OIL_DEFINE_IMPL creates them
 * -- confirm against the full file.
 */
305 OilFunctionImpl* __oil_function_impl_scalarmult_s8_unroll2x() {
306 return &_oil_function_impl_scalarmult_s8_unroll2x;
312 OilFunctionImpl* __oil_function_impl_scalarmult_u8_unroll2x() {
313 return &_oil_function_impl_scalarmult_u8_unroll2x;
319 OilFunctionImpl* __oil_function_impl_scalarmult_s16_unroll2x() {
320 return &_oil_function_impl_scalarmult_s16_unroll2x;
326 OilFunctionImpl* __oil_function_impl_scalarmult_u16_unroll2x() {
327 return &_oil_function_impl_scalarmult_u16_unroll2x;
333 OilFunctionImpl* __oil_function_impl_scalarmult_s32_unroll2x() {
334 return &_oil_function_impl_scalarmult_s32_unroll2x;
340 OilFunctionImpl* __oil_function_impl_scalarmult_u32_unroll2x() {
341 return &_oil_function_impl_scalarmult_u32_unroll2x;
347 OilFunctionImpl* __oil_function_impl_scalarmult_f32_unroll2x() {
348 return &_oil_function_impl_scalarmult_f32_unroll2x;
354 OilFunctionImpl* __oil_function_impl_scalarmult_f64_unroll2x() {
355 return &_oil_function_impl_scalarmult_f64_unroll2x;
/*
 * Accessor functions for the _x variant, one per element type suffix
 * (s8, u8, s16, u16, s32, u32, f32, f64). Each takes no arguments and
 * returns the address of the matching
 * _oil_function_impl_scalarmult_<type>_x object.
 *
 * NOTE(review): the closing brace of every function and the lines between
 * consecutive functions are missing from this listing (embedded numbering
 * skips five lines each time, e.g. 362 -> 368). The referenced objects are
 * not defined in the visible text; presumably OIL_DEFINE_IMPL creates them
 * -- confirm against the full file.
 */
361 OilFunctionImpl* __oil_function_impl_scalarmult_s8_x() {
362 return &_oil_function_impl_scalarmult_s8_x;
368 OilFunctionImpl* __oil_function_impl_scalarmult_u8_x() {
369 return &_oil_function_impl_scalarmult_u8_x;
375 OilFunctionImpl* __oil_function_impl_scalarmult_s16_x() {
376 return &_oil_function_impl_scalarmult_s16_x;
382 OilFunctionImpl* __oil_function_impl_scalarmult_u16_x() {
383 return &_oil_function_impl_scalarmult_u16_x;
389 OilFunctionImpl* __oil_function_impl_scalarmult_s32_x() {
390 return &_oil_function_impl_scalarmult_s32_x;
396 OilFunctionImpl* __oil_function_impl_scalarmult_u32_x() {
397 return &_oil_function_impl_scalarmult_u32_x;
403 OilFunctionImpl* __oil_function_impl_scalarmult_f32_x() {
404 return &_oil_function_impl_scalarmult_f32_x;
410 OilFunctionImpl* __oil_function_impl_scalarmult_f64_x() {
411 return &_oil_function_impl_scalarmult_f64_x;