First public contribution.
2 * LIBOIL - Library of Optimized Inner Loops
3 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
27 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
33 #include <liboil/liboilfunction.h>
34 #include "liboil/simdpack/simdpack.h"
38 #pragma warn_emptydecl off
/*
 * SCALARADD_DEFINE_UNROLL2(type)
 *
 * Expands to a static function
 *   scalaradd_<type>_unroll2(dest, dstr, src, sstr, val, n)
 * operating on oil_type_<type> pointers, and then passes that function to
 * OIL_DEFINE_IMPL together with the name scalaradd_<type>.
 *
 * Every visible step stores (*src + *val) into *dest and then applies
 * OIL_INCREMENT to dest with dstr and to src with sstr. One such step
 * appears first, followed by a group of two identical steps. *val is only
 * read, never advanced.
 *
 * NOTE(review): the embedded listing numbers jump 46->49, 51->55 and
 * 60->64, so the braces and the control statements surrounding these steps
 * are not visible in this listing. Presumably the single step handles an
 * odd n and the pair of steps is a loop body that consumes two elements per
 * iteration -- confirm against the complete file.
 * NOTE(review): presumably dstr/sstr are byte strides consumed by
 * OIL_INCREMENT -- verify against the liboil headers.
 */
42 #define SCALARADD_DEFINE_UNROLL2(type) \
43 static void scalaradd_ ## type ## _unroll2( \
44 oil_type_ ## type *dest, int dstr, \
45 oil_type_ ## type *src, int sstr, \
46 oil_type_ ## type *val, int n) \
49 *dest = *src + *val; \
50 OIL_INCREMENT(dest, dstr); \
51 OIL_INCREMENT(src, sstr); \
55 *dest = *src + *val; \
56 OIL_INCREMENT(dest,dstr); \
57 OIL_INCREMENT(src,sstr); \
58 *dest = *src + *val; \
59 OIL_INCREMENT(dest,dstr); \
60 OIL_INCREMENT(src,sstr); \
64 OIL_DEFINE_IMPL (scalaradd_ ## type ## _unroll2, scalaradd_ ## type);
/*
 * Expand SCALARADD_DEFINE_UNROLL2 once for each of the type suffixes
 * s8, u8, s16, u16, s32, u32, f32 and f64. The suffix is token-pasted into
 * both the element type (oil_type_<suffix>) and the generated function name
 * (scalaradd_<suffix>_unroll2).
 */
66 SCALARADD_DEFINE_UNROLL2 (s8);
67 SCALARADD_DEFINE_UNROLL2 (u8);
68 SCALARADD_DEFINE_UNROLL2 (s16);
69 SCALARADD_DEFINE_UNROLL2 (u16);
70 SCALARADD_DEFINE_UNROLL2 (s32);
71 SCALARADD_DEFINE_UNROLL2 (u32);
72 SCALARADD_DEFINE_UNROLL2 (f32);
73 SCALARADD_DEFINE_UNROLL2 (f64);
/*
 * SCALARADD_DEFINE_UNROLL2x(type)
 *
 * Expands to a static function
 *   scalaradd_<type>_unroll2x(dest, dstr, src, sstr, val, n)
 * and then passes it to OIL_DEFINE_IMPL together with scalaradd_<type>.
 *
 * Visible steps, in order:
 *   - declare two extra cursors, dest2 and src2;
 *   - one step that stores (*src + *val) into *dest and applies
 *     OIL_INCREMENT to dest (with dstr) and src (with sstr);
 *   - set src2 = OIL_OFFSET(src, sstr) and dest2 = OIL_OFFSET(dest, dstr),
 *     i.e. the second cursor pair is derived from the first pair and one
 *     stride;
 *   - two indexed stores through OIL_GET: the element of dest at offset
 *     dstr*i receives the element of src at offset sstr*i plus *val, and
 *     the same is done for dest2/src2.
 *
 * NOTE(review): the embedded listing numbers jump 79->81, 82->85, 87->89,
 * 90->95 and 96->99. The declaration of i, the loop that drives it, any
 * adjustment of n/sstr/dstr between the OIL_OFFSET lines and the indexed
 * stores, and all braces are not visible in this listing. Presumably the
 * single step handles an odd n and the indexed stores form a loop body that
 * processes two interleaved elements per iteration -- confirm against the
 * complete file before relying on this.
 */
75 #define SCALARADD_DEFINE_UNROLL2x(type) \
76 static void scalaradd_ ## type ## _unroll2x( \
77 oil_type_ ## type *dest, int dstr, \
78 oil_type_ ## type *src, int sstr, \
79 oil_type_ ## type *val, int n) \
81 oil_type_ ## type *dest2; \
82 oil_type_ ## type *src2; \
85 *dest = *src + *val; \
86 OIL_INCREMENT(dest, dstr); \
87 OIL_INCREMENT(src, sstr); \
89 src2 = OIL_OFFSET(src, sstr); \
90 dest2 = OIL_OFFSET(dest, dstr); \
95 OIL_GET(dest,dstr*i, oil_type_ ## type) = OIL_GET(src,sstr*i, oil_type_ ## type) + *val; \
96 OIL_GET(dest2,dstr*i, oil_type_ ## type) = OIL_GET(src2,sstr*i, oil_type_ ## type) + *val; \
99 OIL_DEFINE_IMPL (scalaradd_ ## type ## _unroll2x, scalaradd_ ## type);
/*
 * Expand SCALARADD_DEFINE_UNROLL2x once for each of the type suffixes
 * s8, u8, s16, u16, s32, u32, f32 and f64, producing the functions
 * scalaradd_<suffix>_unroll2x.
 */
101 SCALARADD_DEFINE_UNROLL2x (s8);
102 SCALARADD_DEFINE_UNROLL2x (u8);
103 SCALARADD_DEFINE_UNROLL2x (s16);
104 SCALARADD_DEFINE_UNROLL2x (u16);
105 SCALARADD_DEFINE_UNROLL2x (s32);
106 SCALARADD_DEFINE_UNROLL2x (u32);
107 SCALARADD_DEFINE_UNROLL2x (f32);
108 SCALARADD_DEFINE_UNROLL2x (f64);
/*
 * SCALARADD_DEFINE_UNROLL4(type)
 *
 * Expands to a static function
 *   scalaradd_<type>_unroll4(dest, dstr, src, sstr, val, n)
 * and then passes it to OIL_DEFINE_IMPL together with scalaradd_<type>.
 *
 * Every visible step stores (*src + *val) into *dest and then applies
 * OIL_INCREMENT to dest with dstr and to src with sstr. The steps appear in
 * three groups: a single step, then two steps, then four steps.
 *
 * NOTE(review): the embedded listing numbers jump 114->117, 119->122,
 * 127->131 and 142->146, so the braces and the control statements around
 * each group are not visible in this listing. Presumably the one-step and
 * two-step groups consume the remainder of n modulo 4 and the four-step
 * group is a loop body handling four elements per iteration -- confirm
 * against the complete file.
 */
110 #define SCALARADD_DEFINE_UNROLL4(type) \
111 static void scalaradd_ ## type ## _unroll4( \
112 oil_type_ ## type *dest, int dstr, \
113 oil_type_ ## type *src, int sstr, \
114 oil_type_ ## type *val, int n) \
117 *dest = *src + *val; \
118 OIL_INCREMENT(dest, dstr); \
119 OIL_INCREMENT(src, sstr); \
122 *dest = *src + *val; \
123 OIL_INCREMENT(dest, dstr); \
124 OIL_INCREMENT(src, sstr); \
125 *dest = *src + *val; \
126 OIL_INCREMENT(dest, dstr); \
127 OIL_INCREMENT(src, sstr); \
131 *dest = *src + *val; \
132 OIL_INCREMENT(dest,dstr); \
133 OIL_INCREMENT(src,sstr); \
134 *dest = *src + *val; \
135 OIL_INCREMENT(dest,dstr); \
136 OIL_INCREMENT(src,sstr); \
137 *dest = *src + *val; \
138 OIL_INCREMENT(dest,dstr); \
139 OIL_INCREMENT(src,sstr); \
140 *dest = *src + *val; \
141 OIL_INCREMENT(dest,dstr); \
142 OIL_INCREMENT(src,sstr); \
146 OIL_DEFINE_IMPL (scalaradd_ ## type ## _unroll4, scalaradd_ ## type);
/*
 * Expand SCALARADD_DEFINE_UNROLL4 once for each of the type suffixes
 * s8, u8, s16, u16, s32, u32, f32 and f64, producing the functions
 * scalaradd_<suffix>_unroll4.
 */
148 SCALARADD_DEFINE_UNROLL4 (s8);
149 SCALARADD_DEFINE_UNROLL4 (u8);
150 SCALARADD_DEFINE_UNROLL4 (s16);
151 SCALARADD_DEFINE_UNROLL4 (u16);
152 SCALARADD_DEFINE_UNROLL4 (s32);
153 SCALARADD_DEFINE_UNROLL4 (u32);
154 SCALARADD_DEFINE_UNROLL4 (f32);
155 SCALARADD_DEFINE_UNROLL4 (f64);
/*
 * Accessor functions for the unroll2 variants, one per type suffix
 * (s8, u8, s16, u16, s32, u32, f32, f64). Each takes no arguments and
 * returns the address of the object _oil_function_impl_scalaradd_<type>_unroll2
 * as an OilFunctionImpl pointer.
 *
 * NOTE(review): presumably those objects are the ones created by the
 * OIL_DEFINE_IMPL expansions for scalaradd_<type>_unroll2 -- verify against
 * the liboil headers.
 * NOTE(review): the embedded listing numbers skip lines between consecutive
 * functions, so each closing brace (and anything else on the skipped lines,
 * such as preprocessor guards) is not visible in this listing.
 */
160 OilFunctionImpl* __oil_function_impl_scalaradd_s8_unroll2() {
161 return &_oil_function_impl_scalaradd_s8_unroll2;
167 OilFunctionImpl* __oil_function_impl_scalaradd_u8_unroll2() {
168 return &_oil_function_impl_scalaradd_u8_unroll2;
174 OilFunctionImpl* __oil_function_impl_scalaradd_s16_unroll2() {
175 return &_oil_function_impl_scalaradd_s16_unroll2;
181 OilFunctionImpl* __oil_function_impl_scalaradd_u16_unroll2() {
182 return &_oil_function_impl_scalaradd_u16_unroll2;
188 OilFunctionImpl* __oil_function_impl_scalaradd_s32_unroll2() {
189 return &_oil_function_impl_scalaradd_s32_unroll2;
195 OilFunctionImpl* __oil_function_impl_scalaradd_u32_unroll2() {
196 return &_oil_function_impl_scalaradd_u32_unroll2;
202 OilFunctionImpl* __oil_function_impl_scalaradd_f32_unroll2() {
203 return &_oil_function_impl_scalaradd_f32_unroll2;
209 OilFunctionImpl* __oil_function_impl_scalaradd_f64_unroll2() {
210 return &_oil_function_impl_scalaradd_f64_unroll2;
/*
 * Accessor functions for the unroll4 variants, one per type suffix
 * (s8, u8, s16, u16, s32, u32, f32, f64). Each takes no arguments and
 * returns the address of the object _oil_function_impl_scalaradd_<type>_unroll4
 * as an OilFunctionImpl pointer.
 *
 * NOTE(review): presumably those objects are the ones created by the
 * OIL_DEFINE_IMPL expansions for scalaradd_<type>_unroll4 -- verify against
 * the liboil headers.
 * NOTE(review): the embedded listing numbers skip lines between consecutive
 * functions, so each closing brace (and anything else on the skipped lines)
 * is not visible in this listing.
 */
216 OilFunctionImpl* __oil_function_impl_scalaradd_s8_unroll4() {
217 return &_oil_function_impl_scalaradd_s8_unroll4;
222 OilFunctionImpl* __oil_function_impl_scalaradd_u8_unroll4() {
223 return &_oil_function_impl_scalaradd_u8_unroll4;
229 OilFunctionImpl* __oil_function_impl_scalaradd_s16_unroll4() {
230 return &_oil_function_impl_scalaradd_s16_unroll4;
236 OilFunctionImpl* __oil_function_impl_scalaradd_u16_unroll4() {
237 return &_oil_function_impl_scalaradd_u16_unroll4;
243 OilFunctionImpl* __oil_function_impl_scalaradd_s32_unroll4() {
244 return &_oil_function_impl_scalaradd_s32_unroll4;
250 OilFunctionImpl* __oil_function_impl_scalaradd_u32_unroll4() {
251 return &_oil_function_impl_scalaradd_u32_unroll4;
257 OilFunctionImpl* __oil_function_impl_scalaradd_f32_unroll4() {
258 return &_oil_function_impl_scalaradd_f32_unroll4;
264 OilFunctionImpl* __oil_function_impl_scalaradd_f64_unroll4() {
265 return &_oil_function_impl_scalaradd_f64_unroll4;
/*
 * Accessor functions for the unroll2x variants, one per type suffix
 * (s8, u8, s16, u16, s32, u32, f32, f64). Each takes no arguments and
 * returns the address of the object _oil_function_impl_scalaradd_<type>_unroll2x
 * as an OilFunctionImpl pointer.
 *
 * NOTE(review): presumably those objects are the ones created by the
 * OIL_DEFINE_IMPL expansions for scalaradd_<type>_unroll2x -- verify against
 * the liboil headers.
 * NOTE(review): the embedded listing numbers skip lines between consecutive
 * functions, so each closing brace (and anything else on the skipped lines)
 * is not visible in this listing.
 */
271 OilFunctionImpl* __oil_function_impl_scalaradd_s8_unroll2x() {
272 return &_oil_function_impl_scalaradd_s8_unroll2x;
278 OilFunctionImpl* __oil_function_impl_scalaradd_u8_unroll2x() {
279 return &_oil_function_impl_scalaradd_u8_unroll2x;
285 OilFunctionImpl* __oil_function_impl_scalaradd_s16_unroll2x() {
286 return &_oil_function_impl_scalaradd_s16_unroll2x;
292 OilFunctionImpl* __oil_function_impl_scalaradd_u16_unroll2x() {
293 return &_oil_function_impl_scalaradd_u16_unroll2x;
299 OilFunctionImpl* __oil_function_impl_scalaradd_s32_unroll2x() {
300 return &_oil_function_impl_scalaradd_s32_unroll2x;
306 OilFunctionImpl* __oil_function_impl_scalaradd_u32_unroll2x() {
307 return &_oil_function_impl_scalaradd_u32_unroll2x;
313 OilFunctionImpl* __oil_function_impl_scalaradd_f32_unroll2x() {
314 return &_oil_function_impl_scalaradd_f32_unroll2x;
320 OilFunctionImpl* __oil_function_impl_scalaradd_f64_unroll2x() {
321 return &_oil_function_impl_scalaradd_f64_unroll2x;