1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/liboil/src/composite_sse.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,195 @@
1.4 +/*
1.5 + * Copyright (c) 2005
1.6 + * Eric Anholt. All rights reserved.
1.7 + *
1.8 + * Redistribution and use in source and binary forms, with or without
1.9 + * modification, are permitted provided that the following conditions
1.10 + * are met:
1.11 + * 1. Redistributions of source code must retain the above copyright
1.12 + * notice, this list of conditions and the following disclaimer.
1.13 + * 2. Redistributions in binary form must reproduce the above copyright
1.14 + * notice, this list of conditions and the following disclaimer in the
1.15 + * documentation and/or other materials provided with the distribution.
1.16 + *
1.17 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
1.18 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1.19 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.20 + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
1.21 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1.22 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1.23 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1.24 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
1.25 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
1.26 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
1.27 + * SUCH DAMAGE.
1.28 + */
1.29 +//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
1.30 +
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdint.h>
#include <liboilclasses.h>
#include <liboilfunction.h>
#include <emmintrin.h>
#include <liboilcolorspace.h>
1.38 +
/*
 * GCC x86 function attribute placed on every SSE entry point in this file.
 * It makes the compiler emit a prologue that realigns the stack on entry, so
 * the functions stay safe when reached from callers that only keep the stack
 * 4-byte aligned.
 */
#define SSE_FUNCTION __attribute__((force_align_arg_pointer))

/*
 * Sum of two channel values, passed through oil_clamp_255 (declared outside
 * this file; presumably limits the result to 255 -- confirm against its
 * definition).
 */
#define COMPOSITE_ADD(d, s) (oil_clamp_255((d) + (s)))
1.42 +
1.43 +SSE_FUNCTION static void
1.44 +composite_add_argb_sse (uint32_t *dest, const uint32_t *src, int n)
1.45 +{
1.46 + /* Initial operations to align the destination pointer */
1.47 + for (; ((long)dest & 15) && (n > 0); n--) {
1.48 + uint32_t d = *dest, s = *src++;
1.49 +
1.50 + *dest++ = oil_argb(
1.51 + COMPOSITE_ADD(oil_argb_A(d), oil_argb_A(s)),
1.52 + COMPOSITE_ADD(oil_argb_R(d), oil_argb_R(s)),
1.53 + COMPOSITE_ADD(oil_argb_G(d), oil_argb_G(s)),
1.54 + COMPOSITE_ADD(oil_argb_B(d), oil_argb_B(s)));
1.55 + }
1.56 + for (; n >= 4; n -= 4) {
1.57 + __m128i s;
1.58 + s = _mm_loadu_si128((__m128i *)src);
1.59 + *(__m128i *)dest = _mm_adds_epu8(s, *(__m128i *)dest);
1.60 + src += 4;
1.61 + dest += 4;
1.62 + }
1.63 + for (; n > 0; n--) {
1.64 + uint32_t d = *dest, s = *src++;
1.65 +
1.66 + *dest++ = oil_argb(
1.67 + COMPOSITE_ADD(oil_argb_A(d), oil_argb_A(s)),
1.68 + COMPOSITE_ADD(oil_argb_R(d), oil_argb_R(s)),
1.69 + COMPOSITE_ADD(oil_argb_G(d), oil_argb_G(s)),
1.70 + COMPOSITE_ADD(oil_argb_B(d), oil_argb_B(s)));
1.71 + }
1.72 +}
1.73 +OIL_DEFINE_IMPL_FULL (composite_add_argb_sse, composite_add_argb,
1.74 + OIL_IMPL_FLAG_SSE2);
1.75 +
1.76 +SSE_FUNCTION static void
1.77 +composite_add_argb_const_src_sse (uint32_t *dest, const uint32_t *src_1, int n)
1.78 +{
1.79 + __m128i s;
1.80 + uint32_t val = *src_1;
1.81 +
1.82 + /* Initial operations to align the destination pointer */
1.83 + for (; ((long)dest & 15) && (n > 0); n--) {
1.84 + uint32_t d = *dest;
1.85 +
1.86 + *dest++ = oil_argb(
1.87 + COMPOSITE_ADD(oil_argb_A(d), oil_argb_A(val)),
1.88 + COMPOSITE_ADD(oil_argb_R(d), oil_argb_R(val)),
1.89 + COMPOSITE_ADD(oil_argb_G(d), oil_argb_G(val)),
1.90 + COMPOSITE_ADD(oil_argb_B(d), oil_argb_B(val)));
1.91 + }
1.92 + s = _mm_set1_epi32(val);
1.93 + for (; n >= 4; n -= 4) {
1.94 + __m128i xmm0;
1.95 + xmm0 = _mm_adds_epu8(s, *(__m128i *)dest);
1.96 + _mm_store_si128((__m128i *)dest, xmm0);
1.97 + dest += 4;
1.98 + }
1.99 + for (; n > 0; n--) {
1.100 + uint32_t d = *dest;
1.101 +
1.102 + *dest++ = oil_argb(
1.103 + COMPOSITE_ADD(oil_argb_A(d), oil_argb_A(val)),
1.104 + COMPOSITE_ADD(oil_argb_R(d), oil_argb_R(val)),
1.105 + COMPOSITE_ADD(oil_argb_G(d), oil_argb_G(val)),
1.106 + COMPOSITE_ADD(oil_argb_B(d), oil_argb_B(val)));
1.107 + }
1.108 +}
1.109 +OIL_DEFINE_IMPL_FULL (composite_add_argb_const_src_sse,
1.110 + composite_add_argb_const_src, OIL_IMPL_FLAG_SSE2);
1.111 +
1.112 +SSE_FUNCTION static void
1.113 +composite_add_u8_sse (uint8_t *dest, const uint8_t *src, int n)
1.114 +{
1.115 + /* Initial operations to align the destination pointer */
1.116 + for (; ((long)dest & 15) && (n > 0); n--) {
1.117 + int x = (int)*dest + *src++;
1.118 + if (x > 255)
1.119 + x = 255;
1.120 + *dest++ = x;
1.121 + }
1.122 + for (; n >= 16; n -= 16) {
1.123 + __m128i d, s;
1.124 + s = _mm_loadu_si128((__m128i *)src);
1.125 + d = _mm_adds_epu8(s, *(__m128i *)dest);
1.126 + _mm_store_si128((__m128i *)dest, d);
1.127 + src += 16;
1.128 + dest += 16;
1.129 + }
1.130 + for (; n > 0; n--) {
1.131 + int x = (int)*dest + *src++;
1.132 + if (x > 255)
1.133 + x = 255;
1.134 + *dest++ = x;
1.135 + }
1.136 +}
1.137 +OIL_DEFINE_IMPL_FULL (composite_add_u8_sse, composite_add_u8,
1.138 + OIL_IMPL_FLAG_SSE2);
1.139 +
1.140 +SSE_FUNCTION static void
1.141 +composite_add_u8_const_src_sse (uint8_t *dest, const uint8_t *src_1, int n)
1.142 +{
1.143 + __m128i s;
1.144 + int val = *src_1;
1.145 +
1.146 + /* Initial operations to align the destination pointer */
1.147 + for (; ((long)dest & 15) && (n > 0); n--) {
1.148 + int x = *dest + val;
1.149 + if (x > 255)
1.150 + x = 255;
1.151 + *dest++ = x;
1.152 + }
1.153 + s = _mm_set1_epi8(val);
1.154 + for (; n >= 16; n -= 16) {
1.155 + __m128i d;
1.156 + d = _mm_adds_epu8(*(__m128i *)dest, s);
1.157 + _mm_store_si128((__m128i *)dest, d);
1.158 + dest += 16;
1.159 + }
1.160 + for (; n > 0; n--) {
1.161 + int x = *dest + val;
1.162 + if (x > 255)
1.163 + x = 255;
1.164 + *dest++ = x;
1.165 + }
1.166 +}
1.167 +OIL_DEFINE_IMPL_FULL (composite_add_u8_const_src_sse,
1.168 + composite_add_u8_const_src, OIL_IMPL_FLAG_SSE2);
1.169 +
1.170 +
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for composite_add_argb_sse (presumably the object created by the
 * OIL_DEFINE_IMPL_FULL registration of that function earlier in this file).
 * Named and shaped like the other accessors in this file.
 */
OilFunctionImpl* __oil_function_impl_composite_add_argb_sse() {
  return &_oil_function_impl_composite_add_argb_sse;
}
#endif
1.177 +
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for composite_add_argb_const_src_sse (presumably the object created by the
 * OIL_DEFINE_IMPL_FULL registration of that function earlier in this file).
 */
OilFunctionImpl *
__oil_function_impl_composite_add_argb_const_src_sse (void)
{
  return &_oil_function_impl_composite_add_argb_const_src_sse;
}
#endif
1.184 +
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for composite_add_u8_sse (presumably the object created by the
 * OIL_DEFINE_IMPL_FULL registration of that function earlier in this file).
 * Named and shaped like the other accessors in this file.
 */
OilFunctionImpl* __oil_function_impl_composite_add_u8_sse() {
  return &_oil_function_impl_composite_add_u8_sse;
}
#endif
1.191 +
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessor returning the address of the implementation record
 * for composite_add_u8_const_src_sse (presumably the object created by the
 * OIL_DEFINE_IMPL_FULL registration of that function earlier in this file).
 */
OilFunctionImpl *
__oil_function_impl_composite_add_u8_const_src_sse (void)
{
  return &_oil_function_impl_composite_add_u8_const_src_sse;
}
#endif
1.198 +