sl@0: /* sl@0: * Copyright (c) 2005 sl@0: * Eric Anholt. All rights reserved. sl@0: * sl@0: * Redistribution and use in source and binary forms, with or without sl@0: * modification, are permitted provided that the following conditions sl@0: * are met: sl@0: * 1. Redistributions of source code must retain the above copyright sl@0: * notice, this list of conditions and the following disclaimer. sl@0: * 2. Redistributions in binary form must reproduce the above copyright sl@0: * notice, this list of conditions and the following disclaimer in the sl@0: * documentation and/or other materials provided with the distribution. sl@0: * sl@0: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND sl@0: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE sl@0: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE sl@0: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE sl@0: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL sl@0: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS sl@0: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) sl@0: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT sl@0: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY sl@0: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF sl@0: * SUCH DAMAGE. sl@0: */ sl@0: //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. sl@0: sl@0: #ifdef HAVE_CONFIG_H sl@0: #include "config.h" sl@0: #endif sl@0: #include "liboil/liboilclasses.h" sl@0: #include "liboil/liboilfunction.h" sl@0: #include sl@0: #include sl@0: sl@0: /* TODO: If we have gcc 4.2 or above, do this. Otherwise, disable all SSE use */ sl@0: #define SSE_FUNCTION __attribute__((force_align_arg_pointer)) sl@0: sl@0: SSE_FUNCTION static void sl@0: clamp_u8_sse (uint8_t *dest, uint8_t *src1, int n, uint8_t *src2_1, sl@0: uint8_t *src3_1) sl@0: { sl@0: __m128i xmm1, xmm2; sl@0: uint8_t min = *src2_1; sl@0: uint8_t max = *src3_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: uint8_t x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_epi8(min); sl@0: xmm2 = _mm_set1_epi8(max); sl@0: for (; n >= 16; n -= 16) { sl@0: __m128i xmm0; sl@0: xmm0 = _mm_loadu_si128((__m128i *)src1); sl@0: xmm0 = _mm_max_epu8(xmm0, xmm1); sl@0: xmm0 = _mm_min_epu8(xmm0, xmm2); sl@0: _mm_store_si128((__m128i *)dest, xmm0); sl@0: dest += 16; sl@0: src1 += 16; sl@0: } sl@0: for (; n > 0; n--) { sl@0: uint8_t x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamp_u8_sse, clamp_u8, OIL_IMPL_FLAG_SSE2); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamp_s16_sse (int16_t *dest, int16_t *src1, int n, int16_t *src2_1, sl@0: int16_t *src3_1) sl@0: { sl@0: __m128i xmm1, xmm2; sl@0: int16_t min = *src2_1; sl@0: int16_t max = *src3_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: int16_t x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_epi16(min); sl@0: xmm2 = _mm_set1_epi16(max); sl@0: for (; n >= 8; n -= 8) { sl@0: __m128i xmm0; sl@0: xmm0 = _mm_loadu_si128((__m128i *)src1); sl@0: xmm0 = _mm_max_epi16(xmm0, xmm1); sl@0: xmm0 = _mm_min_epi16(xmm0, xmm2); sl@0: _mm_store_si128((__m128i *)dest, xmm0); sl@0: dest += 8; sl@0: src1 += 8; sl@0: } sl@0: for (; n > 0; n--) { sl@0: int16_t x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamp_s16_sse, clamp_s16, OIL_IMPL_FLAG_SSE2); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamp_f32_sse (float *dest, const float *src1, int n, const float *src2_1, sl@0: const float *src3_1) sl@0: { sl@0: __m128 xmm1, xmm2; sl@0: float min = *src2_1; sl@0: float max = *src3_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: float x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set_ps1(min); sl@0: xmm2 = _mm_set_ps1(max); sl@0: for (; n >= 4; n -= 4) { sl@0: __m128 xmm0; sl@0: xmm0 = _mm_loadu_ps(src1); sl@0: xmm0 = _mm_max_ps(xmm0, xmm1); sl@0: xmm0 = _mm_min_ps(xmm0, xmm2); sl@0: _mm_store_ps(dest, xmm0); sl@0: dest += 4; sl@0: src1 += 4; sl@0: } sl@0: for (; n > 0; n--) { sl@0: float x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamp_f32_sse, clamp_f32, OIL_IMPL_FLAG_SSE); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamp_f64_sse (double *dest, const double *src1, int n, const double *src2_1, sl@0: const double *src3_1) sl@0: { sl@0: __m128d xmm1, xmm2; sl@0: double min = *src2_1; sl@0: double max = *src3_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: double x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_pd(min); sl@0: xmm2 = _mm_set1_pd(max); sl@0: for (; n >= 2; n -= 2) { sl@0: __m128d xmm0; sl@0: xmm0 = _mm_loadu_pd(src1); sl@0: xmm0 = _mm_max_pd(xmm0, xmm1); sl@0: xmm0 = _mm_min_pd(xmm0, xmm2); sl@0: _mm_store_pd(dest, xmm0); sl@0: dest += 2; sl@0: src1 += 2; sl@0: } sl@0: for (; n > 0; n--) { sl@0: double x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamp_f64_sse, clamp_f64, sl@0: OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamplow_u8_sse (uint8_t *dest, const uint8_t *src1, int n, sl@0: const uint8_t *src2_1) sl@0: { sl@0: __m128i xmm1; sl@0: uint8_t min = *src2_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: uint8_t x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_epi8(min); sl@0: for (; n >= 16; n -= 16) { sl@0: __m128i xmm0; sl@0: xmm0 = _mm_loadu_si128((__m128i *)src1); sl@0: xmm0 = _mm_max_epu8(xmm0, xmm1); sl@0: _mm_store_si128((__m128i *)dest, xmm0); sl@0: dest += 16; sl@0: src1 += 16; sl@0: } sl@0: for (; n > 0; n--) { sl@0: uint8_t x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamplow_u8_sse, clamplow_u8, OIL_IMPL_FLAG_SSE2); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamplow_s16_sse (int16_t *dest, const int16_t *src1, int n, sl@0: const int16_t *src2_1) sl@0: { sl@0: __m128i xmm1; sl@0: int16_t min = *src2_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: int16_t x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_epi16(min); sl@0: for (; n >= 8; n -= 8) { sl@0: __m128i xmm0; sl@0: xmm0 = _mm_loadu_si128((__m128i *)src1); sl@0: xmm0 = _mm_max_epi16(xmm0, xmm1); sl@0: _mm_store_si128((__m128i *)dest, xmm0); sl@0: dest += 8; sl@0: src1 += 8; sl@0: } sl@0: for (; n > 0; n--) { sl@0: int16_t x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamplow_s16_sse, clamplow_s16, OIL_IMPL_FLAG_SSE2); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamplow_f32_sse (float *dest, const float *src1, int n, const float *src2_1) sl@0: { sl@0: __m128 xmm1; sl@0: float min = *src2_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: float x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set_ps1(min); sl@0: for (; n >= 4; n -= 4) { sl@0: __m128 xmm0; sl@0: xmm0 = _mm_loadu_ps(src1); sl@0: xmm0 = _mm_max_ps(xmm0, xmm1); sl@0: _mm_store_ps(dest, xmm0); sl@0: dest += 4; sl@0: src1 += 4; sl@0: } sl@0: for (; n > 0; n--) { sl@0: float x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamplow_f32_sse, clamplow_f32, OIL_IMPL_FLAG_SSE); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamplow_f64_sse (double *dest, const double *src1, int n, const double *src2_1) sl@0: { sl@0: __m128d xmm1; sl@0: double min = *src2_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: double x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_pd(min); sl@0: for (; n >= 2; n -= 2) { sl@0: __m128d xmm0; sl@0: xmm0 = _mm_loadu_pd(src1); sl@0: xmm0 = _mm_max_pd(xmm0, xmm1); sl@0: _mm_store_pd(dest, xmm0); sl@0: dest += 2; sl@0: src1 += 2; sl@0: } sl@0: for (; n > 0; n--) { sl@0: double x = *src1++; sl@0: if (x < min) sl@0: x = min; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamplow_f64_sse, clamplow_f64, sl@0: OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamphigh_u8_sse (uint8_t *dest, const uint8_t *src1, int n, sl@0: const uint8_t *src2_1) sl@0: { sl@0: __m128i xmm1; sl@0: uint8_t max = *src2_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: uint8_t x = *src1++; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_epi8(max); sl@0: for (; n >= 16; n -= 16) { sl@0: __m128i xmm0; sl@0: xmm0 = _mm_loadu_si128((__m128i *)src1); sl@0: xmm0 = _mm_min_epu8(xmm0, xmm1); sl@0: _mm_store_si128((__m128i *)dest, xmm0); sl@0: dest += 16; sl@0: src1 += 16; sl@0: } sl@0: for (; n > 0; n--) { sl@0: uint8_t x = *src1++; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamphigh_u8_sse, clamphigh_u8, OIL_IMPL_FLAG_SSE2); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamphigh_s16_sse (int16_t *dest, const int16_t *src1, int n, sl@0: const int16_t *src2_1) sl@0: { sl@0: __m128i xmm1; sl@0: int16_t max = *src2_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: int16_t x = *src1++; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_epi16(max); sl@0: for (; n >= 8; n -= 8) { sl@0: __m128i xmm0; sl@0: xmm0 = _mm_loadu_si128((__m128i *)src1); sl@0: xmm0 = _mm_min_epi16(xmm0, xmm1); sl@0: _mm_store_si128((__m128i *)dest, xmm0); sl@0: dest += 8; sl@0: src1 += 8; sl@0: } sl@0: for (; n > 0; n--) { sl@0: int16_t x = *src1++; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamphigh_s16_sse, clamphigh_s16, OIL_IMPL_FLAG_SSE2); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamphigh_f32_sse (float *dest, const float *src1, int n, const float *src2_1) sl@0: { sl@0: __m128 xmm1; sl@0: float max = *src2_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: float x = *src1++; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set_ps1(max); sl@0: for (; n >= 4; n -= 4) { sl@0: __m128 xmm0; sl@0: xmm0 = _mm_loadu_ps(src1); sl@0: xmm0 = _mm_min_ps(xmm0, xmm1); sl@0: _mm_store_ps(dest, xmm0); sl@0: dest += 4; sl@0: src1 += 4; sl@0: } sl@0: for (; n > 0; n--) { sl@0: float x = *src1++; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamphigh_f32_sse, clamphigh_f32, OIL_IMPL_FLAG_SSE); sl@0: sl@0: SSE_FUNCTION static void sl@0: clamphigh_f64_sse (double *dest, const double *src1, int n, const double *src2_1) sl@0: { sl@0: __m128d xmm1; sl@0: double max = *src2_1; sl@0: sl@0: /* Initial operations to align the destination pointer */ sl@0: for (; ((long)dest & 15) && (n > 0); n--) { sl@0: double x = *src1++; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: xmm1 = _mm_set1_pd(max); sl@0: for (; n >= 2; n -= 2) { sl@0: __m128d xmm0; sl@0: xmm0 = _mm_loadu_pd(src1); sl@0: xmm0 = _mm_min_pd(xmm0, xmm1); sl@0: _mm_store_pd(dest, xmm0); sl@0: dest += 2; sl@0: src1 += 2; sl@0: } sl@0: for (; n > 0; n--) { sl@0: double x = *src1++; sl@0: if (x > max) sl@0: x = max; sl@0: *dest++ = x; sl@0: } sl@0: } sl@0: OIL_DEFINE_IMPL_FULL (clamphigh_f64_sse, clamphigh_f64, sl@0: OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2); sl@0: sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamp_u8_sse, clamp_u8() { sl@0: return &_oil_function_impl_clamp_u8_sse, clamp_u8; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamp_s16_sse, clamp_s16() { sl@0: return &_oil_function_impl_clamp_s16_sse, clamp_s16; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamp_f32_sse, clamp_f32() { sl@0: return &_oil_function_impl_clamp_f32_sse, clamp_f32; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamp_f64_sse, clamp_f64() { sl@0: return &_oil_function_impl_clamp_f64_sse, clamp_f64; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamplow_u8_sse, clamplow_u8() { sl@0: return &_oil_function_impl_clamplow_u8_sse, clamplow_u8; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamplow_s16_sse, clamplow_s16() { sl@0: return &_oil_function_impl_clamplow_s16_sse, clamplow_s16; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamplow_f32_sse, clamplow_f32() { sl@0: return &_oil_function_impl_clamplow_f32_sse, clamplow_f32; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamplow_f64_sse, clamplow_f64() { sl@0: return &_oil_function_impl_clamplow_f64_sse, clamplow_f64; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamphigh_u8_sse, clamphigh_u8() { sl@0: return &_oil_function_impl_clamphigh_u8_sse, clamphigh_u8; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamphigh_s16_sse, clamphigh_s16() { sl@0: return &_oil_function_impl_clamphigh_s16_sse, clamphigh_s16; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamphigh_f32_sse, clamphigh_f32() { sl@0: return &_oil_function_impl_clamphigh_f32_sse, clamphigh_f32; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_clamphigh_f64_sse, clamphigh_f64() { sl@0: return &_oil_function_impl_clamphigh_f64_sse, clamphigh_f64; sl@0: } sl@0: #endif sl@0: