sl@0: /* sl@0: * LIBOIL - Library of Optimized Inner Loops sl@0: * Copyright (c) 2003,2004 David A. Schleef sl@0: * All rights reserved. sl@0: * sl@0: * Redistribution and use in source and binary forms, with or without sl@0: * modification, are permitted provided that the following conditions sl@0: * are met: sl@0: * 1. Redistributions of source code must retain the above copyright sl@0: * notice, this list of conditions and the following disclaimer. sl@0: * 2. Redistributions in binary form must reproduce the above copyright sl@0: * notice, this list of conditions and the following disclaimer in the sl@0: * documentation and/or other materials provided with the distribution. sl@0: * sl@0: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR sl@0: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED sl@0: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE sl@0: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, sl@0: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES sl@0: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR sl@0: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) sl@0: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, sl@0: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING sl@0: * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE sl@0: * POSSIBILITY OF SUCH DAMAGE. sl@0: */ sl@0: //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. sl@0: sl@0: #ifdef HAVE_CONFIG_H sl@0: #include "config.h" sl@0: #endif sl@0: sl@0: #include sl@0: #include "liboil/simdpack/simdpack.h" sl@0: #include sl@0: sl@0: #define ABS(x) ((x)>0 ? (x) : -(x)) sl@0: #define DSP_OP_ABS_DIFF(a,b) ABS((((int)(a)) - ((int)(b)))) sl@0: sl@0: /** sl@0: * oil_rowsad8x8_u8: sl@0: * @d_1: sl@0: * @s1_8x8: sl@0: * @s2_8x8: sl@0: * sl@0: * Calculates the sum of absolute differences between @s1_8x8 and @s1_8s8 sl@0: * for the first 4 elements of the first row, and the sum of absolute sl@0: * differences for the last 4 elements of the first row, and places the sl@0: * maximum of those values in @dest. sl@0: * sl@0: * FIXME: This function is declared incorrectly. sl@0: */ sl@0: OIL_DEFINE_CLASS (rowsad8x8_u8, sl@0: "uint32_t *d_1, uint8_t *s1_8x8, uint8_t *s2_8x8"); sl@0: /** sl@0: * oil_colsad8x8_u8: sl@0: * @d_1: sl@0: * @s1_8x8: sl@0: * @s2_8x8: sl@0: * sl@0: * Divides the 8x8 block into 16 1x4 regions, and calculates the sl@0: * sum of absolute differences between @s1_8x8 and @s2_8x8 for sl@0: * each region. The maximum of the results in each region is sl@0: * placed in @d_1. sl@0: */ sl@0: OIL_DEFINE_CLASS (colsad8x8_u8, sl@0: "uint32_t *d_1, uint8_t *s1_8x8, int ss1, uint8_t *s2_8x8, int ss2"); sl@0: sl@0: static void sl@0: rowsad8x8_u8_ref (uint32_t *dest, uint8_t *src1, uint8_t *src2) sl@0: { sl@0: uint32_t SadValue; sl@0: uint32_t SadValue1; sl@0: sl@0: SadValue = DSP_OP_ABS_DIFF (src1[0], src2[0]) + sl@0: DSP_OP_ABS_DIFF (src1[1], src2[1]) + sl@0: DSP_OP_ABS_DIFF (src1[2], src2[2]) + sl@0: DSP_OP_ABS_DIFF (src1[3], src2[3]); sl@0: sl@0: SadValue1 = DSP_OP_ABS_DIFF (src1[4], src2[4]) + sl@0: DSP_OP_ABS_DIFF (src1[5], src2[5]) + sl@0: DSP_OP_ABS_DIFF (src1[6], src2[6]) + sl@0: DSP_OP_ABS_DIFF (src1[7], src2[7]); sl@0: sl@0: *dest = (SadValue > SadValue1) ? SadValue : SadValue1; sl@0: } sl@0: OIL_DEFINE_IMPL_REF (rowsad8x8_u8_ref, rowsad8x8_u8); sl@0: sl@0: static void sl@0: colsad8x8_u8_ref (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2) sl@0: { sl@0: uint32_t SadValue[8] = {0,0,0,0,0,0,0,0}; sl@0: uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0}; sl@0: uint32_t MaxSad = 0; sl@0: uint32_t i; sl@0: sl@0: for ( i = 0; i < 4; i++ ){ sl@0: SadValue[0] += ABS(src1[0] - src2[0]); sl@0: SadValue[1] += ABS(src1[1] - src2[1]); sl@0: SadValue[2] += ABS(src1[2] - src2[2]); sl@0: SadValue[3] += ABS(src1[3] - src2[3]); sl@0: SadValue[4] += ABS(src1[4] - src2[4]); sl@0: SadValue[5] += ABS(src1[5] - src2[5]); sl@0: SadValue[6] += ABS(src1[6] - src2[6]); sl@0: SadValue[7] += ABS(src1[7] - src2[7]); sl@0: sl@0: src1 += ss1; sl@0: src2 += ss2; sl@0: } sl@0: sl@0: for ( i = 0; i < 4; i++ ){ sl@0: SadValue2[0] += ABS(src1[0] - src2[0]); sl@0: SadValue2[1] += ABS(src1[1] - src2[1]); sl@0: SadValue2[2] += ABS(src1[2] - src2[2]); sl@0: SadValue2[3] += ABS(src1[3] - src2[3]); sl@0: SadValue2[4] += ABS(src1[4] - src2[4]); sl@0: SadValue2[5] += ABS(src1[5] - src2[5]); sl@0: SadValue2[6] += ABS(src1[6] - src2[6]); sl@0: SadValue2[7] += ABS(src1[7] - src2[7]); sl@0: sl@0: src1 += ss1; sl@0: src2 += ss2; sl@0: } sl@0: sl@0: for ( i = 0; i < 8; i++ ){ sl@0: if ( SadValue[i] > MaxSad ) sl@0: MaxSad = SadValue[i]; sl@0: if ( SadValue2[i] > MaxSad ) sl@0: MaxSad = SadValue2[i]; sl@0: } sl@0: sl@0: *dest = MaxSad; sl@0: } sl@0: OIL_DEFINE_IMPL_REF (colsad8x8_u8_ref, colsad8x8_u8); sl@0: sl@0: sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionClass* __oil_function_class_rowsad8x8_u8() { sl@0: return &_oil_function_class_rowsad8x8_u8; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionClass* __oil_function_class_colsad8x8_u8() { sl@0: return &_oil_function_class_colsad8x8_u8; sl@0: } sl@0: #endif sl@0: sl@0: sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_rowsad8x8_u8_ref() { sl@0: return &_oil_function_impl_rowsad8x8_u8_ref; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_colsad8x8_u8_ref() { sl@0: return &_oil_function_impl_colsad8x8_u8_ref; sl@0: } sl@0: #endif sl@0: sl@0: sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: EXPORT_C void** _oil_function_class_ptr_rowsad8x8_u8 () { sl@0: oil_function_class_ptr_rowsad8x8_u8 = __oil_function_class_rowsad8x8_u8(); sl@0: return &oil_function_class_ptr_rowsad8x8_u8->func; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: EXPORT_C void** _oil_function_class_ptr_colsad8x8_u8 () { sl@0: oil_function_class_ptr_colsad8x8_u8 = __oil_function_class_colsad8x8_u8(); sl@0: return &oil_function_class_ptr_colsad8x8_u8->func; sl@0: } sl@0: #endif sl@0: