sl@0: /* sl@0: * LIBOIL - Library of Optimized Inner Loops sl@0: * Copyright (c) 2003,2004 David A. Schleef sl@0: * All rights reserved. sl@0: * sl@0: * Redistribution and use in source and binary forms, with or without sl@0: * modification, are permitted provided that the following conditions sl@0: * are met: sl@0: * 1. Redistributions of source code must retain the above copyright sl@0: * notice, this list of conditions and the following disclaimer. sl@0: * 2. Redistributions in binary form must reproduce the above copyright sl@0: * notice, this list of conditions and the following disclaimer in the sl@0: * documentation and/or other materials provided with the distribution. sl@0: * sl@0: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR sl@0: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED sl@0: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE sl@0: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, sl@0: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES sl@0: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR sl@0: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) sl@0: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, sl@0: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING sl@0: * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE sl@0: * POSSIBILITY OF SUCH DAMAGE. sl@0: */ sl@0: //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. sl@0: sl@0: #ifdef HAVE_CONFIG_H sl@0: #include "config.h" sl@0: #endif sl@0: sl@0: #include sl@0: sl@0: sl@0: /** sl@0: * oil_err_intra8x8_u8: sl@0: * @d_1: sl@0: * @s1_8x8: sl@0: * @ss1: sl@0: * sl@0: * Calculates the sum of squared differences from the mean over sl@0: * @s1_8x8 and places the result in @d_1. This result is 64 times sl@0: * the variance of the mean of @s1_8x8. sl@0: */ sl@0: OIL_DEFINE_CLASS (err_intra8x8_u8, sl@0: "uint32_t *d_1, uint8_t *s1_8x8, int ss1"); sl@0: /** sl@0: * oil_err_inter8x8_u8: sl@0: * @d_1: sl@0: * @s1_8x8: sl@0: * @ss1: sl@0: * @s2_8x8: sl@0: * @ss2: sl@0: * sl@0: * Calculates an intermediate 8x8 block where each element is the sl@0: * difference between @s1_8x8 and @s2_8x8. sl@0: * The sum of squares of the difference of each element in the sl@0: * intermediate block and the mean of the intermediate block is sl@0: * placed into @d_1. This result is sl@0: * equal to 64 times the variance of the mean of the intermediate block. sl@0: */ sl@0: OIL_DEFINE_CLASS (err_inter8x8_u8, sl@0: "uint32_t *d_1, uint8_t *s1_8x8, int ss1, uint8_t *s2_8x8, int ss2"); sl@0: /** sl@0: * oil_err_inter8x8_u8_avg: sl@0: * @d_1: sl@0: * @s1_8x8: sl@0: * @ss1: sl@0: * @s2_8x8: sl@0: * @s3_8x8: sl@0: * @ss2: sl@0: * sl@0: * Calculates an intermediate 8x8 block where each element is the sl@0: * difference between @s1_8x8 and the average of @s2_8x8 and @s3_8x8. sl@0: * The sum of squares of the difference of each element in the sl@0: * intermediate block and the mean of the intermediate block is sl@0: * placed into @d_1. sl@0: * This result is 64 times the variance of the mean of the intermediate sl@0: * block. sl@0: * sl@0: * FIXME: This function is broken, since the reference function uses sl@0: * @ss2 as the stride for both @s2_8x8 and @s3_8x8. sl@0: */ sl@0: OIL_DEFINE_CLASS (err_inter8x8_u8_avg, sl@0: "uint32_t *d_1, uint8_t *s1_8x8, int ss1, uint8_t *s2_8x8, uint8_t *s3_8x8, int ss2"); sl@0: sl@0: #define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2) sl@0: #define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b))) sl@0: sl@0: static void sl@0: err_intra8x8_u8_ref (uint32_t *dest, uint8_t *src1, int ss1) sl@0: { sl@0: uint32_t i; sl@0: uint32_t xsum=0; sl@0: uint32_t xxsum=0; sl@0: sl@0: for (i=8; i; i--) { sl@0: /* Examine alternate pixel locations. */ sl@0: xsum += src1[0]; sl@0: xxsum += src1[0]*src1[0]; sl@0: xsum += src1[1]; sl@0: xxsum += src1[1]*src1[1]; sl@0: xsum += src1[2]; sl@0: xxsum += src1[2]*src1[2]; sl@0: xsum += src1[3]; sl@0: xxsum += src1[3]*src1[3]; sl@0: xsum += src1[4]; sl@0: xxsum += src1[4]*src1[4]; sl@0: xsum += src1[5]; sl@0: xxsum += src1[5]*src1[5]; sl@0: xsum += src1[6]; sl@0: xxsum += src1[6]*src1[6]; sl@0: xsum += src1[7]; sl@0: xxsum += src1[7]*src1[7]; sl@0: sl@0: /* Step to next row of block. */ sl@0: src1 += ss1; sl@0: } sl@0: /* Compute population variance as mis-match metric. */ sl@0: *dest = (((xxsum<<6) - xsum*xsum )); sl@0: } sl@0: OIL_DEFINE_IMPL_REF (err_intra8x8_u8_ref, err_intra8x8_u8); sl@0: sl@0: static void sl@0: err_inter8x8_u8_ref (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2) sl@0: { sl@0: uint32_t i; sl@0: uint32_t xsum=0; sl@0: uint32_t xxsum=0; sl@0: int32_t diff; sl@0: sl@0: for (i=8; i; i--) { sl@0: diff = DSP_OP_DIFF (src1[0], src2[0]); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF (src1[1], src2[1]); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF (src1[2], src2[2]); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF (src1[3], src2[3]); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF (src1[4], src2[4]); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF (src1[5], src2[5]); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF (src1[6], src2[6]); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF (src1[7], src2[7]); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: /* Step to next row of block. */ sl@0: src1 += ss1; sl@0: src2 += ss2; sl@0: } sl@0: sl@0: /* Compute and return population variance as mis-match metric. */ sl@0: *dest = (((xxsum<<6) - xsum*xsum)); sl@0: } sl@0: OIL_DEFINE_IMPL_REF (err_inter8x8_u8_ref, err_inter8x8_u8); sl@0: sl@0: static void sl@0: err_inter8x8_u8_avg_ref (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, uint8_t *src3, int ss2) sl@0: { sl@0: uint32_t i; sl@0: uint32_t xsum=0; sl@0: uint32_t xxsum=0; sl@0: int32_t diff; sl@0: sl@0: for (i=8; i; i--) { sl@0: diff = DSP_OP_DIFF(src1[0], DSP_OP_AVG (src2[0], src3[0])); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF(src1[1], DSP_OP_AVG (src2[1], src3[1])); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF(src1[2], DSP_OP_AVG (src2[2], src3[2])); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF(src1[3], DSP_OP_AVG (src2[3], src3[3])); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF(src1[4], DSP_OP_AVG (src2[4], src3[4])); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF(src1[5], DSP_OP_AVG (src2[5], src3[5])); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF(src1[6], DSP_OP_AVG (src2[6], src3[6])); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: diff = DSP_OP_DIFF(src1[7], DSP_OP_AVG (src2[7], src3[7])); sl@0: xsum += diff; sl@0: xxsum += diff*diff; sl@0: sl@0: /* Step to next row of block. */ sl@0: src1 += ss1; sl@0: src2 += ss2; sl@0: src3 += ss2; sl@0: } sl@0: sl@0: /* Compute and return population variance as mis-match metric. */ sl@0: *dest = (((xxsum<<6) - xsum*xsum)); sl@0: } sl@0: sl@0: OIL_DEFINE_IMPL_REF (err_inter8x8_u8_avg_ref, err_inter8x8_u8_avg); sl@0: sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionClass* __oil_function_class_err_intra8x8_u8() { sl@0: return &_oil_function_class_err_intra8x8_u8; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionClass* __oil_function_class_err_inter8x8_u8() { sl@0: return &_oil_function_class_err_inter8x8_u8; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionClass* __oil_function_class_err_inter8x8_u8_avg() { sl@0: return &_oil_function_class_err_inter8x8_u8_avg; sl@0: } sl@0: #endif sl@0: sl@0: sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_err_intra8x8_u8_ref() { sl@0: return &_oil_function_impl_err_intra8x8_u8_ref; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_ref() { sl@0: return &_oil_function_impl_err_inter8x8_u8_ref; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_avg_ref() { sl@0: return &_oil_function_impl_err_inter8x8_u8_avg_ref; sl@0: } sl@0: #endif sl@0: sl@0: sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: EXPORT_C void** _oil_function_class_ptr_err_intra8x8_u8 () { sl@0: oil_function_class_ptr_err_intra8x8_u8 = __oil_function_class_err_intra8x8_u8(); sl@0: return &oil_function_class_ptr_err_intra8x8_u8->func; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: EXPORT_C void** _oil_function_class_ptr_err_inter8x8_u8 () { sl@0: oil_function_class_ptr_err_inter8x8_u8 = __oil_function_class_err_inter8x8_u8(); sl@0: return &oil_function_class_ptr_err_inter8x8_u8->func; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: EXPORT_C void** _oil_function_class_ptr_err_inter8x8_u8_avg () { sl@0: oil_function_class_ptr_err_inter8x8_u8_avg = __oil_function_class_err_inter8x8_u8_avg(); sl@0: return &oil_function_class_ptr_err_inter8x8_u8_avg->func; sl@0: } sl@0: #endif sl@0: