os/ossrv/genericopenlibs/liboil/src/i386/diff8x8_i386.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
     1 /*
     2  * LIBOIL - Library of Optimized Inner Loops
     3  * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
     4  * All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions
     8  * are met:
     9  * 1. Redistributions of source code must retain the above copyright
    10  *    notice, this list of conditions and the following disclaimer.
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  *    notice, this list of conditions and the following disclaimer in the
    13  *    documentation and/or other materials provided with the distribution.
    14  * 
    15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
    19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
    24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    25  * POSSIBILITY OF SUCH DAMAGE.
    26  */
    27 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    28 
    29 #ifdef HAVE_CONFIG_H
    30 #include "config.h"
    31 #endif
    32 
    33 #include <liboil/liboilfunction.h>
    34 
    /* Function classes that the MMX implementations in this file are registered against. */
    35 OIL_DECLARE_CLASS (diff8x8_s16_u8);
    36 OIL_DECLARE_CLASS (diff8x8_const128_s16_u8);
    37 OIL_DECLARE_CLASS (diff8x8_average_s16_u8);
    38 
    39 
    /*
     * diff8x8_s16_u8_mmx: MMX implementation registered for the
     * diff8x8_s16_u8 class.
     *
     * For each of 8 rows it loads 8 bytes from src1 and 8 bytes from src2,
     * zero-extends them to 16-bit words (unpack against the zeroed mm7) and
     * stores the 8 word differences src1 - src2 to dest.
     *
     * dest advances by 16 bytes per row, so the 64 results are written
     * contiguously.  src1 and src2 advance by ss1 and ss2 bytes per row.
     *
     * When __WINSCW__ or __WINS__ is defined the asm is compiled out and the
     * function is an empty stub that leaves dest untouched.
     */
    40 static void
    41 diff8x8_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2)
    42 {
    43 #if !defined(__WINSCW__) && !defined(__WINS__)      
    44   __asm__ __volatile__ (
    45     "  pxor        %%mm7, %%mm7     \n\t" 
    46
       /* LOOP processes one row; it is expanded 8 times below instead of branching. */
    47 #define LOOP \
    48     "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */ \
    49     "  movq        (%1), %%mm1      \n\t" /* mm1 = ReconPtr */ \
    50     "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */ \
    51     "  movq        %%mm1, %%mm3     \n\t" /* dup to prepare for up conversion */ \
    52     /* convert from UINT8 to INT16 */ \
    53     "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */ \
    54     "  punpcklbw   %%mm7, %%mm1     \n\t" /* mm1 = INT16(ReconPtr) */ \
    55     "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */ \
    56     "  punpckhbw   %%mm7, %%mm3     \n\t" /* mm3 = INT16(ReconPtr) */ \
    57     /* start calculation */ \
    58     "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - ReconPtr */ \
    59     "  psubw       %%mm3, %%mm2     \n\t" /* mm2 = FiltPtr - ReconPtr */ \
    60     "  movq        %%mm0,  (%2)     \n\t" /* write answer out */ \
    61     "  movq        %%mm2, 8(%2)     \n\t" /* write answer out */ \
    62     /* Increment pointers */ \
    63     "  add         $16, %2          \n\t" \
    64     "  add         %3, %0           \n\t" \
    65     "  add         %4, %1           \n\t"
    66
    67     LOOP
    68     LOOP
    69     LOOP
    70     LOOP
    71     LOOP
    72     LOOP
    73     LOOP
    74     LOOP
    75 #undef LOOP
    76
    77     "  emms                         \n\t"
    78
       /* Operand map: %0 = src1, %1 = src2, %2 = dest (all read-write
        * registers, advanced by the asm); %3 = ss1, %4 = ss2 (memory inputs).
        * NOTE(review): the strides are "m" operands, presumably to reduce
        * register pressure on i386 -- confirm before changing. */
    79      : "+r" (src1),
    80        "+r" (src2),
    81        "+r" (dest)
    82      : "m" (ss1),
    83        "m" (ss2) 
    84      : "memory"
    85   );
    86 #endif  
    87 }
    88 OIL_DEFINE_IMPL_FULL (diff8x8_s16_u8_mmx, diff8x8_s16_u8, OIL_IMPL_FLAG_MMX);
    89 
    /*
     * diff8x8_const128_s16_u8_mmx: MMX implementation registered for the
     * diff8x8_const128_s16_u8 class.
     *
     * For each of 8 rows it loads 8 bytes from src1, zero-extends them to
     * 16-bit words and stores the 8 word results src1 - 128 to dest.
     *
     * dest advances by 16 bytes per row, so the 64 results are written
     * contiguously.  src1 advances by ss1 bytes per row.
     *
     * When __WINSCW__ or __WINS__ is defined the asm is compiled out and the
     * function is an empty stub that leaves dest untouched.
     */
    90 static void
    91 diff8x8_const128_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1)
    92 {
    93 #if !defined(__WINSCW__) && !defined(__WINS__)      
       /* Four 16-bit lanes of 128; loaded once into mm1 and subtracted from every row. */
    94   const int16_t tmp[4] = { 0x0080, 0x0080, 0x0080, 0x0080 };
    95
    96   __asm__ __volatile__ (
    97     "  pxor        %%mm7, %%mm7     \n\t" 
    98     "  movq        (%3), %%mm1  \n\t"
    99
       /* LOOP processes one row; it is expanded 8 times below instead of branching. */
   100 #define LOOP \
   101     "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */ \
   102     "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */ \
   103     /* convert from UINT8 to INT16 */ \
   104     "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */ \
   105     "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */ \
   106     /* start calculation */ \
   107     "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - 128 */ \
   108     "  psubw       %%mm1, %%mm2     \n\t" /* mm2 = FiltPtr - 128 */ \
   109     "  movq        %%mm0,  (%1)     \n\t" /* write answer out */ \
   110     "  movq        %%mm2, 8(%1)     \n\t" /* write answer out */ \
   111     /* Increment pointers */ \
   112     "  add         $16, %1           \n\t" \
   113     "  add         %2, %0           \n\t"
   114
   115     LOOP
   116     LOOP
   117     LOOP
   118     LOOP
   119     LOOP
   120     LOOP
   121     LOOP
   122     LOOP
   123 #undef LOOP
   124
   125     "  emms                         \n\t"
   126
       /* Operand map: %0 = src1, %1 = dest (read-write registers, advanced by
        * the asm); %2 = ss1, %3 = address of tmp (register inputs). */
   127      : "+r" (src1),
   128        "+r" (dest)
   129      : "r" (ss1),
   130        "r" (tmp)
   131      : "memory"
   132   );
   133 #endif  
   134 }
   135 OIL_DEFINE_IMPL_FULL (diff8x8_const128_s16_u8_mmx, diff8x8_const128_s16_u8, OIL_IMPL_FLAG_MMX);
   136 
   /*
    * diff8x8_average_s16_u8_mmx: MMX implementation registered for the
    * diff8x8_average_s16_u8 class.
    *
    * For each of 8 rows it loads 8 bytes from each of src1, src2 and src3,
    * zero-extends them to 16-bit words and stores the 8 word results
    * src1 - ((src2 + src3) >> 1) to dest.  The halving is a logical right
    * shift, so the average is truncated rather than rounded.
    *
    * dest advances by 16 bytes per row, so the 64 results are written
    * contiguously.  src1 advances by ss1 bytes per row; src2 and src3 both
    * advance by ss2 bytes per row.
    *
    * NOTE(review): src3 has no stride parameter of its own and is stepped by
    * ss2 (operand %5).  Confirm this matches the class prototype and the
    * callers' buffer layout.
    *
    * When __WINSCW__ or __WINS__ is defined the asm is compiled out and the
    * function is an empty stub that leaves dest untouched.
    */
   137 static void
   138 diff8x8_average_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2, uint8_t *src3)
   139 {
   140 #if !defined(__WINSCW__) && !defined(__WINS__)      
   141   __asm__ __volatile__ (
   142     "  pxor        %%mm7, %%mm7     \n\t" 
   143
       /* LOOP processes one row; it is expanded 8 times below instead of branching. */
   144 #define LOOP \
   145     "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */ \
   146     "  movq        (%1), %%mm1      \n\t" /* mm1 = ReconPtr1 */ \
   147     "  movq        (%2), %%mm4      \n\t" /* mm4 = ReconPtr2 */ \
   148     "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */ \
   149     "  movq        %%mm1, %%mm3     \n\t" /* dup to prepare for up conversion */ \
   150     "  movq        %%mm4, %%mm5     \n\t" /* dup to prepare for up conversion */ \
   151     /* convert from UINT8 to INT16 */ \
   152     "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */ \
   153     "  punpcklbw   %%mm7, %%mm1     \n\t" /* mm1 = INT16(ReconPtr1) */ \
   154     "  punpcklbw   %%mm7, %%mm4     \n\t" /* mm4 = INT16(ReconPtr2) */ \
   155     "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */ \
   156     "  punpckhbw   %%mm7, %%mm3     \n\t" /* mm3 = INT16(ReconPtr1) */ \
   157     "  punpckhbw   %%mm7, %%mm5     \n\t" /* mm5 = INT16(ReconPtr2) */ \
   158     /* average ReconPtr1 and ReconPtr2 */ \
   159     "  paddw       %%mm4, %%mm1     \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */ \
   160     "  paddw       %%mm5, %%mm3     \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */ \
   161     "  psrlw       $1, %%mm1        \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ \
   162     "  psrlw       $1, %%mm3        \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ \
   163     "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \
   164     "  psubw       %%mm3, %%mm2     \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \
   165     "  movq        %%mm0,  (%3)     \n\t" /* write answer out */ \
   166     "  movq        %%mm2, 8(%3)     \n\t" /* write answer out */ \
   167     /* Increment pointers */ \
   168     "  add         $16, %3           \n\t" \
   169     "  add         %4, %0           \n\t" \
   170     "  add         %5, %1           \n\t" \
   171     "  add         %5, %2           \n\t"
   172
   173     LOOP
   174     LOOP
   175     LOOP
   176     LOOP
   177     LOOP
   178     LOOP
   179     LOOP
   180     LOOP
   181 #undef LOOP
   182
   183     "  emms                         \n\t"
   184
       /* Operand map: %0 = src1, %1 = src2, %2 = src3, %3 = dest (all
        * read-write registers, advanced by the asm); %4 = ss1, %5 = ss2
        * (memory inputs). */
   185      : "+r" (src1),
   186        "+r" (src2),
   187        "+r" (src3),
   188        "+r" (dest)
   189      : "m" (ss1),
   190        "m" (ss2) 
   191      : "memory"
   192   );
   193 #endif  
   194 }
   195 OIL_DEFINE_IMPL_FULL (diff8x8_average_s16_u8_mmx, diff8x8_average_s16_u8, OIL_IMPL_FLAG_MMX);
   196 
   197 
   198 
   199 #ifdef	__SYMBIAN32__
   200 /* Symbian accessor: returns the address of the diff8x8_s16_u8_mmx implementation record (presumably the object emitted by OIL_DEFINE_IMPL_FULL -- confirm against liboilfunction.h). */
   201 OilFunctionImpl* __oil_function_impl_diff8x8_s16_u8_mmx(void) {
   202 		return &_oil_function_impl_diff8x8_s16_u8_mmx;
   203 }
   204 #endif
   205 
   206 #ifdef	__SYMBIAN32__
   207 /* Symbian accessor: returns the address of the diff8x8_const128_s16_u8_mmx implementation record (presumably the object emitted by OIL_DEFINE_IMPL_FULL -- confirm against liboilfunction.h). */
   208 OilFunctionImpl* __oil_function_impl_diff8x8_const128_s16_u8_mmx(void) {
   209 		return &_oil_function_impl_diff8x8_const128_s16_u8_mmx;
   210 }
   211 #endif
   212 
   213 #ifdef	__SYMBIAN32__
   214 /* Symbian accessor: returns the address of the diff8x8_average_s16_u8_mmx implementation record (presumably the object emitted by OIL_DEFINE_IMPL_FULL -- confirm against liboilfunction.h). */
   215 OilFunctionImpl* __oil_function_impl_diff8x8_average_s16_u8_mmx(void) {
   216 		return &_oil_function_impl_diff8x8_average_s16_u8_mmx;
   217 }
   218 #endif
   219