os/ossrv/genericopenlibs/liboil/src/i386/recon8x8_i386.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  * LIBOIL - Library of Optimized Inner Loops
     3  * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
     4  * All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions
     8  * are met:
     9  * 1. Redistributions of source code must retain the above copyright
    10  *    notice, this list of conditions and the following disclaimer.
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  *    notice, this list of conditions and the following disclaimer in the
    13  *    documentation and/or other materials provided with the distribution.
    14  * 
    15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
    19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
    24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    25  * POSSIBILITY OF SUCH DAMAGE.
    26  */
    27 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    28 
    29 #ifdef HAVE_CONFIG_H
    30 #include "config.h"
    31 #endif
    32 
    33 #include <liboil/liboilfunction.h>
    34 #include "liboil/simdpack/simdpack.h"
    35 
    36 OIL_DECLARE_CLASS (recon8x8_intra);
    37 OIL_DECLARE_CLASS (recon8x8_inter);
    38 OIL_DECLARE_CLASS (recon8x8_inter2);
    39 
    40 const uint8_t c0x80[8] = {
    41   0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
    42 };
    43 
    44 static void
    45 recon8x8_intra_i386_mmx (uint8_t *dest, int ds, int16_t *change)
    46 {
    47 #if !defined(__WINSCW__) && !defined(__WINS__)      
    48   __asm__ __volatile__ (
    49     "  movq        (%3), %%mm0       \n\t" /* Set mm0 to 0x8080808080808080 */
    50 
    51     "  lea         128(%1), %%edi      \n\t" /* Endpoint in input buffer */
    52     "1:                                \n\t" 
    53     "  movq         (%1), %%mm2        \n\t" /* First four input values */
    54 
    55     "  packsswb    8(%1), %%mm2        \n\t" /* pack with next(high) four values */
    56     "  por         %%mm0, %%mm0        \n\t" 
    57     "  pxor        %%mm0, %%mm2        \n\t" /* Convert result to unsigned (same as add 128) */
    58     "  lea         16(%1), %1          \n\t" /* Step source buffer */
    59     "  cmp         %%edi, %1           \n\t" /* are we done */
    60 
    61     "  movq        %%mm2, (%0)         \n\t" /* store results */
    62 
    63     "  lea         (%0, %2), %0        \n\t" /* Step output buffer */
    64     "  jc          1b                  \n\t" /* Loop back if we are not done */
    65     "  emms                            \n\t"
    66       : "+r" (dest)
    67       : "r" (change),
    68         "r" (ds),
    69         "r" (c0x80)
    70       : "memory", "edi"
    71   );
    72 #endif  
    73 }
    74 
    75 OIL_DEFINE_IMPL_FULL (recon8x8_intra_i386_mmx, recon8x8_intra, OIL_IMPL_FLAG_MMX);
    76 
    77 #if 0
    78 static void
    79 recon8x8_inter_i386_mmx (uint8_t *dest, int ds, uint8_t *src, int ss, int16_t *change)
    80 {
    81   /* FIXME doesn't handle ss */
    82   __asm__ __volatile__ (
    83     "  pxor        %%mm0, %%mm0        \n\t"
    84     "  lea         128(%1), %%edi      \n\t"
    85 
    86     "1:                                \n\t"
    87     "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
    88 
    89     "  movq        (%1), %%mm4         \n\t" /* first 4 changes */
    90     "  movq        %%mm2, %%mm3        \n\t"
    91     "  movq        8(%1), %%mm5        \n\t" /* last 4 changes */
    92     "  punpcklbw   %%mm0, %%mm2        \n\t" /* turn first 4 refs into positive 16-bit #s */
    93     "  paddsw      %%mm4, %%mm2        \n\t" /* add in first 4 changes */
    94     "  punpckhbw   %%mm0, %%mm3        \n\t" /* turn last 4 refs into positive 16-bit #s */
    95     "  paddsw      %%mm5, %%mm3        \n\t" /* add in last 4 changes */
    96     "  add         %3, %2              \n\t" /* next row of reference pixels */
    97     "  packuswb    %%mm3, %%mm2        \n\t" /* pack result to unsigned 8-bit values */
    98     "  lea         16(%1), %1          \n\t" /* next row of changes */
    99     "  cmp         %%edi, %1           \n\t" /* are we done? */
   100 
   101     "  movq        %%mm2, (%0)         \n\t" /* store result */
   102 
   103     "  lea         (%0, %3), %0        \n\t" /* next row of output */
   104     "  jc          1b                  \n\t"
   105     "  emms                            \n\t"
   106       : "+r" (dest)
   107       : "r" (change),
   108         "r" (src),
   109         "r" (ds)
   110       : "memory", "edi"
   111   );
   112 }
   113 
   114 OIL_DEFINE_IMPL_FULL (recon8x8_inter_i386_mmx, recon8x8_inter, OIL_IMPL_FLAG_MMX);
   115 
   116 static void
   117 recon8x8_inter2_i386_mmx (uint8_t *dest, int ds, uint8_t *s1, int ss1, uint8_t *s2, int ss2, int16_t *change)
   118 {
   119   /* FIXME doesn't handle ss1, ss2 */
   120   __asm__ __volatile__ (
   121     "  pxor        %%mm0, %%mm0        \n\t"
   122     "  lea         128(%1), %%edi      \n\t"
   123 
   124     "1:                                \n\t"
   125     "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
   126     "  movq        (%3), %%mm4         \n\t" /* (+3 misaligned) 8 reference pixels */
   127 
   128     "  movq        %%mm2, %%mm3        \n\t"
   129     "  punpcklbw   %%mm0, %%mm2        \n\t" /* mm2 = start ref1 as positive 16-bit #s */
   130     "  movq        %%mm4, %%mm5        \n\t"
   131     "  movq        (%1), %%mm6         \n\t" /* first 4 changes */
   132     "  punpckhbw   %%mm0, %%mm3        \n\t" /* mm3 = end ref1 as positive 16-bit #s */
   133     "  movq        8(%1), %%mm7        \n\t" /* last 4 changes */
   134     "  punpcklbw   %%mm0, %%mm4        \n\t" /* mm4 = start ref2 as positive 16-bit #s */
   135     "  punpckhbw   %%mm0, %%mm5        \n\t" /* mm5 = end ref2 as positive 16-bit #s */
   136     "  paddw       %%mm4, %%mm2        \n\t" /* mm2 = start (ref1 + ref2) */
   137     "  paddw       %%mm5, %%mm3        \n\t" /* mm3 = end (ref1 + ref2) */
   138     "  psrlw       $1, %%mm2           \n\t" /* mm2 = start (ref1 + ref2)/2 */
   139     "  psrlw       $1, %%mm3           \n\t" /* mm3 = end (ref1 + ref2)/2 */
   140     "  paddw       %%mm6, %%mm2        \n\t" /* add changes to start */
   141     "  paddw       %%mm7, %%mm3        \n\t" /* add changes to end */
   142     "  lea         16(%1), %1          \n\t" /* next row of changes */
   143     "  packuswb    %%mm3, %%mm2        \n\t" /* pack start|end to unsigned 8-bit */
   144     "  add         %4, %2              \n\t" /* next row of reference pixels */
   145     "  add         %4, %3              \n\t" /* next row of reference pixels */
   146     "  movq        %%mm2, (%0)         \n\t" /* store result */
   147     "  add         %4, %0              \n\t" /* next row of output */
   148     "  cmp         %%edi, %1           \n\t" /* are we done? */
   149     "  jc          1b                  \n\t"
   150     "  emms                            \n\t"
   151       : "+r" (dest)
   152       : "r" (change),
   153         "r" (s1),
   154         "r" (s2),
   155         "m" (ds)
   156       : "memory", "edi"
   157   );
   158 }
   159 
   160 OIL_DEFINE_IMPL_FULL (recon8x8_inter2_i386_mmx, recon8x8_inter2, OIL_IMPL_FLAG_MMX);
   161 #endif
   162 
   163 
   164 #ifdef	__SYMBIAN32__
   165  
   166 OilFunctionImpl* __oil_function_impl_recon8x8_intra_i386_mmx, recon8x8_intra() {
   167 		return &_oil_function_impl_recon8x8_intra_i386_mmx, recon8x8_intra;
   168 }
   169 #endif
   170 
   171 #ifdef	__SYMBIAN32__
   172  
   173 OilFunctionImpl* __oil_function_impl_recon8x8_inter_i386_mmx, recon8x8_inter() {
   174 		return &_oil_function_impl_recon8x8_inter_i386_mmx, recon8x8_inter;
   175 }
   176 #endif
   177 
   178 #ifdef	__SYMBIAN32__
   179  
   180 OilFunctionImpl* __oil_function_impl_recon8x8_inter2_i386_mmx, recon8x8_inter2() {
   181 		return &_oil_function_impl_recon8x8_inter2_i386_mmx, recon8x8_inter2;
   182 }
   183 #endif
   184