os/ossrv/genericopenlibs/liboil/src/i386/error8x8_i386.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200 (2014-06-10)
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  * LIBOIL - Library of Optimized Inner Loops
     3  * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
     4  * All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions
     8  * are met:
     9  * 1. Redistributions of source code must retain the above copyright
    10  *    notice, this list of conditions and the following disclaimer.
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  *    notice, this list of conditions and the following disclaimer in the
    13  *    documentation and/or other materials provided with the distribution.
    14  * 
    15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
    19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
    24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    25  * POSSIBILITY OF SUCH DAMAGE.
    26  */
    27 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    28 
    29 #ifdef HAVE_CONFIG_H
    30 #include "config.h"
    31 #endif
    32 
    33 #include <liboil/liboilfunction.h>
    34 
    /*
     * Function classes that the implementations in this file attach to: each
     * name below reappears as the class argument of an OIL_DEFINE_IMPL_FULL
     * line further down.  The OIL_DECLARE_CLASS macro itself is not defined in
     * this file (presumably it comes from <liboil/liboilfunction.h>).
     */
    35 OIL_DECLARE_CLASS (err_intra8x8_u8);
    36 OIL_DECLARE_CLASS (err_inter8x8_u8);
    37 OIL_DECLARE_CLASS (err_inter8x8_u8_avg);
    38 
    /*
     * err_intra8x8_u8_mmx:
     * @dest: receives the single 32-bit result
     * @src1: address of the first row; eight bytes are read from each of
     *        eight rows
     * @ss1: byte offset added to the @src1 address after each row
     *
     * MMX implementation registered for the err_intra8x8_u8 class.  Over the
     * 64 bytes x of the 8x8 block it accumulates sum(x) and sum(x*x) and
     * stores 64*sum(x*x) - sum(x)*sum(x) in @dest, i.e. 64*64 times the
     * population variance of the block.
     *
     * When __WINSCW__ or __WINS__ is defined the whole body is compiled out
     * and @dest is left untouched.
     */
    39 static void
    40 err_intra8x8_u8_mmx (uint32_t *dest, uint8_t *src1, int ss1)
    41 {
    42 #if !defined(__WINSCW__) && !defined(__WINS__)      
    43   uint32_t  xsum;	/* sum of the 64 bytes */
    44   uint32_t  xxsum;	/* sum of the 64 squared bytes */
    45 
    46   __asm__ __volatile__ (
    47     "  pxor        %%mm5, %%mm5     \n\t"	/* mm5: four 16-bit partial sums of x */
    48     "  pxor        %%mm6, %%mm6     \n\t"	/* mm6: zero, used to widen bytes to words */
    49     "  pxor        %%mm7, %%mm7     \n\t"	/* mm7: two 32-bit partial sums of x*x */
    50     "  mov         $8, %%edi        \n\t"	/* edi: row counter, 8 rows */
    51     "1:                             \n\t"
    52     "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */
    53     "  movq        %%mm0, %%mm2     \n\t"
    54 
    /* Zero-extend: mm0 = low four bytes as words, mm2 = high four bytes. */
    55     "  punpcklbw   %%mm6, %%mm0     \n\t"
    56     "  punpckhbw   %%mm6, %%mm2     \n\t"
    57 
    /* Accumulate the eight values into the word sums. */
    58     "  paddw       %%mm0, %%mm5     \n\t"
    59     "  paddw       %%mm2, %%mm5     \n\t"
    60 
    /* pmaddwd squares each word and adds adjacent products into dwords. */
    61     "  pmaddwd     %%mm0, %%mm0     \n\t"
    62     "  pmaddwd     %%mm2, %%mm2     \n\t"
    63     
    64     "  paddd       %%mm0, %%mm7     \n\t"
    65     "  paddd       %%mm2, %%mm7     \n\t"
    66 
    67     "  add         %3, %2           \n\t"	/* Inc pointer into src data */
    68 
    69     "  dec         %%edi            \n\t"
    70     "  jnz 1b                       \n\t"
    71 
    /* Fold the four word lanes of mm5 into its lowest word. */
    72     "  movq        %%mm5, %%mm0     \n\t"
    73     "  psrlq       $32, %%mm5       \n\t"
    74     "  paddw       %%mm0, %%mm5     \n\t"
    75     "  movq        %%mm5, %%mm0     \n\t"
    76     "  psrlq       $16, %%mm5       \n\t"
    77     "  paddw       %%mm0, %%mm5     \n\t"
    78     "  movd        %%mm5, %%edi     \n\t"
    79     "  movswl       %%di, %%edi      \n\t"	/* keep only the low word, sign-extended */
    80     "  movl        %%edi, %0        \n\t"	/* xsum */
    81 
    /* Fold the two dword lanes of mm7 into the low dword. */
    82     "  movq        %%mm7, %%mm0     \n\t"
    83     "  psrlq       $32, %%mm7       \n\t"
    84     "  paddd       %%mm0, %%mm7     \n\t"
    85     "  movd        %%mm7, %1        \n\t"	/* xxsum */
    86     "  emms                         \n\t"	/* empty the MMX state before returning to C */
    87 
    88      : "=r" (xsum),	/* %0 */
    89        "=r" (xxsum),	/* %1 */
    90        "+r" (src1) 	/* %2, advanced by the loop */
    91      : "r" (ss1)	/* %3 */
    92      : "edi", "memory"
    93   );
    94 
    95   /* Compute population variance as mis-match metric. */
    /* xxsum<<6 is 64*sum(x*x); the result is 64*64 times the variance. */
    96   *dest = (((xxsum<<6) - xsum*xsum)); 
    97 #endif
    98 }
    99 OIL_DEFINE_IMPL_FULL (err_intra8x8_u8_mmx, err_intra8x8_u8, OIL_IMPL_FLAG_MMX);
   100 
   /*
    * err_inter8x8_u8_mmx:
    * @dest: receives the single 32-bit result
    * @src1: address of the first row of the first block (8 bytes x 8 rows)
    * @ss1: byte offset added to the @src1 address after each row
    * @src2: address of the first row of the second block (8 bytes x 8 rows)
    * @ss2: byte offset added to the @src2 address after each row
    *
    * MMX implementation registered for the err_inter8x8_u8 class.  For the
    * 64 per-byte differences d = src1 - src2 it accumulates sum(d) and
    * sum(d*d) and stores 64*sum(d*d) - sum(d)*sum(d) in @dest, i.e. 64*64
    * times the population variance of the differences.
    *
    * When __WINSCW__ or __WINS__ is defined the asm and the store to @dest
    * are compiled out, so @dest is left untouched.
    */
   101 static void
   102 err_inter8x8_u8_mmx (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2)
   103 {
   /*
    * NOTE(review): unlike the sibling functions these two locals sit outside
    * the #if, so they are declared but unused when the body is compiled out.
    */
   104   uint32_t  xsum;	/* sum of the 64 differences */
   105   uint32_t  xxsum;	/* sum of the 64 squared differences */
   106 #if !defined(__WINSCW__) && !defined(__WINS__)      
   107   __asm__ __volatile__ (
   108     "  pxor        %%mm5, %%mm5     \n\t"	/* mm5: four 16-bit partial sums of d */
   109     "  pxor        %%mm6, %%mm6     \n\t"	/* mm6: zero, used to widen bytes to words */
   110     "  pxor        %%mm7, %%mm7     \n\t"	/* mm7: two 32-bit partial sums of d*d */
   111     "  mov         $8, %%edi        \n\t"	/* edi: row counter, 8 rows */
   112     "1:                             \n\t"
   113     "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */
   114     "  movq        (%3), %%mm1      \n\t"	/* 8 bytes of the second block */
   115     "  movq        %%mm0, %%mm2     \n\t"
   116     "  movq        %%mm1, %%mm3     \n\t"
   117 
   /* Zero-extend both rows: mm0/mm1 hold the low halves, mm2/mm3 the high. */
   118     "  punpcklbw   %%mm6, %%mm0     \n\t"
   119     "  punpcklbw   %%mm6, %%mm1     \n\t"
   120     "  punpckhbw   %%mm6, %%mm2     \n\t"
   121     "  punpckhbw   %%mm6, %%mm3     \n\t"
   122 
   /* Signed word differences src1 - src2. */
   123     "  psubsw      %%mm1, %%mm0     \n\t"
   124     "  psubsw      %%mm3, %%mm2     \n\t"
   125 
   126     "  paddw       %%mm0, %%mm5     \n\t"
   127     "  paddw       %%mm2, %%mm5     \n\t"
   128 
   /* pmaddwd squares each word and adds adjacent products into dwords. */
   129     "  pmaddwd     %%mm0, %%mm0     \n\t"
   130     "  pmaddwd     %%mm2, %%mm2     \n\t"
   131     
   132     "  paddd       %%mm0, %%mm7     \n\t"
   133     "  paddd       %%mm2, %%mm7     \n\t"
   134 
   135     "  add         %4, %2           \n\t"	/* Inc pointer into src data */
   136     "  add         %5, %3           \n\t"	/* Inc pointer into ref data */
   137 
   138     "  dec         %%edi            \n\t"
   139     "  jnz 1b                       \n\t"
   140 
   /* Fold the four word lanes of mm5 into its lowest word. */
   141     "  movq        %%mm5, %%mm0     \n\t"
   142     "  psrlq       $32, %%mm5       \n\t"
   143     "  paddw       %%mm0, %%mm5     \n\t"
   144     "  movq        %%mm5, %%mm0     \n\t"
   145     "  psrlq       $16, %%mm5       \n\t"
   146     "  paddw       %%mm0, %%mm5     \n\t"
   147     "  movd        %%mm5, %%edi     \n\t"
   148     "  movswl       %%di, %%edi      \n\t"	/* keep only the low word, sign-extended */
   149     "  movl        %%edi, %0        \n\t"	/* xsum */
   150 
   /* Fold the two dword lanes of mm7 into the low dword. */
   151     "  movq        %%mm7, %%mm0     \n\t"
   152     "  psrlq       $32, %%mm7       \n\t"
   153     "  paddd       %%mm0, %%mm7     \n\t"
   154     "  movd        %%mm7, %1        \n\t"	/* xxsum */
   155     "  emms                         \n\t"	/* empty the MMX state before returning to C */
   156 
   157      : "=m" (xsum),	/* %0 */
   158        "=m" (xxsum),	/* %1 */
   159        "+r" (src1), 	/* %2, advanced by the loop */
   160        "+r" (src2) 	/* %3, advanced by the loop */
   161      : "m" (ss1),	/* %4 */
   162        "m" (ss2)	/* %5 */
   163      : "edi", "memory"
   164   );
   165 
   166   /* Compute and return population variance as mis-match metric. */
   /* xxsum<<6 is 64*sum(d*d); the result is 64*64 times the variance. */
   167   *dest = (((xxsum<<6) - xsum*xsum));
   168 #endif
   169 }
   170 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_mmx, err_inter8x8_u8, OIL_IMPL_FLAG_MMX);
   171 
   /*
    * err_inter8x8_u8_avg_mmx:
    * @dest: receives the single 32-bit result
    * @src1: address of the first row of the source block (8 bytes x 8 rows)
    * @ss1: byte offset added to the @src1 address after each row
    * @src2: address of the first row of the first reference block
    * @src3: address of the first row of the second reference block
    * @ss2: byte offset added to both the @src2 and @src3 addresses after
    *       each row
    *
    * MMX implementation registered for the err_inter8x8_u8_avg class.  Each
    * byte of @src2 is averaged with the matching byte of @src3 (rounding
    * down) and subtracted from the matching byte of @src1.  For the 64
    * differences d it accumulates sum(d) and sum(d*d) and stores
    * 64*sum(d*d) - sum(d)*sum(d) in @dest, i.e. 64*64 times the population
    * variance of the differences.
    *
    * When __WINSCW__ or __WINS__ is defined the whole body is compiled out
    * and @dest is left untouched.
    */
   172 static void
   173 err_inter8x8_u8_avg_mmx (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, uint8_t *src3, int ss2)
   174 {
   175 #if !defined(__WINSCW__) && !defined(__WINS__)      
   176   uint32_t xsum;	/* sum of the 64 differences */
   177   uint32_t xxsum;	/* sum of the 64 squared differences */
   178 
   179   __asm__ __volatile__ (
   180     "  pcmpeqd     %%mm4, %%mm4     \n\t"	/* all ones in mm4 */
   181     "  paddb       %%mm4, %%mm4     \n\t"	/* 0xff + 0xff per byte: fefefefefefefefe in mm4 */
   182     "  pxor        %%mm5, %%mm5     \n\t"	/* mm5: four 16-bit partial sums of d */
   183     "  pxor        %%mm6, %%mm6     \n\t"	/* mm6: zero, used to widen bytes to words */
   184     "  pxor        %%mm7, %%mm7     \n\t"	/* mm7: two 32-bit partial sums of d*d */
   185     "  mov         $8, %%edi        \n\t"	/* edi: row counter, 8 rows */
   186     "1:                             \n\t"
   187     "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */
   188 
   /*
    * Per-byte floor average without overflow:
    *   avg = (a & b) + (((a ^ b) & 0xfe) >> 1)
    * The 0xfe mask clears bit 0 of every byte, so the 64-bit psrlq cannot
    * shift a bit into the neighbouring byte.
    */
   189     "  movq        (%3), %%mm2      \n\t"
   190     "  movq        (%4), %%mm3      \n\t"	/* take average of mm2 and mm3 */
   191     "  movq        %%mm2, %%mm1     \n\t"
   192     "  pand        %%mm3, %%mm1     \n\t"
   193     "  pxor        %%mm2, %%mm3     \n\t"
   194     "  pand        %%mm4, %%mm3     \n\t"
   195     "  psrlq       $1, %%mm3        \n\t"
   196     "  paddb       %%mm3, %%mm1     \n\t"	/* mm1 = averaged reference row */
   197 
   198     "  movq        %%mm0, %%mm2     \n\t"
   199     "  movq        %%mm1, %%mm3     \n\t"
   200 
   /* Zero-extend both rows: mm0/mm1 hold the low halves, mm2/mm3 the high. */
   201     "  punpcklbw   %%mm6, %%mm0     \n\t"
   202     "  punpcklbw   %%mm6, %%mm1     \n\t"
   203     "  punpckhbw   %%mm6, %%mm2     \n\t"
   204     "  punpckhbw   %%mm6, %%mm3     \n\t"
   205 
   /* Signed word differences src1 - average. */
   206     "  psubsw      %%mm1, %%mm0     \n\t"
   207     "  psubsw      %%mm3, %%mm2     \n\t"
   208 
   209     "  paddw       %%mm0, %%mm5     \n\t"
   210     "  paddw       %%mm2, %%mm5     \n\t"
   211 
   /* pmaddwd squares each word and adds adjacent products into dwords. */
   212     "  pmaddwd     %%mm0, %%mm0     \n\t"
   213     "  pmaddwd     %%mm2, %%mm2     \n\t"
   214     
   215     "  paddd       %%mm0, %%mm7     \n\t"
   216     "  paddd       %%mm2, %%mm7     \n\t"
   217 
   218     "  add         %5, %2           \n\t"	/* Inc pointer into src data */
   219     "  add         %6, %3           \n\t"	/* Inc pointer into ref data */
   220     "  add         %6, %4           \n\t"	/* Inc pointer into ref data */
   221 
   222     "  dec         %%edi            \n\t"
   223     "  jnz 1b                       \n\t"
   224 
   /* Fold the four word lanes of mm5 into its lowest word. */
   225     "  movq        %%mm5, %%mm0     \n\t"
   226     "  psrlq       $32, %%mm5       \n\t"
   227     "  paddw       %%mm0, %%mm5     \n\t"
   228     "  movq        %%mm5, %%mm0     \n\t"
   229     "  psrlq       $16, %%mm5       \n\t"
   230     "  paddw       %%mm0, %%mm5     \n\t"
   231     "  movd        %%mm5, %%edi     \n\t"
   232     "  movswl       %%di, %%edi      \n\t"	/* keep only the low word, sign-extended */
   233     "  movl        %%edi, %0        \n\t"	/* xsum */
   234 
   /* Fold the two dword lanes of mm7 into the low dword. */
   235     "  movq        %%mm7, %%mm0     \n\t"
   236     "  psrlq       $32, %%mm7       \n\t"
   237     "  paddd       %%mm0, %%mm7     \n\t"
   238     "  movd        %%mm7, %1        \n\t"	/* xxsum */
   239     "  emms                         \n\t"	/* empty the MMX state before returning to C */
   240 
   241      : "=m" (xsum),	/* %0 */
   242        "=m" (xxsum),	/* %1 */
   243        "+r" (src1), 	/* %2, advanced by the loop */
   244        "+r" (src2),	/* %3, advanced by the loop */
   245        "+r" (src3) 	/* %4, advanced by the loop */
   246      : "m" (ss1),	/* %5 */
   247        "m" (ss2)	/* %6 */
   248      : "edi", "memory"
   249   );
   250 
   251   /* Compute and return population variance as mis-match metric. */
   /* xxsum<<6 is 64*sum(d*d); the result is 64*64 times the variance. */
   252   *dest = (((xxsum<<6) - xsum*xsum));
   253 #endif
   254 }
   255 
   256 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_avg_mmx, err_inter8x8_u8_avg, OIL_IMPL_FLAG_MMX);
   257  
   /*
    * err_inter8x8_u8_avg_mmxext:
    * @dest: receives the single 32-bit result
    * @src1: address of the first row of the source block (8 bytes x 8 rows)
    * @ss1: byte offset added to the @src1 address after each row
    * @src2: address of the first row of the first reference block
    * @src3: address of the first row of the second reference block
    * @ss2: byte offset added to both the @src2 and @src3 addresses after
    *       each row
    *
    * Variant of the err_inter8x8_u8_avg computation that forms the average
    * of @src2 and @src3 with pavgb; it is registered with both the MMX and
    * MMXEXT flags.  The result stored in @dest has the same form as in the
    * other functions of this file: 64*sum(d*d) - sum(d)*sum(d) over the 64
    * differences d = src1 - average.
    *
    * Built only when ENABLE_BROKEN_IMPLS is defined; this file does not say
    * what is considered broken about it.
    * NOTE(review): unlike the other functions here, the body is not guarded
    * by the __WINSCW__ / __WINS__ check.
    */
   258 #ifdef ENABLE_BROKEN_IMPLS
   259 static void
   260 err_inter8x8_u8_avg_mmxext (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, uint8_t *src3, int ss2)
   261 {
   262   uint32_t xsum;	/* sum of the 64 differences */
   263   uint32_t xxsum;	/* sum of the 64 squared differences */
   264 
   265   __asm__ __volatile__ (
   266     "  pxor        %%mm4, %%mm4     \n\t"	/* mm4: zero, used to widen bytes to words */
   267     "  pxor        %%mm5, %%mm5     \n\t"	/* mm5: four 16-bit partial sums of d */
   /* Build 0x01 in every byte of mm6: load four 0x01 bytes, then interleave
    * the low half of mm6 with itself. */
   268     "  mov $0x01010101, %%edi \n\t"
   269     "  movd %%edi, %%mm6 \n\t"
   270     "  punpcklbw %%mm6, %%mm6 \n\t"
   271     "  pxor        %%mm7, %%mm7     \n\t"	/* mm7: two 32-bit partial sums of d*d */
   272     "  mov         $8, %%edi        \n\t"	/* edi: row counter, 8 rows */
   273     "1:                             \n\t"
   274     "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */
   275 
   /*
    * pavgb yields (a + b + 1) >> 1 per byte.  Subtracting (a ^ b) & 1 removes
    * the round-up whenever a + b is odd, giving the floor average.
    */
   276     "  movq        (%3), %%mm2      \n\t"
   277     "  movq        (%4), %%mm1      \n\t"	/* take average of mm2 and mm1 */
   278     "  movq        %%mm1, %%mm3     \n\t"
   279     "  pavgb       %%mm2, %%mm1     \n\t"
   280     "  pxor        %%mm2, %%mm3     \n\t"
   281     "  pand        %%mm6, %%mm3     \n\t"
   282     "  psubb       %%mm3, %%mm1     \n\t"	/* mm1 = averaged reference row */
   283 
   284     "  movq        %%mm0, %%mm2     \n\t"
   285     "  movq        %%mm1, %%mm3     \n\t"
   286 
   /* Zero-extend both rows: mm0/mm1 hold the low halves, mm2/mm3 the high. */
   287     "  punpcklbw   %%mm4, %%mm0     \n\t"
   288     "  punpcklbw   %%mm4, %%mm1     \n\t"
   289     "  punpckhbw   %%mm4, %%mm2     \n\t"
   290     "  punpckhbw   %%mm4, %%mm3     \n\t"
   291 
   /* Signed word differences src1 - average. */
   292     "  psubsw      %%mm1, %%mm0     \n\t"
   293     "  psubsw      %%mm3, %%mm2     \n\t"
   294 
   295     "  paddw       %%mm0, %%mm5     \n\t"
   296     "  paddw       %%mm2, %%mm5     \n\t"
   297 
   /* pmaddwd squares each word and adds adjacent products into dwords. */
   298     "  pmaddwd     %%mm0, %%mm0     \n\t"
   299     "  pmaddwd     %%mm2, %%mm2     \n\t"
   300     
   301     "  paddd       %%mm0, %%mm7     \n\t"
   302     "  paddd       %%mm2, %%mm7     \n\t"
   303 
   304     "  add         %5, %2           \n\t"	/* Inc pointer into src data */
   305     "  add         %6, %3           \n\t"	/* Inc pointer into ref data */
   306     "  add         %6, %4           \n\t"	/* Inc pointer into ref data */
   307 
   308     "  dec         %%edi            \n\t"
   309     "  jnz 1b                       \n\t"
   310 
   /* Fold the four word lanes of mm5 into its lowest word. */
   311     "  movq        %%mm5, %%mm0     \n\t"
   312     "  psrlq       $32, %%mm5       \n\t"
   313     "  paddw       %%mm0, %%mm5     \n\t"
   314     "  movq        %%mm5, %%mm0     \n\t"
   315     "  psrlq       $16, %%mm5       \n\t"
   316     "  paddw       %%mm0, %%mm5     \n\t"
   317     "  movd        %%mm5, %%edi     \n\t"
   318     "  movswl       %%di, %%edi      \n\t"	/* keep only the low word, sign-extended */
   319     "  movl        %%edi, %0        \n\t"	/* xsum */
   320 
   /* Fold the two dword lanes of mm7 into the low dword. */
   321     "  movq        %%mm7, %%mm0     \n\t"
   322     "  psrlq       $32, %%mm7       \n\t"
   323     "  paddd       %%mm0, %%mm7     \n\t"
   324     "  movd        %%mm7, %1        \n\t"	/* xxsum */
   325     "  emms                         \n\t"	/* empty the MMX state before returning to C */
   326 
   327      : "=m" (xsum),	/* %0 */
   328        "=m" (xxsum),	/* %1 */
   329        "+r" (src1), 	/* %2, advanced by the loop */
   330        "+r" (src2),	/* %3, advanced by the loop */
   331        "+r" (src3) 	/* %4, advanced by the loop */
   332      : "m" (ss1),	/* %5 */
   333        "m" (ss2)	/* %6 */
   334      : "edi", "memory"
   335   );
   336 
   337   /* Compute and return population variance as mis-match metric. */
   /* xxsum<<6 is 64*sum(d*d); the result is 64*64 times the variance. */
   338   *dest = (((xxsum<<6) - xsum*xsum));
   339 }
   340 
   341 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_avg_mmxext, err_inter8x8_u8_avg, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
   342 #endif
   343 
   344 
   345 
   #ifdef __SYMBIAN32__

   /*
    * Accessor built only when __SYMBIAN32__ is defined.  Returns the address
    * of the implementation record for err_intra8x8_u8_mmx.
    *
    * Both the accessor name and the record name are formed from the
    * implementation name alone; the class name (err_intra8x8_u8) is not part
    * of either identifier.  The record is presumably the object emitted by
    * the OIL_DEFINE_IMPL_FULL (err_intra8x8_u8_mmx, ...) line in this file --
    * confirm against liboilfunction.h.
    */
   OilFunctionImpl* __oil_function_impl_err_intra8x8_u8_mmx(void) {
       return &_oil_function_impl_err_intra8x8_u8_mmx;
   }
   #endif
   352 
   #ifdef __SYMBIAN32__

   /*
    * Accessor built only when __SYMBIAN32__ is defined.  Returns the address
    * of the implementation record for err_inter8x8_u8_mmx.
    *
    * Both the accessor name and the record name are formed from the
    * implementation name alone; the class name (err_inter8x8_u8) is not part
    * of either identifier.  The record is presumably the object emitted by
    * the OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_mmx, ...) line in this file --
    * confirm against liboilfunction.h.
    */
   OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_mmx(void) {
       return &_oil_function_impl_err_inter8x8_u8_mmx;
   }
   #endif
   359 
   #ifdef __SYMBIAN32__

   /*
    * Accessor built only when __SYMBIAN32__ is defined.  Returns the address
    * of the implementation record for err_inter8x8_u8_avg_mmx.
    *
    * Both the accessor name and the record name are formed from the
    * implementation name alone; the class name (err_inter8x8_u8_avg) is not
    * part of either identifier.  The record is presumably the object emitted
    * by the OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_avg_mmx, ...) line in this
    * file -- confirm against liboilfunction.h.
    */
   OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_avg_mmx(void) {
       return &_oil_function_impl_err_inter8x8_u8_avg_mmx;
   }
   #endif
   366 
   #if defined(__SYMBIAN32__) && defined(ENABLE_BROKEN_IMPLS)

   /*
    * Accessor for the implementation record of err_inter8x8_u8_avg_mmxext.
    *
    * That implementation and its OIL_DEFINE_IMPL_FULL line are compiled only
    * when ENABLE_BROKEN_IMPLS is defined, so the accessor carries the same
    * condition in addition to __SYMBIAN32__; without it the returned symbol
    * would not exist.
    *
    * Both the accessor name and the record name are formed from the
    * implementation name alone; the class name (err_inter8x8_u8_avg) is not
    * part of either identifier.  The record is presumably the object emitted
    * by OIL_DEFINE_IMPL_FULL -- confirm against liboilfunction.h.
    * NOTE(review): if a frozen export list names this accessor, check that
    * the list is conditional on ENABLE_BROKEN_IMPLS as well.
    */
   OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_avg_mmxext(void) {
       return &_oil_function_impl_err_inter8x8_u8_avg_mmxext;
   }
   #endif
   373