os/ossrv/genericopenlibs/liboil/src/i386/trans8x8_i386.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
 * LIBOIL - Library of Optimized Inner Loops
sl@0
     3
 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
sl@0
     4
 * All rights reserved.
sl@0
     5
 *
sl@0
     6
 * Redistribution and use in source and binary forms, with or without
sl@0
     7
 * modification, are permitted provided that the following conditions
sl@0
     8
 * are met:
sl@0
     9
 * 1. Redistributions of source code must retain the above copyright
sl@0
    10
 *    notice, this list of conditions and the following disclaimer.
sl@0
    11
 * 2. Redistributions in binary form must reproduce the above copyright
sl@0
    12
 *    notice, this list of conditions and the following disclaimer in the
sl@0
    13
 *    documentation and/or other materials provided with the distribution.
sl@0
    14
 * 
sl@0
    15
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
sl@0
    16
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
sl@0
    17
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
sl@0
    18
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
sl@0
    19
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
sl@0
    20
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
sl@0
    21
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
sl@0
    22
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
sl@0
    23
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
sl@0
    24
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
sl@0
    25
 * POSSIBILITY OF SUCH DAMAGE.
sl@0
    26
 */
sl@0
    27
//Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
sl@0
    28
sl@0
    29
#ifdef HAVE_CONFIG_H
sl@0
    30
#include "config.h"
sl@0
    31
#endif
sl@0
    32
sl@0
    33
#include <liboil/liboilfunction.h>
sl@0
    34
#include <math.h>
sl@0
    35
sl@0
    36
/* Declares the trans8x8_u16 function class that the implementations in this
 * file are registered against (the class itself is presumably defined in
 * another translation unit -- confirm against the liboil class sources). */
OIL_DECLARE_CLASS(trans8x8_u16);
sl@0
    37
sl@0
    38
/* this could use additional work. */
sl@0
    39
/*
 * trans8x8_u16_mmx:
 * @dest: top-left element of the 8x8 destination block
 * @dstr: distance in bytes between consecutive destination rows
 * @src:  top-left element of the 8x8 source block
 * @sstr: distance in bytes between consecutive source rows
 *
 * Writes the transpose of the 8x8 block of 16-bit values at @src to @dest,
 * i.e. destination element (row, col) receives source element (col, row).
 *
 * On 32-bit x86 the block is handled as four 4x4 quadrants.  Each quadrant
 * is loaded as four 64-bit rows, transposed in registers with the
 * punpck{l,h}wd / punpck{l,h}dq interleave instructions and stored as four
 * 64-bit rows into the mirrored quadrant of the destination.
 *
 * On every other build (including the WINSCW/WINS emulator targets, where
 * GCC-style inline assembly is not available) a plain C loop produces the
 * same result, so the function never returns with @dest unwritten.
 */
static void
trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
{
#if (defined(__i386__) || defined(__i386)) && !defined(__WINSCW__) && !defined(__WINS__)
  __asm__ __volatile__ (
      /* Source rows 0-3, columns 0-3 -> destination rows 0-3, columns 0-3. */
      "  leal (%3,%3,2),%%eax \n"         /* eax = 3 * sstr */
      "  movq (%1), %%mm0 \n"             /* source row 0 */
      "  movq (%1,%3,2), %%mm2 \n"        /* source row 2 */
      "  movq %%mm0, %%mm4 \n"
      "  movq %%mm2, %%mm5 \n"
      "  punpcklwd (%1,%3), %%mm0 \n"     /* rows 0/1, low words interleaved */
      "  punpcklwd (%1,%%eax), %%mm2 \n"  /* rows 2/3, low words interleaved */
      "  punpckhwd (%1,%3), %%mm4 \n"     /* rows 0/1, high words interleaved */
      "  punpckhwd (%1,%%eax), %%mm5 \n"  /* rows 2/3, high words interleaved */
      "  movq %%mm0, %%mm1 \n"
      "  movq %%mm4, %%mm3 \n"
      "  punpckldq %%mm2, %%mm0 \n"       /* mm0 = source column 0 */
      "  punpckldq %%mm5, %%mm4 \n"       /* mm4 = source column 2 */
      "  punpckhdq %%mm2, %%mm1 \n"       /* mm1 = source column 1 */
      "  punpckhdq %%mm5, %%mm3 \n"       /* mm3 = source column 3 */
      "  leal (%2,%2,2),%%eax \n"         /* eax = 3 * dstr */
      "  movq %%mm0, 0(%0) \n"
      "  movq %%mm1, (%0,%2) \n"
      "  movq %%mm4, (%0,%2,2) \n"
      "  movq %%mm3, (%0,%%eax) \n"

      /* Source rows 0-3, columns 4-7 -> destination rows 4-7, columns 0-3. */
      "  leal (%3,%3,2),%%eax \n"
      "  movq 8(%1), %%mm0 \n"
      "  movq 8(%1,%3,2), %%mm2 \n"
      "  movq %%mm0, %%mm4 \n"
      "  movq %%mm2, %%mm5 \n"
      "  punpcklwd 8(%1,%3), %%mm0 \n"
      "  punpcklwd 8(%1,%%eax), %%mm2 \n"
      "  punpckhwd 8(%1,%3), %%mm4 \n"
      "  punpckhwd 8(%1,%%eax), %%mm5 \n"
      "  movq %%mm0, %%mm1 \n"
      "  movq %%mm4, %%mm3 \n"
      "  punpckldq %%mm2, %%mm0 \n"
      "  punpckldq %%mm5, %%mm4 \n"
      "  punpckhdq %%mm2, %%mm1 \n"
      "  punpckhdq %%mm5, %%mm3 \n"
      "  leal (%2,%2,2),%%eax \n"
      "  leal (%0,%2,4),%0 \n"            /* dest += 4 rows */
      "  movq %%mm0, 0(%0) \n"
      "  movq %%mm1, (%0,%2) \n"
      "  movq %%mm4, (%0,%2,2) \n"
      "  movq %%mm3, (%0,%%eax) \n"

      /* Source rows 4-7, columns 0-3 -> destination rows 0-3, columns 4-7. */
      "  leal (%1,%3,4),%1 \n"            /* src += 4 rows */
      "  leal (%3,%3,2),%%eax \n"
      "  movq 0(%1), %%mm0 \n"
      "  movq 0(%1,%3,2), %%mm2 \n"
      "  movq %%mm0, %%mm4 \n"
      "  movq %%mm2, %%mm5 \n"
      "  punpcklwd 0(%1,%3), %%mm0 \n"
      "  punpcklwd 0(%1,%%eax), %%mm2 \n"
      "  punpckhwd 0(%1,%3), %%mm4 \n"
      "  punpckhwd 0(%1,%%eax), %%mm5 \n"
      "  movq %%mm0, %%mm1 \n"
      "  movq %%mm4, %%mm3 \n"
      "  punpckldq %%mm2, %%mm0 \n"
      "  punpckldq %%mm5, %%mm4 \n"
      "  punpckhdq %%mm2, %%mm1 \n"
      "  punpckhdq %%mm5, %%mm3 \n"
      "  leal (%2,%2,2),%%eax \n"
      "  neg %2 \n"                       /* step dest back 4 rows ... */
      "  leal (%0,%2,4),%0 \n"
      "  neg %2 \n"                       /* ... and restore dstr */
      "  movq %%mm0, 8(%0) \n"
      "  movq %%mm1, 8(%0,%2) \n"
      "  movq %%mm4, 8(%0,%2,2) \n"
      "  movq %%mm3, 8(%0,%%eax) \n"

      /* Source rows 4-7, columns 4-7 -> destination rows 4-7, columns 4-7. */
      "  leal (%3,%3,2),%%eax \n"
      "  movq 8(%1), %%mm0 \n"
      "  movq 8(%1,%3,2), %%mm2 \n"
      "  movq %%mm0, %%mm4 \n"
      "  movq %%mm2, %%mm5 \n"
      "  punpcklwd 8(%1,%3), %%mm0 \n"
      "  punpcklwd 8(%1,%%eax), %%mm2 \n"
      "  punpckhwd 8(%1,%3), %%mm4 \n"
      "  punpckhwd 8(%1,%%eax), %%mm5 \n"
      "  movq %%mm0, %%mm1 \n"
      "  movq %%mm4, %%mm3 \n"
      "  punpckldq %%mm2, %%mm0 \n"
      "  punpckldq %%mm5, %%mm4 \n"
      "  punpckhdq %%mm2, %%mm1 \n"
      "  punpckhdq %%mm5, %%mm3 \n"
      "  leal (%2,%2,2),%%eax \n"
      "  leal (%0,%2,4),%0 \n"            /* dest += 4 rows */
      "  movq %%mm0, 8(%0) \n"
      "  movq %%mm1, 8(%0,%2) \n"
      "  movq %%mm4, 8(%0,%2,2) \n"
      "  movq %%mm3, 8(%0,%%eax) \n"
      "  emms \n"                         /* leave MMX state before returning */
      : "+r" (dest), "+r" (src), "+r" (dstr), "+r" (sstr)
      :
      /* The asm stores through dest and reads through src, so the compiler
       * must not cache or reorder memory accesses across it; it also
       * overwrites mm0-mm5 and (via neg) the flags. */
      : "eax", "cc", "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5");
#else
  /* Portable fallback: strides are byte offsets, so address through
   * unsigned char pointers. */
  unsigned char *out = (unsigned char *) dest;
  const unsigned char *in = (const unsigned char *) src;
  int row, col;

  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      *(uint16_t *) (out + row * dstr + col * 2) =
          *(const uint16_t *) (in + col * sstr + row * 2);
    }
  }
#endif
}
sl@0
   139
/* Registers trans8x8_u16_mmx as an implementation of the trans8x8_u16 class,
 * tagged with OIL_IMPL_FLAG_MMX (presumably so it is only selected on CPUs
 * that report MMX support -- confirm against the liboil macro docs). */
OIL_DEFINE_IMPL_FULL (trans8x8_u16_mmx, trans8x8_u16, OIL_IMPL_FLAG_MMX);
sl@0
   140
sl@0
   141
/*
 * trans8x8_u16_asm1:
 * @dest: top-left element of the 8x8 destination block
 * @dstr: distance in bytes between consecutive destination rows
 * @src:  top-left element of the 8x8 source block
 * @sstr: distance in bytes between consecutive source rows
 *
 * Writes the transpose of the 8x8 block of 16-bit values at @src to @dest.
 *
 * The 32-bit x86 path loops eight times; each pass reads one source row with
 * 16-bit loads and scatters its eight elements down one destination column.
 * Destination rows 0, 1, 2 and 4 are addressed from the column base with
 * +dstr scaled by 0/1/2/4; rows 3, 5, 6 and 7 are addressed from a second
 * pointer at row 7 with -dstr scaled by 4/2/1/0, so only scale factors the
 * addressing modes support are needed.
 *
 * Other builds (including WINSCW/WINS) use an equivalent C loop so that the
 * function never returns with @dest unwritten.
 */
static void
trans8x8_u16_asm1 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
{
#if (defined(__i386__) || defined(__i386)) && !defined(__WINSCW__) && !defined(__WINS__)
  /* ebx is saved and restored by hand instead of being listed as a clobber
   * (presumably because it can be reserved as the PIC register). */
  int saved_ebx = 0;

  __asm__ __volatile__ (
      "  movl %%ebx, %[save] \n"
      "  movl %[dest], %%ecx \n"          /* ecx = current destination column */
      "  movl %[src], %%ebx \n"           /* ebx = current source row */
      "  movl %[dstr], %%edx \n"
      "  lea (%%ecx,%%edx,8), %%esi \n"
      "  sub %%edx, %%esi \n"             /* esi = ecx + 7 * dstr (row 7) */
      "  movl $7, %%edi \n"               /* counts 7..0: eight passes */
      "1: \n"

      "  mov (%%ebx), %%ax \n"
      "  mov %%ax,(%%ecx) \n"             /* element 0 -> row 0 */
      "  mov 2(%%ebx), %%ax \n"
      "  mov %%ax,(%%ecx,%%edx,1) \n"     /* element 1 -> row 1 */
      "  mov 4(%%ebx), %%ax \n"
      "  mov %%ax,(%%ecx,%%edx,2) \n"     /* element 2 -> row 2 */
      "  mov 8(%%ebx), %%ax \n"
      "  mov %%ax,(%%ecx,%%edx,4) \n"     /* element 4 -> row 4 */

      "  neg %%edx \n"                    /* edx = -dstr */

      "  mov 6(%%ebx), %%ax \n"
      "  mov %%ax,(%%esi,%%edx,4) \n"     /* element 3 -> row 7 - 4 */
      "  mov 10(%%ebx), %%ax \n"
      "  mov %%ax,(%%esi,%%edx,2) \n"     /* element 5 -> row 7 - 2 */
      "  mov 12(%%ebx), %%ax \n"
      "  mov %%ax,(%%esi,%%edx,1) \n"     /* element 6 -> row 7 - 1 */
      "  mov 14(%%ebx), %%ax \n"
      "  mov %%ax,(%%esi) \n"             /* element 7 -> row 7 */

      "  neg %%edx \n"                    /* edx = +dstr again */
      "  add %[sstr], %%ebx \n"           /* next source row */
      "  add $2, %%ecx \n"                /* next destination column */
      "  add $2, %%esi \n"

      "  dec %%edi \n"
      "  jge 1b \n"
      "  movl %[save], %%ebx \n"
      /* saved_ebx is written by the asm, so it must be an output operand. */
      : [save] "=m" (saved_ebx)
      : [dest] "m" (dest), [dstr] "m" (dstr), [src] "m" (src), [sstr] "m" (sstr)
      /* "memory": the asm reads *src and writes *dest behind the compiler's
       * back. */
      : "eax", "ecx", "edx", "esi", "edi", "cc", "memory");
#else
  /* Portable fallback: strides are byte offsets, so address through
   * unsigned char pointers. */
  unsigned char *out = (unsigned char *) dest;
  const unsigned char *in = (const unsigned char *) src;
  int row, col;

  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      *(uint16_t *) (out + row * dstr + col * 2) =
          *(const uint16_t *) (in + col * sstr + row * 2);
    }
  }
#endif
}
sl@0
   189
/* Registers trans8x8_u16_asm1 as an implementation of the trans8x8_u16 class
 * (no CPU feature flags are passed here). */
OIL_DEFINE_IMPL (trans8x8_u16_asm1, trans8x8_u16);
sl@0
   190
sl@0
   191
/*
 * trans8x8_u16_asm2:
 * @dest: top-left element of the 8x8 destination block
 * @dstr: distance in bytes between consecutive destination rows
 * @src:  top-left element of the 8x8 source block
 * @sstr: distance in bytes between consecutive source rows
 *
 * Writes the transpose of the 8x8 block of 16-bit values at @src to @dest.
 *
 * The 32-bit x86 path loops eight times; each pass reads one source row with
 * four 32-bit loads (two elements per load, split with a 16-bit shift) and
 * scatters the eight elements down one destination column.  Rows 0, 1, 2 and
 * 4 are addressed from the column base with +dstr; rows 3, 5, 6 and 7 from a
 * second pointer at row 7 with -dstr kept in edi.  Because edi holds the
 * negated stride, the loop counter lives in a stack slot.
 *
 * Other builds (including WINSCW/WINS) use an equivalent C loop so that the
 * function never returns with @dest unwritten.
 */
static void
trans8x8_u16_asm2 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
{
#if (defined(__i386__) || defined(__i386)) && !defined(__WINSCW__) && !defined(__WINS__)
  int i = 0;
  /* ebx is saved and restored by hand instead of being listed as a clobber
   * (presumably because it can be reserved as the PIC register). */
  int saved_ebx = 0;

  __asm__ __volatile__ (
      "  movl %%ebx, %[save] \n"
      "  movl %[dest], %%ecx \n"          /* ecx = current destination column */
      "  movl %[src], %%ebx \n"           /* ebx = current source row */
      "  movl %[dstr], %%edx \n"
      "  lea (%%ecx,%%edx,8), %%esi \n"
      "  sub %%edx, %%esi \n"             /* esi = ecx + 7 * dstr (row 7) */
      "  movl $7, %[count] \n"            /* counts 7..0: eight passes */
      "  movl %%edx, %%edi \n"
      "  negl %%edi \n"                   /* edi = -dstr */
      "1: \n"

      "  movl (%%ebx), %%eax \n"          /* elements 0 and 1 */
      "  mov %%ax,(%%ecx) \n"             /* element 0 -> row 0 */
      "  shr $16, %%eax \n"
      "  mov %%ax,(%%ecx,%%edx,1) \n"     /* element 1 -> row 1 */

      "  movl 4(%%ebx), %%eax \n"         /* elements 2 and 3 */
      "  mov %%ax,(%%ecx,%%edx,2) \n"     /* element 2 -> row 2 */
      "  shr $16, %%eax \n"
      "  mov %%ax,(%%esi,%%edi,4) \n"     /* element 3 -> row 7 - 4 */

      "  movl 8(%%ebx), %%eax \n"         /* elements 4 and 5 */
      "  mov %%ax,(%%ecx,%%edx,4) \n"     /* element 4 -> row 4 */
      "  shr $16, %%eax \n"
      "  mov %%ax,(%%esi,%%edi,2) \n"     /* element 5 -> row 7 - 2 */

      "  movl 12(%%ebx), %%eax \n"        /* elements 6 and 7 */
      "  mov %%ax,(%%esi,%%edi,1) \n"     /* element 6 -> row 7 - 1 */
      "  shr $16, %%eax \n"
      "  mov %%ax,(%%esi) \n"             /* element 7 -> row 7 */

      "  add %[sstr], %%ebx \n"           /* next source row */
      "  add $2, %%ecx \n"                /* next destination column */
      "  add $2, %%esi \n"

      "  decl %[count] \n"
      "  jge 1b \n"
      "  movl %[save], %%ebx \n"
      /* Both scratch slots are written by the asm, so they must be output
       * operands rather than inputs. */
      : [count] "=m" (i), [save] "=m" (saved_ebx)
      : [dest] "m" (dest), [dstr] "m" (dstr), [src] "m" (src), [sstr] "m" (sstr)
      /* "memory": the asm reads *src and writes *dest behind the compiler's
       * back. */
      : "eax", "ecx", "edx", "esi", "edi", "cc", "memory");
#else
  /* Portable fallback: strides are byte offsets, so address through
   * unsigned char pointers. */
  unsigned char *out = (unsigned char *) dest;
  const unsigned char *in = (const unsigned char *) src;
  int row, col;

  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      *(uint16_t *) (out + row * dstr + col * 2) =
          *(const uint16_t *) (in + col * sstr + row * 2);
    }
  }
#endif
}
sl@0
   241
/* Registers trans8x8_u16_asm2 as an implementation of the trans8x8_u16 class
 * (no CPU feature flags are passed here). */
OIL_DEFINE_IMPL (trans8x8_u16_asm2, trans8x8_u16);
sl@0
   242
sl@0
   243
sl@0
   244
sl@0
   245
#ifdef	__SYMBIAN32__

/* Symbian-only accessor: returns the address of the implementation
 * descriptor for trans8x8_u16_mmx.  The identifier and the returned object
 * are named after the implementation function only; the class name that is
 * the second argument of OIL_DEFINE_IMPL_FULL is not part of either. */
OilFunctionImpl* __oil_function_impl_trans8x8_u16_mmx() {
		return &_oil_function_impl_trans8x8_u16_mmx;
}
#endif
sl@0
   251
sl@0
   252
sl@0
   253
sl@0
   254
#ifdef	__SYMBIAN32__

/* Symbian-only accessor: hands out the address of the implementation
 * descriptor object _oil_function_impl_trans8x8_u16_asm1. */
OilFunctionImpl* __oil_function_impl_trans8x8_u16_asm1() {
  OilFunctionImpl *impl = &_oil_function_impl_trans8x8_u16_asm1;

  return impl;
}
#endif
sl@0
   260
sl@0
   261
#ifdef	__SYMBIAN32__

/* Symbian-only accessor: hands out the address of the implementation
 * descriptor object _oil_function_impl_trans8x8_u16_asm2. */
OilFunctionImpl* __oil_function_impl_trans8x8_u16_asm2() {
  OilFunctionImpl *impl = &_oil_function_impl_trans8x8_u16_asm2;

  return impl;
}
#endif
sl@0
   267