os/ossrv/genericopenlibs/liboil/src/i386/composite_i386.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
 * LIBOIL - Library of Optimized Inner Loops
sl@0
     3
 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
sl@0
     4
 * All rights reserved.
sl@0
     5
 *
sl@0
     6
 * Redistribution and use in source and binary forms, with or without
sl@0
     7
 * modification, are permitted provided that the following conditions
sl@0
     8
 * are met:
sl@0
     9
 * 1. Redistributions of source code must retain the above copyright
sl@0
    10
 *    notice, this list of conditions and the following disclaimer.
sl@0
    11
 * 2. Redistributions in binary form must reproduce the above copyright
sl@0
    12
 *    notice, this list of conditions and the following disclaimer in the
sl@0
    13
 *    documentation and/or other materials provided with the distribution.
sl@0
    14
 * 
sl@0
    15
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
sl@0
    16
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
sl@0
    17
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
sl@0
    18
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
sl@0
    19
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
sl@0
    20
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
sl@0
    21
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
sl@0
    22
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
sl@0
    23
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
sl@0
    24
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
sl@0
    25
 * POSSIBILITY OF SUCH DAMAGE.
sl@0
    26
 */
sl@0
    27
//Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
sl@0
    28
sl@0
    29
#ifdef HAVE_CONFIG_H
sl@0
    30
#include "config.h"
sl@0
    31
#endif
sl@0
    32
sl@0
    33
#include <liboil/liboil.h>
sl@0
    34
#include <liboil/liboilfunction.h>
sl@0
    35
sl@0
    36
OIL_DECLARE_CLASS (composite_in_argb);
sl@0
    37
OIL_DECLARE_CLASS (composite_in_argb_const_src);
sl@0
    38
OIL_DECLARE_CLASS (composite_in_argb_const_mask);
sl@0
    39
OIL_DECLARE_CLASS (composite_over_argb);
sl@0
    40
OIL_DECLARE_CLASS (composite_over_argb_const_src);
sl@0
    41
OIL_DECLARE_CLASS (composite_add_argb);
sl@0
    42
OIL_DECLARE_CLASS (composite_add_argb_const_src);
sl@0
    43
OIL_DECLARE_CLASS (composite_in_over_argb);
sl@0
    44
OIL_DECLARE_CLASS (composite_in_over_argb_const_src);
sl@0
    45
OIL_DECLARE_CLASS (composite_in_over_argb_const_mask);
sl@0
    46
sl@0
    47
#if 0
/*
 * Disabled plain-C rendering of composite_in_argb: each channel of src[i]
 * is combined with mask[i] through COMPOSITE_IN and repacked with ARGB.
 * Excluded from the build by the surrounding #if 0.
 */
static void
composite_in_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
{
  int i;

  for (i = 0; i < n; i++) {
    const uint32_t pixel = src[i];
    const uint8_t coverage = mask[i];

    dest[i] = ARGB(
        COMPOSITE_IN(ARGB_A(pixel), coverage),
        COMPOSITE_IN(ARGB_R(pixel), coverage),
        COMPOSITE_IN(ARGB_G(pixel), coverage),
        COMPOSITE_IN(ARGB_B(pixel), coverage));
  }
}
OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx, composite_in_argb);
#endif
sl@0
    63
sl@0
    64
/*
 * Assembly prologue shared by the MMX kernels in this file.  It expands to
 * a string literal meant to be pasted at the start of an asm template and
 * leaves, as four 16-bit lanes each:
 *
 *   mm7 = { 0, 0, 0, 0 }          (also the zero used to widen bytes)
 *   mm6 = { 128, 128, 128, 128 }  (rounding bias for MMX_MULDIV255)
 *   mm5 = { 255, 255, 255, 255 }  (xor mask giving 255 - x)
 *
 * %eax is used as scratch, so the enclosing asm must list it as clobbered.
 */
#define MMX_LOAD_CONSTANTS              \
      "  pxor %%mm7, %%mm7\n"           \
      "  movl $0x80808080, %%eax\n"     \
      "  movd %%eax, %%mm6\n"           \
      "  punpcklbw %%mm7, %%mm6\n"      \
      "  movl $0xffffffff, %%eax\n"     \
      "  movd %%eax, %%mm5\n"           \
      "  punpcklbw %%mm7, %%mm5\n"
sl@0
    78
sl@0
    79
/*
 * MMX_MULDIV255(a, b): a = muldiv255(a, b), lane by lane.
 *
 *   - a and b name MMX registers holding unpacked (16-bit) values
 *   - with t = a * b + 128, the result left in a is (t + (t >> 8)) >> 8
 *   - b is overwritten with an intermediate, so both registers are destroyed
 *   - mm6 must already hold { 128, 128, 128, 128 }
 */
#define MMX_MULDIV255(a,b)                \
      "  pmullw %%" #b ", %%" #a "\n"     \
      "  paddw %%mm6, %%" #a "\n"         \
      "  movq %%" #a ", %%" #b "\n"       \
      "  psrlw $8, %%" #b "\n"            \
      "  paddw %%" #b ", %%" #a "\n"      \
      "  psrlw $8, %%" #a "\n"
sl@0
    92
sl@0
    93
static void
sl@0
    94
composite_in_argb_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
sl@0
    95
{
sl@0
    96
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
    97
  __asm__ __volatile__ (
sl@0
    98
      MMX_LOAD_CONSTANTS
sl@0
    99
      "1:\n"
sl@0
   100
      "  movd (%2), %%mm0\n"
sl@0
   101
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   102
      "  pshufw $0x00, %%mm0, %%mm1\n"
sl@0
   103
sl@0
   104
      "  movd (%1), %%mm2\n"
sl@0
   105
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   106
sl@0
   107
      MMX_MULDIV255(mm2, mm1)
sl@0
   108
sl@0
   109
      "  packuswb %%mm2, %%mm2\n"
sl@0
   110
      "  movd %%mm2, (%0)\n"
sl@0
   111
      "  addl $4, %0\n"
sl@0
   112
      "  addl $4, %1\n"
sl@0
   113
      "  addl $1, %2\n"
sl@0
   114
      "  decl %3\n"
sl@0
   115
      "  jnz 1b\n"
sl@0
   116
      "  emms\n"
sl@0
   117
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
sl@0
   118
      :
sl@0
   119
      :"eax");
sl@0
   120
#endif
sl@0
   121
}
sl@0
   122
OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx, composite_in_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   123
sl@0
   124
/* 
sl@0
   125
 * This is a different style than the others.  Should be moved elsewhere.
sl@0
   126
 */
sl@0
   127
static void
sl@0
   128
composite_in_argb_mmx2 (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
sl@0
   129
{
sl@0
   130
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   131
  __asm__ __volatile__ (
sl@0
   132
      MMX_LOAD_CONSTANTS
sl@0
   133
      "1:\n"
sl@0
   134
      "  movl (%2), %%eax\n"
sl@0
   135
      /* if alpha == 0, write a 0 */
sl@0
   136
      "  testl $0x000000ff, %%eax\n"
sl@0
   137
      "  je 2f\n"
sl@0
   138
      /* if alpha == 0xff, write src value */
sl@0
   139
      "  cmp $0xff, %%al\n"
sl@0
   140
      "  je 3f\n"
sl@0
   141
sl@0
   142
      "  movd %%eax, %%mm0\n"
sl@0
   143
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   144
      "  pshufw $0x00, %%mm0, %%mm1\n"
sl@0
   145
sl@0
   146
      "  movd (%1), %%mm2\n"
sl@0
   147
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   148
sl@0
   149
      MMX_MULDIV255(mm2, mm1)
sl@0
   150
sl@0
   151
      "  packuswb %%mm2, %%mm2\n"
sl@0
   152
      "  movd %%mm2, (%0)\n"
sl@0
   153
      "  jmp 4f\n"
sl@0
   154
      "2:\n"
sl@0
   155
      "  movl $0, (%0)\n"
sl@0
   156
      "  jmp 4f\n"
sl@0
   157
      "3:\n"
sl@0
   158
      "  movl (%1), %%eax\n"
sl@0
   159
      "  movl %%eax, (%0)\n"
sl@0
   160
      "4:\n"
sl@0
   161
      "  addl $4, %0\n"
sl@0
   162
      "  addl $4, %1\n"
sl@0
   163
      "  addl $1, %2\n"
sl@0
   164
      "  decl %3\n"
sl@0
   165
      "  jnz 1b\n"
sl@0
   166
      "  emms\n"
sl@0
   167
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
sl@0
   168
      :
sl@0
   169
      :"eax");
sl@0
   170
#endif
sl@0
   171
}
sl@0
   172
OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx2, composite_in_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   173
sl@0
   174
static void
sl@0
   175
composite_in_argb_const_src_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
sl@0
   176
{
sl@0
   177
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   178
  __asm__ __volatile__ (
sl@0
   179
      MMX_LOAD_CONSTANTS
sl@0
   180
      "  movd (%1), %%mm3\n"
sl@0
   181
      "  punpcklbw %%mm7, %%mm3\n"
sl@0
   182
      "1:\n"
sl@0
   183
      "  movd (%2), %%mm0\n"
sl@0
   184
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   185
      "  pshufw $0x00, %%mm0, %%mm1\n"
sl@0
   186
sl@0
   187
      "  movq %%mm3, %%mm2\n"
sl@0
   188
sl@0
   189
      MMX_MULDIV255(mm2, mm1)
sl@0
   190
sl@0
   191
      "  packuswb %%mm2, %%mm2\n"
sl@0
   192
      "  movd %%mm2, (%0)\n"
sl@0
   193
      "  addl $4, %0\n"
sl@0
   194
      "  addl $1, %2\n"
sl@0
   195
      "  decl %3\n"
sl@0
   196
      "  jnz 1b\n"
sl@0
   197
      "  emms\n"
sl@0
   198
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
sl@0
   199
      :
sl@0
   200
      :"eax");
sl@0
   201
#endif
sl@0
   202
}
sl@0
   203
OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_mmx, composite_in_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   204
sl@0
   205
static void
sl@0
   206
composite_in_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
sl@0
   207
{
sl@0
   208
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   209
  __asm__ __volatile__ (
sl@0
   210
      MMX_LOAD_CONSTANTS
sl@0
   211
      "  movd (%2), %%mm0\n"
sl@0
   212
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   213
      "  pshufw $0x00, %%mm0, %%mm3\n"
sl@0
   214
      "1:\n"
sl@0
   215
      "  movq %%mm3, %%mm1\n"
sl@0
   216
      "  movd (%1), %%mm2\n"
sl@0
   217
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   218
sl@0
   219
      MMX_MULDIV255(mm2, mm1)
sl@0
   220
sl@0
   221
      "  packuswb %%mm2, %%mm2\n"
sl@0
   222
      "  movd %%mm2, (%0)\n"
sl@0
   223
      "  addl $4, %0\n"
sl@0
   224
      "  addl $4, %1\n"
sl@0
   225
      "  decl %3\n"
sl@0
   226
      "  jnz 1b\n"
sl@0
   227
      "  emms\n"
sl@0
   228
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
sl@0
   229
      :
sl@0
   230
      :"eax");
sl@0
   231
#endif
sl@0
   232
}
sl@0
   233
OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_mmx, composite_in_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   234
sl@0
   235
static void
sl@0
   236
composite_over_argb_mmx (uint32_t *dest, uint32_t *src, int n)
sl@0
   237
{
sl@0
   238
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   239
  __asm__ __volatile__ (
sl@0
   240
      MMX_LOAD_CONSTANTS
sl@0
   241
      "1:\n"
sl@0
   242
      "  movl (%1), %%eax\n"
sl@0
   243
      "  testl $0xff000000, %%eax\n"
sl@0
   244
      "  jz 2f\n"
sl@0
   245
sl@0
   246
      "  movd %%eax, %%mm0\n"
sl@0
   247
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   248
      "  pshufw $0xff, %%mm0, %%mm1\n"
sl@0
   249
      "  pxor %%mm5, %%mm1\n"
sl@0
   250
sl@0
   251
      "  movd (%0), %%mm2\n"
sl@0
   252
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   253
sl@0
   254
      MMX_MULDIV255(mm2, mm1)
sl@0
   255
sl@0
   256
      "  paddw %%mm0, %%mm2\n"
sl@0
   257
      "  packuswb %%mm2, %%mm2\n"
sl@0
   258
sl@0
   259
      "  movd %%mm2, (%0)\n"
sl@0
   260
      "2:\n"
sl@0
   261
      "  addl $4, %0\n"
sl@0
   262
      "  addl $4, %1\n"
sl@0
   263
      "  decl %2\n"
sl@0
   264
      "  jnz 1b\n"
sl@0
   265
      "  emms\n"
sl@0
   266
      :"+r" (dest), "+r" (src), "+r" (n)
sl@0
   267
      :
sl@0
   268
      :"eax");
sl@0
   269
#endif
sl@0
   270
}
sl@0
   271
OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   272
sl@0
   273
/* unroll 2 */
sl@0
   274
static void
sl@0
   275
composite_over_argb_mmx_2 (uint32_t *dest, uint32_t *src, int n)
sl@0
   276
{
sl@0
   277
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   278
  __asm__ __volatile__ (
sl@0
   279
      MMX_LOAD_CONSTANTS
sl@0
   280
sl@0
   281
      "  testl $0x1, %2\n"
sl@0
   282
      "  jz 2f\n"
sl@0
   283
sl@0
   284
      "  movl (%1), %%eax\n"
sl@0
   285
      "  testl $0xff000000, %%eax\n"
sl@0
   286
      "  jz 1f\n"
sl@0
   287
sl@0
   288
      "  movd %%eax, %%mm0\n"
sl@0
   289
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   290
      "  pshufw $0xff, %%mm0, %%mm1\n"
sl@0
   291
      "  pxor %%mm5, %%mm1\n"
sl@0
   292
sl@0
   293
      "  movd (%0), %%mm2\n"
sl@0
   294
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   295
      "  pmullw %%mm1, %%mm2\n"
sl@0
   296
      "  paddw %%mm6, %%mm2\n"
sl@0
   297
      "  movq %%mm2, %%mm1\n"
sl@0
   298
      "  psrlw $8, %%mm1\n"
sl@0
   299
      "  paddw %%mm1, %%mm2\n"
sl@0
   300
      "  psrlw $8, %%mm2\n"
sl@0
   301
sl@0
   302
      "  paddw %%mm0, %%mm2\n"
sl@0
   303
      "  packuswb %%mm2, %%mm2\n"
sl@0
   304
sl@0
   305
      "  movd %%mm2, (%0)\n"
sl@0
   306
sl@0
   307
      "1:\n"
sl@0
   308
      "  addl $4, %0\n"
sl@0
   309
      "  addl $4, %1\n"
sl@0
   310
sl@0
   311
      "2:\n"
sl@0
   312
      "  shr $1, %2\n"
sl@0
   313
      "  jz 5f\n"
sl@0
   314
      "3:\n"
sl@0
   315
      "  movl (%1), %%eax\n"
sl@0
   316
      "  orl 4(%1), %%eax\n"
sl@0
   317
      "  testl $0xff000000, %%eax\n"
sl@0
   318
      "  jz 4f\n"
sl@0
   319
sl@0
   320
      "  movd (%1), %%mm0\n"
sl@0
   321
      "  movd (%0), %%mm2\n"
sl@0
   322
sl@0
   323
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   324
      "   movd 4(%1), %%mm3\n"
sl@0
   325
sl@0
   326
      "  pshufw $0xff, %%mm0, %%mm1\n"
sl@0
   327
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   328
sl@0
   329
      "  pxor %%mm5, %%mm1\n"
sl@0
   330
      "   movd 4(%0), %%mm4\n"
sl@0
   331
sl@0
   332
      "  pmullw %%mm1, %%mm2\n"
sl@0
   333
      "   punpcklbw %%mm7, %%mm3\n"
sl@0
   334
sl@0
   335
      "  paddw %%mm6, %%mm2\n"
sl@0
   336
      "   punpcklbw %%mm7, %%mm4\n"
sl@0
   337
sl@0
   338
      "  movq %%mm2, %%mm1\n"
sl@0
   339
      "   pshufw $0xff, %%mm3, %%mm7\n"
sl@0
   340
sl@0
   341
      "  psrlw $8, %%mm1\n"
sl@0
   342
      "   pxor %%mm5, %%mm7\n"
sl@0
   343
sl@0
   344
      "  paddw %%mm1, %%mm2\n"
sl@0
   345
      "   pmullw %%mm7, %%mm4\n"
sl@0
   346
sl@0
   347
      "  psrlw $8, %%mm2\n"
sl@0
   348
      "   paddw %%mm6, %%mm4\n"
sl@0
   349
sl@0
   350
      "  paddw %%mm0, %%mm2\n"
sl@0
   351
      "   movq %%mm4, %%mm7\n"
sl@0
   352
sl@0
   353
      "  packuswb %%mm2, %%mm2\n"
sl@0
   354
      "   psrlw $8, %%mm7\n"
sl@0
   355
sl@0
   356
      "  movd %%mm2, (%0)\n"
sl@0
   357
      "   paddw %%mm7, %%mm4\n"
sl@0
   358
sl@0
   359
      "   psrlw $8, %%mm4\n"
sl@0
   360
      "   paddw %%mm3, %%mm4\n"
sl@0
   361
      "   packuswb %%mm4, %%mm4\n"
sl@0
   362
      "   movd %%mm4, 4(%0)\n"
sl@0
   363
sl@0
   364
      "  pxor %%mm7, %%mm7\n"
sl@0
   365
      "4:\n"
sl@0
   366
      "  addl $8, %0\n"
sl@0
   367
      "  addl $8, %1\n"
sl@0
   368
      "  decl %2\n"
sl@0
   369
      "  jnz 3b\n"
sl@0
   370
      "5:\n"
sl@0
   371
      "  emms\n"
sl@0
   372
      :"+r" (dest), "+r" (src), "+r" (n)
sl@0
   373
      :
sl@0
   374
      :"eax");
sl@0
   375
#endif
sl@0
   376
}
sl@0
   377
OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_2, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   378
sl@0
   379
/* replace pshufw with punpck */
sl@0
   380
static void
sl@0
   381
composite_over_argb_mmx_3 (uint32_t *dest, uint32_t *src, int n)
sl@0
   382
{
sl@0
   383
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   384
  __asm__ __volatile__ (
sl@0
   385
      MMX_LOAD_CONSTANTS
sl@0
   386
      "1:\n"
sl@0
   387
      "  movl (%1), %%eax\n"
sl@0
   388
      "  testl $0xff000000, %%eax\n"
sl@0
   389
      "  jz 2f\n"
sl@0
   390
sl@0
   391
      "  movd %%eax, %%mm0\n"
sl@0
   392
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   393
      "  movq %%mm0, %%mm1\n"
sl@0
   394
      "  punpckhwd %%mm1, %%mm1\n"
sl@0
   395
      "  punpckhdq %%mm1, %%mm1\n"
sl@0
   396
      "  pxor %%mm5, %%mm1\n"
sl@0
   397
sl@0
   398
      "  movd (%0), %%mm2\n"
sl@0
   399
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   400
      "  pmullw %%mm1, %%mm2\n"
sl@0
   401
      "  paddw %%mm6, %%mm2\n"
sl@0
   402
      "  movq %%mm2, %%mm1\n"
sl@0
   403
      "  psrlw $8, %%mm1\n"
sl@0
   404
      "  paddw %%mm1, %%mm2\n"
sl@0
   405
      "  psrlw $8, %%mm2\n"
sl@0
   406
sl@0
   407
      "  paddw %%mm0, %%mm2\n"
sl@0
   408
      "  packuswb %%mm2, %%mm2\n"
sl@0
   409
      "  movd %%mm2, (%0)\n"
sl@0
   410
sl@0
   411
      "2:\n"
sl@0
   412
      "  addl $4, %0\n"
sl@0
   413
      "  addl $4, %1\n"
sl@0
   414
      "  decl %2\n"
sl@0
   415
      "  jnz 1b\n"
sl@0
   416
      "  emms\n"
sl@0
   417
      :"+r" (dest), "+r" (src), "+r" (n)
sl@0
   418
      :
sl@0
   419
      :"eax");
sl@0
   420
#endif
sl@0
   421
}
sl@0
   422
OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_3, composite_over_argb, OIL_IMPL_FLAG_MMX);
sl@0
   423
sl@0
   424
/* written for gromit */
/*
 * composite_over_argb, MMX/MMXEXT version that replaces the shift/add
 * divide-by-255 with a single high multiply: with t = d * (255 - alpha)
 * + 128, the scaled value is (t * 257) >> 16 (pmulhuw against mm4).
 *
 * For each of the n pixels:
 *   dest[i] = src[i] + scaled(dest[i])
 * per channel, saturated to 255.  Pixels whose source alpha byte is 0
 * leave dest[i] untouched.
 *
 *   dest  pixels composited onto, n entries (read and written)
 *   src   pixels composited over dest, n entries
 *   n     pixel count; nothing is done when n <= 0
 *
 * Compiles to an empty function when __WINSCW__ or __WINS__ is defined.
 */
static void
composite_over_argb_mmx_4 (uint32_t *dest, uint32_t *src, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* The loop tests its counter only after the first pass. */
  if (n <= 0) {
    return;
  }

  __asm__ __volatile__ ("  pxor %%mm7, %%mm7\n"   // mm7 = { 0, 0, 0, 0 }
      "  movl $0x80808080, %%eax\n"
      "  movd %%eax, %%mm6\n"  // mm6 = { 128, 128, 128, 128 }
      "  punpcklbw %%mm7, %%mm6\n"
      "  movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
      "  movd %%eax, %%mm5\n"
      "  punpcklbw %%mm7, %%mm5\n"
      "  movl $0x02020202, %%eax\n"
      "  movd %%eax, %%mm4\n"
      "  punpcklbw %%mm7, %%mm4\n"
      "  paddw %%mm5, %%mm4\n" // mm4 = { 257, 257, 257, 257 }
      "1:\n"
      "  movl (%1), %%eax\n"
      "  testl $0xff000000, %%eax\n"     // source alpha == 0: skip pixel
      "  jz 2f\n"

      "  movd %%eax, %%mm0\n"
      "  punpcklbw %%mm7, %%mm0\n"
      "  pshufw $0xff, %%mm0, %%mm1\n"   // broadcast the alpha lane
      "  pxor %%mm5, %%mm1\n"            // 255 - alpha

      "  movd (%0), %%mm2\n"
      "  punpcklbw %%mm7, %%mm2\n"
      "  pmullw %%mm1, %%mm2\n"
      "  paddw %%mm6, %%mm2\n"
      "  pmulhuw %%mm4, %%mm2\n"         // (t * 257) >> 16

      "  paddw %%mm0, %%mm2\n"
      "  packuswb %%mm2, %%mm2\n"

      "  movd %%mm2, (%0)\n"
      "2:\n"
      // suffix-less add: operand size follows the pointer register
      "  add $4, %0\n"
      "  add $4, %1\n"
      "  subl $1, %2\n"
      "  jnz 1b\n"
      "  emms\n"
      :"+r" (dest), "+r" (src), "+r" (n)
      :
      // "memory": the arrays are read and written behind the compiler's back
      :"eax", "memory");
#endif
}
sl@0
   471
OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_4, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   472
sl@0
   473
/*
 * composite_over_argb, MMX/MMXEXT version.  Same arithmetic as the
 * pmulhuw-based kernel above ((t * 257) >> 16 with t = d * (255 - alpha)
 * + 128), but the 255 constant is built with pcmpeqw/psrlw and the
 * alpha-is-zero test is done on the unpacked pixel with pextrw.
 *
 * For each of the n pixels:
 *   dest[i] = src[i] + scaled(dest[i])
 * per channel, saturated to 255.  Pixels whose source alpha byte is 0
 * leave dest[i] untouched.
 *
 *   dest  pixels composited onto, n entries (read and written)
 *   src   pixels composited over dest, n entries
 *   n     pixel count; nothing is done when n <= 0
 *
 * Compiles to an empty function when __WINSCW__ or __WINS__ is defined.
 */
static void
composite_over_argb_mmx_5 (uint32_t *dest, uint32_t *src, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* The loop tests its counter only after the first pass. */
  if (n <= 0) {
    return;
  }

  __asm__ __volatile__ ("  pxor %%mm7, %%mm7\n"   // mm7 = { 0, 0, 0, 0 }
      "  movl $0x80808080, %%eax\n"
      "  movd %%eax, %%mm6\n"  // mm6 = { 128, 128, 128, 128 }
      "  punpcklbw %%mm7, %%mm6\n"
#if 0
      "  movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
      "  movd %%eax, %%mm5\n"
      "  punpcklbw %%mm7, %%mm5\n"
#else
      "  pcmpeqw %%mm5, %%mm5\n"
      "  psrlw $8, %%mm5\n" // mm5 = { 255, 255, 255, 255 }
#endif
      "  movl $0x02020202, %%eax\n"
      "  movd %%eax, %%mm4\n"
      "  punpcklbw %%mm7, %%mm4\n"
      "  paddw %%mm5, %%mm4\n" // mm4 = { 257, 257, 257, 257 }
      "1:\n"
      "  movd (%1), %%mm0\n"
      "  punpcklbw %%mm7, %%mm0\n"
      "  xor %%eax, %%eax\n"
      "  pextrw $3, %%mm0, %%eax\n"      // eax = source alpha
      "  test %%eax, %%eax\n"
      "  jz 2f\n"

      "  pshufw $0xff, %%mm0, %%mm1\n"   // broadcast the alpha lane
      "  pxor %%mm5, %%mm1\n"            // 255 - alpha

      "  movd (%0), %%mm2\n"
      "  punpcklbw %%mm7, %%mm2\n"
      "  pmullw %%mm1, %%mm2\n"
      "  paddw %%mm6, %%mm2\n"
      "  pmulhuw %%mm4, %%mm2\n"         // (t * 257) >> 16

      "  paddw %%mm0, %%mm2\n"
      "  packuswb %%mm2, %%mm2\n"

      "  movd %%mm2, (%0)\n"
      "2:\n"
      // suffix-less add: operand size follows the pointer register
      "  add $4, %0\n"
      "  add $4, %1\n"
      "  subl $1, %2\n"
      "  jnz 1b\n"
      "  emms\n"
      :"+r" (dest), "+r" (src), "+r" (n)
      :
      // "memory": the arrays are read and written behind the compiler's back
      :"eax", "memory");
#endif
}
sl@0
   525
OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_5, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   526
sl@0
   527
/*
 * composite_over_argb, SSE2 version processing one pixel per iteration in
 * the low half of the XMM registers.
 *
 * For each of the n pixels, with t = d * (255 - alpha(src[i])) + 128:
 *   dest[i] = src[i] + ((t * 257) >> 16)
 * per channel, saturated to 255.  Pixels whose source alpha byte is 0
 * leave dest[i] untouched.
 *
 *   dest  pixels composited onto, n entries (read and written)
 *   src   pixels composited over dest, n entries
 *   n     pixel count; nothing is done when n <= 0
 *
 * Compiles to an empty function when __WINSCW__ or __WINS__ is defined.
 */
static void
composite_over_argb_sse2 (uint32_t *dest, uint32_t *src, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* The loop tests its counter only after the first pass. */
  if (n <= 0) {
    return;
  }

  /* Lane values below describe the low four 16-bit lanes. */
  __asm__ __volatile__ ("  pxor %%xmm7, %%xmm7\n"   // xmm7 = { 0, 0, 0, 0 }
      "  movl $0x80808080, %%eax\n"
      "  movd %%eax, %%xmm6\n"  // xmm6 = { 128, 128, 128, 128 }
      "  punpcklbw %%xmm7, %%xmm6\n"
      "  movl $0xffffffff, %%eax\n" // xmm5 = { 255, 255, 255, 255 }
      "  movd %%eax, %%xmm5\n"
      "  punpcklbw %%xmm7, %%xmm5\n"
      "  movl $0x02020202, %%eax\n"
      "  movd %%eax, %%xmm4\n"
      "  punpcklbw %%xmm7, %%xmm4\n"
      "  paddw %%xmm5, %%xmm4\n" // xmm4 = { 257, 257, 257, 257 }
      "1:\n"
      "  movl (%1), %%eax\n"
      "  testl $0xff000000, %%eax\n"     // source alpha == 0: skip pixel
      "  jz 2f\n"

      "  movd (%1), %%xmm1\n"
      "  punpcklbw %%xmm7, %%xmm1\n"
      "  pshuflw $0xff, %%xmm1, %%xmm0\n" // broadcast the alpha lane
      "  pxor %%xmm5, %%xmm0\n"           // 255 - alpha

      "  movd (%0), %%xmm3\n"
      "  punpcklbw %%xmm7, %%xmm3\n"
      "  pmullw %%xmm0, %%xmm3\n"
      "  paddw %%xmm6, %%xmm3\n"
      "  pmulhuw %%xmm4, %%xmm3\n"        // (t * 257) >> 16

      "  paddw %%xmm1, %%xmm3\n"
      "  packuswb %%xmm3, %%xmm3\n"
      "  movd %%xmm3, (%0)\n"
      "2:\n"
      // suffix-less add: operand size follows the pointer register
      "  add $4, %0\n"
      "  add $4, %1\n"
      "  decl %2\n"
      "  jnz 1b\n"
      :"+r" (dest), "+r" (src), "+r" (n)
      :
      // "memory": the arrays are read and written behind the compiler's back
      :"eax", "memory");
#endif
}
sl@0
   571
OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2, composite_over_argb, OIL_IMPL_FLAG_SSE2);
sl@0
   572
sl@0
   573
/* written for shaun */
/*
 * composite_over_argb, SSE2 version processing two pixels per iteration.
 *
 * When n is odd the first pixel is composited alone (and skipped if its
 * source alpha byte is 0); the rest is handled in pairs, eight 16-bit
 * lanes at a time.  Per channel, with t = d * (255 - alpha) + 128:
 *   dest = src + ((t * 257) >> 16), saturated to 255.
 *
 *   dest  pixels composited onto, n entries (read and written)
 *   src   pixels composited over dest, n entries
 *   n     pixel count; nothing is done when n <= 0
 *
 * NOTE(review): the constants are loaded into xmm4-xmm7 by the first asm
 * statement and consumed by the following ones, yet nothing tells the
 * compiler those registers are live in between.  Confirm the build never
 * lets the compiler allocate them inside this function.
 *
 * Compiles to an empty function when __WINSCW__ or __WINS__ is defined.
 */
static void
composite_over_argb_sse2_2 (uint32_t *dest, uint32_t *src, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* Without this, a negative odd count would still composite one pixel. */
  if (n <= 0) {
    return;
  }

  /* Constants, replicated across all eight 16-bit lanes. */
  __asm__ __volatile__ ("  pxor %%xmm7, %%xmm7\n"   // xmm7 = 0
      "  movl $0x80808080, %%eax\n"
      "  movd %%eax, %%xmm6\n"  // xmm6 = 128 in every lane
      "  punpcklbw %%xmm7, %%xmm6\n"
      "  punpcklwd %%xmm6, %%xmm6\n"
      "  movl $0xffffffff, %%eax\n" // xmm5 = 255 in every lane
      "  movd %%eax, %%xmm5\n"
      "  punpcklbw %%xmm7, %%xmm5\n"
      "  punpcklwd %%xmm5, %%xmm5\n"
      "  movl $0x02020202, %%eax\n"
      "  movd %%eax, %%xmm4\n"
      "  punpcklbw %%xmm7, %%xmm4\n"
      "  paddw %%xmm5, %%xmm4\n" // low four lanes = 257
      "  punpcklwd %%xmm4, %%xmm4\n" // xmm4 = 257 in every lane
      :
      :
      :"eax");

  if (n&1) {
    /* odd leading pixel */
    __asm__ __volatile__ (
      "  movl (%1), %%eax\n"
      "  testl $0xff000000, %%eax\n"
      "  jz 1f\n"

      "  movd (%1), %%xmm1\n"
      "  punpcklbw %%xmm7, %%xmm1\n"
      "  pshuflw $0xff, %%xmm1, %%xmm0\n"
      "  pxor %%xmm5, %%xmm0\n"

      "  movd (%0), %%xmm3\n"
      "  punpcklbw %%xmm7, %%xmm3\n"
      "  pmullw %%xmm0, %%xmm3\n"
      "  paddw %%xmm6, %%xmm3\n"
      "  pmulhuw %%xmm4, %%xmm3\n"

      "  paddw %%xmm1, %%xmm3\n"
      "  packuswb %%xmm3, %%xmm3\n"
      "  movd %%xmm3, (%0)\n"

      "1:\n"
      // suffix-less add: operand size follows the pointer register
      "  add $4, %0\n"
      "  add $4, %1\n"
      :"+r" (dest), "+r" (src)
      :
      // "memory": the arrays are read and written behind the compiler's back
      :"eax", "memory");
  }
  n>>=1;

  if (n>0){
    /* remaining pixels, two per iteration */
    __asm__ __volatile__ ("\n"
      "3:\n"
#if 0
      "  movl (%1), %%eax\n"
      "  orl 4(%1), %%eax\n"
      "  testl $0xff000000, %%eax\n"
      "  jz 4f\n"
#endif

      "  movq (%1), %%xmm1\n"
      "  punpcklbw %%xmm7, %%xmm1\n"
      "  pshuflw $0xff, %%xmm1, %%xmm0\n"   // alpha of the first pixel
      "  pshufhw $0xff, %%xmm0, %%xmm0\n"   // alpha of the second pixel
      "  pxor %%xmm5, %%xmm0\n"

      "  movq (%0), %%xmm3\n"
      "  punpcklbw %%xmm7, %%xmm3\n"
      "  pmullw %%xmm0, %%xmm3\n"
      "  paddw %%xmm6, %%xmm3\n"
      "  pmulhuw %%xmm4, %%xmm3\n"
      "  paddw %%xmm1, %%xmm3\n"
      "  packuswb %%xmm3, %%xmm3\n"
      "  movq %%xmm3, (%0)\n"
      "4:\n"
      "  add $8, %0\n"
      "  add $8, %1\n"
      "  subl $1, %2\n"
      "  jnz 3b\n"
      :"+r" (dest), "+r" (src), "+r" (n)
      :
      :"eax", "memory");
  }
#endif
}
sl@0
   661
OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_2, composite_over_argb, OIL_IMPL_FLAG_SSE2);
sl@0
   662
sl@0
   663
/* written for shaun */
/*
 * composite_over_argb, SSE2 version with an aligned four-pixel main loop.
 *
 * The work is split in three runs:
 *   begin   single pixels until dest reaches a 16-byte boundary
 *   middle  groups of four pixels, stored with an aligned 16-byte write;
 *           a group whose four source alpha bytes are all 0 is skipped
 *   end     the remaining 0-3 single pixels
 * Per channel, with t = d * (255 - alpha) + 128:
 *   dest = src + ((t * 257) >> 16), saturated to 255 in all three runs.
 *
 *   dest  pixels composited onto, n entries (read and written)
 *   src   pixels composited over dest, n entries
 *   n     pixel count; nothing is done when n <= 0
 *
 * NOTE(review): the begin computation assumes dest is at least 4-byte
 * aligned; otherwise the aligned store in the middle loop would fault.
 * NOTE(review): xmm4-xmm7 are loaded by the first asm statement and used
 * by the later ones without the compiler being told they are live.
 *
 * Compiles to an empty function when __WINSCW__ or __WINS__ is defined.
 */
static void
composite_over_argb_sse2_3 (uint32_t *dest, uint32_t *src, int n)
{
  int begin;
  int middle;
  int end;
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* Constants, replicated across all eight 16-bit lanes. */
  __asm__ __volatile__ ("  pxor %%xmm7, %%xmm7\n"   // xmm7 = 0
      "  movl $0x80808080, %%eax\n"
      "  movd %%eax, %%xmm6\n"  // xmm6 = 128 in every lane
      "  punpcklbw %%xmm7, %%xmm6\n"
      "  punpcklwd %%xmm6, %%xmm6\n"
      "  movl $0xffffffff, %%eax\n" // xmm5 = 255 in every lane
      "  movd %%eax, %%xmm5\n"
      "  punpcklbw %%xmm7, %%xmm5\n"
      "  punpcklwd %%xmm5, %%xmm5\n"
      "  movl $0x02020202, %%eax\n"
      "  movd %%eax, %%xmm4\n"
      "  punpcklbw %%xmm7, %%xmm4\n"
      "  paddw %%xmm5, %%xmm4\n" // low four lanes = 257
      "  punpcklwd %%xmm4, %%xmm4\n" // xmm4 = 257 in every lane
      :
      :
      :"eax");

  /* Pixels needed to bring dest up to the next 16-byte boundary (0..3). */
  begin = 0x3 & (4 - (((unsigned long)dest & 0xf) >> 2));
  if (begin>n) {
    begin = n;
    middle = 0;
    end = 0;
  } else {
    middle = (n-begin)>>2;
    end = n - begin - middle*4;
  }

  if (begin>0) {
    __asm__ __volatile__ ("\n"
      "1:\n"
      "  movl (%1), %%eax\n"
      "  testl $0xff000000, %%eax\n"
      "  jz 2f\n"

      "  movd (%1), %%xmm1\n"
      "  punpcklbw %%xmm7, %%xmm1\n"
      "  pshuflw $0xff, %%xmm1, %%xmm0\n"
      "  pxor %%xmm5, %%xmm0\n"

      "  movd (%0), %%xmm3\n"
      "  punpcklbw %%xmm7, %%xmm3\n"
      "  pmullw %%xmm0, %%xmm3\n"
      "  paddw %%xmm6, %%xmm3\n"
      "  pmulhuw %%xmm4, %%xmm3\n"

      "  paddw %%xmm1, %%xmm3\n"
      "  packuswb %%xmm3, %%xmm3\n"
      "  movd %%xmm3, (%0)\n"

      "2:\n"
      // suffix-less add: operand size follows the pointer register
      "  add $4, %0\n"
      "  add $4, %1\n"
      "  subl $1, %2\n"
      "  jnz 1b\n"
      :"+r" (dest), "+r" (src), "+r" (begin)
      :
      // "memory": the arrays are read and written behind the compiler's back
      :"eax", "memory");
  }

  if (middle>0){
    __asm__ __volatile__ ("\n"
      "1:\n"
      "  movq (%1), %%xmm1\n"             // source pixels 0 and 1
      "  movq 8(%1), %%xmm0\n"            // source pixels 2 and 3
      "  movl (%1), %%eax\n"
      "  orl 4(%1), %%eax\n"
      "  orl 8(%1), %%eax\n"
      "  orl 12(%1), %%eax\n"
      "  test $0xff000000, %%eax\n"       // all four alphas 0: skip group
      "  jz 2f\n"
      "  punpcklbw %%xmm7, %%xmm1\n"
      "  punpcklbw %%xmm7, %%xmm0\n"
      "  pshuflw $0xff, %%xmm1, %%xmm1\n"
      "  pshuflw $0xff, %%xmm0, %%xmm0\n"
      "  pshufhw $0xff, %%xmm1, %%xmm1\n"
      "  pshufhw $0xff, %%xmm0, %%xmm0\n"

      "  pxor %%xmm5, %%xmm1\n"
      "  pxor %%xmm5, %%xmm0\n"

      "  movq (%0), %%xmm3\n"
      "  movq 8(%0), %%xmm2\n"
      "  punpcklbw %%xmm7, %%xmm3\n"
      "  punpcklbw %%xmm7, %%xmm2\n"

      "  pmullw %%xmm1, %%xmm3\n"
      "  paddw %%xmm6, %%xmm3\n"
      "  pmulhuw %%xmm4, %%xmm3\n"
      "  pmullw %%xmm0, %%xmm2\n"
      "  paddw %%xmm6, %%xmm2\n"
      "  pmulhuw %%xmm4, %%xmm2\n"
      "  packuswb %%xmm2, %%xmm3\n"

      "  movdqu (%1), %%xmm1\n"
      /* Saturating byte add, so this run clamps at 255 exactly like the
       * paddw + packuswb sequence of the single-pixel runs. */
      "  paddusb %%xmm1, %%xmm3\n"
      "  movdqa %%xmm3, (%0)\n"
      "2:\n"
      "  add $16, %0\n"
      "  add $16, %1\n"
      "  subl $1, %2\n"
      "  jnz 1b\n"
      :"+r" (dest), "+r" (src), "+r" (middle)
      :
      :"eax", "memory");
  }
  if (end>0) {
    __asm__ __volatile__ ("\n"
      "1:\n"
      "  movl (%1), %%eax\n"
      "  testl $0xff000000, %%eax\n"
      "  jz 2f\n"

      "  movd (%1), %%xmm1\n"
      "  punpcklbw %%xmm7, %%xmm1\n"
      "  pshuflw $0xff, %%xmm1, %%xmm0\n"
      "  pxor %%xmm5, %%xmm0\n"

      "  movd (%0), %%xmm3\n"
      "  punpcklbw %%xmm7, %%xmm3\n"
      "  pmullw %%xmm0, %%xmm3\n"
      "  paddw %%xmm6, %%xmm3\n"
      "  pmulhuw %%xmm4, %%xmm3\n"

      "  paddw %%xmm1, %%xmm3\n"
      "  packuswb %%xmm3, %%xmm3\n"
      "  movd %%xmm3, (%0)\n"

      "2:\n"
      "  add $4, %0\n"
      "  add $4, %1\n"
      "  subl $1, %2\n"
      "  jnz 1b\n"
      :"+r" (dest), "+r" (src), "+r" (end)
      :
      :"eax", "memory");
  }
#endif
}
sl@0
   810
OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_3, composite_over_argb, OIL_IMPL_FLAG_SSE2);
sl@0
   811
sl@0
   812
sl@0
   813
static void
sl@0
   814
composite_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n)
sl@0
   815
{
sl@0
   816
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   817
  __asm__ __volatile__ (
sl@0
   818
      MMX_LOAD_CONSTANTS
sl@0
   819
      "  movl (%1), %%eax\n"
sl@0
   820
      "  movd %%eax, %%mm0\n"
sl@0
   821
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   822
      "  pshufw $0xff, %%mm0, %%mm3\n"
sl@0
   823
      "  pxor %%mm5, %%mm3\n"
sl@0
   824
      "1:\n"
sl@0
   825
      "  movq %%mm3, %%mm1\n"
sl@0
   826
      "  movd (%0), %%mm2\n"
sl@0
   827
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   828
sl@0
   829
      MMX_MULDIV255(mm2, mm1)
sl@0
   830
sl@0
   831
      "  paddw %%mm0, %%mm2\n"
sl@0
   832
      "  packuswb %%mm2, %%mm2\n"
sl@0
   833
sl@0
   834
      "  movd %%mm2, (%0)\n"
sl@0
   835
      "  addl $4, %0\n"
sl@0
   836
      "  decl %2\n"
sl@0
   837
      "  jnz 1b\n"
sl@0
   838
      "  emms\n"
sl@0
   839
      :"+r" (dest), "+r" (src), "+r" (n)
sl@0
   840
      :
sl@0
   841
      :"eax");
sl@0
   842
#endif
sl@0
   843
}
sl@0
   844
/* OIL_DEFINE_IMPL_FULL is defined outside this chunk; presumably it emits the
 * _oil_function_impl_composite_over_argb_const_src_mmx descriptor that offers
 * the routine above as a composite_over_argb_const_src implementation, gated
 * on the MMX and MMXEXT flags -- TODO confirm against the macro definition. */
OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_mmx, composite_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   845
sl@0
   846
/*
 * Adds n source pixels to n destination pixels in place, using the MMX
 * unsigned saturating byte add (paddusb): each of the four bytes of a pixel
 * clamps at 0xff instead of wrapping.
 *
 * dest: n 32-bit pixels, read and overwritten.
 * src:  n 32-bit pixels, read only.
 * n:    pixel count; the call is a no-op when n <= 0.
 */
static void
composite_add_argb_mmx (uint32_t *dest, uint32_t *src, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* The loop tests its counter only after decrementing it, so entering with
   * n <= 0 would run far past both buffers. */
  if (n <= 0) {
    return;
  }

  __asm__ __volatile__ (
      "1:\n"
      "  movd (%1), %%mm0\n"
      "  movd (%0), %%mm2\n"
      "  paddusb %%mm0, %%mm2\n"
      "  movd %%mm2, (%0)\n"
      /* Suffix-less add: the assembler sizes it from the pointer register. */
      "  add $4, %0\n"
      "  add $4, %1\n"
      "  decl %2\n"
      "  jnz 1b\n"
      /* Empty the MMX state before returning to code that may use x87. */
      "  emms\n"
      :"+r" (dest), "+r" (src), "+r" (n)
      :
      /* "memory": the asm reads src[] and rewrites dest[] without the
       * compiler being told through an operand.  No general-purpose scratch
       * register is used, so none is listed. */
      :"memory", "cc");
#endif
}
sl@0
   866
/* OIL_DEFINE_IMPL_FULL is defined outside this chunk; presumably it emits the
 * _oil_function_impl_composite_add_argb_mmx descriptor that offers the routine
 * above as a composite_add_argb implementation, gated on the MMX flag --
 * TODO confirm against the macro definition. */
OIL_DEFINE_IMPL_FULL (composite_add_argb_mmx, composite_add_argb, OIL_IMPL_FLAG_MMX);
sl@0
   867
sl@0
   868
/*
 * Adds one constant source pixel (*src) to each of n destination pixels in
 * place, using the MMX unsigned saturating byte add (paddusb): each of the
 * four bytes of a pixel clamps at 0xff instead of wrapping.
 *
 * dest: n 32-bit pixels, read and overwritten.
 * src:  only the first pixel is read, once, before the loop.
 * n:    pixel count; the call is a no-op when n <= 0.
 */
static void
composite_add_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n)
{
#if !defined(__WINSCW__) && !defined(__WINS__)
  /* The loop tests its counter only after decrementing it, so entering with
   * n <= 0 would run far past the destination buffer. */
  if (n <= 0) {
    return;
  }

  __asm__ __volatile__ (
      /* Loop-invariant addend stays in mm0 for the whole run. */
      "  movd (%1), %%mm0\n"
      "1:\n"
      "  movd (%0), %%mm2\n"
      "  paddusb %%mm0, %%mm2\n"
      "  movd %%mm2, (%0)\n"
      /* Suffix-less add: the assembler sizes it from the pointer register. */
      "  add $4, %0\n"
      "  decl %2\n"
      "  jnz 1b\n"
      /* Empty the MMX state before returning to code that may use x87. */
      "  emms\n"
      :"+r" (dest), "+r" (src), "+r" (n)
      :
      /* "memory": the asm reads *src and rewrites dest[] without the
       * compiler being told through an operand.  No general-purpose scratch
       * register is used, so none is listed. */
      :"memory", "cc");
#endif
}
sl@0
   887
/* OIL_DEFINE_IMPL_FULL is defined outside this chunk; presumably it emits the
 * _oil_function_impl_composite_add_argb_const_src_mmx descriptor that offers
 * the routine above as a composite_add_argb_const_src implementation, gated
 * on the MMX flag -- TODO confirm against the macro definition. */
OIL_DEFINE_IMPL_FULL (composite_add_argb_const_src_mmx, composite_add_argb_const_src, OIL_IMPL_FLAG_MMX);
sl@0
   888
sl@0
   889
static void
sl@0
   890
composite_in_over_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
sl@0
   891
{
sl@0
   892
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   893
  __asm__ __volatile__ (
sl@0
   894
      MMX_LOAD_CONSTANTS
sl@0
   895
      "1:\n"
sl@0
   896
      "  movd (%2), %%mm0\n"
sl@0
   897
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   898
      "  pshufw $0x00, %%mm0, %%mm1\n"
sl@0
   899
sl@0
   900
      "  movd (%1), %%mm2\n"
sl@0
   901
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   902
sl@0
   903
      MMX_MULDIV255(mm2, mm1)
sl@0
   904
sl@0
   905
      "  movd (%0), %%mm0\n"
sl@0
   906
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   907
sl@0
   908
      "  pshufw $0xff, %%mm2, %%mm1\n"
sl@0
   909
      "  pxor %%mm5, %%mm1\n"
sl@0
   910
sl@0
   911
      MMX_MULDIV255(mm0, mm1)
sl@0
   912
sl@0
   913
      "  paddw %%mm0, %%mm2\n"
sl@0
   914
      "  packuswb %%mm2, %%mm2\n"
sl@0
   915
sl@0
   916
      "  movd %%mm2, (%0)\n"
sl@0
   917
      "  addl $4, %0\n"
sl@0
   918
      "  addl $4, %1\n"
sl@0
   919
      "  addl $1, %2\n"
sl@0
   920
      "  decl %3\n"
sl@0
   921
      "  jnz 1b\n"
sl@0
   922
      "  emms\n"
sl@0
   923
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
sl@0
   924
      :
sl@0
   925
      :"eax");
sl@0
   926
#endif
sl@0
   927
}
sl@0
   928
/* OIL_DEFINE_IMPL_FULL is defined outside this chunk; presumably it emits the
 * _oil_function_impl_composite_in_over_argb_mmx descriptor that offers the
 * routine above as a composite_in_over_argb implementation, gated on the MMX
 * and MMXEXT flags -- TODO confirm against the macro definition. */
OIL_DEFINE_IMPL_FULL (composite_in_over_argb_mmx, composite_in_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   929
sl@0
   930
static void
sl@0
   931
composite_in_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
sl@0
   932
{
sl@0
   933
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   934
  __asm__ __volatile__ (
sl@0
   935
      MMX_LOAD_CONSTANTS
sl@0
   936
sl@0
   937
      "  movd (%1), %%mm3\n"
sl@0
   938
      "  punpcklbw %%mm7, %%mm3\n"
sl@0
   939
      "1:\n"
sl@0
   940
      "  movd (%2), %%mm0\n"
sl@0
   941
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   942
      "  pshufw $0x00, %%mm0, %%mm1\n"
sl@0
   943
sl@0
   944
      "  movq %%mm3, %%mm2\n"
sl@0
   945
sl@0
   946
      MMX_MULDIV255(mm2, mm1)
sl@0
   947
sl@0
   948
      "  movd (%0), %%mm0\n"
sl@0
   949
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   950
sl@0
   951
      "  pshufw $0xff, %%mm2, %%mm1\n"
sl@0
   952
      "  pxor %%mm5, %%mm1\n"
sl@0
   953
sl@0
   954
      MMX_MULDIV255(mm0, mm1)
sl@0
   955
sl@0
   956
      "  paddw %%mm0, %%mm2\n"
sl@0
   957
      "  packuswb %%mm2, %%mm2\n"
sl@0
   958
sl@0
   959
      "  movd %%mm2, (%0)\n"
sl@0
   960
      "  addl $4, %0\n"
sl@0
   961
      "  addl $1, %2\n"
sl@0
   962
      "  decl %3\n"
sl@0
   963
      "  jnz 1b\n"
sl@0
   964
      "  emms\n"
sl@0
   965
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
sl@0
   966
      :
sl@0
   967
      :"eax");
sl@0
   968
#endif
sl@0
   969
}
sl@0
   970
/* OIL_DEFINE_IMPL_FULL is defined outside this chunk; presumably it emits the
 * _oil_function_impl_composite_in_over_argb_const_src_mmx descriptor that
 * offers the routine above as a composite_in_over_argb_const_src
 * implementation, gated on the MMX and MMXEXT flags -- TODO confirm against
 * the macro definition. */
OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
   971
sl@0
   972
static void
sl@0
   973
composite_in_over_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
sl@0
   974
{
sl@0
   975
#if !defined(__WINSCW__) && !defined(__WINS__)      
sl@0
   976
  __asm__ __volatile__ (
sl@0
   977
      MMX_LOAD_CONSTANTS
sl@0
   978
      "  movd (%2), %%mm0\n"
sl@0
   979
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   980
      "  pshufw $0x00, %%mm0, %%mm3\n"
sl@0
   981
sl@0
   982
      "1:\n"
sl@0
   983
      "  movd (%1), %%mm2\n"
sl@0
   984
      "  punpcklbw %%mm7, %%mm2\n"
sl@0
   985
      "  movq %%mm3, %%mm1\n"
sl@0
   986
sl@0
   987
      MMX_MULDIV255(mm2, mm1)
sl@0
   988
sl@0
   989
      "  movd (%0), %%mm0\n"
sl@0
   990
      "  punpcklbw %%mm7, %%mm0\n"
sl@0
   991
sl@0
   992
      "  pshufw $0xff, %%mm2, %%mm1\n"
sl@0
   993
      "  pxor %%mm5, %%mm1\n"
sl@0
   994
sl@0
   995
      MMX_MULDIV255(mm0, mm1)
sl@0
   996
sl@0
   997
      "  paddw %%mm0, %%mm2\n"
sl@0
   998
      "  packuswb %%mm2, %%mm2\n"
sl@0
   999
sl@0
  1000
      "  movd %%mm2, (%0)\n"
sl@0
  1001
      "  addl $4, %0\n"
sl@0
  1002
      "  addl $4, %1\n"
sl@0
  1003
      "  decl %3\n"
sl@0
  1004
      "  jnz 1b\n"
sl@0
  1005
      "  emms\n"
sl@0
  1006
      :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
sl@0
  1007
      :
sl@0
  1008
      :"eax");
sl@0
  1009
#endif
sl@0
  1010
}
sl@0
  1011
/* OIL_DEFINE_IMPL_FULL is defined outside this chunk; presumably it emits the
 * _oil_function_impl_composite_in_over_argb_const_mask_mmx descriptor that
 * offers the routine above as a composite_in_over_argb_const_mask
 * implementation, gated on the MMX and MMXEXT flags -- TODO confirm against
 * the macro definition. */
OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
sl@0
  1012
sl@0
  1013
sl@0
  1014
sl@0
  1015
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_in_argb_mmx descriptor.
 * Defined exactly once; the identifier is the implementation name alone and
 * does not include the function-class name.
 */
OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx(void) {
		return &_oil_function_impl_composite_in_argb_mmx;
}
#endif
sl@0
  1028
sl@0
  1029
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_in_argb_mmx2 descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx2(void) {
		return &_oil_function_impl_composite_in_argb_mmx2;
}
#endif
sl@0
  1035
sl@0
  1036
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_in_argb_const_src_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_in_argb_const_src_mmx(void) {
		return &_oil_function_impl_composite_in_argb_const_src_mmx;
}
#endif
sl@0
  1042
sl@0
  1043
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_in_argb_const_mask_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_in_argb_const_mask_mmx(void) {
		return &_oil_function_impl_composite_in_argb_const_mask_mmx;
}
#endif
sl@0
  1049
sl@0
  1050
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx(void) {
		return &_oil_function_impl_composite_over_argb_mmx;
}
#endif
sl@0
  1056
sl@0
  1057
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_mmx_2 descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_2(void) {
		return &_oil_function_impl_composite_over_argb_mmx_2;
}
#endif
sl@0
  1063
sl@0
  1064
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_mmx_3 descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_3(void) {
		return &_oil_function_impl_composite_over_argb_mmx_3;
}
#endif
sl@0
  1070
sl@0
  1071
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_mmx_4 descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_4(void) {
		return &_oil_function_impl_composite_over_argb_mmx_4;
}
#endif
sl@0
  1077
sl@0
  1078
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_mmx_5 descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_5(void) {
		return &_oil_function_impl_composite_over_argb_mmx_5;
}
#endif
sl@0
  1084
sl@0
  1085
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_sse2 descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2(void) {
		return &_oil_function_impl_composite_over_argb_sse2;
}
#endif
sl@0
  1091
sl@0
  1092
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_sse2_2 descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_2(void) {
		return &_oil_function_impl_composite_over_argb_sse2_2;
}
#endif
sl@0
  1098
sl@0
  1099
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_sse2_3 descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_3(void) {
		return &_oil_function_impl_composite_over_argb_sse2_3;
}
#endif
sl@0
  1105
sl@0
  1106
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_over_argb_const_src_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_over_argb_const_src_mmx(void) {
		return &_oil_function_impl_composite_over_argb_const_src_mmx;
}
#endif
sl@0
  1112
sl@0
  1113
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_add_argb_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_add_argb_mmx(void) {
		return &_oil_function_impl_composite_add_argb_mmx;
}
#endif
sl@0
  1119
sl@0
  1120
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_add_argb_const_src_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_add_argb_const_src_mmx(void) {
		return &_oil_function_impl_composite_add_argb_const_src_mmx;
}
#endif
sl@0
  1126
sl@0
  1127
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_in_over_argb_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_in_over_argb_mmx(void) {
		return &_oil_function_impl_composite_in_over_argb_mmx;
}
#endif
sl@0
  1133
sl@0
  1134
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_in_over_argb_const_src_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_src_mmx(void) {
		return &_oil_function_impl_composite_in_over_argb_const_src_mmx;
}
#endif
sl@0
  1140
sl@0
  1141
#ifdef	__SYMBIAN32__

/*
 * Symbian-only accessor: returns the address of the
 * _oil_function_impl_composite_in_over_argb_const_mask_mmx descriptor.
 * The identifier is the implementation name alone; the function-class name
 * is not part of it.
 */
OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_mask_mmx(void) {
		return &_oil_function_impl_composite_in_over_argb_const_mask_mmx;
}
#endif
sl@0
  1147