os/ossrv/genericopenlibs/liboil/src/clamp_sse.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  * Copyright (c) 2005
     3  *	Eric Anholt.  All rights reserved.
     4  *
     5  * Redistribution and use in source and binary forms, with or without
     6  * modification, are permitted provided that the following conditions
     7  * are met:
     8  * 1. Redistributions of source code must retain the above copyright
     9  *    notice, this list of conditions and the following disclaimer.
    10  * 2. Redistributions in binary form must reproduce the above copyright
    11  *    notice, this list of conditions and the following disclaimer in the
    12  *    documentation and/or other materials provided with the distribution.
    13  *
    14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
    15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    24  * SUCH DAMAGE.
    25  */
    26 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    27 
    28 #ifdef HAVE_CONFIG_H
    29 #include "config.h"
    30 #endif
    31 #include "liboil/liboilclasses.h"
    32 #include "liboil/liboilfunction.h"
    33 #include <emmintrin.h>
    34 #include <xmmintrin.h>
    35 
    36 /* TODO: If we have gcc 4.2 or above, do this. Otherwise, disable all SSE use */
    37 #define SSE_FUNCTION __attribute__((force_align_arg_pointer))
    38 
    39 SSE_FUNCTION static void
    40 clamp_u8_sse (uint8_t *dest, uint8_t *src1, int n, uint8_t *src2_1,
    41     uint8_t *src3_1)
    42 {
    43   __m128i xmm1, xmm2;
    44   uint8_t min = *src2_1;
    45   uint8_t max = *src3_1;
    46 
    47   /* Initial operations to align the destination pointer */
    48   for (; ((long)dest & 15) && (n > 0); n--) {
    49     uint8_t x = *src1++;
    50     if (x < min)
    51       x = min;
    52     if (x > max)
    53       x = max;
    54     *dest++ = x;
    55   }
    56   xmm1 = _mm_set1_epi8(min);
    57   xmm2 = _mm_set1_epi8(max);
    58   for (; n >= 16; n -= 16) {
    59     __m128i xmm0;
    60     xmm0 = _mm_loadu_si128((__m128i *)src1);
    61     xmm0 = _mm_max_epu8(xmm0, xmm1);
    62     xmm0 = _mm_min_epu8(xmm0, xmm2);
    63     _mm_store_si128((__m128i *)dest, xmm0);
    64     dest += 16;
    65     src1 += 16;
    66   }
    67   for (; n > 0; n--) {
    68     uint8_t x = *src1++;
    69     if (x < min)
    70       x = min;
    71     if (x > max)
    72       x = max;
    73     *dest++ = x;
    74   }
    75 }
    76 OIL_DEFINE_IMPL_FULL (clamp_u8_sse, clamp_u8, OIL_IMPL_FLAG_SSE2);
    77 
    78 SSE_FUNCTION static void
    79 clamp_s16_sse (int16_t *dest, int16_t *src1, int n, int16_t *src2_1,
    80     int16_t *src3_1)
    81 {
    82   __m128i xmm1, xmm2;
    83   int16_t min = *src2_1;
    84   int16_t max = *src3_1;
    85 
    86   /* Initial operations to align the destination pointer */
    87   for (; ((long)dest & 15) && (n > 0); n--) {
    88     int16_t x = *src1++;
    89     if (x < min)
    90       x = min;
    91     if (x > max)
    92       x = max;
    93     *dest++ = x;
    94   }
    95   xmm1 = _mm_set1_epi16(min);
    96   xmm2 = _mm_set1_epi16(max);
    97   for (; n >= 8; n -= 8) {
    98     __m128i xmm0;
    99     xmm0 = _mm_loadu_si128((__m128i *)src1);
   100     xmm0 = _mm_max_epi16(xmm0, xmm1);
   101     xmm0 = _mm_min_epi16(xmm0, xmm2);
   102     _mm_store_si128((__m128i *)dest, xmm0);
   103     dest += 8;
   104     src1 += 8;
   105   }
   106   for (; n > 0; n--) {
   107     int16_t x = *src1++;
   108     if (x < min)
   109       x = min;
   110     if (x > max)
   111       x = max;
   112     *dest++ = x;
   113   }
   114 }
   115 OIL_DEFINE_IMPL_FULL (clamp_s16_sse, clamp_s16, OIL_IMPL_FLAG_SSE2);
   116 
   117 SSE_FUNCTION static void
   118 clamp_f32_sse (float *dest, const float *src1, int n, const float *src2_1,
   119     const float *src3_1)
   120 {
   121   __m128 xmm1, xmm2;
   122   float min = *src2_1;
   123   float max = *src3_1;
   124 
   125   /* Initial operations to align the destination pointer */
   126   for (; ((long)dest & 15) && (n > 0); n--) {
   127     float x = *src1++;
   128     if (x < min)
   129       x = min;
   130     if (x > max)
   131       x = max;
   132     *dest++ = x;
   133   }
   134   xmm1 = _mm_set_ps1(min);
   135   xmm2 = _mm_set_ps1(max);
   136   for (; n >= 4; n -= 4) {
   137     __m128 xmm0;
   138     xmm0 = _mm_loadu_ps(src1);
   139     xmm0 = _mm_max_ps(xmm0, xmm1);
   140     xmm0 = _mm_min_ps(xmm0, xmm2);
   141     _mm_store_ps(dest, xmm0);
   142     dest += 4;
   143     src1 += 4;
   144   }
   145   for (; n > 0; n--) {
   146     float x = *src1++;
   147     if (x < min)
   148       x = min;
   149     if (x > max)
   150       x = max;
   151     *dest++ = x;
   152   }
   153 }
   154 OIL_DEFINE_IMPL_FULL (clamp_f32_sse, clamp_f32, OIL_IMPL_FLAG_SSE);
   155 
   156 SSE_FUNCTION static void
   157 clamp_f64_sse (double *dest, const double *src1, int n, const double *src2_1,
   158     const double *src3_1)
   159 {
   160   __m128d xmm1, xmm2;
   161   double min = *src2_1;
   162   double max = *src3_1;
   163 
   164   /* Initial operations to align the destination pointer */
   165   for (; ((long)dest & 15) && (n > 0); n--) {
   166     double x = *src1++;
   167     if (x < min)
   168       x = min;
   169     if (x > max)
   170       x = max;
   171     *dest++ = x;
   172   }
   173   xmm1 = _mm_set1_pd(min);
   174   xmm2 = _mm_set1_pd(max);
   175   for (; n >= 2; n -= 2) {
   176     __m128d xmm0;
   177     xmm0 = _mm_loadu_pd(src1);
   178     xmm0 = _mm_max_pd(xmm0, xmm1);
   179     xmm0 = _mm_min_pd(xmm0, xmm2);
   180     _mm_store_pd(dest, xmm0);
   181     dest += 2;
   182     src1 += 2;
   183   }
   184   for (; n > 0; n--) {
   185     double x = *src1++;
   186     if (x < min)
   187       x = min;
   188     if (x > max)
   189       x = max;
   190     *dest++ = x;
   191   }
   192 }
   193 OIL_DEFINE_IMPL_FULL (clamp_f64_sse, clamp_f64,
   194     OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2);
   195 
   196 SSE_FUNCTION static void
   197 clamplow_u8_sse (uint8_t *dest, const uint8_t *src1, int n,
   198     const uint8_t *src2_1)
   199 {
   200   __m128i xmm1;
   201   uint8_t min = *src2_1;
   202 
   203   /* Initial operations to align the destination pointer */
   204   for (; ((long)dest & 15) && (n > 0); n--) {
   205     uint8_t x = *src1++;
   206     if (x < min)
   207       x = min;
   208     *dest++ = x;
   209   }
   210   xmm1 = _mm_set1_epi8(min);
   211   for (; n >= 16; n -= 16) {
   212     __m128i xmm0;
   213     xmm0 = _mm_loadu_si128((__m128i *)src1);
   214     xmm0 = _mm_max_epu8(xmm0, xmm1);
   215     _mm_store_si128((__m128i *)dest, xmm0);
   216     dest += 16;
   217     src1 += 16;
   218   }
   219   for (; n > 0; n--) {
   220     uint8_t x = *src1++;
   221     if (x < min)
   222       x = min;
   223     *dest++ = x;
   224   }
   225 }
   226 OIL_DEFINE_IMPL_FULL (clamplow_u8_sse, clamplow_u8, OIL_IMPL_FLAG_SSE2);
   227 
   228 SSE_FUNCTION static void
   229 clamplow_s16_sse (int16_t *dest, const int16_t *src1, int n,
   230     const int16_t *src2_1)
   231 {
   232   __m128i xmm1;
   233   int16_t min = *src2_1;
   234 
   235   /* Initial operations to align the destination pointer */
   236   for (; ((long)dest & 15) && (n > 0); n--) {
   237     int16_t x = *src1++;
   238     if (x < min)
   239       x = min;
   240     *dest++ = x;
   241   }
   242   xmm1 = _mm_set1_epi16(min);
   243   for (; n >= 8; n -= 8) {
   244     __m128i xmm0;
   245     xmm0 = _mm_loadu_si128((__m128i *)src1);
   246     xmm0 = _mm_max_epi16(xmm0, xmm1);
   247     _mm_store_si128((__m128i *)dest, xmm0);
   248     dest += 8;
   249     src1 += 8;
   250   }
   251   for (; n > 0; n--) {
   252     int16_t x = *src1++;
   253     if (x < min)
   254       x = min;
   255     *dest++ = x;
   256   }
   257 }
   258 OIL_DEFINE_IMPL_FULL (clamplow_s16_sse, clamplow_s16, OIL_IMPL_FLAG_SSE2);
   259 
   260 SSE_FUNCTION static void
   261 clamplow_f32_sse (float *dest, const float *src1, int n, const float *src2_1)
   262 {
   263   __m128 xmm1;
   264   float min = *src2_1;
   265 
   266   /* Initial operations to align the destination pointer */
   267   for (; ((long)dest & 15) && (n > 0); n--) {
   268     float x = *src1++;
   269     if (x < min)
   270       x = min;
   271     *dest++ = x;
   272   }
   273   xmm1 = _mm_set_ps1(min);
   274   for (; n >= 4; n -= 4) {
   275     __m128 xmm0;
   276     xmm0 = _mm_loadu_ps(src1);
   277     xmm0 = _mm_max_ps(xmm0, xmm1);
   278     _mm_store_ps(dest, xmm0);
   279     dest += 4;
   280     src1 += 4;
   281   }
   282   for (; n > 0; n--) {
   283     float x = *src1++;
   284     if (x < min)
   285       x = min;
   286     *dest++ = x;
   287   }
   288 }
   289 OIL_DEFINE_IMPL_FULL (clamplow_f32_sse, clamplow_f32, OIL_IMPL_FLAG_SSE);
   290 
   291 SSE_FUNCTION static void
   292 clamplow_f64_sse (double *dest, const double *src1, int n, const double *src2_1)
   293 {
   294   __m128d xmm1;
   295   double min = *src2_1;
   296 
   297   /* Initial operations to align the destination pointer */
   298   for (; ((long)dest & 15) && (n > 0); n--) {
   299     double x = *src1++;
   300     if (x < min)
   301       x = min;
   302     *dest++ = x;
   303   }
   304   xmm1 = _mm_set1_pd(min);
   305   for (; n >= 2; n -= 2) {
   306     __m128d xmm0;
   307     xmm0 = _mm_loadu_pd(src1);
   308     xmm0 = _mm_max_pd(xmm0, xmm1);
   309     _mm_store_pd(dest, xmm0);
   310     dest += 2;
   311     src1 += 2;
   312   }
   313   for (; n > 0; n--) {
   314     double x = *src1++;
   315     if (x < min)
   316       x = min;
   317     *dest++ = x;
   318   }
   319 }
   320 OIL_DEFINE_IMPL_FULL (clamplow_f64_sse, clamplow_f64,
   321     OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2);
   322 
   323 SSE_FUNCTION static void
   324 clamphigh_u8_sse (uint8_t *dest, const uint8_t *src1, int n,
   325     const uint8_t *src2_1)
   326 {
   327   __m128i xmm1;
   328   uint8_t max = *src2_1;
   329 
   330   /* Initial operations to align the destination pointer */
   331   for (; ((long)dest & 15) && (n > 0); n--) {
   332     uint8_t x = *src1++;
   333     if (x > max)
   334       x = max;
   335     *dest++ = x;
   336   }
   337   xmm1 = _mm_set1_epi8(max);
   338   for (; n >= 16; n -= 16) {
   339     __m128i xmm0;
   340     xmm0 = _mm_loadu_si128((__m128i *)src1);
   341     xmm0 = _mm_min_epu8(xmm0, xmm1);
   342     _mm_store_si128((__m128i *)dest, xmm0);
   343     dest += 16;
   344     src1 += 16;
   345   }
   346   for (; n > 0; n--) {
   347     uint8_t x = *src1++;
   348     if (x > max)
   349       x = max;
   350     *dest++ = x;
   351   }
   352 }
   353 OIL_DEFINE_IMPL_FULL (clamphigh_u8_sse, clamphigh_u8, OIL_IMPL_FLAG_SSE2);
   354 
   355 SSE_FUNCTION static void
   356 clamphigh_s16_sse (int16_t *dest, const int16_t *src1, int n,
   357     const int16_t *src2_1)
   358 {
   359   __m128i xmm1;
   360   int16_t max = *src2_1;
   361 
   362   /* Initial operations to align the destination pointer */
   363   for (; ((long)dest & 15) && (n > 0); n--) {
   364     int16_t x = *src1++;
   365     if (x > max)
   366       x = max;
   367     *dest++ = x;
   368   }
   369   xmm1 = _mm_set1_epi16(max);
   370   for (; n >= 8; n -= 8) {
   371     __m128i xmm0;
   372     xmm0 = _mm_loadu_si128((__m128i *)src1);
   373     xmm0 = _mm_min_epi16(xmm0, xmm1);
   374     _mm_store_si128((__m128i *)dest, xmm0);
   375     dest += 8;
   376     src1 += 8;
   377   }
   378   for (; n > 0; n--) {
   379     int16_t x = *src1++;
   380     if (x > max)
   381       x = max;
   382     *dest++ = x;
   383   }
   384 }
   385 OIL_DEFINE_IMPL_FULL (clamphigh_s16_sse, clamphigh_s16, OIL_IMPL_FLAG_SSE2);
   386 
   387 SSE_FUNCTION static void
   388 clamphigh_f32_sse (float *dest, const float *src1, int n, const float *src2_1)
   389 {
   390   __m128 xmm1;
   391   float max = *src2_1;
   392 
   393   /* Initial operations to align the destination pointer */
   394   for (; ((long)dest & 15) && (n > 0); n--) {
   395     float x = *src1++;
   396     if (x > max)
   397       x = max;
   398     *dest++ = x;
   399   }
   400   xmm1 = _mm_set_ps1(max);
   401   for (; n >= 4; n -= 4) {
   402     __m128 xmm0;
   403     xmm0 = _mm_loadu_ps(src1);
   404     xmm0 = _mm_min_ps(xmm0, xmm1);
   405     _mm_store_ps(dest, xmm0);
   406     dest += 4;
   407     src1 += 4;
   408   }
   409   for (; n > 0; n--) {
   410     float x = *src1++;
   411     if (x > max)
   412       x = max;
   413     *dest++ = x;
   414   }
   415 }
   416 OIL_DEFINE_IMPL_FULL (clamphigh_f32_sse, clamphigh_f32, OIL_IMPL_FLAG_SSE);
   417 
   418 SSE_FUNCTION static void
   419 clamphigh_f64_sse (double *dest, const double *src1, int n, const double *src2_1)
   420 {
   421   __m128d xmm1;
   422   double max = *src2_1;
   423 
   424   /* Initial operations to align the destination pointer */
   425   for (; ((long)dest & 15) && (n > 0); n--) {
   426     double x = *src1++;
   427     if (x > max)
   428       x = max;
   429     *dest++ = x;
   430   }
   431   xmm1 = _mm_set1_pd(max);
   432   for (; n >= 2; n -= 2) {
   433     __m128d xmm0;
   434     xmm0 = _mm_loadu_pd(src1);
   435     xmm0 = _mm_min_pd(xmm0, xmm1);
   436     _mm_store_pd(dest, xmm0);
   437     dest += 2;
   438     src1 += 2;
   439   }
   440   for (; n > 0; n--) {
   441     double x = *src1++;
   442     if (x > max)
   443       x = max;
   444     *dest++ = x;
   445   }
   446 }
   447 OIL_DEFINE_IMPL_FULL (clamphigh_f64_sse, clamphigh_f64,
   448     OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2);
   449 
   450 
   451 #ifdef	__SYMBIAN32__
   452  
   453 OilFunctionImpl* __oil_function_impl_clamp_u8_sse, clamp_u8() {
   454 		return &_oil_function_impl_clamp_u8_sse, clamp_u8;
   455 }
   456 #endif
   457 
   458 #ifdef	__SYMBIAN32__
   459  
   460 OilFunctionImpl* __oil_function_impl_clamp_s16_sse, clamp_s16() {
   461 		return &_oil_function_impl_clamp_s16_sse, clamp_s16;
   462 }
   463 #endif
   464 
   465 #ifdef	__SYMBIAN32__
   466  
   467 OilFunctionImpl* __oil_function_impl_clamp_f32_sse, clamp_f32() {
   468 		return &_oil_function_impl_clamp_f32_sse, clamp_f32;
   469 }
   470 #endif
   471 
   472 #ifdef	__SYMBIAN32__
   473  
   474 OilFunctionImpl* __oil_function_impl_clamp_f64_sse, clamp_f64() {
   475 		return &_oil_function_impl_clamp_f64_sse, clamp_f64;
   476 }
   477 #endif
   478 
   479 #ifdef	__SYMBIAN32__
   480  
   481 OilFunctionImpl* __oil_function_impl_clamplow_u8_sse, clamplow_u8() {
   482 		return &_oil_function_impl_clamplow_u8_sse, clamplow_u8;
   483 }
   484 #endif
   485 
   486 #ifdef	__SYMBIAN32__
   487  
   488 OilFunctionImpl* __oil_function_impl_clamplow_s16_sse, clamplow_s16() {
   489 		return &_oil_function_impl_clamplow_s16_sse, clamplow_s16;
   490 }
   491 #endif
   492 
   493 #ifdef	__SYMBIAN32__
   494  
   495 OilFunctionImpl* __oil_function_impl_clamplow_f32_sse, clamplow_f32() {
   496 		return &_oil_function_impl_clamplow_f32_sse, clamplow_f32;
   497 }
   498 #endif
   499 
   500 #ifdef	__SYMBIAN32__
   501  
   502 OilFunctionImpl* __oil_function_impl_clamplow_f64_sse, clamplow_f64() {
   503 		return &_oil_function_impl_clamplow_f64_sse, clamplow_f64;
   504 }
   505 #endif
   506 
   507 #ifdef	__SYMBIAN32__
   508  
   509 OilFunctionImpl* __oil_function_impl_clamphigh_u8_sse, clamphigh_u8() {
   510 		return &_oil_function_impl_clamphigh_u8_sse, clamphigh_u8;
   511 }
   512 #endif
   513 
   514 #ifdef	__SYMBIAN32__
   515  
   516 OilFunctionImpl* __oil_function_impl_clamphigh_s16_sse, clamphigh_s16() {
   517 		return &_oil_function_impl_clamphigh_s16_sse, clamphigh_s16;
   518 }
   519 #endif
   520 
   521 #ifdef	__SYMBIAN32__
   522  
   523 OilFunctionImpl* __oil_function_impl_clamphigh_f32_sse, clamphigh_f32() {
   524 		return &_oil_function_impl_clamphigh_f32_sse, clamphigh_f32;
   525 }
   526 #endif
   527 
   528 #ifdef	__SYMBIAN32__
   529  
   530 OilFunctionImpl* __oil_function_impl_clamphigh_f64_sse, clamphigh_f64() {
   531 		return &_oil_function_impl_clamphigh_f64_sse, clamphigh_f64;
   532 }
   533 #endif
   534