os/ossrv/genericopenlibs/liboil/src/splat_sse.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
     1 /*
     2  * Copyright (c) 2005
     3  *	Eric Anholt.  All rights reserved.
     4  *
     5  * Redistribution and use in source and binary forms, with or without
     6  * modification, are permitted provided that the following conditions
     7  * are met:
     8  * 1. Redistributions of source code must retain the above copyright
     9  *    notice, this list of conditions and the following disclaimer.
    10  * 2. Redistributions in binary form must reproduce the above copyright
    11  *    notice, this list of conditions and the following disclaimer in the
    12  *    documentation and/or other materials provided with the distribution.
    13  *
    14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
    15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    24  * SUCH DAMAGE.
    25  */
    26 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    27 
    28 #ifdef HAVE_CONFIG_H
    29 #include "config.h"
    30 #endif
    31 #include <liboil/liboilclasses.h>
    32 #include <liboil/liboilfunction.h>
    33 #include <emmintrin.h>
    34 
    35 #define SSE_FUNCTION __attribute__((force_align_arg_pointer))
    36 
    37 SSE_FUNCTION static void
    38 splat_u32_ns_sse (uint32_t *dest, const uint32_t *param, int n)
    39 {
    40   __m128i v;
    41 
    42   v = _mm_set1_epi32(*param);
    43 
    44   for (; ((long)dest & 15) && (n > 0); n--) {
    45     *dest++ = *param;
    46   }
    47   for (; n >= 4; n -= 4) {
    48     _mm_store_si128((__m128i *)dest, v);
    49     dest += 4;
    50   }
    51   for (; n > 0; n--) {
    52     *dest++ = *param;
    53   }
    54 }
/* Defines the OilFunctionImpl that offers splat_u32_ns_sse as an
 * implementation of the splat_u32_ns class; OIL_IMPL_FLAG_SSE2 marks it as
 * needing SSE2 (per liboil's OIL_DEFINE_IMPL_FULL documentation). */
OIL_DEFINE_IMPL_FULL (splat_u32_ns_sse, splat_u32_ns, OIL_IMPL_FLAG_SSE2);
    56 
    57 SSE_FUNCTION static void
    58 splat_u32_ns_sse_unroll2 (uint32_t *dest, const uint32_t *param, int n)
    59 {
    60   __m128i v;
    61 
    62   v = _mm_set1_epi32(*param);
    63 
    64   for (; ((long)dest & 15) && (n > 0); n--) {
    65     *dest++ = *param;
    66   }
    67   for (; n >= 8; n -= 8) {
    68     _mm_store_si128((__m128i *)dest, v);
    69     _mm_store_si128((__m128i *)(dest + 4), v);
    70     dest += 8;
    71   }
    72   for (; n >= 4; n -= 4) {
    73     _mm_store_si128((__m128i *)dest, v);
    74     dest += 4;
    75   }
    76   for (; n > 0; n--) {
    77     *dest++ = *param;
    78   }
    79 }
/* Defines the OilFunctionImpl that offers splat_u32_ns_sse_unroll2 as an
 * implementation of the splat_u32_ns class; OIL_IMPL_FLAG_SSE2 marks it as
 * needing SSE2 (per liboil's OIL_DEFINE_IMPL_FULL documentation). */
OIL_DEFINE_IMPL_FULL (splat_u32_ns_sse_unroll2, splat_u32_ns, OIL_IMPL_FLAG_SSE2);
    81 
    82 SSE_FUNCTION static void
    83 splat_u8_ns_sse (uint8_t *dest, const uint8_t *param, int n)
    84 {
    85   __m128i v;
    86 
    87   v = _mm_set1_epi8(*param);
    88 
    89   for (; ((long)dest & 15) && (n > 0); n--) {
    90     *dest++ = *param;
    91   }
    92   for (; n >= 16; n -= 16) {
    93     _mm_store_si128((__m128i *)dest, v);
    94     dest += 16;
    95   }
    96   for (; n > 0; n--) {
    97     *dest++ = *param;
    98   }
    99 }
/* Defines the OilFunctionImpl that offers splat_u8_ns_sse as an
 * implementation of the splat_u8_ns class; OIL_IMPL_FLAG_SSE2 marks it as
 * needing SSE2 (per liboil's OIL_DEFINE_IMPL_FULL documentation). */
OIL_DEFINE_IMPL_FULL (splat_u8_ns_sse, splat_u8_ns, OIL_IMPL_FLAG_SSE2);
   101 
   102 SSE_FUNCTION static void
   103 splat_u8_ns_sse_unroll2 (uint8_t *dest, const uint8_t *param, int n)
   104 {
   105   __m128i v;
   106 
   107   v = _mm_set1_epi8(*param);
   108 
   109   for (; ((long)dest & 15) && (n > 0); n--) {
   110     *dest++ = *param;
   111   }
   112   for (; n >= 32; n -= 32) {
   113     _mm_store_si128((__m128i *)dest, v);
   114     _mm_store_si128((__m128i *)(dest + 16), v);
   115     dest += 32;
   116   }
   117   for (; n >= 16; n -= 16) {
   118     _mm_store_si128((__m128i *)dest, v);
   119     dest += 16;
   120   }
   121   for (; n > 0; n--) {
   122     *dest++ = *param;
   123   }
   124 }
/* Defines the OilFunctionImpl that offers splat_u8_ns_sse_unroll2 as an
 * implementation of the splat_u8_ns class; OIL_IMPL_FLAG_SSE2 marks it as
 * needing SSE2 (per liboil's OIL_DEFINE_IMPL_FULL documentation). */
OIL_DEFINE_IMPL_FULL (splat_u8_ns_sse_unroll2, splat_u8_ns, OIL_IMPL_FLAG_SSE2);
   126 
   127 
   128 #ifdef	__SYMBIAN32__
   129  
   130 OilFunctionImpl* __oil_function_impl_splat_u32_ns_sse, splat_u32_ns() {
   131 		return &_oil_function_impl_splat_u32_ns_sse, splat_u32_ns;
   132 }
   133 #endif
   134 
   135 #ifdef	__SYMBIAN32__
   136  
   137 OilFunctionImpl* __oil_function_impl_splat_u32_ns_sse_unroll2, splat_u32_ns() {
   138 		return &_oil_function_impl_splat_u32_ns_sse_unroll2, splat_u32_ns;
   139 }
   140 #endif
   141 
   142 #ifdef	__SYMBIAN32__
   143  
   144 OilFunctionImpl* __oil_function_impl_splat_u8_ns_sse, splat_u8_ns() {
   145 		return &_oil_function_impl_splat_u8_ns_sse, splat_u8_ns;
   146 }
   147 #endif
   148 
   149 #ifdef	__SYMBIAN32__
   150  
   151 OilFunctionImpl* __oil_function_impl_splat_u8_ns_sse_unroll2, splat_u8_ns() {
   152 		return &_oil_function_impl_splat_u8_ns_sse_unroll2, splat_u8_ns;
   153 }
   154 #endif
   155