/* First public contribution. */
2 * LIBOIL - Library of Optimized Inner Loops
3 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
27 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
33 #include <liboil/liboilfunction.h>
/* Declare the liboil function classes that the MMX implementations
 * below are registered against (classes are defined elsewhere). */
35 OIL_DECLARE_CLASS (diff8x8_s16_u8);
36 OIL_DECLARE_CLASS (diff8x8_const128_s16_u8);
37 OIL_DECLARE_CLASS (diff8x8_average_s16_u8);
/*
 * diff8x8_s16_u8_mmx:
 * MMX implementation of diff8x8_s16_u8: per row, subtracts 8 pixels of
 * src2 ("ReconPtr") from 8 pixels of src1 ("FiltPtr") and writes the 8
 * signed 16-bit differences to dest.  mm7 is zeroed once and used as
 * the zero register for the u8 -> s16 widening (punpcklbw/punpckhbw).
 * The asm is skipped on Symbian emulator builds (__WINSCW__/__WINS__),
 * which cannot assemble x86 MMX.
 *
 * NOTE(review): the loop control, pointer-increment code using ss1/ss2,
 * the asm constraint list, and the non-MMX fallback branch are not
 * visible in this chunk; the %0/%1/%2 operand mapping (presumably
 * src1, src2, dest) should be confirmed against the full file.
 */
41 diff8x8_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2)
43 #if !defined(__WINSCW__) && !defined(__WINS__)
44 __asm__ __volatile__ (
45 " pxor %%mm7, %%mm7 \n\t"
48 " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \
49 " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr */ \
50 " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ \
51 " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */ \
52 /* convert from UINT8 to INT16 */ \
53 " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \
54 " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr) */ \
55 " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \
56 " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr) */ \
57 /* start calculation */ \
58 " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ReconPtr */ \
59 " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ReconPtr */ \
60 " movq %%mm0, (%2) \n\t" /* write answer out */ \
61 " movq %%mm2, 8(%2) \n\t" /* write answer out */ \
62 /* Increment pointers */ \
/* Register this MMX routine as an implementation of the
 * diff8x8_s16_u8 class. */
88 OIL_DEFINE_IMPL_FULL (diff8x8_s16_u8_mmx, diff8x8_s16_u8, OIL_IMPL_FLAG_MMX);
/*
 * diff8x8_const128_s16_u8_mmx:
 * MMX implementation of diff8x8_const128_s16_u8: per row, subtracts the
 * constant 128 from each of 8 pixels of src1 ("FiltPtr") and writes the
 * 8 signed 16-bit results to dest.  tmp holds four 16-bit 0x0080 words
 * that are loaded once into mm1 (via operand %3) as the subtrahend;
 * mm7 is the zero register for the u8 -> s16 widening.  The asm is
 * skipped on Symbian emulator builds (__WINSCW__/__WINS__).
 *
 * NOTE(review): the loop control, the src1 pointer increment using ss1,
 * the asm constraint list, and the non-MMX fallback branch are not
 * visible in this chunk — confirm the %0/%1/%3 operand mapping
 * (presumably src1, dest, tmp) against the full file.
 */
91 diff8x8_const128_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1)
93 #if !defined(__WINSCW__) && !defined(__WINS__)
94 const int16_t tmp[4] = { 0x0080, 0x0080, 0x0080, 0x0080 };
96 __asm__ __volatile__ (
97 " pxor %%mm7, %%mm7 \n\t"
98 " movq (%3), %%mm1 \n\t"
101 " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \
102 " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ \
103 /* convert from UINT8 to INT16 */ \
104 " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \
105 " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \
106 /* start calculation */ \
107 " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - 128 */ \
108 " psubw %%mm1, %%mm2 \n\t" /* mm2 = FiltPtr - 128 */ \
109 " movq %%mm0, (%1) \n\t" /* write answer out */ \
110 " movq %%mm2, 8(%1) \n\t" /* write answer out */ \
111 /* Increment pointers */ \
112 " add $16, %1 \n\t" \
/* Register this MMX routine as an implementation of the
 * diff8x8_const128_s16_u8 class. */
135 OIL_DEFINE_IMPL_FULL (diff8x8_const128_s16_u8_mmx, diff8x8_const128_s16_u8, OIL_IMPL_FLAG_MMX);
/*
 * diff8x8_average_s16_u8_mmx:
 * MMX implementation of diff8x8_average_s16_u8: per row, averages 8
 * pixels of src2 ("ReconPtr1") with 8 pixels of src3 ("ReconPtr2"),
 * subtracts that average from 8 pixels of src1 ("FiltPtr"), and writes
 * the 8 signed 16-bit differences to dest.  The average is computed in
 * the widened 16-bit domain (paddw then psrlw $1), so the u8+u8 sum
 * cannot overflow; the shift truncates, i.e. (a+b)/2 rounds toward
 * zero.  The asm is skipped on Symbian emulator builds
 * (__WINSCW__/__WINS__).
 *
 * NOTE(review): the loop control, pointer increments using ss1/ss2, the
 * asm constraint list, and the non-MMX fallback branch are not visible
 * in this chunk — confirm the %0/%1/%2/%3 operand mapping (presumably
 * src1, src2, src3, dest) against the full file.  Also note the
 * register comments on lines 147/154/157 look copy-pasted (they say
 * mm1/mm3 but the instructions target mm4/mm5).
 */
138 diff8x8_average_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2, uint8_t *src3)
140 #if !defined(__WINSCW__) && !defined(__WINS__)
141 __asm__ __volatile__ (
142 " pxor %%mm7, %%mm7 \n\t"
145 " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \
146 " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr1 */ \
147 " movq (%2), %%mm4 \n\t" /* mm1 = ReconPtr2 */ \
148 " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ \
149 " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */ \
150 " movq %%mm4, %%mm5 \n\t" /* dup to prepare for up conversion */ \
151 /* convert from UINT8 to INT16 */ \
152 " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \
153 " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr1) */ \
154 " punpcklbw %%mm7, %%mm4 \n\t" /* mm1 = INT16(ReconPtr2) */ \
155 " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \
156 " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr1) */ \
157 " punpckhbw %%mm7, %%mm5 \n\t" /* mm3 = INT16(ReconPtr2) */ \
158 /* average ReconPtr1 and ReconPtr2 */ \
159 " paddw %%mm4, %%mm1 \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */ \
160 " paddw %%mm5, %%mm3 \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */ \
161 " psrlw $1, %%mm1 \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ \
162 " psrlw $1, %%mm3 \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ \
163 " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \
164 " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \
165 " movq %%mm0, (%3) \n\t" /* write answer out */ \
166 " movq %%mm2, 8(%3) \n\t" /* write answer out */ \
167 /* Increment pointers */ \
168 " add $16, %3 \n\t" \
/* Register this MMX routine as an implementation of the
 * diff8x8_average_s16_u8 class. */
195 OIL_DEFINE_IMPL_FULL (diff8x8_average_s16_u8_mmx, diff8x8_average_s16_u8, OIL_IMPL_FLAG_MMX);
/* Export stub for the diff8x8_s16_u8 MMX implementation (the
 * surrounding #ifdef and closing brace lie outside this chunk).
 * NOTE(review): the comma declarator here declares a pointer variable
 * AND a function, and the return statement uses the comma operator —
 * presumably machine-generated for the Symbian port; verify against
 * the generator before "fixing". */
201 OilFunctionImpl* __oil_function_impl_diff8x8_s16_u8_mmx, diff8x8_s16_u8() {
202 return &_oil_function_impl_diff8x8_s16_u8_mmx, diff8x8_s16_u8;
/* Export stub for the diff8x8_const128_s16_u8 MMX implementation (the
 * surrounding #ifdef and closing brace lie outside this chunk).
 * NOTE(review): comma declarator + comma-operator return, presumably
 * machine-generated for the Symbian port; verify before "fixing". */
208 OilFunctionImpl* __oil_function_impl_diff8x8_const128_s16_u8_mmx, diff8x8_const128_s16_u8() {
209 return &_oil_function_impl_diff8x8_const128_s16_u8_mmx, diff8x8_const128_s16_u8;
/* Export stub for the diff8x8_average_s16_u8 MMX implementation (the
 * surrounding #ifdef and closing brace lie outside this chunk).
 * NOTE(review): comma declarator + comma-operator return, presumably
 * machine-generated for the Symbian port; verify before "fixing". */
215 OilFunctionImpl* __oil_function_impl_diff8x8_average_s16_u8_mmx, diff8x8_average_s16_u8() {
216 return &_oil_function_impl_diff8x8_average_s16_u8_mmx, diff8x8_average_s16_u8;