Symaptic: os/graphics/m3g/m3gcore11/src/m3g

     1 /*

     2 * Copyright (c) 2003 Nokia Corporation and/or its subsidiary(-ies).

     3 * All rights reserved.

     4 * This component and the accompanying materials are made available

     5 * under the terms of the License "Eclipse Public License v1.0"

     6 * which accompanies this distribution, and is available

     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".

8 *

     9 * Initial Contributors:

    10 * Nokia Corporation - initial contribution.

    11 *

    12 * Contributors:

    13 *

    14 * Description: Internal math function implementations

    15 *

    16 */

    19 /*!

    20  * \internal

    21  * \file

    22  * \brief Internal math function implementations

    23  *

    24  */

    26 #ifndef M3G_CORE_INCLUDE

    27 #   error included by m3g_core.c; do not compile separately.

    28 #endif

    30 #include "m3g_defs.h"

    31 #include "m3g_memory.h"

    33 #if defined(M3G_SOFT_FLOAT)

    34 #include <math.h>

    35 #endif

    37 /*----------------------------------------------------------------------

    38  * Private types and definitions

    39  *--------------------------------------------------------------------*/

    41 /* Enumerated common matrix classifications */

    42 #define MC_IDENTITY             0x40100401 // 01000000 00010000 00000100 00000001

    43 #define MC_FRUSTUM              0x30BF0C03 // 00110000 10111111 00001100 00000011

    44 #define MC_PERSPECTIVE          0x30B00C03 // 00110000 10110000 00001100 00000011

    45 #define MC_ORTHO                0x7F300C03 // 01111111 00110000 00001100 00000011

    46 #define MC_PARALLEL             0x70300C03 // 01110000 00110000 00001100 00000011

    47 #define MC_SCALING_ROTATION     0x403F3F3F // 01000000 00111111 00111111 00111111

    48 #define MC_SCALING              0x40300C03 // 01000000 00110000 00001100 00000011

    49 #define MC_TRANSLATION          0x7F100401 // 01111111 00010000 00000100 00000001

    50 #define MC_X_ROTATION           0x403C3C01 // 01000000 00111100 00111100 00000001

    51 #define MC_Y_ROTATION           0x40330433 // 01000000 00110011 00000100 00110011

    52 #define MC_Z_ROTATION           0x40100F0F // 01000000 00010000 00001111 00001111

    53 #define MC_W_UNITY              0x7F3F3F3F // 01111111 00111111 00111111 00111111

    54 #define MC_GENERIC              0xFFFFFFFF

    56 /* Partial masks for individual matrix components */

    57 #define MC_TRANSLATION_PART     0x3F000000 // 00111111 00000000 00000000 00000000

    58 #define MC_SCALE_PART           0x00300C03 // 00000000 00110000 00001100 00000011

    59 #define MC_SCALE_ROTATION_PART  0x003F3F3F // 00000000 00111111 00111111 00111111

    61 /* Matrix element classification masks */

    62 #define ELEM_ZERO       0x00

    63 #define ELEM_ONE        0x01

    64 #define ELEM_MINUS_ONE  0x02

    65 #define ELEM_ANY        0x03

    67 /*!

    68  * \internal

    69  * \brief calculates the offset of a 4x4 matrix element in a linear

    70  * array

    71  *

    72  * \notes The current convention is column-major, as in OpenGL ES

    73  */

    74 #define MELEM(row, col) ((row) + ((col) << 2))

    76 /*!

    77  * \internal

    78  * \brief Macro for accessing 4x4 float matrix elements

    79  *

    80  * \param mtx pointer to the first element of the matrix

    81  * \param row matrix row

    82  * \param col matrix column

    83  */

    84 #define M44F(mtx, row, col)     ((mtx)->elem[MELEM((row), (col))])

    86 /*--------------------------------------------------------------------*/

    88 /*----------------------------------------------------------------------

    89  * Private functions

    90  *--------------------------------------------------------------------*/

    93 /*!

    94  * \internal

    95  * \brief ARM VFPv2 implementation of 4x4 matrix multiplication.

    96  *

    97  * \param dst	multiplication result

    98  * \param left	left-hand matrix

    99  * \param right right-hand matrix

   100  */

   101 #if defined(M3G_HW_FLOAT_VFPV2)

   103 __asm void _m3gGenericMatrixProduct(Matrix *dst,

   104                                     const Matrix *left, const Matrix *right)

   105 {

   106 // r0 = *dst

   107 // r1 = *left

   108 // r2 = *right

   110 		CODE32

   112 // save the VFP registers and set the vector STRIDE = 1, LENGTH = 4

   114 		FSTMFDD	sp!, {d8-d15}

   116 		FMRX	r3, FPSCR

   117 		BIC		r12, r3, #0x00370000

   118 		ORR		r12, #0x00030000

   119 		FMXR	FPSCR, r12

   121 // left = [a0  a1  a2  a3	right = [b0  b1  b2  b3

   122 //	 	   a4  a5  a6  a7		     b4  b5  b6  b7

   123 //		   a8  a9 a10 a11		     b8  b9 b10 b11

   124 //		  a12 a13 a14 a15]		    b12 b13 b14 b15]

   125 //

   126 // dst = [a0*b0+a4*b1+a8*b2+a12*b3  a1*b0+a5*b1+a9*b2+a13*b3  ..

   127 //		  a0*b4+a4*b5+a8*b6+a12*b7  a1*b4+a5*b5+a9*b6+a13*b7  ..

   128 //					.							.

   129 //					.							.

   131 		FLDMIAS	r1!, {s8-s23}		// load the left matrix [a0-a15] to registers s8-s23

   132 		FLDMIAS	r2!, {s0-s7}		// load [b0-b7] of right matrix to registers s0-s7

   133 		FMULS	s24, s8, s0			// [s24-s27]  = [a0*b0  a1*b0  a2*b0  a3*b0]

   134 		FMULS	s28, s8, s4			// [s28-s31]  = [a0*b4  a1*b4  a2*b4  a3*b4]

   135 		FMACS	s24, s12, s1		// [s24-s27] += [a4*b1  a5*b1  a6*b1  a7*b1]

   136 		FMACS	s28, s12, s5		// [s28-s31] += [a4*b5  a5*b5  a6*b5  a7*b5]

   137 		FMACS	s24, s16, s2		// [s24-s27] += [a8*b2  a9*b2  a10*b2 a11*b2]

   138 		FMACS	s28, s16, s6		// [s28-s31] += [a8*b6  a9*b6  a10*b6 a11*b6]

   139 		FMACS	s24, s20, s3		// [s24-s27] += [a12*b3 a13*b3 a14*b3 a15*b3]

   140 		FMACS	s28, s20, s7		// [s28-s31] += [a12*b7 a13*b37a14*b7 a15*b7]

   141 		FLDMIAS	r2!, {s0-s7}		// load [b8-b15]

   142 		FSTMIAS	r0!, {s24-s31}		// write [dst0-dst7]

   143 		FMULS	s24, s8, s0

   144 		FMULS	s28, s8, s4

   145 		FMACS	s24, s12, s1

   146 		FMACS	s28, s12, s5

   147 		FMACS	s24, s16, s2

   148 		FMACS	s28, s16, s6

   149 		FMACS	s24, s20, s3

   150 		FMACS	s28, s20, s7

   151 		FSTMIAS	r0!, {s24-s31}

   153 // Restore the VFP registers and return.

   155 		FMXR	FPSCR, r3

   157 		FLDMFDD	sp!, {d8-d15}

   159 		BX		lr

   161 }

   162 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */

   165 /*------------------ Elementary float ------------------*/

   167 #if defined(M3G_SOFT_FLOAT)

   169 #if defined (M3G_BUILD_ARM)

   171 /*!

   172  * \internal

   173  * \brief Floating point multiplication implementation for ARM.

   174  */

   175 __asm M3Gfloat m3gMul(const M3Gfloat a,

   176                       const M3Gfloat b)

   177 {

   178     /**

   179      * Extract the exponents of the multiplicands and add them

   180      * together. Flush to zero if either exponent or their sum

   181      * is zero.

   182      */

   184     mov     r12, #0xff;

   185     ands    r2, r0, r12, lsl #23;   // exit if e1 == 0

   186     andnes  r3, r1, r12, lsl #23;   // exit if e2 == 0

   187     subne   r2, r2, #(127 << 23);

   188     addnes  r12, r2, r3;            // exit if e1+e2-127 <= 0

   189     movle   r0, #0;

   190     bxle    lr;

   192     /**

   193      * Determine the sign of the result. Note that the exponent

   194      * may have overflowed to the sign bit, and thus the result

   195      * may be an arbitrary negative value when it really should

   196      * be +Inf or -Inf.

   197      */

   199     teq     r0, r1;

   200     orrmi   r12, r12, #0x80000000;

   202     /**

   203      * Multiply the mantissas. First shift the mantissas up to

   204      * unsigned 1.31 fixed point, adding the leading "1" bit at

   205      * the same time, and finally do a 32x32 -> 64 bit unsigned

   206      * multiplication. The result is in unsigned 2.62 fixed point,

   207      * representing the interval [1.0, 4.0).

   208      */

   210     mov     r2, #0x80000000;

   211     orr     r0, r2, r0, lsl #8;

   212     orr     r1, r2, r1, lsl #8;

   213     umulls  r2, r3, r0, r1;

   215     /**

   216      * If the highest bit of the 64-bit result is set, then the

   217      * mantissa lies in [2.0, 4.0) and needs to be renormalized.

   218      * That is, the mantissa is shifted one bit to the right and

   219      * the exponent correspondingly increased by 1. Note that we

   220      * lose the leading "1" bit from the mantissa by adding it up

   221      * with the exponent.

   222      */

   224     subpl   r12, r12, #(1 << 23);    // no overflow: exponent -= 1

   225     addpl   r0, r12, r3, lsr #7;     // no overflow: exponent += 1

   226     addmi   r0, r12, r3, lsr #8;     // overflow: exponent += 1

   227     bx      lr;

   228 }

   230 /*!

   231  * \internal

   232  * \brief Floating point addition implementation for ARM.

   233  */

   234 __asm M3Gfloat m3gAdd(const M3Gfloat a,

   235                       const M3Gfloat b)

   236 {

   237     /**

   238      * If the operands have opposite signs then this is not really

   239      * an addition but a subtraction. Subtraction is much slower,

   240      * so we have a separate code path for it, rather than trying

   241      * to save space by handling both in the same place.

   242      */

   244     teq     r0, r1;

   245     bmi     _m3gSub;

   247     /**

   248      * Sort the operands such that the larger operand is in R0 and

   249      * the smaller in R1. The sign bits do not affect the ordering,

   250      * since they are known to be equal.

   251      */

   253     subs    r2, r0, r1;

   254     submi   r0, r0, r2;

   255     addmi   r1, r1, r2;

   257     /**

   258      * Extract the exponent of the smaller operand into R2 and compute

   259      * the difference between the larger and smaller exponent into R3.

   260      * (Note that the sign bits cancel out in the subtraction.) The

   261      * exponent delta tells how many bits the mantissa of the smaller

   262      * operand must be shifted to the right in order to bring the

   263      * operands into equal scale.

   264      */

   266     mov     r2, r1, lsr #23;

   267     rsb     r3, r2, r0, lsr #23;

   269     /**

   270      * Check if the exponent delta is bigger than 23 bits, or if the

   271      * smaller exponent is zero. In either case, exit the routine and

   272      * return the larger operand (which is already in R0). Note that

   273      * this means that subnormals are treated as zero.

   274      */

   276     rsbs    r12, r3, #23;               // N set, V clear if R3 > 23

   277     tstpl   r2, #0xff;                  // execute only if R3 <= 23

   278     bxle    lr;                         // exit if Z set or N != V

   280     /**

   281      * Extract the mantissas and shift them up to unsigned 1.31 fixed

   282      * point, inserting the implied leading "1" bit at the same time.

   283      * Finally, align the decimal points and add up the mantissas.

   284      */

   286     mov     r12, #0x80000000;

   287     orr     r0, r12, r0, lsl #8;

   288     orr     r1, r12, r1, lsl #8;

   289     adds    r0, r0, r1, lsr r3;

   291     /**

   292      * Compute the final exponent by adding up the smaller exponent

   293      * (R2), the exponent delta (R3), and the possible overflow bit

   294      * (carry flag). Note that in case of overflow, the leading "1"

   295      * has ended up in the carry flag and thus needs not be explicitly

   296      * discarded. Finally, put the mantissa together with the sign and

   297      * exponent.

   298      */

   300     adc     r2, r2, r3;                 // r2 = smallExp + deltaExp + overflow

   301     movcc   r0, r0, lsl #1;             // no overflow: discard leading 1

   302     mov     r0, r0, lsr #9;

   303     orr     r0, r0, r2, lsl #23;

   304     bx      lr;

   306 _m3gSub

   308     /**

   309      * Sort the operands such that the one with larger magnitude is

   310      * in R0 and has the correct sign (the sign of the final result),

   311      * while the smaller operand is in R1 with an inverted sign bit.

   312      */

   314     eor     r1, r1, #0x80000000;

   315     subs    r2, r0, r1;

   316     eormi   r2, r2, #0x80000000;

   317     submi   r0, r0, r2;

   318     addmi   r1, r1, r2;

   320     /**

   321      * Extract the exponent of the smaller operand into R2 and compute

   322      * the difference between the larger and smaller exponent into R3.

   323      * (Note that the sign bits cancel out in the subtraction.) The

   324      * exponent delta tells how many bits the mantissa of the smaller

   325      * operand must be shifted to the right in order to bring the

   326      * operands into equal scale.

   327      */

   329     mov     r2, r1, lsr #23;

   330     rsbs    r3, r2, r0, lsr #23;

   332     /**

   333      * Check if the exponent delta is bigger than 31 bits, or if the

   334      * smaller exponent is zero. In either case, exit the routine and

   335      * return the larger operand (which is already in R0). Note that

   336      * this means that subnormals are treated as zero.

   337      */

   339     rsbs    r12, r3, #31;               // N set, V clear if R3 > 31

   340     tstpl   r2, #0xff;                  // execute only if R3 <= 31

   341     bxle    lr;                         // exit if Z set or N != V

   343     /**

   344      * Extract the mantissas and shift them up to unsigned 1.31 fixed

   345      * point, inserting the implied leading "1" bit at the same time.

   346      * Then align the decimal points and subtract the smaller mantissa

   347      * from the larger one.

   348      */

   350     mov     r12, #0x80000000;

   351     orr     r0, r12, r0, lsl #8;

   352     orr     r1, r12, r1, lsl #8;

   353     subs    r0, r0, r1, lsr r3;

   355     /**

   356      * We split the range of possible results into three categories:

   357      *

   358      *   1. [1.0, 2.0) ==> no renormalization needed (highest bit set)

   359      *   2. [0.5, 1.0) ==> only one left-shift needed

   360      *   3. (0.0, 0.5) ==> multiple left-shifts needed

   361      *   4. zero       ==> just return

   362      *

   363      * Cases 1 and 2 are handled in the main code path. Cases 3 and 4

   364      * are less common by far, so we branch to a separate code fragment

   365      * for those.

   366      */

   368     movpls  r0, r0, lsl #1;             // Cases 2,3,4: shift left

   369     bpl     _m3gSubRenormalize;         // Cases 3 & 4: branch out

   371     /**

   372      * Now we have normalized the mantissa such that the highest bit

   373      * is set. Here we only need to adjust the exponent, if necessary,

   374      * and put the pieces together. Note that we lose the leading "1"

   375      * bit from the mantissa by adding it up with the exponent. We can

   376      * also do proper rounding (towards nearest) instead of truncation

   377      * (towards zero) at no extra cost!

   378      */

   380     sbc     r3, r3, #1;                 // deltaExp -= 1 (Case 1) or 2 (Case 2)

   381     add     r2, r2, r3;                 // resultExp = smallExp + deltaExp

   382     movs    r0, r0, lsr #8;             // shift mantissa, keep leading "1"

   383     adc     r0, r0, r2, lsl #23;        // resultExp += 1, mantissa += carry

   384     bx      lr;

   386     /**

   387      * Separate code path for cases 3 and 4 (see above). The mantissa

   388      * has already been shifted up by one, but the exponent has not

   389      * been correspondingly decreased. We also know that the highest

   390      * bit is still not set, and that the carry flag is clear.

   391      */

   393 _m3gSubRenormalize

   394     bxeq    lr;

   395     subcc   r3, r3, #2;

   396     movccs  r0, r0, lsl #2;

   398     /**

   399      * If the carry flag is still not set, i.e. there were more than

   400      * two leading zeros in the mantissa initially, loop until we

   401      * find the highest set bit.

   402      */

   404 _m3gSubRenormalizeLoop

   405     subcc   r3, r3, #1;

   406     movccs  r0, r0, lsl #1;

   407     bcc     _m3gSubRenormalizeLoop;

   409     /**

   410      * Now the leading "1" is in the carry flag, so we can just add up

   411      * the exponent and mantissa as usual, doing proper rounding at

   412      * the same time. However, cases where the exponent goes negative

   413      * (that is, underflows) must be detected and flushed to zero.

   414      */

   416     add     r3, r2, r3;

   417     movs    r0, r0, lsr #9;

   418     adc     r0, r0, r3, lsl #23;

   419     teq     r0, r2, lsl #23;

   420     movmi   r0, #0;

   421     bx      lr;

   422 }

   424 #else /* M3G_BUILD_ARM */

   426 /*!

   427  * \internal

   428  * \brief Floating point addition implementation

   429  *

   430  */

   431 static M3G_INLINE M3Gfloat m3gFloatAdd(const M3Guint aBits,

   432                                        const M3Guint bBits)

   433 {

   434     M3Guint large, small, signMask;

   436     /* Early exits for underflow cases */

   438     large = (M3Gint)(aBits & ~SIGN_MASK);

   439     if (large <= 0x00800000) {

   440         return INT_AS_FLOAT(bBits);

   441     }

   442     small = (M3Gint)(bBits & ~SIGN_MASK);

   443     if (small <= 0x00800000) {

   444         return INT_AS_FLOAT(aBits);

   445     }

   447     /* Swap the numbers so that "large" really is larger; the unsigned

   448      * (or de-signed) bitmasks for floats are nicely monotonous, so we

   449      * can compare directly.  Also store the sign of the larger number

   450      * for future reference. */

   452     if (small > large) {

   453         M3Gint temp = small;

   454         small = large;

   455         large = temp;

   456         signMask = (bBits & SIGN_MASK);

   457     }

   458     else {

   459         signMask = (aBits & SIGN_MASK);

   460     }

   462     {

   463         M3Guint res, overflow;

   464         M3Guint resExp, expDelta;

   466         /* Store the larger exponent as our candidate result exponent,

   467          * and compute the difference between the exponents */

   469         resExp = (large >> 23);

   470         expDelta = resExp - (small >> 23);

   472         /* Take an early exit if the change would be insignificant;

   473          * this also guards against odd results from shifting by more

   474          * than 31 (undefined in C) */

   476         if (expDelta >= 24) {

   477             res = large | signMask;

   478             return INT_AS_FLOAT(res);

   479         }

   481         /* Convert the mantissas into shifted integers, and shift the

   482          * smaller number to the same scale with the larger one. */

   484         large = (large & MANTISSA_MASK) | LEADING_ONE;

   485         small = (small & MANTISSA_MASK) | LEADING_ONE;

   486         small >>= expDelta;

   487         M3G_ASSERT(large >= small);

   489         /* Check whether we're really adding or subtracting the

   490          * smaller number, and branch to slightly different routines

   491          * respectively */

   493         if (((aBits ^ bBits) & SIGN_MASK) == 0) {

   495             /* Matching signs; just add the numbers and check for

   496              * overflow, shifting to compensate as necessary. */

   498             res = large + small;

   500             overflow = (res >> 24);

   501             res >>= overflow;

   502             resExp += overflow;

   503         }

   504         else {

   506             /* Different signs, so let's subtract the smaller value;

   507              * also check that we're not subtracting a number from

   508              * itself (so we don't have to normalize a zero below) */

   510             if (small == large) {

   511                 return 0.0f; /* x - x = 0 */

   512             }

   514             res = (large << 8) - (small << 8);

   516             /* Renormalize the number by shifting until we've got the

   517              * high bit in place */

   519             while ((res >> 24) == 0) {

   520                 res <<= 8;

   521                 resExp -= 8;

   522             }

   523             while ((res >> 31) == 0) {

   524                 res <<= 1;

   525                 --resExp;

   526             }

   527             res >>= 8;

   528         }

   530         /* Flush to zero in case of over/underflow of the exponent */

   532         if (resExp >= 255) {

   533             return 0.0f;

   534         }

   536         /* Compose the final number into "res"; note that we pull in

   537          * the sign of the original larger number, which should still

   538          * be valid */

   540         res &= MANTISSA_MASK;

   541         res |= (resExp << 23);

   542         res |= signMask;

   544         return INT_AS_FLOAT(res);

   545     }

   546 }

   548 /*!

   549  * \internal

   550  * \brief Floating point multiplication implementation

   551  */

   552 static M3G_INLINE M3Gfloat m3gFloatMul(const M3Guint aBits,

   553                                        const M3Guint bBits)

   554 {

   555     M3Guint a, b;

   557     /* Early exits for underflow and multiplication by zero */

   559     a = (aBits & ~SIGN_MASK);

   560     if (a <= 0x00800000) {

   561         return 0.0f;

   562     }

   564     b = (bBits & ~SIGN_MASK);

   565     if (b <= 0x00800000) {

   566         return 0.0f;

   567     }

   569     {

   570         M3Guint res, exponent, overflow;

   572         /* Compute the exponent of the result, assuming the mantissas

   573          * don't overflow; then mask out the original exponents */

   575         exponent = (a >> 23) + (b >> 23) - 127;

   576         a &= MANTISSA_MASK;

   577         b &= MANTISSA_MASK;

   579         /* Compute the new mantissa from:

   580          *

   581          *   (1 + a)(1 + b) = ab + a + b + 1

   582          *

   583          * First shift the mantissas from 0.23 down to 0.16 for the

   584          * multiplication, then shift back to 0.23 for adding in the

   585          * "a + b + 1" part of the equation.  */

   587         res = (a >> 7) * (b >> 7);              /* "ab" at 0.32 */

   588         res = (res >> 9) + a + b + LEADING_ONE;

   590         /* Add the leading one, then normalize the result by checking

   591          * the overflow bit and dividing by two if necessary */

   593         overflow = (res >> 24);

   594         res >>= overflow;

   595         exponent += overflow;

   597         /* Flush to zero in case of over/underflow of the exponent */

   599         if (exponent >= 255) {

   600             return 0.0f;

   601         }

   603         /* Compose the final number into "res" */

   605         res &= MANTISSA_MASK;

   606         res |= (exponent << 23);

   607         res |= (aBits ^ bBits) & SIGN_MASK;

   609         return INT_AS_FLOAT(res);

   610     }

   611 }

   613 #endif /* M3G_BUILD_ARM */

   615 /*!

   616  * \internal

   617  * \brief Computes the signed fractional part of a floating point

   618  * number

   619  *

   620  * \param x  floating point value

   621  * \return x signed fraction of x in ]-1, 1[

   622  */

   623 static M3Gfloat m3gFrac(M3Gfloat x)

   624 {

   625     /* Quick exit for -1 < x < 1 */

   627     if (m3gAbs(x) < 1.0f) {

   628         return x;

   629     }

   631     /* Shift the mantissa to the proper place, mask out the integer

   632      * part, and renormalize */

   633     {

   634         M3Guint ix = FLOAT_AS_UINT(x);

   635         M3Gint expo = ((ix >> 23) & 0xFF) - 127;

   636         M3G_ASSERT(expo >= 0);

   638         /* The decimal part will always be zero for large values with

   639          * exponents over 24 */

   641         if (expo >= 24) {

   642             return 0.f;

   643         }

   644         else {

   646             /* Shift the integer part out of the mantissa and see what

   647              * we have left */

   649             M3Guint base = (ix & MANTISSA_MASK) | LEADING_ONE;

   650             base = (base << expo) & MANTISSA_MASK;

   652             /* Quick exit (and guard against infinite looping) for

   653              * zero */

   655             if (base == 0) {

   656                 return 0.f;

   657             }

   659             /* We now have an exponent of 0 (i.e. no shifting), but

   660              * must renormalize to get a set bit in place of the

   661              * hidden (implicit one) bit */

   663             expo = 0;

   665             while ((base >> 19) == 0) {

   666                 base <<= 4;

   667                 expo -= 4;

   668             }

   669             while ((base >> 23) == 0) {

   670                 base <<= 1;

   671                 --expo;

   672             }

   674             /* Compose the final number */

   676             ix =

   677                 (base & MANTISSA_MASK) |

   678                 ((expo + 127) << 23) |

   679                 (ix & SIGN_MASK);

   680             return INT_AS_FLOAT(ix);

   681         }

   682     }

   683 }

   685 #endif /* M3G_SOFT_FLOAT */

   687 #if defined(M3G_DEBUG)

   688 /*!

   689  * \internal

   690  * \brief Checks for NaN or infinity in a floating point input

   691  */

   692 static void m3gValidateFloats(int n, float *p)

   693 {

   694     while (n-- > 0) {

   695         M3G_ASSERT(EXPONENT(*p) < 120);

   696         ++p;

   697     }

   698 }

   699 #else

   700 #   define m3gValidateFloats(n, p)

   701 #endif

   703 /*------------------ Trigonometry and exp ----------*/

   706 #if defined(M3G_SOFT_FLOAT)

   707 /*!

   708  * \internal

   709  * \brief Sine for the first quadrant

   710  *

   711  * \param x floating point value in [0, PI/2]

   712  * \return sine of \c x

   713  */

   714 static M3Gfloat m3gSinFirstQuadrant(M3Gfloat x)

   715 {

   716     M3Guint bits = FLOAT_AS_UINT(x);

   718     if (bits <= 0x3ba3d70au)    /* 0.005f */

   719         return x;

   720     else {

   721         static const M3Gfloat sinTermLut[4] = {

   722             -1.0f / (2*3),

   723             -1.0f / (4*5),

   724             -1.0f / (6*7),

   725             -1.0f / (8*9)

   726         };

   728         M3Gfloat xx = m3gSquare(x);

   729         M3Gfloat sinx = x;

   730         int i;

   732         for (i = 0; i < 4; ++i) {

   733             x    = m3gMul(x, m3gMul(xx, sinTermLut[i]));

   734             sinx = m3gAdd(sinx, x);

   735         }

   737         return sinx;

   738     }

   739 }

   740 #endif /* M3G_SOFT_FLOAT */

   742 #if defined(M3G_SOFT_FLOAT)

   743 /*!

   744  * \internal

   745  * \brief Computes sine for the first period

   746  *

   747  * \param x floating point value in [0, 2PI]

   748  * \return sine of x

   749  */

   750 static M3Gfloat m3gSinFirstPeriod(const M3Gfloat x)

   751 {

   752     M3G_ASSERT(x >= 0 && x <= TWO_PI);

   754     if (x >= PI) {

   755         return m3gNegate(m3gSinFirstQuadrant(x >= ONE_AND_HALF_PI ?

   756                                              m3gSub(TWO_PI, x) :

   757                                              m3gSub(x, PI)));

   758     }

   759     return m3gSinFirstQuadrant((x >= HALF_PI) ? m3gSub(PI, x) : x);

   760 }

   761 #endif /* M3G_SOFT_FLOAT */

   763 /*------------- Float vs. int conversion helpers -------------*/

   765 /*!

   766  * \internal

   767  * \brief Scales and clamps a float to unsigned byte range

   768  */

   769 static M3G_INLINE M3Gint m3gFloatToByte(const M3Gfloat a)

   770 {

   771     return m3gRoundToInt(m3gMul(255.f, m3gClampFloat(a, 0.f, 1.f)));

   772 }

   774 /*------------------ Vector helpers ------------------*/

   776 /*!

   777  * \internal

   778  * \brief Computes the norm of a floating-point 3-vector

   779  */

   780 static M3G_INLINE M3Gfloat m3gNorm3(const M3Gfloat *v)

   781 {

   782     return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])),

   783                   m3gSquare(v[2]));

   784 }

   786 /*!

   787  * \internal

   788  * \brief Computes the norm of a floating-point 4-vector

   789  */

   790 static M3G_INLINE M3Gfloat m3gNorm4(const M3Gfloat *v)

   791 {

   792     return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])),

   793                   m3gAdd(m3gSquare(v[2]), m3gSquare(v[3])));

   794 }

   796 /*!

   797  * \internal

   798  * \brief Scales a floating-point 3-vector

   799  */

   800 static M3G_INLINE void m3gScale3(M3Gfloat *v, M3Gfloat s)

   801 {

   802     v[0] = m3gMul(v[0], s);

   803     v[1] = m3gMul(v[1], s);

   804     v[2] = m3gMul(v[2], s);

   805 }

   807 /*!

   808  * \internal

   809  * \brief Scales a floating-point 4-vector

   810  */

   811 static M3G_INLINE void m3gScale4(M3Gfloat *v, M3Gfloat s)

   812 {

   813     v[0] = m3gMul(v[0], s);

   814     v[1] = m3gMul(v[1], s);

   815     v[2] = m3gMul(v[2], s);

   816     v[3] = m3gMul(v[3], s);

   817 }

   820 /*------------------ Matrices ------------------*/

   822 /*!

   823  * \internal

   824  */

   825 static M3G_INLINE M3Gbool m3gIsClassified(const Matrix *mtx)

   826 {

   827     M3G_ASSERT(mtx != NULL);

   828     return (M3Gbool) mtx->classified;

   829 }

   831 /*!

   832  * \internal

   833  * \brief Returns a classification for a single floating point number

   834  */

   835 static M3G_INLINE M3Guint m3gElementClass(const M3Gfloat x)

   836 {

   837     if (IS_ZERO(x)) {

   838         return ELEM_ZERO;

   839     }

   840     else if (IS_ONE(x)) {

   841         return ELEM_ONE;

   842     }

   843     else if (IS_MINUS_ONE(x)) {

   844         return ELEM_MINUS_ONE;

   845     }

   846     return ELEM_ANY;

   847 }

   849 /*!

   850  * \internal

   851  * \brief Computes the classification mask of a matrix

   852  *

   853  * The mask is constructed from two bits per elements, with the lowest

   854  * two bits corresponding to the first element in the \c elem array of

   855  * the matrix.

   856  */

   857 static void m3gClassify(Matrix *mtx)

   858 {

   859     M3Guint mask = 0;

   860     const M3Gfloat *p;

   861     int i;

   863     M3G_ASSERT(mtx != NULL);

   864     M3G_ASSERT(!m3gIsClassified(mtx));

   866     p = mtx->elem;

   867     for (i = 0; i < 16; ++i) {

   868         M3Gfloat elem = *p++;

   869         mask |= (m3gElementClass(elem) << (i*2));

   870     }

   871     mtx->mask = mask;

   872     mtx->classified = M3G_TRUE;

   873 }

   875 /*!

   876  * \internal

   877  * \brief Sets matrix classification directly

   878  */

   879 static M3G_INLINE void m3gClassifyAs(M3Guint mask, Matrix *mtx)

   880 {

   881     M3G_ASSERT(mtx != NULL);

   882     mtx->mask = mask;

   883     mtx->classified = M3G_TRUE;

   884     mtx->complete = M3G_FALSE;

   885 }

   887 /*!

   888  * \internal

   889  * \brief Attempts to classify a matrix more precisely

   890  *

   891  * Tries to classify all "free" elements of a matrix into one of the

   892  * predefined constants to improve precision and performance in

   893  * subsequent calculations.

   894  */

   895 static void m3gSubClassify(Matrix *mtx)

   896 {

   897     M3G_ASSERT_PTR(mtx);

   898     M3G_ASSERT(m3gIsClassified(mtx));

   899     {

   900         const M3Gfloat *p = mtx->elem;

   901         M3Guint inMask = mtx->mask;

   902         M3Guint outMask = inMask;

   903         int i;

   905         for (i = 0; i < 16; ++i, inMask >>= 2) {

   906             M3Gfloat elem = *p++;

   907             if ((inMask & 0x03) == ELEM_ANY) {

   908                 outMask &= ~(0x03u << (i*2));

   909                 outMask |= (m3gElementClass(elem) << (i*2));

   910             }

   911         }

   912         mtx->mask = outMask;

   913     }

   914 }

   916 /*!

   917  * \internal

   918  * \brief Fills in the implicit values for a classified matrix

   919  */

   920 static void m3gFillClassifiedMatrix(Matrix *mtx)

   921 {

   922     int i;

   923     M3Guint mask;

   924     M3Gfloat *p;

   926     M3G_ASSERT(mtx != NULL);

   927     M3G_ASSERT(mtx->classified);

   928     M3G_ASSERT(!mtx->complete);

   930     mask = mtx->mask;

   931     p = mtx->elem;

   933     for (i = 0; i < 16; ++i, mask >>= 2) {

   934         unsigned elem = (mask & 0x03);

   935         switch (elem) {

   936         case ELEM_ZERO:         *p++ =  0.0f; break;

   937         case ELEM_ONE:          *p++ =  1.0f; break;

   938         case ELEM_MINUS_ONE:    *p++ = -1.0f; break;

   939         default:                ++p;

   940         }

   941     }

   942     mtx->complete = M3G_TRUE;

   943 }

   946 #if !defined(M3G_HW_FLOAT)

   947 /*!

   948  * \internal

   949  * \brief Performs one multiply-add of classified matrix elements

   950  *

   951  * \param amask element class of the first multiplicand

   952  * \param a     float value of the first multiplicand

   953  * \param bmask element class of the second multiplicand

   954  * \param b     float value of the second multiplicand

   955  * \param c     float value to add

   956  * \return a * b + c

   957  *

   958  * \notes inline, as only called from the matrix product function

   959  */

   960 static M3G_INLINE M3Gfloat m3gClassifiedMadd(

   961     const M3Gbitmask amask, const M3Gfloat *pa,

   962     const M3Gbitmask bmask, const M3Gfloat *pb,

   963     const M3Gfloat c)

   964 {

   965     /* Check for zero product to reduce the switch cases below */

   967     if (amask == ELEM_ZERO || bmask == ELEM_ZERO) {

   968         return c;

   969     }

   971     /* Branch based on the classification of a */

   973     switch (amask) {

   975     case ELEM_ANY:

   976         if (bmask == ELEM_ONE) {

   977             return m3gAdd(*pa, c);      /*  a * 1 + c  =  a + c  */

   978         }

   979         if (bmask == ELEM_MINUS_ONE) {

   980             return m3gSub(c, *pa);      /*  a * -1 + c = -a + c  */

   981         }

   982         return m3gMadd(*pa, *pb, c);    /*  a * b + c            */

   984     case ELEM_ONE:

   985         if (bmask == ELEM_ONE) {

   986             return m3gAdd(c, 1.f);      /*  1 * 1 + c  = 1 + c   */

   987         }

   988         if (bmask == ELEM_MINUS_ONE) {

   989             return m3gSub(c, 1.f);      /*  1 * -1 + c = -1 + c  */

   990         }

   991         return m3gAdd(*pb, c);          /*  1 * b + c  =  b + c  */

   993     case ELEM_MINUS_ONE:

   994         if (bmask == ELEM_ONE) {

   995             return m3gSub(c, 1.f);      /* -1 * 1 + c  = -1 + c  */

   996         }

   997         if (bmask == ELEM_MINUS_ONE) {

   998             return m3gAdd(c, 1.f);      /* -1 * -1 + c =  1 + c  */

   999         }

  1000         return m3gSub(c, *pb);          /* -1 * b + c  = -b + c  */

  1002     default:

  1003         M3G_ASSERT(M3G_FALSE);

  1004         return 0.0f;

  1005     }

  1006 }

  1007 #endif /*!defined(M3G_HW_FLOAT)*/

  1009 /*!

  1010  * \internal

  1011  * \brief Computes a generic 4x4 matrix product

  1012  */

  1013 static void m3gGenericMatrixProduct(Matrix *dst,

  1014                                     const Matrix *left, const Matrix *right)

  1015 {

  1016     M3G_ASSERT(dst != NULL && left != NULL && right != NULL);

  1018     {

  1019 #       if defined(M3G_HW_FLOAT)

  1020         if (!left->complete) {

  1021             m3gFillClassifiedMatrix((Matrix*)left);

  1022         }

  1023         if (!right->complete) {

  1024             m3gFillClassifiedMatrix((Matrix*)right);

  1025         }

  1026 #       else

  1027         const unsigned lmask = left->mask;

  1028         const unsigned rmask = right->mask;

  1029 #       endif

  1031 #if defined(M3G_HW_FLOAT_VFPV2)

  1032 		_m3gGenericMatrixProduct(dst, left, right);

  1033 #else

  1034         {

  1035             int row;

  1037             for (row = 0; row < 4; ++row) {

  1038                 int col;

  1039                 for (col = 0; col < 4; ++col) {

  1040                     int k;

  1041                     M3Gfloat a = 0;

  1042                     for (k = 0; k < 4; ++k) {

  1043                         M3Gint lidx = MELEM(row, k);

  1044                         M3Gint ridx = MELEM(k, col);

  1045 #                       if defined(M3G_HW_FLOAT)

  1046                         a = m3gMadd(left->elem[lidx], right->elem[ridx], a);

  1047 #                       else

  1048                         a = m3gClassifiedMadd((lmask >> (2 * lidx)) & 3,

  1049                                               &left->elem[lidx],

  1050                                               (rmask >> (2 * ridx)) & 3,

  1051                                               &right->elem[ridx],

  1052                                               a);

  1053 #                       endif /*!M3G_HW_FLOAT*/

  1054                     }

  1055                     M44F(dst, row, col) = a;

  1056                 }

  1057             }

  1058         }

  1059 #endif /*!M3G_HW_FLOAT_VFPV2*/

  1060     }

  1061     dst->complete = M3G_TRUE;

  1062     dst->classified = M3G_FALSE;

  1063 }

  1065 /*!

  1066  * \internal

  1067  * \brief Converts a float vector to 16-bit integers

  1068  *

  1069  * \param size   vector length

  1070  * \param vec    pointer to float vector

  1071  * \param scale  scale of output values, as the number of bits to

  1072  *               shift left to get actual values

  1073  * \param outVec output value vector

  1074  */

  1075 static void m3gFloatVecToShort(M3Gint size, const M3Gfloat *vec,

  1076                                M3Gint scale, M3Gshort *outVec)

  1077 {

  1078     const M3Guint *vecInt = (const M3Guint*) vec;

  1079     M3Gint i;

  1081     for (i = 0; i < size; ++i) {

  1082         M3Guint a = vecInt[i];

  1083         if ((a & ~SIGN_MASK) < (1 << 23)) {

  1084             *outVec++ = 0;

  1085         }

  1086         else {

  1087             M3Gint shift =

  1088                 scale - ((M3Gint)((vecInt[i] >> 23) & 0xFFu) - (127 + 23));

  1089             M3G_ASSERT(shift > 8); /* or the high bits will overflow */

  1091             if (shift > 23) {

  1092                 *outVec++ = 0;

  1093             }

  1094             else {

  1095                 M3Gint out =

  1096                     (M3Gint) (((a & MANTISSA_MASK) | LEADING_ONE) >> shift);

  1097                 if (a >> 31) {

  1098                     out = -out;

  1099                 }

  1100                 M3G_ASSERT(m3gInRange(out, -32767, 32767));

  1101                 *outVec++ = (M3Gshort) out;

  1102             }

  1103         }

  1104     }

  1105 }

  1107 /*----------------------------------------------------------------------

  1108  * Internal functions

  1109  *--------------------------------------------------------------------*/

  1111 /*--------------------------------------------------------------------*/

  1113 #if defined(M3G_SOFT_FLOAT)

  1115 #if !defined (M3G_BUILD_ARM)

  1117 static M3Gfloat m3gAdd(const M3Gfloat a, const M3Gfloat b)

  1118 {

  1119     return m3gFloatAdd(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b));

  1120 }

  1122 static M3Gfloat m3gMul(const M3Gfloat a, const M3Gfloat b)

  1123 {

  1124     return m3gFloatMul(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b));

  1125 }

  1127 #endif /* M3G_BUILD_ARM */

  1129 /*!

  1130  * \internal

  1131  * \brief Computes the reciprocal of the square root

  1132  *

  1133  * \param x a floating point value

  1134  * \return 1 / square root of \c x

  1135  */

  1136 static M3Gfloat m3gRcpSqrt(const M3Gfloat x)

  1137 {

  1138     /* Approximation followed by Newton-Raphson iteration a'la

  1139      * "Floating-point tricks" by J. Blinn, but we iterate several

  1140      * times to improve precision */

  1142     M3Gint i = (M3G_FLOAT_ONE + (M3G_FLOAT_ONE >> 1))

  1143         - (FLOAT_AS_UINT(x) >> 1);

  1144     M3Gfloat y = INT_AS_FLOAT(i);

  1145     for (i = 0; i < 3; ++i) {

  1146         y = m3gMul(y, m3gSub(1.5f, m3gHalf(m3gMul(x, m3gSquare(y)))));

  1147     }

  1148     return y;

  1149 }

  1151 /*!

  1152  * \internal

  1153  * \brief Computes the square root

  1154  *

  1155  * \param x a floating point value

  1156  * \return square root of \c x

  1157  */

  1158 static M3Gfloat m3gSqrt(const M3Gfloat x)

  1159 {

  1160     /* Approximation followed by Newton-Raphson iteration a'la

  1161      * "Floating-point tricks" by J. Blinn, but we iterate several

  1162      * times to improve precision */

  1164     M3Gint i = (FLOAT_AS_UINT(x) >> 1) + (M3G_FLOAT_ONE >> 1);

  1165     M3Gfloat y = INT_AS_FLOAT(i);

  1166     for (i = 0; i < 2; ++i) {

  1167         y = m3gDiv(m3gAdd(m3gSquare(y), x), m3gDouble(y));

  1168     }

  1169     return y;

  1170 }

  1172 #endif /* M3G_SOFT_FLOAT */

  1174 /*--------------------------------------------------------------------*/

  1176 #if defined(M3G_SOFT_FLOAT)

  1177 /*!

  1178  * \internal

  1179  */

  1180 static M3Gfloat m3gArcCos(const M3Gfloat x)

  1181 {

  1182     return (M3Gfloat) acos(x);

  1183 }

  1185 /*!

  1186  * \internal

  1187  */

  1188 static M3Gfloat m3gArcTan(const M3Gfloat y, const M3Gfloat x)

  1189 {

  1190     return (M3Gfloat) atan2(y, x);

  1191 }

  1193 /*!

  1194  * \internal

  1195  */

  1196 static M3Gfloat m3gCos(const M3Gfloat x)

  1197 {

  1198     return m3gSin(m3gAdd(x, HALF_PI));

  1199 }

  1201 /*!

  1202  * \internal

  1203  * \brief

  1204  */

  1205 static M3Gfloat m3gSin(const M3Gfloat x)

  1206 {

  1207     M3Gfloat f = x;

  1209     /* If x is greater than two pi, do a modulo operation to bring it

  1210      * back in range for the internal sine function */

  1212     if (m3gAbs(f) >= TWO_PI) {

  1213         f = m3gMul (f, (1.f / TWO_PI));

  1214         f = m3gFrac(f);

  1215         f = m3gMul (f, TWO_PI);

  1216     }

  1218     /* Compute the result, negating both the input value and the

  1219      * result if x was negative */

  1220     {

  1221         M3Guint i = FLOAT_AS_UINT(f);

  1222         M3Guint neg = (i & SIGN_MASK);

  1223         i ^= neg;

  1224         f = m3gSinFirstPeriod(INT_AS_FLOAT(i));

  1225         i = FLOAT_AS_UINT(f) ^ neg;

  1226         return INT_AS_FLOAT(i);

  1227     }

  1228 }

  1230 /*!

  1231  * \internal

  1232  */

  1233 static M3Gfloat m3gTan(const M3Gfloat x)

  1234 {

  1235     return (M3Gfloat) tan(x);

  1236 }

  1238 /*!

  1239  * \internal

  1240  */

  1241 static M3Gfloat m3gExp(const M3Gfloat a)

  1242 {

  1243     return (M3Gfloat) exp(a);

  1244 }

  1245 #endif /* M3G_SOFT_FLOAT */

  1247 /*!

  1248  * \brief Checks whether the bottom row of a matrix is 0 0 0 1

  1249  */

  1250 static M3Gbool m3gIsWUnity(const Matrix *mtx)

  1251 {

  1252     M3G_ASSERT_PTR(mtx);

  1254     if (!m3gIsClassified(mtx)) {

  1255         return (IS_ZERO(M44F(mtx, 3, 0)) &&

  1256                 IS_ZERO(M44F(mtx, 3, 1)) &&

  1257                 IS_ZERO(M44F(mtx, 3, 2)) &&

  1258                 IS_ONE (M44F(mtx, 3, 3)));

  1259     }

  1260     else {

  1261         return ((mtx->mask & 0xC0C0C0C0) == (ELEM_ONE << 30));

  1262     }

  1263 }

  1265 /*!

  1266  * \brief Makes a quaternion by exponentiating a quaternion logarithm

  1267  */

  1268 static void m3gExpQuat(Quat *quat, const Vec3 *qExp)

  1269 {

  1270     M3Gfloat theta;

  1272     M3G_ASSERT_PTR(quat);

  1273     M3G_ASSERT_PTR(qExp);

  1275     theta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(qExp->x),

  1276                                   m3gSquare(qExp->y)),

  1277                            m3gSquare(qExp->z)));

  1279     if (theta > EPSILON) {

  1280         M3Gfloat s = m3gMul(m3gSin(theta), m3gRcp(theta));

  1281         quat->x = m3gMul(qExp->x, s);

  1282         quat->y = m3gMul(qExp->y, s);

  1283         quat->z = m3gMul(qExp->z, s);

  1284         quat->w = m3gCos(theta);

  1285     }

  1286     else {

  1287         quat->x = quat->y = quat->z = 0.0f;

  1288         quat->w = 1.0f;

  1289     }

  1290 }

  1292 /*!

  1293  * \brief Natural logarithm of a unit quaternion.

  1294  */

  1295 static void m3gLogQuat(Vec3 *qLog, const Quat *quat)

  1296 {

  1297     M3Gfloat sinTheta = m3gSqrt(m3gNorm3((const float *) quat));

  1299     if (sinTheta > EPSILON) {

  1300         M3Gfloat s = m3gArcTan(sinTheta, quat->w) / sinTheta;

  1301         qLog->x = m3gMul(s, quat->x);

  1302         qLog->y = m3gMul(s, quat->y);

  1303         qLog->z = m3gMul(s, quat->z);

  1304     }

  1305     else {

  1306         qLog->x = qLog->y = qLog->z = 0.0f;

  1307     }

  1308 }

  1310 /*!

  1311  * \brief Make quaternion the "logarithmic difference" between two

  1312  * other quaternions.

  1313  */

  1314 static void m3gLogDiffQuat(Vec3 *logDiff,

  1315                            const Quat *from, const Quat *to)

  1316 {

  1317     Quat temp;

  1318     temp.x = m3gNegate(from->x);

  1319     temp.y = m3gNegate(from->y);

  1320     temp.z = m3gNegate(from->z);

  1321     temp.w =           from->w;

  1322     m3gMulQuat(&temp, to);

  1323     m3gLogQuat(logDiff, &temp);

  1324 }

  1326 /*!

  1327  * \brief Rounds a float to the nearest integer

  1328  *

  1329  * Overflows are clamped to the maximum or minimum representable

  1330  * value.

  1331  */

  1332 static M3Gint m3gRoundToInt(const M3Gfloat a)

  1333 {

  1334     M3Guint base = FLOAT_AS_UINT(a);

  1335     M3Gint signMask, expo;

  1337     /* Decompose the number into sign, exponent, and base number */

  1339     signMask = ((M3Gint) base >> 31);   /* -> 0 or 0xFFFFFFFF */

  1340     expo = (M3Gint)((base >> 23) & 0xFF) - 127;

  1342     /* First check for large values and return either the negative or

  1343      * the positive maximum integer in case of overflow.  The overflow

  1344      * check can be made on the exponent alone, as large floats are

  1345      * spaced several integer values apart so that nothing will

  1346      * overflow because of rounding later on */

  1348     if (expo >= 31) {

  1349         return (M3Gint)((1U << 31) - 1 + (((M3Guint) signMask) & 1));

  1350     }

  1352     /* Also check for underflow to avoid problems with shifting by

  1353      * more than 31 */

  1355     if (expo < -1) {

  1356         return 0;

  1357     }

  1359     /* Mask out the sign and exponent bits, shift the base number so

  1360      * that the lowest bit corresponds to one half, then add one

  1361      * (half) and shift to round to the closest integer. */

  1363     base = (base | LEADING_ONE) << 8;   /* shift mantissa to 1.31 */

  1364     base =  base >> (30 - expo);        /* integer value as 31.1 */

  1365     base = (base + 1) >> 1;             /* round to nearest 32.0 */

  1367     /* Factor in the sign (negate if originally negative) and

  1368      * return */

  1370     return ((M3Gint) base ^ signMask) - signMask;

  1371 }

  1373 /*!

  1374  * \brief Calculates ray-triangle intersection.

  1375  *

  1376  * http://www.acm.org/jgt/papers/MollerTrumbore97

  1377  */

  1378 static M3Gbool m3gIntersectTriangle(const Vec3 *orig, const Vec3 *dir,

  1379                                     const Vec3 *vert0, const Vec3 *vert1, const Vec3 *vert2,

  1380                                     Vec3 *tuv, M3Gint cullMode)

  1381 {

  1382     Vec3 edge1, edge2, tvec, pvec, qvec;

  1383     M3Gfloat det,inv_det;

  1385     /* find vectors for two edges sharing vert0 */

  1386     edge1 = *vert1;

  1387     edge2 = *vert2;

  1388     m3gSubVec3(&edge1, vert0);

  1389     m3gSubVec3(&edge2, vert0);

  1391     /* begin calculating determinant - also used to calculate U parameter */

  1392     m3gCross(&pvec, dir, &edge2);

  1394     /* if determinant is near zero, ray lies in plane of triangle */

  1395     det = m3gDot3(&edge1, &pvec);

  1397     if (cullMode == 0 && det <= 0) return M3G_FALSE;

  1398     if (cullMode == 1 && det >= 0) return M3G_FALSE;

  1400     if (det > -EPSILON && det < EPSILON)

  1401         return M3G_FALSE;

  1402     inv_det = m3gRcp(det);

  1404     /* calculate distance from vert0 to ray origin */

  1405     tvec = *orig;

  1406     m3gSubVec3(&tvec, vert0);

  1408     /* calculate U parameter and test bounds */

  1409     tuv->y = m3gMul(m3gDot3(&tvec, &pvec), inv_det);

  1410     if (tuv->y < 0.0f || tuv->y > 1.0f)

  1411         return M3G_FALSE;

  1413     /* prepare to test V parameter */

  1414     m3gCross(&qvec, &tvec, &edge1);

  1416     /* calculate V parameter and test bounds */

  1417     tuv->z = m3gMul(m3gDot3(dir, &qvec), inv_det);

  1418     if (tuv->z < 0.0f || m3gAdd(tuv->y, tuv->z) > 1.0f)

  1419         return M3G_FALSE;

  1421     /* calculate t, ray intersects triangle */

  1422     tuv->x = m3gMul(m3gDot3(&edge2, &qvec), inv_det);

  1424     return M3G_TRUE;

  1425 }

  1427 /*!

  1428  * \brief Calculates ray-box intersection.

  1429  *

  1430  * http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm

  1431  */

  1433 #define XO	orig->x

  1434 #define YO	orig->y

  1435 #define ZO	orig->z

  1436 #define XD	dir->x

  1437 #define YD	dir->y

  1438 #define ZD	dir->z

  1440 #define XL	box->min[0]

  1441 #define YL	box->min[1]

  1442 #define ZL	box->min[2]

  1443 #define XH	box->max[0]

  1444 #define YH	box->max[1]

  1445 #define ZH	box->max[2]

  1447 /*!

  1448  * \internal

  1449  * \brief Ray - bounding box intersection

  1450  *

  1451  */

  1452 static M3Gbool m3gIntersectBox(const Vec3 *orig, const Vec3 *dir, const AABB *box)

  1453 {

  1454 	M3Gfloat tnear = M3G_MIN_NEGATIVE_FLOAT;

  1455 	M3Gfloat tfar  = M3G_MAX_POSITIVE_FLOAT;

  1456 	M3Gfloat t1, t2, temp;

  1458 	/* X slab */

  1459 	if(XD != 0) {

  1460 		t1 = m3gSub(XL, XO) / XD;

  1461 		t2 = m3gSub(XH, XO) / XD;

  1463 		if(t1 > t2) {

  1464 			temp = t1;

  1465 			t1 = t2;

  1466 			t2 = temp;

  1467 		}

  1469 		if(t1 > tnear) tnear = t1;

  1470 		if(t2 < tfar) tfar = t2;

  1472 		if(tnear > tfar) return M3G_FALSE;

  1473 		if(tfar < 0) return M3G_FALSE;

  1474 	}

  1475 	else {

  1476 		if(XO > XH || XO < XL) return M3G_FALSE;

  1477 	}

  1479 	/* Y slab */

  1480 	if(YD != 0) {

  1481 		t1 = m3gSub(YL, YO) / YD;

  1482 		t2 = m3gSub(YH, YO) / YD;

  1484 		if(t1 > t2) {

  1485 			temp = t1;

  1486 			t1 = t2;

  1487 			t2 = temp;

  1488 		}

  1490 		if(t1 > tnear) tnear = t1;

  1491 		if(t2 < tfar) tfar = t2;

  1493 		if(tnear > tfar) return M3G_FALSE;

  1494 		if(tfar < 0) return M3G_FALSE;

  1495 	}

  1496 	else {

  1497 		if(YO > YH || YO < YL) return M3G_FALSE;

  1498 	}

  1500 	/* Z slab */

  1501 	if(ZD != 0) {

  1502 		t1 = m3gSub(ZL, ZO) / ZD;

  1503 		t2 = m3gSub(ZH, ZO) / ZD;

  1505 		if(t1 > t2) {

  1506 			temp = t1;

  1507 			t1 = t2;

  1508 			t2 = temp;

  1509 		}

  1511 		if(t1 > tnear) tnear = t1;

  1512 		if(t2 < tfar) tfar = t2;

  1514 		if(tnear > tfar) return M3G_FALSE;

  1515 		if(tfar < 0) return M3G_FALSE;

  1516 	}

  1517 	else {

  1518 		if(ZO > ZH || ZO < ZL) return M3G_FALSE;

  1519 	}

  1521 	return M3G_TRUE;

  1522 }

  1524 /*!

  1525  * \brief Calculates the intersection of two rectangles. Always fills

  1526  * the intersection result.

  1527  *

  1528  * \param dst   result of the intersection

  1529  * \param r1    rectangle 1

  1530  * \param r2    rectangle 2

  1531  */

  1532 static M3Gbool m3gIntersectRectangle(M3GRectangle *dst, M3GRectangle *r1, M3GRectangle *r2)

  1533 {

  1534     M3Gbool intersects = M3G_TRUE;

  1535     M3Gint min, max;

  1537     max = (r1->x) >= (r2->x) ? (r1->x) : (r2->x);

  1538     min = (r1->x + r1->width) <= (r2->x + r2->width) ? (r1->x + r1->width) : (r2->x + r2->width);

  1539     if ((min - max) < 0) intersects = M3G_FALSE;

  1540     dst->x = max;

  1541     dst->width = min - max;

  1543     max = (r1->y) >= (r2->y) ? (r1->y) : (r2->y);

  1544     min = (r1->y + r1->height) <= (r2->y + r2->height) ? (r1->y + r1->height) : (r2->y + r2->height);

  1545     if ((min - max) < 0) intersects = M3G_FALSE;

  1546     dst->y = max;

  1547     dst->height = min - max;

  1549     return intersects;

  1550 }

  1552 /*-------- float-to-int color conversions --------*/

  1554 static M3Guint m3gAlpha1f(M3Gfloat a)

  1555 {

  1556     M3Guint alpha = (M3Guint) m3gFloatToByte(a);

  1557     return (alpha << 24) | M3G_RGB_MASK;

  1558 }

  1560 static M3Guint m3gColor3f(M3Gfloat r, M3Gfloat g, M3Gfloat b)

  1561 {

  1562     return ((M3Guint) m3gFloatToByte(r) << 16)

  1563         |  ((M3Guint) m3gFloatToByte(g) <<  8)

  1564         |   (M3Guint) m3gFloatToByte(b)

  1565         |   M3G_ALPHA_MASK;

  1566 }

  1568 static M3Guint m3gColor4f(M3Gfloat r, M3Gfloat g, M3Gfloat b, M3Gfloat a)

  1569 {

  1570     return ((M3Guint) m3gFloatToByte(r) << 16)

  1571         |  ((M3Guint) m3gFloatToByte(g) <<  8)

  1572         |   (M3Guint) m3gFloatToByte(b)

  1573         |  ((M3Guint) m3gFloatToByte(a) << 24);

  1574 }

  1576 static void m3gFloatColor(M3Gint argb, M3Gfloat intensity, M3Gfloat *rgba)

  1577 {

  1578     /* NOTE we intentionally aim a bit high here -- some GL

  1579      * implementations may round down instead of closest */

  1581     const M3Gfloat oneOver255 = (M3Gfloat)(1.0001 / 255.0);

  1583 	rgba[0] = (M3Gfloat)((argb >> 16) & 0xFF);

  1584 	rgba[1] = (M3Gfloat)((argb >>  8) & 0xFF);

  1585 	rgba[2] = (M3Gfloat)((argb      ) & 0xFF);

  1586 	rgba[3] = (M3Gfloat)((argb >> 24) & 0xFF);

  1588     m3gScale4(rgba, m3gMul(oneOver255, intensity));

  1589 }

  1591 /*!

  1592  * \brief Converts the 3x3 submatrix of a matrix into fixed point

  1593  *

  1594  * The input matrix must be an affine one, i.e. the bottom row must be

  1595  * 0 0 0 1.  The output matrix is guaranteed to be such that it can be

  1596  * multiplied with a 16-bit 3-vector without overflowing, while using

  1597  * the 32-bit range optimally.

  1598  *

  1599  * \param mtx  the original matrix

  1600  * \param elem 9 shorts to hold the fixed point base numbers

  1601  * \return floating point exponent for the values in \c elem

  1602  *         (number of bits to shift left for actual values)

  1603  */

  1604 static M3Gint m3gGetFixedPoint3x3Basis(const Matrix *mtx, M3Gshort *elem)

  1605 {

  1606     M3Gint outExp;

  1607     M3Gint row, col;

  1608     const M3Guint *m;

  1610     if (!mtx->complete) {

  1611         m3gFillClassifiedMatrix((Matrix*) mtx);

  1612     }

  1613     m = (const M3Guint*) mtx->elem;

  1615     /* First, find the maximum exponent value in the whole matrix */

  1617     outExp = 0;

  1618     for (col = 0; col < 3; ++col) {

  1619         for (row = 0; row < 3; ++row) {

  1620             M3Gint element = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK);

  1621             outExp = M3G_MAX(outExp, element);

  1622         }

  1623     }

  1624     outExp >>= 23;

  1626     /* Our candidate exponent is the maximum found plus 9, which is

  1627      * guaranteed to shift the maximum unsigned 24-bit mantissa (which

  1628      * always will have the high bit set) down to the signed 16-bit

  1629      * range */

  1631     outExp += 9;

  1633     /* Now proceed to sum each row and see what's the actual smallest

  1634      * exponent we can safely use without overflowing in a 16+16

  1635      * matrix-vector multiplication; this will win us one bit

  1636      * (doubling the precision) compared to the conservative approach

  1637      * of just shifting everything down by 10 bits */

  1639     for (row = 0; row < 3; ++row) {

  1641         /* Sum the absolute values on this row */

  1643         M3Gint rowSum = 0;

  1644         for (col = 0; col < 3; ++col) {

  1645             M3Gint a = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK);

  1646             M3Gint shift = outExp - (a >> 23);

  1647             M3G_ASSERT(shift < 265);

  1649             if (shift < 24) {

  1650                 rowSum += ((a & MANTISSA_MASK) | LEADING_ONE) >> shift;

  1651             }

  1652         }

  1654         /* Now we have a 26-bit sum of the absolute values on this

  1655          * row, and shift that down until we fit the target range of

  1656          * [0, 65535].  Note that this still leaves *exactly* enough

  1657          * space for adding in an arbitrary 16-bit translation vector

  1658          * after multiplying with the matrix! */

  1660         while (rowSum >= (1 << 16)) {

  1661             rowSum >>= 1;

  1662             ++outExp;

  1663         }

  1664     }

  1666     /* De-bias the exponent, but add in an extra 23 to account for the

  1667      * decimal bits in the floating point mantissa values we started

  1668      * with (we're returning the exponent as "bits to shift left to

  1669      * get integers", so we're off by 23 from IEEE notation) */

  1671     outExp = (outExp - 127) - 23;

  1673     /* Finally, shift all the matrix elements to our final output

  1674      * precision */

  1676     for (col = 0; col < 3; ++col) {

  1677         m3gFloatVecToShort(3, mtx->elem + MELEM(0, col), outExp, elem);

  1678         elem += 3;

  1679     }

  1680     return outExp;

  1681 }

  1683 /*!

  1684  * \brief Gets the translation component of a matrix as fixed point

  1685  *

  1686  * \param mtx  the matrix

  1687  * \param elem 3 shorts to write the vector into

  1688  * \return floating point exponent for the values in \c elem

  1689  *         (number of bits to shift left for actual values)

  1690  */

  1691 static M3Gint m3gGetFixedPointTranslation(const Matrix *mtx, M3Gshort *elem)

  1692 {

  1693     const M3Guint *m;

  1695     M3G_ASSERT(m3gIsWUnity(mtx));

  1696     if (!mtx->complete) {

  1697         m3gFillClassifiedMatrix((Matrix*) mtx);

  1698     }

  1699     m = (const M3Guint*) &mtx->elem[MELEM(0, 3)];

  1701     /* Find the maximum exponent, then scale down by 9 bits from that

  1702      * to shift the unsigned 24-bit mantissas to the signed 16-bit

  1703      * range */

  1704     {

  1705         M3Gint outExp;

  1706         M3Guint maxElem = m[0] & ~SIGN_MASK;

  1707         maxElem = M3G_MAX(maxElem, m[1] & ~SIGN_MASK);

  1708         maxElem = M3G_MAX(maxElem, m[2] & ~SIGN_MASK);

  1710         outExp = (M3Gint)(maxElem >> 23) - (127 + 23) + 9;

  1711         m3gFloatVecToShort(3, mtx->elem + MELEM(0, 3), outExp, elem);

  1712         return outExp;

  1713     }

  1714 }

  1716 /*!

  1717  * \internal

  1718  * \brief Compute a bounding box enclosing two other boxes

  1719  *

  1720  * \param box   box to fit

  1721  * \param a     first box to enclose or NULL

  1722  * \param b     second box to enclose or NULL

  1723  *

  1724  * \note If both input boxes are NULL, the box is not modified.

  1725  */

  1726 static void m3gFitAABB(AABB *box, const AABB *a, const AABB *b)

  1727 {

  1728     int i;

  1730     M3G_ASSERT_PTR(box);

  1732     if (a) {

  1733         m3gValidateAABB(a);

  1734     }

  1735     if (b) {

  1736         m3gValidateAABB(b);

  1737     }

  1739     if (a && b) {

  1740         for (i = 0; i < 3; ++i) {

  1741             box->min[i] = M3G_MIN(a->min[i], b->min[i]);

  1742             box->max[i] = M3G_MAX(a->max[i], b->max[i]);

  1743         }

  1744     }

  1745     else if (a) {

  1746         *box = *a;

  1747     }

  1748     else if (b) {

  1749         *box = *b;

  1750     }

  1751 }

  1753 /*

  1754  * \internal

  1755  * \brief Transform an axis-aligned bounding box with a matrix

  1756  *

  1757  * This results in a box that encloses the transformed original box.

  1758  *

  1759  * Based on "Transforming Axis-Aligned Bounding Boxes" by Jim Arvo

  1760  * from Graphics Gems.

  1761  *

  1762  * \note The bottom row of the matrix is ignored in the transformation.

  1763  */

  1764 static void m3gTransformAABB(AABB *box, const Matrix *mtx)

  1765 {

  1766     M3Gfloat boxMin[3], boxMax[3];

  1767     M3Gfloat newMin[3], newMax[3];

  1769     m3gValidateAABB(box);

  1771     if (!mtx->complete) {

  1772         m3gFillClassifiedMatrix((Matrix*) mtx);

  1773     }

  1775     /* Get the original minimum and maximum extents as floats, and add

  1776      * the translation as the base for the transformed box */

  1777     {

  1778         int i;

  1779         for (i = 0; i < 3; ++i) {

  1780             boxMin[i] = box->min[i];

  1781             boxMax[i] = box->max[i];

  1782             newMin[i] = newMax[i] = M44F(mtx, i, 3);

  1783         }

  1784     }

  1786     /* Transform into the new minimum and maximum coordinates using

  1787      * the upper left 3x3 part of the matrix */

  1788     {

  1789         M3Gint row, col;

  1791         for (row = 0; row < 3; ++row) {

  1792             for (col = 0; col < 3; ++col) {

  1793                 M3Gfloat a = m3gMul(M44F(mtx, row, col), boxMin[col]);

  1794                 M3Gfloat b = m3gMul(M44F(mtx, row, col), boxMax[col]);

  1796                 if (a < b) {

  1797                     newMin[row] = m3gAdd(newMin[row], a);

  1798                     newMax[row] = m3gAdd(newMax[row], b);

  1799                 }

  1800                 else {

  1801                     newMin[row] = m3gAdd(newMin[row], b);

  1802                     newMax[row] = m3gAdd(newMax[row], a);

  1803                 }

  1804             }

  1805         }

  1806     }

  1808     /* Write back into the bounding box */

  1809     {

  1810         int i;

  1811         for (i = 0; i < 3; ++i) {

  1812             box->min[i] = newMin[i];

  1813             box->max[i] = newMax[i];

  1814         }

  1815     }

  1817     m3gValidateAABB(box);

  1818 }

  1820 #if defined(M3G_DEBUG)

  1821 /*!

  1822  * \brief

  1823  */

  1824 static void m3gValidateAABB(const AABB *aabb)

  1825 {

  1826     m3gValidateFloats(6, (float*) aabb);

  1827 }

  1828 #endif

  1830 /*----------------------------------------------------------------------

  1831  * Public functions

  1832  *--------------------------------------------------------------------*/

  1834 /*!

  1835  * \brief Linear interpolation of vectors

  1836  *

  1837  * \param size     number of components

  1838  * \param vec      output vector

  1839  * \param s        interpolation factor

  1840  * \param start    initial value

  1841  * \param end      target value

  1842  */

  1843 #if defined(M3G_HW_FLOAT_VFPV2)

  1845 __weak __asm void m3gLerp(M3Gint size,

  1846 				   M3Gfloat *vec,

  1847 				   M3Gfloat s,

  1848 				   const M3Gfloat *start, const M3Gfloat *end)

  1849 {

  1850 // r0 = size

  1851 // r1 = *vec

  1852 // r2 = s

  1853 // r3 = *start

  1854 // sp[0] = *end

  1856 		EXPORT	m3gLerp[DYNAMIC]

  1857 		CODE32

  1858 /*

  1859     M3Gfloat sCompl = 1.0 - s;

  1860     for (i = 0; i < size; ++i) {

  1861         vec[i] = sCompl*start[i] + s*end[i];

  1862     }

  1863 */

  1864 //

  1865 // if size = 0, return

  1866 //

  1867 		CMP		r0, #0

  1868 		BXEQ	lr

  1870 		FMSR	s0, r2

  1871 		MOV		r2, r3

  1872 		LDR		r3, [sp]

  1874 		FLDS	s1,=1.0

  1875 		STMFD	sp!, {r4-r5}

  1876 		FSUBS	s2, s1, s0			// sCompl = 1 - s

  1878 		FMRX	r4, FPSCR

  1879 		CMP		r0, #4

  1880 		BGT		_m3gLerp_over4Loop

  1882 //

  1883 // if 1 < size <= 4

  1884 //

  1885 // set vector STRIDE = 1, LENGTH = 4

  1886 		BIC		r12, r4, #0x00370000

  1887 		ORR		r12, #(3<<16)

  1888 		FMXR	FPSCR, r12

  1890 		FLDMIAS	r2!, {s4-s7}		// load the start[i] values

  1891 		FLDMIAS	r3!, {s8-s11}		// load the end[i] values

  1892 		FMULS	s12, s4, s2			// [s12-s15]  = [sCompl*start[0] .. sCompl*start[3]]

  1893 		FMACS	s12, s8, s0			// [s12-s15] += [    	s*end[0] ..        s*end[3]]

  1894 		CMP		r0, #1

  1895 		FSTS	s12, [r1]

  1896 		FSTSGT	s13, [r1, #4]

  1897 		CMP		r0, #3

  1898 		FSTSGE	s14, [r1, #8]

  1899 		FSTSGT	s15, [r1, #12]

  1901 		FMXR	FPSCR, r4

  1903 		LDMFD	sp!, {r4-r5}

  1905 		BX		lr

  1906 //

  1907 // if size > 4, interpolate 8 values in one loop

  1908 //

  1909 _m3gLerp_over4Loop

  1911 		FSTMFDD	sp!, {d8-d9}

  1912 		MOVS	r5, r0, ASR #3			// size/8

  1913 		SUB		r0, r0, r5, LSL #3		// tail length

  1915 // set vector STRIDE = 1, LENGTH = 8

  1916 		BIC		r12, r4, #0x00370000

  1917 		ORR		r12, #(7<<16)

  1918 		FMXR	FPSCR, r12

  1921 _m3gLerp_alignedLoop

  1923 		FLDMIASNE	r2!, {s8-s15}		// load the start[i] values

  1924 		FLDMIASNE	r3!, {s16-s23}		// load the end[i] values

  1925 		FMULSNE		s24, s8, s2			// [s16-s23]  = [ sCompl*start[0] sCompl*start[1] .. sCompl*start[7]]

  1926 		FMACSNE		s24, s16, s0		// [s16-s23] += [		 s*end[0]        s*end[1] ..		s*end[7]]

  1927 		FSTMIASNE	r1!, {s24-s31}

  1928 		SUBSNE		r5, #1

  1930 		BNE			_m3gLerp_alignedLoop

  1932 // process the 0-7 remaining values in the tail

  1934 		CMP			r0, #1

  1935 		FLDMIASGE	r2!, {s8-s15}

  1936 		FLDMIASGE	r3!, {s16-s23}

  1937 		FMULSGE		s24, s8, s2

  1938 		FMACSGE		s24, s16, s0

  1939 		FSTSGE		s24, [r1]

  1940 		FSTSGT		s25, [r1, #4]

  1941 		CMP			r0, #3

  1942 		FSTSGE		s26, [r1, #8]

  1943 		FSTSGT		s27, [r1, #12]

  1944 		CMP			r0, #5

  1945 		FSTSGE		s28, [r1, #16]

  1946 		FSTSGT		s29, [r1, #20]

  1947 		CMP			r0, #7

  1948 		FSTSEQ		s30, [r1, #24]

  1950 		FMXR	FPSCR, r4

  1952 		FLDMFDD	sp!, {d8-d9}

  1953 		LDMFD	sp!, {r4-r5}

  1955 		BX		lr

  1957 }

  1958 #else /* #if defined(M3G_HW_FLOAT_VFPV2) */

  1960 M3G_API void m3gLerp(M3Gint size,

  1961              M3Gfloat *vec,

  1962              M3Gfloat s,

  1963              const M3Gfloat *start, const M3Gfloat *end)

  1964 {

  1965     int i;

  1967     M3Gfloat sCompl = m3gSub(1.f, s);

  1968     for (i = 0; i < size; ++i) {

  1969         vec[i] = m3gAdd(m3gMul(sCompl, start[i]), m3gMul(s, end[i]));

  1970     }

  1971 }

  1972 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */

  1974 /*!

  1975  * \brief Hermite spline interpolation of vectors

  1976  *

  1977  * \param size      number of components

  1978  * \param vec       output vector

  1979  * \param s         interpolation factor

  1980  * \param start     start value vector

  1981  * \param end       end value vector

  1982  * \param tStart    start tangent vector

  1983  * \param tEnd      end tangent vector

  1984  */

  1985 M3G_API void m3gHermite(M3Gint size,

  1986                         M3Gfloat *vec,

  1987                         M3Gfloat s,

  1988                         const M3Gfloat *start, const M3Gfloat *end,

  1989                         const M3Gfloat *tStart, const M3Gfloat *tEnd)

  1990 {

  1991     M3Gfloat s2 = m3gSquare(s);

  1992     M3Gfloat s3 = m3gMul(s2, s);

  1993     int i;

  1995     for (i = 0; i < size; ++i) {

  1996         vec[i] =

  1997             m3gMadd(start[i],

  1998                     m3gAdd(m3gSub(m3gDouble(s3), m3gMul(3.f, s2)), 1.f),

  1999                     m3gMadd(end[i],

  2000                             m3gSub(m3gMul(3.f, s2), m3gDouble(s3)),

  2001                             m3gMadd(tStart[i],

  2002                                     m3gAdd(m3gSub(s3, m3gDouble(s2)), s),

  2003                                     m3gMul(tEnd[i],

  2004                                            m3gSub(s3, s2)))));

  2006     }

  2008     /*  vec = ( 2*s3 - 3*s2 + 1) * start

  2009             + (-2*s3 + 3*s2    ) * end

  2010             + (   s3 - 2*s2 + s) * tStart

  2011             + (   s3 -   s2    ) * tEnd;    */

  2012 }

  2014 /*--------------------------------------------------------------------*/

  2016 /*!

  2017  * \brief Sets a matrix to a copy of another matrix

  2018  */

  2019 M3G_API void m3gCopyMatrix(Matrix *dst, const Matrix *src)

  2020 {

  2021     M3G_ASSERT(dst != NULL && src != NULL);

  2022     *dst = *src;

  2023 }

  2025 /*!

  2026  * \brief Vector addition

  2027  */

  2028 M3G_API void m3gAddVec3(Vec3 *vec, const Vec3 *other)

  2029 {

  2030     vec->x = m3gAdd(vec->x, other->x);

  2031     vec->y = m3gAdd(vec->y, other->y);

  2032     vec->z = m3gAdd(vec->z, other->z);

  2033 }

  2035 /*!

  2036  * \brief Vector addition

  2037  */

  2038 M3G_API void m3gAddVec4(Vec4 *vec, const Vec4 *other)

  2039 {

  2040     vec->x = m3gAdd(vec->x, other->x);

  2041     vec->y = m3gAdd(vec->y, other->y);

  2042     vec->z = m3gAdd(vec->z, other->z);

  2043     vec->w = m3gAdd(vec->w, other->w);

  2044 }

  2046 /*!

  2047  * \brief Cross product of two 3D vectors expressed as 4D vectors

  2048  */

  2049 M3G_API void m3gCross(Vec3 *dst, const Vec3 *a, const Vec3 *b)

  2050 {

  2051     dst->x = m3gSub(m3gMul(a->y, b->z), m3gMul(a->z, b->y));

  2052     dst->y = m3gSub(m3gMul(a->z, b->x), m3gMul(a->x, b->z));

  2053     dst->z = m3gSub(m3gMul(a->x, b->y), m3gMul(a->y, b->x));

  2054 }

  2056 /*!

  2057  * \brief Dot product of two vectors

  2058  */

  2059 M3G_API M3Gfloat m3gDot3(const Vec3 *a, const Vec3 *b)

  2060 {

  2061     M3Gfloat d;

  2062     d = m3gMul(a->x, b->x);

  2063     d = m3gMadd(a->y, b->y, d);

  2064     d = m3gMadd(a->z, b->z, d);

  2065     return d;

  2066 }

  2068 /*!

  2069  * \brief Dot product of two vectors

  2070  */

  2071 M3G_API M3Gfloat m3gDot4(const Vec4 *a, const Vec4 *b)

  2072 {

  2073     M3Gfloat d;

  2074     d = m3gMul(a->x, b->x);

  2075     d = m3gMadd(a->y, b->y, d);

  2076     d = m3gMadd(a->z, b->z, d);

  2077     d = m3gMadd(a->w, b->w, d);

  2078     return d;

  2079 }

  2081 /*!

  2082  * \brief

  2083  */

  2084 M3G_API void m3gSetVec3(Vec3 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z)

  2085 {

  2086     M3G_ASSERT_PTR(v);

  2087     v->x = x;

  2088     v->y = y;

  2089     v->z = z;

  2090 }

  2092 /*!

  2093  * \brief

  2094  */

  2095 M3G_API void m3gSetVec4(Vec4 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z, M3Gfloat w)

  2096 {

  2097     M3G_ASSERT_PTR(v);

  2098     v->x = x;

  2099     v->y = y;

  2100     v->z = z;

  2101     v->w = w;

  2102 }

  2104 /*!

  2105  * \brief Vector subtraction

  2106  */

  2107 M3G_API void m3gSubVec3(Vec3 *vec, const Vec3 *other)

  2108 {

  2109     vec->x = m3gSub(vec->x, other->x);

  2110     vec->y = m3gSub(vec->y, other->y);

  2111     vec->z = m3gSub(vec->z, other->z);

  2112 }

  2114 /*!

  2115  * \brief Vector subtraction

  2116  */

  2117 M3G_API void m3gSubVec4(Vec4 *vec, const Vec4 *other)

  2118 {

  2119     vec->x = m3gSub(vec->x, other->x);

  2120     vec->y = m3gSub(vec->y, other->y);

  2121     vec->z = m3gSub(vec->z, other->z);

  2122     vec->w = m3gSub(vec->w, other->w);

  2123 }

  2125 /*!

  2126  * \brief Vector length

  2127  */

  2128 M3G_API M3Gfloat m3gLengthVec3(const Vec3 *vec)

  2129 {

  2130     return m3gSqrt(m3gAdd(m3gAdd(m3gSquare(vec->x),

  2131                                  m3gSquare(vec->y)),

  2132                           m3gSquare(vec->z)));

  2133 }

  2135 /*!

  2136  * \brief Vector scaling

  2137  */

  2138 M3G_API void m3gScaleVec3(Vec3 *vec, const M3Gfloat s)

  2139 {

  2140     vec->x = m3gMul(vec->x, s);

  2141     vec->y = m3gMul(vec->y, s);

  2142     vec->z = m3gMul(vec->z, s);

  2143 }

  2145 /*!

  2146  * \brief Vector scaling

  2147  */

  2148 M3G_API void m3gScaleVec4(Vec4 *vec, const M3Gfloat s)

  2149 {

  2150     vec->x = m3gMul(vec->x, s);

  2151     vec->y = m3gMul(vec->y, s);

  2152     vec->z = m3gMul(vec->z, s);

  2153     vec->w = m3gMul(vec->w, s);

  2154 }

  2156 /*!

  2157  * \brief Returns an angle-axis representation for a quaternion

  2158  *

  2159  * \note There are many, and this is not guaranteed to return a

  2160  * particular one

  2161  */

  2162 M3G_API void m3gGetAngleAxis(const Quat *quat, M3Gfloat *angle, Vec3 *axis)

  2163 {

  2164     M3Gfloat x, y, z, sinTheta;

  2166     M3G_ASSERT_PTR(quat);

  2167     M3G_ASSERT_PTR(angle);

  2168     M3G_ASSERT_PTR(axis);

  2170     x = quat->x;

  2171     y = quat->y;

  2172     z = quat->z;

  2174     sinTheta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(x), m3gSquare(y)),

  2175                               m3gSquare(z)));

  2177     if (sinTheta > EPSILON) {

  2178         M3Gfloat ooSinTheta = m3gRcp(sinTheta);

  2179         axis->x = m3gMul(x, ooSinTheta);

  2180         axis->y = m3gMul(y, ooSinTheta);

  2181         axis->z = m3gMul(z, ooSinTheta);

  2182     }

  2183     else {

  2184         /* return a valid axis even for no rotation */

  2185         axis->x = axis->y = 0.0f;

  2186         axis->z = 1.0f;

  2187     }

  2188     *angle = m3gMul(2.0f * RAD2DEG, m3gArcCos(quat->w));

  2189 }

  2191 /*!

  2192  * \brief Gets a single matrix column

  2193  */

  2194 M3G_API void m3gGetMatrixColumn(const Matrix *mtx, M3Gint col, Vec4 *dst)

  2195 {

  2196     if ((col & ~3) == 0) {

  2197         if (!mtx->complete) {

  2198             m3gFillClassifiedMatrix((Matrix*)mtx);

  2199         }

  2200         dst->x = M44F(mtx, 0, col);

  2201         dst->y = M44F(mtx, 1, col);

  2202         dst->z = M44F(mtx, 2, col);

  2203         dst->w = M44F(mtx, 3, col);

  2204     }

  2205     else {

  2206         M3G_ASSERT(M3G_FALSE);

  2207     }

  2208 }

  2210 /*!

  2211  * \brief Returns the floating point values of a matrix as consecutive

  2212  * columns

  2213  */

  2214 M3G_API void m3gGetMatrixColumns(const Matrix *mtx, M3Gfloat *dst)

  2215 {

  2216     M3G_ASSERT(mtx != NULL && dst != NULL);

  2218     if (!mtx->complete) {

  2219         m3gFillClassifiedMatrix((Matrix*)mtx);

  2220     }

  2221     m3gCopy(dst, mtx->elem, sizeof(mtx->elem));

  2222 }

  2224 /*!

  2225  * \brief Gets a single matrix row

  2226  */

  2227 M3G_API void m3gGetMatrixRow(const Matrix *mtx, M3Gint row, Vec4 *dst)

  2228 {

  2229     if ((row & ~3) == 0) {

  2230         if (!mtx->complete) {

  2231             m3gFillClassifiedMatrix((Matrix*)mtx);

  2232         }

  2233         dst->x = M44F(mtx, row, 0);

  2234         dst->y = M44F(mtx, row, 1);

  2235         dst->z = M44F(mtx, row, 2);

  2236         dst->w = M44F(mtx, row, 3);

  2237     }

  2238     else {

  2239         M3G_ASSERT(M3G_FALSE);

  2240     }

  2241 }

  2243 /*!

  2244  * \brief Returns the floating point values of a matrix as consecutive

  2245  * rows

  2246  */

  2247 M3G_API void m3gGetMatrixRows(const Matrix *mtx, M3Gfloat *dst)

  2248 {

  2249     M3G_ASSERT(mtx != NULL && dst != NULL);

  2251     if (!mtx->complete) {

  2252         m3gFillClassifiedMatrix((Matrix*)mtx);

  2253     }

  2254     {

  2255         int row;

  2256         for (row = 0; row < 4; ++row) {

  2257             *dst++ = mtx->elem[ 0 + row];

  2258             *dst++ = mtx->elem[ 4 + row];

  2259             *dst++ = mtx->elem[ 8 + row];

  2260             *dst++ = mtx->elem[12 + row];

  2261         }

  2262     }

  2263 }

  2265 /*!

  2266  * \brief Sets a matrix to identity

  2267  */

  2268 M3G_API void m3gIdentityMatrix(Matrix *mtx)

  2269 {

  2270     M3G_ASSERT(mtx != NULL);

  2271     m3gClassifyAs(MC_IDENTITY, mtx);

  2272 }

  2274 /*!

  2275  * \brief Sets a quaternion to identity

  2276  */

  2277 M3G_API void m3gIdentityQuat(Quat *quat)

  2278 {

  2279     M3G_ASSERT(quat != NULL);

  2280     quat->x = quat->y = quat->z = 0.0f;

  2281     quat->w = 1.0f;

  2282 }

  2284 /*!

  2285  * \brief Inverts a matrix

  2286  */

  2287 M3G_API M3Gbool m3gInvertMatrix(Matrix *mtx)

  2288 {

  2289     M3Gfloat *matrix;

  2290     M3Gint i;

  2291     M3Gfloat tmp[12];

  2292     M3Gfloat src[16];

  2293     M3Gfloat det;

  2295     M3G_ASSERT(mtx != NULL);

  2297     if (!m3gIsClassified(mtx)) {

  2298         m3gClassify(mtx);

  2299     }

  2301     /* Quick exit for identity */

  2303     if (mtx->mask == MC_IDENTITY) {

  2304         return M3G_TRUE;

  2305     }

  2307     /* Look for other common cases; these require that we have valid

  2308      * values in all the elements, so fill in the values first */

  2309     {

  2310         M3Guint mask = mtx->mask;

  2312         if (!mtx->complete) {

  2313             m3gFillClassifiedMatrix(mtx);

  2314         }

  2316         if ((mask | (0x3F << 24)) == MC_TRANSLATION) {

  2317             M44F(mtx, 0, 3) = m3gNegate(M44F(mtx, 0, 3));

  2318             M44F(mtx, 1, 3) = m3gNegate(M44F(mtx, 1, 3));

  2319             M44F(mtx, 2, 3) = m3gNegate(M44F(mtx, 2, 3));

  2320             mtx->mask = MC_TRANSLATION;

  2321             return M3G_TRUE;

  2322         }

  2323         if ((mask | 0x300C03) == MC_SCALING) {

  2324             if ((mask &  3       ) == 0 ||

  2325                 (mask & (3 << 10)) == 0 ||

  2326                 (mask & (3 << 20)) == 0) {

  2327                 return M3G_FALSE; /* zero scale for at least one axis */

  2328             }

  2329             M44F(mtx, 0, 0) = m3gRcp(M44F(mtx, 0, 0));

  2330             M44F(mtx, 1, 1) = m3gRcp(M44F(mtx, 1, 1));

  2331             M44F(mtx, 2, 2) = m3gRcp(M44F(mtx, 2, 2));

  2332             return M3G_TRUE;

  2333         }

  2334     }

  2336     /* Do a full 4x4 inversion as a last resort */

  2338 	matrix = mtx->elem;

  2340     /* transpose matrix */

  2341     for (i = 0; i < 4; i++) {

  2342         src[i] = matrix[i*4];

  2343         src[i+4] = matrix[i*4+1];

  2344         src[i+8] = matrix[i*4+2];

  2345         src[i+12] = matrix[i*4+3];

  2346     }

  2348     /* calculate pairs for first 8 elements (cofactors) */

  2349     tmp[0] = src[10]*src[15];

  2350     tmp[1] = src[11]*src[14];

  2351     tmp[2] = src[9]*src[15];

  2352     tmp[3] = src[11]*src[13];

  2353     tmp[4] = src[9]*src[14];

  2354     tmp[5] = src[10]*src[13];

  2355     tmp[6] = src[8]*src[15];

  2356     tmp[7] = src[11]*src[12];

  2357     tmp[8] = src[8]*src[14];

  2358     tmp[9] = src[10]*src[12];

  2359     tmp[10] = src[8]*src[13];

  2360     tmp[11] = src[9]*src[12];

  2362     /* calculate first 8 elements (cofactors) */

  2363     matrix[0] = tmp[0]*src[5] + tmp[3]*src[6] + tmp[4]*src[7];

  2364     matrix[0] -= tmp[1]*src[5] + tmp[2]*src[6] + tmp[5]*src[7];

  2365     matrix[1] = tmp[1]*src[4] + tmp[6]*src[6] + tmp[9]*src[7];

  2366     matrix[1] -= tmp[0]*src[4] + tmp[7]*src[6] + tmp[8]*src[7];

  2367     matrix[2] = tmp[2]*src[4] + tmp[7]*src[5] + tmp[10]*src[7];

  2368     matrix[2] -= tmp[3]*src[4] + tmp[6]*src[5] + tmp[11]*src[7];

  2369     matrix[3] = tmp[5]*src[4] + tmp[8]*src[5] + tmp[11]*src[6];

  2370     matrix[3] -= tmp[4]*src[4] + tmp[9]*src[5] + tmp[10]*src[6];

  2371     matrix[4] = tmp[1]*src[1] + tmp[2]*src[2] + tmp[5]*src[3];

  2372     matrix[4] -= tmp[0]*src[1] + tmp[3]*src[2] + tmp[4]*src[3];

  2373     matrix[5] = tmp[0]*src[0] + tmp[7]*src[2] + tmp[8]*src[3];

  2374     matrix[5] -= tmp[1]*src[0] + tmp[6]*src[2] + tmp[9]*src[3];

  2375     matrix[6] = tmp[3]*src[0] + tmp[6]*src[1] + tmp[11]*src[3];

  2376     matrix[6] -= tmp[2]*src[0] + tmp[7]*src[1] + tmp[10]*src[3];

  2377     matrix[7] = tmp[4]*src[0] + tmp[9]*src[1] + tmp[10]*src[2];

  2378     matrix[7] -= tmp[5]*src[0] + tmp[8]*src[1] + tmp[11]*src[2];

  2380     /* calculate pairs for second 8 elements (cofactors) */

  2381     tmp[0] = src[2]*src[7];

  2382     tmp[1] = src[3]*src[6];

  2383     tmp[2] = src[1]*src[7];

  2384     tmp[3] = src[3]*src[5];

  2385     tmp[4] = src[1]*src[6];

  2386     tmp[5] = src[2]*src[5];

  2387     tmp[6] = src[0]*src[7];

  2388     tmp[7] = src[3]*src[4];

  2389     tmp[8] = src[0]*src[6];

  2390     tmp[9] = src[2]*src[4];

  2391     tmp[10] = src[0]*src[5];

  2392     tmp[11] = src[1]*src[4];

  2394     /* calculate second 8 elements (cofactors) */

  2395     matrix[8] = tmp[0]*src[13] + tmp[3]*src[14] + tmp[4]*src[15];

  2396     matrix[8] -= tmp[1]*src[13] + tmp[2]*src[14] + tmp[5]*src[15];

  2397     matrix[9] = tmp[1]*src[12] + tmp[6]*src[14] + tmp[9]*src[15];

  2398     matrix[9] -= tmp[0]*src[12] + tmp[7]*src[14] + tmp[8]*src[15];

  2399     matrix[10] = tmp[2]*src[12] + tmp[7]*src[13] + tmp[10]*src[15];

  2400     matrix[10] -= tmp[3]*src[12] + tmp[6]*src[13] + tmp[11]*src[15];

  2401     matrix[11] = tmp[5]*src[12] + tmp[8]*src[13] + tmp[11]*src[14];

  2402     matrix[11] -= tmp[4]*src[12] + tmp[9]*src[13] + tmp[10]*src[14];

  2403     matrix[12] = tmp[2]*src[10] + tmp[5]*src[11] + tmp[1]*src[9];

  2404     matrix[12] -= tmp[4]*src[11] + tmp[0]*src[9] + tmp[3]*src[10];

  2405     matrix[13] = tmp[8]*src[11] + tmp[0]*src[8] + tmp[7]*src[10];

  2406     matrix[13] -= tmp[6]*src[10] + tmp[9]*src[11] + tmp[1]*src[8];

  2407     matrix[14] = tmp[6]*src[9] + tmp[11]*src[11] + tmp[3]*src[8];

  2408     matrix[14] -= tmp[10]*src[11] + tmp[2]*src[8] + tmp[7]*src[9];

  2409     matrix[15] = tmp[10]*src[10] + tmp[4]*src[8] + tmp[9]*src[9];

  2410     matrix[15] -= tmp[8]*src[9] + tmp[11]*src[10] + tmp[5]*src[8];

  2412     /* calculate determinant */

  2413     det = src[0]*matrix[0]+src[1]*matrix[1]+src[2]*matrix[2]+src[3]*matrix[3];

  2415     /* matrix has no inverse */

  2416     if (det == 0.0f) {

  2417         return M3G_FALSE;

  2418     }

  2420     /* calculate matrix inverse */

  2421     det = 1/det;

  2422     for (i = 0; i < 16; i++) {

  2423         matrix[i] *= det;

  2424     }

  2426     mtx->classified = M3G_FALSE;

  2427 	return M3G_TRUE;

  2428 }

  2430 /*!

  2431  * \brief Sets a matrix to the inverse of another matrix

  2432  */

  2433 M3G_API M3Gbool m3gMatrixInverse(Matrix *mtx, const Matrix *other)

  2434 {

  2435     M3G_ASSERT(mtx != NULL && other != NULL);

  2437     if (!m3gIsClassified(other)) {

  2438         m3gClassify((Matrix*)other);

  2439     }

  2441 	m3gCopyMatrix(mtx, other);

  2442 	return m3gInvertMatrix(mtx);

  2443 }

  2445 /*!

  2446  * \brief Sets a matrix to the transpose of another matrix

  2447  */

  2448 M3G_API void m3gMatrixTranspose(Matrix *mtx, const Matrix *other)

  2449 {

  2450     M3Gbyte i;

  2451     M3G_ASSERT(mtx != NULL && other != NULL);

  2453     if (!other->complete) {

  2454         m3gFillClassifiedMatrix((Matrix *)other);

  2455     }

  2457     for (i = 0; i < 4; i++) {

  2458         mtx->elem[i] = other->elem[i*4];

  2459         mtx->elem[i+4] = other->elem[i*4+1];

  2460         mtx->elem[i+8] = other->elem[i*4+2];

  2461         mtx->elem[i+12] = other->elem[i*4+3];

  2462     }

  2463     mtx->classified = M3G_FALSE;

  2464     mtx->complete = M3G_TRUE;

  2465 }

  2467 M3G_API M3Gbool m3gInverseTranspose(Matrix *mtx, const Matrix *other)

  2468 {

  2469     Matrix temp;

  2470     if (!m3gMatrixInverse(&temp, other)) {

  2471         return M3G_FALSE;

  2472     }

  2473     m3gMatrixTranspose(mtx, &temp);

  2474     return M3G_TRUE;

  2475 }

  2477 /*!

  2478  * \brief Sets a matrix to the product of two other matrices

  2479  *

  2480  * \note \c dst can not be either of \c left or \c right; if it is,

  2481  * the results are undefined

  2482  */

  2483 M3G_API void m3gMatrixProduct(Matrix *dst, const Matrix *left, const Matrix *right)

  2484 {

  2485     M3G_ASSERT_PTR(dst);

  2486     M3G_ASSERT_PTR(left);

  2487     M3G_ASSERT_PTR(right);

  2488     M3G_ASSERT(dst != left && dst != right);

  2490     /* Classify input matrices and take early exits for identities */

  2492     if (!m3gIsClassified(left)) {

  2493         m3gClassify((Matrix*)left);

  2494     }

  2495     if (left->mask == MC_IDENTITY) {

  2496         m3gCopyMatrix(dst, right);

  2497         return;

  2498     }

  2500     if (!m3gIsClassified(right)) {

  2501         m3gClassify((Matrix*)right);

  2502     }

  2503     if (right->mask == MC_IDENTITY) {

  2504         m3gCopyMatrix(dst, left);

  2505         return;

  2506     }

  2508     /* Special quick paths for 3x4 matrices */

  2510     if (m3gIsWUnity(left) && m3gIsWUnity(right)) {

  2512         /* Translation? */

  2514         if ((left->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) {

  2516             if (left->mask != MC_TRANSLATION && !left->complete) {

  2517                 m3gFillClassifiedMatrix((Matrix*)left);

  2518             }

  2519             if (right->mask != MC_TRANSLATION && !right->complete) {

  2520                 m3gFillClassifiedMatrix((Matrix*)right);

  2521             }

  2523             m3gCopyMatrix(dst, right);

  2525             M44F(dst, 0, 3) = m3gAdd(M44F(left, 0, 3), M44F(dst, 0, 3));

  2526             M44F(dst, 1, 3) = m3gAdd(M44F(left, 1, 3), M44F(dst, 1, 3));

  2527             M44F(dst, 2, 3) = m3gAdd(M44F(left, 2, 3), M44F(dst, 2, 3));

  2529             dst->mask |= MC_TRANSLATION_PART;

  2530             return;

  2531         }

  2533         if ((right->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) {

  2534             Vec4 tvec;

  2536             if (left->mask != MC_TRANSLATION && !left->complete) {

  2537                 m3gFillClassifiedMatrix((Matrix*)left);

  2538             }

  2539             if (right->mask != MC_TRANSLATION && !right->complete) {

  2540                 m3gFillClassifiedMatrix((Matrix*)right);

  2541             }

  2543             m3gCopyMatrix(dst, left);

  2545             m3gGetMatrixColumn(right, 3, &tvec);

  2546             m3gTransformVec4(dst, &tvec);

  2548             M44F(dst, 0, 3) = tvec.x;

  2549             M44F(dst, 1, 3) = tvec.y;

  2550             M44F(dst, 2, 3) = tvec.z;

  2552             dst->mask |= MC_TRANSLATION_PART;

  2553             return;

  2554         }

  2556     }

  2558     /* Compute product and set output classification */

  2560     m3gGenericMatrixProduct(dst, left, right);

  2561 }

  2563 /*!

  2564  * \brief Normalizes a quaternion

  2565  */

  2566 M3G_API void m3gNormalizeQuat(Quat *q)

  2567 {

  2568     M3Gfloat norm;

  2569     M3G_ASSERT_PTR(q);

  2571     norm = m3gNorm4(&q->x);

  2573     if (norm > EPSILON) {

  2574         norm = m3gRcpSqrt(norm);

  2575         m3gScale4(&q->x, norm);

  2576     }

  2577     else {

  2578         m3gIdentityQuat(q);

  2579     }

  2580 }

  2582 /*!

  2583  * \brief Normalizes a three-vector

  2584  */

  2585 M3G_API void m3gNormalizeVec3(Vec3 *v)

  2586 {

  2587     M3Gfloat norm;

  2588     M3G_ASSERT_PTR(v);

  2590     norm = m3gNorm3(&v->x);

  2592     if (norm > EPSILON) {

  2593         norm = m3gRcpSqrt(norm);

  2594         m3gScale3(&v->x, norm);

  2595     }

  2596     else {

  2597         m3gZero(v, sizeof(Vec3));

  2598     }

  2599 }

  2601 /*!

  2602  * \brief Normalizes a four-vector

  2603  */

  2604 M3G_API void m3gNormalizeVec4(Vec4 *v)

  2605 {

  2606     M3Gfloat norm;

  2607     M3G_ASSERT_PTR(v);

  2609     norm = m3gNorm4(&v->x);

  2611     if (norm > EPSILON) {

  2612         norm = m3gRcpSqrt(norm);

  2613         m3gScale4(&v->x, norm);

  2614     }

  2615     else {

  2616         m3gZero(v, sizeof(Vec4));

  2617     }

  2618 }

  2620 /*!

  2621  * \brief Multiplies a matrix from the right with another matrix

  2622  */

  2623 M3G_API void m3gPostMultiplyMatrix(Matrix *mtx, const Matrix *other)

  2624 {

  2625     Matrix temp;

  2626     M3G_ASSERT_PTR(mtx);

  2627     M3G_ASSERT_PTR(other);

  2629     m3gCopyMatrix(&temp, mtx);

  2630     m3gMatrixProduct(mtx, &temp, other);

  2631 }

  2633 /*!

  2634  * \brief Multiplies a matrix from the left with another matrix

  2635  */

  2636 M3G_API void m3gPreMultiplyMatrix(Matrix *mtx, const Matrix *other)

  2637 {

  2638     Matrix temp;

  2639     M3G_ASSERT_PTR(mtx);

  2640     M3G_ASSERT_PTR(other);

  2642     m3gCopyMatrix(&temp, mtx);

  2643     m3gMatrixProduct(mtx, other, &temp);

  2644 }

  2646 /*!

  2647  * \brief Multiplies a matrix with a rotation matrix.

  2648  */

  2649 M3G_API void m3gPostRotateMatrix(Matrix *mtx,

  2650                          M3Gfloat angle,

  2651                          M3Gfloat ax, M3Gfloat ay, M3Gfloat az)

  2652 {

  2653     Quat q;

  2654     m3gSetAngleAxis(&q, angle, ax, ay, az);

  2655     m3gPostRotateMatrixQuat(mtx, &q);

  2656 }

  2658 /*!

  2659  * \brief Multiplies a matrix with a translation matrix.

  2660  */

  2661 M3G_API void m3gPostRotateMatrixQuat(Matrix *mtx, const Quat *quat)

  2662 {

  2663     Matrix temp;

  2664     m3gQuatMatrix(&temp, quat);

  2665     m3gPostMultiplyMatrix(mtx, &temp);

  2666 }

  2668 /*!

  2669  * \brief Multiplies a matrix with a scale matrix.

  2670  */

  2671 M3G_API void m3gPostScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz)

  2672 {

  2673     Matrix temp;

  2674     m3gScalingMatrix(&temp, sx, sy, sz);

  2675     m3gPostMultiplyMatrix(mtx, &temp);

  2676 }

  2678 /*!

  2679  * \brief Multiplies a matrix with a translation (matrix).

  2680  */

  2681 M3G_API void m3gPostTranslateMatrix(Matrix *mtx,

  2682                             M3Gfloat tx, M3Gfloat ty, M3Gfloat tz)

  2683 {

  2684     Matrix temp;

  2685     m3gTranslationMatrix(&temp, tx, ty, tz);

  2686     m3gPostMultiplyMatrix(mtx, &temp);

  2687 }

  2689 /*!

  2690  * \brief Multiplies a matrix with a rotation matrix

  2691  */

  2692 M3G_API void m3gPreRotateMatrix(Matrix *mtx,

  2693                         M3Gfloat angle,

  2694                         M3Gfloat ax, M3Gfloat ay, M3Gfloat az)

  2695 {

  2696     Quat q;

  2697     m3gSetAngleAxis(&q, angle, ax, ay, az);

  2698     m3gPreRotateMatrixQuat(mtx, &q);

  2699 }

  2701 /*!

  2702  * \brief Multiplies a matrix with a quaternion rotation

  2703  */

  2704 M3G_API void m3gPreRotateMatrixQuat(Matrix *mtx, const Quat *quat)

  2705 {

  2706     Matrix temp;

  2707     m3gQuatMatrix(&temp, quat);

  2708     m3gPreMultiplyMatrix(mtx, &temp);

  2709 }

  2711 /*!

  2712  * \brief Multiplies a matrix with a scale matrix.

  2713  */

  2714 M3G_API void m3gPreScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz)

  2715 {

  2716     Matrix temp;

  2717     m3gScalingMatrix(&temp, sx, sy, sz);

  2718     m3gPreMultiplyMatrix(mtx, &temp);

  2719 }

  2721 /*!

  2722  * \brief Multiplies a matrix with a translation (matrix).

  2723  */

  2724 M3G_API void m3gPreTranslateMatrix(Matrix *mtx,

  2725                            M3Gfloat tx, M3Gfloat ty, M3Gfloat tz)

  2726 {

  2727     Matrix temp;

  2728     m3gTranslationMatrix(&temp, tx, ty, tz);

  2729     m3gPreMultiplyMatrix(mtx, &temp);

  2730 }

  2732 /*!

  2733  * \brief Converts a quaternion into a matrix

  2734  *

  2735  * The output is a matrix effecting the same rotation as the

  2736  * quaternion passed as input

  2737  */

  2738 M3G_API void m3gQuatMatrix(Matrix *mtx, const Quat *quat)

  2739 {

  2740     M3Gfloat qx = quat->x;

  2741     M3Gfloat qy = quat->y;

  2742     M3Gfloat qz = quat->z;

  2743     M3Gfloat qw = quat->w;

  2745     /* Quick exit for identity rotations */

  2747     if (IS_ZERO(qx) && IS_ZERO(qy) && IS_ZERO(qz)) {

  2748         m3gIdentityMatrix(mtx);

  2749         return;

  2750     }

  2752     {

  2753         /* Determine the rough type of the output matrix */

  2755         M3Guint type = MC_SCALING_ROTATION;

  2756         if (IS_ZERO(qz) && IS_ZERO(qy)) {

  2757             type = MC_X_ROTATION;

  2758         }

  2759         else if (IS_ZERO(qz) && IS_ZERO(qx)) {

  2760             type = MC_Y_ROTATION;

  2761         }

  2762         else if (IS_ZERO(qx) && IS_ZERO(qy)) {

  2763             type = MC_Z_ROTATION;

  2764         }

  2765         m3gClassifyAs(type, mtx);

  2767         /* Generate the non-constant parts of the matrix */

  2768         {

  2769             M3Gfloat wx, wy, wz, xx, yy, yz, xy, xz, zz;

  2771             xx = m3gMul(qx, qx);

  2772             xy = m3gMul(qx, qy);

  2773             xz = m3gMul(qx, qz);

  2774             yy = m3gMul(qy, qy);

  2775             yz = m3gMul(qy, qz);

  2776             zz = m3gMul(qz, qz);

  2777             wx = m3gMul(qw, qx);

  2778             wy = m3gMul(qw, qy);

  2779             wz = m3gMul(qw, qz);

  2781             if (type != MC_X_ROTATION) {

  2782                 M44F(mtx, 0, 0) = m3gSub(1.f, m3gDouble(m3gAdd(yy, zz)));

  2783                 M44F(mtx, 0, 1) =             m3gDouble(m3gSub(xy, wz));

  2784                 M44F(mtx, 0, 2) =             m3gDouble(m3gAdd(xz, wy));

  2785             }

  2787             if (type != MC_Y_ROTATION) {

  2788                 M44F(mtx, 1, 0) =             m3gDouble(m3gAdd(xy, wz));

  2789                 M44F(mtx, 1, 1) = m3gSub(1.f, m3gDouble(m3gAdd(xx, zz)));

  2790                 M44F(mtx, 1, 2) =             m3gDouble(m3gSub(yz, wx));

  2791             }

  2793             if (type != MC_Z_ROTATION) {

  2794                 M44F(mtx, 2, 0) =             m3gDouble(m3gSub(xz, wy));

  2795                 M44F(mtx, 2, 1) =             m3gDouble(m3gAdd(yz, wx));

  2796                 M44F(mtx, 2, 2) = m3gSub(1.f, m3gDouble(m3gAdd(xx, yy)));

  2797             }

  2798         }

  2799         m3gSubClassify(mtx);

  2800     }

  2801 }

  2803 /*!

  2804  * \brief Generates a scaling matrix

  2805  */

  2806 M3G_API void m3gScalingMatrix(

  2807     Matrix *mtx,

  2808     const M3Gfloat sx, const M3Gfloat sy, const M3Gfloat sz)

  2809 {

  2810     M3G_ASSERT_PTR(mtx);

  2811     M44F(mtx, 0, 0) = sx;

  2812     M44F(mtx, 1, 1) = sy;

  2813     M44F(mtx, 2, 2) = sz;

  2814     m3gClassifyAs(MC_SCALING, mtx);

  2815     m3gSubClassify(mtx);

  2816 }

  2818 /*!

  2819  * \brief Sets a quaternion to represent an angle-axis rotation

  2820  */

  2821 M3G_API void m3gSetAngleAxis(Quat *quat,

  2822                      M3Gfloat angle,

  2823                      M3Gfloat ax, M3Gfloat ay, M3Gfloat az)

  2824 {

  2825     m3gSetAngleAxisRad(quat, m3gMul(angle, M3G_DEG2RAD), ax, ay, az);

  2826 }

  2828 /*!

  2829  * \brief Sets a quaternion to represent an angle-axis rotation

  2830  */

  2831 M3G_API void m3gSetAngleAxisRad(Quat *quat,

  2832                         M3Gfloat angleRad,

  2833                         M3Gfloat ax, M3Gfloat ay, M3Gfloat az)

  2834 {

  2835     M3G_ASSERT_PTR(quat);

  2837     if (!IS_ZERO(angleRad)) {

  2838         M3Gfloat s;

  2839         M3Gfloat halfAngle = m3gHalf(angleRad);

  2841         s = m3gSin(halfAngle);

  2843         {

  2844             M3Gfloat sqrNorm = m3gMadd(ax, ax, m3gMadd(ay, ay, m3gMul(az, az)));

  2845             if (sqrNorm < 0.995f || sqrNorm > 1.005f) {

  2846                 if (sqrNorm > EPSILON) {

  2847                     M3Gfloat ooNorm = m3gRcpSqrt(sqrNorm);

  2848                     ax = m3gMul(ax, ooNorm);

  2849                     ay = m3gMul(ay, ooNorm);

  2850                     az = m3gMul(az, ooNorm);

  2851                 }

  2852                 else {

  2853                     ax = ay = az = 0.0f;

  2854                 }

  2855             }

  2856         }

  2858         quat->x = m3gMul(s, ax);

  2859         quat->y = m3gMul(s, ay);

  2860         quat->z = m3gMul(s, az);

  2861         quat->w = m3gCos(halfAngle);

  2862     }

  2863     else {

  2864         m3gIdentityQuat(quat);

  2865     }

  2866 }

  2868 /*!

  2869  * \brief Quaternion multiplication.

  2870  */

  2871 M3G_API void m3gMulQuat(Quat *quat, const Quat *other)

  2872 {

  2873     Quat q;

  2874     q = *quat;

  2876     quat->w = m3gMul(q.w, other->w) - m3gMul(q.x, other->x) - m3gMul(q.y, other->y) - m3gMul(q.z, other->z);

  2877     quat->x = m3gMul(q.w, other->x) + m3gMul(q.x, other->w) + m3gMul(q.y, other->z) - m3gMul(q.z, other->y);

  2878     quat->y = m3gMul(q.w, other->y) - m3gMul(q.x, other->z) + m3gMul(q.y, other->w) + m3gMul(q.z, other->x);

  2879     quat->z = m3gMul(q.w, other->z) + m3gMul(q.x, other->y) - m3gMul(q.y, other->x) + m3gMul(q.z, other->w);

  2880 }

  2882 /*!

  2883  * \brief Makes this quaternion represent the rotation from one 3D

  2884  * vector to another

  2885  */

  2886 M3G_API void m3gSetQuatRotation(Quat *quat,

  2887                                 const Vec3 *from, const Vec3 *to)

  2888 {

  2889     M3Gfloat cosAngle;

  2891     M3G_ASSERT_PTR(quat);

  2892     M3G_ASSERT_PTR(from);

  2893     M3G_ASSERT_PTR(to);

  2895     cosAngle = m3gDot3(from, to);

  2897     if (cosAngle > (1.0f - EPSILON)) {  /* zero angle */

  2898         m3gIdentityQuat(quat);

  2899         return;

  2900     }

  2901     else if (cosAngle > (1.0e-3f - 1.0f)) { /* normal case */

  2902         Vec3 axis;

  2903         m3gCross(&axis, from, to);

  2904         m3gSetAngleAxisRad(quat, m3gArcCos(cosAngle), axis.x, axis.y, axis.z);

  2905     }

  2906     else {

  2908         /* Opposite vectors; must generate an arbitrary perpendicular

  2909          * vector and use that as the rotation axis. Here, we try the

  2910          * Z axis first, and if that seems too parallel to the

  2911          * vectors, project the Y axis instead: Z is the only good

  2912          * choice for Z-constrained rotations, and Y by definition

  2913          * must be perpendicular to that. */

  2915         Vec3 axis, temp;

  2916         M3Gfloat s;

  2918         axis.x = axis.y = axis.z = 0.0f;

  2919         if (m3gAbs(from->z) < (1.0f - EPSILON)) {

  2920             axis.z = 1.0f;

  2921         }

  2922         else {

  2923             axis.y = 1.0f;

  2924         }

  2926         s = m3gDot3(&axis, from);

  2927         temp = *from;

  2928         m3gScaleVec3(&temp, s);

  2929         m3gSubVec3(&axis, &temp);

  2931         m3gSetAngleAxis(quat, 180.f, axis.x, axis.y, axis.z);

  2932     }

  2933 }

  2935 /*!

  2936  * \brief Sets the values of a matrix

  2937  *

  2938  */

  2939 M3G_API void m3gSetMatrixColumns(Matrix *mtx, const M3Gfloat *src)

  2940 {

  2941     M3G_ASSERT(mtx != NULL && src != NULL);

  2943     m3gCopy(mtx->elem, src, sizeof(mtx->elem));

  2944     mtx->classified = M3G_FALSE;

  2945     mtx->complete = M3G_TRUE;

  2946 }

  2948 /*!

  2949  * \brief Sets the values of a matrix

  2950  *

  2951  */

  2952 M3G_API void m3gSetMatrixRows(Matrix *mtx, const M3Gfloat *src)

  2953 {

  2954     M3G_ASSERT(mtx != NULL && src != NULL);

  2955     {

  2956         int row;

  2957         for (row = 0; row < 4; ++row) {

  2958             mtx->elem[ 0 + row] = *src++;

  2959             mtx->elem[ 4 + row] = *src++;

  2960             mtx->elem[ 8 + row] = *src++;

  2961             mtx->elem[12 + row] = *src++;

  2962         }

  2963     }

  2964     mtx->classified = M3G_FALSE;

  2965     mtx->complete = M3G_TRUE;

  2966 }

  2968 /*!

  2969  * \brief Transforms a 4-vector with a matrix

  2970  */

  2971 #if defined(M3G_HW_FLOAT_VFPV2)

  2973 __asm void _m3gTransformVec4(const Matrix *mtx, Vec4 *vec, M3Gint n)

  2974 {

  2976 		CODE32

  2978 		FSTMFDD	sp!, {d8-d11}

  2980 		FMRX	r3, FPSCR

  2981 		BIC		r12, r3, #0x00370000

  2982 		ORR		r12, #(3<<16)

  2983 		FMXR	FPSCR, r12

  2984 		CMP		r2, #4

  2986 		FLDMIAS	r0, {s4-s19}		// [mtx0  mtx1 ..]

  2987 		FLDMIAS	r1, {s0-s3}			// [vec.x  vec.y  vec.z  vec.w]

  2988 		FMULS	s20, s4, s0			// [s20-s23]  = [v.x*mtx0  v.x*mtx1  v.x*mtx2  v.x*mtx3 ]

  2989 		FMACS	s20, s8, s1			// [s20-s23] += [v.y*mtx4  v.y*mtx5  v.y*mtx6  v.y*mtx7 ]

  2990 		FMACS	s20, s12, s2		// [s20-s23] += [v.z*mtx8  v.z*mtx9  v.z*mtx10 v.z*mtx11]

  2991 		FMACS	s20, s16, s3		// [s20-s23] += [v.w*mtx12 v.w*mtx13 v.w*mtx14 v.w*mtx15]

  2992 		FSTMIAS		r1!, {s20-s22}

  2993 		FSTMIASEQ	r1, {s23}

  2995 		FMXR	FPSCR, r3

  2996 		FLDMFDD	sp!, {d8-d11}

  2998 		BX		lr

  2999 }

  3000 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */

  3002 M3G_API void m3gTransformVec4(const Matrix *mtx, Vec4 *vec)

  3003 {

  3004     M3Guint type;

  3005     M3G_ASSERT(mtx != NULL && vec != NULL);

  3007     if (!m3gIsClassified(mtx)) {

  3008         m3gClassify((Matrix*)mtx);

  3009     }

  3011     type = mtx->mask;

  3013     if (type == MC_IDENTITY) {

  3014         return;

  3015     }

  3016     else {

  3017         int n = m3gIsWUnity(mtx) ? 3 : 4;

  3019         if (!mtx->complete) {

  3020             m3gFillClassifiedMatrix((Matrix*)mtx);

  3021         }

  3022 #if	defined(M3G_HW_FLOAT_VFPV2)

  3023 		_m3gTransformVec4(mtx, vec, n);

  3024 #else

  3025         {

  3026             Vec4 v = *vec;

  3027             int i;

  3029             for (i = 0; i < n; ++i) {

  3030                 M3Gfloat d = m3gMul(M44F(mtx, i, 0), v.x);

  3031                 d = m3gMadd(M44F(mtx, i, 1), v.y, d);

  3032                 d = m3gMadd(M44F(mtx, i, 2), v.z, d);

  3033                 d = m3gMadd(M44F(mtx, i, 3), v.w, d);

  3034                 (&vec->x)[i] = d;

  3035             }

  3036         }

  3037 #endif

  3038     }

  3039 }

  3041 /*!

  3042  * \brief Generates a translation matrix

  3043  */

  3044 M3G_API void m3gTranslationMatrix(

  3045     Matrix *mtx,

  3046     const M3Gfloat tx, const M3Gfloat ty, const M3Gfloat tz)

  3047 {

  3048     M3G_ASSERT_PTR(mtx);

  3049     M44F(mtx, 0, 3) = tx;

  3050     M44F(mtx, 1, 3) = ty;

  3051     M44F(mtx, 2, 3) = tz;

  3052     m3gClassifyAs(MC_TRANSLATION, mtx);

  3053     m3gSubClassify(mtx);

  3054 }

  3056 /*!

  3057  * \brief Float vector assignment.

  3058  */

  3059 M3G_API void m3gSetQuat(Quat *quat, const M3Gfloat *vec)

  3060 {

  3061     quat->x = vec[0];

  3062     quat->y = vec[1];

  3063     quat->z = vec[2];

  3064     quat->w = vec[3];

  3065 }

  3067 /*!

  3068  * \brief Slerp between quaternions q0 and q1, storing the result in quat.

  3069  */

  3070 M3G_API void m3gSlerpQuat(Quat *quat,

  3071                   M3Gfloat s,

  3072                   const Quat *q0, const Quat *q1)

  3073 {

  3074     M3Gfloat s0, s1;

  3075     M3Gfloat cosTheta = m3gDot4((const Vec4 *)q0, (const Vec4 *)q1);

  3076     M3Gfloat oneMinusS = m3gSub(1.0f, s);

  3078     if (cosTheta > EPSILON - 1.0f) {

  3079         if (cosTheta < 1.0f - EPSILON) {

  3080             M3Gfloat theta    = m3gArcCos(cosTheta);

  3081             M3Gfloat sinTheta = m3gSin(theta);

  3082             s0 = m3gSin(m3gMul(oneMinusS, theta)) / sinTheta;

  3083             s1 = m3gSin(m3gMul(s, theta)) / sinTheta;

  3084         }

  3085         else {

  3086             /* For quaternions very close to each other, use plain

  3087                linear interpolation for numerical stability. */

  3088             s0 = oneMinusS;

  3089             s1 = s;

  3090         }

  3091         quat->x = m3gMadd(s0, q0->x, m3gMul(s1, q1->x));

  3092         quat->y = m3gMadd(s0, q0->y, m3gMul(s1, q1->y));

  3093         quat->z = m3gMadd(s0, q0->z, m3gMul(s1, q1->z));

  3094         quat->w = m3gMadd(s0, q0->w, m3gMul(s1, q1->w));

  3095     }

  3096     else {

  3097         /* Slerp is undefined if the two quaternions are (nearly)

  3098            opposite, so we just pick an arbitrary plane of

  3099            rotation here. */

  3101         quat->x = -q0->y;

  3102         quat->y =  q0->x;

  3103         quat->z = -q0->w;

  3104         quat->w =  q0->z;

  3106         s0 = m3gSin(m3gMul(oneMinusS, HALF_PI));

  3107         s1 = m3gSin(m3gMul(s, HALF_PI));

  3109         quat->x = m3gMadd(s0, q0->x, m3gMul(s1, quat->x));

  3110         quat->y = m3gMadd(s0, q0->y, m3gMul(s1, quat->y));

  3111         quat->z = m3gMadd(s0, q0->z, m3gMul(s1, quat->z));

  3112     }

  3113 }

  3115 /*!

  3116  * \brief Interpolate quaternions using spline, or "squad" interpolation.

  3117  */

  3118 M3G_API void m3gSquadQuat(Quat *quat,

  3119                   M3Gfloat s,

  3120                   const Quat *q0, const Quat *a, const Quat *b, const Quat *q1)

  3121 {

  3123     Quat temp0;

  3124     Quat temp1;

  3125     m3gSlerpQuat(&temp0, s, q0, q1);

  3126     m3gSlerpQuat(&temp1, s, a, b);

  3128     m3gSlerpQuat(quat, m3gDouble(m3gMul(s, m3gSub(1.0f, s))), &temp0, &temp1);

  3129 }

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--