First public contribution.
2 * Copyright (c) 2003 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of the License "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
14 * Description: Internal math function implementations
22 * \brief Internal math function implementations
26 #ifndef M3G_CORE_INCLUDE
27 # error included by m3g_core.c; do not compile separately.
31 #include "m3g_memory.h"
33 #if defined(M3G_SOFT_FLOAT)
37 /*----------------------------------------------------------------------
38 * Private types and definitions
39 *--------------------------------------------------------------------*/
41 /* Enumerated common matrix classifications */
42 #define MC_IDENTITY 0x40100401 // 01000000 00010000 00000100 00000001
43 #define MC_FRUSTUM 0x30BF0C03 // 00110000 10111111 00001100 00000011
44 #define MC_PERSPECTIVE 0x30B00C03 // 00110000 10110000 00001100 00000011
45 #define MC_ORTHO 0x7F300C03 // 01111111 00110000 00001100 00000011
46 #define MC_PARALLEL 0x70300C03 // 01110000 00110000 00001100 00000011
47 #define MC_SCALING_ROTATION 0x403F3F3F // 01000000 00111111 00111111 00111111
48 #define MC_SCALING 0x40300C03 // 01000000 00110000 00001100 00000011
49 #define MC_TRANSLATION 0x7F100401 // 01111111 00010000 00000100 00000001
50 #define MC_X_ROTATION 0x403C3C01 // 01000000 00111100 00111100 00000001
51 #define MC_Y_ROTATION 0x40330433 // 01000000 00110011 00000100 00110011
52 #define MC_Z_ROTATION 0x40100F0F // 01000000 00010000 00001111 00001111
53 #define MC_W_UNITY 0x7F3F3F3F // 01111111 00111111 00111111 00111111
54 #define MC_GENERIC 0xFFFFFFFF
56 /* Partial masks for individual matrix components */
57 #define MC_TRANSLATION_PART 0x3F000000 // 00111111 00000000 00000000 00000000
58 #define MC_SCALE_PART 0x00300C03 // 00000000 00110000 00001100 00000011
59 #define MC_SCALE_ROTATION_PART 0x003F3F3F // 00000000 00111111 00111111 00111111
61 /* Matrix element classification masks */
62 #define ELEM_ZERO 0x00
64 #define ELEM_MINUS_ONE 0x02
69 * \brief calculates the offset of a 4x4 matrix element in a linear
72 * \notes The current convention is column-major, as in OpenGL ES
74 #define MELEM(row, col) ((row) + ((col) << 2))
78 * \brief Macro for accessing 4x4 float matrix elements
80 * \param mtx pointer to the first element of the matrix
81 * \param row matrix row
82 * \param col matrix column
84 #define M44F(mtx, row, col) ((mtx)->elem[MELEM((row), (col))])
86 /*--------------------------------------------------------------------*/
88 /*----------------------------------------------------------------------
90 *--------------------------------------------------------------------*/
95 * \brief ARM VFPv2 implementation of 4x4 matrix multiplication.
97 * \param dst multiplication result
98 * \param left left-hand matrix
99 * \param right right-hand matrix
101 #if defined(M3G_HW_FLOAT_VFPV2)
103 __asm void _m3gGenericMatrixProduct(Matrix *dst,
104 const Matrix *left, const Matrix *right)
112 // save the VFP registers and set the vector STRIDE = 1, LENGTH = 4
114 FSTMFDD sp!, {d8-d15}
117 BIC r12, r3, #0x00370000
121 // left = [a0 a1 a2 a3 right = [b0 b1 b2 b3
122 // a4 a5 a6 a7 b4 b5 b6 b7
123 // a8 a9 a10 a11 b8 b9 b10 b11
124 // a12 a13 a14 a15] b12 b13 b14 b15]
126 // dst = [a0*b0+a4*b1+a8*b2+a12*b3 a1*b0+a5*b1+a9*b2+a13*b3 ..
127 // a0*b4+a4*b5+a8*b6+a12*b7 a1*b4+a5*b5+a9*b6+a13*b7 ..
131 FLDMIAS r1!, {s8-s23} // load the left matrix [a0-a15] to registers s8-s23
132 FLDMIAS r2!, {s0-s7} // load [b0-b7] of right matrix to registers s0-s7
133 FMULS s24, s8, s0 // [s24-s27] = [a0*b0 a1*b0 a2*b0 a3*b0]
134 FMULS s28, s8, s4 // [s28-s31] = [a0*b4 a1*b4 a2*b4 a3*b4]
135 FMACS s24, s12, s1 // [s24-s27] += [a4*b1 a5*b1 a6*b1 a7*b1]
136 FMACS s28, s12, s5 // [s28-s31] += [a4*b5 a5*b5 a6*b5 a7*b5]
137 FMACS s24, s16, s2 // [s24-s27] += [a8*b2 a9*b2 a10*b2 a11*b2]
138 FMACS s28, s16, s6 // [s28-s31] += [a8*b6 a9*b6 a10*b6 a11*b6]
139 FMACS s24, s20, s3 // [s24-s27] += [a12*b3 a13*b3 a14*b3 a15*b3]
140 FMACS s28, s20, s7 // [s28-s31] += [a12*b7 a13*b37a14*b7 a15*b7]
141 FLDMIAS r2!, {s0-s7} // load [b8-b15]
142 FSTMIAS r0!, {s24-s31} // write [dst0-dst7]
151 FSTMIAS r0!, {s24-s31}
153 // Restore the VFP registers and return.
157 FLDMFDD sp!, {d8-d15}
162 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
165 /*------------------ Elementary float ------------------*/
167 #if defined(M3G_SOFT_FLOAT)
169 #if defined (M3G_BUILD_ARM)
173 * \brief Floating point multiplication implementation for ARM.
175 __asm M3Gfloat m3gMul(const M3Gfloat a,
179 * Extract the exponents of the multiplicands and add them
180 * together. Flush to zero if either exponent or their sum
185 ands r2, r0, r12, lsl #23; // exit if e1 == 0
186 andnes r3, r1, r12, lsl #23; // exit if e2 == 0
187 subne r2, r2, #(127 << 23);
188 addnes r12, r2, r3; // exit if e1+e2-127 <= 0
193 * Determine the sign of the result. Note that the exponent
194 * may have overflowed to the sign bit, and thus the result
195 * may be an arbitrary negative value when it really should
200 orrmi r12, r12, #0x80000000;
203 * Multiply the mantissas. First shift the mantissas up to
204 * unsigned 1.31 fixed point, adding the leading "1" bit at
205 * the same time, and finally do a 32x32 -> 64 bit unsigned
206 * multiplication. The result is in unsigned 2.62 fixed point,
207 * representing the interval [1.0, 4.0).
211 orr r0, r2, r0, lsl #8;
212 orr r1, r2, r1, lsl #8;
213 umulls r2, r3, r0, r1;
216 * If the highest bit of the 64-bit result is set, then the
217 * mantissa lies in [2.0, 4.0) and needs to be renormalized.
218 * That is, the mantissa is shifted one bit to the right and
219 * the exponent correspondingly increased by 1. Note that we
220 * lose the leading "1" bit from the mantissa by adding it up
224 subpl r12, r12, #(1 << 23); // no overflow: exponent -= 1
225 addpl r0, r12, r3, lsr #7; // no overflow: exponent += 1
226 addmi r0, r12, r3, lsr #8; // overflow: exponent += 1
232 * \brief Floating point addition implementation for ARM.
234 __asm M3Gfloat m3gAdd(const M3Gfloat a,
238 * If the operands have opposite signs then this is not really
239 * an addition but a subtraction. Subtraction is much slower,
240 * so we have a separate code path for it, rather than trying
241 * to save space by handling both in the same place.
248 * Sort the operands such that the larger operand is in R0 and
249 * the smaller in R1. The sign bits do not affect the ordering,
250 * since they are known to be equal.
258 * Extract the exponent of the smaller operand into R2 and compute
259 * the difference between the larger and smaller exponent into R3.
260 * (Note that the sign bits cancel out in the subtraction.) The
261 * exponent delta tells how many bits the mantissa of the smaller
262 * operand must be shifted to the right in order to bring the
263 * operands into equal scale.
267 rsb r3, r2, r0, lsr #23;
270 * Check if the exponent delta is bigger than 23 bits, or if the
271 * smaller exponent is zero. In either case, exit the routine and
272 * return the larger operand (which is already in R0). Note that
273 * this means that subnormals are treated as zero.
276 rsbs r12, r3, #23; // N set, V clear if R3 > 23
277 tstpl r2, #0xff; // execute only if R3 <= 23
278 bxle lr; // exit if Z set or N != V
281 * Extract the mantissas and shift them up to unsigned 1.31 fixed
282 * point, inserting the implied leading "1" bit at the same time.
283 * Finally, align the decimal points and add up the mantissas.
286 mov r12, #0x80000000;
287 orr r0, r12, r0, lsl #8;
288 orr r1, r12, r1, lsl #8;
289 adds r0, r0, r1, lsr r3;
292 * Compute the final exponent by adding up the smaller exponent
293 * (R2), the exponent delta (R3), and the possible overflow bit
294 * (carry flag). Note that in case of overflow, the leading "1"
295 * has ended up in the carry flag and thus needs not be explicitly
296 * discarded. Finally, put the mantissa together with the sign and
300 adc r2, r2, r3; // r2 = smallExp + deltaExp + overflow
301 movcc r0, r0, lsl #1; // no overflow: discard leading 1
303 orr r0, r0, r2, lsl #23;
309 * Sort the operands such that the one with larger magnitude is
310 * in R0 and has the correct sign (the sign of the final result),
311 * while the smaller operand is in R1 with an inverted sign bit.
314 eor r1, r1, #0x80000000;
316 eormi r2, r2, #0x80000000;
321 * Extract the exponent of the smaller operand into R2 and compute
322 * the difference between the larger and smaller exponent into R3.
323 * (Note that the sign bits cancel out in the subtraction.) The
324 * exponent delta tells how many bits the mantissa of the smaller
325 * operand must be shifted to the right in order to bring the
326 * operands into equal scale.
330 rsbs r3, r2, r0, lsr #23;
333 * Check if the exponent delta is bigger than 31 bits, or if the
334 * smaller exponent is zero. In either case, exit the routine and
335 * return the larger operand (which is already in R0). Note that
336 * this means that subnormals are treated as zero.
339 rsbs r12, r3, #31; // N set, V clear if R3 > 31
340 tstpl r2, #0xff; // execute only if R3 <= 31
341 bxle lr; // exit if Z set or N != V
344 * Extract the mantissas and shift them up to unsigned 1.31 fixed
345 * point, inserting the implied leading "1" bit at the same time.
346 * Then align the decimal points and subtract the smaller mantissa
347 * from the larger one.
350 mov r12, #0x80000000;
351 orr r0, r12, r0, lsl #8;
352 orr r1, r12, r1, lsl #8;
353 subs r0, r0, r1, lsr r3;
356 * We split the range of possible results into three categories:
358 * 1. [1.0, 2.0) ==> no renormalization needed (highest bit set)
359 * 2. [0.5, 1.0) ==> only one left-shift needed
360 * 3. (0.0, 0.5) ==> multiple left-shifts needed
361 * 4. zero ==> just return
363 * Cases 1 and 2 are handled in the main code path. Cases 3 and 4
364 * are less common by far, so we branch to a separate code fragment
368 movpls r0, r0, lsl #1; // Cases 2,3,4: shift left
369 bpl _m3gSubRenormalize; // Cases 3 & 4: branch out
372 * Now we have normalized the mantissa such that the highest bit
373 * is set. Here we only need to adjust the exponent, if necessary,
374 * and put the pieces together. Note that we lose the leading "1"
375 * bit from the mantissa by adding it up with the exponent. We can
376 * also do proper rounding (towards nearest) instead of truncation
377 * (towards zero) at no extra cost!
380 sbc r3, r3, #1; // deltaExp -= 1 (Case 1) or 2 (Case 2)
381 add r2, r2, r3; // resultExp = smallExp + deltaExp
382 movs r0, r0, lsr #8; // shift mantissa, keep leading "1"
383 adc r0, r0, r2, lsl #23; // resultExp += 1, mantissa += carry
387 * Separate code path for cases 3 and 4 (see above). The mantissa
388 * has already been shifted up by one, but the exponent has not
389 * been correspondingly decreased. We also know that the highest
390 * bit is still not set, and that the carry flag is clear.
396 movccs r0, r0, lsl #2;
399 * If the carry flag is still not set, i.e. there were more than
400 * two leading zeros in the mantissa initially, loop until we
401 * find the highest set bit.
404 _m3gSubRenormalizeLoop
406 movccs r0, r0, lsl #1;
407 bcc _m3gSubRenormalizeLoop;
410 * Now the leading "1" is in the carry flag, so we can just add up
411 * the exponent and mantissa as usual, doing proper rounding at
412 * the same time. However, cases where the exponent goes negative
413 * (that is, underflows) must be detected and flushed to zero.
418 adc r0, r0, r3, lsl #23;
424 #else /* M3G_BUILD_ARM */
428 * \brief Floating point addition implementation
431 static M3G_INLINE M3Gfloat m3gFloatAdd(const M3Guint aBits,
434 M3Guint large, small, signMask;
436 /* Early exits for underflow cases */
438 large = (M3Gint)(aBits & ~SIGN_MASK);
439 if (large <= 0x00800000) {
440 return INT_AS_FLOAT(bBits);
442 small = (M3Gint)(bBits & ~SIGN_MASK);
443 if (small <= 0x00800000) {
444 return INT_AS_FLOAT(aBits);
447 /* Swap the numbers so that "large" really is larger; the unsigned
448 * (or de-signed) bitmasks for floats are nicely monotonous, so we
449 * can compare directly. Also store the sign of the larger number
450 * for future reference. */
456 signMask = (bBits & SIGN_MASK);
459 signMask = (aBits & SIGN_MASK);
463 M3Guint res, overflow;
464 M3Guint resExp, expDelta;
466 /* Store the larger exponent as our candidate result exponent,
467 * and compute the difference between the exponents */
469 resExp = (large >> 23);
470 expDelta = resExp - (small >> 23);
472 /* Take an early exit if the change would be insignificant;
473 * this also guards against odd results from shifting by more
474 * than 31 (undefined in C) */
476 if (expDelta >= 24) {
477 res = large | signMask;
478 return INT_AS_FLOAT(res);
481 /* Convert the mantissas into shifted integers, and shift the
482 * smaller number to the same scale with the larger one. */
484 large = (large & MANTISSA_MASK) | LEADING_ONE;
485 small = (small & MANTISSA_MASK) | LEADING_ONE;
487 M3G_ASSERT(large >= small);
489 /* Check whether we're really adding or subtracting the
490 * smaller number, and branch to slightly different routines
493 if (((aBits ^ bBits) & SIGN_MASK) == 0) {
495 /* Matching signs; just add the numbers and check for
496 * overflow, shifting to compensate as necessary. */
500 overflow = (res >> 24);
506 /* Different signs, so let's subtract the smaller value;
507 * also check that we're not subtracting a number from
508 * itself (so we don't have to normalize a zero below) */
510 if (small == large) {
511 return 0.0f; /* x - x = 0 */
514 res = (large << 8) - (small << 8);
516 /* Renormalize the number by shifting until we've got the
517 * high bit in place */
519 while ((res >> 24) == 0) {
523 while ((res >> 31) == 0) {
530 /* Flush to zero in case of over/underflow of the exponent */
536 /* Compose the final number into "res"; note that we pull in
537 * the sign of the original larger number, which should still
540 res &= MANTISSA_MASK;
541 res |= (resExp << 23);
544 return INT_AS_FLOAT(res);
550 * \brief Floating point multiplication implementation
552 static M3G_INLINE M3Gfloat m3gFloatMul(const M3Guint aBits,
557 /* Early exits for underflow and multiplication by zero */
559 a = (aBits & ~SIGN_MASK);
560 if (a <= 0x00800000) {
564 b = (bBits & ~SIGN_MASK);
565 if (b <= 0x00800000) {
570 M3Guint res, exponent, overflow;
572 /* Compute the exponent of the result, assuming the mantissas
573 * don't overflow; then mask out the original exponents */
575 exponent = (a >> 23) + (b >> 23) - 127;
579 /* Compute the new mantissa from:
581 * (1 + a)(1 + b) = ab + a + b + 1
583 * First shift the mantissas from 0.23 down to 0.16 for the
584 * multiplication, then shift back to 0.23 for adding in the
585 * "a + b + 1" part of the equation. */
587 res = (a >> 7) * (b >> 7); /* "ab" at 0.32 */
588 res = (res >> 9) + a + b + LEADING_ONE;
590 /* Add the leading one, then normalize the result by checking
591 * the overflow bit and dividing by two if necessary */
593 overflow = (res >> 24);
595 exponent += overflow;
597 /* Flush to zero in case of over/underflow of the exponent */
599 if (exponent >= 255) {
603 /* Compose the final number into "res" */
605 res &= MANTISSA_MASK;
606 res |= (exponent << 23);
607 res |= (aBits ^ bBits) & SIGN_MASK;
609 return INT_AS_FLOAT(res);
613 #endif /* M3G_BUILD_ARM */
617 * \brief Computes the signed fractional part of a floating point
620 * \param x floating point value
621 * \return x signed fraction of x in ]-1, 1[
623 static M3Gfloat m3gFrac(M3Gfloat x)
625 /* Quick exit for -1 < x < 1 */
627 if (m3gAbs(x) < 1.0f) {
631 /* Shift the mantissa to the proper place, mask out the integer
632 * part, and renormalize */
634 M3Guint ix = FLOAT_AS_UINT(x);
635 M3Gint expo = ((ix >> 23) & 0xFF) - 127;
636 M3G_ASSERT(expo >= 0);
638 /* The decimal part will always be zero for large values with
639 * exponents over 24 */
646 /* Shift the integer part out of the mantissa and see what
649 M3Guint base = (ix & MANTISSA_MASK) | LEADING_ONE;
650 base = (base << expo) & MANTISSA_MASK;
652 /* Quick exit (and guard against infinite looping) for
659 /* We now have an exponent of 0 (i.e. no shifting), but
660 * must renormalize to get a set bit in place of the
661 * hidden (implicit one) bit */
665 while ((base >> 19) == 0) {
669 while ((base >> 23) == 0) {
674 /* Compose the final number */
677 (base & MANTISSA_MASK) |
678 ((expo + 127) << 23) |
680 return INT_AS_FLOAT(ix);
685 #endif /* M3G_SOFT_FLOAT */
687 #if defined(M3G_DEBUG)
690 * \brief Checks for NaN or infinity in a floating point input
692 static void m3gValidateFloats(int n, float *p)
695 M3G_ASSERT(EXPONENT(*p) < 120);
700 # define m3gValidateFloats(n, p)
703 /*------------------ Trigonometry and exp ----------*/
706 #if defined(M3G_SOFT_FLOAT)
709 * \brief Sine for the first quadrant
711 * \param x floating point value in [0, PI/2]
712 * \return sine of \c x
714 static M3Gfloat m3gSinFirstQuadrant(M3Gfloat x)
716 M3Guint bits = FLOAT_AS_UINT(x);
718 if (bits <= 0x3ba3d70au) /* 0.005f */
721 static const M3Gfloat sinTermLut[4] = {
728 M3Gfloat xx = m3gSquare(x);
732 for (i = 0; i < 4; ++i) {
733 x = m3gMul(x, m3gMul(xx, sinTermLut[i]));
734 sinx = m3gAdd(sinx, x);
740 #endif /* M3G_SOFT_FLOAT */
742 #if defined(M3G_SOFT_FLOAT)
745 * \brief Computes sine for the first period
747 * \param x floating point value in [0, 2PI]
750 static M3Gfloat m3gSinFirstPeriod(const M3Gfloat x)
752 M3G_ASSERT(x >= 0 && x <= TWO_PI);
755 return m3gNegate(m3gSinFirstQuadrant(x >= ONE_AND_HALF_PI ?
759 return m3gSinFirstQuadrant((x >= HALF_PI) ? m3gSub(PI, x) : x);
761 #endif /* M3G_SOFT_FLOAT */
763 /*------------- Float vs. int conversion helpers -------------*/
767 * \brief Scales and clamps a float to unsigned byte range
769 static M3G_INLINE M3Gint m3gFloatToByte(const M3Gfloat a)
771 return m3gRoundToInt(m3gMul(255.f, m3gClampFloat(a, 0.f, 1.f)));
774 /*------------------ Vector helpers ------------------*/
778 * \brief Computes the norm of a floating-point 3-vector
780 static M3G_INLINE M3Gfloat m3gNorm3(const M3Gfloat *v)
782 return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])),
788 * \brief Computes the norm of a floating-point 4-vector
790 static M3G_INLINE M3Gfloat m3gNorm4(const M3Gfloat *v)
792 return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])),
793 m3gAdd(m3gSquare(v[2]), m3gSquare(v[3])));
798 * \brief Scales a floating-point 3-vector
800 static M3G_INLINE void m3gScale3(M3Gfloat *v, M3Gfloat s)
802 v[0] = m3gMul(v[0], s);
803 v[1] = m3gMul(v[1], s);
804 v[2] = m3gMul(v[2], s);
809 * \brief Scales a floating-point 4-vector
811 static M3G_INLINE void m3gScale4(M3Gfloat *v, M3Gfloat s)
813 v[0] = m3gMul(v[0], s);
814 v[1] = m3gMul(v[1], s);
815 v[2] = m3gMul(v[2], s);
816 v[3] = m3gMul(v[3], s);
820 /*------------------ Matrices ------------------*/
825 static M3G_INLINE M3Gbool m3gIsClassified(const Matrix *mtx)
827 M3G_ASSERT(mtx != NULL);
828 return (M3Gbool) mtx->classified;
833 * \brief Returns a classification for a single floating point number
835 static M3G_INLINE M3Guint m3gElementClass(const M3Gfloat x)
840 else if (IS_ONE(x)) {
843 else if (IS_MINUS_ONE(x)) {
844 return ELEM_MINUS_ONE;
851 * \brief Computes the classification mask of a matrix
853 * The mask is constructed from two bits per elements, with the lowest
854 * two bits corresponding to the first element in the \c elem array of
857 static void m3gClassify(Matrix *mtx)
863 M3G_ASSERT(mtx != NULL);
864 M3G_ASSERT(!m3gIsClassified(mtx));
867 for (i = 0; i < 16; ++i) {
868 M3Gfloat elem = *p++;
869 mask |= (m3gElementClass(elem) << (i*2));
872 mtx->classified = M3G_TRUE;
877 * \brief Sets matrix classification directly
879 static M3G_INLINE void m3gClassifyAs(M3Guint mask, Matrix *mtx)
881 M3G_ASSERT(mtx != NULL);
883 mtx->classified = M3G_TRUE;
884 mtx->complete = M3G_FALSE;
889 * \brief Attempts to classify a matrix more precisely
891 * Tries to classify all "free" elements of a matrix into one of the
892 * predefined constants to improve precision and performance in
893 * subsequent calculations.
895 static void m3gSubClassify(Matrix *mtx)
898 M3G_ASSERT(m3gIsClassified(mtx));
900 const M3Gfloat *p = mtx->elem;
901 M3Guint inMask = mtx->mask;
902 M3Guint outMask = inMask;
905 for (i = 0; i < 16; ++i, inMask >>= 2) {
906 M3Gfloat elem = *p++;
907 if ((inMask & 0x03) == ELEM_ANY) {
908 outMask &= ~(0x03u << (i*2));
909 outMask |= (m3gElementClass(elem) << (i*2));
918 * \brief Fills in the implicit values for a classified matrix
920 static void m3gFillClassifiedMatrix(Matrix *mtx)
926 M3G_ASSERT(mtx != NULL);
927 M3G_ASSERT(mtx->classified);
928 M3G_ASSERT(!mtx->complete);
933 for (i = 0; i < 16; ++i, mask >>= 2) {
934 unsigned elem = (mask & 0x03);
936 case ELEM_ZERO: *p++ = 0.0f; break;
937 case ELEM_ONE: *p++ = 1.0f; break;
938 case ELEM_MINUS_ONE: *p++ = -1.0f; break;
942 mtx->complete = M3G_TRUE;
946 #if !defined(M3G_HW_FLOAT)
949 * \brief Performs one multiply-add of classified matrix elements
951 * \param amask element class of the first multiplicand
952 * \param a float value of the first multiplicand
953 * \param bmask element class of the second multiplicand
954 * \param b float value of the second multiplicand
955 * \param c float value to add
958 * \notes inline, as only called from the matrix product function
960 static M3G_INLINE M3Gfloat m3gClassifiedMadd(
961 const M3Gbitmask amask, const M3Gfloat *pa,
962 const M3Gbitmask bmask, const M3Gfloat *pb,
965 /* Check for zero product to reduce the switch cases below */
967 if (amask == ELEM_ZERO || bmask == ELEM_ZERO) {
971 /* Branch based on the classification of a */
976 if (bmask == ELEM_ONE) {
977 return m3gAdd(*pa, c); /* a * 1 + c = a + c */
979 if (bmask == ELEM_MINUS_ONE) {
980 return m3gSub(c, *pa); /* a * -1 + c = -a + c */
982 return m3gMadd(*pa, *pb, c); /* a * b + c */
985 if (bmask == ELEM_ONE) {
986 return m3gAdd(c, 1.f); /* 1 * 1 + c = 1 + c */
988 if (bmask == ELEM_MINUS_ONE) {
989 return m3gSub(c, 1.f); /* 1 * -1 + c = -1 + c */
991 return m3gAdd(*pb, c); /* 1 * b + c = b + c */
994 if (bmask == ELEM_ONE) {
995 return m3gSub(c, 1.f); /* -1 * 1 + c = -1 + c */
997 if (bmask == ELEM_MINUS_ONE) {
998 return m3gAdd(c, 1.f); /* -1 * -1 + c = 1 + c */
1000 return m3gSub(c, *pb); /* -1 * b + c = -b + c */
1003 M3G_ASSERT(M3G_FALSE);
1007 #endif /*!defined(M3G_HW_FLOAT)*/
1011 * \brief Computes a generic 4x4 matrix product
1013 static void m3gGenericMatrixProduct(Matrix *dst,
1014 const Matrix *left, const Matrix *right)
1016 M3G_ASSERT(dst != NULL && left != NULL && right != NULL);
1019 # if defined(M3G_HW_FLOAT)
1020 if (!left->complete) {
1021 m3gFillClassifiedMatrix((Matrix*)left);
1023 if (!right->complete) {
1024 m3gFillClassifiedMatrix((Matrix*)right);
1027 const unsigned lmask = left->mask;
1028 const unsigned rmask = right->mask;
1031 #if defined(M3G_HW_FLOAT_VFPV2)
1032 _m3gGenericMatrixProduct(dst, left, right);
1037 for (row = 0; row < 4; ++row) {
1039 for (col = 0; col < 4; ++col) {
1042 for (k = 0; k < 4; ++k) {
1043 M3Gint lidx = MELEM(row, k);
1044 M3Gint ridx = MELEM(k, col);
1045 # if defined(M3G_HW_FLOAT)
1046 a = m3gMadd(left->elem[lidx], right->elem[ridx], a);
1048 a = m3gClassifiedMadd((lmask >> (2 * lidx)) & 3,
1050 (rmask >> (2 * ridx)) & 3,
1053 # endif /*!M3G_HW_FLOAT*/
1055 M44F(dst, row, col) = a;
1059 #endif /*!M3G_HW_FLOAT_VFPV2*/
1061 dst->complete = M3G_TRUE;
1062 dst->classified = M3G_FALSE;
1067 * \brief Converts a float vector to 16-bit integers
1069 * \param size vector length
1070 * \param vec pointer to float vector
1071 * \param scale scale of output values, as the number of bits to
1072 * shift left to get actual values
1073 * \param outVec output value vector
1075 static void m3gFloatVecToShort(M3Gint size, const M3Gfloat *vec,
1076 M3Gint scale, M3Gshort *outVec)
1078 const M3Guint *vecInt = (const M3Guint*) vec;
1081 for (i = 0; i < size; ++i) {
1082 M3Guint a = vecInt[i];
1083 if ((a & ~SIGN_MASK) < (1 << 23)) {
1088 scale - ((M3Gint)((vecInt[i] >> 23) & 0xFFu) - (127 + 23));
1089 M3G_ASSERT(shift > 8); /* or the high bits will overflow */
1096 (M3Gint) (((a & MANTISSA_MASK) | LEADING_ONE) >> shift);
1100 M3G_ASSERT(m3gInRange(out, -32767, 32767));
1101 *outVec++ = (M3Gshort) out;
1107 /*----------------------------------------------------------------------
1108 * Internal functions
1109 *--------------------------------------------------------------------*/
1111 /*--------------------------------------------------------------------*/
1113 #if defined(M3G_SOFT_FLOAT)
1115 #if !defined (M3G_BUILD_ARM)
1117 static M3Gfloat m3gAdd(const M3Gfloat a, const M3Gfloat b)
1119 return m3gFloatAdd(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b));
1122 static M3Gfloat m3gMul(const M3Gfloat a, const M3Gfloat b)
1124 return m3gFloatMul(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b));
1127 #endif /* M3G_BUILD_ARM */
1131 * \brief Computes the reciprocal of the square root
1133 * \param x a floating point value
1134 * \return 1 / square root of \c x
1136 static M3Gfloat m3gRcpSqrt(const M3Gfloat x)
1138 /* Approximation followed by Newton-Raphson iteration a'la
1139 * "Floating-point tricks" by J. Blinn, but we iterate several
1140 * times to improve precision */
1142 M3Gint i = (M3G_FLOAT_ONE + (M3G_FLOAT_ONE >> 1))
1143 - (FLOAT_AS_UINT(x) >> 1);
1144 M3Gfloat y = INT_AS_FLOAT(i);
1145 for (i = 0; i < 3; ++i) {
1146 y = m3gMul(y, m3gSub(1.5f, m3gHalf(m3gMul(x, m3gSquare(y)))));
1153 * \brief Computes the square root
1155 * \param x a floating point value
1156 * \return square root of \c x
1158 static M3Gfloat m3gSqrt(const M3Gfloat x)
1160 /* Approximation followed by Newton-Raphson iteration a'la
1161 * "Floating-point tricks" by J. Blinn, but we iterate several
1162 * times to improve precision */
1164 M3Gint i = (FLOAT_AS_UINT(x) >> 1) + (M3G_FLOAT_ONE >> 1);
1165 M3Gfloat y = INT_AS_FLOAT(i);
1166 for (i = 0; i < 2; ++i) {
1167 y = m3gDiv(m3gAdd(m3gSquare(y), x), m3gDouble(y));
1172 #endif /* M3G_SOFT_FLOAT */
1174 /*--------------------------------------------------------------------*/
1176 #if defined(M3G_SOFT_FLOAT)
1180 static M3Gfloat m3gArcCos(const M3Gfloat x)
1182 return (M3Gfloat) acos(x);
1188 static M3Gfloat m3gArcTan(const M3Gfloat y, const M3Gfloat x)
1190 return (M3Gfloat) atan2(y, x);
1196 static M3Gfloat m3gCos(const M3Gfloat x)
1198 return m3gSin(m3gAdd(x, HALF_PI));
1205 static M3Gfloat m3gSin(const M3Gfloat x)
1209 /* If x is greater than two pi, do a modulo operation to bring it
1210 * back in range for the internal sine function */
1212 if (m3gAbs(f) >= TWO_PI) {
1213 f = m3gMul (f, (1.f / TWO_PI));
1215 f = m3gMul (f, TWO_PI);
1218 /* Compute the result, negating both the input value and the
1219 * result if x was negative */
1221 M3Guint i = FLOAT_AS_UINT(f);
1222 M3Guint neg = (i & SIGN_MASK);
1224 f = m3gSinFirstPeriod(INT_AS_FLOAT(i));
1225 i = FLOAT_AS_UINT(f) ^ neg;
1226 return INT_AS_FLOAT(i);
1233 static M3Gfloat m3gTan(const M3Gfloat x)
1235 return (M3Gfloat) tan(x);
1241 static M3Gfloat m3gExp(const M3Gfloat a)
1243 return (M3Gfloat) exp(a);
1245 #endif /* M3G_SOFT_FLOAT */
1248 * \brief Checks whether the bottom row of a matrix is 0 0 0 1
1250 static M3Gbool m3gIsWUnity(const Matrix *mtx)
1252 M3G_ASSERT_PTR(mtx);
1254 if (!m3gIsClassified(mtx)) {
1255 return (IS_ZERO(M44F(mtx, 3, 0)) &&
1256 IS_ZERO(M44F(mtx, 3, 1)) &&
1257 IS_ZERO(M44F(mtx, 3, 2)) &&
1258 IS_ONE (M44F(mtx, 3, 3)));
1261 return ((mtx->mask & 0xC0C0C0C0) == (ELEM_ONE << 30));
1266 * \brief Makes a quaternion by exponentiating a quaternion logarithm
1268 static void m3gExpQuat(Quat *quat, const Vec3 *qExp)
1272 M3G_ASSERT_PTR(quat);
1273 M3G_ASSERT_PTR(qExp);
1275 theta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(qExp->x),
1276 m3gSquare(qExp->y)),
1277 m3gSquare(qExp->z)));
1279 if (theta > EPSILON) {
1280 M3Gfloat s = m3gMul(m3gSin(theta), m3gRcp(theta));
1281 quat->x = m3gMul(qExp->x, s);
1282 quat->y = m3gMul(qExp->y, s);
1283 quat->z = m3gMul(qExp->z, s);
1284 quat->w = m3gCos(theta);
1287 quat->x = quat->y = quat->z = 0.0f;
1293 * \brief Natural logarithm of a unit quaternion.
1295 static void m3gLogQuat(Vec3 *qLog, const Quat *quat)
1297 M3Gfloat sinTheta = m3gSqrt(m3gNorm3((const float *) quat));
1299 if (sinTheta > EPSILON) {
1300 M3Gfloat s = m3gArcTan(sinTheta, quat->w) / sinTheta;
1301 qLog->x = m3gMul(s, quat->x);
1302 qLog->y = m3gMul(s, quat->y);
1303 qLog->z = m3gMul(s, quat->z);
1306 qLog->x = qLog->y = qLog->z = 0.0f;
1311 * \brief Make quaternion the "logarithmic difference" between two
1312 * other quaternions.
1314 static void m3gLogDiffQuat(Vec3 *logDiff,
1315 const Quat *from, const Quat *to)
1318 temp.x = m3gNegate(from->x);
1319 temp.y = m3gNegate(from->y);
1320 temp.z = m3gNegate(from->z);
1322 m3gMulQuat(&temp, to);
1323 m3gLogQuat(logDiff, &temp);
1327 * \brief Rounds a float to the nearest integer
1329 * Overflows are clamped to the maximum or minimum representable
1332 static M3Gint m3gRoundToInt(const M3Gfloat a)
1334 M3Guint base = FLOAT_AS_UINT(a);
1335 M3Gint signMask, expo;
1337 /* Decompose the number into sign, exponent, and base number */
1339 signMask = ((M3Gint) base >> 31); /* -> 0 or 0xFFFFFFFF */
1340 expo = (M3Gint)((base >> 23) & 0xFF) - 127;
1342 /* First check for large values and return either the negative or
1343 * the positive maximum integer in case of overflow. The overflow
1344 * check can be made on the exponent alone, as large floats are
1345 * spaced several integer values apart so that nothing will
1346 * overflow because of rounding later on */
1349 return (M3Gint)((1U << 31) - 1 + (((M3Guint) signMask) & 1));
1352 /* Also check for underflow to avoid problems with shifting by
1359 /* Mask out the sign and exponent bits, shift the base number so
1360 * that the lowest bit corresponds to one half, then add one
1361 * (half) and shift to round to the closest integer. */
1363 base = (base | LEADING_ONE) << 8; /* shift mantissa to 1.31 */
1364 base = base >> (30 - expo); /* integer value as 31.1 */
1365 base = (base + 1) >> 1; /* round to nearest 32.0 */
1367 /* Factor in the sign (negate if originally negative) and
1370 return ((M3Gint) base ^ signMask) - signMask;
1374 * \brief Calculates ray-triangle intersection.
1376 * http://www.acm.org/jgt/papers/MollerTrumbore97
1378 static M3Gbool m3gIntersectTriangle(const Vec3 *orig, const Vec3 *dir,
1379 const Vec3 *vert0, const Vec3 *vert1, const Vec3 *vert2,
1380 Vec3 *tuv, M3Gint cullMode)
1382 Vec3 edge1, edge2, tvec, pvec, qvec;
1383 M3Gfloat det,inv_det;
1385 /* find vectors for two edges sharing vert0 */
1388 m3gSubVec3(&edge1, vert0);
1389 m3gSubVec3(&edge2, vert0);
1391 /* begin calculating determinant - also used to calculate U parameter */
1392 m3gCross(&pvec, dir, &edge2);
1394 /* if determinant is near zero, ray lies in plane of triangle */
1395 det = m3gDot3(&edge1, &pvec);
1397 if (cullMode == 0 && det <= 0) return M3G_FALSE;
1398 if (cullMode == 1 && det >= 0) return M3G_FALSE;
1400 if (det > -EPSILON && det < EPSILON)
1402 inv_det = m3gRcp(det);
1404 /* calculate distance from vert0 to ray origin */
1406 m3gSubVec3(&tvec, vert0);
1408 /* calculate U parameter and test bounds */
1409 tuv->y = m3gMul(m3gDot3(&tvec, &pvec), inv_det);
1410 if (tuv->y < 0.0f || tuv->y > 1.0f)
1413 /* prepare to test V parameter */
1414 m3gCross(&qvec, &tvec, &edge1);
1416 /* calculate V parameter and test bounds */
1417 tuv->z = m3gMul(m3gDot3(dir, &qvec), inv_det);
1418 if (tuv->z < 0.0f || m3gAdd(tuv->y, tuv->z) > 1.0f)
1421 /* calculate t, ray intersects triangle */
1422 tuv->x = m3gMul(m3gDot3(&edge2, &qvec), inv_det);
1428 * \brief Calculates ray-box intersection.
1430 * http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm
1440 #define XL box->min[0]
1441 #define YL box->min[1]
1442 #define ZL box->min[2]
1443 #define XH box->max[0]
1444 #define YH box->max[1]
1445 #define ZH box->max[2]
1449 * \brief Ray - bounding box intersection
1452 static M3Gbool m3gIntersectBox(const Vec3 *orig, const Vec3 *dir, const AABB *box)
1454 M3Gfloat tnear = M3G_MIN_NEGATIVE_FLOAT;
1455 M3Gfloat tfar = M3G_MAX_POSITIVE_FLOAT;
1456 M3Gfloat t1, t2, temp;
1460 t1 = m3gSub(XL, XO) / XD;
1461 t2 = m3gSub(XH, XO) / XD;
1469 if(t1 > tnear) tnear = t1;
1470 if(t2 < tfar) tfar = t2;
1472 if(tnear > tfar) return M3G_FALSE;
1473 if(tfar < 0) return M3G_FALSE;
1476 if(XO > XH || XO < XL) return M3G_FALSE;
1481 t1 = m3gSub(YL, YO) / YD;
1482 t2 = m3gSub(YH, YO) / YD;
1490 if(t1 > tnear) tnear = t1;
1491 if(t2 < tfar) tfar = t2;
1493 if(tnear > tfar) return M3G_FALSE;
1494 if(tfar < 0) return M3G_FALSE;
1497 if(YO > YH || YO < YL) return M3G_FALSE;
1502 t1 = m3gSub(ZL, ZO) / ZD;
1503 t2 = m3gSub(ZH, ZO) / ZD;
1511 if(t1 > tnear) tnear = t1;
1512 if(t2 < tfar) tfar = t2;
1514 if(tnear > tfar) return M3G_FALSE;
1515 if(tfar < 0) return M3G_FALSE;
1518 if(ZO > ZH || ZO < ZL) return M3G_FALSE;
1525 * \brief Calculates the intersection of two rectangles. Always fills
1526 * the intersection result.
1528 * \param dst result of the intersection
1529 * \param r1 rectangle 1
1530 * \param r2 rectangle 2
1532 static M3Gbool m3gIntersectRectangle(M3GRectangle *dst, M3GRectangle *r1, M3GRectangle *r2)
1534 M3Gbool intersects = M3G_TRUE;
1537 max = (r1->x) >= (r2->x) ? (r1->x) : (r2->x);
1538 min = (r1->x + r1->width) <= (r2->x + r2->width) ? (r1->x + r1->width) : (r2->x + r2->width);
1539 if ((min - max) < 0) intersects = M3G_FALSE;
1541 dst->width = min - max;
1543 max = (r1->y) >= (r2->y) ? (r1->y) : (r2->y);
1544 min = (r1->y + r1->height) <= (r2->y + r2->height) ? (r1->y + r1->height) : (r2->y + r2->height);
1545 if ((min - max) < 0) intersects = M3G_FALSE;
1547 dst->height = min - max;
1552 /*-------- float-to-int color conversions --------*/
1554 static M3Guint m3gAlpha1f(M3Gfloat a)
1556 M3Guint alpha = (M3Guint) m3gFloatToByte(a);
1557 return (alpha << 24) | M3G_RGB_MASK;
1560 static M3Guint m3gColor3f(M3Gfloat r, M3Gfloat g, M3Gfloat b)
1562 return ((M3Guint) m3gFloatToByte(r) << 16)
1563 | ((M3Guint) m3gFloatToByte(g) << 8)
1564 | (M3Guint) m3gFloatToByte(b)
1568 static M3Guint m3gColor4f(M3Gfloat r, M3Gfloat g, M3Gfloat b, M3Gfloat a)
1570 return ((M3Guint) m3gFloatToByte(r) << 16)
1571 | ((M3Guint) m3gFloatToByte(g) << 8)
1572 | (M3Guint) m3gFloatToByte(b)
1573 | ((M3Guint) m3gFloatToByte(a) << 24);
1576 static void m3gFloatColor(M3Gint argb, M3Gfloat intensity, M3Gfloat *rgba)
1578 /* NOTE we intentionally aim a bit high here -- some GL
1579 * implementations may round down instead of closest */
1581 const M3Gfloat oneOver255 = (M3Gfloat)(1.0001 / 255.0);
1583 rgba[0] = (M3Gfloat)((argb >> 16) & 0xFF);
1584 rgba[1] = (M3Gfloat)((argb >> 8) & 0xFF);
1585 rgba[2] = (M3Gfloat)((argb ) & 0xFF);
1586 rgba[3] = (M3Gfloat)((argb >> 24) & 0xFF);
1588 m3gScale4(rgba, m3gMul(oneOver255, intensity));
1592 * \brief Converts the 3x3 submatrix of a matrix into fixed point
1594 * The input matrix must be an affine one, i.e. the bottom row must be
1595 * 0 0 0 1. The output matrix is guaranteed to be such that it can be
1596 * multiplied with a 16-bit 3-vector without overflowing, while using
1597 * the 32-bit range optimally.
1599 * \param mtx the original matrix
1600 * \param elem 9 shorts to hold the fixed point base numbers
1601 * \return floating point exponent for the values in \c elem
1602 * (number of bits to shift left for actual values)
1604 static M3Gint m3gGetFixedPoint3x3Basis(const Matrix *mtx, M3Gshort *elem)
1610 if (!mtx->complete) {
1611 m3gFillClassifiedMatrix((Matrix*) mtx);
1613 m = (const M3Guint*) mtx->elem;
1615 /* First, find the maximum exponent value in the whole matrix */
1618 for (col = 0; col < 3; ++col) {
1619 for (row = 0; row < 3; ++row) {
1620 M3Gint element = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK);
1621 outExp = M3G_MAX(outExp, element);
1626 /* Our candidate exponent is the maximum found plus 9, which is
1627 * guaranteed to shift the maximum unsigned 24-bit mantissa (which
1628 * always will have the high bit set) down to the signed 16-bit
1633 /* Now proceed to sum each row and see what's the actual smallest
1634 * exponent we can safely use without overflowing in a 16+16
1635 * matrix-vector multiplication; this will win us one bit
1636 * (doubling the precision) compared to the conservative approach
1637 * of just shifting everything down by 10 bits */
1639 for (row = 0; row < 3; ++row) {
1641 /* Sum the absolute values on this row */
1644 for (col = 0; col < 3; ++col) {
1645 M3Gint a = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK);
1646 M3Gint shift = outExp - (a >> 23);
1647 M3G_ASSERT(shift < 265);
1650 rowSum += ((a & MANTISSA_MASK) | LEADING_ONE) >> shift;
1654 /* Now we have a 26-bit sum of the absolute values on this
1655 * row, and shift that down until we fit the target range of
1656 * [0, 65535]. Note that this still leaves *exactly* enough
1657 * space for adding in an arbitrary 16-bit translation vector
1658 * after multiplying with the matrix! */
1660 while (rowSum >= (1 << 16)) {
1666 /* De-bias the exponent, but add in an extra 23 to account for the
1667 * decimal bits in the floating point mantissa values we started
1668 * with (we're returning the exponent as "bits to shift left to
1669 * get integers", so we're off by 23 from IEEE notation) */
1671 outExp = (outExp - 127) - 23;
1673 /* Finally, shift all the matrix elements to our final output
1676 for (col = 0; col < 3; ++col) {
1677 m3gFloatVecToShort(3, mtx->elem + MELEM(0, col), outExp, elem);
1684 * \brief Gets the translation component of a matrix as fixed point
1686 * \param mtx the matrix
1687 * \param elem 3 shorts to write the vector into
1688 * \return floating point exponent for the values in \c elem
1689 * (number of bits to shift left for actual values)
1691 static M3Gint m3gGetFixedPointTranslation(const Matrix *mtx, M3Gshort *elem)
1695 M3G_ASSERT(m3gIsWUnity(mtx));
1696 if (!mtx->complete) {
1697 m3gFillClassifiedMatrix((Matrix*) mtx);
1699 m = (const M3Guint*) &mtx->elem[MELEM(0, 3)];
1701 /* Find the maximum exponent, then scale down by 9 bits from that
1702 * to shift the unsigned 24-bit mantissas to the signed 16-bit
1706 M3Guint maxElem = m[0] & ~SIGN_MASK;
1707 maxElem = M3G_MAX(maxElem, m[1] & ~SIGN_MASK);
1708 maxElem = M3G_MAX(maxElem, m[2] & ~SIGN_MASK);
1710 outExp = (M3Gint)(maxElem >> 23) - (127 + 23) + 9;
1711 m3gFloatVecToShort(3, mtx->elem + MELEM(0, 3), outExp, elem);
1718 * \brief Compute a bounding box enclosing two other boxes
1720 * \param box box to fit
1721 * \param a first box to enclose or NULL
1722 * \param b second box to enclose or NULL
1724 * \note If both input boxes are NULL, the box is not modified.
1726 static void m3gFitAABB(AABB *box, const AABB *a, const AABB *b)
1730 M3G_ASSERT_PTR(box);
1740 for (i = 0; i < 3; ++i) {
1741 box->min[i] = M3G_MIN(a->min[i], b->min[i]);
1742 box->max[i] = M3G_MAX(a->max[i], b->max[i]);
1755 * \brief Transform an axis-aligned bounding box with a matrix
1757 * This results in a box that encloses the transformed original box.
1759 * Based on "Transforming Axis-Aligned Bounding Boxes" by Jim Arvo
1760 * from Graphics Gems.
1762 * \note The bottom row of the matrix is ignored in the transformation.
1764 static void m3gTransformAABB(AABB *box, const Matrix *mtx)
1766 M3Gfloat boxMin[3], boxMax[3];
1767 M3Gfloat newMin[3], newMax[3];
1769 m3gValidateAABB(box);
1771 if (!mtx->complete) {
1772 m3gFillClassifiedMatrix((Matrix*) mtx);
1775 /* Get the original minimum and maximum extents as floats, and add
1776 * the translation as the base for the transformed box */
1779 for (i = 0; i < 3; ++i) {
1780 boxMin[i] = box->min[i];
1781 boxMax[i] = box->max[i];
1782 newMin[i] = newMax[i] = M44F(mtx, i, 3);
1786 /* Transform into the new minimum and maximum coordinates using
1787 * the upper left 3x3 part of the matrix */
1791 for (row = 0; row < 3; ++row) {
1792 for (col = 0; col < 3; ++col) {
1793 M3Gfloat a = m3gMul(M44F(mtx, row, col), boxMin[col]);
1794 M3Gfloat b = m3gMul(M44F(mtx, row, col), boxMax[col]);
1797 newMin[row] = m3gAdd(newMin[row], a);
1798 newMax[row] = m3gAdd(newMax[row], b);
1801 newMin[row] = m3gAdd(newMin[row], b);
1802 newMax[row] = m3gAdd(newMax[row], a);
1808 /* Write back into the bounding box */
1811 for (i = 0; i < 3; ++i) {
1812 box->min[i] = newMin[i];
1813 box->max[i] = newMax[i];
1817 m3gValidateAABB(box);
1820 #if defined(M3G_DEBUG)
1824 static void m3gValidateAABB(const AABB *aabb)
1826 m3gValidateFloats(6, (float*) aabb);
1830 /*----------------------------------------------------------------------
1832 *--------------------------------------------------------------------*/
1835 * \brief Linear interpolation of vectors
1837 * \param size number of components
1838 * \param vec output vector
1839 * \param s interpolation factor
1840 * \param start initial value
1841 * \param end target value
1843 #if defined(M3G_HW_FLOAT_VFPV2)
1845 __weak __asm void m3gLerp(M3Gint size,
1848 const M3Gfloat *start, const M3Gfloat *end)
1856 EXPORT m3gLerp[DYNAMIC]
1859 M3Gfloat sCompl = 1.0 - s;
1860 for (i = 0; i < size; ++i) {
1861 vec[i] = sCompl*start[i] + s*end[i];
1865 // if size = 0, return
1876 FSUBS s2, s1, s0 // sCompl = 1 - s
1880 BGT _m3gLerp_over4Loop
1885 // set vector STRIDE = 1, LENGTH = 4
1886 BIC r12, r4, #0x00370000
1890 FLDMIAS r2!, {s4-s7} // load the start[i] values
1891 FLDMIAS r3!, {s8-s11} // load the end[i] values
1892 FMULS s12, s4, s2 // [s12-s15] = [sCompl*start[0] .. sCompl*start[3]]
1893 FMACS s12, s8, s0 // [s12-s15] += [ s*end[0] .. s*end[3]]
1896 FSTSGT s13, [r1, #4]
1898 FSTSGE s14, [r1, #8]
1899 FSTSGT s15, [r1, #12]
1907 // if size > 4, interpolate 8 values in one loop
1911 FSTMFDD sp!, {d8-d9}
1912 MOVS r5, r0, ASR #3 // size/8
1913 SUB r0, r0, r5, LSL #3 // tail length
1915 // set vector STRIDE = 1, LENGTH = 8
1916 BIC r12, r4, #0x00370000
1921 _m3gLerp_alignedLoop
1923 FLDMIASNE r2!, {s8-s15} // load the start[i] values
1924 FLDMIASNE r3!, {s16-s23} // load the end[i] values
1925 FMULSNE s24, s8, s2 // [s16-s23] = [ sCompl*start[0] sCompl*start[1] .. sCompl*start[7]]
1926 FMACSNE s24, s16, s0 // [s16-s23] += [ s*end[0] s*end[1] .. s*end[7]]
1927 FSTMIASNE r1!, {s24-s31}
1930 BNE _m3gLerp_alignedLoop
1932 // process the 0-7 remaining values in the tail
1935 FLDMIASGE r2!, {s8-s15}
1936 FLDMIASGE r3!, {s16-s23}
1938 FMACSGE s24, s16, s0
1940 FSTSGT s25, [r1, #4]
1942 FSTSGE s26, [r1, #8]
1943 FSTSGT s27, [r1, #12]
1945 FSTSGE s28, [r1, #16]
1946 FSTSGT s29, [r1, #20]
1948 FSTSEQ s30, [r1, #24]
1952 FLDMFDD sp!, {d8-d9}
1958 #else /* #if defined(M3G_HW_FLOAT_VFPV2) */
1960 M3G_API void m3gLerp(M3Gint size,
1963 const M3Gfloat *start, const M3Gfloat *end)
1967 M3Gfloat sCompl = m3gSub(1.f, s);
1968 for (i = 0; i < size; ++i) {
1969 vec[i] = m3gAdd(m3gMul(sCompl, start[i]), m3gMul(s, end[i]));
1972 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
1975 * \brief Hermite spline interpolation of vectors
1977 * \param size number of components
1978 * \param vec output vector
1979 * \param s interpolation factor
1980 * \param start start value vector
1981 * \param end end value vector
1982 * \param tStart start tangent vector
1983 * \param tEnd end tangent vector
1985 M3G_API void m3gHermite(M3Gint size,
1988 const M3Gfloat *start, const M3Gfloat *end,
1989 const M3Gfloat *tStart, const M3Gfloat *tEnd)
1991 M3Gfloat s2 = m3gSquare(s);
1992 M3Gfloat s3 = m3gMul(s2, s);
1995 for (i = 0; i < size; ++i) {
1998 m3gAdd(m3gSub(m3gDouble(s3), m3gMul(3.f, s2)), 1.f),
2000 m3gSub(m3gMul(3.f, s2), m3gDouble(s3)),
2002 m3gAdd(m3gSub(s3, m3gDouble(s2)), s),
2008 /* vec = ( 2*s3 - 3*s2 + 1) * start
2009 + (-2*s3 + 3*s2 ) * end
2010 + ( s3 - 2*s2 + s) * tStart
2011 + ( s3 - s2 ) * tEnd; */
2014 /*--------------------------------------------------------------------*/
2017 * \brief Sets a matrix to a copy of another matrix
2019 M3G_API void m3gCopyMatrix(Matrix *dst, const Matrix *src)
2021 M3G_ASSERT(dst != NULL && src != NULL);
2026 * \brief Vector addition
2028 M3G_API void m3gAddVec3(Vec3 *vec, const Vec3 *other)
2030 vec->x = m3gAdd(vec->x, other->x);
2031 vec->y = m3gAdd(vec->y, other->y);
2032 vec->z = m3gAdd(vec->z, other->z);
2036 * \brief Vector addition
2038 M3G_API void m3gAddVec4(Vec4 *vec, const Vec4 *other)
2040 vec->x = m3gAdd(vec->x, other->x);
2041 vec->y = m3gAdd(vec->y, other->y);
2042 vec->z = m3gAdd(vec->z, other->z);
2043 vec->w = m3gAdd(vec->w, other->w);
2047 * \brief Cross product of two 3D vectors expressed as 4D vectors
2049 M3G_API void m3gCross(Vec3 *dst, const Vec3 *a, const Vec3 *b)
2051 dst->x = m3gSub(m3gMul(a->y, b->z), m3gMul(a->z, b->y));
2052 dst->y = m3gSub(m3gMul(a->z, b->x), m3gMul(a->x, b->z));
2053 dst->z = m3gSub(m3gMul(a->x, b->y), m3gMul(a->y, b->x));
2057 * \brief Dot product of two vectors
2059 M3G_API M3Gfloat m3gDot3(const Vec3 *a, const Vec3 *b)
2062 d = m3gMul(a->x, b->x);
2063 d = m3gMadd(a->y, b->y, d);
2064 d = m3gMadd(a->z, b->z, d);
2069 * \brief Dot product of two vectors
2071 M3G_API M3Gfloat m3gDot4(const Vec4 *a, const Vec4 *b)
2074 d = m3gMul(a->x, b->x);
2075 d = m3gMadd(a->y, b->y, d);
2076 d = m3gMadd(a->z, b->z, d);
2077 d = m3gMadd(a->w, b->w, d);
2084 M3G_API void m3gSetVec3(Vec3 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z)
2095 M3G_API void m3gSetVec4(Vec4 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z, M3Gfloat w)
2105 * \brief Vector subtraction
2107 M3G_API void m3gSubVec3(Vec3 *vec, const Vec3 *other)
2109 vec->x = m3gSub(vec->x, other->x);
2110 vec->y = m3gSub(vec->y, other->y);
2111 vec->z = m3gSub(vec->z, other->z);
2115 * \brief Vector subtraction
2117 M3G_API void m3gSubVec4(Vec4 *vec, const Vec4 *other)
2119 vec->x = m3gSub(vec->x, other->x);
2120 vec->y = m3gSub(vec->y, other->y);
2121 vec->z = m3gSub(vec->z, other->z);
2122 vec->w = m3gSub(vec->w, other->w);
2126 * \brief Vector length
2128 M3G_API M3Gfloat m3gLengthVec3(const Vec3 *vec)
2130 return m3gSqrt(m3gAdd(m3gAdd(m3gSquare(vec->x),
2132 m3gSquare(vec->z)));
2136 * \brief Vector scaling
2138 M3G_API void m3gScaleVec3(Vec3 *vec, const M3Gfloat s)
2140 vec->x = m3gMul(vec->x, s);
2141 vec->y = m3gMul(vec->y, s);
2142 vec->z = m3gMul(vec->z, s);
2146 * \brief Vector scaling
2148 M3G_API void m3gScaleVec4(Vec4 *vec, const M3Gfloat s)
2150 vec->x = m3gMul(vec->x, s);
2151 vec->y = m3gMul(vec->y, s);
2152 vec->z = m3gMul(vec->z, s);
2153 vec->w = m3gMul(vec->w, s);
2157 * \brief Returns an angle-axis representation for a quaternion
2159 * \note There are many, and this is not guaranteed to return a
2162 M3G_API void m3gGetAngleAxis(const Quat *quat, M3Gfloat *angle, Vec3 *axis)
2164 M3Gfloat x, y, z, sinTheta;
2166 M3G_ASSERT_PTR(quat);
2167 M3G_ASSERT_PTR(angle);
2168 M3G_ASSERT_PTR(axis);
2174 sinTheta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(x), m3gSquare(y)),
2177 if (sinTheta > EPSILON) {
2178 M3Gfloat ooSinTheta = m3gRcp(sinTheta);
2179 axis->x = m3gMul(x, ooSinTheta);
2180 axis->y = m3gMul(y, ooSinTheta);
2181 axis->z = m3gMul(z, ooSinTheta);
2184 /* return a valid axis even for no rotation */
2185 axis->x = axis->y = 0.0f;
2188 *angle = m3gMul(2.0f * RAD2DEG, m3gArcCos(quat->w));
2192 * \brief Gets a single matrix column
2194 M3G_API void m3gGetMatrixColumn(const Matrix *mtx, M3Gint col, Vec4 *dst)
2196 if ((col & ~3) == 0) {
2197 if (!mtx->complete) {
2198 m3gFillClassifiedMatrix((Matrix*)mtx);
2200 dst->x = M44F(mtx, 0, col);
2201 dst->y = M44F(mtx, 1, col);
2202 dst->z = M44F(mtx, 2, col);
2203 dst->w = M44F(mtx, 3, col);
2206 M3G_ASSERT(M3G_FALSE);
2211 * \brief Returns the floating point values of a matrix as consecutive
2214 M3G_API void m3gGetMatrixColumns(const Matrix *mtx, M3Gfloat *dst)
2216 M3G_ASSERT(mtx != NULL && dst != NULL);
2218 if (!mtx->complete) {
2219 m3gFillClassifiedMatrix((Matrix*)mtx);
2221 m3gCopy(dst, mtx->elem, sizeof(mtx->elem));
2225 * \brief Gets a single matrix row
2227 M3G_API void m3gGetMatrixRow(const Matrix *mtx, M3Gint row, Vec4 *dst)
2229 if ((row & ~3) == 0) {
2230 if (!mtx->complete) {
2231 m3gFillClassifiedMatrix((Matrix*)mtx);
2233 dst->x = M44F(mtx, row, 0);
2234 dst->y = M44F(mtx, row, 1);
2235 dst->z = M44F(mtx, row, 2);
2236 dst->w = M44F(mtx, row, 3);
2239 M3G_ASSERT(M3G_FALSE);
2244 * \brief Returns the floating point values of a matrix as consecutive
2247 M3G_API void m3gGetMatrixRows(const Matrix *mtx, M3Gfloat *dst)
2249 M3G_ASSERT(mtx != NULL && dst != NULL);
2251 if (!mtx->complete) {
2252 m3gFillClassifiedMatrix((Matrix*)mtx);
2256 for (row = 0; row < 4; ++row) {
2257 *dst++ = mtx->elem[ 0 + row];
2258 *dst++ = mtx->elem[ 4 + row];
2259 *dst++ = mtx->elem[ 8 + row];
2260 *dst++ = mtx->elem[12 + row];
2266 * \brief Sets a matrix to identity
2268 M3G_API void m3gIdentityMatrix(Matrix *mtx)
2270 M3G_ASSERT(mtx != NULL);
2271 m3gClassifyAs(MC_IDENTITY, mtx);
2275 * \brief Sets a quaternion to identity
2277 M3G_API void m3gIdentityQuat(Quat *quat)
2279 M3G_ASSERT(quat != NULL);
2280 quat->x = quat->y = quat->z = 0.0f;
2285 * \brief Inverts a matrix
2287 M3G_API M3Gbool m3gInvertMatrix(Matrix *mtx)
2295 M3G_ASSERT(mtx != NULL);
2297 if (!m3gIsClassified(mtx)) {
2301 /* Quick exit for identity */
2303 if (mtx->mask == MC_IDENTITY) {
2307 /* Look for other common cases; these require that we have valid
2308 * values in all the elements, so fill in the values first */
2310 M3Guint mask = mtx->mask;
2312 if (!mtx->complete) {
2313 m3gFillClassifiedMatrix(mtx);
2316 if ((mask | (0x3F << 24)) == MC_TRANSLATION) {
2317 M44F(mtx, 0, 3) = m3gNegate(M44F(mtx, 0, 3));
2318 M44F(mtx, 1, 3) = m3gNegate(M44F(mtx, 1, 3));
2319 M44F(mtx, 2, 3) = m3gNegate(M44F(mtx, 2, 3));
2320 mtx->mask = MC_TRANSLATION;
2323 if ((mask | 0x300C03) == MC_SCALING) {
2324 if ((mask & 3 ) == 0 ||
2325 (mask & (3 << 10)) == 0 ||
2326 (mask & (3 << 20)) == 0) {
2327 return M3G_FALSE; /* zero scale for at least one axis */
2329 M44F(mtx, 0, 0) = m3gRcp(M44F(mtx, 0, 0));
2330 M44F(mtx, 1, 1) = m3gRcp(M44F(mtx, 1, 1));
2331 M44F(mtx, 2, 2) = m3gRcp(M44F(mtx, 2, 2));
2336 /* Do a full 4x4 inversion as a last resort */
2340 /* transpose matrix */
2341 for (i = 0; i < 4; i++) {
2342 src[i] = matrix[i*4];
2343 src[i+4] = matrix[i*4+1];
2344 src[i+8] = matrix[i*4+2];
2345 src[i+12] = matrix[i*4+3];
2348 /* calculate pairs for first 8 elements (cofactors) */
2349 tmp[0] = src[10]*src[15];
2350 tmp[1] = src[11]*src[14];
2351 tmp[2] = src[9]*src[15];
2352 tmp[3] = src[11]*src[13];
2353 tmp[4] = src[9]*src[14];
2354 tmp[5] = src[10]*src[13];
2355 tmp[6] = src[8]*src[15];
2356 tmp[7] = src[11]*src[12];
2357 tmp[8] = src[8]*src[14];
2358 tmp[9] = src[10]*src[12];
2359 tmp[10] = src[8]*src[13];
2360 tmp[11] = src[9]*src[12];
2362 /* calculate first 8 elements (cofactors) */
2363 matrix[0] = tmp[0]*src[5] + tmp[3]*src[6] + tmp[4]*src[7];
2364 matrix[0] -= tmp[1]*src[5] + tmp[2]*src[6] + tmp[5]*src[7];
2365 matrix[1] = tmp[1]*src[4] + tmp[6]*src[6] + tmp[9]*src[7];
2366 matrix[1] -= tmp[0]*src[4] + tmp[7]*src[6] + tmp[8]*src[7];
2367 matrix[2] = tmp[2]*src[4] + tmp[7]*src[5] + tmp[10]*src[7];
2368 matrix[2] -= tmp[3]*src[4] + tmp[6]*src[5] + tmp[11]*src[7];
2369 matrix[3] = tmp[5]*src[4] + tmp[8]*src[5] + tmp[11]*src[6];
2370 matrix[3] -= tmp[4]*src[4] + tmp[9]*src[5] + tmp[10]*src[6];
2371 matrix[4] = tmp[1]*src[1] + tmp[2]*src[2] + tmp[5]*src[3];
2372 matrix[4] -= tmp[0]*src[1] + tmp[3]*src[2] + tmp[4]*src[3];
2373 matrix[5] = tmp[0]*src[0] + tmp[7]*src[2] + tmp[8]*src[3];
2374 matrix[5] -= tmp[1]*src[0] + tmp[6]*src[2] + tmp[9]*src[3];
2375 matrix[6] = tmp[3]*src[0] + tmp[6]*src[1] + tmp[11]*src[3];
2376 matrix[6] -= tmp[2]*src[0] + tmp[7]*src[1] + tmp[10]*src[3];
2377 matrix[7] = tmp[4]*src[0] + tmp[9]*src[1] + tmp[10]*src[2];
2378 matrix[7] -= tmp[5]*src[0] + tmp[8]*src[1] + tmp[11]*src[2];
2380 /* calculate pairs for second 8 elements (cofactors) */
2381 tmp[0] = src[2]*src[7];
2382 tmp[1] = src[3]*src[6];
2383 tmp[2] = src[1]*src[7];
2384 tmp[3] = src[3]*src[5];
2385 tmp[4] = src[1]*src[6];
2386 tmp[5] = src[2]*src[5];
2387 tmp[6] = src[0]*src[7];
2388 tmp[7] = src[3]*src[4];
2389 tmp[8] = src[0]*src[6];
2390 tmp[9] = src[2]*src[4];
2391 tmp[10] = src[0]*src[5];
2392 tmp[11] = src[1]*src[4];
2394 /* calculate second 8 elements (cofactors) */
2395 matrix[8] = tmp[0]*src[13] + tmp[3]*src[14] + tmp[4]*src[15];
2396 matrix[8] -= tmp[1]*src[13] + tmp[2]*src[14] + tmp[5]*src[15];
2397 matrix[9] = tmp[1]*src[12] + tmp[6]*src[14] + tmp[9]*src[15];
2398 matrix[9] -= tmp[0]*src[12] + tmp[7]*src[14] + tmp[8]*src[15];
2399 matrix[10] = tmp[2]*src[12] + tmp[7]*src[13] + tmp[10]*src[15];
2400 matrix[10] -= tmp[3]*src[12] + tmp[6]*src[13] + tmp[11]*src[15];
2401 matrix[11] = tmp[5]*src[12] + tmp[8]*src[13] + tmp[11]*src[14];
2402 matrix[11] -= tmp[4]*src[12] + tmp[9]*src[13] + tmp[10]*src[14];
2403 matrix[12] = tmp[2]*src[10] + tmp[5]*src[11] + tmp[1]*src[9];
2404 matrix[12] -= tmp[4]*src[11] + tmp[0]*src[9] + tmp[3]*src[10];
2405 matrix[13] = tmp[8]*src[11] + tmp[0]*src[8] + tmp[7]*src[10];
2406 matrix[13] -= tmp[6]*src[10] + tmp[9]*src[11] + tmp[1]*src[8];
2407 matrix[14] = tmp[6]*src[9] + tmp[11]*src[11] + tmp[3]*src[8];
2408 matrix[14] -= tmp[10]*src[11] + tmp[2]*src[8] + tmp[7]*src[9];
2409 matrix[15] = tmp[10]*src[10] + tmp[4]*src[8] + tmp[9]*src[9];
2410 matrix[15] -= tmp[8]*src[9] + tmp[11]*src[10] + tmp[5]*src[8];
2412 /* calculate determinant */
2413 det = src[0]*matrix[0]+src[1]*matrix[1]+src[2]*matrix[2]+src[3]*matrix[3];
2415 /* matrix has no inverse */
2420 /* calculate matrix inverse */
2422 for (i = 0; i < 16; i++) {
2426 mtx->classified = M3G_FALSE;
2431 * \brief Sets a matrix to the inverse of another matrix
2433 M3G_API M3Gbool m3gMatrixInverse(Matrix *mtx, const Matrix *other)
2435 M3G_ASSERT(mtx != NULL && other != NULL);
2437 if (!m3gIsClassified(other)) {
2438 m3gClassify((Matrix*)other);
2441 m3gCopyMatrix(mtx, other);
2442 return m3gInvertMatrix(mtx);
2446 * \brief Sets a matrix to the transpose of another matrix
2448 M3G_API void m3gMatrixTranspose(Matrix *mtx, const Matrix *other)
2451 M3G_ASSERT(mtx != NULL && other != NULL);
2453 if (!other->complete) {
2454 m3gFillClassifiedMatrix((Matrix *)other);
2457 for (i = 0; i < 4; i++) {
2458 mtx->elem[i] = other->elem[i*4];
2459 mtx->elem[i+4] = other->elem[i*4+1];
2460 mtx->elem[i+8] = other->elem[i*4+2];
2461 mtx->elem[i+12] = other->elem[i*4+3];
2463 mtx->classified = M3G_FALSE;
2464 mtx->complete = M3G_TRUE;
2467 M3G_API M3Gbool m3gInverseTranspose(Matrix *mtx, const Matrix *other)
2470 if (!m3gMatrixInverse(&temp, other)) {
2473 m3gMatrixTranspose(mtx, &temp);
2478 * \brief Sets a matrix to the product of two other matrices
2480 * \note \c dst can not be either of \c left or \c right; if it is,
2481 * the results are undefined
2483 M3G_API void m3gMatrixProduct(Matrix *dst, const Matrix *left, const Matrix *right)
2485 M3G_ASSERT_PTR(dst);
2486 M3G_ASSERT_PTR(left);
2487 M3G_ASSERT_PTR(right);
2488 M3G_ASSERT(dst != left && dst != right);
2490 /* Classify input matrices and take early exits for identities */
2492 if (!m3gIsClassified(left)) {
2493 m3gClassify((Matrix*)left);
2495 if (left->mask == MC_IDENTITY) {
2496 m3gCopyMatrix(dst, right);
2500 if (!m3gIsClassified(right)) {
2501 m3gClassify((Matrix*)right);
2503 if (right->mask == MC_IDENTITY) {
2504 m3gCopyMatrix(dst, left);
2508 /* Special quick paths for 3x4 matrices */
2510 if (m3gIsWUnity(left) && m3gIsWUnity(right)) {
2514 if ((left->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) {
2516 if (left->mask != MC_TRANSLATION && !left->complete) {
2517 m3gFillClassifiedMatrix((Matrix*)left);
2519 if (right->mask != MC_TRANSLATION && !right->complete) {
2520 m3gFillClassifiedMatrix((Matrix*)right);
2523 m3gCopyMatrix(dst, right);
2525 M44F(dst, 0, 3) = m3gAdd(M44F(left, 0, 3), M44F(dst, 0, 3));
2526 M44F(dst, 1, 3) = m3gAdd(M44F(left, 1, 3), M44F(dst, 1, 3));
2527 M44F(dst, 2, 3) = m3gAdd(M44F(left, 2, 3), M44F(dst, 2, 3));
2529 dst->mask |= MC_TRANSLATION_PART;
2533 if ((right->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) {
2536 if (left->mask != MC_TRANSLATION && !left->complete) {
2537 m3gFillClassifiedMatrix((Matrix*)left);
2539 if (right->mask != MC_TRANSLATION && !right->complete) {
2540 m3gFillClassifiedMatrix((Matrix*)right);
2543 m3gCopyMatrix(dst, left);
2545 m3gGetMatrixColumn(right, 3, &tvec);
2546 m3gTransformVec4(dst, &tvec);
2548 M44F(dst, 0, 3) = tvec.x;
2549 M44F(dst, 1, 3) = tvec.y;
2550 M44F(dst, 2, 3) = tvec.z;
2552 dst->mask |= MC_TRANSLATION_PART;
2558 /* Compute product and set output classification */
2560 m3gGenericMatrixProduct(dst, left, right);
2564 * \brief Normalizes a quaternion
2566 M3G_API void m3gNormalizeQuat(Quat *q)
2571 norm = m3gNorm4(&q->x);
2573 if (norm > EPSILON) {
2574 norm = m3gRcpSqrt(norm);
2575 m3gScale4(&q->x, norm);
2583 * \brief Normalizes a three-vector
2585 M3G_API void m3gNormalizeVec3(Vec3 *v)
2590 norm = m3gNorm3(&v->x);
2592 if (norm > EPSILON) {
2593 norm = m3gRcpSqrt(norm);
2594 m3gScale3(&v->x, norm);
2597 m3gZero(v, sizeof(Vec3));
2602 * \brief Normalizes a four-vector
2604 M3G_API void m3gNormalizeVec4(Vec4 *v)
2609 norm = m3gNorm4(&v->x);
2611 if (norm > EPSILON) {
2612 norm = m3gRcpSqrt(norm);
2613 m3gScale4(&v->x, norm);
2616 m3gZero(v, sizeof(Vec4));
2621 * \brief Multiplies a matrix from the right with another matrix
2623 M3G_API void m3gPostMultiplyMatrix(Matrix *mtx, const Matrix *other)
2626 M3G_ASSERT_PTR(mtx);
2627 M3G_ASSERT_PTR(other);
2629 m3gCopyMatrix(&temp, mtx);
2630 m3gMatrixProduct(mtx, &temp, other);
2634 * \brief Multiplies a matrix from the left with another matrix
2636 M3G_API void m3gPreMultiplyMatrix(Matrix *mtx, const Matrix *other)
2639 M3G_ASSERT_PTR(mtx);
2640 M3G_ASSERT_PTR(other);
2642 m3gCopyMatrix(&temp, mtx);
2643 m3gMatrixProduct(mtx, other, &temp);
2647 * \brief Multiplies a matrix with a rotation matrix.
2649 M3G_API void m3gPostRotateMatrix(Matrix *mtx,
2651 M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
2654 m3gSetAngleAxis(&q, angle, ax, ay, az);
2655 m3gPostRotateMatrixQuat(mtx, &q);
2659 * \brief Multiplies a matrix with a translation matrix.
2661 M3G_API void m3gPostRotateMatrixQuat(Matrix *mtx, const Quat *quat)
2664 m3gQuatMatrix(&temp, quat);
2665 m3gPostMultiplyMatrix(mtx, &temp);
2669 * \brief Multiplies a matrix with a scale matrix.
2671 M3G_API void m3gPostScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz)
2674 m3gScalingMatrix(&temp, sx, sy, sz);
2675 m3gPostMultiplyMatrix(mtx, &temp);
2679 * \brief Multiplies a matrix with a translation (matrix).
2681 M3G_API void m3gPostTranslateMatrix(Matrix *mtx,
2682 M3Gfloat tx, M3Gfloat ty, M3Gfloat tz)
2685 m3gTranslationMatrix(&temp, tx, ty, tz);
2686 m3gPostMultiplyMatrix(mtx, &temp);
2690 * \brief Multiplies a matrix with a rotation matrix
2692 M3G_API void m3gPreRotateMatrix(Matrix *mtx,
2694 M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
2697 m3gSetAngleAxis(&q, angle, ax, ay, az);
2698 m3gPreRotateMatrixQuat(mtx, &q);
2702 * \brief Multiplies a matrix with a quaternion rotation
2704 M3G_API void m3gPreRotateMatrixQuat(Matrix *mtx, const Quat *quat)
2707 m3gQuatMatrix(&temp, quat);
2708 m3gPreMultiplyMatrix(mtx, &temp);
2712 * \brief Multiplies a matrix with a scale matrix.
2714 M3G_API void m3gPreScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz)
2717 m3gScalingMatrix(&temp, sx, sy, sz);
2718 m3gPreMultiplyMatrix(mtx, &temp);
2722 * \brief Multiplies a matrix with a translation (matrix).
2724 M3G_API void m3gPreTranslateMatrix(Matrix *mtx,
2725 M3Gfloat tx, M3Gfloat ty, M3Gfloat tz)
2728 m3gTranslationMatrix(&temp, tx, ty, tz);
2729 m3gPreMultiplyMatrix(mtx, &temp);
2733 * \brief Converts a quaternion into a matrix
2735 * The output is a matrix effecting the same rotation as the
2736 * quaternion passed as input
2738 M3G_API void m3gQuatMatrix(Matrix *mtx, const Quat *quat)
2740 M3Gfloat qx = quat->x;
2741 M3Gfloat qy = quat->y;
2742 M3Gfloat qz = quat->z;
2743 M3Gfloat qw = quat->w;
2745 /* Quick exit for identity rotations */
2747 if (IS_ZERO(qx) && IS_ZERO(qy) && IS_ZERO(qz)) {
2748 m3gIdentityMatrix(mtx);
2753 /* Determine the rough type of the output matrix */
2755 M3Guint type = MC_SCALING_ROTATION;
2756 if (IS_ZERO(qz) && IS_ZERO(qy)) {
2757 type = MC_X_ROTATION;
2759 else if (IS_ZERO(qz) && IS_ZERO(qx)) {
2760 type = MC_Y_ROTATION;
2762 else if (IS_ZERO(qx) && IS_ZERO(qy)) {
2763 type = MC_Z_ROTATION;
2765 m3gClassifyAs(type, mtx);
2767 /* Generate the non-constant parts of the matrix */
2769 M3Gfloat wx, wy, wz, xx, yy, yz, xy, xz, zz;
2771 xx = m3gMul(qx, qx);
2772 xy = m3gMul(qx, qy);
2773 xz = m3gMul(qx, qz);
2774 yy = m3gMul(qy, qy);
2775 yz = m3gMul(qy, qz);
2776 zz = m3gMul(qz, qz);
2777 wx = m3gMul(qw, qx);
2778 wy = m3gMul(qw, qy);
2779 wz = m3gMul(qw, qz);
2781 if (type != MC_X_ROTATION) {
2782 M44F(mtx, 0, 0) = m3gSub(1.f, m3gDouble(m3gAdd(yy, zz)));
2783 M44F(mtx, 0, 1) = m3gDouble(m3gSub(xy, wz));
2784 M44F(mtx, 0, 2) = m3gDouble(m3gAdd(xz, wy));
2787 if (type != MC_Y_ROTATION) {
2788 M44F(mtx, 1, 0) = m3gDouble(m3gAdd(xy, wz));
2789 M44F(mtx, 1, 1) = m3gSub(1.f, m3gDouble(m3gAdd(xx, zz)));
2790 M44F(mtx, 1, 2) = m3gDouble(m3gSub(yz, wx));
2793 if (type != MC_Z_ROTATION) {
2794 M44F(mtx, 2, 0) = m3gDouble(m3gSub(xz, wy));
2795 M44F(mtx, 2, 1) = m3gDouble(m3gAdd(yz, wx));
2796 M44F(mtx, 2, 2) = m3gSub(1.f, m3gDouble(m3gAdd(xx, yy)));
2799 m3gSubClassify(mtx);
2804 * \brief Generates a scaling matrix
2806 M3G_API void m3gScalingMatrix(
2808 const M3Gfloat sx, const M3Gfloat sy, const M3Gfloat sz)
2810 M3G_ASSERT_PTR(mtx);
2811 M44F(mtx, 0, 0) = sx;
2812 M44F(mtx, 1, 1) = sy;
2813 M44F(mtx, 2, 2) = sz;
2814 m3gClassifyAs(MC_SCALING, mtx);
2815 m3gSubClassify(mtx);
2819 * \brief Sets a quaternion to represent an angle-axis rotation
2821 M3G_API void m3gSetAngleAxis(Quat *quat,
2823 M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
2825 m3gSetAngleAxisRad(quat, m3gMul(angle, M3G_DEG2RAD), ax, ay, az);
2829 * \brief Sets a quaternion to represent an angle-axis rotation
2831 M3G_API void m3gSetAngleAxisRad(Quat *quat,
2833 M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
2835 M3G_ASSERT_PTR(quat);
2837 if (!IS_ZERO(angleRad)) {
2839 M3Gfloat halfAngle = m3gHalf(angleRad);
2841 s = m3gSin(halfAngle);
2844 M3Gfloat sqrNorm = m3gMadd(ax, ax, m3gMadd(ay, ay, m3gMul(az, az)));
2845 if (sqrNorm < 0.995f || sqrNorm > 1.005f) {
2846 if (sqrNorm > EPSILON) {
2847 M3Gfloat ooNorm = m3gRcpSqrt(sqrNorm);
2848 ax = m3gMul(ax, ooNorm);
2849 ay = m3gMul(ay, ooNorm);
2850 az = m3gMul(az, ooNorm);
2853 ax = ay = az = 0.0f;
2858 quat->x = m3gMul(s, ax);
2859 quat->y = m3gMul(s, ay);
2860 quat->z = m3gMul(s, az);
2861 quat->w = m3gCos(halfAngle);
2864 m3gIdentityQuat(quat);
2869 * \brief Quaternion multiplication.
2871 M3G_API void m3gMulQuat(Quat *quat, const Quat *other)
2876 quat->w = m3gMul(q.w, other->w) - m3gMul(q.x, other->x) - m3gMul(q.y, other->y) - m3gMul(q.z, other->z);
2877 quat->x = m3gMul(q.w, other->x) + m3gMul(q.x, other->w) + m3gMul(q.y, other->z) - m3gMul(q.z, other->y);
2878 quat->y = m3gMul(q.w, other->y) - m3gMul(q.x, other->z) + m3gMul(q.y, other->w) + m3gMul(q.z, other->x);
2879 quat->z = m3gMul(q.w, other->z) + m3gMul(q.x, other->y) - m3gMul(q.y, other->x) + m3gMul(q.z, other->w);
2883 * \brief Makes this quaternion represent the rotation from one 3D
2886 M3G_API void m3gSetQuatRotation(Quat *quat,
2887 const Vec3 *from, const Vec3 *to)
2891 M3G_ASSERT_PTR(quat);
2892 M3G_ASSERT_PTR(from);
2895 cosAngle = m3gDot3(from, to);
2897 if (cosAngle > (1.0f - EPSILON)) { /* zero angle */
2898 m3gIdentityQuat(quat);
2901 else if (cosAngle > (1.0e-3f - 1.0f)) { /* normal case */
2903 m3gCross(&axis, from, to);
2904 m3gSetAngleAxisRad(quat, m3gArcCos(cosAngle), axis.x, axis.y, axis.z);
2908 /* Opposite vectors; must generate an arbitrary perpendicular
2909 * vector and use that as the rotation axis. Here, we try the
2910 * Z axis first, and if that seems too parallel to the
2911 * vectors, project the Y axis instead: Z is the only good
2912 * choice for Z-constrained rotations, and Y by definition
2913 * must be perpendicular to that. */
2918 axis.x = axis.y = axis.z = 0.0f;
2919 if (m3gAbs(from->z) < (1.0f - EPSILON)) {
2926 s = m3gDot3(&axis, from);
2928 m3gScaleVec3(&temp, s);
2929 m3gSubVec3(&axis, &temp);
2931 m3gSetAngleAxis(quat, 180.f, axis.x, axis.y, axis.z);
2936 * \brief Sets the values of a matrix
2939 M3G_API void m3gSetMatrixColumns(Matrix *mtx, const M3Gfloat *src)
2941 M3G_ASSERT(mtx != NULL && src != NULL);
2943 m3gCopy(mtx->elem, src, sizeof(mtx->elem));
2944 mtx->classified = M3G_FALSE;
2945 mtx->complete = M3G_TRUE;
2949 * \brief Sets the values of a matrix
2952 M3G_API void m3gSetMatrixRows(Matrix *mtx, const M3Gfloat *src)
2954 M3G_ASSERT(mtx != NULL && src != NULL);
2957 for (row = 0; row < 4; ++row) {
2958 mtx->elem[ 0 + row] = *src++;
2959 mtx->elem[ 4 + row] = *src++;
2960 mtx->elem[ 8 + row] = *src++;
2961 mtx->elem[12 + row] = *src++;
2964 mtx->classified = M3G_FALSE;
2965 mtx->complete = M3G_TRUE;
2969 * \brief Transforms a 4-vector with a matrix
2971 #if defined(M3G_HW_FLOAT_VFPV2)
2973 __asm void _m3gTransformVec4(const Matrix *mtx, Vec4 *vec, M3Gint n)
2978 FSTMFDD sp!, {d8-d11}
2981 BIC r12, r3, #0x00370000
2986 FLDMIAS r0, {s4-s19} // [mtx0 mtx1 ..]
2987 FLDMIAS r1, {s0-s3} // [vec.x vec.y vec.z vec.w]
2988 FMULS s20, s4, s0 // [s20-s23] = [v.x*mtx0 v.x*mtx1 v.x*mtx2 v.x*mtx3 ]
2989 FMACS s20, s8, s1 // [s20-s23] += [v.y*mtx4 v.y*mtx5 v.y*mtx6 v.y*mtx7 ]
2990 FMACS s20, s12, s2 // [s20-s23] += [v.z*mtx8 v.z*mtx9 v.z*mtx10 v.z*mtx11]
2991 FMACS s20, s16, s3 // [s20-s23] += [v.w*mtx12 v.w*mtx13 v.w*mtx14 v.w*mtx15]
2992 FSTMIAS r1!, {s20-s22}
2996 FLDMFDD sp!, {d8-d11}
3000 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
3002 M3G_API void m3gTransformVec4(const Matrix *mtx, Vec4 *vec)
3005 M3G_ASSERT(mtx != NULL && vec != NULL);
3007 if (!m3gIsClassified(mtx)) {
3008 m3gClassify((Matrix*)mtx);
3013 if (type == MC_IDENTITY) {
3017 int n = m3gIsWUnity(mtx) ? 3 : 4;
3019 if (!mtx->complete) {
3020 m3gFillClassifiedMatrix((Matrix*)mtx);
3022 #if defined(M3G_HW_FLOAT_VFPV2)
3023 _m3gTransformVec4(mtx, vec, n);
3029 for (i = 0; i < n; ++i) {
3030 M3Gfloat d = m3gMul(M44F(mtx, i, 0), v.x);
3031 d = m3gMadd(M44F(mtx, i, 1), v.y, d);
3032 d = m3gMadd(M44F(mtx, i, 2), v.z, d);
3033 d = m3gMadd(M44F(mtx, i, 3), v.w, d);
3042 * \brief Generates a translation matrix
3044 M3G_API void m3gTranslationMatrix(
3046 const M3Gfloat tx, const M3Gfloat ty, const M3Gfloat tz)
3048 M3G_ASSERT_PTR(mtx);
3049 M44F(mtx, 0, 3) = tx;
3050 M44F(mtx, 1, 3) = ty;
3051 M44F(mtx, 2, 3) = tz;
3052 m3gClassifyAs(MC_TRANSLATION, mtx);
3053 m3gSubClassify(mtx);
3057 * \brief Float vector assignment.
3059 M3G_API void m3gSetQuat(Quat *quat, const M3Gfloat *vec)
3068 * \brief Slerp between quaternions q0 and q1, storing the result in quat.
3070 M3G_API void m3gSlerpQuat(Quat *quat,
3072 const Quat *q0, const Quat *q1)
3075 M3Gfloat cosTheta = m3gDot4((const Vec4 *)q0, (const Vec4 *)q1);
3076 M3Gfloat oneMinusS = m3gSub(1.0f, s);
3078 if (cosTheta > EPSILON - 1.0f) {
3079 if (cosTheta < 1.0f - EPSILON) {
3080 M3Gfloat theta = m3gArcCos(cosTheta);
3081 M3Gfloat sinTheta = m3gSin(theta);
3082 s0 = m3gSin(m3gMul(oneMinusS, theta)) / sinTheta;
3083 s1 = m3gSin(m3gMul(s, theta)) / sinTheta;
3086 /* For quaternions very close to each other, use plain
3087 linear interpolation for numerical stability. */
3091 quat->x = m3gMadd(s0, q0->x, m3gMul(s1, q1->x));
3092 quat->y = m3gMadd(s0, q0->y, m3gMul(s1, q1->y));
3093 quat->z = m3gMadd(s0, q0->z, m3gMul(s1, q1->z));
3094 quat->w = m3gMadd(s0, q0->w, m3gMul(s1, q1->w));
3097 /* Slerp is undefined if the two quaternions are (nearly)
3098 opposite, so we just pick an arbitrary plane of
3106 s0 = m3gSin(m3gMul(oneMinusS, HALF_PI));
3107 s1 = m3gSin(m3gMul(s, HALF_PI));
3109 quat->x = m3gMadd(s0, q0->x, m3gMul(s1, quat->x));
3110 quat->y = m3gMadd(s0, q0->y, m3gMul(s1, quat->y));
3111 quat->z = m3gMadd(s0, q0->z, m3gMul(s1, quat->z));
3116 * \brief Interpolate quaternions using spline, or "squad" interpolation.
3118 M3G_API void m3gSquadQuat(Quat *quat,
3120 const Quat *q0, const Quat *a, const Quat *b, const Quat *q1)
3125 m3gSlerpQuat(&temp0, s, q0, q1);
3126 m3gSlerpQuat(&temp1, s, a, b);
3128 m3gSlerpQuat(quat, m3gDouble(m3gMul(s, m3gSub(1.0f, s))), &temp0, &temp1);