1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/graphics/m3g/m3gcore11/src/m3g_math.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,3132 @@
1.4 +/*
1.5 +* Copyright (c) 2003 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of the License "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description: Internal math function implementations
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +/*!
1.23 + * \internal
1.24 + * \file
1.25 + * \brief Internal math function implementations
1.26 + *
1.27 + */
1.28 +
1.29 +#ifndef M3G_CORE_INCLUDE
1.30 +# error included by m3g_core.c; do not compile separately.
1.31 +#endif
1.32 +
1.33 +#include "m3g_defs.h"
1.34 +#include "m3g_memory.h"
1.35 +
1.36 +#if defined(M3G_SOFT_FLOAT)
1.37 +#include <math.h>
1.38 +#endif
1.39 +
1.40 +/*----------------------------------------------------------------------
1.41 + * Private types and definitions
1.42 + *--------------------------------------------------------------------*/
1.43 +
1.44 +/* Enumerated common matrix classifications */
1.45 +#define MC_IDENTITY 0x40100401 // 01000000 00010000 00000100 00000001
1.46 +#define MC_FRUSTUM 0x30BF0C03 // 00110000 10111111 00001100 00000011
1.47 +#define MC_PERSPECTIVE 0x30B00C03 // 00110000 10110000 00001100 00000011
1.48 +#define MC_ORTHO 0x7F300C03 // 01111111 00110000 00001100 00000011
1.49 +#define MC_PARALLEL 0x70300C03 // 01110000 00110000 00001100 00000011
1.50 +#define MC_SCALING_ROTATION 0x403F3F3F // 01000000 00111111 00111111 00111111
1.51 +#define MC_SCALING 0x40300C03 // 01000000 00110000 00001100 00000011
1.52 +#define MC_TRANSLATION 0x7F100401 // 01111111 00010000 00000100 00000001
1.53 +#define MC_X_ROTATION 0x403C3C01 // 01000000 00111100 00111100 00000001
1.54 +#define MC_Y_ROTATION 0x40330433 // 01000000 00110011 00000100 00110011
1.55 +#define MC_Z_ROTATION 0x40100F0F // 01000000 00010000 00001111 00001111
1.56 +#define MC_W_UNITY 0x7F3F3F3F // 01111111 00111111 00111111 00111111
1.57 +#define MC_GENERIC 0xFFFFFFFF
1.58 +
1.59 +/* Partial masks for individual matrix components */
1.60 +#define MC_TRANSLATION_PART 0x3F000000 // 00111111 00000000 00000000 00000000
1.61 +#define MC_SCALE_PART 0x00300C03 // 00000000 00110000 00001100 00000011
1.62 +#define MC_SCALE_ROTATION_PART 0x003F3F3F // 00000000 00111111 00111111 00111111
1.63 +
1.64 +/* Matrix element classification masks */
1.65 +#define ELEM_ZERO 0x00
1.66 +#define ELEM_ONE 0x01
1.67 +#define ELEM_MINUS_ONE 0x02
1.68 +#define ELEM_ANY 0x03
1.69 +
1.70 +/*!
1.71 + * \internal
1.72 + * \brief calculates the offset of a 4x4 matrix element in a linear
1.73 + * array
1.74 + *
1.75 + * \notes The current convention is column-major, as in OpenGL ES
1.76 + */
1.77 +#define MELEM(row, col) ((row) + ((col) << 2))
1.78 +
1.79 +/*!
1.80 + * \internal
1.81 + * \brief Macro for accessing 4x4 float matrix elements
1.82 + *
1.83 + * \param mtx pointer to the first element of the matrix
1.84 + * \param row matrix row
1.85 + * \param col matrix column
1.86 + */
1.87 +#define M44F(mtx, row, col) ((mtx)->elem[MELEM((row), (col))])
1.88 +
1.89 +/*--------------------------------------------------------------------*/
1.90 +
1.91 +/*----------------------------------------------------------------------
1.92 + * Private functions
1.93 + *--------------------------------------------------------------------*/
1.94 +
1.95 +
1.96 +/*!
1.97 + * \internal
1.98 + * \brief ARM VFPv2 implementation of 4x4 matrix multiplication.
1.99 + *
1.100 + * \param dst multiplication result
1.101 + * \param left left-hand matrix
1.102 + * \param right right-hand matrix
1.103 + */
1.104 +#if defined(M3G_HW_FLOAT_VFPV2)
1.105 +
1.106 +__asm void _m3gGenericMatrixProduct(Matrix *dst,
1.107 + const Matrix *left, const Matrix *right)
1.108 +{
1.109 +// r0 = *dst
1.110 +// r1 = *left
1.111 +// r2 = *right
1.112 +
1.113 + CODE32
1.114 +
1.115 +// save the VFP registers and set the vector STRIDE = 1, LENGTH = 4
1.116 +
1.117 + FSTMFDD sp!, {d8-d15}
1.118 +
1.119 + FMRX r3, FPSCR
1.120 + BIC r12, r3, #0x00370000
1.121 + ORR r12, #0x00030000
1.122 + FMXR FPSCR, r12
1.123 +
1.124 +// left = [a0 a1 a2 a3 right = [b0 b1 b2 b3
1.125 +// a4 a5 a6 a7 b4 b5 b6 b7
1.126 +// a8 a9 a10 a11 b8 b9 b10 b11
1.127 +// a12 a13 a14 a15] b12 b13 b14 b15]
1.128 +//
1.129 +// dst = [a0*b0+a4*b1+a8*b2+a12*b3 a1*b0+a5*b1+a9*b2+a13*b3 ..
1.130 +// a0*b4+a4*b5+a8*b6+a12*b7 a1*b4+a5*b5+a9*b6+a13*b7 ..
1.131 +// . .
1.132 +// . .
1.133 +
1.134 + FLDMIAS r1!, {s8-s23} // load the left matrix [a0-a15] to registers s8-s23
1.135 + FLDMIAS r2!, {s0-s7} // load [b0-b7] of right matrix to registers s0-s7
1.136 + FMULS s24, s8, s0 // [s24-s27] = [a0*b0 a1*b0 a2*b0 a3*b0]
1.137 + FMULS s28, s8, s4 // [s28-s31] = [a0*b4 a1*b4 a2*b4 a3*b4]
1.138 + FMACS s24, s12, s1 // [s24-s27] += [a4*b1 a5*b1 a6*b1 a7*b1]
1.139 + FMACS s28, s12, s5 // [s28-s31] += [a4*b5 a5*b5 a6*b5 a7*b5]
1.140 + FMACS s24, s16, s2 // [s24-s27] += [a8*b2 a9*b2 a10*b2 a11*b2]
1.141 + FMACS s28, s16, s6 // [s28-s31] += [a8*b6 a9*b6 a10*b6 a11*b6]
1.142 + FMACS s24, s20, s3 // [s24-s27] += [a12*b3 a13*b3 a14*b3 a15*b3]
1.143 + FMACS s28, s20, s7 // [s28-s31] += [a12*b7 a13*b37a14*b7 a15*b7]
1.144 + FLDMIAS r2!, {s0-s7} // load [b8-b15]
1.145 + FSTMIAS r0!, {s24-s31} // write [dst0-dst7]
1.146 + FMULS s24, s8, s0
1.147 + FMULS s28, s8, s4
1.148 + FMACS s24, s12, s1
1.149 + FMACS s28, s12, s5
1.150 + FMACS s24, s16, s2
1.151 + FMACS s28, s16, s6
1.152 + FMACS s24, s20, s3
1.153 + FMACS s28, s20, s7
1.154 + FSTMIAS r0!, {s24-s31}
1.155 +
1.156 +// Restore the VFP registers and return.
1.157 +
1.158 + FMXR FPSCR, r3
1.159 +
1.160 + FLDMFDD sp!, {d8-d15}
1.161 +
1.162 + BX lr
1.163 +
1.164 +}
1.165 +#endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
1.166 +
1.167 +
1.168 +/*------------------ Elementary float ------------------*/
1.169 +
1.170 +#if defined(M3G_SOFT_FLOAT)
1.171 +
1.172 +#if defined (M3G_BUILD_ARM)
1.173 +
1.174 +/*!
1.175 + * \internal
1.176 + * \brief Floating point multiplication implementation for ARM.
1.177 + */
1.178 +__asm M3Gfloat m3gMul(const M3Gfloat a,
1.179 + const M3Gfloat b)
1.180 +{
1.181 + /**
1.182 + * Extract the exponents of the multiplicands and add them
1.183 + * together. Flush to zero if either exponent or their sum
1.184 + * is zero.
1.185 + */
1.186 +
1.187 + mov r12, #0xff;
1.188 + ands r2, r0, r12, lsl #23; // exit if e1 == 0
1.189 + andnes r3, r1, r12, lsl #23; // exit if e2 == 0
1.190 + subne r2, r2, #(127 << 23);
1.191 + addnes r12, r2, r3; // exit if e1+e2-127 <= 0
1.192 + movle r0, #0;
1.193 + bxle lr;
1.194 +
1.195 + /**
1.196 + * Determine the sign of the result. Note that the exponent
1.197 + * may have overflowed to the sign bit, and thus the result
1.198 + * may be an arbitrary negative value when it really should
1.199 + * be +Inf or -Inf.
1.200 + */
1.201 +
1.202 + teq r0, r1;
1.203 + orrmi r12, r12, #0x80000000;
1.204 +
1.205 + /**
1.206 + * Multiply the mantissas. First shift the mantissas up to
1.207 + * unsigned 1.31 fixed point, adding the leading "1" bit at
1.208 + * the same time, and finally do a 32x32 -> 64 bit unsigned
1.209 + * multiplication. The result is in unsigned 2.62 fixed point,
1.210 + * representing the interval [1.0, 4.0).
1.211 + */
1.212 +
1.213 + mov r2, #0x80000000;
1.214 + orr r0, r2, r0, lsl #8;
1.215 + orr r1, r2, r1, lsl #8;
1.216 + umulls r2, r3, r0, r1;
1.217 +
1.218 + /**
1.219 + * If the highest bit of the 64-bit result is set, then the
1.220 + * mantissa lies in [2.0, 4.0) and needs to be renormalized.
1.221 + * That is, the mantissa is shifted one bit to the right and
1.222 + * the exponent correspondingly increased by 1. Note that we
1.223 + * lose the leading "1" bit from the mantissa by adding it up
1.224 + * with the exponent.
1.225 + */
1.226 +
1.227 + subpl r12, r12, #(1 << 23); // no overflow: exponent -= 1
1.228 + addpl r0, r12, r3, lsr #7; // no overflow: exponent += 1
1.229 + addmi r0, r12, r3, lsr #8; // overflow: exponent += 1
1.230 + bx lr;
1.231 +}
1.232 +
1.233 +/*!
1.234 + * \internal
1.235 + * \brief Floating point addition implementation for ARM.
1.236 + */
1.237 +__asm M3Gfloat m3gAdd(const M3Gfloat a,
1.238 + const M3Gfloat b)
1.239 +{
1.240 + /**
1.241 + * If the operands have opposite signs then this is not really
1.242 + * an addition but a subtraction. Subtraction is much slower,
1.243 + * so we have a separate code path for it, rather than trying
1.244 + * to save space by handling both in the same place.
1.245 + */
1.246 +
1.247 + teq r0, r1;
1.248 + bmi _m3gSub;
1.249 +
1.250 + /**
1.251 + * Sort the operands such that the larger operand is in R0 and
1.252 + * the smaller in R1. The sign bits do not affect the ordering,
1.253 + * since they are known to be equal.
1.254 + */
1.255 +
1.256 + subs r2, r0, r1;
1.257 + submi r0, r0, r2;
1.258 + addmi r1, r1, r2;
1.259 +
1.260 + /**
1.261 + * Extract the exponent of the smaller operand into R2 and compute
1.262 + * the difference between the larger and smaller exponent into R3.
1.263 + * (Note that the sign bits cancel out in the subtraction.) The
1.264 + * exponent delta tells how many bits the mantissa of the smaller
1.265 + * operand must be shifted to the right in order to bring the
1.266 + * operands into equal scale.
1.267 + */
1.268 +
1.269 + mov r2, r1, lsr #23;
1.270 + rsb r3, r2, r0, lsr #23;
1.271 +
1.272 + /**
1.273 + * Check if the exponent delta is bigger than 23 bits, or if the
1.274 + * smaller exponent is zero. In either case, exit the routine and
1.275 + * return the larger operand (which is already in R0). Note that
1.276 + * this means that subnormals are treated as zero.
1.277 + */
1.278 +
1.279 + rsbs r12, r3, #23; // N set, V clear if R3 > 23
1.280 + tstpl r2, #0xff; // execute only if R3 <= 23
1.281 + bxle lr; // exit if Z set or N != V
1.282 +
1.283 + /**
1.284 + * Extract the mantissas and shift them up to unsigned 1.31 fixed
1.285 + * point, inserting the implied leading "1" bit at the same time.
1.286 + * Finally, align the decimal points and add up the mantissas.
1.287 + */
1.288 +
1.289 + mov r12, #0x80000000;
1.290 + orr r0, r12, r0, lsl #8;
1.291 + orr r1, r12, r1, lsl #8;
1.292 + adds r0, r0, r1, lsr r3;
1.293 +
1.294 + /**
1.295 + * Compute the final exponent by adding up the smaller exponent
1.296 + * (R2), the exponent delta (R3), and the possible overflow bit
1.297 + * (carry flag). Note that in case of overflow, the leading "1"
1.298 + * has ended up in the carry flag and thus needs not be explicitly
1.299 + * discarded. Finally, put the mantissa together with the sign and
1.300 + * exponent.
1.301 + */
1.302 +
1.303 + adc r2, r2, r3; // r2 = smallExp + deltaExp + overflow
1.304 + movcc r0, r0, lsl #1; // no overflow: discard leading 1
1.305 + mov r0, r0, lsr #9;
1.306 + orr r0, r0, r2, lsl #23;
1.307 + bx lr;
1.308 +
1.309 +_m3gSub
1.310 +
1.311 + /**
1.312 + * Sort the operands such that the one with larger magnitude is
1.313 + * in R0 and has the correct sign (the sign of the final result),
1.314 + * while the smaller operand is in R1 with an inverted sign bit.
1.315 + */
1.316 +
1.317 + eor r1, r1, #0x80000000;
1.318 + subs r2, r0, r1;
1.319 + eormi r2, r2, #0x80000000;
1.320 + submi r0, r0, r2;
1.321 + addmi r1, r1, r2;
1.322 +
1.323 + /**
1.324 + * Extract the exponent of the smaller operand into R2 and compute
1.325 + * the difference between the larger and smaller exponent into R3.
1.326 + * (Note that the sign bits cancel out in the subtraction.) The
1.327 + * exponent delta tells how many bits the mantissa of the smaller
1.328 + * operand must be shifted to the right in order to bring the
1.329 + * operands into equal scale.
1.330 + */
1.331 +
1.332 + mov r2, r1, lsr #23;
1.333 + rsbs r3, r2, r0, lsr #23;
1.334 +
1.335 + /**
1.336 + * Check if the exponent delta is bigger than 31 bits, or if the
1.337 + * smaller exponent is zero. In either case, exit the routine and
1.338 + * return the larger operand (which is already in R0). Note that
1.339 + * this means that subnormals are treated as zero.
1.340 + */
1.341 +
1.342 + rsbs r12, r3, #31; // N set, V clear if R3 > 31
1.343 + tstpl r2, #0xff; // execute only if R3 <= 31
1.344 + bxle lr; // exit if Z set or N != V
1.345 +
1.346 + /**
1.347 + * Extract the mantissas and shift them up to unsigned 1.31 fixed
1.348 + * point, inserting the implied leading "1" bit at the same time.
1.349 + * Then align the decimal points and subtract the smaller mantissa
1.350 + * from the larger one.
1.351 + */
1.352 +
1.353 + mov r12, #0x80000000;
1.354 + orr r0, r12, r0, lsl #8;
1.355 + orr r1, r12, r1, lsl #8;
1.356 + subs r0, r0, r1, lsr r3;
1.357 +
1.358 + /**
1.359 + * We split the range of possible results into three categories:
1.360 + *
1.361 + * 1. [1.0, 2.0) ==> no renormalization needed (highest bit set)
1.362 + * 2. [0.5, 1.0) ==> only one left-shift needed
1.363 + * 3. (0.0, 0.5) ==> multiple left-shifts needed
1.364 + * 4. zero ==> just return
1.365 + *
1.366 + * Cases 1 and 2 are handled in the main code path. Cases 3 and 4
1.367 + * are less common by far, so we branch to a separate code fragment
1.368 + * for those.
1.369 + */
1.370 +
1.371 + movpls r0, r0, lsl #1; // Cases 2,3,4: shift left
1.372 + bpl _m3gSubRenormalize; // Cases 3 & 4: branch out
1.373 +
1.374 + /**
1.375 + * Now we have normalized the mantissa such that the highest bit
1.376 + * is set. Here we only need to adjust the exponent, if necessary,
1.377 + * and put the pieces together. Note that we lose the leading "1"
1.378 + * bit from the mantissa by adding it up with the exponent. We can
1.379 + * also do proper rounding (towards nearest) instead of truncation
1.380 + * (towards zero) at no extra cost!
1.381 + */
1.382 +
1.383 + sbc r3, r3, #1; // deltaExp -= 1 (Case 1) or 2 (Case 2)
1.384 + add r2, r2, r3; // resultExp = smallExp + deltaExp
1.385 + movs r0, r0, lsr #8; // shift mantissa, keep leading "1"
1.386 + adc r0, r0, r2, lsl #23; // resultExp += 1, mantissa += carry
1.387 + bx lr;
1.388 +
1.389 + /**
1.390 + * Separate code path for cases 3 and 4 (see above). The mantissa
1.391 + * has already been shifted up by one, but the exponent has not
1.392 + * been correspondingly decreased. We also know that the highest
1.393 + * bit is still not set, and that the carry flag is clear.
1.394 + */
1.395 +
1.396 +_m3gSubRenormalize
1.397 + bxeq lr;
1.398 + subcc r3, r3, #2;
1.399 + movccs r0, r0, lsl #2;
1.400 +
1.401 + /**
1.402 + * If the carry flag is still not set, i.e. there were more than
1.403 + * two leading zeros in the mantissa initially, loop until we
1.404 + * find the highest set bit.
1.405 + */
1.406 +
1.407 +_m3gSubRenormalizeLoop
1.408 + subcc r3, r3, #1;
1.409 + movccs r0, r0, lsl #1;
1.410 + bcc _m3gSubRenormalizeLoop;
1.411 +
1.412 + /**
1.413 + * Now the leading "1" is in the carry flag, so we can just add up
1.414 + * the exponent and mantissa as usual, doing proper rounding at
1.415 + * the same time. However, cases where the exponent goes negative
1.416 + * (that is, underflows) must be detected and flushed to zero.
1.417 + */
1.418 +
1.419 + add r3, r2, r3;
1.420 + movs r0, r0, lsr #9;
1.421 + adc r0, r0, r3, lsl #23;
1.422 + teq r0, r2, lsl #23;
1.423 + movmi r0, #0;
1.424 + bx lr;
1.425 +}
1.426 +
1.427 +#else /* M3G_BUILD_ARM */
1.428 +
1.429 +/*!
1.430 + * \internal
1.431 + * \brief Floating point addition implementation
1.432 + *
1.433 + */
1.434 +static M3G_INLINE M3Gfloat m3gFloatAdd(const M3Guint aBits,
1.435 + const M3Guint bBits)
1.436 +{
1.437 + M3Guint large, small, signMask;
1.438 +
1.439 + /* Early exits for underflow cases */
1.440 +
1.441 + large = (M3Gint)(aBits & ~SIGN_MASK);
1.442 + if (large <= 0x00800000) {
1.443 + return INT_AS_FLOAT(bBits);
1.444 + }
1.445 + small = (M3Gint)(bBits & ~SIGN_MASK);
1.446 + if (small <= 0x00800000) {
1.447 + return INT_AS_FLOAT(aBits);
1.448 + }
1.449 +
1.450 + /* Swap the numbers so that "large" really is larger; the unsigned
1.451 + * (or de-signed) bitmasks for floats are nicely monotonous, so we
1.452 + * can compare directly. Also store the sign of the larger number
1.453 + * for future reference. */
1.454 +
1.455 + if (small > large) {
1.456 + M3Gint temp = small;
1.457 + small = large;
1.458 + large = temp;
1.459 + signMask = (bBits & SIGN_MASK);
1.460 + }
1.461 + else {
1.462 + signMask = (aBits & SIGN_MASK);
1.463 + }
1.464 +
1.465 + {
1.466 + M3Guint res, overflow;
1.467 + M3Guint resExp, expDelta;
1.468 +
1.469 + /* Store the larger exponent as our candidate result exponent,
1.470 + * and compute the difference between the exponents */
1.471 +
1.472 + resExp = (large >> 23);
1.473 + expDelta = resExp - (small >> 23);
1.474 +
1.475 + /* Take an early exit if the change would be insignificant;
1.476 + * this also guards against odd results from shifting by more
1.477 + * than 31 (undefined in C) */
1.478 +
1.479 + if (expDelta >= 24) {
1.480 + res = large | signMask;
1.481 + return INT_AS_FLOAT(res);
1.482 + }
1.483 +
1.484 + /* Convert the mantissas into shifted integers, and shift the
1.485 + * smaller number to the same scale with the larger one. */
1.486 +
1.487 + large = (large & MANTISSA_MASK) | LEADING_ONE;
1.488 + small = (small & MANTISSA_MASK) | LEADING_ONE;
1.489 + small >>= expDelta;
1.490 + M3G_ASSERT(large >= small);
1.491 +
1.492 + /* Check whether we're really adding or subtracting the
1.493 + * smaller number, and branch to slightly different routines
1.494 + * respectively */
1.495 +
1.496 + if (((aBits ^ bBits) & SIGN_MASK) == 0) {
1.497 +
1.498 + /* Matching signs; just add the numbers and check for
1.499 + * overflow, shifting to compensate as necessary. */
1.500 +
1.501 + res = large + small;
1.502 +
1.503 + overflow = (res >> 24);
1.504 + res >>= overflow;
1.505 + resExp += overflow;
1.506 + }
1.507 + else {
1.508 +
1.509 + /* Different signs, so let's subtract the smaller value;
1.510 + * also check that we're not subtracting a number from
1.511 + * itself (so we don't have to normalize a zero below) */
1.512 +
1.513 + if (small == large) {
1.514 + return 0.0f; /* x - x = 0 */
1.515 + }
1.516 +
1.517 + res = (large << 8) - (small << 8);
1.518 +
1.519 + /* Renormalize the number by shifting until we've got the
1.520 + * high bit in place */
1.521 +
1.522 + while ((res >> 24) == 0) {
1.523 + res <<= 8;
1.524 + resExp -= 8;
1.525 + }
1.526 + while ((res >> 31) == 0) {
1.527 + res <<= 1;
1.528 + --resExp;
1.529 + }
1.530 + res >>= 8;
1.531 + }
1.532 +
1.533 + /* Flush to zero in case of over/underflow of the exponent */
1.534 +
1.535 + if (resExp >= 255) {
1.536 + return 0.0f;
1.537 + }
1.538 +
1.539 + /* Compose the final number into "res"; note that we pull in
1.540 + * the sign of the original larger number, which should still
1.541 + * be valid */
1.542 +
1.543 + res &= MANTISSA_MASK;
1.544 + res |= (resExp << 23);
1.545 + res |= signMask;
1.546 +
1.547 + return INT_AS_FLOAT(res);
1.548 + }
1.549 +}
1.550 +
1.551 +/*!
1.552 + * \internal
1.553 + * \brief Floating point multiplication implementation
1.554 + */
1.555 +static M3G_INLINE M3Gfloat m3gFloatMul(const M3Guint aBits,
1.556 + const M3Guint bBits)
1.557 +{
1.558 + M3Guint a, b;
1.559 +
1.560 + /* Early exits for underflow and multiplication by zero */
1.561 +
1.562 + a = (aBits & ~SIGN_MASK);
1.563 + if (a <= 0x00800000) {
1.564 + return 0.0f;
1.565 + }
1.566 +
1.567 + b = (bBits & ~SIGN_MASK);
1.568 + if (b <= 0x00800000) {
1.569 + return 0.0f;
1.570 + }
1.571 +
1.572 + {
1.573 + M3Guint res, exponent, overflow;
1.574 +
1.575 + /* Compute the exponent of the result, assuming the mantissas
1.576 + * don't overflow; then mask out the original exponents */
1.577 +
1.578 + exponent = (a >> 23) + (b >> 23) - 127;
1.579 + a &= MANTISSA_MASK;
1.580 + b &= MANTISSA_MASK;
1.581 +
1.582 + /* Compute the new mantissa from:
1.583 + *
1.584 + * (1 + a)(1 + b) = ab + a + b + 1
1.585 + *
1.586 + * First shift the mantissas from 0.23 down to 0.16 for the
1.587 + * multiplication, then shift back to 0.23 for adding in the
1.588 + * "a + b + 1" part of the equation. */
1.589 +
1.590 + res = (a >> 7) * (b >> 7); /* "ab" at 0.32 */
1.591 + res = (res >> 9) + a + b + LEADING_ONE;
1.592 +
1.593 + /* Add the leading one, then normalize the result by checking
1.594 + * the overflow bit and dividing by two if necessary */
1.595 +
1.596 + overflow = (res >> 24);
1.597 + res >>= overflow;
1.598 + exponent += overflow;
1.599 +
1.600 + /* Flush to zero in case of over/underflow of the exponent */
1.601 +
1.602 + if (exponent >= 255) {
1.603 + return 0.0f;
1.604 + }
1.605 +
1.606 + /* Compose the final number into "res" */
1.607 +
1.608 + res &= MANTISSA_MASK;
1.609 + res |= (exponent << 23);
1.610 + res |= (aBits ^ bBits) & SIGN_MASK;
1.611 +
1.612 + return INT_AS_FLOAT(res);
1.613 + }
1.614 +}
1.615 +
1.616 +#endif /* M3G_BUILD_ARM */
1.617 +
1.618 +/*!
1.619 + * \internal
1.620 + * \brief Computes the signed fractional part of a floating point
1.621 + * number
1.622 + *
1.623 + * \param x floating point value
1.624 + * \return x signed fraction of x in ]-1, 1[
1.625 + */
1.626 +static M3Gfloat m3gFrac(M3Gfloat x)
1.627 +{
1.628 + /* Quick exit for -1 < x < 1 */
1.629 +
1.630 + if (m3gAbs(x) < 1.0f) {
1.631 + return x;
1.632 + }
1.633 +
1.634 + /* Shift the mantissa to the proper place, mask out the integer
1.635 + * part, and renormalize */
1.636 + {
1.637 + M3Guint ix = FLOAT_AS_UINT(x);
1.638 + M3Gint expo = ((ix >> 23) & 0xFF) - 127;
1.639 + M3G_ASSERT(expo >= 0);
1.640 +
1.641 + /* The decimal part will always be zero for large values with
1.642 + * exponents over 24 */
1.643 +
1.644 + if (expo >= 24) {
1.645 + return 0.f;
1.646 + }
1.647 + else {
1.648 +
1.649 + /* Shift the integer part out of the mantissa and see what
1.650 + * we have left */
1.651 +
1.652 + M3Guint base = (ix & MANTISSA_MASK) | LEADING_ONE;
1.653 + base = (base << expo) & MANTISSA_MASK;
1.654 +
1.655 + /* Quick exit (and guard against infinite looping) for
1.656 + * zero */
1.657 +
1.658 + if (base == 0) {
1.659 + return 0.f;
1.660 + }
1.661 +
1.662 + /* We now have an exponent of 0 (i.e. no shifting), but
1.663 + * must renormalize to get a set bit in place of the
1.664 + * hidden (implicit one) bit */
1.665 +
1.666 + expo = 0;
1.667 +
1.668 + while ((base >> 19) == 0) {
1.669 + base <<= 4;
1.670 + expo -= 4;
1.671 + }
1.672 + while ((base >> 23) == 0) {
1.673 + base <<= 1;
1.674 + --expo;
1.675 + }
1.676 +
1.677 + /* Compose the final number */
1.678 +
1.679 + ix =
1.680 + (base & MANTISSA_MASK) |
1.681 + ((expo + 127) << 23) |
1.682 + (ix & SIGN_MASK);
1.683 + return INT_AS_FLOAT(ix);
1.684 + }
1.685 + }
1.686 +}
1.687 +
1.688 +#endif /* M3G_SOFT_FLOAT */
1.689 +
1.690 +#if defined(M3G_DEBUG)
1.691 +/*!
1.692 + * \internal
1.693 + * \brief Checks for NaN or infinity in a floating point input
1.694 + */
1.695 +static void m3gValidateFloats(int n, float *p)
1.696 +{
1.697 + while (n-- > 0) {
1.698 + M3G_ASSERT(EXPONENT(*p) < 120);
1.699 + ++p;
1.700 + }
1.701 +}
1.702 +#else
1.703 +# define m3gValidateFloats(n, p)
1.704 +#endif
1.705 +
1.706 +/*------------------ Trigonometry and exp ----------*/
1.707 +
1.708 +
1.709 +#if defined(M3G_SOFT_FLOAT)
1.710 +/*!
1.711 + * \internal
1.712 + * \brief Sine for the first quadrant
1.713 + *
1.714 + * \param x floating point value in [0, PI/2]
1.715 + * \return sine of \c x
1.716 + */
1.717 +static M3Gfloat m3gSinFirstQuadrant(M3Gfloat x)
1.718 +{
1.719 + M3Guint bits = FLOAT_AS_UINT(x);
1.720 +
1.721 + if (bits <= 0x3ba3d70au) /* 0.005f */
1.722 + return x;
1.723 + else {
1.724 + static const M3Gfloat sinTermLut[4] = {
1.725 + -1.0f / (2*3),
1.726 + -1.0f / (4*5),
1.727 + -1.0f / (6*7),
1.728 + -1.0f / (8*9)
1.729 + };
1.730 +
1.731 + M3Gfloat xx = m3gSquare(x);
1.732 + M3Gfloat sinx = x;
1.733 + int i;
1.734 +
1.735 + for (i = 0; i < 4; ++i) {
1.736 + x = m3gMul(x, m3gMul(xx, sinTermLut[i]));
1.737 + sinx = m3gAdd(sinx, x);
1.738 + }
1.739 +
1.740 + return sinx;
1.741 + }
1.742 +}
1.743 +#endif /* M3G_SOFT_FLOAT */
1.744 +
1.745 +#if defined(M3G_SOFT_FLOAT)
1.746 +/*!
1.747 + * \internal
1.748 + * \brief Computes sine for the first period
1.749 + *
1.750 + * \param x floating point value in [0, 2PI]
1.751 + * \return sine of x
1.752 + */
1.753 +static M3Gfloat m3gSinFirstPeriod(const M3Gfloat x)
1.754 +{
1.755 + M3G_ASSERT(x >= 0 && x <= TWO_PI);
1.756 +
1.757 + if (x >= PI) {
1.758 + return m3gNegate(m3gSinFirstQuadrant(x >= ONE_AND_HALF_PI ?
1.759 + m3gSub(TWO_PI, x) :
1.760 + m3gSub(x, PI)));
1.761 + }
1.762 + return m3gSinFirstQuadrant((x >= HALF_PI) ? m3gSub(PI, x) : x);
1.763 +}
1.764 +#endif /* M3G_SOFT_FLOAT */
1.765 +
1.766 +/*------------- Float vs. int conversion helpers -------------*/
1.767 +
1.768 +/*!
1.769 + * \internal
1.770 + * \brief Scales and clamps a float to unsigned byte range
1.771 + */
1.772 +static M3G_INLINE M3Gint m3gFloatToByte(const M3Gfloat a)
1.773 +{
1.774 + return m3gRoundToInt(m3gMul(255.f, m3gClampFloat(a, 0.f, 1.f)));
1.775 +}
1.776 +
1.777 +/*------------------ Vector helpers ------------------*/
1.778 +
1.779 +/*!
1.780 + * \internal
1.781 + * \brief Computes the norm of a floating-point 3-vector
1.782 + */
1.783 +static M3G_INLINE M3Gfloat m3gNorm3(const M3Gfloat *v)
1.784 +{
1.785 + return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])),
1.786 + m3gSquare(v[2]));
1.787 +}
1.788 +
1.789 +/*!
1.790 + * \internal
1.791 + * \brief Computes the norm of a floating-point 4-vector
1.792 + */
1.793 +static M3G_INLINE M3Gfloat m3gNorm4(const M3Gfloat *v)
1.794 +{
1.795 + return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])),
1.796 + m3gAdd(m3gSquare(v[2]), m3gSquare(v[3])));
1.797 +}
1.798 +
1.799 +/*!
1.800 + * \internal
1.801 + * \brief Scales a floating-point 3-vector
1.802 + */
1.803 +static M3G_INLINE void m3gScale3(M3Gfloat *v, M3Gfloat s)
1.804 +{
1.805 + v[0] = m3gMul(v[0], s);
1.806 + v[1] = m3gMul(v[1], s);
1.807 + v[2] = m3gMul(v[2], s);
1.808 +}
1.809 +
1.810 +/*!
1.811 + * \internal
1.812 + * \brief Scales a floating-point 4-vector
1.813 + */
1.814 +static M3G_INLINE void m3gScale4(M3Gfloat *v, M3Gfloat s)
1.815 +{
1.816 + v[0] = m3gMul(v[0], s);
1.817 + v[1] = m3gMul(v[1], s);
1.818 + v[2] = m3gMul(v[2], s);
1.819 + v[3] = m3gMul(v[3], s);
1.820 +}
1.821 +
1.822 +
1.823 +/*------------------ Matrices ------------------*/
1.824 +
1.825 +/*!
1.826 + * \internal
1.827 + */
1.828 +static M3G_INLINE M3Gbool m3gIsClassified(const Matrix *mtx)
1.829 +{
1.830 + M3G_ASSERT(mtx != NULL);
1.831 + return (M3Gbool) mtx->classified;
1.832 +}
1.833 +
1.834 +/*!
1.835 + * \internal
1.836 + * \brief Returns a classification for a single floating point number
1.837 + */
1.838 +static M3G_INLINE M3Guint m3gElementClass(const M3Gfloat x)
1.839 +{
1.840 + if (IS_ZERO(x)) {
1.841 + return ELEM_ZERO;
1.842 + }
1.843 + else if (IS_ONE(x)) {
1.844 + return ELEM_ONE;
1.845 + }
1.846 + else if (IS_MINUS_ONE(x)) {
1.847 + return ELEM_MINUS_ONE;
1.848 + }
1.849 + return ELEM_ANY;
1.850 +}
1.851 +
1.852 +/*!
1.853 + * \internal
1.854 + * \brief Computes the classification mask of a matrix
1.855 + *
1.856 + * The mask is constructed from two bits per elements, with the lowest
1.857 + * two bits corresponding to the first element in the \c elem array of
1.858 + * the matrix.
1.859 + */
1.860 +static void m3gClassify(Matrix *mtx)
1.861 +{
1.862 + M3Guint mask = 0;
1.863 + const M3Gfloat *p;
1.864 + int i;
1.865 +
1.866 + M3G_ASSERT(mtx != NULL);
1.867 + M3G_ASSERT(!m3gIsClassified(mtx));
1.868 +
1.869 + p = mtx->elem;
1.870 + for (i = 0; i < 16; ++i) {
1.871 + M3Gfloat elem = *p++;
1.872 + mask |= (m3gElementClass(elem) << (i*2));
1.873 + }
1.874 + mtx->mask = mask;
1.875 + mtx->classified = M3G_TRUE;
1.876 +}
1.877 +
1.878 +/*!
1.879 + * \internal
1.880 + * \brief Sets matrix classification directly
1.881 + */
1.882 +static M3G_INLINE void m3gClassifyAs(M3Guint mask, Matrix *mtx)
1.883 +{
1.884 + M3G_ASSERT(mtx != NULL);
1.885 + mtx->mask = mask;
1.886 + mtx->classified = M3G_TRUE;
1.887 + mtx->complete = M3G_FALSE;
1.888 +}
1.889 +
1.890 +/*!
1.891 + * \internal
1.892 + * \brief Attempts to classify a matrix more precisely
1.893 + *
1.894 + * Tries to classify all "free" elements of a matrix into one of the
1.895 + * predefined constants to improve precision and performance in
1.896 + * subsequent calculations.
1.897 + */
1.898 +static void m3gSubClassify(Matrix *mtx)
1.899 +{
1.900 + M3G_ASSERT_PTR(mtx);
1.901 + M3G_ASSERT(m3gIsClassified(mtx));
1.902 + {
1.903 + const M3Gfloat *p = mtx->elem;
1.904 + M3Guint inMask = mtx->mask;
1.905 + M3Guint outMask = inMask;
1.906 + int i;
1.907 +
1.908 + for (i = 0; i < 16; ++i, inMask >>= 2) {
1.909 + M3Gfloat elem = *p++;
1.910 + if ((inMask & 0x03) == ELEM_ANY) {
1.911 + outMask &= ~(0x03u << (i*2));
1.912 + outMask |= (m3gElementClass(elem) << (i*2));
1.913 + }
1.914 + }
1.915 + mtx->mask = outMask;
1.916 + }
1.917 +}
1.918 +
1.919 +/*!
1.920 + * \internal
1.921 + * \brief Fills in the implicit values for a classified matrix
1.922 + */
1.923 +static void m3gFillClassifiedMatrix(Matrix *mtx)
1.924 +{
1.925 + int i;
1.926 + M3Guint mask;
1.927 + M3Gfloat *p;
1.928 +
1.929 + M3G_ASSERT(mtx != NULL);
1.930 + M3G_ASSERT(mtx->classified);
1.931 + M3G_ASSERT(!mtx->complete);
1.932 +
1.933 + mask = mtx->mask;
1.934 + p = mtx->elem;
1.935 +
1.936 + for (i = 0; i < 16; ++i, mask >>= 2) {
1.937 + unsigned elem = (mask & 0x03);
1.938 + switch (elem) {
1.939 + case ELEM_ZERO: *p++ = 0.0f; break;
1.940 + case ELEM_ONE: *p++ = 1.0f; break;
1.941 + case ELEM_MINUS_ONE: *p++ = -1.0f; break;
1.942 + default: ++p;
1.943 + }
1.944 + }
1.945 + mtx->complete = M3G_TRUE;
1.946 +}
1.947 +
1.948 +
1.949 +#if !defined(M3G_HW_FLOAT)
1.950 +/*!
1.951 + * \internal
1.952 + * \brief Performs one multiply-add of classified matrix elements
1.953 + *
1.954 + * \param amask element class of the first multiplicand
1.955 + * \param a float value of the first multiplicand
1.956 + * \param bmask element class of the second multiplicand
1.957 + * \param b float value of the second multiplicand
1.958 + * \param c float value to add
1.959 + * \return a * b + c
1.960 + *
1.961 + * \notes inline, as only called from the matrix product function
1.962 + */
1.963 +static M3G_INLINE M3Gfloat m3gClassifiedMadd(
1.964 + const M3Gbitmask amask, const M3Gfloat *pa,
1.965 + const M3Gbitmask bmask, const M3Gfloat *pb,
1.966 + const M3Gfloat c)
1.967 +{
1.968 + /* Check for zero product to reduce the switch cases below */
1.969 +
1.970 + if (amask == ELEM_ZERO || bmask == ELEM_ZERO) {
1.971 + return c;
1.972 + }
1.973 +
1.974 + /* Branch based on the classification of a */
1.975 +
1.976 + switch (amask) {
1.977 +
1.978 + case ELEM_ANY:
1.979 + if (bmask == ELEM_ONE) {
1.980 + return m3gAdd(*pa, c); /* a * 1 + c = a + c */
1.981 + }
1.982 + if (bmask == ELEM_MINUS_ONE) {
1.983 + return m3gSub(c, *pa); /* a * -1 + c = -a + c */
1.984 + }
1.985 + return m3gMadd(*pa, *pb, c); /* a * b + c */
1.986 +
1.987 + case ELEM_ONE:
1.988 + if (bmask == ELEM_ONE) {
1.989 + return m3gAdd(c, 1.f); /* 1 * 1 + c = 1 + c */
1.990 + }
1.991 + if (bmask == ELEM_MINUS_ONE) {
1.992 + return m3gSub(c, 1.f); /* 1 * -1 + c = -1 + c */
1.993 + }
1.994 + return m3gAdd(*pb, c); /* 1 * b + c = b + c */
1.995 +
1.996 + case ELEM_MINUS_ONE:
1.997 + if (bmask == ELEM_ONE) {
1.998 + return m3gSub(c, 1.f); /* -1 * 1 + c = -1 + c */
1.999 + }
1.1000 + if (bmask == ELEM_MINUS_ONE) {
1.1001 + return m3gAdd(c, 1.f); /* -1 * -1 + c = 1 + c */
1.1002 + }
1.1003 + return m3gSub(c, *pb); /* -1 * b + c = -b + c */
1.1004 +
1.1005 + default:
1.1006 + M3G_ASSERT(M3G_FALSE);
1.1007 + return 0.0f;
1.1008 + }
1.1009 +}
1.1010 +#endif /*!defined(M3G_HW_FLOAT)*/
1.1011 +
1.1012 +/*!
1.1013 + * \internal
1.1014 + * \brief Computes a generic 4x4 matrix product
1.1015 + */
1.1016 +static void m3gGenericMatrixProduct(Matrix *dst,
1.1017 + const Matrix *left, const Matrix *right)
1.1018 +{
1.1019 + M3G_ASSERT(dst != NULL && left != NULL && right != NULL);
1.1020 +
1.1021 + {
1.1022 +# if defined(M3G_HW_FLOAT)
1.1023 + if (!left->complete) {
1.1024 + m3gFillClassifiedMatrix((Matrix*)left);
1.1025 + }
1.1026 + if (!right->complete) {
1.1027 + m3gFillClassifiedMatrix((Matrix*)right);
1.1028 + }
1.1029 +# else
1.1030 + const unsigned lmask = left->mask;
1.1031 + const unsigned rmask = right->mask;
1.1032 +# endif
1.1033 +
1.1034 +#if defined(M3G_HW_FLOAT_VFPV2)
1.1035 + _m3gGenericMatrixProduct(dst, left, right);
1.1036 +#else
1.1037 + {
1.1038 + int row;
1.1039 +
1.1040 + for (row = 0; row < 4; ++row) {
1.1041 + int col;
1.1042 + for (col = 0; col < 4; ++col) {
1.1043 + int k;
1.1044 + M3Gfloat a = 0;
1.1045 + for (k = 0; k < 4; ++k) {
1.1046 + M3Gint lidx = MELEM(row, k);
1.1047 + M3Gint ridx = MELEM(k, col);
1.1048 +# if defined(M3G_HW_FLOAT)
1.1049 + a = m3gMadd(left->elem[lidx], right->elem[ridx], a);
1.1050 +# else
1.1051 + a = m3gClassifiedMadd((lmask >> (2 * lidx)) & 3,
1.1052 + &left->elem[lidx],
1.1053 + (rmask >> (2 * ridx)) & 3,
1.1054 + &right->elem[ridx],
1.1055 + a);
1.1056 +# endif /*!M3G_HW_FLOAT*/
1.1057 + }
1.1058 + M44F(dst, row, col) = a;
1.1059 + }
1.1060 + }
1.1061 + }
1.1062 +#endif /*!M3G_HW_FLOAT_VFPV2*/
1.1063 + }
1.1064 + dst->complete = M3G_TRUE;
1.1065 + dst->classified = M3G_FALSE;
1.1066 +}
1.1067 +
1.1068 +/*!
1.1069 + * \internal
1.1070 + * \brief Converts a float vector to 16-bit integers
1.1071 + *
1.1072 + * \param size vector length
1.1073 + * \param vec pointer to float vector
1.1074 + * \param scale scale of output values, as the number of bits to
1.1075 + * shift left to get actual values
1.1076 + * \param outVec output value vector
1.1077 + */
1.1078 +static void m3gFloatVecToShort(M3Gint size, const M3Gfloat *vec,
1.1079 + M3Gint scale, M3Gshort *outVec)
1.1080 +{
1.1081 + const M3Guint *vecInt = (const M3Guint*) vec;
1.1082 + M3Gint i;
1.1083 +
1.1084 + for (i = 0; i < size; ++i) {
1.1085 + M3Guint a = vecInt[i];
1.1086 + if ((a & ~SIGN_MASK) < (1 << 23)) {
1.1087 + *outVec++ = 0;
1.1088 + }
1.1089 + else {
1.1090 + M3Gint shift =
1.1091 + scale - ((M3Gint)((vecInt[i] >> 23) & 0xFFu) - (127 + 23));
1.1092 + M3G_ASSERT(shift > 8); /* or the high bits will overflow */
1.1093 +
1.1094 + if (shift > 23) {
1.1095 + *outVec++ = 0;
1.1096 + }
1.1097 + else {
1.1098 + M3Gint out =
1.1099 + (M3Gint) (((a & MANTISSA_MASK) | LEADING_ONE) >> shift);
1.1100 + if (a >> 31) {
1.1101 + out = -out;
1.1102 + }
1.1103 + M3G_ASSERT(m3gInRange(out, -32767, 32767));
1.1104 + *outVec++ = (M3Gshort) out;
1.1105 + }
1.1106 + }
1.1107 + }
1.1108 +}
1.1109 +
1.1110 +/*----------------------------------------------------------------------
1.1111 + * Internal functions
1.1112 + *--------------------------------------------------------------------*/
1.1113 +
1.1114 +/*--------------------------------------------------------------------*/
1.1115 +
1.1116 +#if defined(M3G_SOFT_FLOAT)
1.1117 +
1.1118 +#if !defined (M3G_BUILD_ARM)
1.1119 +
1.1120 +static M3Gfloat m3gAdd(const M3Gfloat a, const M3Gfloat b)
1.1121 +{
1.1122 + return m3gFloatAdd(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b));
1.1123 +}
1.1124 +
1.1125 +static M3Gfloat m3gMul(const M3Gfloat a, const M3Gfloat b)
1.1126 +{
1.1127 + return m3gFloatMul(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b));
1.1128 +}
1.1129 +
1.1130 +#endif /* M3G_BUILD_ARM */
1.1131 +
1.1132 +/*!
1.1133 + * \internal
1.1134 + * \brief Computes the reciprocal of the square root
1.1135 + *
1.1136 + * \param x a floating point value
1.1137 + * \return 1 / square root of \c x
1.1138 + */
1.1139 +static M3Gfloat m3gRcpSqrt(const M3Gfloat x)
1.1140 +{
1.1141 + /* Approximation followed by Newton-Raphson iteration a'la
1.1142 + * "Floating-point tricks" by J. Blinn, but we iterate several
1.1143 + * times to improve precision */
1.1144 +
1.1145 + M3Gint i = (M3G_FLOAT_ONE + (M3G_FLOAT_ONE >> 1))
1.1146 + - (FLOAT_AS_UINT(x) >> 1);
1.1147 + M3Gfloat y = INT_AS_FLOAT(i);
1.1148 + for (i = 0; i < 3; ++i) {
1.1149 + y = m3gMul(y, m3gSub(1.5f, m3gHalf(m3gMul(x, m3gSquare(y)))));
1.1150 + }
1.1151 + return y;
1.1152 +}
1.1153 +
1.1154 +/*!
1.1155 + * \internal
1.1156 + * \brief Computes the square root
1.1157 + *
1.1158 + * \param x a floating point value
1.1159 + * \return square root of \c x
1.1160 + */
1.1161 +static M3Gfloat m3gSqrt(const M3Gfloat x)
1.1162 +{
1.1163 + /* Approximation followed by Newton-Raphson iteration a'la
1.1164 + * "Floating-point tricks" by J. Blinn, but we iterate several
1.1165 + * times to improve precision */
1.1166 +
1.1167 + M3Gint i = (FLOAT_AS_UINT(x) >> 1) + (M3G_FLOAT_ONE >> 1);
1.1168 + M3Gfloat y = INT_AS_FLOAT(i);
1.1169 + for (i = 0; i < 2; ++i) {
1.1170 + y = m3gDiv(m3gAdd(m3gSquare(y), x), m3gDouble(y));
1.1171 + }
1.1172 + return y;
1.1173 +}
1.1174 +
1.1175 +#endif /* M3G_SOFT_FLOAT */
1.1176 +
1.1177 +/*--------------------------------------------------------------------*/
1.1178 +
1.1179 +#if defined(M3G_SOFT_FLOAT)
1.1180 +/*!
1.1181 + * \internal
1.1182 + */
1.1183 +static M3Gfloat m3gArcCos(const M3Gfloat x)
1.1184 +{
1.1185 + return (M3Gfloat) acos(x);
1.1186 +}
1.1187 +
1.1188 +/*!
1.1189 + * \internal
1.1190 + */
1.1191 +static M3Gfloat m3gArcTan(const M3Gfloat y, const M3Gfloat x)
1.1192 +{
1.1193 + return (M3Gfloat) atan2(y, x);
1.1194 +}
1.1195 +
1.1196 +/*!
1.1197 + * \internal
1.1198 + */
1.1199 +static M3Gfloat m3gCos(const M3Gfloat x)
1.1200 +{
1.1201 + return m3gSin(m3gAdd(x, HALF_PI));
1.1202 +}
1.1203 +
1.1204 +/*!
1.1205 + * \internal
1.1206 + * \brief
1.1207 + */
1.1208 +static M3Gfloat m3gSin(const M3Gfloat x)
1.1209 +{
1.1210 + M3Gfloat f = x;
1.1211 +
1.1212 + /* If x is greater than two pi, do a modulo operation to bring it
1.1213 + * back in range for the internal sine function */
1.1214 +
1.1215 + if (m3gAbs(f) >= TWO_PI) {
1.1216 + f = m3gMul (f, (1.f / TWO_PI));
1.1217 + f = m3gFrac(f);
1.1218 + f = m3gMul (f, TWO_PI);
1.1219 + }
1.1220 +
1.1221 + /* Compute the result, negating both the input value and the
1.1222 + * result if x was negative */
1.1223 + {
1.1224 + M3Guint i = FLOAT_AS_UINT(f);
1.1225 + M3Guint neg = (i & SIGN_MASK);
1.1226 + i ^= neg;
1.1227 + f = m3gSinFirstPeriod(INT_AS_FLOAT(i));
1.1228 + i = FLOAT_AS_UINT(f) ^ neg;
1.1229 + return INT_AS_FLOAT(i);
1.1230 + }
1.1231 +}
1.1232 +
1.1233 +/*!
1.1234 + * \internal
1.1235 + */
1.1236 +static M3Gfloat m3gTan(const M3Gfloat x)
1.1237 +{
1.1238 + return (M3Gfloat) tan(x);
1.1239 +}
1.1240 +
1.1241 +/*!
1.1242 + * \internal
1.1243 + */
1.1244 +static M3Gfloat m3gExp(const M3Gfloat a)
1.1245 +{
1.1246 + return (M3Gfloat) exp(a);
1.1247 +}
1.1248 +#endif /* M3G_SOFT_FLOAT */
1.1249 +
1.1250 +/*!
1.1251 + * \brief Checks whether the bottom row of a matrix is 0 0 0 1
1.1252 + */
1.1253 +static M3Gbool m3gIsWUnity(const Matrix *mtx)
1.1254 +{
1.1255 + M3G_ASSERT_PTR(mtx);
1.1256 +
1.1257 + if (!m3gIsClassified(mtx)) {
1.1258 + return (IS_ZERO(M44F(mtx, 3, 0)) &&
1.1259 + IS_ZERO(M44F(mtx, 3, 1)) &&
1.1260 + IS_ZERO(M44F(mtx, 3, 2)) &&
1.1261 + IS_ONE (M44F(mtx, 3, 3)));
1.1262 + }
1.1263 + else {
1.1264 + return ((mtx->mask & 0xC0C0C0C0) == (ELEM_ONE << 30));
1.1265 + }
1.1266 +}
1.1267 +
1.1268 +/*!
1.1269 + * \brief Makes a quaternion by exponentiating a quaternion logarithm
1.1270 + */
1.1271 +static void m3gExpQuat(Quat *quat, const Vec3 *qExp)
1.1272 +{
1.1273 + M3Gfloat theta;
1.1274 +
1.1275 + M3G_ASSERT_PTR(quat);
1.1276 + M3G_ASSERT_PTR(qExp);
1.1277 +
1.1278 + theta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(qExp->x),
1.1279 + m3gSquare(qExp->y)),
1.1280 + m3gSquare(qExp->z)));
1.1281 +
1.1282 + if (theta > EPSILON) {
1.1283 + M3Gfloat s = m3gMul(m3gSin(theta), m3gRcp(theta));
1.1284 + quat->x = m3gMul(qExp->x, s);
1.1285 + quat->y = m3gMul(qExp->y, s);
1.1286 + quat->z = m3gMul(qExp->z, s);
1.1287 + quat->w = m3gCos(theta);
1.1288 + }
1.1289 + else {
1.1290 + quat->x = quat->y = quat->z = 0.0f;
1.1291 + quat->w = 1.0f;
1.1292 + }
1.1293 +}
1.1294 +
1.1295 +/*!
1.1296 + * \brief Natural logarithm of a unit quaternion.
1.1297 + */
1.1298 +static void m3gLogQuat(Vec3 *qLog, const Quat *quat)
1.1299 +{
1.1300 + M3Gfloat sinTheta = m3gSqrt(m3gNorm3((const float *) quat));
1.1301 +
1.1302 + if (sinTheta > EPSILON) {
1.1303 + M3Gfloat s = m3gArcTan(sinTheta, quat->w) / sinTheta;
1.1304 + qLog->x = m3gMul(s, quat->x);
1.1305 + qLog->y = m3gMul(s, quat->y);
1.1306 + qLog->z = m3gMul(s, quat->z);
1.1307 + }
1.1308 + else {
1.1309 + qLog->x = qLog->y = qLog->z = 0.0f;
1.1310 + }
1.1311 +}
1.1312 +
1.1313 +/*!
1.1314 + * \brief Make quaternion the "logarithmic difference" between two
1.1315 + * other quaternions.
1.1316 + */
1.1317 +static void m3gLogDiffQuat(Vec3 *logDiff,
1.1318 + const Quat *from, const Quat *to)
1.1319 +{
1.1320 + Quat temp;
1.1321 + temp.x = m3gNegate(from->x);
1.1322 + temp.y = m3gNegate(from->y);
1.1323 + temp.z = m3gNegate(from->z);
1.1324 + temp.w = from->w;
1.1325 + m3gMulQuat(&temp, to);
1.1326 + m3gLogQuat(logDiff, &temp);
1.1327 +}
1.1328 +
1.1329 +/*!
1.1330 + * \brief Rounds a float to the nearest integer
1.1331 + *
1.1332 + * Overflows are clamped to the maximum or minimum representable
1.1333 + * value.
1.1334 + */
1.1335 +static M3Gint m3gRoundToInt(const M3Gfloat a)
1.1336 +{
1.1337 + M3Guint base = FLOAT_AS_UINT(a);
1.1338 + M3Gint signMask, expo;
1.1339 +
1.1340 + /* Decompose the number into sign, exponent, and base number */
1.1341 +
1.1342 + signMask = ((M3Gint) base >> 31); /* -> 0 or 0xFFFFFFFF */
1.1343 + expo = (M3Gint)((base >> 23) & 0xFF) - 127;
1.1344 +
1.1345 + /* First check for large values and return either the negative or
1.1346 + * the positive maximum integer in case of overflow. The overflow
1.1347 + * check can be made on the exponent alone, as large floats are
1.1348 + * spaced several integer values apart so that nothing will
1.1349 + * overflow because of rounding later on */
1.1350 +
1.1351 + if (expo >= 31) {
1.1352 + return (M3Gint)((1U << 31) - 1 + (((M3Guint) signMask) & 1));
1.1353 + }
1.1354 +
1.1355 + /* Also check for underflow to avoid problems with shifting by
1.1356 + * more than 31 */
1.1357 +
1.1358 + if (expo < -1) {
1.1359 + return 0;
1.1360 + }
1.1361 +
1.1362 + /* Mask out the sign and exponent bits, shift the base number so
1.1363 + * that the lowest bit corresponds to one half, then add one
1.1364 + * (half) and shift to round to the closest integer. */
1.1365 +
1.1366 + base = (base | LEADING_ONE) << 8; /* shift mantissa to 1.31 */
1.1367 + base = base >> (30 - expo); /* integer value as 31.1 */
1.1368 + base = (base + 1) >> 1; /* round to nearest 32.0 */
1.1369 +
1.1370 + /* Factor in the sign (negate if originally negative) and
1.1371 + * return */
1.1372 +
1.1373 + return ((M3Gint) base ^ signMask) - signMask;
1.1374 +}
1.1375 +
1.1376 +/*!
1.1377 + * \brief Calculates ray-triangle intersection.
1.1378 + *
1.1379 + * http://www.acm.org/jgt/papers/MollerTrumbore97
1.1380 + */
1.1381 +static M3Gbool m3gIntersectTriangle(const Vec3 *orig, const Vec3 *dir,
1.1382 + const Vec3 *vert0, const Vec3 *vert1, const Vec3 *vert2,
1.1383 + Vec3 *tuv, M3Gint cullMode)
1.1384 +{
1.1385 + Vec3 edge1, edge2, tvec, pvec, qvec;
1.1386 + M3Gfloat det,inv_det;
1.1387 +
1.1388 + /* find vectors for two edges sharing vert0 */
1.1389 + edge1 = *vert1;
1.1390 + edge2 = *vert2;
1.1391 + m3gSubVec3(&edge1, vert0);
1.1392 + m3gSubVec3(&edge2, vert0);
1.1393 +
1.1394 + /* begin calculating determinant - also used to calculate U parameter */
1.1395 + m3gCross(&pvec, dir, &edge2);
1.1396 +
1.1397 + /* if determinant is near zero, ray lies in plane of triangle */
1.1398 + det = m3gDot3(&edge1, &pvec);
1.1399 +
1.1400 + if (cullMode == 0 && det <= 0) return M3G_FALSE;
1.1401 + if (cullMode == 1 && det >= 0) return M3G_FALSE;
1.1402 +
1.1403 + if (det > -EPSILON && det < EPSILON)
1.1404 + return M3G_FALSE;
1.1405 + inv_det = m3gRcp(det);
1.1406 +
1.1407 + /* calculate distance from vert0 to ray origin */
1.1408 + tvec = *orig;
1.1409 + m3gSubVec3(&tvec, vert0);
1.1410 +
1.1411 + /* calculate U parameter and test bounds */
1.1412 + tuv->y = m3gMul(m3gDot3(&tvec, &pvec), inv_det);
1.1413 + if (tuv->y < 0.0f || tuv->y > 1.0f)
1.1414 + return M3G_FALSE;
1.1415 +
1.1416 + /* prepare to test V parameter */
1.1417 + m3gCross(&qvec, &tvec, &edge1);
1.1418 +
1.1419 + /* calculate V parameter and test bounds */
1.1420 + tuv->z = m3gMul(m3gDot3(dir, &qvec), inv_det);
1.1421 + if (tuv->z < 0.0f || m3gAdd(tuv->y, tuv->z) > 1.0f)
1.1422 + return M3G_FALSE;
1.1423 +
1.1424 + /* calculate t, ray intersects triangle */
1.1425 + tuv->x = m3gMul(m3gDot3(&edge2, &qvec), inv_det);
1.1426 +
1.1427 + return M3G_TRUE;
1.1428 +}
1.1429 +
1.1430 +/*!
1.1431 + * \brief Calculates ray-box intersection.
1.1432 + *
1.1433 + * http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm
1.1434 + */
1.1435 +
1.1436 +#define XO orig->x
1.1437 +#define YO orig->y
1.1438 +#define ZO orig->z
1.1439 +#define XD dir->x
1.1440 +#define YD dir->y
1.1441 +#define ZD dir->z
1.1442 +
1.1443 +#define XL box->min[0]
1.1444 +#define YL box->min[1]
1.1445 +#define ZL box->min[2]
1.1446 +#define XH box->max[0]
1.1447 +#define YH box->max[1]
1.1448 +#define ZH box->max[2]
1.1449 +
1.1450 +/*!
1.1451 + * \internal
1.1452 + * \brief Ray - bounding box intersection
1.1453 + *
1.1454 + */
1.1455 +static M3Gbool m3gIntersectBox(const Vec3 *orig, const Vec3 *dir, const AABB *box)
1.1456 +{
1.1457 + M3Gfloat tnear = M3G_MIN_NEGATIVE_FLOAT;
1.1458 + M3Gfloat tfar = M3G_MAX_POSITIVE_FLOAT;
1.1459 + M3Gfloat t1, t2, temp;
1.1460 +
1.1461 + /* X slab */
1.1462 + if(XD != 0) {
1.1463 + t1 = m3gSub(XL, XO) / XD;
1.1464 + t2 = m3gSub(XH, XO) / XD;
1.1465 +
1.1466 + if(t1 > t2) {
1.1467 + temp = t1;
1.1468 + t1 = t2;
1.1469 + t2 = temp;
1.1470 + }
1.1471 +
1.1472 + if(t1 > tnear) tnear = t1;
1.1473 + if(t2 < tfar) tfar = t2;
1.1474 +
1.1475 + if(tnear > tfar) return M3G_FALSE;
1.1476 + if(tfar < 0) return M3G_FALSE;
1.1477 + }
1.1478 + else {
1.1479 + if(XO > XH || XO < XL) return M3G_FALSE;
1.1480 + }
1.1481 +
1.1482 + /* Y slab */
1.1483 + if(YD != 0) {
1.1484 + t1 = m3gSub(YL, YO) / YD;
1.1485 + t2 = m3gSub(YH, YO) / YD;
1.1486 +
1.1487 + if(t1 > t2) {
1.1488 + temp = t1;
1.1489 + t1 = t2;
1.1490 + t2 = temp;
1.1491 + }
1.1492 +
1.1493 + if(t1 > tnear) tnear = t1;
1.1494 + if(t2 < tfar) tfar = t2;
1.1495 +
1.1496 + if(tnear > tfar) return M3G_FALSE;
1.1497 + if(tfar < 0) return M3G_FALSE;
1.1498 + }
1.1499 + else {
1.1500 + if(YO > YH || YO < YL) return M3G_FALSE;
1.1501 + }
1.1502 +
1.1503 + /* Z slab */
1.1504 + if(ZD != 0) {
1.1505 + t1 = m3gSub(ZL, ZO) / ZD;
1.1506 + t2 = m3gSub(ZH, ZO) / ZD;
1.1507 +
1.1508 + if(t1 > t2) {
1.1509 + temp = t1;
1.1510 + t1 = t2;
1.1511 + t2 = temp;
1.1512 + }
1.1513 +
1.1514 + if(t1 > tnear) tnear = t1;
1.1515 + if(t2 < tfar) tfar = t2;
1.1516 +
1.1517 + if(tnear > tfar) return M3G_FALSE;
1.1518 + if(tfar < 0) return M3G_FALSE;
1.1519 + }
1.1520 + else {
1.1521 + if(ZO > ZH || ZO < ZL) return M3G_FALSE;
1.1522 + }
1.1523 +
1.1524 + return M3G_TRUE;
1.1525 +}
1.1526 +
1.1527 +/*!
1.1528 + * \brief Calculates the intersection of two rectangles. Always fills
1.1529 + * the intersection result.
1.1530 + *
1.1531 + * \param dst result of the intersection
1.1532 + * \param r1 rectangle 1
1.1533 + * \param r2 rectangle 2
1.1534 + */
1.1535 +static M3Gbool m3gIntersectRectangle(M3GRectangle *dst, M3GRectangle *r1, M3GRectangle *r2)
1.1536 +{
1.1537 + M3Gbool intersects = M3G_TRUE;
1.1538 + M3Gint min, max;
1.1539 +
1.1540 + max = (r1->x) >= (r2->x) ? (r1->x) : (r2->x);
1.1541 + min = (r1->x + r1->width) <= (r2->x + r2->width) ? (r1->x + r1->width) : (r2->x + r2->width);
1.1542 + if ((min - max) < 0) intersects = M3G_FALSE;
1.1543 + dst->x = max;
1.1544 + dst->width = min - max;
1.1545 +
1.1546 + max = (r1->y) >= (r2->y) ? (r1->y) : (r2->y);
1.1547 + min = (r1->y + r1->height) <= (r2->y + r2->height) ? (r1->y + r1->height) : (r2->y + r2->height);
1.1548 + if ((min - max) < 0) intersects = M3G_FALSE;
1.1549 + dst->y = max;
1.1550 + dst->height = min - max;
1.1551 +
1.1552 + return intersects;
1.1553 +}
1.1554 +
1.1555 +/*-------- float-to-int color conversions --------*/
1.1556 +
1.1557 +static M3Guint m3gAlpha1f(M3Gfloat a)
1.1558 +{
1.1559 + M3Guint alpha = (M3Guint) m3gFloatToByte(a);
1.1560 + return (alpha << 24) | M3G_RGB_MASK;
1.1561 +}
1.1562 +
1.1563 +static M3Guint m3gColor3f(M3Gfloat r, M3Gfloat g, M3Gfloat b)
1.1564 +{
1.1565 + return ((M3Guint) m3gFloatToByte(r) << 16)
1.1566 + | ((M3Guint) m3gFloatToByte(g) << 8)
1.1567 + | (M3Guint) m3gFloatToByte(b)
1.1568 + | M3G_ALPHA_MASK;
1.1569 +}
1.1570 +
1.1571 +static M3Guint m3gColor4f(M3Gfloat r, M3Gfloat g, M3Gfloat b, M3Gfloat a)
1.1572 +{
1.1573 + return ((M3Guint) m3gFloatToByte(r) << 16)
1.1574 + | ((M3Guint) m3gFloatToByte(g) << 8)
1.1575 + | (M3Guint) m3gFloatToByte(b)
1.1576 + | ((M3Guint) m3gFloatToByte(a) << 24);
1.1577 +}
1.1578 +
1.1579 +static void m3gFloatColor(M3Gint argb, M3Gfloat intensity, M3Gfloat *rgba)
1.1580 +{
1.1581 + /* NOTE we intentionally aim a bit high here -- some GL
1.1582 + * implementations may round down instead of closest */
1.1583 +
1.1584 + const M3Gfloat oneOver255 = (M3Gfloat)(1.0001 / 255.0);
1.1585 +
1.1586 + rgba[0] = (M3Gfloat)((argb >> 16) & 0xFF);
1.1587 + rgba[1] = (M3Gfloat)((argb >> 8) & 0xFF);
1.1588 + rgba[2] = (M3Gfloat)((argb ) & 0xFF);
1.1589 + rgba[3] = (M3Gfloat)((argb >> 24) & 0xFF);
1.1590 +
1.1591 + m3gScale4(rgba, m3gMul(oneOver255, intensity));
1.1592 +}
1.1593 +
1.1594 +/*!
1.1595 + * \brief Converts the 3x3 submatrix of a matrix into fixed point
1.1596 + *
1.1597 + * The input matrix must be an affine one, i.e. the bottom row must be
1.1598 + * 0 0 0 1. The output matrix is guaranteed to be such that it can be
1.1599 + * multiplied with a 16-bit 3-vector without overflowing, while using
1.1600 + * the 32-bit range optimally.
1.1601 + *
1.1602 + * \param mtx the original matrix
1.1603 + * \param elem 9 shorts to hold the fixed point base numbers
1.1604 + * \return floating point exponent for the values in \c elem
1.1605 + * (number of bits to shift left for actual values)
1.1606 + */
1.1607 +static M3Gint m3gGetFixedPoint3x3Basis(const Matrix *mtx, M3Gshort *elem)
1.1608 +{
1.1609 + M3Gint outExp;
1.1610 + M3Gint row, col;
1.1611 + const M3Guint *m;
1.1612 +
1.1613 + if (!mtx->complete) {
1.1614 + m3gFillClassifiedMatrix((Matrix*) mtx);
1.1615 + }
1.1616 + m = (const M3Guint*) mtx->elem;
1.1617 +
1.1618 + /* First, find the maximum exponent value in the whole matrix */
1.1619 +
1.1620 + outExp = 0;
1.1621 + for (col = 0; col < 3; ++col) {
1.1622 + for (row = 0; row < 3; ++row) {
1.1623 + M3Gint element = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK);
1.1624 + outExp = M3G_MAX(outExp, element);
1.1625 + }
1.1626 + }
1.1627 + outExp >>= 23;
1.1628 +
1.1629 + /* Our candidate exponent is the maximum found plus 9, which is
1.1630 + * guaranteed to shift the maximum unsigned 24-bit mantissa (which
1.1631 + * always will have the high bit set) down to the signed 16-bit
1.1632 + * range */
1.1633 +
1.1634 + outExp += 9;
1.1635 +
1.1636 + /* Now proceed to sum each row and see what's the actual smallest
1.1637 + * exponent we can safely use without overflowing in a 16+16
1.1638 + * matrix-vector multiplication; this will win us one bit
1.1639 + * (doubling the precision) compared to the conservative approach
1.1640 + * of just shifting everything down by 10 bits */
1.1641 +
1.1642 + for (row = 0; row < 3; ++row) {
1.1643 +
1.1644 + /* Sum the absolute values on this row */
1.1645 +
1.1646 + M3Gint rowSum = 0;
1.1647 + for (col = 0; col < 3; ++col) {
1.1648 + M3Gint a = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK);
1.1649 + M3Gint shift = outExp - (a >> 23);
1.1650 + M3G_ASSERT(shift < 265);
1.1651 +
1.1652 + if (shift < 24) {
1.1653 + rowSum += ((a & MANTISSA_MASK) | LEADING_ONE) >> shift;
1.1654 + }
1.1655 + }
1.1656 +
1.1657 + /* Now we have a 26-bit sum of the absolute values on this
1.1658 + * row, and shift that down until we fit the target range of
1.1659 + * [0, 65535]. Note that this still leaves *exactly* enough
1.1660 + * space for adding in an arbitrary 16-bit translation vector
1.1661 + * after multiplying with the matrix! */
1.1662 +
1.1663 + while (rowSum >= (1 << 16)) {
1.1664 + rowSum >>= 1;
1.1665 + ++outExp;
1.1666 + }
1.1667 + }
1.1668 +
1.1669 + /* De-bias the exponent, but add in an extra 23 to account for the
1.1670 + * decimal bits in the floating point mantissa values we started
1.1671 + * with (we're returning the exponent as "bits to shift left to
1.1672 + * get integers", so we're off by 23 from IEEE notation) */
1.1673 +
1.1674 + outExp = (outExp - 127) - 23;
1.1675 +
1.1676 + /* Finally, shift all the matrix elements to our final output
1.1677 + * precision */
1.1678 +
1.1679 + for (col = 0; col < 3; ++col) {
1.1680 + m3gFloatVecToShort(3, mtx->elem + MELEM(0, col), outExp, elem);
1.1681 + elem += 3;
1.1682 + }
1.1683 + return outExp;
1.1684 +}
1.1685 +
1.1686 +/*!
1.1687 + * \brief Gets the translation component of a matrix as fixed point
1.1688 + *
1.1689 + * \param mtx the matrix
1.1690 + * \param elem 3 shorts to write the vector into
1.1691 + * \return floating point exponent for the values in \c elem
1.1692 + * (number of bits to shift left for actual values)
1.1693 + */
1.1694 +static M3Gint m3gGetFixedPointTranslation(const Matrix *mtx, M3Gshort *elem)
1.1695 +{
1.1696 + const M3Guint *m;
1.1697 +
1.1698 + M3G_ASSERT(m3gIsWUnity(mtx));
1.1699 + if (!mtx->complete) {
1.1700 + m3gFillClassifiedMatrix((Matrix*) mtx);
1.1701 + }
1.1702 + m = (const M3Guint*) &mtx->elem[MELEM(0, 3)];
1.1703 +
1.1704 + /* Find the maximum exponent, then scale down by 9 bits from that
1.1705 + * to shift the unsigned 24-bit mantissas to the signed 16-bit
1.1706 + * range */
1.1707 + {
1.1708 + M3Gint outExp;
1.1709 + M3Guint maxElem = m[0] & ~SIGN_MASK;
1.1710 + maxElem = M3G_MAX(maxElem, m[1] & ~SIGN_MASK);
1.1711 + maxElem = M3G_MAX(maxElem, m[2] & ~SIGN_MASK);
1.1712 +
1.1713 + outExp = (M3Gint)(maxElem >> 23) - (127 + 23) + 9;
1.1714 + m3gFloatVecToShort(3, mtx->elem + MELEM(0, 3), outExp, elem);
1.1715 + return outExp;
1.1716 + }
1.1717 +}
1.1718 +
1.1719 +/*!
1.1720 + * \internal
1.1721 + * \brief Compute a bounding box enclosing two other boxes
1.1722 + *
1.1723 + * \param box box to fit
1.1724 + * \param a first box to enclose or NULL
1.1725 + * \param b second box to enclose or NULL
1.1726 + *
1.1727 + * \note If both input boxes are NULL, the box is not modified.
1.1728 + */
1.1729 +static void m3gFitAABB(AABB *box, const AABB *a, const AABB *b)
1.1730 +{
1.1731 + int i;
1.1732 +
1.1733 + M3G_ASSERT_PTR(box);
1.1734 +
1.1735 + if (a) {
1.1736 + m3gValidateAABB(a);
1.1737 + }
1.1738 + if (b) {
1.1739 + m3gValidateAABB(b);
1.1740 + }
1.1741 +
1.1742 + if (a && b) {
1.1743 + for (i = 0; i < 3; ++i) {
1.1744 + box->min[i] = M3G_MIN(a->min[i], b->min[i]);
1.1745 + box->max[i] = M3G_MAX(a->max[i], b->max[i]);
1.1746 + }
1.1747 + }
1.1748 + else if (a) {
1.1749 + *box = *a;
1.1750 + }
1.1751 + else if (b) {
1.1752 + *box = *b;
1.1753 + }
1.1754 +}
1.1755 +
1.1756 +/*
1.1757 + * \internal
1.1758 + * \brief Transform an axis-aligned bounding box with a matrix
1.1759 + *
1.1760 + * This results in a box that encloses the transformed original box.
1.1761 + *
1.1762 + * Based on "Transforming Axis-Aligned Bounding Boxes" by Jim Arvo
1.1763 + * from Graphics Gems.
1.1764 + *
1.1765 + * \note The bottom row of the matrix is ignored in the transformation.
1.1766 + */
1.1767 +static void m3gTransformAABB(AABB *box, const Matrix *mtx)
1.1768 +{
1.1769 + M3Gfloat boxMin[3], boxMax[3];
1.1770 + M3Gfloat newMin[3], newMax[3];
1.1771 +
1.1772 + m3gValidateAABB(box);
1.1773 +
1.1774 + if (!mtx->complete) {
1.1775 + m3gFillClassifiedMatrix((Matrix*) mtx);
1.1776 + }
1.1777 +
1.1778 + /* Get the original minimum and maximum extents as floats, and add
1.1779 + * the translation as the base for the transformed box */
1.1780 + {
1.1781 + int i;
1.1782 + for (i = 0; i < 3; ++i) {
1.1783 + boxMin[i] = box->min[i];
1.1784 + boxMax[i] = box->max[i];
1.1785 + newMin[i] = newMax[i] = M44F(mtx, i, 3);
1.1786 + }
1.1787 + }
1.1788 +
1.1789 + /* Transform into the new minimum and maximum coordinates using
1.1790 + * the upper left 3x3 part of the matrix */
1.1791 + {
1.1792 + M3Gint row, col;
1.1793 +
1.1794 + for (row = 0; row < 3; ++row) {
1.1795 + for (col = 0; col < 3; ++col) {
1.1796 + M3Gfloat a = m3gMul(M44F(mtx, row, col), boxMin[col]);
1.1797 + M3Gfloat b = m3gMul(M44F(mtx, row, col), boxMax[col]);
1.1798 +
1.1799 + if (a < b) {
1.1800 + newMin[row] = m3gAdd(newMin[row], a);
1.1801 + newMax[row] = m3gAdd(newMax[row], b);
1.1802 + }
1.1803 + else {
1.1804 + newMin[row] = m3gAdd(newMin[row], b);
1.1805 + newMax[row] = m3gAdd(newMax[row], a);
1.1806 + }
1.1807 + }
1.1808 + }
1.1809 + }
1.1810 +
1.1811 + /* Write back into the bounding box */
1.1812 + {
1.1813 + int i;
1.1814 + for (i = 0; i < 3; ++i) {
1.1815 + box->min[i] = newMin[i];
1.1816 + box->max[i] = newMax[i];
1.1817 + }
1.1818 + }
1.1819 +
1.1820 + m3gValidateAABB(box);
1.1821 +}
1.1822 +
1.1823 +#if defined(M3G_DEBUG)
1.1824 +/*!
1.1825 + * \brief
1.1826 + */
1.1827 +static void m3gValidateAABB(const AABB *aabb)
1.1828 +{
1.1829 + m3gValidateFloats(6, (float*) aabb);
1.1830 +}
1.1831 +#endif
1.1832 +
1.1833 +/*----------------------------------------------------------------------
1.1834 + * Public functions
1.1835 + *--------------------------------------------------------------------*/
1.1836 +
1.1837 +/*!
1.1838 + * \brief Linear interpolation of vectors
1.1839 + *
1.1840 + * \param size number of components
1.1841 + * \param vec output vector
1.1842 + * \param s interpolation factor
1.1843 + * \param start initial value
1.1844 + * \param end target value
1.1845 + */
1.1846 +#if defined(M3G_HW_FLOAT_VFPV2)
1.1847 +
1.1848 +__weak __asm void m3gLerp(M3Gint size,
1.1849 + M3Gfloat *vec,
1.1850 + M3Gfloat s,
1.1851 + const M3Gfloat *start, const M3Gfloat *end)
1.1852 +{
1.1853 +// r0 = size
1.1854 +// r1 = *vec
1.1855 +// r2 = s
1.1856 +// r3 = *start
1.1857 +// sp[0] = *end
1.1858 +
1.1859 + EXPORT m3gLerp[DYNAMIC]
1.1860 + CODE32
1.1861 +/*
1.1862 + M3Gfloat sCompl = 1.0 - s;
1.1863 + for (i = 0; i < size; ++i) {
1.1864 + vec[i] = sCompl*start[i] + s*end[i];
1.1865 + }
1.1866 +*/
1.1867 +//
1.1868 +// if size = 0, return
1.1869 +//
1.1870 + CMP r0, #0
1.1871 + BXEQ lr
1.1872 +
1.1873 + FMSR s0, r2
1.1874 + MOV r2, r3
1.1875 + LDR r3, [sp]
1.1876 +
1.1877 + FLDS s1,=1.0
1.1878 + STMFD sp!, {r4-r5}
1.1879 + FSUBS s2, s1, s0 // sCompl = 1 - s
1.1880 +
1.1881 + FMRX r4, FPSCR
1.1882 + CMP r0, #4
1.1883 + BGT _m3gLerp_over4Loop
1.1884 +
1.1885 +//
1.1886 +// if 1 < size <= 4
1.1887 +//
1.1888 +// set vector STRIDE = 1, LENGTH = 4
1.1889 + BIC r12, r4, #0x00370000
1.1890 + ORR r12, #(3<<16)
1.1891 + FMXR FPSCR, r12
1.1892 +
1.1893 + FLDMIAS r2!, {s4-s7} // load the start[i] values
1.1894 + FLDMIAS r3!, {s8-s11} // load the end[i] values
1.1895 + FMULS s12, s4, s2 // [s12-s15] = [sCompl*start[0] .. sCompl*start[3]]
1.1896 + FMACS s12, s8, s0 // [s12-s15] += [ s*end[0] .. s*end[3]]
1.1897 + CMP r0, #1
1.1898 + FSTS s12, [r1]
1.1899 + FSTSGT s13, [r1, #4]
1.1900 + CMP r0, #3
1.1901 + FSTSGE s14, [r1, #8]
1.1902 + FSTSGT s15, [r1, #12]
1.1903 +
1.1904 + FMXR FPSCR, r4
1.1905 +
1.1906 + LDMFD sp!, {r4-r5}
1.1907 +
1.1908 + BX lr
1.1909 +//
1.1910 +// if size > 4, interpolate 8 values in one loop
1.1911 +//
1.1912 +_m3gLerp_over4Loop
1.1913 +
1.1914 + FSTMFDD sp!, {d8-d9}
1.1915 + MOVS r5, r0, ASR #3 // size/8
1.1916 + SUB r0, r0, r5, LSL #3 // tail length
1.1917 +
1.1918 +// set vector STRIDE = 1, LENGTH = 8
1.1919 + BIC r12, r4, #0x00370000
1.1920 + ORR r12, #(7<<16)
1.1921 + FMXR FPSCR, r12
1.1922 +
1.1923 +
1.1924 +_m3gLerp_alignedLoop
1.1925 +
1.1926 + FLDMIASNE r2!, {s8-s15} // load the start[i] values
1.1927 + FLDMIASNE r3!, {s16-s23} // load the end[i] values
1.1928 + FMULSNE s24, s8, s2 // [s16-s23] = [ sCompl*start[0] sCompl*start[1] .. sCompl*start[7]]
1.1929 + FMACSNE s24, s16, s0 // [s16-s23] += [ s*end[0] s*end[1] .. s*end[7]]
1.1930 + FSTMIASNE r1!, {s24-s31}
1.1931 + SUBSNE r5, #1
1.1932 +
1.1933 + BNE _m3gLerp_alignedLoop
1.1934 +
1.1935 +// process the 0-7 remaining values in the tail
1.1936 +
1.1937 + CMP r0, #1
1.1938 + FLDMIASGE r2!, {s8-s15}
1.1939 + FLDMIASGE r3!, {s16-s23}
1.1940 + FMULSGE s24, s8, s2
1.1941 + FMACSGE s24, s16, s0
1.1942 + FSTSGE s24, [r1]
1.1943 + FSTSGT s25, [r1, #4]
1.1944 + CMP r0, #3
1.1945 + FSTSGE s26, [r1, #8]
1.1946 + FSTSGT s27, [r1, #12]
1.1947 + CMP r0, #5
1.1948 + FSTSGE s28, [r1, #16]
1.1949 + FSTSGT s29, [r1, #20]
1.1950 + CMP r0, #7
1.1951 + FSTSEQ s30, [r1, #24]
1.1952 +
1.1953 + FMXR FPSCR, r4
1.1954 +
1.1955 + FLDMFDD sp!, {d8-d9}
1.1956 + LDMFD sp!, {r4-r5}
1.1957 +
1.1958 + BX lr
1.1959 +
1.1960 +}
1.1961 +#else /* #if defined(M3G_HW_FLOAT_VFPV2) */
1.1962 +
1.1963 +M3G_API void m3gLerp(M3Gint size,
1.1964 + M3Gfloat *vec,
1.1965 + M3Gfloat s,
1.1966 + const M3Gfloat *start, const M3Gfloat *end)
1.1967 +{
1.1968 + int i;
1.1969 +
1.1970 + M3Gfloat sCompl = m3gSub(1.f, s);
1.1971 + for (i = 0; i < size; ++i) {
1.1972 + vec[i] = m3gAdd(m3gMul(sCompl, start[i]), m3gMul(s, end[i]));
1.1973 + }
1.1974 +}
1.1975 +#endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
1.1976 +
1.1977 +/*!
1.1978 + * \brief Hermite spline interpolation of vectors
1.1979 + *
1.1980 + * \param size number of components
1.1981 + * \param vec output vector
1.1982 + * \param s interpolation factor
1.1983 + * \param start start value vector
1.1984 + * \param end end value vector
1.1985 + * \param tStart start tangent vector
1.1986 + * \param tEnd end tangent vector
1.1987 + */
1.1988 +M3G_API void m3gHermite(M3Gint size,
1.1989 + M3Gfloat *vec,
1.1990 + M3Gfloat s,
1.1991 + const M3Gfloat *start, const M3Gfloat *end,
1.1992 + const M3Gfloat *tStart, const M3Gfloat *tEnd)
1.1993 +{
1.1994 + M3Gfloat s2 = m3gSquare(s);
1.1995 + M3Gfloat s3 = m3gMul(s2, s);
1.1996 + int i;
1.1997 +
1.1998 + for (i = 0; i < size; ++i) {
1.1999 + vec[i] =
1.2000 + m3gMadd(start[i],
1.2001 + m3gAdd(m3gSub(m3gDouble(s3), m3gMul(3.f, s2)), 1.f),
1.2002 + m3gMadd(end[i],
1.2003 + m3gSub(m3gMul(3.f, s2), m3gDouble(s3)),
1.2004 + m3gMadd(tStart[i],
1.2005 + m3gAdd(m3gSub(s3, m3gDouble(s2)), s),
1.2006 + m3gMul(tEnd[i],
1.2007 + m3gSub(s3, s2)))));
1.2008 +
1.2009 + }
1.2010 +
1.2011 + /* vec = ( 2*s3 - 3*s2 + 1) * start
1.2012 + + (-2*s3 + 3*s2 ) * end
1.2013 + + ( s3 - 2*s2 + s) * tStart
1.2014 + + ( s3 - s2 ) * tEnd; */
1.2015 +}
1.2016 +
1.2017 +/*--------------------------------------------------------------------*/
1.2018 +
1.2019 +/*!
1.2020 + * \brief Sets a matrix to a copy of another matrix
1.2021 + */
1.2022 +M3G_API void m3gCopyMatrix(Matrix *dst, const Matrix *src)
1.2023 +{
1.2024 + M3G_ASSERT(dst != NULL && src != NULL);
1.2025 + *dst = *src;
1.2026 +}
1.2027 +
1.2028 +/*!
1.2029 + * \brief Vector addition
1.2030 + */
1.2031 +M3G_API void m3gAddVec3(Vec3 *vec, const Vec3 *other)
1.2032 +{
1.2033 + vec->x = m3gAdd(vec->x, other->x);
1.2034 + vec->y = m3gAdd(vec->y, other->y);
1.2035 + vec->z = m3gAdd(vec->z, other->z);
1.2036 +}
1.2037 +
1.2038 +/*!
1.2039 + * \brief Vector addition
1.2040 + */
1.2041 +M3G_API void m3gAddVec4(Vec4 *vec, const Vec4 *other)
1.2042 +{
1.2043 + vec->x = m3gAdd(vec->x, other->x);
1.2044 + vec->y = m3gAdd(vec->y, other->y);
1.2045 + vec->z = m3gAdd(vec->z, other->z);
1.2046 + vec->w = m3gAdd(vec->w, other->w);
1.2047 +}
1.2048 +
1.2049 +/*!
1.2050 + * \brief Cross product of two 3D vectors expressed as 4D vectors
1.2051 + */
1.2052 +M3G_API void m3gCross(Vec3 *dst, const Vec3 *a, const Vec3 *b)
1.2053 +{
1.2054 + dst->x = m3gSub(m3gMul(a->y, b->z), m3gMul(a->z, b->y));
1.2055 + dst->y = m3gSub(m3gMul(a->z, b->x), m3gMul(a->x, b->z));
1.2056 + dst->z = m3gSub(m3gMul(a->x, b->y), m3gMul(a->y, b->x));
1.2057 +}
1.2058 +
1.2059 +/*!
1.2060 + * \brief Dot product of two vectors
1.2061 + */
1.2062 +M3G_API M3Gfloat m3gDot3(const Vec3 *a, const Vec3 *b)
1.2063 +{
1.2064 + M3Gfloat d;
1.2065 + d = m3gMul(a->x, b->x);
1.2066 + d = m3gMadd(a->y, b->y, d);
1.2067 + d = m3gMadd(a->z, b->z, d);
1.2068 + return d;
1.2069 +}
1.2070 +
1.2071 +/*!
1.2072 + * \brief Dot product of two vectors
1.2073 + */
1.2074 +M3G_API M3Gfloat m3gDot4(const Vec4 *a, const Vec4 *b)
1.2075 +{
1.2076 + M3Gfloat d;
1.2077 + d = m3gMul(a->x, b->x);
1.2078 + d = m3gMadd(a->y, b->y, d);
1.2079 + d = m3gMadd(a->z, b->z, d);
1.2080 + d = m3gMadd(a->w, b->w, d);
1.2081 + return d;
1.2082 +}
1.2083 +
1.2084 +/*!
1.2085 + * \brief
1.2086 + */
1.2087 +M3G_API void m3gSetVec3(Vec3 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z)
1.2088 +{
1.2089 + M3G_ASSERT_PTR(v);
1.2090 + v->x = x;
1.2091 + v->y = y;
1.2092 + v->z = z;
1.2093 +}
1.2094 +
1.2095 +/*!
1.2096 + * \brief
1.2097 + */
1.2098 +M3G_API void m3gSetVec4(Vec4 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z, M3Gfloat w)
1.2099 +{
1.2100 + M3G_ASSERT_PTR(v);
1.2101 + v->x = x;
1.2102 + v->y = y;
1.2103 + v->z = z;
1.2104 + v->w = w;
1.2105 +}
1.2106 +
1.2107 +/*!
1.2108 + * \brief Vector subtraction
1.2109 + */
1.2110 +M3G_API void m3gSubVec3(Vec3 *vec, const Vec3 *other)
1.2111 +{
1.2112 + vec->x = m3gSub(vec->x, other->x);
1.2113 + vec->y = m3gSub(vec->y, other->y);
1.2114 + vec->z = m3gSub(vec->z, other->z);
1.2115 +}
1.2116 +
1.2117 +/*!
1.2118 + * \brief Vector subtraction
1.2119 + */
1.2120 +M3G_API void m3gSubVec4(Vec4 *vec, const Vec4 *other)
1.2121 +{
1.2122 + vec->x = m3gSub(vec->x, other->x);
1.2123 + vec->y = m3gSub(vec->y, other->y);
1.2124 + vec->z = m3gSub(vec->z, other->z);
1.2125 + vec->w = m3gSub(vec->w, other->w);
1.2126 +}
1.2127 +
1.2128 +/*!
1.2129 + * \brief Vector length
1.2130 + */
1.2131 +M3G_API M3Gfloat m3gLengthVec3(const Vec3 *vec)
1.2132 +{
1.2133 + return m3gSqrt(m3gAdd(m3gAdd(m3gSquare(vec->x),
1.2134 + m3gSquare(vec->y)),
1.2135 + m3gSquare(vec->z)));
1.2136 +}
1.2137 +
1.2138 +/*!
1.2139 + * \brief Vector scaling
1.2140 + */
1.2141 +M3G_API void m3gScaleVec3(Vec3 *vec, const M3Gfloat s)
1.2142 +{
1.2143 + vec->x = m3gMul(vec->x, s);
1.2144 + vec->y = m3gMul(vec->y, s);
1.2145 + vec->z = m3gMul(vec->z, s);
1.2146 +}
1.2147 +
1.2148 +/*!
1.2149 + * \brief Vector scaling
1.2150 + */
1.2151 +M3G_API void m3gScaleVec4(Vec4 *vec, const M3Gfloat s)
1.2152 +{
1.2153 + vec->x = m3gMul(vec->x, s);
1.2154 + vec->y = m3gMul(vec->y, s);
1.2155 + vec->z = m3gMul(vec->z, s);
1.2156 + vec->w = m3gMul(vec->w, s);
1.2157 +}
1.2158 +
1.2159 +/*!
1.2160 + * \brief Returns an angle-axis representation for a quaternion
1.2161 + *
1.2162 + * \note There are many, and this is not guaranteed to return a
1.2163 + * particular one
1.2164 + */
1.2165 +M3G_API void m3gGetAngleAxis(const Quat *quat, M3Gfloat *angle, Vec3 *axis)
1.2166 +{
1.2167 + M3Gfloat x, y, z, sinTheta;
1.2168 +
1.2169 + M3G_ASSERT_PTR(quat);
1.2170 + M3G_ASSERT_PTR(angle);
1.2171 + M3G_ASSERT_PTR(axis);
1.2172 +
1.2173 + x = quat->x;
1.2174 + y = quat->y;
1.2175 + z = quat->z;
1.2176 +
1.2177 + sinTheta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(x), m3gSquare(y)),
1.2178 + m3gSquare(z)));
1.2179 +
1.2180 + if (sinTheta > EPSILON) {
1.2181 + M3Gfloat ooSinTheta = m3gRcp(sinTheta);
1.2182 + axis->x = m3gMul(x, ooSinTheta);
1.2183 + axis->y = m3gMul(y, ooSinTheta);
1.2184 + axis->z = m3gMul(z, ooSinTheta);
1.2185 + }
1.2186 + else {
1.2187 + /* return a valid axis even for no rotation */
1.2188 + axis->x = axis->y = 0.0f;
1.2189 + axis->z = 1.0f;
1.2190 + }
1.2191 + *angle = m3gMul(2.0f * RAD2DEG, m3gArcCos(quat->w));
1.2192 +}
1.2193 +
1.2194 +/*!
1.2195 + * \brief Gets a single matrix column
1.2196 + */
1.2197 +M3G_API void m3gGetMatrixColumn(const Matrix *mtx, M3Gint col, Vec4 *dst)
1.2198 +{
1.2199 + if ((col & ~3) == 0) {
1.2200 + if (!mtx->complete) {
1.2201 + m3gFillClassifiedMatrix((Matrix*)mtx);
1.2202 + }
1.2203 + dst->x = M44F(mtx, 0, col);
1.2204 + dst->y = M44F(mtx, 1, col);
1.2205 + dst->z = M44F(mtx, 2, col);
1.2206 + dst->w = M44F(mtx, 3, col);
1.2207 + }
1.2208 + else {
1.2209 + M3G_ASSERT(M3G_FALSE);
1.2210 + }
1.2211 +}
1.2212 +
1.2213 +/*!
1.2214 + * \brief Returns the floating point values of a matrix as consecutive
1.2215 + * columns
1.2216 + */
1.2217 +M3G_API void m3gGetMatrixColumns(const Matrix *mtx, M3Gfloat *dst)
1.2218 +{
1.2219 + M3G_ASSERT(mtx != NULL && dst != NULL);
1.2220 +
1.2221 + if (!mtx->complete) {
1.2222 + m3gFillClassifiedMatrix((Matrix*)mtx);
1.2223 + }
1.2224 + m3gCopy(dst, mtx->elem, sizeof(mtx->elem));
1.2225 +}
1.2226 +
1.2227 +/*!
1.2228 + * \brief Gets a single matrix row
1.2229 + */
1.2230 +M3G_API void m3gGetMatrixRow(const Matrix *mtx, M3Gint row, Vec4 *dst)
1.2231 +{
1.2232 + if ((row & ~3) == 0) {
1.2233 + if (!mtx->complete) {
1.2234 + m3gFillClassifiedMatrix((Matrix*)mtx);
1.2235 + }
1.2236 + dst->x = M44F(mtx, row, 0);
1.2237 + dst->y = M44F(mtx, row, 1);
1.2238 + dst->z = M44F(mtx, row, 2);
1.2239 + dst->w = M44F(mtx, row, 3);
1.2240 + }
1.2241 + else {
1.2242 + M3G_ASSERT(M3G_FALSE);
1.2243 + }
1.2244 +}
1.2245 +
1.2246 +/*!
1.2247 + * \brief Returns the floating point values of a matrix as consecutive
1.2248 + * rows
1.2249 + */
1.2250 +M3G_API void m3gGetMatrixRows(const Matrix *mtx, M3Gfloat *dst)
1.2251 +{
1.2252 + M3G_ASSERT(mtx != NULL && dst != NULL);
1.2253 +
1.2254 + if (!mtx->complete) {
1.2255 + m3gFillClassifiedMatrix((Matrix*)mtx);
1.2256 + }
1.2257 + {
1.2258 + int row;
1.2259 + for (row = 0; row < 4; ++row) {
1.2260 + *dst++ = mtx->elem[ 0 + row];
1.2261 + *dst++ = mtx->elem[ 4 + row];
1.2262 + *dst++ = mtx->elem[ 8 + row];
1.2263 + *dst++ = mtx->elem[12 + row];
1.2264 + }
1.2265 + }
1.2266 +}
1.2267 +
1.2268 +/*!
1.2269 + * \brief Sets a matrix to identity
1.2270 + */
1.2271 +M3G_API void m3gIdentityMatrix(Matrix *mtx)
1.2272 +{
1.2273 + M3G_ASSERT(mtx != NULL);
1.2274 + m3gClassifyAs(MC_IDENTITY, mtx);
1.2275 +}
1.2276 +
1.2277 +/*!
1.2278 + * \brief Sets a quaternion to identity
1.2279 + */
1.2280 +M3G_API void m3gIdentityQuat(Quat *quat)
1.2281 +{
1.2282 + M3G_ASSERT(quat != NULL);
1.2283 + quat->x = quat->y = quat->z = 0.0f;
1.2284 + quat->w = 1.0f;
1.2285 +}
1.2286 +
1.2287 +/*!
1.2288 + * \brief Inverts a matrix
1.2289 + */
1.2290 +M3G_API M3Gbool m3gInvertMatrix(Matrix *mtx)
1.2291 +{
1.2292 + M3Gfloat *matrix;
1.2293 + M3Gint i;
1.2294 + M3Gfloat tmp[12];
1.2295 + M3Gfloat src[16];
1.2296 + M3Gfloat det;
1.2297 +
1.2298 + M3G_ASSERT(mtx != NULL);
1.2299 +
1.2300 + if (!m3gIsClassified(mtx)) {
1.2301 + m3gClassify(mtx);
1.2302 + }
1.2303 +
1.2304 + /* Quick exit for identity */
1.2305 +
1.2306 + if (mtx->mask == MC_IDENTITY) {
1.2307 + return M3G_TRUE;
1.2308 + }
1.2309 +
1.2310 + /* Look for other common cases; these require that we have valid
1.2311 + * values in all the elements, so fill in the values first */
1.2312 + {
1.2313 + M3Guint mask = mtx->mask;
1.2314 +
1.2315 + if (!mtx->complete) {
1.2316 + m3gFillClassifiedMatrix(mtx);
1.2317 + }
1.2318 +
1.2319 + if ((mask | (0x3F << 24)) == MC_TRANSLATION) {
1.2320 + M44F(mtx, 0, 3) = m3gNegate(M44F(mtx, 0, 3));
1.2321 + M44F(mtx, 1, 3) = m3gNegate(M44F(mtx, 1, 3));
1.2322 + M44F(mtx, 2, 3) = m3gNegate(M44F(mtx, 2, 3));
1.2323 + mtx->mask = MC_TRANSLATION;
1.2324 + return M3G_TRUE;
1.2325 + }
1.2326 + if ((mask | 0x300C03) == MC_SCALING) {
1.2327 + if ((mask & 3 ) == 0 ||
1.2328 + (mask & (3 << 10)) == 0 ||
1.2329 + (mask & (3 << 20)) == 0) {
1.2330 + return M3G_FALSE; /* zero scale for at least one axis */
1.2331 + }
1.2332 + M44F(mtx, 0, 0) = m3gRcp(M44F(mtx, 0, 0));
1.2333 + M44F(mtx, 1, 1) = m3gRcp(M44F(mtx, 1, 1));
1.2334 + M44F(mtx, 2, 2) = m3gRcp(M44F(mtx, 2, 2));
1.2335 + return M3G_TRUE;
1.2336 + }
1.2337 + }
1.2338 +
1.2339 + /* Do a full 4x4 inversion as a last resort */
1.2340 +
1.2341 + matrix = mtx->elem;
1.2342 +
1.2343 + /* transpose matrix */
1.2344 + for (i = 0; i < 4; i++) {
1.2345 + src[i] = matrix[i*4];
1.2346 + src[i+4] = matrix[i*4+1];
1.2347 + src[i+8] = matrix[i*4+2];
1.2348 + src[i+12] = matrix[i*4+3];
1.2349 + }
1.2350 +
1.2351 + /* calculate pairs for first 8 elements (cofactors) */
1.2352 + tmp[0] = src[10]*src[15];
1.2353 + tmp[1] = src[11]*src[14];
1.2354 + tmp[2] = src[9]*src[15];
1.2355 + tmp[3] = src[11]*src[13];
1.2356 + tmp[4] = src[9]*src[14];
1.2357 + tmp[5] = src[10]*src[13];
1.2358 + tmp[6] = src[8]*src[15];
1.2359 + tmp[7] = src[11]*src[12];
1.2360 + tmp[8] = src[8]*src[14];
1.2361 + tmp[9] = src[10]*src[12];
1.2362 + tmp[10] = src[8]*src[13];
1.2363 + tmp[11] = src[9]*src[12];
1.2364 +
1.2365 + /* calculate first 8 elements (cofactors) */
1.2366 + matrix[0] = tmp[0]*src[5] + tmp[3]*src[6] + tmp[4]*src[7];
1.2367 + matrix[0] -= tmp[1]*src[5] + tmp[2]*src[6] + tmp[5]*src[7];
1.2368 + matrix[1] = tmp[1]*src[4] + tmp[6]*src[6] + tmp[9]*src[7];
1.2369 + matrix[1] -= tmp[0]*src[4] + tmp[7]*src[6] + tmp[8]*src[7];
1.2370 + matrix[2] = tmp[2]*src[4] + tmp[7]*src[5] + tmp[10]*src[7];
1.2371 + matrix[2] -= tmp[3]*src[4] + tmp[6]*src[5] + tmp[11]*src[7];
1.2372 + matrix[3] = tmp[5]*src[4] + tmp[8]*src[5] + tmp[11]*src[6];
1.2373 + matrix[3] -= tmp[4]*src[4] + tmp[9]*src[5] + tmp[10]*src[6];
1.2374 + matrix[4] = tmp[1]*src[1] + tmp[2]*src[2] + tmp[5]*src[3];
1.2375 + matrix[4] -= tmp[0]*src[1] + tmp[3]*src[2] + tmp[4]*src[3];
1.2376 + matrix[5] = tmp[0]*src[0] + tmp[7]*src[2] + tmp[8]*src[3];
1.2377 + matrix[5] -= tmp[1]*src[0] + tmp[6]*src[2] + tmp[9]*src[3];
1.2378 + matrix[6] = tmp[3]*src[0] + tmp[6]*src[1] + tmp[11]*src[3];
1.2379 + matrix[6] -= tmp[2]*src[0] + tmp[7]*src[1] + tmp[10]*src[3];
1.2380 + matrix[7] = tmp[4]*src[0] + tmp[9]*src[1] + tmp[10]*src[2];
1.2381 + matrix[7] -= tmp[5]*src[0] + tmp[8]*src[1] + tmp[11]*src[2];
1.2382 +
1.2383 + /* calculate pairs for second 8 elements (cofactors) */
1.2384 + tmp[0] = src[2]*src[7];
1.2385 + tmp[1] = src[3]*src[6];
1.2386 + tmp[2] = src[1]*src[7];
1.2387 + tmp[3] = src[3]*src[5];
1.2388 + tmp[4] = src[1]*src[6];
1.2389 + tmp[5] = src[2]*src[5];
1.2390 + tmp[6] = src[0]*src[7];
1.2391 + tmp[7] = src[3]*src[4];
1.2392 + tmp[8] = src[0]*src[6];
1.2393 + tmp[9] = src[2]*src[4];
1.2394 + tmp[10] = src[0]*src[5];
1.2395 + tmp[11] = src[1]*src[4];
1.2396 +
1.2397 + /* calculate second 8 elements (cofactors) */
1.2398 + matrix[8] = tmp[0]*src[13] + tmp[3]*src[14] + tmp[4]*src[15];
1.2399 + matrix[8] -= tmp[1]*src[13] + tmp[2]*src[14] + tmp[5]*src[15];
1.2400 + matrix[9] = tmp[1]*src[12] + tmp[6]*src[14] + tmp[9]*src[15];
1.2401 + matrix[9] -= tmp[0]*src[12] + tmp[7]*src[14] + tmp[8]*src[15];
1.2402 + matrix[10] = tmp[2]*src[12] + tmp[7]*src[13] + tmp[10]*src[15];
1.2403 + matrix[10] -= tmp[3]*src[12] + tmp[6]*src[13] + tmp[11]*src[15];
1.2404 + matrix[11] = tmp[5]*src[12] + tmp[8]*src[13] + tmp[11]*src[14];
1.2405 + matrix[11] -= tmp[4]*src[12] + tmp[9]*src[13] + tmp[10]*src[14];
1.2406 + matrix[12] = tmp[2]*src[10] + tmp[5]*src[11] + tmp[1]*src[9];
1.2407 + matrix[12] -= tmp[4]*src[11] + tmp[0]*src[9] + tmp[3]*src[10];
1.2408 + matrix[13] = tmp[8]*src[11] + tmp[0]*src[8] + tmp[7]*src[10];
1.2409 + matrix[13] -= tmp[6]*src[10] + tmp[9]*src[11] + tmp[1]*src[8];
1.2410 + matrix[14] = tmp[6]*src[9] + tmp[11]*src[11] + tmp[3]*src[8];
1.2411 + matrix[14] -= tmp[10]*src[11] + tmp[2]*src[8] + tmp[7]*src[9];
1.2412 + matrix[15] = tmp[10]*src[10] + tmp[4]*src[8] + tmp[9]*src[9];
1.2413 + matrix[15] -= tmp[8]*src[9] + tmp[11]*src[10] + tmp[5]*src[8];
1.2414 +
1.2415 + /* calculate determinant */
1.2416 + det = src[0]*matrix[0]+src[1]*matrix[1]+src[2]*matrix[2]+src[3]*matrix[3];
1.2417 +
1.2418 + /* matrix has no inverse */
1.2419 + if (det == 0.0f) {
1.2420 + return M3G_FALSE;
1.2421 + }
1.2422 +
1.2423 + /* calculate matrix inverse */
1.2424 + det = 1/det;
1.2425 + for (i = 0; i < 16; i++) {
1.2426 + matrix[i] *= det;
1.2427 + }
1.2428 +
1.2429 + mtx->classified = M3G_FALSE;
1.2430 + return M3G_TRUE;
1.2431 +}
1.2432 +
1.2433 +/*!
1.2434 + * \brief Sets a matrix to the inverse of another matrix
1.2435 + */
1.2436 +M3G_API M3Gbool m3gMatrixInverse(Matrix *mtx, const Matrix *other)
1.2437 +{
1.2438 + M3G_ASSERT(mtx != NULL && other != NULL);
1.2439 +
1.2440 + if (!m3gIsClassified(other)) {
1.2441 + m3gClassify((Matrix*)other);
1.2442 + }
1.2443 +
1.2444 + m3gCopyMatrix(mtx, other);
1.2445 + return m3gInvertMatrix(mtx);
1.2446 +}
1.2447 +
1.2448 +/*!
1.2449 + * \brief Sets a matrix to the transpose of another matrix
1.2450 + */
1.2451 +M3G_API void m3gMatrixTranspose(Matrix *mtx, const Matrix *other)
1.2452 +{
1.2453 + M3Gbyte i;
1.2454 + M3G_ASSERT(mtx != NULL && other != NULL);
1.2455 +
1.2456 + if (!other->complete) {
1.2457 + m3gFillClassifiedMatrix((Matrix *)other);
1.2458 + }
1.2459 +
1.2460 + for (i = 0; i < 4; i++) {
1.2461 + mtx->elem[i] = other->elem[i*4];
1.2462 + mtx->elem[i+4] = other->elem[i*4+1];
1.2463 + mtx->elem[i+8] = other->elem[i*4+2];
1.2464 + mtx->elem[i+12] = other->elem[i*4+3];
1.2465 + }
1.2466 + mtx->classified = M3G_FALSE;
1.2467 + mtx->complete = M3G_TRUE;
1.2468 +}
1.2469 +
1.2470 +M3G_API M3Gbool m3gInverseTranspose(Matrix *mtx, const Matrix *other)
1.2471 +{
1.2472 + Matrix temp;
1.2473 + if (!m3gMatrixInverse(&temp, other)) {
1.2474 + return M3G_FALSE;
1.2475 + }
1.2476 + m3gMatrixTranspose(mtx, &temp);
1.2477 + return M3G_TRUE;
1.2478 +}
1.2479 +
1.2480 +/*!
1.2481 + * \brief Sets a matrix to the product of two other matrices
1.2482 + *
1.2483 + * \note \c dst can not be either of \c left or \c right; if it is,
1.2484 + * the results are undefined
1.2485 + */
1.2486 +M3G_API void m3gMatrixProduct(Matrix *dst, const Matrix *left, const Matrix *right)
1.2487 +{
1.2488 + M3G_ASSERT_PTR(dst);
1.2489 + M3G_ASSERT_PTR(left);
1.2490 + M3G_ASSERT_PTR(right);
1.2491 + M3G_ASSERT(dst != left && dst != right);
1.2492 +
1.2493 + /* Classify input matrices and take early exits for identities */
1.2494 +
1.2495 + if (!m3gIsClassified(left)) {
1.2496 + m3gClassify((Matrix*)left);
1.2497 + }
1.2498 + if (left->mask == MC_IDENTITY) {
1.2499 + m3gCopyMatrix(dst, right);
1.2500 + return;
1.2501 + }
1.2502 +
1.2503 + if (!m3gIsClassified(right)) {
1.2504 + m3gClassify((Matrix*)right);
1.2505 + }
1.2506 + if (right->mask == MC_IDENTITY) {
1.2507 + m3gCopyMatrix(dst, left);
1.2508 + return;
1.2509 + }
1.2510 +
1.2511 + /* Special quick paths for 3x4 matrices */
1.2512 +
1.2513 + if (m3gIsWUnity(left) && m3gIsWUnity(right)) {
1.2514 +
1.2515 + /* Translation? */
1.2516 +
1.2517 + if ((left->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) {
1.2518 +
1.2519 + if (left->mask != MC_TRANSLATION && !left->complete) {
1.2520 + m3gFillClassifiedMatrix((Matrix*)left);
1.2521 + }
1.2522 + if (right->mask != MC_TRANSLATION && !right->complete) {
1.2523 + m3gFillClassifiedMatrix((Matrix*)right);
1.2524 + }
1.2525 +
1.2526 + m3gCopyMatrix(dst, right);
1.2527 +
1.2528 + M44F(dst, 0, 3) = m3gAdd(M44F(left, 0, 3), M44F(dst, 0, 3));
1.2529 + M44F(dst, 1, 3) = m3gAdd(M44F(left, 1, 3), M44F(dst, 1, 3));
1.2530 + M44F(dst, 2, 3) = m3gAdd(M44F(left, 2, 3), M44F(dst, 2, 3));
1.2531 +
1.2532 + dst->mask |= MC_TRANSLATION_PART;
1.2533 + return;
1.2534 + }
1.2535 +
1.2536 + if ((right->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) {
1.2537 + Vec4 tvec;
1.2538 +
1.2539 + if (left->mask != MC_TRANSLATION && !left->complete) {
1.2540 + m3gFillClassifiedMatrix((Matrix*)left);
1.2541 + }
1.2542 + if (right->mask != MC_TRANSLATION && !right->complete) {
1.2543 + m3gFillClassifiedMatrix((Matrix*)right);
1.2544 + }
1.2545 +
1.2546 + m3gCopyMatrix(dst, left);
1.2547 +
1.2548 + m3gGetMatrixColumn(right, 3, &tvec);
1.2549 + m3gTransformVec4(dst, &tvec);
1.2550 +
1.2551 + M44F(dst, 0, 3) = tvec.x;
1.2552 + M44F(dst, 1, 3) = tvec.y;
1.2553 + M44F(dst, 2, 3) = tvec.z;
1.2554 +
1.2555 + dst->mask |= MC_TRANSLATION_PART;
1.2556 + return;
1.2557 + }
1.2558 +
1.2559 + }
1.2560 +
1.2561 + /* Compute product and set output classification */
1.2562 +
1.2563 + m3gGenericMatrixProduct(dst, left, right);
1.2564 +}
1.2565 +
1.2566 +/*!
1.2567 + * \brief Normalizes a quaternion
1.2568 + */
1.2569 +M3G_API void m3gNormalizeQuat(Quat *q)
1.2570 +{
1.2571 + M3Gfloat norm;
1.2572 + M3G_ASSERT_PTR(q);
1.2573 +
1.2574 + norm = m3gNorm4(&q->x);
1.2575 +
1.2576 + if (norm > EPSILON) {
1.2577 + norm = m3gRcpSqrt(norm);
1.2578 + m3gScale4(&q->x, norm);
1.2579 + }
1.2580 + else {
1.2581 + m3gIdentityQuat(q);
1.2582 + }
1.2583 +}
1.2584 +
1.2585 +/*!
1.2586 + * \brief Normalizes a three-vector
1.2587 + */
1.2588 +M3G_API void m3gNormalizeVec3(Vec3 *v)
1.2589 +{
1.2590 + M3Gfloat norm;
1.2591 + M3G_ASSERT_PTR(v);
1.2592 +
1.2593 + norm = m3gNorm3(&v->x);
1.2594 +
1.2595 + if (norm > EPSILON) {
1.2596 + norm = m3gRcpSqrt(norm);
1.2597 + m3gScale3(&v->x, norm);
1.2598 + }
1.2599 + else {
1.2600 + m3gZero(v, sizeof(Vec3));
1.2601 + }
1.2602 +}
1.2603 +
1.2604 +/*!
1.2605 + * \brief Normalizes a four-vector
1.2606 + */
1.2607 +M3G_API void m3gNormalizeVec4(Vec4 *v)
1.2608 +{
1.2609 + M3Gfloat norm;
1.2610 + M3G_ASSERT_PTR(v);
1.2611 +
1.2612 + norm = m3gNorm4(&v->x);
1.2613 +
1.2614 + if (norm > EPSILON) {
1.2615 + norm = m3gRcpSqrt(norm);
1.2616 + m3gScale4(&v->x, norm);
1.2617 + }
1.2618 + else {
1.2619 + m3gZero(v, sizeof(Vec4));
1.2620 + }
1.2621 +}
1.2622 +
1.2623 +/*!
1.2624 + * \brief Multiplies a matrix from the right with another matrix
1.2625 + */
1.2626 +M3G_API void m3gPostMultiplyMatrix(Matrix *mtx, const Matrix *other)
1.2627 +{
1.2628 + Matrix temp;
1.2629 + M3G_ASSERT_PTR(mtx);
1.2630 + M3G_ASSERT_PTR(other);
1.2631 +
1.2632 + m3gCopyMatrix(&temp, mtx);
1.2633 + m3gMatrixProduct(mtx, &temp, other);
1.2634 +}
1.2635 +
1.2636 +/*!
1.2637 + * \brief Multiplies a matrix from the left with another matrix
1.2638 + */
1.2639 +M3G_API void m3gPreMultiplyMatrix(Matrix *mtx, const Matrix *other)
1.2640 +{
1.2641 + Matrix temp;
1.2642 + M3G_ASSERT_PTR(mtx);
1.2643 + M3G_ASSERT_PTR(other);
1.2644 +
1.2645 + m3gCopyMatrix(&temp, mtx);
1.2646 + m3gMatrixProduct(mtx, other, &temp);
1.2647 +}
1.2648 +
1.2649 +/*!
1.2650 + * \brief Multiplies a matrix with a rotation matrix.
1.2651 + */
1.2652 +M3G_API void m3gPostRotateMatrix(Matrix *mtx,
1.2653 + M3Gfloat angle,
1.2654 + M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
1.2655 +{
1.2656 + Quat q;
1.2657 + m3gSetAngleAxis(&q, angle, ax, ay, az);
1.2658 + m3gPostRotateMatrixQuat(mtx, &q);
1.2659 +}
1.2660 +
1.2661 +/*!
1.2662 + * \brief Multiplies a matrix with a translation matrix.
1.2663 + */
1.2664 +M3G_API void m3gPostRotateMatrixQuat(Matrix *mtx, const Quat *quat)
1.2665 +{
1.2666 + Matrix temp;
1.2667 + m3gQuatMatrix(&temp, quat);
1.2668 + m3gPostMultiplyMatrix(mtx, &temp);
1.2669 +}
1.2670 +
1.2671 +/*!
1.2672 + * \brief Multiplies a matrix with a scale matrix.
1.2673 + */
1.2674 +M3G_API void m3gPostScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz)
1.2675 +{
1.2676 + Matrix temp;
1.2677 + m3gScalingMatrix(&temp, sx, sy, sz);
1.2678 + m3gPostMultiplyMatrix(mtx, &temp);
1.2679 +}
1.2680 +
1.2681 +/*!
1.2682 + * \brief Multiplies a matrix with a translation (matrix).
1.2683 + */
1.2684 +M3G_API void m3gPostTranslateMatrix(Matrix *mtx,
1.2685 + M3Gfloat tx, M3Gfloat ty, M3Gfloat tz)
1.2686 +{
1.2687 + Matrix temp;
1.2688 + m3gTranslationMatrix(&temp, tx, ty, tz);
1.2689 + m3gPostMultiplyMatrix(mtx, &temp);
1.2690 +}
1.2691 +
1.2692 +/*!
1.2693 + * \brief Multiplies a matrix with a rotation matrix
1.2694 + */
1.2695 +M3G_API void m3gPreRotateMatrix(Matrix *mtx,
1.2696 + M3Gfloat angle,
1.2697 + M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
1.2698 +{
1.2699 + Quat q;
1.2700 + m3gSetAngleAxis(&q, angle, ax, ay, az);
1.2701 + m3gPreRotateMatrixQuat(mtx, &q);
1.2702 +}
1.2703 +
1.2704 +/*!
1.2705 + * \brief Multiplies a matrix with a quaternion rotation
1.2706 + */
1.2707 +M3G_API void m3gPreRotateMatrixQuat(Matrix *mtx, const Quat *quat)
1.2708 +{
1.2709 + Matrix temp;
1.2710 + m3gQuatMatrix(&temp, quat);
1.2711 + m3gPreMultiplyMatrix(mtx, &temp);
1.2712 +}
1.2713 +
1.2714 +/*!
1.2715 + * \brief Multiplies a matrix with a scale matrix.
1.2716 + */
1.2717 +M3G_API void m3gPreScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz)
1.2718 +{
1.2719 + Matrix temp;
1.2720 + m3gScalingMatrix(&temp, sx, sy, sz);
1.2721 + m3gPreMultiplyMatrix(mtx, &temp);
1.2722 +}
1.2723 +
1.2724 +/*!
1.2725 + * \brief Multiplies a matrix with a translation (matrix).
1.2726 + */
1.2727 +M3G_API void m3gPreTranslateMatrix(Matrix *mtx,
1.2728 + M3Gfloat tx, M3Gfloat ty, M3Gfloat tz)
1.2729 +{
1.2730 + Matrix temp;
1.2731 + m3gTranslationMatrix(&temp, tx, ty, tz);
1.2732 + m3gPreMultiplyMatrix(mtx, &temp);
1.2733 +}
1.2734 +
1.2735 +/*!
1.2736 + * \brief Converts a quaternion into a matrix
1.2737 + *
1.2738 + * The output is a matrix effecting the same rotation as the
1.2739 + * quaternion passed as input
1.2740 + */
1.2741 +M3G_API void m3gQuatMatrix(Matrix *mtx, const Quat *quat)
1.2742 +{
1.2743 + M3Gfloat qx = quat->x;
1.2744 + M3Gfloat qy = quat->y;
1.2745 + M3Gfloat qz = quat->z;
1.2746 + M3Gfloat qw = quat->w;
1.2747 +
1.2748 + /* Quick exit for identity rotations */
1.2749 +
1.2750 + if (IS_ZERO(qx) && IS_ZERO(qy) && IS_ZERO(qz)) {
1.2751 + m3gIdentityMatrix(mtx);
1.2752 + return;
1.2753 + }
1.2754 +
1.2755 + {
1.2756 + /* Determine the rough type of the output matrix */
1.2757 +
1.2758 + M3Guint type = MC_SCALING_ROTATION;
1.2759 + if (IS_ZERO(qz) && IS_ZERO(qy)) {
1.2760 + type = MC_X_ROTATION;
1.2761 + }
1.2762 + else if (IS_ZERO(qz) && IS_ZERO(qx)) {
1.2763 + type = MC_Y_ROTATION;
1.2764 + }
1.2765 + else if (IS_ZERO(qx) && IS_ZERO(qy)) {
1.2766 + type = MC_Z_ROTATION;
1.2767 + }
1.2768 + m3gClassifyAs(type, mtx);
1.2769 +
1.2770 + /* Generate the non-constant parts of the matrix */
1.2771 + {
1.2772 + M3Gfloat wx, wy, wz, xx, yy, yz, xy, xz, zz;
1.2773 +
1.2774 + xx = m3gMul(qx, qx);
1.2775 + xy = m3gMul(qx, qy);
1.2776 + xz = m3gMul(qx, qz);
1.2777 + yy = m3gMul(qy, qy);
1.2778 + yz = m3gMul(qy, qz);
1.2779 + zz = m3gMul(qz, qz);
1.2780 + wx = m3gMul(qw, qx);
1.2781 + wy = m3gMul(qw, qy);
1.2782 + wz = m3gMul(qw, qz);
1.2783 +
1.2784 + if (type != MC_X_ROTATION) {
1.2785 + M44F(mtx, 0, 0) = m3gSub(1.f, m3gDouble(m3gAdd(yy, zz)));
1.2786 + M44F(mtx, 0, 1) = m3gDouble(m3gSub(xy, wz));
1.2787 + M44F(mtx, 0, 2) = m3gDouble(m3gAdd(xz, wy));
1.2788 + }
1.2789 +
1.2790 + if (type != MC_Y_ROTATION) {
1.2791 + M44F(mtx, 1, 0) = m3gDouble(m3gAdd(xy, wz));
1.2792 + M44F(mtx, 1, 1) = m3gSub(1.f, m3gDouble(m3gAdd(xx, zz)));
1.2793 + M44F(mtx, 1, 2) = m3gDouble(m3gSub(yz, wx));
1.2794 + }
1.2795 +
1.2796 + if (type != MC_Z_ROTATION) {
1.2797 + M44F(mtx, 2, 0) = m3gDouble(m3gSub(xz, wy));
1.2798 + M44F(mtx, 2, 1) = m3gDouble(m3gAdd(yz, wx));
1.2799 + M44F(mtx, 2, 2) = m3gSub(1.f, m3gDouble(m3gAdd(xx, yy)));
1.2800 + }
1.2801 + }
1.2802 + m3gSubClassify(mtx);
1.2803 + }
1.2804 +}
1.2805 +
1.2806 +/*!
1.2807 + * \brief Generates a scaling matrix
1.2808 + */
1.2809 +M3G_API void m3gScalingMatrix(
1.2810 + Matrix *mtx,
1.2811 + const M3Gfloat sx, const M3Gfloat sy, const M3Gfloat sz)
1.2812 +{
1.2813 + M3G_ASSERT_PTR(mtx);
1.2814 + M44F(mtx, 0, 0) = sx;
1.2815 + M44F(mtx, 1, 1) = sy;
1.2816 + M44F(mtx, 2, 2) = sz;
1.2817 + m3gClassifyAs(MC_SCALING, mtx);
1.2818 + m3gSubClassify(mtx);
1.2819 +}
1.2820 +
1.2821 +/*!
1.2822 + * \brief Sets a quaternion to represent an angle-axis rotation
1.2823 + */
1.2824 +M3G_API void m3gSetAngleAxis(Quat *quat,
1.2825 + M3Gfloat angle,
1.2826 + M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
1.2827 +{
1.2828 + m3gSetAngleAxisRad(quat, m3gMul(angle, M3G_DEG2RAD), ax, ay, az);
1.2829 +}
1.2830 +
1.2831 +/*!
1.2832 + * \brief Sets a quaternion to represent an angle-axis rotation
1.2833 + */
1.2834 +M3G_API void m3gSetAngleAxisRad(Quat *quat,
1.2835 + M3Gfloat angleRad,
1.2836 + M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
1.2837 +{
1.2838 + M3G_ASSERT_PTR(quat);
1.2839 +
1.2840 + if (!IS_ZERO(angleRad)) {
1.2841 + M3Gfloat s;
1.2842 + M3Gfloat halfAngle = m3gHalf(angleRad);
1.2843 +
1.2844 + s = m3gSin(halfAngle);
1.2845 +
1.2846 + {
1.2847 + M3Gfloat sqrNorm = m3gMadd(ax, ax, m3gMadd(ay, ay, m3gMul(az, az)));
1.2848 + if (sqrNorm < 0.995f || sqrNorm > 1.005f) {
1.2849 + if (sqrNorm > EPSILON) {
1.2850 + M3Gfloat ooNorm = m3gRcpSqrt(sqrNorm);
1.2851 + ax = m3gMul(ax, ooNorm);
1.2852 + ay = m3gMul(ay, ooNorm);
1.2853 + az = m3gMul(az, ooNorm);
1.2854 + }
1.2855 + else {
1.2856 + ax = ay = az = 0.0f;
1.2857 + }
1.2858 + }
1.2859 + }
1.2860 +
1.2861 + quat->x = m3gMul(s, ax);
1.2862 + quat->y = m3gMul(s, ay);
1.2863 + quat->z = m3gMul(s, az);
1.2864 + quat->w = m3gCos(halfAngle);
1.2865 + }
1.2866 + else {
1.2867 + m3gIdentityQuat(quat);
1.2868 + }
1.2869 +}
1.2870 +
1.2871 +/*!
1.2872 + * \brief Quaternion multiplication.
1.2873 + */
1.2874 +M3G_API void m3gMulQuat(Quat *quat, const Quat *other)
1.2875 +{
1.2876 + Quat q;
1.2877 + q = *quat;
1.2878 +
1.2879 + quat->w = m3gMul(q.w, other->w) - m3gMul(q.x, other->x) - m3gMul(q.y, other->y) - m3gMul(q.z, other->z);
1.2880 + quat->x = m3gMul(q.w, other->x) + m3gMul(q.x, other->w) + m3gMul(q.y, other->z) - m3gMul(q.z, other->y);
1.2881 + quat->y = m3gMul(q.w, other->y) - m3gMul(q.x, other->z) + m3gMul(q.y, other->w) + m3gMul(q.z, other->x);
1.2882 + quat->z = m3gMul(q.w, other->z) + m3gMul(q.x, other->y) - m3gMul(q.y, other->x) + m3gMul(q.z, other->w);
1.2883 +}
1.2884 +
1.2885 +/*!
1.2886 + * \brief Makes this quaternion represent the rotation from one 3D
1.2887 + * vector to another
1.2888 + */
1.2889 +M3G_API void m3gSetQuatRotation(Quat *quat,
1.2890 + const Vec3 *from, const Vec3 *to)
1.2891 +{
1.2892 + M3Gfloat cosAngle;
1.2893 +
1.2894 + M3G_ASSERT_PTR(quat);
1.2895 + M3G_ASSERT_PTR(from);
1.2896 + M3G_ASSERT_PTR(to);
1.2897 +
1.2898 + cosAngle = m3gDot3(from, to);
1.2899 +
1.2900 + if (cosAngle > (1.0f - EPSILON)) { /* zero angle */
1.2901 + m3gIdentityQuat(quat);
1.2902 + return;
1.2903 + }
1.2904 + else if (cosAngle > (1.0e-3f - 1.0f)) { /* normal case */
1.2905 + Vec3 axis;
1.2906 + m3gCross(&axis, from, to);
1.2907 + m3gSetAngleAxisRad(quat, m3gArcCos(cosAngle), axis.x, axis.y, axis.z);
1.2908 + }
1.2909 + else {
1.2910 +
1.2911 + /* Opposite vectors; must generate an arbitrary perpendicular
1.2912 + * vector and use that as the rotation axis. Here, we try the
1.2913 + * Z axis first, and if that seems too parallel to the
1.2914 + * vectors, project the Y axis instead: Z is the only good
1.2915 + * choice for Z-constrained rotations, and Y by definition
1.2916 + * must be perpendicular to that. */
1.2917 +
1.2918 + Vec3 axis, temp;
1.2919 + M3Gfloat s;
1.2920 +
1.2921 + axis.x = axis.y = axis.z = 0.0f;
1.2922 + if (m3gAbs(from->z) < (1.0f - EPSILON)) {
1.2923 + axis.z = 1.0f;
1.2924 + }
1.2925 + else {
1.2926 + axis.y = 1.0f;
1.2927 + }
1.2928 +
1.2929 + s = m3gDot3(&axis, from);
1.2930 + temp = *from;
1.2931 + m3gScaleVec3(&temp, s);
1.2932 + m3gSubVec3(&axis, &temp);
1.2933 +
1.2934 + m3gSetAngleAxis(quat, 180.f, axis.x, axis.y, axis.z);
1.2935 + }
1.2936 +}
1.2937 +
1.2938 +/*!
1.2939 + * \brief Sets the values of a matrix
1.2940 + *
1.2941 + */
1.2942 +M3G_API void m3gSetMatrixColumns(Matrix *mtx, const M3Gfloat *src)
1.2943 +{
1.2944 + M3G_ASSERT(mtx != NULL && src != NULL);
1.2945 +
1.2946 + m3gCopy(mtx->elem, src, sizeof(mtx->elem));
1.2947 + mtx->classified = M3G_FALSE;
1.2948 + mtx->complete = M3G_TRUE;
1.2949 +}
1.2950 +
1.2951 +/*!
1.2952 + * \brief Sets the values of a matrix
1.2953 + *
1.2954 + */
1.2955 +M3G_API void m3gSetMatrixRows(Matrix *mtx, const M3Gfloat *src)
1.2956 +{
1.2957 + M3G_ASSERT(mtx != NULL && src != NULL);
1.2958 + {
1.2959 + int row;
1.2960 + for (row = 0; row < 4; ++row) {
1.2961 + mtx->elem[ 0 + row] = *src++;
1.2962 + mtx->elem[ 4 + row] = *src++;
1.2963 + mtx->elem[ 8 + row] = *src++;
1.2964 + mtx->elem[12 + row] = *src++;
1.2965 + }
1.2966 + }
1.2967 + mtx->classified = M3G_FALSE;
1.2968 + mtx->complete = M3G_TRUE;
1.2969 +}
1.2970 +
1.2971 +/*!
1.2972 + * \brief Transforms a 4-vector with a matrix
1.2973 + */
1.2974 +#if defined(M3G_HW_FLOAT_VFPV2)
1.2975 +
1.2976 +__asm void _m3gTransformVec4(const Matrix *mtx, Vec4 *vec, M3Gint n)
1.2977 +{
1.2978 +
1.2979 + CODE32
1.2980 +
1.2981 + FSTMFDD sp!, {d8-d11}
1.2982 +
1.2983 + FMRX r3, FPSCR
1.2984 + BIC r12, r3, #0x00370000
1.2985 + ORR r12, #(3<<16)
1.2986 + FMXR FPSCR, r12
1.2987 + CMP r2, #4
1.2988 +
1.2989 + FLDMIAS r0, {s4-s19} // [mtx0 mtx1 ..]
1.2990 + FLDMIAS r1, {s0-s3} // [vec.x vec.y vec.z vec.w]
1.2991 + FMULS s20, s4, s0 // [s20-s23] = [v.x*mtx0 v.x*mtx1 v.x*mtx2 v.x*mtx3 ]
1.2992 + FMACS s20, s8, s1 // [s20-s23] += [v.y*mtx4 v.y*mtx5 v.y*mtx6 v.y*mtx7 ]
1.2993 + FMACS s20, s12, s2 // [s20-s23] += [v.z*mtx8 v.z*mtx9 v.z*mtx10 v.z*mtx11]
1.2994 + FMACS s20, s16, s3 // [s20-s23] += [v.w*mtx12 v.w*mtx13 v.w*mtx14 v.w*mtx15]
1.2995 + FSTMIAS r1!, {s20-s22}
1.2996 + FSTMIASEQ r1, {s23}
1.2997 +
1.2998 + FMXR FPSCR, r3
1.2999 + FLDMFDD sp!, {d8-d11}
1.3000 +
1.3001 + BX lr
1.3002 +}
1.3003 +#endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
1.3004 +
1.3005 +M3G_API void m3gTransformVec4(const Matrix *mtx, Vec4 *vec)
1.3006 +{
1.3007 + M3Guint type;
1.3008 + M3G_ASSERT(mtx != NULL && vec != NULL);
1.3009 +
1.3010 + if (!m3gIsClassified(mtx)) {
1.3011 + m3gClassify((Matrix*)mtx);
1.3012 + }
1.3013 +
1.3014 + type = mtx->mask;
1.3015 +
1.3016 + if (type == MC_IDENTITY) {
1.3017 + return;
1.3018 + }
1.3019 + else {
1.3020 + int n = m3gIsWUnity(mtx) ? 3 : 4;
1.3021 +
1.3022 + if (!mtx->complete) {
1.3023 + m3gFillClassifiedMatrix((Matrix*)mtx);
1.3024 + }
1.3025 +#if defined(M3G_HW_FLOAT_VFPV2)
1.3026 + _m3gTransformVec4(mtx, vec, n);
1.3027 +#else
1.3028 + {
1.3029 + Vec4 v = *vec;
1.3030 + int i;
1.3031 +
1.3032 + for (i = 0; i < n; ++i) {
1.3033 + M3Gfloat d = m3gMul(M44F(mtx, i, 0), v.x);
1.3034 + d = m3gMadd(M44F(mtx, i, 1), v.y, d);
1.3035 + d = m3gMadd(M44F(mtx, i, 2), v.z, d);
1.3036 + d = m3gMadd(M44F(mtx, i, 3), v.w, d);
1.3037 + (&vec->x)[i] = d;
1.3038 + }
1.3039 + }
1.3040 +#endif
1.3041 + }
1.3042 +}
1.3043 +
1.3044 +/*!
1.3045 + * \brief Generates a translation matrix
1.3046 + */
1.3047 +M3G_API void m3gTranslationMatrix(
1.3048 + Matrix *mtx,
1.3049 + const M3Gfloat tx, const M3Gfloat ty, const M3Gfloat tz)
1.3050 +{
1.3051 + M3G_ASSERT_PTR(mtx);
1.3052 + M44F(mtx, 0, 3) = tx;
1.3053 + M44F(mtx, 1, 3) = ty;
1.3054 + M44F(mtx, 2, 3) = tz;
1.3055 + m3gClassifyAs(MC_TRANSLATION, mtx);
1.3056 + m3gSubClassify(mtx);
1.3057 +}
1.3058 +
1.3059 +/*!
1.3060 + * \brief Float vector assignment.
1.3061 + */
1.3062 +M3G_API void m3gSetQuat(Quat *quat, const M3Gfloat *vec)
1.3063 +{
1.3064 + quat->x = vec[0];
1.3065 + quat->y = vec[1];
1.3066 + quat->z = vec[2];
1.3067 + quat->w = vec[3];
1.3068 +}
1.3069 +
1.3070 +/*!
1.3071 + * \brief Slerp between quaternions q0 and q1, storing the result in quat.
1.3072 + */
1.3073 +M3G_API void m3gSlerpQuat(Quat *quat,
1.3074 + M3Gfloat s,
1.3075 + const Quat *q0, const Quat *q1)
1.3076 +{
1.3077 + M3Gfloat s0, s1;
1.3078 + M3Gfloat cosTheta = m3gDot4((const Vec4 *)q0, (const Vec4 *)q1);
1.3079 + M3Gfloat oneMinusS = m3gSub(1.0f, s);
1.3080 +
1.3081 + if (cosTheta > EPSILON - 1.0f) {
1.3082 + if (cosTheta < 1.0f - EPSILON) {
1.3083 + M3Gfloat theta = m3gArcCos(cosTheta);
1.3084 + M3Gfloat sinTheta = m3gSin(theta);
1.3085 + s0 = m3gSin(m3gMul(oneMinusS, theta)) / sinTheta;
1.3086 + s1 = m3gSin(m3gMul(s, theta)) / sinTheta;
1.3087 + }
1.3088 + else {
1.3089 + /* For quaternions very close to each other, use plain
1.3090 + linear interpolation for numerical stability. */
1.3091 + s0 = oneMinusS;
1.3092 + s1 = s;
1.3093 + }
1.3094 + quat->x = m3gMadd(s0, q0->x, m3gMul(s1, q1->x));
1.3095 + quat->y = m3gMadd(s0, q0->y, m3gMul(s1, q1->y));
1.3096 + quat->z = m3gMadd(s0, q0->z, m3gMul(s1, q1->z));
1.3097 + quat->w = m3gMadd(s0, q0->w, m3gMul(s1, q1->w));
1.3098 + }
1.3099 + else {
1.3100 + /* Slerp is undefined if the two quaternions are (nearly)
1.3101 + opposite, so we just pick an arbitrary plane of
1.3102 + rotation here. */
1.3103 +
1.3104 + quat->x = -q0->y;
1.3105 + quat->y = q0->x;
1.3106 + quat->z = -q0->w;
1.3107 + quat->w = q0->z;
1.3108 +
1.3109 + s0 = m3gSin(m3gMul(oneMinusS, HALF_PI));
1.3110 + s1 = m3gSin(m3gMul(s, HALF_PI));
1.3111 +
1.3112 + quat->x = m3gMadd(s0, q0->x, m3gMul(s1, quat->x));
1.3113 + quat->y = m3gMadd(s0, q0->y, m3gMul(s1, quat->y));
1.3114 + quat->z = m3gMadd(s0, q0->z, m3gMul(s1, quat->z));
1.3115 + }
1.3116 +}
1.3117 +
1.3118 +/*!
1.3119 + * \brief Interpolate quaternions using spline, or "squad" interpolation.
1.3120 + */
1.3121 +M3G_API void m3gSquadQuat(Quat *quat,
1.3122 + M3Gfloat s,
1.3123 + const Quat *q0, const Quat *a, const Quat *b, const Quat *q1)
1.3124 +{
1.3125 +
1.3126 + Quat temp0;
1.3127 + Quat temp1;
1.3128 + m3gSlerpQuat(&temp0, s, q0, q1);
1.3129 + m3gSlerpQuat(&temp1, s, a, b);
1.3130 +
1.3131 + m3gSlerpQuat(quat, m3gDouble(m3gMul(s, m3gSub(1.0f, s))), &temp0, &temp1);
1.3132 +}
1.3133 +
1.3134 +
1.3135 +