summaryrefslogtreecommitdiff
path: root/lib/lib8tion/math8.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/lib8tion/math8.h')
-rw-r--r--lib/lib8tion/math8.h552
1 files changed, 552 insertions, 0 deletions
diff --git a/lib/lib8tion/math8.h b/lib/lib8tion/math8.h
new file mode 100644
index 0000000000..8c6b6c227e
--- /dev/null
+++ b/lib/lib8tion/math8.h
@@ -0,0 +1,552 @@
+#ifndef __INC_LIB8TION_MATH_H
+#define __INC_LIB8TION_MATH_H
+
+#include "scale8.h"
+
+///@ingroup lib8tion
+
+///@defgroup Math Basic math operations
+/// Fast, efficient 8-bit math functions specifically
+/// designed for high-performance LED programming.
+///
+/// Because of the AVR(Arduino) and ARM assembly language
+/// implementations provided, using these functions often
+/// results in smaller and faster code than the equivalent
+/// program using plain "C" arithmetic and logic.
+///@{
+
+
+/// add one byte to another, saturating at 0xFF
+/// @param i - first byte to add
+/// @param j - second byte to add
+/// @returns the sum of i & j, capped at 0xFF
+LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j)
+{
+#if QADD8_C == 1
+ uint16_t t = i + j;
+ if (t > 255) t = 255;
+ return t;
+#elif QADD8_AVRASM == 1
+ asm volatile(
+ /* First, add j to i, conditioning the C flag */
+ "add %0, %1 \n\t"
+
+ /* Now test the C flag.
+ If C is clear, we branch around a load of 0xFF into i.
+ If C is set, we go ahead and load 0xFF into i.
+ */
+ "brcc L_%= \n\t"
+ "ldi %0, 0xFF \n\t"
+ "L_%=: "
+ : "+a" (i)
+ : "a" (j) );
+ return i;
+#elif QADD8_ARM_DSP_ASM == 1
+ asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
+ return i;
+#else
+#error "No implementation for qadd8 available."
+#endif
+}
+
+/// Add one byte to another, saturating at 0x7F
+/// @param i - first byte to add
+/// @param j - second byte to add
+/// @returns the sum of i & j, capped at 0xFF
+LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j)
+{
+#if QADD7_C == 1
+ int16_t t = i + j;
+ if (t > 127) t = 127;
+ return t;
+#elif QADD7_AVRASM == 1
+ asm volatile(
+ /* First, add j to i, conditioning the V flag */
+ "add %0, %1 \n\t"
+
+ /* Now test the V flag.
+ If V is clear, we branch around a load of 0x7F into i.
+ If V is set, we go ahead and load 0x7F into i.
+ */
+ "brvc L_%= \n\t"
+ "ldi %0, 0x7F \n\t"
+ "L_%=: "
+ : "+a" (i)
+ : "a" (j) );
+
+ return i;
+#elif QADD7_ARM_DSP_ASM == 1
+ asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
+ return i;
+#else
+#error "No implementation for qadd7 available."
+#endif
+}
+
+/// subtract one byte from another, saturating at 0x00
+/// @returns i - j with a floor of 0
+LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j)
+{
+#if QSUB8_C == 1
+ int16_t t = i - j;
+ if (t < 0) t = 0;
+ return t;
+#elif QSUB8_AVRASM == 1
+
+ asm volatile(
+ /* First, subtract j from i, conditioning the C flag */
+ "sub %0, %1 \n\t"
+
+ /* Now test the C flag.
+ If C is clear, we branch around a load of 0x00 into i.
+ If C is set, we go ahead and load 0x00 into i.
+ */
+ "brcc L_%= \n\t"
+ "ldi %0, 0x00 \n\t"
+ "L_%=: "
+ : "+a" (i)
+ : "a" (j) );
+
+ return i;
+#else
+#error "No implementation for qsub8 available."
+#endif
+}
+
+/// add one byte to another, with one byte result
+LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j)
+{
+#if ADD8_C == 1
+ uint16_t t = i + j;
+ return t;
+#elif ADD8_AVRASM == 1
+ // Add j to i, period.
+ asm volatile( "add %0, %1" : "+a" (i) : "a" (j));
+ return i;
+#else
+#error "No implementation for add8 available."
+#endif
+}
+
+/// add one byte to another, with one byte result
+LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j)
+{
+#if ADD8_C == 1
+ uint16_t t = i + j;
+ return t;
+#elif ADD8_AVRASM == 1
+ // Add i(one byte) to j(two bytes)
+ asm volatile( "add %A[j], %[i] \n\t"
+ "adc %B[j], __zero_reg__ \n\t"
+ : [j] "+a" (j)
+ : [i] "a" (i)
+ );
+ return i;
+#else
+#error "No implementation for add8to16 available."
+#endif
+}
+
+
+/// subtract one byte from another, 8-bit result
+LIB8STATIC_ALWAYS_INLINE uint8_t sub8( uint8_t i, uint8_t j)
+{
+#if SUB8_C == 1
+ int16_t t = i - j;
+ return t;
+#elif SUB8_AVRASM == 1
+ // Subtract j from i, period.
+ asm volatile( "sub %0, %1" : "+a" (i) : "a" (j));
+ return i;
+#else
+#error "No implementation for sub8 available."
+#endif
+}
+
+/// Calculate an integer average of two unsigned
+/// 8-bit integer values (uint8_t).
+/// Fractional results are rounded down, e.g. avg8(20,41) = 30
+LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j)
+{
+#if AVG8_C == 1
+ return (i + j) >> 1;
+#elif AVG8_AVRASM == 1
+ asm volatile(
+ /* First, add j to i, 9th bit overflows into C flag */
+ "add %0, %1 \n\t"
+ /* Divide by two, moving C flag into high 8th bit */
+ "ror %0 \n\t"
+ : "+a" (i)
+ : "a" (j) );
+ return i;
+#else
+#error "No implementation for avg8 available."
+#endif
+}
+
+/// Calculate an integer average of two unsigned
+/// 16-bit integer values (uint16_t).
+/// Fractional results are rounded down, e.g. avg16(20,41) = 30
+LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j)
+{
+#if AVG16_C == 1
+ return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1;
+#elif AVG16_AVRASM == 1
+ asm volatile(
+ /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
+ "add %A[i], %A[j] \n\t"
+ /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
+ "adc %B[i], %B[j] \n\t"
+ /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
+ "ror %B[i] \n\t"
+ /* Divide iLo by two, moving C flag into high 8th bit */
+ "ror %A[i] \n\t"
+ : [i] "+a" (i)
+ : [j] "a" (j) );
+ return i;
+#else
+#error "No implementation for avg16 available."
+#endif
+}
+
+
+/// Calculate an integer average of two signed 7-bit
+/// integers (int8_t)
+/// If the first argument is even, result is rounded down.
+/// If the first argument is odd, result is result up.
+LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
+{
+#if AVG7_C == 1
+ return ((i + j) >> 1) + (i & 0x1);
+#elif AVG7_AVRASM == 1
+ asm volatile(
+ "asr %1 \n\t"
+ "asr %0 \n\t"
+ "adc %0, %1 \n\t"
+ : "+a" (i)
+ : "a" (j) );
+ return i;
+#else
+#error "No implementation for avg7 available."
+#endif
+}
+
+/// Calculate an integer average of two signed 15-bit
+/// integers (int16_t)
+/// If the first argument is even, result is rounded down.
+/// If the first argument is odd, result is result up.
+LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j)
+{
+#if AVG15_C == 1
+ return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1);
+#elif AVG15_AVRASM == 1
+ asm volatile(
+ /* first divide j by 2, throwing away lowest bit */
+ "asr %B[j] \n\t"
+ "ror %A[j] \n\t"
+ /* now divide i by 2, with lowest bit going into C */
+ "asr %B[i] \n\t"
+ "ror %A[i] \n\t"
+ /* add j + C to i */
+ "adc %A[i], %A[j] \n\t"
+ "adc %B[i], %B[j] \n\t"
+ : [i] "+a" (i)
+ : [j] "a" (j) );
+ return i;
+#else
+#error "No implementation for avg15 available."
+#endif
+}
+
+
+/// Calculate the remainder of one unsigned 8-bit
+/// value divided by anoter, aka A % M.
+/// Implemented by repeated subtraction, which is
+/// very compact, and very fast if A is 'probably'
+/// less than M. If A is a large multiple of M,
+/// the loop has to execute multiple times. However,
+/// even in that case, the loop is only two
+/// instructions long on AVR, i.e., quick.
+LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m)
+{
+#if defined(__AVR__)
+ asm volatile (
+ "L_%=: sub %[a],%[m] \n\t"
+ " brcc L_%= \n\t"
+ " add %[a],%[m] \n\t"
+ : [a] "+r" (a)
+ : [m] "r" (m)
+ );
+#else
+ while( a >= m) a -= m;
+#endif
+ return a;
+}
+
+/// Add two numbers, and calculate the modulo
+/// of the sum and a third number, M.
+/// In other words, it returns (A+B) % M.
+/// It is designed as a compact mechanism for
+/// incrementing a 'mode' switch and wrapping
+/// around back to 'mode 0' when the switch
+/// goes past the end of the available range.
+/// e.g. if you have seven modes, this switches
+/// to the next one and wraps around if needed:
+/// mode = addmod8( mode, 1, 7);
+///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance.
+LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
+{
+#if defined(__AVR__)
+ asm volatile (
+ " add %[a],%[b] \n\t"
+ "L_%=: sub %[a],%[m] \n\t"
+ " brcc L_%= \n\t"
+ " add %[a],%[m] \n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b), [m] "r" (m)
+ );
+#else
+ a += b;
+ while( a >= m) a -= m;
+#endif
+ return a;
+}
+
+/// Subtract two numbers, and calculate the modulo
+/// of the difference and a third number, M.
+/// In other words, it returns (A-B) % M.
+/// It is designed as a compact mechanism for
+/// incrementing a 'mode' switch and wrapping
+/// around back to 'mode 0' when the switch
+/// goes past the end of the available range.
+/// e.g. if you have seven modes, this switches
+/// to the next one and wraps around if needed:
+/// mode = addmod8( mode, 1, 7);
+///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance.
+LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m)
+{
+#if defined(__AVR__)
+ asm volatile (
+ " sub %[a],%[b] \n\t"
+ "L_%=: sub %[a],%[m] \n\t"
+ " brcc L_%= \n\t"
+ " add %[a],%[m] \n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b), [m] "r" (m)
+ );
+#else
+ a -= b;
+ while( a >= m) a -= m;
+#endif
+ return a;
+}
+
+/// 8x8 bit multiplication, with 8 bit result
+LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j)
+{
+#if MUL8_C == 1
+ return ((uint16_t)i * (uint16_t)(j) ) & 0xFF;
+#elif MUL8_AVRASM == 1
+ asm volatile(
+ /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
+ "mul %0, %1 \n\t"
+ /* Extract the LOW 8-bits (r0) */
+ "mov %0, r0 \n\t"
+ /* Restore r1 to "0"; it's expected to always be that */
+ "clr __zero_reg__ \n\t"
+ : "+a" (i)
+ : "a" (j)
+ : "r0", "r1");
+
+ return i;
+#else
+#error "No implementation for mul8 available."
+#endif
+}
+
+
+/// saturating 8x8 bit multiplication, with 8 bit result
+/// @returns the product of i * j, capping at 0xFF
+LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j)
+{
+#if QMUL8_C == 1
+ int p = ((uint16_t)i * (uint16_t)(j) );
+ if( p > 255) p = 255;
+ return p;
+#elif QMUL8_AVRASM == 1
+ asm volatile(
+ /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
+ " mul %0, %1 \n\t"
+ /* If high byte of result is zero, all is well. */
+ " tst r1 \n\t"
+ " breq Lnospill_%= \n\t"
+ /* If high byte of result > 0, saturate low byte to 0xFF */
+ " ldi %0,0xFF \n\t"
+ " rjmp Ldone_%= \n\t"
+ "Lnospill_%=: \n\t"
+ /* Extract the LOW 8-bits (r0) */
+ " mov %0, r0 \n\t"
+ "Ldone_%=: \n\t"
+ /* Restore r1 to "0"; it's expected to always be that */
+ " clr __zero_reg__ \n\t"
+ : "+a" (i)
+ : "a" (j)
+ : "r0", "r1");
+
+ return i;
+#else
+#error "No implementation for qmul8 available."
+#endif
+}
+
+
+/// take abs() of a signed 8-bit uint8_t
+LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i)
+{
+#if ABS8_C == 1
+ if( i < 0) i = -i;
+ return i;
+#elif ABS8_AVRASM == 1
+
+
+ asm volatile(
+ /* First, check the high bit, and prepare to skip if it's clear */
+ "sbrc %0, 7 \n"
+
+ /* Negate the value */
+ "neg %0 \n"
+
+ : "+r" (i) : "r" (i) );
+ return i;
+#else
+#error "No implementation for abs8 available."
+#endif
+}
+
+/// square root for 16-bit integers
+/// About three times faster and five times smaller
+/// than Arduino's general sqrt on AVR.
+LIB8STATIC uint8_t sqrt16(uint16_t x)
+{
+ if( x <= 1) {
+ return x;
+ }
+
+ uint8_t low = 1; // lower bound
+ uint8_t hi, mid;
+
+ if( x > 7904) {
+ hi = 255;
+ } else {
+ hi = (x >> 5) + 8; // initial estimate for upper bound
+ }
+
+ do {
+ mid = (low + hi) >> 1;
+ if ((uint16_t)(mid * mid) > x) {
+ hi = mid - 1;
+ } else {
+ if( mid == 255) {
+ return 255;
+ }
+ low = mid + 1;
+ }
+ } while (hi >= low);
+
+ return low - 1;
+}
+
+/// blend a variable proproportion(0-255) of one byte to another
+/// @param a - the starting byte value
+/// @param b - the byte value to blend toward
+/// @param amountOfB - the proportion (0-255) of b to blend
+/// @returns a byte value between a and b, inclusive
+#if (FASTLED_BLEND_FIXED == 1)
+LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
+{
+#if BLEND8_C == 1
+ uint16_t partial;
+ uint8_t result;
+
+ uint8_t amountOfA = 255 - amountOfB;
+
+ partial = (a * amountOfA);
+#if (FASTLED_SCALE8_FIXED == 1)
+ partial += a;
+ //partial = add8to16( a, partial);
+#endif
+
+ partial += (b * amountOfB);
+#if (FASTLED_SCALE8_FIXED == 1)
+ partial += b;
+ //partial = add8to16( b, partial);
+#endif
+
+ result = partial >> 8;
+
+ return result;
+
+#elif BLEND8_AVRASM == 1
+ uint16_t partial;
+ uint8_t result;
+
+ asm volatile (
+ /* partial = b * amountOfB */
+ " mul %[b], %[amountOfB] \n\t"
+ " movw %A[partial], r0 \n\t"
+
+ /* amountOfB (aka amountOfA) = 255 - amountOfB */
+ " com %[amountOfB] \n\t"
+
+ /* partial += a * amountOfB (aka amountOfA) */
+ " mul %[a], %[amountOfB] \n\t"
+
+ " add %A[partial], r0 \n\t"
+ " adc %B[partial], r1 \n\t"
+
+ " clr __zero_reg__ \n\t"
+
+#if (FASTLED_SCALE8_FIXED == 1)
+ /* partial += a */
+ " add %A[partial], %[a] \n\t"
+ " adc %B[partial], __zero_reg__ \n\t"
+
+ // partial += b
+ " add %A[partial], %[b] \n\t"
+ " adc %B[partial], __zero_reg__ \n\t"
+#endif
+
+ : [partial] "=r" (partial),
+ [amountOfB] "+a" (amountOfB)
+ : [a] "a" (a),
+ [b] "a" (b)
+ : "r0", "r1"
+ );
+
+ result = partial >> 8;
+
+ return result;
+
+#else
+#error "No implementation for blend8 available."
+#endif
+}
+
+#else
+LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
+{
+ // This version loses precision in the integer math
+ // and can actually return results outside of the range
+ // from a to b. Its use is not recommended.
+ uint8_t result;
+ uint8_t amountOfA = 255 - amountOfB;
+ result = scale8_LEAVING_R1_DIRTY( a, amountOfA)
+ + scale8_LEAVING_R1_DIRTY( b, amountOfB);
+ cleanup_R1();
+ return result;
+}
+#endif
+
+
+///@}
+#endif