Test program for my Multi_WS2811 library that started out as a fork of heroic/WS2811. My library uses hardware DMA on the FRDM-KL25Z to drive up to 16 strings of WS2811 or WS2812 LEDs in parallel.

Dependencies:   Multi_WS2811 mbed MMA8451Q

Fork of WS2811 by Heroic Robotics

NOTE: I have accidentally pushed changes for another fork of this program that I used in the recent Georgetown Carnival Power Tool Races. When I get some time, I will restore the test program to its original glory.

You can see my power tool racer (Nevermore's Revenge) here:

/media/uploads/bikeNomad/img_0482.jpg

This tests my FRDM-KL25Z multi-string WS2811/WS2812 library. It uses the accelerometer to change the rainbow phase on two strings of LEDs as well as the touch sense to change brightness.

A video of this program in operation is here.

Here is the library that I developed to run the LEDs:

Import library: Multi_WS2811

Library allowing up to 16 strings of 60 WS2811 or WS2812 LEDs to be driven from a single FRDM-KL25Z board. Uses hardware DMA to run at the full 800 kHz rate without much CPU burden.

Files at this revision

API Documentation at this revision

Committer:
bikeNomad
Date:
Thu Jan 02 10:30:25 2014 +0000
Parent:
22:abfed71656bd
Child:
24:feb1dae0403a
Commit message:
smoothed animation; added frames/sec count (now at 341 for rainbow)

Changed in this revision

WS2811.cpp: Show annotated file | Show diff for this revision | Revisions of this file
WS2811.h: Show annotated file | Show diff for this revision | Revisions of this file
main.cpp: Show annotated file | Show diff for this revision | Revisions of this file
--- a/WS2811.cpp	Thu Jan 02 02:31:24 2014 +0000
+++ b/WS2811.cpp	Thu Jan 02 10:30:25 2014 +0000
@@ -1,394 +1,399 @@
-// 800 KHz WS2811 driver driving potentially many LED strings.
-// Uses 3-phase DMA
-// 16K SRAM less stack, etc.
-//
-// Per LED: 3 bytes (malloc'd) for RGB data
-//
-// Per LED strip / per LED
-//          96 bytes (static) for bit data
-//        + 96 bytes (static) for ones data
-//        = 192 bytes
-//
-//        40 LEDs max per string = 7680 bytes static
-//
-//        40 LEDs: 7680 + 40*3 = 7800 bytes
-//        80 LEDs: 7680 + 80*3 = 7920 bytes
-
-#include "MKL25Z4.h"
-#include "LedStrip.h"
-#include "WS2811.h"
-
-//
-// Configuration
-//
-
-// Define MONITOR_TPM0_PWM as non-zero to monitor PWM timing on PTD0 and PTD1
-// PTD0 TPM0/CH0 PWM_1 J2/06
-// PTD1 TPM0/CH1 PWM_2 J2/12 (also LED_BLUE)
-#define MONITOR_TPM0_PWM 0
-
-// define DEBUG_PIN to identify a pin in PORTD used for debug output
-// #define DEBUG_PIN 3 /* PTD3 debugOut */
-
-#ifdef DEBUG_PIN
-#define DEBUG 1
-#endif
-
-#if DEBUG
-#define DEBUG_MASK (1<<DEBUG_PIN)
-#define RESET_DEBUG (IO_GPIO->PDOR &= ~DEBUG_MASK)
-#define SET_DEBUG (IO_GPIO->PDOR |= DEBUG_MASK)
-#else
-#define DEBUG_MASK 0
-#define RESET_DEBUG (void)0
-#define SET_DEBUG (void)0
-#endif
-
-static PORT_Type volatile * const IO_PORT = PORTD;
-static GPIO_Type volatile * const IO_GPIO = PTD;
-
-// 48 MHz clock, no prescaling.
-#define NSEC_TO_TICKS(nsec) ((nsec)*48/1000)
-static const uint32_t CLK_NSEC = 1250;
-static const uint32_t tpm_period    = NSEC_TO_TICKS(CLK_NSEC);
-static const uint32_t tpm_p0_period = NSEC_TO_TICKS(CLK_NSEC / 3);
-static const uint32_t tpm_p1_period = NSEC_TO_TICKS(CLK_NSEC * 2 / 3);
-
-enum DMA_MUX_SRC
-{
-    DMA_MUX_SRC_TPM0_CH_0     = 24,
-    DMA_MUX_SRC_TPM0_CH_1,
-    DMA_MUX_SRC_TPM0_Overflow = 54,
-};
-
-enum DMA_CHAN
-{
-    DMA_CHAN_START = 0,
-    DMA_CHAN_0_LOW = 1,
-    DMA_CHAN_1_LOW = 2,
-    N_DMA_CHANNELS
-};
-
-static volatile bool dma_done = true;
-
-// class static
-bool WS2811::initialized = false;
-
-// class static
-uint32_t WS2811::enabledPins = 0;
-
-#define WORD_ALIGNED __attribute__ ((aligned(4)))
-
-#define DMA_LEADING_ZEROS  2
-#define BITS_PER_RGB       24
-#define DMA_TRAILING_ZEROS 1
-
-static struct
-{
-    uint32_t start_t1_low[ DMA_LEADING_ZEROS ];
-    uint32_t dmaWords[ BITS_PER_RGB * MAX_LEDS_PER_STRIP ];
-    uint32_t trailing_zeros_1[ DMA_TRAILING_ZEROS ];
-
-    uint32_t start_t0_high[ DMA_LEADING_ZEROS - 1 ];
-    uint32_t allOnes[ BITS_PER_RGB * MAX_LEDS_PER_STRIP ];
-    uint32_t trailing_zeros_2[ DMA_TRAILING_ZEROS + 1 ];
-} dmaData WORD_ALIGNED;
-
-// class static 
-bool WS2811::is_dma_done()
-{
-    return dma_done;
-}
-
-// class static
-void WS2811::hw_init()
-{
-    if (initialized) return;
-
-    dma_data_init();
-    clock_init();
-    dma_init();
-    io_init();
-    tpm_init();
-
-    initialized = true;
-}
-
-// class static
-void WS2811::dma_data_init()
-{
-    memset(dmaData.allOnes, 0xFF, sizeof(dmaData.allOnes));
-
-#if DEBUG
-    for (unsigned i = 0; i < BITS_PER_RGB * MAX_LEDS_PER_STRIP; i++)
-        dmaData.dmaWords[i] = DEBUG_MASK;
-#endif
-}
-
-// class static
-
-/// Enable PORTD, DMA and TPM0 clocking
-void WS2811::clock_init()
-{
-    SIM->SCGC5 |= SIM_SCGC5_PORTD_MASK;
-    SIM->SCGC6 |= SIM_SCGC6_DMAMUX_MASK | SIM_SCGC6_TPM0_MASK; // Enable clock to DMA mux and TPM0
-    SIM->SCGC7 |= SIM_SCGC7_DMA_MASK;  // Enable clock to DMA
-
-    SIM->SOPT2 |= SIM_SOPT2_TPMSRC(1); // Clock source: MCGFLLCLK or MCGPLLCLK
-}
-
-// class static
-
-/// Configure GPIO output pins
-void WS2811::io_init()
-{
-    uint32_t m = 1;
-    for (uint32_t i = 0; i < 32; i++)
-    {
-        // set up each pin
-        if (m & enabledPins)
-        {
-            IO_PORT->PCR[i] = PORT_PCR_MUX(1) // GPIO
-                              | PORT_PCR_DSE_MASK; // high drive strength
-        }
-        m <<= 1;
-    }
-
-    IO_GPIO->PDDR |= enabledPins;      // set as outputs
-
-#if MONITOR_TPM0_PWM
-    // PTD0 CH0 monitor: TPM0, high drive strength
-    IO_PORT->PCR[0] = PORT_PCR_MUX(4) | PORT_PCR_DSE_MASK;
-    // PTD1 CH1 monitor: TPM0, high drive strength
-    IO_PORT->PCR[1] = PORT_PCR_MUX(4) | PORT_PCR_DSE_MASK;
-    IO_GPIO->PDDR  |= 3;               // set as outputs
-    IO_GPIO->PDOR &= ~(enabledPins | 3);     // initially low
-#else
-    IO_GPIO->PDOR &= ~enabledPins;     // initially low
-#endif
-
-#if DEBUG
-    IO_PORT->PCR[DEBUG_PIN] = PORT_PCR_MUX(1) | PORT_PCR_DSE_MASK;
-    IO_GPIO->PDDR |= DEBUG_MASK;
-    IO_GPIO->PDOR &= ~DEBUG_MASK;
-#endif
-}
-
-// class static
-
-/// Configure DMA and DMAMUX
-void WS2811::dma_init()
-{
-    // reset DMAMUX
-    DMAMUX0->CHCFG[DMA_CHAN_START] = 0;
-    DMAMUX0->CHCFG[DMA_CHAN_0_LOW] = 0;
-    DMAMUX0->CHCFG[DMA_CHAN_1_LOW] = 0;
-
-    // wire our DMA event sources into the first three DMA channels
-    // t=0: all enabled outputs go high on TPM0 overflow
-    DMAMUX0->CHCFG[DMA_CHAN_START] = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_Overflow);
-    // t=tpm_p0_period: all of the 0 bits go low.
-    DMAMUX0->CHCFG[DMA_CHAN_0_LOW] = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_CH_0);
-    // t=tpm_p1_period: all outputs go low.
-    DMAMUX0->CHCFG[DMA_CHAN_1_LOW] = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_CH_1);
-
-    NVIC_EnableIRQ(DMA0_IRQn);
-}
-
-// class static
-
-/// Configure TPM0 to do two different PWM periods at 800kHz rate
-void WS2811::tpm_init()
-{
-    // set up TPM0 for proper period (800 kHz = 1.25 usec ±600nsec)
-    TPM_Type volatile *tpm = TPM0;
-    tpm->SC = TPM_SC_DMA_MASK          // enable DMA
-              | TPM_SC_CMOD(0)         // disable clocks
-              | TPM_SC_PS(0);          // 48MHz / 1 = 48MHz clock
-    tpm->MOD = tpm_period - 1;         // 48MHz / 800kHz
-
-    // No Interrupts; High True pulses on Edge Aligned PWM
-    tpm->CONTROLS[0].CnSC = TPM_CnSC_MSB_MASK | TPM_CnSC_ELSB_MASK | TPM_CnSC_DMA_MASK;
-    tpm->CONTROLS[1].CnSC = TPM_CnSC_MSB_MASK | TPM_CnSC_ELSB_MASK | TPM_CnSC_DMA_MASK;
-
-    // set TPM0 channel 0 for 0.35 usec (±150nsec) / 0.8 usec (±150nsec) (0 code)
-    // 1.25 usec * 1/3 = 417 nsec
-    tpm->CONTROLS[0].CnV = tpm_p0_period;
-
-    // set TPM0 channel 1 for 0.7 usec (±150nsec) / 0.6 usec (±150nsec) (1 code)
-    // 1.25 usec * 2/3 = 833 nsec
-    tpm->CONTROLS[1].CnV = tpm_p1_period;
-}
-
-WS2811::WS2811(unsigned n, unsigned pinNumber)
-    : LedStrip(n)
-    , pinMask(1U << pinNumber)
-{
-    enabledPins |= pinMask;
-    guardtime.start();
-}
-
-// class static
-void WS2811::startDMA()
-{
-    DMA_Type volatile * dma   = DMA0;
-    TPM_Type volatile *tpm   = TPM0;
-    uint32_t nBytes = sizeof(dmaData.start_t1_low)
-        + sizeof(dmaData.dmaWords)
-        + sizeof(dmaData.trailing_zeros_1);
-
-    tpm->SC &= ~TPM_SC_CMOD_MASK; // disable internal clocking
-    tpm->CNT = tpm_p0_period - 2 ;
-    tpm->STATUS = 0xFFFFFFFF;
-
-    dma->DMA[DMA_CHAN_START].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
-    dma->DMA[DMA_CHAN_0_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
-    dma->DMA[DMA_CHAN_1_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
-
-    // t=0: all outputs go high
-    // triggered by TPM0_Overflow
-    // source is one word of 0 then 24 x 0xffffffff, then another 0 word
-    dma->DMA[DMA_CHAN_START].SAR     = (uint32_t)(void*)dmaData.start_t0_high;
-    dma->DMA[DMA_CHAN_START].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
-
-    // t=tpm_p0_period: some outputs (the 0 bits) go low.
-    // Triggered by TPM0_CH0
-    // Start 2 words before the actual data to avoid garbage pulses.
-    dma->DMA[DMA_CHAN_0_LOW].SAR     = (uint32_t)(void*)dmaData.start_t1_low; // set source address
-    dma->DMA[DMA_CHAN_0_LOW].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
-
-    // t=tpm_p1_period: all outputs go low.
-    // Triggered by TPM0_CH1
-    // source is constant 0x00000000 (first word of dmaWords)
-    dma->DMA[DMA_CHAN_1_LOW].SAR     = (uint32_t)(void*)dmaData.start_t1_low; // set source address
-    dma->DMA[DMA_CHAN_1_LOW].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
-
-    dma->DMA[DMA_CHAN_0_LOW].DAR 
-        = dma->DMA[DMA_CHAN_1_LOW].DAR 
-        = dma->DMA[DMA_CHAN_START].DAR 
-        = (uint32_t)(void*)&IO_GPIO->PDOR;
-
-    // wait until done
-    while (!is_dma_done())
-    {
-        __WFI();
-    }
-
-    SET_DEBUG;
-
-    dma->DMA[DMA_CHAN_0_LOW].DCR     = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
-                                       | DMA_DCR_ERQ_MASK
-                                       | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
-                                       | DMA_DCR_SINC_MASK // increment source each transfer
-                                       | DMA_DCR_CS_MASK
-                                       | DMA_DCR_SSIZE(0) // 32-bit source transfers
-                                       | DMA_DCR_DSIZE(0); // 32-bit destination transfers
-
-    dma->DMA[DMA_CHAN_1_LOW].DCR     = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
-                                       | DMA_DCR_ERQ_MASK
-                                       | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
-                                       | DMA_DCR_CS_MASK
-                                       | DMA_DCR_SSIZE(0) // 32-bit source transfers
-                                       | DMA_DCR_DSIZE(0); // 32-bit destination transfers
-
-    dma->DMA[DMA_CHAN_START].DCR     = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
-                                       | DMA_DCR_ERQ_MASK
-                                       | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
-                                       | DMA_DCR_SINC_MASK // increment source each transfer
-                                       | DMA_DCR_CS_MASK
-                                       | DMA_DCR_SSIZE(0) // 32-bit source transfers
-                                       | DMA_DCR_DSIZE(0);
-
-
-    tpm->SC |= TPM_SC_CMOD(1);         // enable internal clocking
-}
-
-void WS2811::writePixel(unsigned n, uint8_t *p)
-{
-    uint32_t *dest = dmaData.dmaWords + n * BITS_PER_RGB;
-    writeByte(*p++, pinMask, dest + 0); // G
-    writeByte(*p++, pinMask, dest + 8); // R
-    writeByte(*p, pinMask, dest + 16); // B
-}
-
-// class static
-void WS2811::writeByte(uint8_t byte, uint32_t mask, uint32_t *dest)
-{
-    for (uint8_t bm = 0x80; bm; bm >>= 1)
-    {
-        // MSBit first
-        if (byte & bm)
-            *dest |= mask;
-        else
-            *dest &= ~mask;
-        dest++;
-    }
-}
-
-void WS2811::begin()
-{
-    blank();
-    show();
-}
-
-void WS2811::blank()
-{
-    memset(pixels, 0x00, numPixelBytes());
-
-#if DEBUG
-    for (unsigned i = DMA_LEADING_ZEROS; i < DMA_LEADING_ZEROS + BITS_PER_RGB; i++)
-            dmaData.dmaWords[i] = DEBUG_MASK;
-#else
-    memset(dmaData.dmaWords, 0x00, sizeof(dmaData.dmaWords));
-#endif
-}
-
-void WS2811::show()
-{
-    hw_init();
-
-    uint16_t i, n = numPixels(); // 3 bytes per LED
-    uint8_t *p = pixels;
-    while (guardtime.read_us() < 50)
-    {
-        __NOP();
-    }
-
-    for (i=0; i<n; i++ )
-    {
-        writePixel(i, p);
-        p += 3;
-    }
-
-    startDMA();
-
-    guardtime.reset();
-}
-
-extern "C" void DMA0_IRQHandler()
-{
-    DMA_Type volatile * dma = DMA0;
-    TPM_Type volatile *tpm   = TPM0;
-
-    uint32_t db = dma->DMA[DMA_CHAN_START].DSR_BCR;
-    if (db & DMA_DSR_BCR_DONE_MASK)
-    {
-        dma->DMA[DMA_CHAN_START].DSR_BCR = DMA_DSR_BCR_DONE_MASK;  // clear/reset DMA status
-    }
-
-    db = dma->DMA[DMA_CHAN_0_LOW].DSR_BCR;
-    if (db & DMA_DSR_BCR_DONE_MASK)
-    {
-        dma->DMA[DMA_CHAN_0_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK;  // clear/reset DMA status
-    }
-
-    db = dma->DMA[DMA_CHAN_1_LOW].DSR_BCR;
-    if (db & DMA_DSR_BCR_DONE_MASK)
-    {
-        dma->DMA[DMA_CHAN_1_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
-        dma_done                         = true;
-        tpm->SC &= ~TPM_SC_CMOD_MASK; // disable internal clocking
-        RESET_DEBUG;
-    }
-}
+// 800 KHz WS2811 driver driving potentially many LED strings.
+// Uses 3-phase DMA
+// 16K SRAM less stack, etc.
+//
+// Per LED: 3 bytes (malloc'd) for RGB data
+//
+// Per LED strip / per LED
+//          96 bytes (static) for bit data
+//        + 96 bytes (static) for ones data
+//        = 192 bytes
+//
+//        40 LEDs max per string = 7680 bytes static
+//
+//        40 LEDs: 7680 + 40*3 = 7800 bytes
+//        80 LEDs: 7680 + 80*3 = 7920 bytes
+
+#include "MKL25Z4.h"
+#include "LedStrip.h"
+#include "WS2811.h"
+
+//
+// Configuration
+//
+
+// Define MONITOR_TPM0_PWM as non-zero to monitor PWM timing on PTD0 and PTD1
+// PTD0 TPM0/CH0 PWM_1 J2/06
+// PTD1 TPM0/CH1 PWM_2 J2/12 (also LED_BLUE)
+#define MONITOR_TPM0_PWM 0
+
+// define DEBUG_PIN to identify a pin in PORTD used for debug output
+// #define DEBUG_PIN 3 /* PTD3 debugOut */
+
+#ifdef DEBUG_PIN
+#define DEBUG 1
+#endif
+
+#if DEBUG
+#define DEBUG_MASK (1<<DEBUG_PIN)
+#define RESET_DEBUG (IO_GPIO->PDOR &= ~DEBUG_MASK)
+#define SET_DEBUG (IO_GPIO->PDOR |= DEBUG_MASK)
+#else
+#define DEBUG_MASK 0
+#define RESET_DEBUG (void)0
+#define SET_DEBUG (void)0
+#endif
+
+static PORT_Type volatile * const IO_PORT = PORTD;
+static GPIO_Type volatile * const IO_GPIO = PTD;
+
+// 48 MHz clock, no prescaling.
+#define NSEC_TO_TICKS(nsec) ((nsec)*48/1000)
+static const uint32_t CLK_NSEC = 1250;
+static const uint32_t tpm_period    = NSEC_TO_TICKS(CLK_NSEC);
+static const uint32_t tpm_p0_period = NSEC_TO_TICKS(CLK_NSEC / 3);
+static const uint32_t tpm_p1_period = NSEC_TO_TICKS(CLK_NSEC * 2 / 3);
+
+enum DMA_MUX_SRC
+{
+    DMA_MUX_SRC_TPM0_CH_0     = 24,
+    DMA_MUX_SRC_TPM0_CH_1,
+    DMA_MUX_SRC_TPM0_Overflow = 54,
+};
+
+enum DMA_CHAN
+{
+    DMA_CHAN_START = 0,
+    DMA_CHAN_0_LOW = 1,
+    DMA_CHAN_1_LOW = 2,
+    N_DMA_CHANNELS
+};
+
+static volatile bool dma_done = true;
+
+// class static
+bool WS2811::initialized = false;
+
+// class static
+Timer WS2811::guardtime;
+
+// class static
+uint32_t WS2811::enabledPins = 0;
+
+#define WORD_ALIGNED __attribute__ ((aligned(4)))
+
+#define DMA_LEADING_ZEROS  2
+#define BITS_PER_RGB       24
+#define DMA_TRAILING_ZEROS 1
+
+static struct
+{
+    uint32_t start_t1_low[ DMA_LEADING_ZEROS ];
+    uint32_t dmaWords[ BITS_PER_RGB * MAX_LEDS_PER_STRIP ];
+    uint32_t trailing_zeros_1[ DMA_TRAILING_ZEROS ];
+
+    uint32_t start_t0_high[ DMA_LEADING_ZEROS - 1 ];
+    uint32_t allOnes[ BITS_PER_RGB * MAX_LEDS_PER_STRIP ];
+    uint32_t trailing_zeros_2[ DMA_TRAILING_ZEROS + 1 ];
+} dmaData WORD_ALIGNED;
+
+// class static 
+bool WS2811::is_dma_done()
+{
+    return dma_done;
+}
+
+// class static
+void WS2811::hw_init()
+{
+    if (initialized) return;
+
+    guardtime.start();
+
+    dma_data_init();
+    clock_init();
+    dma_init();
+    io_init();
+    tpm_init();
+
+    initialized = true;
+}
+
+// class static
+void WS2811::dma_data_init()
+{
+    memset(dmaData.allOnes, 0xFF, sizeof(dmaData.allOnes));
+
+#if DEBUG
+    for (unsigned i = 0; i < BITS_PER_RGB * MAX_LEDS_PER_STRIP; i++)
+        dmaData.dmaWords[i] = DEBUG_MASK;
+#endif
+}
+
+// class static
+
+/// Enable PORTD, DMA and TPM0 clocking
+void WS2811::clock_init()
+{
+    SIM->SCGC5 |= SIM_SCGC5_PORTD_MASK;
+    SIM->SCGC6 |= SIM_SCGC6_DMAMUX_MASK | SIM_SCGC6_TPM0_MASK; // Enable clock to DMA mux and TPM0
+    SIM->SCGC7 |= SIM_SCGC7_DMA_MASK;  // Enable clock to DMA
+
+    SIM->SOPT2 |= SIM_SOPT2_TPMSRC(1); // Clock source: MCGFLLCLK or MCGPLLCLK
+}
+
+// class static
+
+/// Configure GPIO output pins
+void WS2811::io_init()
+{
+    uint32_t m = 1;
+    for (uint32_t i = 0; i < 32; i++)
+    {
+        // set up each pin
+        if (m & enabledPins)
+        {
+            IO_PORT->PCR[i] = PORT_PCR_MUX(1) // GPIO
+                              | PORT_PCR_DSE_MASK; // high drive strength
+        }
+        m <<= 1;
+    }
+
+    IO_GPIO->PDDR |= enabledPins;      // set as outputs
+
+#if MONITOR_TPM0_PWM
+    // PTD0 CH0 monitor: TPM0, high drive strength
+    IO_PORT->PCR[0] = PORT_PCR_MUX(4) | PORT_PCR_DSE_MASK;
+    // PTD1 CH1 monitor: TPM0, high drive strength
+    IO_PORT->PCR[1] = PORT_PCR_MUX(4) | PORT_PCR_DSE_MASK;
+    IO_GPIO->PDDR  |= 3;               // set as outputs
+    IO_GPIO->PDOR &= ~(enabledPins | 3);     // initially low
+#else
+    IO_GPIO->PDOR &= ~enabledPins;     // initially low
+#endif
+
+#if DEBUG
+    IO_PORT->PCR[DEBUG_PIN] = PORT_PCR_MUX(1) | PORT_PCR_DSE_MASK;
+    IO_GPIO->PDDR |= DEBUG_MASK;
+    IO_GPIO->PDOR &= ~DEBUG_MASK;
+#endif
+}
+
+// class static
+
+/// Configure DMA and DMAMUX
+void WS2811::dma_init()
+{
+    // reset DMAMUX
+    DMAMUX0->CHCFG[DMA_CHAN_START] = 0;
+    DMAMUX0->CHCFG[DMA_CHAN_0_LOW] = 0;
+    DMAMUX0->CHCFG[DMA_CHAN_1_LOW] = 0;
+
+    // wire our DMA event sources into the first three DMA channels
+    // t=0: all enabled outputs go high on TPM0 overflow
+    DMAMUX0->CHCFG[DMA_CHAN_START] = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_Overflow);
+    // t=tpm_p0_period: all of the 0 bits go low.
+    DMAMUX0->CHCFG[DMA_CHAN_0_LOW] = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_CH_0);
+    // t=tpm_p1_period: all outputs go low.
+    DMAMUX0->CHCFG[DMA_CHAN_1_LOW] = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_CH_1);
 
+    NVIC_EnableIRQ(DMA0_IRQn);
+}
+
+// class static
+
+/// Configure TPM0 to do two different PWM periods at 800kHz rate
+void WS2811::tpm_init()
+{
+    // set up TPM0 for proper period (800 kHz = 1.25 usec ±600nsec)
+    TPM_Type volatile *tpm = TPM0;
+    tpm->SC = TPM_SC_DMA_MASK          // enable DMA
+              | TPM_SC_CMOD(0)         // disable clocks
+              | TPM_SC_PS(0);          // 48MHz / 1 = 48MHz clock
+    tpm->MOD = tpm_period - 1;         // 48MHz / 800kHz
+
+    // No Interrupts; High True pulses on Edge Aligned PWM
+    tpm->CONTROLS[0].CnSC = TPM_CnSC_MSB_MASK | TPM_CnSC_ELSB_MASK | TPM_CnSC_DMA_MASK;
+    tpm->CONTROLS[1].CnSC = TPM_CnSC_MSB_MASK | TPM_CnSC_ELSB_MASK | TPM_CnSC_DMA_MASK;
+
+    // set TPM0 channel 0 for 0.35 usec (±150nsec) / 0.8 usec (±150nsec) (0 code)
+    // 1.25 usec * 1/3 = 417 nsec
+    tpm->CONTROLS[0].CnV = tpm_p0_period;
+
+    // set TPM0 channel 1 for 0.7 usec (±150nsec) / 0.6 usec (±150nsec) (1 code)
+    // 1.25 usec * 2/3 = 833 nsec
+    tpm->CONTROLS[1].CnV = tpm_p1_period;
+}
+
+WS2811::WS2811(unsigned n, unsigned pinNumber)
+    : LedStrip(n)
+    , pinMask(1U << pinNumber)
+{
+    enabledPins |= pinMask;
+}
+
+// class static
+void WS2811::startDMA()
+{
+    DMA_Type volatile * dma   = DMA0;
+    TPM_Type volatile *tpm   = TPM0;
+    uint32_t nBytes = sizeof(dmaData.start_t1_low)
+        + sizeof(dmaData.dmaWords)
+        + sizeof(dmaData.trailing_zeros_1);
+
+    tpm->SC &= ~TPM_SC_CMOD_MASK; // disable internal clocking
+    tpm->CNT = tpm_p0_period - 2 ;
+    tpm->STATUS = 0xFFFFFFFF;
+
+    dma->DMA[DMA_CHAN_START].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
+    dma->DMA[DMA_CHAN_0_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
+    dma->DMA[DMA_CHAN_1_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
+
+    // t=0: all outputs go high
+    // triggered by TPM0_Overflow
+    // source is one word of 0 then 24 x 0xffffffff, then another 0 word
+    dma->DMA[DMA_CHAN_START].SAR     = (uint32_t)(void*)dmaData.start_t0_high;
+    dma->DMA[DMA_CHAN_START].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
+
+    // t=tpm_p0_period: some outputs (the 0 bits) go low.
+    // Triggered by TPM0_CH0
+    // Start 2 words before the actual data to avoid garbage pulses.
+    dma->DMA[DMA_CHAN_0_LOW].SAR     = (uint32_t)(void*)dmaData.start_t1_low; // set source address
+    dma->DMA[DMA_CHAN_0_LOW].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
+
+    // t=tpm_p1_period: all outputs go low.
+    // Triggered by TPM0_CH1
+    // source is constant 0x00000000 (first word of dmaWords)
+    dma->DMA[DMA_CHAN_1_LOW].SAR     = (uint32_t)(void*)dmaData.start_t1_low; // set source address
+    dma->DMA[DMA_CHAN_1_LOW].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
+
+    dma->DMA[DMA_CHAN_0_LOW].DAR 
+        = dma->DMA[DMA_CHAN_1_LOW].DAR 
+        = dma->DMA[DMA_CHAN_START].DAR 
+        = (uint32_t)(void*)&IO_GPIO->PDOR;
+
+    // wait until done
+    while (!is_dma_done())
+    {
+        __WFI();
+    }
+    
+    // ensure sufficient guard time
+    while (guardtime.read_us() < 50)
+    {
+        __NOP();
+    }
+
+    SET_DEBUG;
+
+    dma->DMA[DMA_CHAN_0_LOW].DCR     = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
+                                       | DMA_DCR_ERQ_MASK
+                                       | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
+                                       | DMA_DCR_SINC_MASK // increment source each transfer
+                                       | DMA_DCR_CS_MASK
+                                       | DMA_DCR_SSIZE(0) // 32-bit source transfers
+                                       | DMA_DCR_DSIZE(0); // 32-bit destination transfers
+
+    dma->DMA[DMA_CHAN_1_LOW].DCR     = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
+                                       | DMA_DCR_ERQ_MASK
+                                       | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
+                                       | DMA_DCR_CS_MASK
+                                       | DMA_DCR_SSIZE(0) // 32-bit source transfers
+                                       | DMA_DCR_DSIZE(0); // 32-bit destination transfers
+
+    dma->DMA[DMA_CHAN_START].DCR     = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
+                                       | DMA_DCR_ERQ_MASK
+                                       | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
+                                       | DMA_DCR_SINC_MASK // increment source each transfer
+                                       | DMA_DCR_CS_MASK
+                                       | DMA_DCR_SSIZE(0) // 32-bit source transfers
+                                       | DMA_DCR_DSIZE(0);
+
+
+    tpm->SC |= TPM_SC_CMOD(1);         // enable internal clocking
+}
+
+void WS2811::writePixel(unsigned n, uint8_t *p)
+{
+    uint32_t *dest = dmaData.dmaWords + n * BITS_PER_RGB;
+    writeByte(*p++, pinMask, dest + 0); // G
+    writeByte(*p++, pinMask, dest + 8); // R
+    writeByte(*p, pinMask, dest + 16); // B
+}
+
+// class static
+void WS2811::writeByte(uint8_t byte, uint32_t mask, uint32_t *dest)
+{
+    for (uint8_t bm = 0x80; bm; bm >>= 1)
+    {
+        // MSBit first
+        if (byte & bm)
+            *dest |= mask;
+        else
+            *dest &= ~mask;
+        dest++;
+    }
+}
+
+void WS2811::begin()
+{
+    blank();
+    show();
+}
+
+void WS2811::blank()
+{
+    memset(pixels, 0x00, numPixelBytes());
+
+#if DEBUG
+    for (unsigned i = DMA_LEADING_ZEROS; i < DMA_LEADING_ZEROS + BITS_PER_RGB; i++)
+            dmaData.dmaWords[i] = DEBUG_MASK;
+#else
+    memset(dmaData.dmaWords, 0x00, sizeof(dmaData.dmaWords));
+#endif
+}
+
+void WS2811::show()
+{
+    hw_init();
+
+    uint16_t i, n = numPixels(); // 3 bytes per LED
+    uint8_t *p = pixels;
+
+    for (i=0; i<n; i++ )
+    {
+        writePixel(i, p);
+        p += 3;
+    }
+
+    startDMA();
+}
+
+extern "C" void DMA0_IRQHandler()
+{
+    DMA_Type volatile * dma = DMA0;
+    TPM_Type volatile *tpm   = TPM0;
+
+    uint32_t db = dma->DMA[DMA_CHAN_START].DSR_BCR;
+    if (db & DMA_DSR_BCR_DONE_MASK)
+    {
+        dma->DMA[DMA_CHAN_START].DSR_BCR = DMA_DSR_BCR_DONE_MASK;  // clear/reset DMA status
+    }
+
+    db = dma->DMA[DMA_CHAN_0_LOW].DSR_BCR;
+    if (db & DMA_DSR_BCR_DONE_MASK)
+    {
+        dma->DMA[DMA_CHAN_0_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK;  // clear/reset DMA status
+    }
+
+    db = dma->DMA[DMA_CHAN_1_LOW].DSR_BCR;
+    if (db & DMA_DSR_BCR_DONE_MASK)
+    {
+        dma->DMA[DMA_CHAN_1_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
+        dma_done                         = true;
+        tpm->SC &= ~TPM_SC_CMOD_MASK; // disable internal clocking
+        RESET_DEBUG;
+        WS2811::guardtime.reset();
+    }
+}
+
--- a/WS2811.h	Thu Jan 02 02:31:24 2014 +0000
+++ b/WS2811.h	Thu Jan 02 10:30:25 2014 +0000
@@ -21,6 +21,8 @@
 
 #define MAX_LEDS_PER_STRIP 60
 
+extern "C" void DMA0_IRQHandler();
+
 class WS2811 : public LedStrip
 {
 public:
@@ -30,8 +32,10 @@
     virtual void show();
     virtual void blank();
 
+    static void startDMA();
+    static bool is_dma_done();
+
 private:
-    Timer guardtime;
     uint32_t pinMask;
 
     void writePixel(unsigned n, uint8_t *p);
@@ -40,11 +44,9 @@
 
     static bool initialized;
     static uint32_t enabledPins;
+    static Timer guardtime;
 
     static void writeByte(uint8_t byte, uint32_t mask, uint32_t *dest);
-    static void startDMA();
-
-    static bool is_dma_done();
 
     static void hw_init();
         static void io_init();
@@ -52,6 +54,8 @@
         static void dma_init();
         static void tpm_init();
         static void dma_data_init();
+        
+    friend void DMA0_IRQHandler();
 };
 
 #endif
--- a/main.cpp	Thu Jan 02 02:31:24 2014 +0000
+++ b/main.cpp	Thu Jan 02 10:30:25 2014 +0000
@@ -2,8 +2,8 @@
 #include "WS2811.h"
 #include "Colors.h"
 
-// per LED: 3x 20 mA = 60mA
-// 4x full brightness = 0.33A at 5V.
+// per LED: 3 * 20 mA = 60mA max
+// 60 LEDs: 60 * 60mA = 3600 mA max
 unsigned const nLEDs = MAX_LEDS_PER_STRIP;
 
 // I/O pin usage
@@ -33,42 +33,31 @@
     strip.show();
 }
 
-static void showTestPattern(WS2811 &strip)
-{
-    unsigned nLEDs = strip.numPixels();
-    for (unsigned i = 0; i < nLEDs; i++)
-    {
-        strip.setPixelColor(i, LedStrip::Color(0xff,0,0xAA));
-    }
-    strip.show();
-}
-
 int main(void)
 {
     pc.baud(115200);
-    pc.printf("\r\nLEDs: %d\r\n", nLEDs);
-
     WS2811 lightStrip(nLEDs, DATA_OUT_PIN);
 
     lightStrip.begin();
 
-    float sat = 1.0;    // color saturation, 0.0 to 1.0
-    float brite = 0.1;  // 1.0 is full brightness, which is VERY bright!
-    float hueShift = 0.0;
-    float hueShiftIncrement = 1.0 / (360.0 / nLEDs);
+    float brite = 0.2;  // 1.0 is full brightness, which is VERY bright!
+    float rainbowPeriod = 1 * 1.0e6; // usec
+    float sat = 1.0;
 
-    Timer advance;
-    advance.start();
+    Timer timeRunning;
+    timeRunning.start();
+    bool printed = false;
+    unsigned frames = 0;
 
     for (;;) {
-        advance.reset();
-
-        // showTestPattern(lightStrip);
+        unsigned running = timeRunning.read_us();
+        float hueShift = running / rainbowPeriod;
+        if (!printed && running >= 10000000U) {
+            pc.printf("%u frames in %u usec = %u frames / sec\r\n", frames, running, frames * 1000000 / running);
+            printed = true;
+        }
         showRainbow(lightStrip, sat, brite, hueShift);
-        hueShift += hueShiftIncrement;
-
-        while (advance.read_ms() < 100)
-            __NOP();
-   }
+        frames ++;
+    }
 }