Examples of C vs ASM optimisation

Dependencies:   mbed

main.cpp

Committer:
simon
Date:
2011-01-28
Revision:
2:56fbeac9ddc8
Parent:
1:eaf79a9d2939

File content as of revision 2:56fbeac9ddc8:

// sford, Examples of C-based optimisations of...
//
//   INTEGER TO BINARY by J.P. Armstrong
//   http://www.armtronics.com/
//  
//   Improved by Igor Skochinsky
//
//   PART OF CODE FROM:
//   http://mbed.org/cookbook/Assembly-Language

#include "mbed.h"

// This program times several ways of showing a 4-bit value on four LEDs:
// an external assembly routine, direct GPIO register writes from C, and
// the mbed PortOut API (see main()).

// declare external assembly language function (in a *.s file)
extern "C" int binasm(int value);

// Bit masks for the four LEDs inside the port-1 GPIO registers
// (bits 18, 20, 21 and 23 respectively).
#define L1 0x040000 
#define L2 0x100000
#define L3 0x200000
#define L4 0x800000

// All four LED bits combined; used to clear every LED in one write and
// to restrict the PortOut object to the LED pins.
#define ALLLEDS (L1 | L2 | L3 | L4)

// Register base address and set/clear offsets. They are not referenced by
// the C code in this file; presumably they mirror the constants used by
// the assembly implementation -- TODO confirm against the *.s file.
#define GPIO1BASE  0x2009C020
#define GPIOSETOFF 0x18
#define GPIOCLROFF 0x1C

// Per-bit lookup: masks[i] is the LED mask for bit i of the displayed value.
const uint32_t masks[] = {L1, L2, L3, L4};

// the asm code, translated in to a more natural expressive C function
void binc(int value) {
    LPC_GPIO1->FIOCLR = ALLLEDS;
    value += 1;
    for(int i=0; i<4; i++) {
        if((value >> i) & 1) {
            LPC_GPIO1->FIOSET = masks[i];
        } else {
            LPC_GPIO1->FIOCLR = masks[i];
        }
    }         
}

// Builds the complete set/clear masks first, then touches the hardware
// with exactly two register writes.
//
// Shows (value + 1) on the four LEDs. The masks must be accumulated with
// |= : plain assignment would keep only the last bit examined, so at most
// one LED would be set and one cleared per call.
void binc2(int value) {
    value += 1;
    uint32_t set = 0;
    uint32_t clr = 0;

    for (int i = 0; i < 4; i++) {
        if ((value >> i) & 1) {
            set |= masks[i];
        } else {
            clr |= masks[i];
        }
    }
    LPC_GPIO1->FIOSET = set;
    LPC_GPIO1->FIOCLR = clr;
}

// Full lookup table: masks2[n] is the LED mask that shows the 4-bit value n
// (bit 0 -> L1, bit 1 -> L2, bit 2 -> L3, bit 3 -> L4).
// The table needs all 16 entries, including the all-off entry for 0, so
// that the index equals the value displayed and every 4-bit index is in
// bounds.
const uint32_t masks2[16] = {
    0,
    L1,
    L2,
    L2 | L1,
    L3,
    L3 | L1,
    L3 | L2,
    L3 | L2 | L1,
    L4,
    L4 | L1,
    L4 | L2,
    L4 | L2 | L1,
    L4 | L3,
    L4 | L3 | L1,
    L4 | L3 | L2,
    L4 | L3 | L2 | L1
};

// using a full lookup for the masks (space vs time tradeoff)
//
// Shows (value + 1) on the four LEDs, like binc(). The index is reduced to
// four bits so that value == 15 wraps to "all off" (as the bit-by-bit
// versions do) instead of reading past the end of the table.
void binc3(int value) {
    const uint32_t set = masks2[(value + 1) & 0xF];
    const uint32_t clr = ~set & ALLLEDS;   // every LED bit not being set
    LPC_GPIO1->FIOSET = set;
    LPC_GPIO1->FIOCLR = clr;
}

// DigitalOut objects for the four LEDs. They are not referenced again in
// this file; presumably they are constructed so the LED pins are
// configured as outputs before the raw register writes -- TODO confirm.
DigitalOut myled1(LED1);
DigitalOut myled2(LED2);
DigitalOut myled3(LED3);
DigitalOut myled4(LED4);

// PortOut over Port1 restricted to the ALLLEDS bits; written by binc6()
// and binc7().
PortOut ledport(Port1, ALLLEDS);

// PortOut variant: assembles the LED pattern for the low four bits of
// value from the per-bit masks table, then writes it to the port in a
// single assignment. Unlike binc(), value itself is shown (no +1).
void binc6(int value) {
    uint32_t pattern = 0;
    for (int bit = 0; bit < 4; ++bit) {
        pattern |= ((value >> bit) & 1) ? masks[bit] : 0;
    }
    ledport = pattern;
}

// PortOut variant using the full lookup table: value indexes masks2
// directly and the resulting pattern is written to the port in one
// assignment. No range check is done here; the caller must keep value
// within the bounds of masks2.
void binc7(int value) {
    const uint32_t pattern = masks2[value];
    ledport = pattern;
}

// timing functions

// Shared timer used by START()/STOP() to measure one benchmark at a time.
Timer t;

// Announces the benchmark named by msg and (re)starts the timer.
//
// msg is only read, so it is taken as const char*: callers pass string
// literals, and binding a literal to a non-const char* is a deprecated
// conversion in C++.
// The label is printed before the timer is started and reset, so the
// printf is not included in the measurement.
void START(const char *msg) {
    printf("Timing %s\n", msg);
    t.start();
    t.reset();
}

// Reports the time elapsed on the shared timer since the last START(),
// in microseconds. The reading is taken before printing so the printf
// itself is not part of the reported value.
void STOP() {
    const int elapsed_us = t.read_us();
    printf(" - %d us\n", elapsed_us);
}

// Number of calls made to each implementation per timing run.
#define LOOPS 100000

// Times each LED-update implementation in turn. Every run makes LOOPS
// calls, cycling the argument through the 4-bit range 0..15, and prints
// the elapsed time in microseconds.
//
// The loops are deliberately written out one by one instead of going
// through a function pointer, so each call is a direct call and the
// measurements stay comparable.
int main() {
    START("Assembly");
    for (int i = 0; i < LOOPS; i++) {
        binasm(i % 16);
    }
    STOP();

    START("C translation");
    for (int i = 0; i < LOOPS; i++) {
        binc(i % 16);
    }
    STOP();

    START("C reg-writes outside the loop");
    for (int i = 0; i < LOOPS; i++) {
        binc2(i % 16);
    }
    STOP();

    START("C with mask lookup table");
    for (int i = 0; i < LOOPS; i++) {
        binc3(i % 16);
    }
    STOP();

    // Same callee as above; only the caller's range reduction changes
    // from a modulo to a bit mask.
    START("C with mask lookup table, but caller loop optimised");
    for (int i = 0; i < LOOPS; i++) {
        binc3(i & 0xF);
    }
    STOP();

    // Modulo 16, not 0xF: "i % 0xF" is modulo 15 and would never pass 15,
    // making this run cover a different input range from the others.
    START("PortOut");
    for (int i = 0; i < LOOPS; i++) {
        binc6(i % 16);
    }
    STOP();

    START("PortOut with mask loocup, but caller loop optimised");
    for (int i = 0; i < LOOPS; i++) {
        binc7(i & 0xF);
    }
    STOP();
}