CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Committer:
simon
Date:
Thu Mar 10 15:07:50 2011 +0000
Revision:
0:1014af42efd9

        

Who changed what in which revision?

User | Revision | Line number | New contents of line
simon 0:1014af42efd9 1 /* ----------------------------------------------------------------------
simon 0:1014af42efd9 2 * Copyright (C) 2010 ARM Limited. All rights reserved.
simon 0:1014af42efd9 3 *
simon 0:1014af42efd9 4 * $Date: 29. November 2010
simon 0:1014af42efd9 5 * $Revision: V1.0.3
simon 0:1014af42efd9 6 *
simon 0:1014af42efd9 7 * Project: CMSIS DSP Library
simon 0:1014af42efd9 8 * Title: arm_fir_q7.c
simon 0:1014af42efd9 9 *
simon 0:1014af42efd9 10 * Description: Q7 FIR filter processing function.
simon 0:1014af42efd9 11 *
simon 0:1014af42efd9 12 * Target Processor: Cortex-M4/Cortex-M3
simon 0:1014af42efd9 13 *
simon 0:1014af42efd9 14 * Version 1.0.3 2010/11/29
simon 0:1014af42efd9 15 * Re-organized the CMSIS folders and updated documentation.
simon 0:1014af42efd9 16 *
simon 0:1014af42efd9 17 * Version 1.0.2 2010/11/11
simon 0:1014af42efd9 18 * Documentation updated.
simon 0:1014af42efd9 19 *
simon 0:1014af42efd9 20 * Version 1.0.1 2010/10/05
simon 0:1014af42efd9 21 * Production release and review comments incorporated.
simon 0:1014af42efd9 22 *
simon 0:1014af42efd9 23 * Version 1.0.0 2010/09/20
simon 0:1014af42efd9 24 * Production release and review comments incorporated.
simon 0:1014af42efd9 25 *
simon 0:1014af42efd9 26 * Version 0.0.5 2010/04/26
simon 0:1014af42efd9 27 * incorporated review comments and updated with latest CMSIS layer
simon 0:1014af42efd9 28 *
simon 0:1014af42efd9 29 * Version 0.0.3 2010/03/10
simon 0:1014af42efd9 30 * Initial version
simon 0:1014af42efd9 31 * -------------------------------------------------------------------- */
simon 0:1014af42efd9 32
simon 0:1014af42efd9 33 #include "arm_math.h"
simon 0:1014af42efd9 34
simon 0:1014af42efd9 35 /**
simon 0:1014af42efd9 36 * @ingroup groupFilters
simon 0:1014af42efd9 37 */
simon 0:1014af42efd9 38
simon 0:1014af42efd9 39 /**
simon 0:1014af42efd9 40 * @addtogroup FIR
simon 0:1014af42efd9 41 * @{
simon 0:1014af42efd9 42 */
simon 0:1014af42efd9 43
simon 0:1014af42efd9 44 /**
simon 0:1014af42efd9 45 * @param[in] *S points to an instance of the Q7 FIR filter structure.
simon 0:1014af42efd9 46 * @param[in] *pSrc points to the block of input data.
simon 0:1014af42efd9 47 * @param[out] *pDst points to the block of output data.
simon 0:1014af42efd9 48 * @param[in] blockSize number of samples to process per call.
simon 0:1014af42efd9 49 * @return none.
simon 0:1014af42efd9 50 *
simon 0:1014af42efd9 51 * <b>Scaling and Overflow Behavior:</b>
simon 0:1014af42efd9 52 * \par
simon 0:1014af42efd9 53 * The function is implemented using a 32-bit internal accumulator.
simon 0:1014af42efd9 54 * Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result.
simon 0:1014af42efd9 55 * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.
simon 0:1014af42efd9 56 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
simon 0:1014af42efd9 57 * The accumulator is converted to 18.7 format by discarding the low 7 bits.
simon 0:1014af42efd9 58 * Finally, the result is truncated to 1.7 format.
simon 0:1014af42efd9 59 */
simon 0:1014af42efd9 60
simon 0:1014af42efd9 61 void arm_fir_q7(
simon 0:1014af42efd9 62 const arm_fir_instance_q7 * S,
simon 0:1014af42efd9 63 q7_t * pSrc,
simon 0:1014af42efd9 64 q7_t * pDst,
simon 0:1014af42efd9 65 uint32_t blockSize)
simon 0:1014af42efd9 66 {
simon 0:1014af42efd9 67 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
simon 0:1014af42efd9 68 uint32_t i, blkCnt; /* Loop counters */
simon 0:1014af42efd9 69 q7_t *pState = S->pState; /* State pointer */
simon 0:1014af42efd9 70 q7_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
simon 0:1014af42efd9 71 q7_t *px, *pb; /* Temporary pointers to state and coeff */
simon 0:1014af42efd9 72 q31_t acc = 0; /* Accumlator */
simon 0:1014af42efd9 73 q31_t input1, input2; /* Temporary variables to store input */
simon 0:1014af42efd9 74 q15_t in1, in2; /* Temporary variables to store input */
simon 0:1014af42efd9 75 q7_t *pStateCurnt; /* Points to the current sample of the state */
simon 0:1014af42efd9 76
simon 0:1014af42efd9 77
simon 0:1014af42efd9 78 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
simon 0:1014af42efd9 79 /* pStateCurnt points to the location where the new input data should be written */
simon 0:1014af42efd9 80 pStateCurnt = S->pState + (numTaps - 1u);
simon 0:1014af42efd9 81
simon 0:1014af42efd9 82 i = blockSize >> 2u;
simon 0:1014af42efd9 83
simon 0:1014af42efd9 84 /* Copy four new input samples into the state buffer.
simon 0:1014af42efd9 85 ** Use 32-bit SIMD to move the four 8-bit data. Only requires one copy for every four samples. */
simon 0:1014af42efd9 86 while(i > 0u)
simon 0:1014af42efd9 87 {
simon 0:1014af42efd9 88 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
simon 0:1014af42efd9 89 i--;
simon 0:1014af42efd9 90 }
simon 0:1014af42efd9 91
simon 0:1014af42efd9 92 i = blockSize % 0x4u;
simon 0:1014af42efd9 93
simon 0:1014af42efd9 94 /* Copy remining samples into the state buffer. */
simon 0:1014af42efd9 95 while(i > 0u)
simon 0:1014af42efd9 96 {
simon 0:1014af42efd9 97 *pStateCurnt++ = *pSrc++;
simon 0:1014af42efd9 98 i--;
simon 0:1014af42efd9 99 }
simon 0:1014af42efd9 100
simon 0:1014af42efd9 101 blkCnt = blockSize;
simon 0:1014af42efd9 102
simon 0:1014af42efd9 103 /* Perform filtering upto BlockSize - BlockSize%4 */
simon 0:1014af42efd9 104 while(blkCnt > 0u)
simon 0:1014af42efd9 105 {
simon 0:1014af42efd9 106 /* Set accumulator to zero */
simon 0:1014af42efd9 107 acc = 0;
simon 0:1014af42efd9 108
simon 0:1014af42efd9 109 /* Initialize state pointer of type q7 */
simon 0:1014af42efd9 110 px = pState;
simon 0:1014af42efd9 111
simon 0:1014af42efd9 112 /* Initialize coeff pointer of type q7 */
simon 0:1014af42efd9 113 pb = pCoeffs;
simon 0:1014af42efd9 114
simon 0:1014af42efd9 115 i = numTaps >> 2u;
simon 0:1014af42efd9 116
simon 0:1014af42efd9 117 /* Loop over the number of taps. Unroll by a factor of 4.
simon 0:1014af42efd9 118 ** Repeat until we've computed numTaps-4 coefficients. */
simon 0:1014af42efd9 119 while(i > 0u)
simon 0:1014af42efd9 120 {
simon 0:1014af42efd9 121 /* Reading two inputs of state buffer and packing */
simon 0:1014af42efd9 122 in1 = (q15_t) * px++;
simon 0:1014af42efd9 123 in2 = (q15_t) * px++;
simon 0:1014af42efd9 124 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16);
simon 0:1014af42efd9 125
simon 0:1014af42efd9 126 /* Reading two inputs of coefficient buffer and packing */
simon 0:1014af42efd9 127 in1 = (q15_t) * pb++;
simon 0:1014af42efd9 128 in2 = (q15_t) * pb++;
simon 0:1014af42efd9 129 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16);
simon 0:1014af42efd9 130
simon 0:1014af42efd9 131 /* Perform Multiply and accumlation of 2 packed inputs and coefficients using SMLALD and store the result in accumlator. */
simon 0:1014af42efd9 132 acc = __SMLAD(input1, input2, acc);
simon 0:1014af42efd9 133
simon 0:1014af42efd9 134 /* Reading two inputs of state buffer and packing */
simon 0:1014af42efd9 135 in1 = (q15_t) * px++;
simon 0:1014af42efd9 136 in2 = (q15_t) * px++;
simon 0:1014af42efd9 137 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16);
simon 0:1014af42efd9 138
simon 0:1014af42efd9 139 /* Reading two inputs of coefficient buffer and packing */
simon 0:1014af42efd9 140 in1 = (q15_t) * pb++;
simon 0:1014af42efd9 141 in2 = (q15_t) * pb++;
simon 0:1014af42efd9 142 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16);
simon 0:1014af42efd9 143
simon 0:1014af42efd9 144 /* Perform Multiply and accumlation of 2 packed inputs and coefficients using SMLALD and store the result in accumlator. */
simon 0:1014af42efd9 145 acc = __SMLAD(input1, input2, acc);
simon 0:1014af42efd9 146
simon 0:1014af42efd9 147 /* Decrement the tap loop counter */
simon 0:1014af42efd9 148 i--;
simon 0:1014af42efd9 149 }
simon 0:1014af42efd9 150
simon 0:1014af42efd9 151 i = numTaps % 0x4u;
simon 0:1014af42efd9 152
simon 0:1014af42efd9 153 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
simon 0:1014af42efd9 154 while(i > 0u)
simon 0:1014af42efd9 155 {
simon 0:1014af42efd9 156 acc = __SMLAD(*px++, *pb++, acc);
simon 0:1014af42efd9 157 i--;
simon 0:1014af42efd9 158
simon 0:1014af42efd9 159 }
simon 0:1014af42efd9 160
simon 0:1014af42efd9 161 /* Saturate output */
simon 0:1014af42efd9 162 acc = __SSAT((acc >> 7), 8);
simon 0:1014af42efd9 163
simon 0:1014af42efd9 164 /*Store filter output */
simon 0:1014af42efd9 165 *pDst++ = (q7_t) (acc);
simon 0:1014af42efd9 166
simon 0:1014af42efd9 167 /* Advance the state pointer by 1 to process the next sample */
simon 0:1014af42efd9 168 pState = pState + 1;
simon 0:1014af42efd9 169
simon 0:1014af42efd9 170 /* Decrement the loop counter */
simon 0:1014af42efd9 171 blkCnt--;
simon 0:1014af42efd9 172 }
simon 0:1014af42efd9 173
simon 0:1014af42efd9 174 /* Processing is complete.
simon 0:1014af42efd9 175 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
simon 0:1014af42efd9 176 ** This prepares the state buffer for the next function call. */
simon 0:1014af42efd9 177
simon 0:1014af42efd9 178 /* Points to the start of the state buffer */
simon 0:1014af42efd9 179 pStateCurnt = S->pState;
simon 0:1014af42efd9 180
simon 0:1014af42efd9 181 /* Calculation of count for copying integer writes */
simon 0:1014af42efd9 182 i = (numTaps - 1u) >> 2u;
simon 0:1014af42efd9 183
simon 0:1014af42efd9 184 /* Copy four values using integer pointer */
simon 0:1014af42efd9 185 while(i > 0u)
simon 0:1014af42efd9 186 {
simon 0:1014af42efd9 187 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
simon 0:1014af42efd9 188
simon 0:1014af42efd9 189 i--;
simon 0:1014af42efd9 190
simon 0:1014af42efd9 191 }
simon 0:1014af42efd9 192
simon 0:1014af42efd9 193 /* Calculation of count for remaining q7_t data */
simon 0:1014af42efd9 194 i = (numTaps - 1u) % 0x4u;
simon 0:1014af42efd9 195
simon 0:1014af42efd9 196 /* Copy of remaining q7_t data */
simon 0:1014af42efd9 197 while(i > 0u)
simon 0:1014af42efd9 198 {
simon 0:1014af42efd9 199 *pStateCurnt++ = *pState++;
simon 0:1014af42efd9 200 i--;
simon 0:1014af42efd9 201 }
simon 0:1014af42efd9 202
simon 0:1014af42efd9 203 }
simon 0:1014af42efd9 204
simon 0:1014af42efd9 205 /**
simon 0:1014af42efd9 206 * @} end of FIR group
simon 0:1014af42efd9 207 */