CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Committer:
simon
Date:
Thu Mar 10 15:07:50 2011 +0000
Revision:
0:1014af42efd9

        

Who changed what in which revision?

User | Revision | Line number | New contents of line
simon 0:1014af42efd9 1 /* ----------------------------------------------------------------------
simon 0:1014af42efd9 2 * Copyright (C) 2010 ARM Limited. All rights reserved.
simon 0:1014af42efd9 3 *
simon 0:1014af42efd9 4 * $Date: 29. November 2010
simon 0:1014af42efd9 5 * $Revision: V1.0.3
simon 0:1014af42efd9 6 *
simon 0:1014af42efd9 7 * Project: CMSIS DSP Library
simon 0:1014af42efd9 8 * Title: arm_fir_sparse_q15.c
simon 0:1014af42efd9 9 *
simon 0:1014af42efd9 10 * Description: Q15 sparse FIR filter processing function.
simon 0:1014af42efd9 11 *
simon 0:1014af42efd9 12 * Target Processor: Cortex-M4/Cortex-M3
simon 0:1014af42efd9 13 *
simon 0:1014af42efd9 14 * Version 1.0.3 2010/11/29
simon 0:1014af42efd9 15 * Re-organized the CMSIS folders and updated documentation.
simon 0:1014af42efd9 16 *
simon 0:1014af42efd9 17 * Version 1.0.2 2010/11/11
simon 0:1014af42efd9 18 * Documentation updated.
simon 0:1014af42efd9 19 *
simon 0:1014af42efd9 20 * Version 1.0.1 2010/10/05
simon 0:1014af42efd9 21 * Production release and review comments incorporated.
simon 0:1014af42efd9 22 *
simon 0:1014af42efd9 23 * Version 1.0.0 2010/09/20
simon 0:1014af42efd9 24 * Production release and review comments incorporated
simon 0:1014af42efd9 25 *
simon 0:1014af42efd9 26 * Version 0.0.7 2010/06/10
simon 0:1014af42efd9 27 * Misra-C changes done
simon 0:1014af42efd9 28 * ------------------------------------------------------------------- */
simon 0:1014af42efd9 29 #include "arm_math.h"
simon 0:1014af42efd9 30
simon 0:1014af42efd9 31 /**
simon 0:1014af42efd9 32 * @addtogroup FIR_Sparse
simon 0:1014af42efd9 33 * @{
simon 0:1014af42efd9 34 */
simon 0:1014af42efd9 35
simon 0:1014af42efd9 36 /**
simon 0:1014af42efd9 37 * @brief Processing function for the Q15 sparse FIR filter.
simon 0:1014af42efd9 38 * @param[in] *S points to an instance of the Q15 sparse FIR structure.
simon 0:1014af42efd9 39 * @param[in] *pSrc points to the block of input data.
simon 0:1014af42efd9 40 * @param[out] *pDst points to the block of output data
simon 0:1014af42efd9 41 * @param[in] *pScratchIn points to a temporary buffer of size blockSize.
simon 0:1014af42efd9 42 * @param[in] *pScratchOut points to a temporary buffer of size blockSize.
simon 0:1014af42efd9 43 * @param[in] blockSize number of input samples to process per call.
simon 0:1014af42efd9 44 * @return none.
simon 0:1014af42efd9 45 *
simon 0:1014af42efd9 46 * <b>Scaling and Overflow Behavior:</b>
simon 0:1014af42efd9 47 * \par
simon 0:1014af42efd9 48 * The function is implemented using an internal 32-bit accumulator.
simon 0:1014af42efd9 49 * The 1.15 x 1.15 multiplications yield a 2.30 result and these are added to a 2.30 accumulator.
simon 0:1014af42efd9 50 * Thus the full precision of the multiplications is maintained but there is only a single guard bit in the accumulator.
simon 0:1014af42efd9 51 * If the accumulator result overflows it will wrap around rather than saturate.
simon 0:1014af42efd9 52 * After all multiply-accumulates are performed, the 2.30 accumulator is truncated to 2.15 format and then saturated to 1.15 format.
simon 0:1014af42efd9 53 * In order to avoid overflows the input signal or coefficients must be scaled down by log2(numTaps) bits.
simon 0:1014af42efd9 54 */
simon 0:1014af42efd9 55
simon 0:1014af42efd9 56
simon 0:1014af42efd9 57 void arm_fir_sparse_q15(
simon 0:1014af42efd9 58 arm_fir_sparse_instance_q15 * S,
simon 0:1014af42efd9 59 q15_t * pSrc,
simon 0:1014af42efd9 60 q15_t * pDst,
simon 0:1014af42efd9 61 q15_t * pScratchIn,
simon 0:1014af42efd9 62 q31_t * pScratchOut,
simon 0:1014af42efd9 63 uint32_t blockSize)
simon 0:1014af42efd9 64 {
simon 0:1014af42efd9 65
simon 0:1014af42efd9 66 q15_t *pState = S->pState; /* State pointer */
simon 0:1014af42efd9 67 q15_t *pIn = (q15_t *) pSrc; /* Working pointer for input */
simon 0:1014af42efd9 68 q15_t *pOut = pDst; /* Working pointer for output */
simon 0:1014af42efd9 69 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
simon 0:1014af42efd9 70 q15_t *px; /* Temporary pointers for scratch buffer */
simon 0:1014af42efd9 71 q15_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */
simon 0:1014af42efd9 72 q15_t *py = pState; /* Temporary pointers for state buffer */
simon 0:1014af42efd9 73 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */
simon 0:1014af42efd9 74 uint32_t delaySize = S->maxDelay + blockSize; /* state length */
simon 0:1014af42efd9 75 uint16_t numTaps = S->numTaps; /* Filter order */
simon 0:1014af42efd9 76 int32_t readIndex; /* Read index of the state buffer */
simon 0:1014af42efd9 77 uint32_t tapCnt, blkCnt; /* loop counters */
simon 0:1014af42efd9 78 q15_t coeff = *pCoeffs++; /* Read the first coefficient value */
simon 0:1014af42efd9 79 q31_t *pScr2 = pScratchOut; /* Working pointer for pScratchOut */
simon 0:1014af42efd9 80 q31_t in1, in2; /* Temporary variables */
simon 0:1014af42efd9 81
simon 0:1014af42efd9 82
simon 0:1014af42efd9 83
simon 0:1014af42efd9 84 /* BlockSize of Input samples are copied into the state buffer */
simon 0:1014af42efd9 85 /* StateIndex points to the starting position to write in the state buffer */
simon 0:1014af42efd9 86 arm_circularWrite_q15(py, delaySize, &S->stateIndex, 1, pIn, 1, blockSize);
simon 0:1014af42efd9 87
simon 0:1014af42efd9 88 /* Loop over the number of taps. */
simon 0:1014af42efd9 89 tapCnt = numTaps;
simon 0:1014af42efd9 90
simon 0:1014af42efd9 91 /* Read Index, from where the state buffer should be read, is calculated. */
simon 0:1014af42efd9 92 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
simon 0:1014af42efd9 93
simon 0:1014af42efd9 94 /* Wraparound of readIndex */
simon 0:1014af42efd9 95 if(readIndex < 0)
simon 0:1014af42efd9 96 {
simon 0:1014af42efd9 97 readIndex += (int32_t) delaySize;
simon 0:1014af42efd9 98 }
simon 0:1014af42efd9 99
simon 0:1014af42efd9 100 /* Working pointer for state buffer is updated */
simon 0:1014af42efd9 101 py = pState;
simon 0:1014af42efd9 102
simon 0:1014af42efd9 103 /* blockSize samples are read from the state buffer */
simon 0:1014af42efd9 104 arm_circularRead_q15(py, delaySize, &readIndex, 1,
simon 0:1014af42efd9 105 pb, pb, blockSize, 1, blockSize);
simon 0:1014af42efd9 106
simon 0:1014af42efd9 107 /* Working pointer for the scratch buffer of state values */
simon 0:1014af42efd9 108 px = pb;
simon 0:1014af42efd9 109
simon 0:1014af42efd9 110 /* Working pointer for scratch buffer of output values */
simon 0:1014af42efd9 111 pScratchOut = pScr2;
simon 0:1014af42efd9 112
simon 0:1014af42efd9 113 /* Loop over the blockSize. Unroll by a factor of 4.
simon 0:1014af42efd9 114 * Compute 4 multiplications at a time. */
simon 0:1014af42efd9 115 blkCnt = blockSize >> 2;
simon 0:1014af42efd9 116
simon 0:1014af42efd9 117 while(blkCnt > 0u)
simon 0:1014af42efd9 118 {
simon 0:1014af42efd9 119 /* Perform multiplication and store in the scratch buffer */
simon 0:1014af42efd9 120 *pScratchOut++ = ((q31_t) * px++ * coeff);
simon 0:1014af42efd9 121 *pScratchOut++ = ((q31_t) * px++ * coeff);
simon 0:1014af42efd9 122 *pScratchOut++ = ((q31_t) * px++ * coeff);
simon 0:1014af42efd9 123 *pScratchOut++ = ((q31_t) * px++ * coeff);
simon 0:1014af42efd9 124
simon 0:1014af42efd9 125 /* Decrement the loop counter */
simon 0:1014af42efd9 126 blkCnt--;
simon 0:1014af42efd9 127 }
simon 0:1014af42efd9 128
simon 0:1014af42efd9 129 /* If the blockSize is not a multiple of 4,
simon 0:1014af42efd9 130 * compute the remaining samples */
simon 0:1014af42efd9 131 blkCnt = blockSize % 0x4u;
simon 0:1014af42efd9 132
simon 0:1014af42efd9 133 while(blkCnt > 0u)
simon 0:1014af42efd9 134 {
simon 0:1014af42efd9 135 /* Perform multiplication and store in the scratch buffer */
simon 0:1014af42efd9 136 *pScratchOut++ = ((q31_t) * px++ * coeff);
simon 0:1014af42efd9 137
simon 0:1014af42efd9 138 /* Decrement the loop counter */
simon 0:1014af42efd9 139 blkCnt--;
simon 0:1014af42efd9 140 }
simon 0:1014af42efd9 141
simon 0:1014af42efd9 142 /* Load the coefficient value and
simon 0:1014af42efd9 143 * increment the coefficient buffer for the next set of state values */
simon 0:1014af42efd9 144 coeff = *pCoeffs++;
simon 0:1014af42efd9 145
simon 0:1014af42efd9 146 /* Read Index, from where the state buffer should be read, is calculated. */
simon 0:1014af42efd9 147 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
simon 0:1014af42efd9 148
simon 0:1014af42efd9 149 /* Wraparound of readIndex */
simon 0:1014af42efd9 150 if(readIndex < 0)
simon 0:1014af42efd9 151 {
simon 0:1014af42efd9 152 readIndex += (int32_t) delaySize;
simon 0:1014af42efd9 153 }
simon 0:1014af42efd9 154
simon 0:1014af42efd9 155 /* Loop over the number of taps. */
simon 0:1014af42efd9 156 tapCnt = (uint32_t) numTaps - 1u;
simon 0:1014af42efd9 157
simon 0:1014af42efd9 158 while(tapCnt > 0u)
simon 0:1014af42efd9 159 {
simon 0:1014af42efd9 160 /* Working pointer for state buffer is updated */
simon 0:1014af42efd9 161 py = pState;
simon 0:1014af42efd9 162
simon 0:1014af42efd9 163 /* blockSize samples are read from the state buffer */
simon 0:1014af42efd9 164 arm_circularRead_q15(py, delaySize, &readIndex, 1,
simon 0:1014af42efd9 165 pb, pb, blockSize, 1, blockSize);
simon 0:1014af42efd9 166
simon 0:1014af42efd9 167 /* Working pointer for the scratch buffer of state values */
simon 0:1014af42efd9 168 px = pb;
simon 0:1014af42efd9 169
simon 0:1014af42efd9 170 /* Working pointer for scratch buffer of output values */
simon 0:1014af42efd9 171 pScratchOut = pScr2;
simon 0:1014af42efd9 172
simon 0:1014af42efd9 173 /* Loop over the blockSize. Unroll by a factor of 4.
simon 0:1014af42efd9 174 * Compute 4 MACS at a time. */
simon 0:1014af42efd9 175 blkCnt = blockSize >> 2;
simon 0:1014af42efd9 176
simon 0:1014af42efd9 177 while(blkCnt > 0u)
simon 0:1014af42efd9 178 {
simon 0:1014af42efd9 179 /* Perform Multiply-Accumulate */
simon 0:1014af42efd9 180 *pScratchOut++ += (q31_t) * px++ * coeff;
simon 0:1014af42efd9 181 *pScratchOut++ += (q31_t) * px++ * coeff;
simon 0:1014af42efd9 182 *pScratchOut++ += (q31_t) * px++ * coeff;
simon 0:1014af42efd9 183 *pScratchOut++ += (q31_t) * px++ * coeff;
simon 0:1014af42efd9 184
simon 0:1014af42efd9 185 /* Decrement the loop counter */
simon 0:1014af42efd9 186 blkCnt--;
simon 0:1014af42efd9 187 }
simon 0:1014af42efd9 188
simon 0:1014af42efd9 189 /* If the blockSize is not a multiple of 4,
simon 0:1014af42efd9 190 * compute the remaining samples */
simon 0:1014af42efd9 191 blkCnt = blockSize % 0x4u;
simon 0:1014af42efd9 192
simon 0:1014af42efd9 193 while(blkCnt > 0u)
simon 0:1014af42efd9 194 {
simon 0:1014af42efd9 195 /* Perform Multiply-Accumulate */
simon 0:1014af42efd9 196 *pScratchOut++ += (q31_t) * px++ * coeff;
simon 0:1014af42efd9 197
simon 0:1014af42efd9 198 /* Decrement the loop counter */
simon 0:1014af42efd9 199 blkCnt--;
simon 0:1014af42efd9 200 }
simon 0:1014af42efd9 201
simon 0:1014af42efd9 202 /* Load the coefficient value and
simon 0:1014af42efd9 203 * increment the coefficient buffer for the next set of state values */
simon 0:1014af42efd9 204 coeff = *pCoeffs++;
simon 0:1014af42efd9 205
simon 0:1014af42efd9 206 /* Read Index, from where the state buffer should be read, is calculated. */
simon 0:1014af42efd9 207 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
simon 0:1014af42efd9 208
simon 0:1014af42efd9 209 /* Wraparound of readIndex */
simon 0:1014af42efd9 210 if(readIndex < 0)
simon 0:1014af42efd9 211 {
simon 0:1014af42efd9 212 readIndex += (int32_t) delaySize;
simon 0:1014af42efd9 213 }
simon 0:1014af42efd9 214
simon 0:1014af42efd9 215 /* Decrement the tap loop counter */
simon 0:1014af42efd9 216 tapCnt--;
simon 0:1014af42efd9 217 }
simon 0:1014af42efd9 218
simon 0:1014af42efd9 219 /* All the output values are in pScratchOut buffer.
simon 0:1014af42efd9 220 Convert them into 1.15 format, saturate and store in the destination buffer. */
simon 0:1014af42efd9 221 /* Loop over the blockSize. */
simon 0:1014af42efd9 222 blkCnt = blockSize >> 2;
simon 0:1014af42efd9 223
simon 0:1014af42efd9 224 while(blkCnt > 0u)
simon 0:1014af42efd9 225 {
simon 0:1014af42efd9 226 in1 = *pScr2++;
simon 0:1014af42efd9 227 in2 = *pScr2++;
simon 0:1014af42efd9 228 *__SIMD32(pOut)++ =
simon 0:1014af42efd9 229 __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16),
simon 0:1014af42efd9 230 16);
simon 0:1014af42efd9 231
simon 0:1014af42efd9 232 in1 = *pScr2++;
simon 0:1014af42efd9 233 in2 = *pScr2++;
simon 0:1014af42efd9 234 *__SIMD32(pOut)++ =
simon 0:1014af42efd9 235 __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16),
simon 0:1014af42efd9 236 16);
simon 0:1014af42efd9 237
simon 0:1014af42efd9 238 blkCnt--;
simon 0:1014af42efd9 239
simon 0:1014af42efd9 240 }
simon 0:1014af42efd9 241
simon 0:1014af42efd9 242 /* If the blockSize is not a multiple of 4,
simon 0:1014af42efd9 243 remaining samples are processed in the below loop */
simon 0:1014af42efd9 244 blkCnt = blockSize % 0x4u;
simon 0:1014af42efd9 245
simon 0:1014af42efd9 246 while(blkCnt > 0u)
simon 0:1014af42efd9 247 {
simon 0:1014af42efd9 248 *pOut++ = (q15_t) __SSAT(*pScr2++ >> 15, 16);
simon 0:1014af42efd9 249 blkCnt--;
simon 0:1014af42efd9 250 }
simon 0:1014af42efd9 251 }
simon 0:1014af42efd9 252
simon 0:1014af42efd9 253 /**
simon 0:1014af42efd9 254 * @} end of FIR_Sparse group
simon 0:1014af42efd9 255 */