CMSIS DSP library
Dependents: KL25Z_FFT_Demo, Hat_Board_v5_1, KL25Z_FFT_Demo_tony, KL25Z_FFT_Demo_tony, ... more
Fork of mbed-dsp
Diff: cmsis_dsp/FilteringFunctions/arm_conv_fast_q15.c
- Revision: 2:da51fb522205
- Parent: 1:fdd22bb7aa52
- Child: 3:7a284390b0ce
--- a/cmsis_dsp/FilteringFunctions/arm_conv_fast_q15.c Wed Nov 28 12:30:09 2012 +0000 +++ b/cmsis_dsp/FilteringFunctions/arm_conv_fast_q15.c Thu May 30 17:10:11 2013 +0100 @@ -2,12 +2,12 @@ * Copyright (C) 2010 ARM Limited. All rights reserved. * * $Date: 15. February 2012 -* $Revision: V1.1.0 +* $Revision: V1.1.0 * -* Project: CMSIS DSP Library -* Title: arm_conv_fast_q15.c +* Project: CMSIS DSP Library +* Title: arm_conv_fast_q15.c * -* Description: Fast Q15 Convolution. +* Description: Fast Q15 Convolution. * * Target Processor: Cortex-M4/Cortex-M3 * @@ -296,7 +296,7 @@ x0 = *__SIMD32(px); /* read x[1], x[2] samples */ x1 = _SIMD32_OFFSET(px+1); - px+= 2u; + px+= 2u; /* Apply loop unrolling and compute 4 MACs simultaneously. */ @@ -342,7 +342,7 @@ /* Read x[5], x[6] */ x1 = _SIMD32_OFFSET(px+3); - px += 4u; + px += 4u; /* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */ acc2 = __SMLADX(x0, c0, acc2); @@ -376,7 +376,7 @@ /* Read x[7] */ x3 = *__SIMD32(px); - px++; + px++; /* Perform the multiply-accumulates */ acc0 = __SMLAD(x0, c0, acc0); @@ -395,7 +395,7 @@ /* Read x[9] */ x2 = _SIMD32_OFFSET(px+1); - px += 2u; + px += 2u; /* Perform the multiply-accumulates */ acc0 = __SMLADX(x0, c0, acc0); @@ -422,7 +422,7 @@ acc3 = __SMLADX(x2, c0, acc3); /* Read y[srcBLen - 7] */ - c0 = *(py-1); + c0 = *(py-1); #ifdef ARM_MATH_BIG_ENDIAN c0 = c0 << 16u; @@ -433,7 +433,7 @@ /* Read x[10] */ x3 = _SIMD32_OFFSET(px+2); - px += 3u; + px += 3u; /* Perform the multiply-accumulates */ acc0 = __SMLADX(x1, c0, acc0); @@ -809,7 +809,7 @@ /* First part of the processing with loop unrolling. Compute 4 MACs at a time. ** a second loop below computes MACs for the remaining 1 to 3 samples. 
*/ - py++; + py++; while(k > 0u) { @@ -891,25 +891,25 @@ acc0 = 0; acc1 = 0; acc2 = 0; - acc3 = 0; + acc3 = 0; /* read x[0], x[1] samples */ - a = *px++; - b = *px++; + a = *px++; + b = *px++; #ifndef ARM_MATH_BIG_ENDIAN - - x0 = __PKHBT(a, b, 16); - a = *px; - x1 = __PKHBT(b, a, 16); + + x0 = __PKHBT(a, b, 16); + a = *px; + x1 = __PKHBT(b, a, 16); #else - x0 = __PKHBT(b, a, 16); - a = *px; - x1 = __PKHBT(a, b, 16); + x0 = __PKHBT(b, a, 16); + a = *px; + x1 = __PKHBT(a, b, 16); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ /* Apply loop unrolling and compute 4 MACs simultaneously. */ k = srcBLen >> 2u; @@ -920,19 +920,19 @@ { /* Read the last two inputB samples using SIMD: * y[srcBLen - 1] and y[srcBLen - 2] */ - a = *py; - b = *(py+1); - py -= 2; + a = *py; + b = *(py+1); + py -= 2; #ifndef ARM_MATH_BIG_ENDIAN - c0 = __PKHBT(a, b, 16); + c0 = __PKHBT(a, b, 16); #else - c0 = __PKHBT(b, a, 16);; + c0 = __PKHBT(b, a, 16);; -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ /* acc0 += x[0] * y[srcBLen - 1] + x[1] * y[srcBLen - 2] */ acc0 = __SMLADX(x0, c0, acc0); @@ -940,22 +940,22 @@ /* acc1 += x[1] * y[srcBLen - 1] + x[2] * y[srcBLen - 2] */ acc1 = __SMLADX(x1, c0, acc1); - a = *px; - b = *(px + 1); + a = *px; + b = *(px + 1); #ifndef ARM_MATH_BIG_ENDIAN - - x2 = __PKHBT(a, b, 16); - a = *(px + 2); - x3 = __PKHBT(b, a, 16); + + x2 = __PKHBT(a, b, 16); + a = *(px + 2); + x3 = __PKHBT(b, a, 16); #else - x2 = __PKHBT(b, a, 16); - a = *(px + 2); - x3 = __PKHBT(a, b, 16); + x2 = __PKHBT(b, a, 16); + a = *(px + 2); + x3 = __PKHBT(a, b, 16); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ /* acc2 += x[2] * y[srcBLen - 1] + x[3] * y[srcBLen - 2] */ acc2 = __SMLADX(x2, c0, acc2); @@ -964,19 +964,19 @@ acc3 = __SMLADX(x3, c0, acc3); /* Read y[srcBLen - 3] and y[srcBLen - 4] */ - a = *py; - b = *(py+1); - py -= 2; + a = *py; + b = *(py+1); + py -= 2; #ifndef 
ARM_MATH_BIG_ENDIAN - c0 = __PKHBT(a, b, 16); + c0 = __PKHBT(a, b, 16); #else - c0 = __PKHBT(b, a, 16);; + c0 = __PKHBT(b, a, 16);; -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ /* acc0 += x[2] * y[srcBLen - 3] + x[3] * y[srcBLen - 4] */ acc0 = __SMLADX(x2, c0, acc0); @@ -985,24 +985,24 @@ acc1 = __SMLADX(x3, c0, acc1); /* Read x[4], x[5], x[6] */ - a = *(px + 2); - b = *(px + 3); + a = *(px + 2); + b = *(px + 3); #ifndef ARM_MATH_BIG_ENDIAN - - x0 = __PKHBT(a, b, 16); - a = *(px + 4); - x1 = __PKHBT(b, a, 16); + + x0 = __PKHBT(a, b, 16); + a = *(px + 4); + x1 = __PKHBT(b, a, 16); #else - x0 = __PKHBT(b, a, 16); - a = *(px + 4); - x1 = __PKHBT(a, b, 16); + x0 = __PKHBT(b, a, 16); + a = *(px + 4); + x1 = __PKHBT(a, b, 16); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - px += 4u; + px += 4u; /* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */ acc2 = __SMLADX(x0, c0, acc2); @@ -1035,19 +1035,19 @@ #endif /* #ifdef ARM_MATH_BIG_ENDIAN */ /* Read x[7] */ - a = *px; - b = *(px+1); - px++; + a = *px; + b = *(px+1); + px++; #ifndef ARM_MATH_BIG_ENDIAN - x3 = __PKHBT(a, b, 16); + x3 = __PKHBT(a, b, 16); #else - x3 = __PKHBT(b, a, 16);; + x3 = __PKHBT(b, a, 16);; -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ /* Perform the multiply-accumulates */ @@ -1060,37 +1060,37 @@ if(k == 2u) { /* Read y[srcBLen - 5], y[srcBLen - 6] */ - a = *py; - b = *(py+1); + a = *py; + b = *(py+1); #ifndef ARM_MATH_BIG_ENDIAN - c0 = __PKHBT(a, b, 16); + c0 = __PKHBT(a, b, 16); #else - c0 = __PKHBT(b, a, 16);; + c0 = __PKHBT(b, a, 16);; -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ /* Read x[7], x[8], x[9] */ - a = *px; - b = *(px + 1); + a = *px; + b = *(px + 1); #ifndef ARM_MATH_BIG_ENDIAN - - x3 = __PKHBT(a, b, 16); - a = *(px + 2); - x2 = __PKHBT(b, a, 16); + + x3 = __PKHBT(a, b, 16); + a = *(px + 2); + x2 = __PKHBT(b, a, 16); #else - 
x3 = __PKHBT(b, a, 16); - a = *(px + 2); - x2 = __PKHBT(a, b, 16); + x3 = __PKHBT(b, a, 16); + a = *(px + 2); + x2 = __PKHBT(a, b, 16); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - px += 2u; +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ + px += 2u; /* Perform the multiply-accumulates */ acc0 = __SMLADX(x0, c0, acc0); @@ -1102,36 +1102,36 @@ if(k == 3u) { /* Read y[srcBLen - 5], y[srcBLen - 6] */ - a = *py; - b = *(py+1); + a = *py; + b = *(py+1); #ifndef ARM_MATH_BIG_ENDIAN - c0 = __PKHBT(a, b, 16); + c0 = __PKHBT(a, b, 16); #else - c0 = __PKHBT(b, a, 16);; + c0 = __PKHBT(b, a, 16);; -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ /* Read x[7], x[8], x[9] */ - a = *px; - b = *(px + 1); + a = *px; + b = *(px + 1); #ifndef ARM_MATH_BIG_ENDIAN - - x3 = __PKHBT(a, b, 16); - a = *(px + 2); - x2 = __PKHBT(b, a, 16); + + x3 = __PKHBT(a, b, 16); + a = *(px + 2); + x2 = __PKHBT(b, a, 16); #else - x3 = __PKHBT(b, a, 16); - a = *(px + 2); - x2 = __PKHBT(a, b, 16); + x3 = __PKHBT(b, a, 16); + a = *(px + 2); + x2 = __PKHBT(a, b, 16); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ /* Perform the multiply-accumulates */ acc0 = __SMLADX(x0, c0, acc0); @@ -1140,7 +1140,7 @@ acc3 = __SMLADX(x2, c0, acc3); /* Read y[srcBLen - 7] */ - c0 = *(py-1); + c0 = *(py-1); #ifdef ARM_MATH_BIG_ENDIAN c0 = c0 << 16u; @@ -1150,20 +1150,20 @@ #endif /* #ifdef ARM_MATH_BIG_ENDIAN */ /* Read x[10] */ - a = *(px+2); - b = *(px+3); + a = *(px+2); + b = *(px+3); #ifndef ARM_MATH_BIG_ENDIAN - x3 = __PKHBT(a, b, 16); + x3 = __PKHBT(a, b, 16); #else - x3 = __PKHBT(b, a, 16);; + x3 = __PKHBT(b, a, 16);; -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - px += 3u; + px += 3u; /* Perform the multiply-accumulates */ acc0 = __SMLADX(x1, c0, acc0); @@ -1173,10 +1173,10 @@ } /* Store the results in the accumulators in the destination buffer. 
*/ - *pOut++ = (q15_t)(acc0 >> 15); - *pOut++ = (q15_t)(acc1 >> 15); - *pOut++ = (q15_t)(acc2 >> 15); - *pOut++ = (q15_t)(acc3 >> 15); + *pOut++ = (q15_t)(acc0 >> 15); + *pOut++ = (q15_t)(acc1 >> 15); + *pOut++ = (q15_t)(acc2 >> 15); + *pOut++ = (q15_t)(acc3 >> 15); /* Increment the pointer pIn1 index, count by 4 */ count += 4u; @@ -1325,10 +1325,10 @@ /* First part of the processing with loop unrolling. Compute 4 MACs at a time. ** a second loop below computes MACs for the remaining 1 to 3 samples. */ - py++; + py++; while(k > 0u) - { + { sum += ((q31_t) * px++ * *py--); sum += ((q31_t) * px++ * *py--); sum += ((q31_t) * px++ * *py--); @@ -1397,7 +1397,7 @@ blockSize3--; } -#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ +#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ } /**