V4.0.1 of the ARM CMSIS DSP libraries. Note that arm_bitreversal2.s, arm_cfft_f32.c and arm_rfft_fast_f32.c had to be removed. arm_bitreversal2.s will not assemble with the online tools. So, the fast f32 FFT functions are not yet available. All the other FFT functions are available.

Dependents:   MPU9150_Example fir_f32 fir_f32 MPU9150_nucleo_noni2cdev ... more

Committer:
emh203
Date:
Mon Jul 28 15:03:15 2014 +0000
Revision:
0:3d9c67d97d6f
1st working commit. Had to remove arm_bitreversal2.s, arm_cfft_f32.c and arm_rfft_fast_f32.c because the .s file will not assemble. For now I removed these functions so we could at least have a library for the other functions.

Who changed what in which revision?

User | Revision | Line number | New contents of line
emh203 0:3d9c67d97d6f 1 /* ----------------------------------------------------------------------
emh203 0:3d9c67d97d6f 2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
emh203 0:3d9c67d97d6f 3 *
emh203 0:3d9c67d97d6f 4 * $Date: 12. March 2014
emh203 0:3d9c67d97d6f 5 * $Revision: V1.4.3
emh203 0:3d9c67d97d6f 6 *
emh203 0:3d9c67d97d6f 7 * Project: CMSIS DSP Library
emh203 0:3d9c67d97d6f 8 * Title: arm_cfft_radix4_q31.c
emh203 0:3d9c67d97d6f 9 *
emh203 0:3d9c67d97d6f 10 * Description: This file has function definition of Radix-4 FFT & IFFT function and
emh203 0:3d9c67d97d6f 11 * In-place bit reversal using bit reversal table
emh203 0:3d9c67d97d6f 12 *
emh203 0:3d9c67d97d6f 13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
emh203 0:3d9c67d97d6f 14 *
emh203 0:3d9c67d97d6f 15 * Redistribution and use in source and binary forms, with or without
emh203 0:3d9c67d97d6f 16 * modification, are permitted provided that the following conditions
emh203 0:3d9c67d97d6f 17 * are met:
emh203 0:3d9c67d97d6f 18 * - Redistributions of source code must retain the above copyright
emh203 0:3d9c67d97d6f 19 * notice, this list of conditions and the following disclaimer.
emh203 0:3d9c67d97d6f 20 * - Redistributions in binary form must reproduce the above copyright
emh203 0:3d9c67d97d6f 21 * notice, this list of conditions and the following disclaimer in
emh203 0:3d9c67d97d6f 22 * the documentation and/or other materials provided with the
emh203 0:3d9c67d97d6f 23 * distribution.
emh203 0:3d9c67d97d6f 24 * - Neither the name of ARM LIMITED nor the names of its contributors
emh203 0:3d9c67d97d6f 25 * may be used to endorse or promote products derived from this
emh203 0:3d9c67d97d6f 26 * software without specific prior written permission.
emh203 0:3d9c67d97d6f 27 *
emh203 0:3d9c67d97d6f 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
emh203 0:3d9c67d97d6f 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
emh203 0:3d9c67d97d6f 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
emh203 0:3d9c67d97d6f 31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
emh203 0:3d9c67d97d6f 32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
emh203 0:3d9c67d97d6f 33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
emh203 0:3d9c67d97d6f 34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
emh203 0:3d9c67d97d6f 35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
emh203 0:3d9c67d97d6f 36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
emh203 0:3d9c67d97d6f 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
emh203 0:3d9c67d97d6f 38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
emh203 0:3d9c67d97d6f 39 * POSSIBILITY OF SUCH DAMAGE.
emh203 0:3d9c67d97d6f 40 * -------------------------------------------------------------------- */
emh203 0:3d9c67d97d6f 41
emh203 0:3d9c67d97d6f 42 #include "arm_math.h"
emh203 0:3d9c67d97d6f 43
emh203 0:3d9c67d97d6f 44 void arm_radix4_butterfly_inverse_q31(
emh203 0:3d9c67d97d6f 45 q31_t * pSrc,
emh203 0:3d9c67d97d6f 46 uint32_t fftLen,
emh203 0:3d9c67d97d6f 47 q31_t * pCoef,
emh203 0:3d9c67d97d6f 48 uint32_t twidCoefModifier);
emh203 0:3d9c67d97d6f 49
emh203 0:3d9c67d97d6f 50 void arm_radix4_butterfly_q31(
emh203 0:3d9c67d97d6f 51 q31_t * pSrc,
emh203 0:3d9c67d97d6f 52 uint32_t fftLen,
emh203 0:3d9c67d97d6f 53 q31_t * pCoef,
emh203 0:3d9c67d97d6f 54 uint32_t twidCoefModifier);
emh203 0:3d9c67d97d6f 55
emh203 0:3d9c67d97d6f 56 void arm_bitreversal_q31(
emh203 0:3d9c67d97d6f 57 q31_t * pSrc,
emh203 0:3d9c67d97d6f 58 uint32_t fftLen,
emh203 0:3d9c67d97d6f 59 uint16_t bitRevFactor,
emh203 0:3d9c67d97d6f 60 uint16_t * pBitRevTab);
emh203 0:3d9c67d97d6f 61
emh203 0:3d9c67d97d6f 62 /**
emh203 0:3d9c67d97d6f 63 * @ingroup groupTransforms
emh203 0:3d9c67d97d6f 64 */
emh203 0:3d9c67d97d6f 65
emh203 0:3d9c67d97d6f 66 /**
emh203 0:3d9c67d97d6f 67 * @addtogroup ComplexFFT
emh203 0:3d9c67d97d6f 68 * @{
emh203 0:3d9c67d97d6f 69 */
emh203 0:3d9c67d97d6f 70
emh203 0:3d9c67d97d6f 71 /**
emh203 0:3d9c67d97d6f 72 * @details
emh203 0:3d9c67d97d6f 73 * @brief Processing function for the Q31 CFFT/CIFFT.
emh203 0:3d9c67d97d6f 74 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure.
emh203 0:3d9c67d97d6f 75 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
emh203 0:3d9c67d97d6f 76 * @return none.
emh203 0:3d9c67d97d6f 77 *
emh203 0:3d9c67d97d6f 78 * \par Input and output formats:
emh203 0:3d9c67d97d6f 79 * \par
emh203 0:3d9c67d97d6f 80 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
emh203 0:3d9c67d97d6f 81 * Hence the output format is different for different FFT sizes.
emh203 0:3d9c67d97d6f 82 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
emh203 0:3d9c67d97d6f 83 * \par
emh203 0:3d9c67d97d6f 84 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
emh203 0:3d9c67d97d6f 85 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
emh203 0:3d9c67d97d6f 86 *
emh203 0:3d9c67d97d6f 87 */
emh203 0:3d9c67d97d6f 88
emh203 0:3d9c67d97d6f 89 void arm_cfft_radix4_q31(
emh203 0:3d9c67d97d6f 90 const arm_cfft_radix4_instance_q31 * S,
emh203 0:3d9c67d97d6f 91 q31_t * pSrc)
emh203 0:3d9c67d97d6f 92 {
emh203 0:3d9c67d97d6f 93 if(S->ifftFlag == 1u)
emh203 0:3d9c67d97d6f 94 {
emh203 0:3d9c67d97d6f 95 /* Complex IFFT radix-4 */
emh203 0:3d9c67d97d6f 96 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle,
emh203 0:3d9c67d97d6f 97 S->twidCoefModifier);
emh203 0:3d9c67d97d6f 98 }
emh203 0:3d9c67d97d6f 99 else
emh203 0:3d9c67d97d6f 100 {
emh203 0:3d9c67d97d6f 101 /* Complex FFT radix-4 */
emh203 0:3d9c67d97d6f 102 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle,
emh203 0:3d9c67d97d6f 103 S->twidCoefModifier);
emh203 0:3d9c67d97d6f 104 }
emh203 0:3d9c67d97d6f 105
emh203 0:3d9c67d97d6f 106
emh203 0:3d9c67d97d6f 107 if(S->bitReverseFlag == 1u)
emh203 0:3d9c67d97d6f 108 {
emh203 0:3d9c67d97d6f 109 /* Bit Reversal */
emh203 0:3d9c67d97d6f 110 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
emh203 0:3d9c67d97d6f 111 }
emh203 0:3d9c67d97d6f 112
emh203 0:3d9c67d97d6f 113 }
emh203 0:3d9c67d97d6f 114
emh203 0:3d9c67d97d6f 115 /**
emh203 0:3d9c67d97d6f 116 * @} end of ComplexFFT group
emh203 0:3d9c67d97d6f 117 */
emh203 0:3d9c67d97d6f 118
emh203 0:3d9c67d97d6f 119 /*
emh203 0:3d9c67d97d6f 120 * Radix-4 FFT algorithm used is :
emh203 0:3d9c67d97d6f 121 *
emh203 0:3d9c67d97d6f 122 * Input real and imaginary data:
emh203 0:3d9c67d97d6f 123 * x(n) = xa + j * ya
emh203 0:3d9c67d97d6f 124 * x(n+N/4 ) = xb + j * yb
emh203 0:3d9c67d97d6f 125 * x(n+N/2 ) = xc + j * yc
emh203 0:3d9c67d97d6f 126 * x(n+3N/4) = xd + j * yd
emh203 0:3d9c67d97d6f 127 *
emh203 0:3d9c67d97d6f 128 *
emh203 0:3d9c67d97d6f 129 * Output real and imaginary data:
emh203 0:3d9c67d97d6f 130 * x(4r) = xa'+ j * ya'
emh203 0:3d9c67d97d6f 131 * x(4r+1) = xb'+ j * yb'
emh203 0:3d9c67d97d6f 132 * x(4r+2) = xc'+ j * yc'
emh203 0:3d9c67d97d6f 133 * x(4r+3) = xd'+ j * yd'
emh203 0:3d9c67d97d6f 134 *
emh203 0:3d9c67d97d6f 135 *
emh203 0:3d9c67d97d6f 136 * Twiddle factors for radix-4 FFT:
emh203 0:3d9c67d97d6f 137 * Wn = co1 + j * (- si1)
emh203 0:3d9c67d97d6f 138 * W2n = co2 + j * (- si2)
emh203 0:3d9c67d97d6f 139 * W3n = co3 + j * (- si3)
emh203 0:3d9c67d97d6f 140 *
emh203 0:3d9c67d97d6f 141 * Butterfly implementation:
emh203 0:3d9c67d97d6f 142 * xa' = xa + xb + xc + xd
emh203 0:3d9c67d97d6f 143 * ya' = ya + yb + yc + yd
emh203 0:3d9c67d97d6f 144 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
emh203 0:3d9c67d97d6f 145 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
emh203 0:3d9c67d97d6f 146 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
emh203 0:3d9c67d97d6f 147 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
emh203 0:3d9c67d97d6f 148 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
emh203 0:3d9c67d97d6f 149 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
emh203 0:3d9c67d97d6f 150 *
emh203 0:3d9c67d97d6f 151 */
emh203 0:3d9c67d97d6f 152
emh203 0:3d9c67d97d6f 153 /**
emh203 0:3d9c67d97d6f 154 * @brief Core function for the Q31 CFFT butterfly process.
emh203 0:3d9c67d97d6f 155 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
emh203 0:3d9c67d97d6f 156 * @param[in] fftLen length of the FFT.
emh203 0:3d9c67d97d6f 157 * @param[in] *pCoef points to twiddle coefficient buffer.
emh203 0:3d9c67d97d6f 158 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
emh203 0:3d9c67d97d6f 159 * @return none.
emh203 0:3d9c67d97d6f 160 */
emh203 0:3d9c67d97d6f 161
emh203 0:3d9c67d97d6f 162 void arm_radix4_butterfly_q31(
emh203 0:3d9c67d97d6f 163 q31_t * pSrc,
emh203 0:3d9c67d97d6f 164 uint32_t fftLen,
emh203 0:3d9c67d97d6f 165 q31_t * pCoef,
emh203 0:3d9c67d97d6f 166 uint32_t twidCoefModifier)
emh203 0:3d9c67d97d6f 167 {
emh203 0:3d9c67d97d6f 168 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
emh203 0:3d9c67d97d6f 169 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
emh203 0:3d9c67d97d6f 170
emh203 0:3d9c67d97d6f 171 q31_t xa, xb, xc, xd;
emh203 0:3d9c67d97d6f 172 q31_t ya, yb, yc, yd;
emh203 0:3d9c67d97d6f 173 q31_t xa_out, xb_out, xc_out, xd_out;
emh203 0:3d9c67d97d6f 174 q31_t ya_out, yb_out, yc_out, yd_out;
emh203 0:3d9c67d97d6f 175
emh203 0:3d9c67d97d6f 176 q31_t *ptr1;
emh203 0:3d9c67d97d6f 177 q63_t xaya, xbyb, xcyc, xdyd;
emh203 0:3d9c67d97d6f 178 /* Total process is divided into three stages */
emh203 0:3d9c67d97d6f 179
emh203 0:3d9c67d97d6f 180 /* process first stage, middle stages, & last stage */
emh203 0:3d9c67d97d6f 181
emh203 0:3d9c67d97d6f 182
emh203 0:3d9c67d97d6f 183 /* start of first stage process */
emh203 0:3d9c67d97d6f 184
emh203 0:3d9c67d97d6f 185 /* Initializations for the first stage */
emh203 0:3d9c67d97d6f 186 n2 = fftLen;
emh203 0:3d9c67d97d6f 187 n1 = n2;
emh203 0:3d9c67d97d6f 188 /* n2 = fftLen/4 */
emh203 0:3d9c67d97d6f 189 n2 >>= 2u;
emh203 0:3d9c67d97d6f 190 i0 = 0u;
emh203 0:3d9c67d97d6f 191 ia1 = 0u;
emh203 0:3d9c67d97d6f 192
emh203 0:3d9c67d97d6f 193 j = n2;
emh203 0:3d9c67d97d6f 194
emh203 0:3d9c67d97d6f 195 /* Calculation of first stage */
emh203 0:3d9c67d97d6f 196 do
emh203 0:3d9c67d97d6f 197 {
emh203 0:3d9c67d97d6f 198 /* index calculation for the input as, */
emh203 0:3d9c67d97d6f 199 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emh203 0:3d9c67d97d6f 200 i1 = i0 + n2;
emh203 0:3d9c67d97d6f 201 i2 = i1 + n2;
emh203 0:3d9c67d97d6f 202 i3 = i2 + n2;
emh203 0:3d9c67d97d6f 203
emh203 0:3d9c67d97d6f 204 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
emh203 0:3d9c67d97d6f 205
emh203 0:3d9c67d97d6f 206 /* Butterfly implementation */
emh203 0:3d9c67d97d6f 207 /* xa + xc */
emh203 0:3d9c67d97d6f 208 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
emh203 0:3d9c67d97d6f 209 /* xa - xc */
emh203 0:3d9c67d97d6f 210 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
emh203 0:3d9c67d97d6f 211
emh203 0:3d9c67d97d6f 212 /* xb + xd */
emh203 0:3d9c67d97d6f 213 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
emh203 0:3d9c67d97d6f 214
emh203 0:3d9c67d97d6f 215 /* ya + yc */
emh203 0:3d9c67d97d6f 216 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
emh203 0:3d9c67d97d6f 217 /* ya - yc */
emh203 0:3d9c67d97d6f 218 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
emh203 0:3d9c67d97d6f 219
emh203 0:3d9c67d97d6f 220 /* xa' = xa + xb + xc + xd */
emh203 0:3d9c67d97d6f 221 pSrc[2u * i0] = (r1 + t1);
emh203 0:3d9c67d97d6f 222 /* (xa + xc) - (xb + xd) */
emh203 0:3d9c67d97d6f 223 r1 = r1 - t1;
emh203 0:3d9c67d97d6f 224 /* yb + yd */
emh203 0:3d9c67d97d6f 225 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
emh203 0:3d9c67d97d6f 226
emh203 0:3d9c67d97d6f 227 /* ya' = ya + yb + yc + yd */
emh203 0:3d9c67d97d6f 228 pSrc[(2u * i0) + 1u] = (s1 + t2);
emh203 0:3d9c67d97d6f 229
emh203 0:3d9c67d97d6f 230 /* (ya + yc) - (yb + yd) */
emh203 0:3d9c67d97d6f 231 s1 = s1 - t2;
emh203 0:3d9c67d97d6f 232
emh203 0:3d9c67d97d6f 233 /* yb - yd */
emh203 0:3d9c67d97d6f 234 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
emh203 0:3d9c67d97d6f 235 /* xb - xd */
emh203 0:3d9c67d97d6f 236 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
emh203 0:3d9c67d97d6f 237
emh203 0:3d9c67d97d6f 238 /* index calculation for the coefficients */
emh203 0:3d9c67d97d6f 239 ia2 = 2u * ia1;
emh203 0:3d9c67d97d6f 240 co2 = pCoef[ia2 * 2u];
emh203 0:3d9c67d97d6f 241 si2 = pCoef[(ia2 * 2u) + 1u];
emh203 0:3d9c67d97d6f 242
emh203 0:3d9c67d97d6f 243 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
emh203 0:3d9c67d97d6f 244 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
emh203 0:3d9c67d97d6f 245 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 246
emh203 0:3d9c67d97d6f 247 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
emh203 0:3d9c67d97d6f 248 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
emh203 0:3d9c67d97d6f 249 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 250
emh203 0:3d9c67d97d6f 251 /* (xa - xc) + (yb - yd) */
emh203 0:3d9c67d97d6f 252 r1 = r2 + t1;
emh203 0:3d9c67d97d6f 253 /* (xa - xc) - (yb - yd) */
emh203 0:3d9c67d97d6f 254 r2 = r2 - t1;
emh203 0:3d9c67d97d6f 255
emh203 0:3d9c67d97d6f 256 /* (ya - yc) - (xb - xd) */
emh203 0:3d9c67d97d6f 257 s1 = s2 - t2;
emh203 0:3d9c67d97d6f 258 /* (ya - yc) + (xb - xd) */
emh203 0:3d9c67d97d6f 259 s2 = s2 + t2;
emh203 0:3d9c67d97d6f 260
emh203 0:3d9c67d97d6f 261 co1 = pCoef[ia1 * 2u];
emh203 0:3d9c67d97d6f 262 si1 = pCoef[(ia1 * 2u) + 1u];
emh203 0:3d9c67d97d6f 263
emh203 0:3d9c67d97d6f 264 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
emh203 0:3d9c67d97d6f 265 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
emh203 0:3d9c67d97d6f 266 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 267
emh203 0:3d9c67d97d6f 268 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
emh203 0:3d9c67d97d6f 269 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
emh203 0:3d9c67d97d6f 270 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 271
emh203 0:3d9c67d97d6f 272 /* index calculation for the coefficients */
emh203 0:3d9c67d97d6f 273 ia3 = 3u * ia1;
emh203 0:3d9c67d97d6f 274 co3 = pCoef[ia3 * 2u];
emh203 0:3d9c67d97d6f 275 si3 = pCoef[(ia3 * 2u) + 1u];
emh203 0:3d9c67d97d6f 276
emh203 0:3d9c67d97d6f 277 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
emh203 0:3d9c67d97d6f 278 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
emh203 0:3d9c67d97d6f 279 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 280
emh203 0:3d9c67d97d6f 281 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
emh203 0:3d9c67d97d6f 282 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
emh203 0:3d9c67d97d6f 283 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 284
emh203 0:3d9c67d97d6f 285 /* Twiddle coefficients index modifier */
emh203 0:3d9c67d97d6f 286 ia1 = ia1 + twidCoefModifier;
emh203 0:3d9c67d97d6f 287
emh203 0:3d9c67d97d6f 288 /* Updating input index */
emh203 0:3d9c67d97d6f 289 i0 = i0 + 1u;
emh203 0:3d9c67d97d6f 290
emh203 0:3d9c67d97d6f 291 } while(--j);
emh203 0:3d9c67d97d6f 292
emh203 0:3d9c67d97d6f 293 /* end of first stage process */
emh203 0:3d9c67d97d6f 294
emh203 0:3d9c67d97d6f 295 /* data is in 5.27(q27) format */
emh203 0:3d9c67d97d6f 296
emh203 0:3d9c67d97d6f 297
emh203 0:3d9c67d97d6f 298 /* start of Middle stages process */
emh203 0:3d9c67d97d6f 299
emh203 0:3d9c67d97d6f 300
emh203 0:3d9c67d97d6f 301 /* each stage in middle stages provides two down scaling of the input */
emh203 0:3d9c67d97d6f 302
emh203 0:3d9c67d97d6f 303 twidCoefModifier <<= 2u;
emh203 0:3d9c67d97d6f 304
emh203 0:3d9c67d97d6f 305
emh203 0:3d9c67d97d6f 306 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emh203 0:3d9c67d97d6f 307 {
emh203 0:3d9c67d97d6f 308 /* Initializations for the first stage */
emh203 0:3d9c67d97d6f 309 n1 = n2;
emh203 0:3d9c67d97d6f 310 n2 >>= 2u;
emh203 0:3d9c67d97d6f 311 ia1 = 0u;
emh203 0:3d9c67d97d6f 312
emh203 0:3d9c67d97d6f 313 /* Calculation of first stage */
emh203 0:3d9c67d97d6f 314 for (j = 0u; j <= (n2 - 1u); j++)
emh203 0:3d9c67d97d6f 315 {
emh203 0:3d9c67d97d6f 316 /* index calculation for the coefficients */
emh203 0:3d9c67d97d6f 317 ia2 = ia1 + ia1;
emh203 0:3d9c67d97d6f 318 ia3 = ia2 + ia1;
emh203 0:3d9c67d97d6f 319 co1 = pCoef[ia1 * 2u];
emh203 0:3d9c67d97d6f 320 si1 = pCoef[(ia1 * 2u) + 1u];
emh203 0:3d9c67d97d6f 321 co2 = pCoef[ia2 * 2u];
emh203 0:3d9c67d97d6f 322 si2 = pCoef[(ia2 * 2u) + 1u];
emh203 0:3d9c67d97d6f 323 co3 = pCoef[ia3 * 2u];
emh203 0:3d9c67d97d6f 324 si3 = pCoef[(ia3 * 2u) + 1u];
emh203 0:3d9c67d97d6f 325 /* Twiddle coefficients index modifier */
emh203 0:3d9c67d97d6f 326 ia1 = ia1 + twidCoefModifier;
emh203 0:3d9c67d97d6f 327
emh203 0:3d9c67d97d6f 328 for (i0 = j; i0 < fftLen; i0 += n1)
emh203 0:3d9c67d97d6f 329 {
emh203 0:3d9c67d97d6f 330 /* index calculation for the input as, */
emh203 0:3d9c67d97d6f 331 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emh203 0:3d9c67d97d6f 332 i1 = i0 + n2;
emh203 0:3d9c67d97d6f 333 i2 = i1 + n2;
emh203 0:3d9c67d97d6f 334 i3 = i2 + n2;
emh203 0:3d9c67d97d6f 335
emh203 0:3d9c67d97d6f 336 /* Butterfly implementation */
emh203 0:3d9c67d97d6f 337 /* xa + xc */
emh203 0:3d9c67d97d6f 338 r1 = pSrc[2u * i0] + pSrc[2u * i2];
emh203 0:3d9c67d97d6f 339 /* xa - xc */
emh203 0:3d9c67d97d6f 340 r2 = pSrc[2u * i0] - pSrc[2u * i2];
emh203 0:3d9c67d97d6f 341
emh203 0:3d9c67d97d6f 342 /* ya + yc */
emh203 0:3d9c67d97d6f 343 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
emh203 0:3d9c67d97d6f 344 /* ya - yc */
emh203 0:3d9c67d97d6f 345 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
emh203 0:3d9c67d97d6f 346
emh203 0:3d9c67d97d6f 347 /* xb + xd */
emh203 0:3d9c67d97d6f 348 t1 = pSrc[2u * i1] + pSrc[2u * i3];
emh203 0:3d9c67d97d6f 349
emh203 0:3d9c67d97d6f 350 /* xa' = xa + xb + xc + xd */
emh203 0:3d9c67d97d6f 351 pSrc[2u * i0] = (r1 + t1) >> 2u;
emh203 0:3d9c67d97d6f 352 /* xa + xc -(xb + xd) */
emh203 0:3d9c67d97d6f 353 r1 = r1 - t1;
emh203 0:3d9c67d97d6f 354
emh203 0:3d9c67d97d6f 355 /* yb + yd */
emh203 0:3d9c67d97d6f 356 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
emh203 0:3d9c67d97d6f 357 /* ya' = ya + yb + yc + yd */
emh203 0:3d9c67d97d6f 358 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
emh203 0:3d9c67d97d6f 359
emh203 0:3d9c67d97d6f 360 /* (ya + yc) - (yb + yd) */
emh203 0:3d9c67d97d6f 361 s1 = s1 - t2;
emh203 0:3d9c67d97d6f 362
emh203 0:3d9c67d97d6f 363 /* (yb - yd) */
emh203 0:3d9c67d97d6f 364 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
emh203 0:3d9c67d97d6f 365 /* (xb - xd) */
emh203 0:3d9c67d97d6f 366 t2 = pSrc[2u * i1] - pSrc[2u * i3];
emh203 0:3d9c67d97d6f 367
emh203 0:3d9c67d97d6f 368 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
emh203 0:3d9c67d97d6f 369 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
emh203 0:3d9c67d97d6f 370 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 371
emh203 0:3d9c67d97d6f 372 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
emh203 0:3d9c67d97d6f 373 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
emh203 0:3d9c67d97d6f 374 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 375
emh203 0:3d9c67d97d6f 376 /* (xa - xc) + (yb - yd) */
emh203 0:3d9c67d97d6f 377 r1 = r2 + t1;
emh203 0:3d9c67d97d6f 378 /* (xa - xc) - (yb - yd) */
emh203 0:3d9c67d97d6f 379 r2 = r2 - t1;
emh203 0:3d9c67d97d6f 380
emh203 0:3d9c67d97d6f 381 /* (ya - yc) - (xb - xd) */
emh203 0:3d9c67d97d6f 382 s1 = s2 - t2;
emh203 0:3d9c67d97d6f 383 /* (ya - yc) + (xb - xd) */
emh203 0:3d9c67d97d6f 384 s2 = s2 + t2;
emh203 0:3d9c67d97d6f 385
emh203 0:3d9c67d97d6f 386 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
emh203 0:3d9c67d97d6f 387 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
emh203 0:3d9c67d97d6f 388 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 389
emh203 0:3d9c67d97d6f 390 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
emh203 0:3d9c67d97d6f 391 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
emh203 0:3d9c67d97d6f 392 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 393
emh203 0:3d9c67d97d6f 394 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
emh203 0:3d9c67d97d6f 395 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
emh203 0:3d9c67d97d6f 396 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 397
emh203 0:3d9c67d97d6f 398 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
emh203 0:3d9c67d97d6f 399 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
emh203 0:3d9c67d97d6f 400 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 401 }
emh203 0:3d9c67d97d6f 402 }
emh203 0:3d9c67d97d6f 403 twidCoefModifier <<= 2u;
emh203 0:3d9c67d97d6f 404 }
emh203 0:3d9c67d97d6f 405
emh203 0:3d9c67d97d6f 406 /* End of Middle stages process */
emh203 0:3d9c67d97d6f 407
emh203 0:3d9c67d97d6f 408 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
emh203 0:3d9c67d97d6f 409 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
emh203 0:3d9c67d97d6f 410 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
emh203 0:3d9c67d97d6f 411 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
emh203 0:3d9c67d97d6f 412
emh203 0:3d9c67d97d6f 413
emh203 0:3d9c67d97d6f 414 /* start of Last stage process */
emh203 0:3d9c67d97d6f 415 /* Initializations for the last stage */
emh203 0:3d9c67d97d6f 416 j = fftLen >> 2;
emh203 0:3d9c67d97d6f 417 ptr1 = &pSrc[0];
emh203 0:3d9c67d97d6f 418
emh203 0:3d9c67d97d6f 419 /* Calculations of last stage */
emh203 0:3d9c67d97d6f 420 do
emh203 0:3d9c67d97d6f 421 {
emh203 0:3d9c67d97d6f 422
emh203 0:3d9c67d97d6f 423 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 424
emh203 0:3d9c67d97d6f 425 /* Read xa (real), ya(imag) input */
emh203 0:3d9c67d97d6f 426 xaya = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 427 xa = (q31_t) xaya;
emh203 0:3d9c67d97d6f 428 ya = (q31_t) (xaya >> 32);
emh203 0:3d9c67d97d6f 429
emh203 0:3d9c67d97d6f 430 /* Read xb (real), yb(imag) input */
emh203 0:3d9c67d97d6f 431 xbyb = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 432 xb = (q31_t) xbyb;
emh203 0:3d9c67d97d6f 433 yb = (q31_t) (xbyb >> 32);
emh203 0:3d9c67d97d6f 434
emh203 0:3d9c67d97d6f 435 /* Read xc (real), yc(imag) input */
emh203 0:3d9c67d97d6f 436 xcyc = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 437 xc = (q31_t) xcyc;
emh203 0:3d9c67d97d6f 438 yc = (q31_t) (xcyc >> 32);
emh203 0:3d9c67d97d6f 439
emh203 0:3d9c67d97d6f 440 /* Read xc (real), yc(imag) input */
emh203 0:3d9c67d97d6f 441 xdyd = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 442 xd = (q31_t) xdyd;
emh203 0:3d9c67d97d6f 443 yd = (q31_t) (xdyd >> 32);
emh203 0:3d9c67d97d6f 444
emh203 0:3d9c67d97d6f 445 #else
emh203 0:3d9c67d97d6f 446
emh203 0:3d9c67d97d6f 447 /* Read xa (real), ya(imag) input */
emh203 0:3d9c67d97d6f 448 xaya = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 449 ya = (q31_t) xaya;
emh203 0:3d9c67d97d6f 450 xa = (q31_t) (xaya >> 32);
emh203 0:3d9c67d97d6f 451
emh203 0:3d9c67d97d6f 452 /* Read xb (real), yb(imag) input */
emh203 0:3d9c67d97d6f 453 xbyb = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 454 yb = (q31_t) xbyb;
emh203 0:3d9c67d97d6f 455 xb = (q31_t) (xbyb >> 32);
emh203 0:3d9c67d97d6f 456
emh203 0:3d9c67d97d6f 457 /* Read xc (real), yc(imag) input */
emh203 0:3d9c67d97d6f 458 xcyc = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 459 yc = (q31_t) xcyc;
emh203 0:3d9c67d97d6f 460 xc = (q31_t) (xcyc >> 32);
emh203 0:3d9c67d97d6f 461
emh203 0:3d9c67d97d6f 462 /* Read xc (real), yc(imag) input */
emh203 0:3d9c67d97d6f 463 xdyd = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 464 yd = (q31_t) xdyd;
emh203 0:3d9c67d97d6f 465 xd = (q31_t) (xdyd >> 32);
emh203 0:3d9c67d97d6f 466
emh203 0:3d9c67d97d6f 467
emh203 0:3d9c67d97d6f 468 #endif
emh203 0:3d9c67d97d6f 469
emh203 0:3d9c67d97d6f 470 /* xa' = xa + xb + xc + xd */
emh203 0:3d9c67d97d6f 471 xa_out = xa + xb + xc + xd;
emh203 0:3d9c67d97d6f 472
emh203 0:3d9c67d97d6f 473 /* ya' = ya + yb + yc + yd */
emh203 0:3d9c67d97d6f 474 ya_out = ya + yb + yc + yd;
emh203 0:3d9c67d97d6f 475
emh203 0:3d9c67d97d6f 476 /* pointer updation for writing */
emh203 0:3d9c67d97d6f 477 ptr1 = ptr1 - 8u;
emh203 0:3d9c67d97d6f 478
emh203 0:3d9c67d97d6f 479 /* writing xa' and ya' */
emh203 0:3d9c67d97d6f 480 *ptr1++ = xa_out;
emh203 0:3d9c67d97d6f 481 *ptr1++ = ya_out;
emh203 0:3d9c67d97d6f 482
emh203 0:3d9c67d97d6f 483 xc_out = (xa - xb + xc - xd);
emh203 0:3d9c67d97d6f 484 yc_out = (ya - yb + yc - yd);
emh203 0:3d9c67d97d6f 485
emh203 0:3d9c67d97d6f 486 /* writing xc' and yc' */
emh203 0:3d9c67d97d6f 487 *ptr1++ = xc_out;
emh203 0:3d9c67d97d6f 488 *ptr1++ = yc_out;
emh203 0:3d9c67d97d6f 489
emh203 0:3d9c67d97d6f 490 xb_out = (xa + yb - xc - yd);
emh203 0:3d9c67d97d6f 491 yb_out = (ya - xb - yc + xd);
emh203 0:3d9c67d97d6f 492
emh203 0:3d9c67d97d6f 493 /* writing xb' and yb' */
emh203 0:3d9c67d97d6f 494 *ptr1++ = xb_out;
emh203 0:3d9c67d97d6f 495 *ptr1++ = yb_out;
emh203 0:3d9c67d97d6f 496
emh203 0:3d9c67d97d6f 497 xd_out = (xa - yb - xc + yd);
emh203 0:3d9c67d97d6f 498 yd_out = (ya + xb - yc - xd);
emh203 0:3d9c67d97d6f 499
emh203 0:3d9c67d97d6f 500 /* writing xd' and yd' */
emh203 0:3d9c67d97d6f 501 *ptr1++ = xd_out;
emh203 0:3d9c67d97d6f 502 *ptr1++ = yd_out;
emh203 0:3d9c67d97d6f 503
emh203 0:3d9c67d97d6f 504
emh203 0:3d9c67d97d6f 505 } while(--j);
emh203 0:3d9c67d97d6f 506
emh203 0:3d9c67d97d6f 507 /* output is in 11.21(q21) format for the 1024 point */
emh203 0:3d9c67d97d6f 508 /* output is in 9.23(q23) format for the 256 point */
emh203 0:3d9c67d97d6f 509 /* output is in 7.25(q25) format for the 64 point */
emh203 0:3d9c67d97d6f 510 /* output is in 5.27(q27) format for the 16 point */
emh203 0:3d9c67d97d6f 511
emh203 0:3d9c67d97d6f 512 /* End of last stage process */
emh203 0:3d9c67d97d6f 513
emh203 0:3d9c67d97d6f 514 }
emh203 0:3d9c67d97d6f 515
emh203 0:3d9c67d97d6f 516
emh203 0:3d9c67d97d6f 517 /**
emh203 0:3d9c67d97d6f 518 * @brief Core function for the Q31 CIFFT butterfly process.
emh203 0:3d9c67d97d6f 519 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
emh203 0:3d9c67d97d6f 520 * @param[in] fftLen length of the FFT.
emh203 0:3d9c67d97d6f 521 * @param[in] *pCoef points to twiddle coefficient buffer.
emh203 0:3d9c67d97d6f 522 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
emh203 0:3d9c67d97d6f 523 * @return none.
emh203 0:3d9c67d97d6f 524 */
emh203 0:3d9c67d97d6f 525
emh203 0:3d9c67d97d6f 526
emh203 0:3d9c67d97d6f 527 /*
emh203 0:3d9c67d97d6f 528 * Radix-4 IFFT algorithm used is :
emh203 0:3d9c67d97d6f 529 *
emh203 0:3d9c67d97d6f 530 * CIFFT uses same twiddle coefficients as CFFT Function
emh203 0:3d9c67d97d6f 531 * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
emh203 0:3d9c67d97d6f 532 *
emh203 0:3d9c67d97d6f 533 *
emh203 0:3d9c67d97d6f 534 * IFFT is implemented with following changes in equations from FFT
emh203 0:3d9c67d97d6f 535 *
emh203 0:3d9c67d97d6f 536 * Input real and imaginary data:
emh203 0:3d9c67d97d6f 537 * x(n) = xa + j * ya
emh203 0:3d9c67d97d6f 538 * x(n+N/4 ) = xb + j * yb
emh203 0:3d9c67d97d6f 539 * x(n+N/2 ) = xc + j * yc
emh203 0:3d9c67d97d6f 540 * x(n+3N/4) = xd + j * yd
emh203 0:3d9c67d97d6f 541 *
emh203 0:3d9c67d97d6f 542 *
emh203 0:3d9c67d97d6f 543 * Output real and imaginary data:
emh203 0:3d9c67d97d6f 544 * x(4r) = xa'+ j * ya'
emh203 0:3d9c67d97d6f 545 * x(4r+1) = xb'+ j * yb'
emh203 0:3d9c67d97d6f 546 * x(4r+2) = xc'+ j * yc'
emh203 0:3d9c67d97d6f 547 * x(4r+3) = xd'+ j * yd'
emh203 0:3d9c67d97d6f 548 *
emh203 0:3d9c67d97d6f 549 *
emh203 0:3d9c67d97d6f 550 * Twiddle factors for radix-4 IFFT:
emh203 0:3d9c67d97d6f 551 * Wn = co1 + j * (si1)
emh203 0:3d9c67d97d6f 552 * W2n = co2 + j * (si2)
emh203 0:3d9c67d97d6f 553 * W3n = co3 + j * (si3)
emh203 0:3d9c67d97d6f 554
emh203 0:3d9c67d97d6f 555 * The real and imaginary output values for the radix-4 butterfly are
emh203 0:3d9c67d97d6f 556 * xa' = xa + xb + xc + xd
emh203 0:3d9c67d97d6f 557 * ya' = ya + yb + yc + yd
emh203 0:3d9c67d97d6f 558 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
emh203 0:3d9c67d97d6f 559 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
emh203 0:3d9c67d97d6f 560 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
emh203 0:3d9c67d97d6f 561 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
emh203 0:3d9c67d97d6f 562 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
emh203 0:3d9c67d97d6f 563 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
emh203 0:3d9c67d97d6f 564 *
emh203 0:3d9c67d97d6f 565 */
emh203 0:3d9c67d97d6f 566
emh203 0:3d9c67d97d6f 567 void arm_radix4_butterfly_inverse_q31(
emh203 0:3d9c67d97d6f 568 q31_t * pSrc,
emh203 0:3d9c67d97d6f 569 uint32_t fftLen,
emh203 0:3d9c67d97d6f 570 q31_t * pCoef,
emh203 0:3d9c67d97d6f 571 uint32_t twidCoefModifier)
emh203 0:3d9c67d97d6f 572 {
emh203 0:3d9c67d97d6f 573 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
emh203 0:3d9c67d97d6f 574 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
emh203 0:3d9c67d97d6f 575 q31_t xa, xb, xc, xd;
emh203 0:3d9c67d97d6f 576 q31_t ya, yb, yc, yd;
emh203 0:3d9c67d97d6f 577 q31_t xa_out, xb_out, xc_out, xd_out;
emh203 0:3d9c67d97d6f 578 q31_t ya_out, yb_out, yc_out, yd_out;
emh203 0:3d9c67d97d6f 579
emh203 0:3d9c67d97d6f 580 q31_t *ptr1;
emh203 0:3d9c67d97d6f 581 q63_t xaya, xbyb, xcyc, xdyd;
emh203 0:3d9c67d97d6f 582
emh203 0:3d9c67d97d6f 583 /* input is be 1.31(q31) format for all FFT sizes */
emh203 0:3d9c67d97d6f 584 /* Total process is divided into three stages */
emh203 0:3d9c67d97d6f 585 /* process first stage, middle stages, & last stage */
emh203 0:3d9c67d97d6f 586
emh203 0:3d9c67d97d6f 587 /* Start of first stage process */
emh203 0:3d9c67d97d6f 588
emh203 0:3d9c67d97d6f 589 /* Initializations for the first stage */
emh203 0:3d9c67d97d6f 590 n2 = fftLen;
emh203 0:3d9c67d97d6f 591 n1 = n2;
emh203 0:3d9c67d97d6f 592 /* n2 = fftLen/4 */
emh203 0:3d9c67d97d6f 593 n2 >>= 2u;
emh203 0:3d9c67d97d6f 594 i0 = 0u;
emh203 0:3d9c67d97d6f 595 ia1 = 0u;
emh203 0:3d9c67d97d6f 596
emh203 0:3d9c67d97d6f 597 j = n2;
emh203 0:3d9c67d97d6f 598
emh203 0:3d9c67d97d6f 599 do
emh203 0:3d9c67d97d6f 600 {
emh203 0:3d9c67d97d6f 601
emh203 0:3d9c67d97d6f 602 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
emh203 0:3d9c67d97d6f 603
emh203 0:3d9c67d97d6f 604 /* index calculation for the input as, */
emh203 0:3d9c67d97d6f 605 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emh203 0:3d9c67d97d6f 606 i1 = i0 + n2;
emh203 0:3d9c67d97d6f 607 i2 = i1 + n2;
emh203 0:3d9c67d97d6f 608 i3 = i2 + n2;
emh203 0:3d9c67d97d6f 609
emh203 0:3d9c67d97d6f 610 /* Butterfly implementation */
emh203 0:3d9c67d97d6f 611 /* xa + xc */
emh203 0:3d9c67d97d6f 612 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
emh203 0:3d9c67d97d6f 613 /* xa - xc */
emh203 0:3d9c67d97d6f 614 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
emh203 0:3d9c67d97d6f 615
emh203 0:3d9c67d97d6f 616 /* xb + xd */
emh203 0:3d9c67d97d6f 617 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
emh203 0:3d9c67d97d6f 618
emh203 0:3d9c67d97d6f 619 /* ya + yc */
emh203 0:3d9c67d97d6f 620 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
emh203 0:3d9c67d97d6f 621 /* ya - yc */
emh203 0:3d9c67d97d6f 622 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
emh203 0:3d9c67d97d6f 623
emh203 0:3d9c67d97d6f 624 /* xa' = xa + xb + xc + xd */
emh203 0:3d9c67d97d6f 625 pSrc[2u * i0] = (r1 + t1);
emh203 0:3d9c67d97d6f 626 /* (xa + xc) - (xb + xd) */
emh203 0:3d9c67d97d6f 627 r1 = r1 - t1;
emh203 0:3d9c67d97d6f 628 /* yb + yd */
emh203 0:3d9c67d97d6f 629 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
emh203 0:3d9c67d97d6f 630 /* ya' = ya + yb + yc + yd */
emh203 0:3d9c67d97d6f 631 pSrc[(2u * i0) + 1u] = (s1 + t2);
emh203 0:3d9c67d97d6f 632
emh203 0:3d9c67d97d6f 633 /* (ya + yc) - (yb + yd) */
emh203 0:3d9c67d97d6f 634 s1 = s1 - t2;
emh203 0:3d9c67d97d6f 635
emh203 0:3d9c67d97d6f 636 /* yb - yd */
emh203 0:3d9c67d97d6f 637 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
emh203 0:3d9c67d97d6f 638 /* xb - xd */
emh203 0:3d9c67d97d6f 639 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
emh203 0:3d9c67d97d6f 640
emh203 0:3d9c67d97d6f 641 /* index calculation for the coefficients */
emh203 0:3d9c67d97d6f 642 ia2 = 2u * ia1;
emh203 0:3d9c67d97d6f 643 co2 = pCoef[ia2 * 2u];
emh203 0:3d9c67d97d6f 644 si2 = pCoef[(ia2 * 2u) + 1u];
emh203 0:3d9c67d97d6f 645
emh203 0:3d9c67d97d6f 646 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
emh203 0:3d9c67d97d6f 647 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
emh203 0:3d9c67d97d6f 648 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 649
emh203 0:3d9c67d97d6f 650 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
emh203 0:3d9c67d97d6f 651 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
emh203 0:3d9c67d97d6f 652 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 653
emh203 0:3d9c67d97d6f 654 /* (xa - xc) - (yb - yd) */
emh203 0:3d9c67d97d6f 655 r1 = r2 - t1;
emh203 0:3d9c67d97d6f 656 /* (xa - xc) + (yb - yd) */
emh203 0:3d9c67d97d6f 657 r2 = r2 + t1;
emh203 0:3d9c67d97d6f 658
emh203 0:3d9c67d97d6f 659 /* (ya - yc) + (xb - xd) */
emh203 0:3d9c67d97d6f 660 s1 = s2 + t2;
emh203 0:3d9c67d97d6f 661 /* (ya - yc) - (xb - xd) */
emh203 0:3d9c67d97d6f 662 s2 = s2 - t2;
emh203 0:3d9c67d97d6f 663
emh203 0:3d9c67d97d6f 664 co1 = pCoef[ia1 * 2u];
emh203 0:3d9c67d97d6f 665 si1 = pCoef[(ia1 * 2u) + 1u];
emh203 0:3d9c67d97d6f 666
emh203 0:3d9c67d97d6f 667 /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
emh203 0:3d9c67d97d6f 668 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
emh203 0:3d9c67d97d6f 669 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 670
emh203 0:3d9c67d97d6f 671 /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
emh203 0:3d9c67d97d6f 672 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
emh203 0:3d9c67d97d6f 673 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 674
emh203 0:3d9c67d97d6f 675 /* index calculation for the coefficients */
emh203 0:3d9c67d97d6f 676 ia3 = 3u * ia1;
emh203 0:3d9c67d97d6f 677 co3 = pCoef[ia3 * 2u];
emh203 0:3d9c67d97d6f 678 si3 = pCoef[(ia3 * 2u) + 1u];
emh203 0:3d9c67d97d6f 679
emh203 0:3d9c67d97d6f 680 /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
emh203 0:3d9c67d97d6f 681 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
emh203 0:3d9c67d97d6f 682 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 683
emh203 0:3d9c67d97d6f 684 /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
emh203 0:3d9c67d97d6f 685 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
emh203 0:3d9c67d97d6f 686 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
emh203 0:3d9c67d97d6f 687
emh203 0:3d9c67d97d6f 688 /* Twiddle coefficients index modifier */
emh203 0:3d9c67d97d6f 689 ia1 = ia1 + twidCoefModifier;
emh203 0:3d9c67d97d6f 690
emh203 0:3d9c67d97d6f 691 /* Updating input index */
emh203 0:3d9c67d97d6f 692 i0 = i0 + 1u;
emh203 0:3d9c67d97d6f 693
emh203 0:3d9c67d97d6f 694 } while(--j);
emh203 0:3d9c67d97d6f 695
emh203 0:3d9c67d97d6f 696 /* data is in 5.27(q27) format */
emh203 0:3d9c67d97d6f 697 /* each middle stage scales the data down by two bits */
emh203 0:3d9c67d97d6f 698
emh203 0:3d9c67d97d6f 699
emh203 0:3d9c67d97d6f 700 /* Start of Middle stages process */
emh203 0:3d9c67d97d6f 701
emh203 0:3d9c67d97d6f 702 twidCoefModifier <<= 2u;
emh203 0:3d9c67d97d6f 703
emh203 0:3d9c67d97d6f 704 /* Calculation of the second stage up to, but excluding, the last stage */
emh203 0:3d9c67d97d6f 705 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emh203 0:3d9c67d97d6f 706 {
emh203 0:3d9c67d97d6f 707 /* Initializations for the current middle stage */
emh203 0:3d9c67d97d6f 708 n1 = n2;
emh203 0:3d9c67d97d6f 709 n2 >>= 2u;
emh203 0:3d9c67d97d6f 710 ia1 = 0u;
emh203 0:3d9c67d97d6f 711
emh203 0:3d9c67d97d6f 712 for (j = 0; j <= (n2 - 1u); j++)
emh203 0:3d9c67d97d6f 713 {
emh203 0:3d9c67d97d6f 714 /* index calculation for the coefficients */
emh203 0:3d9c67d97d6f 715 ia2 = ia1 + ia1;
emh203 0:3d9c67d97d6f 716 ia3 = ia2 + ia1;
emh203 0:3d9c67d97d6f 717 co1 = pCoef[ia1 * 2u];
emh203 0:3d9c67d97d6f 718 si1 = pCoef[(ia1 * 2u) + 1u];
emh203 0:3d9c67d97d6f 719 co2 = pCoef[ia2 * 2u];
emh203 0:3d9c67d97d6f 720 si2 = pCoef[(ia2 * 2u) + 1u];
emh203 0:3d9c67d97d6f 721 co3 = pCoef[ia3 * 2u];
emh203 0:3d9c67d97d6f 722 si3 = pCoef[(ia3 * 2u) + 1u];
emh203 0:3d9c67d97d6f 723 /* Twiddle coefficients index modifier */
emh203 0:3d9c67d97d6f 724 ia1 = ia1 + twidCoefModifier;
emh203 0:3d9c67d97d6f 725
emh203 0:3d9c67d97d6f 726 for (i0 = j; i0 < fftLen; i0 += n1)
emh203 0:3d9c67d97d6f 727 {
emh203 0:3d9c67d97d6f 728 /* index calculation for the input as, */
emh203 0:3d9c67d97d6f 729 /* pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
emh203 0:3d9c67d97d6f 730 i1 = i0 + n2;
emh203 0:3d9c67d97d6f 731 i2 = i1 + n2;
emh203 0:3d9c67d97d6f 732 i3 = i2 + n2;
emh203 0:3d9c67d97d6f 733
emh203 0:3d9c67d97d6f 734 /* Butterfly implementation */
emh203 0:3d9c67d97d6f 735 /* xa + xc */
emh203 0:3d9c67d97d6f 736 r1 = pSrc[2u * i0] + pSrc[2u * i2];
emh203 0:3d9c67d97d6f 737 /* xa - xc */
emh203 0:3d9c67d97d6f 738 r2 = pSrc[2u * i0] - pSrc[2u * i2];
emh203 0:3d9c67d97d6f 739
emh203 0:3d9c67d97d6f 740 /* ya + yc */
emh203 0:3d9c67d97d6f 741 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
emh203 0:3d9c67d97d6f 742 /* ya - yc */
emh203 0:3d9c67d97d6f 743 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
emh203 0:3d9c67d97d6f 744
emh203 0:3d9c67d97d6f 745 /* xb + xd */
emh203 0:3d9c67d97d6f 746 t1 = pSrc[2u * i1] + pSrc[2u * i3];
emh203 0:3d9c67d97d6f 747
emh203 0:3d9c67d97d6f 748 /* xa' = xa + xb + xc + xd */
emh203 0:3d9c67d97d6f 749 pSrc[2u * i0] = (r1 + t1) >> 2u;
emh203 0:3d9c67d97d6f 750 /* xa + xc -(xb + xd) */
emh203 0:3d9c67d97d6f 751 r1 = r1 - t1;
emh203 0:3d9c67d97d6f 752 /* yb + yd */
emh203 0:3d9c67d97d6f 753 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
emh203 0:3d9c67d97d6f 754 /* ya' = ya + yb + yc + yd */
emh203 0:3d9c67d97d6f 755 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
emh203 0:3d9c67d97d6f 756
emh203 0:3d9c67d97d6f 757 /* (ya + yc) - (yb + yd) */
emh203 0:3d9c67d97d6f 758 s1 = s1 - t2;
emh203 0:3d9c67d97d6f 759
emh203 0:3d9c67d97d6f 760 /* (yb - yd) */
emh203 0:3d9c67d97d6f 761 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
emh203 0:3d9c67d97d6f 762 /* (xb - xd) */
emh203 0:3d9c67d97d6f 763 t2 = pSrc[2u * i1] - pSrc[2u * i3];
emh203 0:3d9c67d97d6f 764
emh203 0:3d9c67d97d6f 765 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
emh203 0:3d9c67d97d6f 766 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
emh203 0:3d9c67d97d6f 767 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
emh203 0:3d9c67d97d6f 768
emh203 0:3d9c67d97d6f 769 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
emh203 0:3d9c67d97d6f 770 pSrc[(2u * i1) + 1u] =
emh203 0:3d9c67d97d6f 771 (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
emh203 0:3d9c67d97d6f 772 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
emh203 0:3d9c67d97d6f 773
emh203 0:3d9c67d97d6f 774 /* (xa - xc) - (yb - yd) */
emh203 0:3d9c67d97d6f 775 r1 = r2 - t1;
emh203 0:3d9c67d97d6f 776 /* (xa - xc) + (yb - yd) */
emh203 0:3d9c67d97d6f 777 r2 = r2 + t1;
emh203 0:3d9c67d97d6f 778
emh203 0:3d9c67d97d6f 779 /* (ya - yc) + (xb - xd) */
emh203 0:3d9c67d97d6f 780 s1 = s2 + t2;
emh203 0:3d9c67d97d6f 781 /* (ya - yc) - (xb - xd) */
emh203 0:3d9c67d97d6f 782 s2 = s2 - t2;
emh203 0:3d9c67d97d6f 783
emh203 0:3d9c67d97d6f 784 /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
emh203 0:3d9c67d97d6f 785 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
emh203 0:3d9c67d97d6f 786 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 787
emh203 0:3d9c67d97d6f 788 /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
emh203 0:3d9c67d97d6f 789 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
emh203 0:3d9c67d97d6f 790 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 791
emh203 0:3d9c67d97d6f 792 /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
emh203 0:3d9c67d97d6f 793 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
emh203 0:3d9c67d97d6f 794 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 795
emh203 0:3d9c67d97d6f 796 /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
emh203 0:3d9c67d97d6f 797 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
emh203 0:3d9c67d97d6f 798 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
emh203 0:3d9c67d97d6f 799 }
emh203 0:3d9c67d97d6f 800 }
emh203 0:3d9c67d97d6f 801 twidCoefModifier <<= 2u;
emh203 0:3d9c67d97d6f 802 }
emh203 0:3d9c67d97d6f 803
emh203 0:3d9c67d97d6f 804 /* End of Middle stages process */
emh203 0:3d9c67d97d6f 805
emh203 0:3d9c67d97d6f 806 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
emh203 0:3d9c67d97d6f 807 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
emh203 0:3d9c67d97d6f 808 /* data is in 7.25(q25) format for the 64 point as there is 1 middle stage */
emh203 0:3d9c67d97d6f 809 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
emh203 0:3d9c67d97d6f 810
emh203 0:3d9c67d97d6f 811
emh203 0:3d9c67d97d6f 812 /* Start of last stage process */
emh203 0:3d9c67d97d6f 813
emh203 0:3d9c67d97d6f 814
emh203 0:3d9c67d97d6f 815 /* Initializations for the last stage */
emh203 0:3d9c67d97d6f 816 j = fftLen >> 2;
emh203 0:3d9c67d97d6f 817 ptr1 = &pSrc[0];
emh203 0:3d9c67d97d6f 818
emh203 0:3d9c67d97d6f 819 /* Calculations of last stage */
emh203 0:3d9c67d97d6f 820 do
emh203 0:3d9c67d97d6f 821 {
emh203 0:3d9c67d97d6f 822 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 823 /* Read xa (real), ya(imag) input */
emh203 0:3d9c67d97d6f 824 xaya = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 825 xa = (q31_t) xaya;
emh203 0:3d9c67d97d6f 826 ya = (q31_t) (xaya >> 32);
emh203 0:3d9c67d97d6f 827
emh203 0:3d9c67d97d6f 828 /* Read xb (real), yb(imag) input */
emh203 0:3d9c67d97d6f 829 xbyb = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 830 xb = (q31_t) xbyb;
emh203 0:3d9c67d97d6f 831 yb = (q31_t) (xbyb >> 32);
emh203 0:3d9c67d97d6f 832
emh203 0:3d9c67d97d6f 833 /* Read xc (real), yc(imag) input */
emh203 0:3d9c67d97d6f 834 xcyc = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 835 xc = (q31_t) xcyc;
emh203 0:3d9c67d97d6f 836 yc = (q31_t) (xcyc >> 32);
emh203 0:3d9c67d97d6f 837
emh203 0:3d9c67d97d6f 838 /* Read xd (real), yd(imag) input */
emh203 0:3d9c67d97d6f 839 xdyd = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 840 xd = (q31_t) xdyd;
emh203 0:3d9c67d97d6f 841 yd = (q31_t) (xdyd >> 32);
emh203 0:3d9c67d97d6f 842
emh203 0:3d9c67d97d6f 843 #else
emh203 0:3d9c67d97d6f 844
emh203 0:3d9c67d97d6f 845 /* Read xa (real), ya(imag) input */
emh203 0:3d9c67d97d6f 846 xaya = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 847 ya = (q31_t) xaya;
emh203 0:3d9c67d97d6f 848 xa = (q31_t) (xaya >> 32);
emh203 0:3d9c67d97d6f 849
emh203 0:3d9c67d97d6f 850 /* Read xb (real), yb(imag) input */
emh203 0:3d9c67d97d6f 851 xbyb = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 852 yb = (q31_t) xbyb;
emh203 0:3d9c67d97d6f 853 xb = (q31_t) (xbyb >> 32);
emh203 0:3d9c67d97d6f 854
emh203 0:3d9c67d97d6f 855 /* Read xc (real), yc(imag) input */
emh203 0:3d9c67d97d6f 856 xcyc = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 857 yc = (q31_t) xcyc;
emh203 0:3d9c67d97d6f 858 xc = (q31_t) (xcyc >> 32);
emh203 0:3d9c67d97d6f 859
emh203 0:3d9c67d97d6f 860 /* Read xd (real), yd(imag) input */
emh203 0:3d9c67d97d6f 861 xdyd = *__SIMD64(ptr1)++;
emh203 0:3d9c67d97d6f 862 yd = (q31_t) xdyd;
emh203 0:3d9c67d97d6f 863 xd = (q31_t) (xdyd >> 32);
emh203 0:3d9c67d97d6f 864
emh203 0:3d9c67d97d6f 865
emh203 0:3d9c67d97d6f 866 #endif
emh203 0:3d9c67d97d6f 867
emh203 0:3d9c67d97d6f 868 /* xa' = xa + xb + xc + xd */
emh203 0:3d9c67d97d6f 869 xa_out = xa + xb + xc + xd;
emh203 0:3d9c67d97d6f 870
emh203 0:3d9c67d97d6f 871 /* ya' = ya + yb + yc + yd */
emh203 0:3d9c67d97d6f 872 ya_out = ya + yb + yc + yd;
emh203 0:3d9c67d97d6f 873
emh203 0:3d9c67d97d6f 874 /* move the pointer back by 8 elements so the outputs overwrite the inputs just read */
emh203 0:3d9c67d97d6f 875 ptr1 = ptr1 - 8u;
emh203 0:3d9c67d97d6f 876
emh203 0:3d9c67d97d6f 877 /* writing xa' and ya' */
emh203 0:3d9c67d97d6f 878 *ptr1++ = xa_out;
emh203 0:3d9c67d97d6f 879 *ptr1++ = ya_out;
emh203 0:3d9c67d97d6f 880
emh203 0:3d9c67d97d6f 881 xc_out = (xa - xb + xc - xd);
emh203 0:3d9c67d97d6f 882 yc_out = (ya - yb + yc - yd);
emh203 0:3d9c67d97d6f 883
emh203 0:3d9c67d97d6f 884 /* writing xc' and yc' */
emh203 0:3d9c67d97d6f 885 *ptr1++ = xc_out;
emh203 0:3d9c67d97d6f 886 *ptr1++ = yc_out;
emh203 0:3d9c67d97d6f 887
emh203 0:3d9c67d97d6f 888 xb_out = (xa - yb - xc + yd);
emh203 0:3d9c67d97d6f 889 yb_out = (ya + xb - yc - xd);
emh203 0:3d9c67d97d6f 890
emh203 0:3d9c67d97d6f 891 /* writing xb' and yb' */
emh203 0:3d9c67d97d6f 892 *ptr1++ = xb_out;
emh203 0:3d9c67d97d6f 893 *ptr1++ = yb_out;
emh203 0:3d9c67d97d6f 894
emh203 0:3d9c67d97d6f 895 xd_out = (xa + yb - xc - yd);
emh203 0:3d9c67d97d6f 896 yd_out = (ya - xb - yc + xd);
emh203 0:3d9c67d97d6f 897
emh203 0:3d9c67d97d6f 898 /* writing xd' and yd' */
emh203 0:3d9c67d97d6f 899 *ptr1++ = xd_out;
emh203 0:3d9c67d97d6f 900 *ptr1++ = yd_out;
emh203 0:3d9c67d97d6f 901
emh203 0:3d9c67d97d6f 902
emh203 0:3d9c67d97d6f 903 } while(--j);
emh203 0:3d9c67d97d6f 904
emh203 0:3d9c67d97d6f 905 /* output is in 11.21(q21) format for the 1024 point */
emh203 0:3d9c67d97d6f 906 /* output is in 9.23(q23) format for the 256 point */
emh203 0:3d9c67d97d6f 907 /* output is in 7.25(q25) format for the 64 point */
emh203 0:3d9c67d97d6f 908 /* output is in 5.27(q27) format for the 16 point */
emh203 0:3d9c67d97d6f 909
emh203 0:3d9c67d97d6f 910 /* End of last stage process */
emh203 0:3d9c67d97d6f 911 }