CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Committer:
emilmont
Date:
Thu May 30 17:10:11 2013 +0100
Revision:
2:da51fb522205
Parent:
1:fdd22bb7aa52
Child:
3:7a284390b0ce
Keep "cmsis-dsp" module in synch with its source

Who changed what in which revision?

UserRevisionLine numberNew contents of line
emilmont 1:fdd22bb7aa52 1 /* ----------------------------------------------------------------------
emilmont 1:fdd22bb7aa52 2 * Copyright (C) 2010 ARM Limited. All rights reserved.
emilmont 1:fdd22bb7aa52 3 *
emilmont 1:fdd22bb7aa52 4 * $Date: 15. February 2012
emilmont 2:da51fb522205 5 * $Revision: V1.1.0
emilmont 1:fdd22bb7aa52 6 *
emilmont 2:da51fb522205 7 * Project: CMSIS DSP Library
emilmont 2:da51fb522205 8 * Title: arm_cfft_radix4_q31.c
emilmont 1:fdd22bb7aa52 9 *
emilmont 2:da51fb522205 10 * Description: This file has function definition of Radix-4 FFT & IFFT function and
emilmont 2:da51fb522205 11 * In-place bit reversal using bit reversal table
emilmont 1:fdd22bb7aa52 12 *
emilmont 1:fdd22bb7aa52 13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
emilmont 1:fdd22bb7aa52 14 *
emilmont 1:fdd22bb7aa52 15 * Version 1.1.0 2012/02/15
emilmont 1:fdd22bb7aa52 16 * Updated with more optimizations, bug fixes and minor API changes.
emilmont 1:fdd22bb7aa52 17 *
emilmont 1:fdd22bb7aa52 18 * Version 1.0.10 2011/7/15
emilmont 1:fdd22bb7aa52 19 * Big Endian support added and Merged M0 and M3/M4 Source code.
emilmont 1:fdd22bb7aa52 20 *
emilmont 1:fdd22bb7aa52 21 * Version 1.0.3 2010/11/29
emilmont 1:fdd22bb7aa52 22 * Re-organized the CMSIS folders and updated documentation.
emilmont 1:fdd22bb7aa52 23 *
emilmont 1:fdd22bb7aa52 24 * Version 1.0.2 2010/11/11
emilmont 1:fdd22bb7aa52 25 * Documentation updated.
emilmont 1:fdd22bb7aa52 26 *
emilmont 1:fdd22bb7aa52 27 * Version 1.0.1 2010/10/05
emilmont 1:fdd22bb7aa52 28 * Production release and review comments incorporated.
emilmont 1:fdd22bb7aa52 29 *
emilmont 1:fdd22bb7aa52 30 * Version 1.0.0 2010/09/20
emilmont 1:fdd22bb7aa52 31 * Production release and review comments incorporated.
emilmont 1:fdd22bb7aa52 32 *
emilmont 1:fdd22bb7aa52 33 * Version 0.0.5 2010/04/26
emilmont 2:da51fb522205 34 * incorporated review comments and updated with latest CMSIS layer
emilmont 1:fdd22bb7aa52 35 *
emilmont 1:fdd22bb7aa52 36 * Version 0.0.3 2010/03/10
emilmont 1:fdd22bb7aa52 37 * Initial version
emilmont 1:fdd22bb7aa52 38 * -------------------------------------------------------------------- */
emilmont 1:fdd22bb7aa52 39 #include "arm_math.h"
emilmont 1:fdd22bb7aa52 40
emilmont 1:fdd22bb7aa52 41
emilmont 1:fdd22bb7aa52 42 /**
emilmont 1:fdd22bb7aa52 43 * @ingroup groupTransforms
emilmont 1:fdd22bb7aa52 44 */
emilmont 1:fdd22bb7aa52 45
emilmont 1:fdd22bb7aa52 46 /**
emilmont 1:fdd22bb7aa52 47 * @addtogroup Radix4_CFFT_CIFFT
emilmont 1:fdd22bb7aa52 48 * @{
emilmont 1:fdd22bb7aa52 49 */
emilmont 1:fdd22bb7aa52 50
emilmont 1:fdd22bb7aa52 51 /**
emilmont 1:fdd22bb7aa52 52 * @details
emilmont 1:fdd22bb7aa52 53 * @brief Processing function for the Q31 CFFT/CIFFT.
emilmont 1:fdd22bb7aa52 54 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure.
emilmont 1:fdd22bb7aa52 55 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
emilmont 1:fdd22bb7aa52 56 * @return none.
emilmont 1:fdd22bb7aa52 57 *
emilmont 1:fdd22bb7aa52 58 * \par Input and output formats:
emilmont 1:fdd22bb7aa52 59 * \par
emilmont 1:fdd22bb7aa52 60 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
emilmont 1:fdd22bb7aa52 61 * Hence the output format is different for different FFT sizes.
emilmont 1:fdd22bb7aa52 62 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
emilmont 1:fdd22bb7aa52 63 * \par
emilmont 1:fdd22bb7aa52 64 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
emilmont 1:fdd22bb7aa52 65 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
emilmont 1:fdd22bb7aa52 66 *
emilmont 1:fdd22bb7aa52 67 */
emilmont 1:fdd22bb7aa52 68
emilmont 1:fdd22bb7aa52 69 void arm_cfft_radix4_q31(
emilmont 1:fdd22bb7aa52 70 const arm_cfft_radix4_instance_q31 * S,
emilmont 1:fdd22bb7aa52 71 q31_t * pSrc)
emilmont 1:fdd22bb7aa52 72 {
emilmont 1:fdd22bb7aa52 73 if(S->ifftFlag == 1u)
emilmont 1:fdd22bb7aa52 74 {
emilmont 1:fdd22bb7aa52 75 /* Complex IFFT radix-4 */
emilmont 1:fdd22bb7aa52 76 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle,
emilmont 1:fdd22bb7aa52 77 S->twidCoefModifier);
emilmont 1:fdd22bb7aa52 78 }
emilmont 1:fdd22bb7aa52 79 else
emilmont 1:fdd22bb7aa52 80 {
emilmont 1:fdd22bb7aa52 81 /* Complex FFT radix-4 */
emilmont 1:fdd22bb7aa52 82 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle,
emilmont 1:fdd22bb7aa52 83 S->twidCoefModifier);
emilmont 1:fdd22bb7aa52 84 }
emilmont 1:fdd22bb7aa52 85
emilmont 1:fdd22bb7aa52 86
emilmont 1:fdd22bb7aa52 87 if(S->bitReverseFlag == 1u)
emilmont 1:fdd22bb7aa52 88 {
emilmont 1:fdd22bb7aa52 89 /* Bit Reversal */
emilmont 1:fdd22bb7aa52 90 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
emilmont 1:fdd22bb7aa52 91 }
emilmont 1:fdd22bb7aa52 92
emilmont 1:fdd22bb7aa52 93 }
emilmont 1:fdd22bb7aa52 94
emilmont 1:fdd22bb7aa52 95 /**
emilmont 1:fdd22bb7aa52 96 * @} end of Radix4_CFFT_CIFFT group
emilmont 1:fdd22bb7aa52 97 */
emilmont 1:fdd22bb7aa52 98
emilmont 1:fdd22bb7aa52 99 /*
emilmont 1:fdd22bb7aa52 100 * Radix-4 FFT algorithm used is :
emilmont 1:fdd22bb7aa52 101 *
emilmont 1:fdd22bb7aa52 102 * Input real and imaginary data:
emilmont 1:fdd22bb7aa52 103 * x(n) = xa + j * ya
emilmont 1:fdd22bb7aa52 104 * x(n+N/4 ) = xb + j * yb
emilmont 1:fdd22bb7aa52 105 * x(n+N/2 ) = xc + j * yc
emilmont 1:fdd22bb7aa52 106 * x(n+3N 4) = xd + j * yd
emilmont 1:fdd22bb7aa52 107 *
emilmont 1:fdd22bb7aa52 108 *
emilmont 1:fdd22bb7aa52 109 * Output real and imaginary data:
emilmont 1:fdd22bb7aa52 110 * x(4r) = xa'+ j * ya'
emilmont 1:fdd22bb7aa52 111 * x(4r+1) = xb'+ j * yb'
emilmont 1:fdd22bb7aa52 112 * x(4r+2) = xc'+ j * yc'
emilmont 1:fdd22bb7aa52 113 * x(4r+3) = xd'+ j * yd'
emilmont 1:fdd22bb7aa52 114 *
emilmont 1:fdd22bb7aa52 115 *
emilmont 1:fdd22bb7aa52 116 * Twiddle factors for radix-4 FFT:
emilmont 1:fdd22bb7aa52 117 * Wn = co1 + j * (- si1)
emilmont 1:fdd22bb7aa52 118 * W2n = co2 + j * (- si2)
emilmont 1:fdd22bb7aa52 119 * W3n = co3 + j * (- si3)
emilmont 1:fdd22bb7aa52 120 *
emilmont 1:fdd22bb7aa52 121 * Butterfly implementation:
emilmont 1:fdd22bb7aa52 122 * xa' = xa + xb + xc + xd
emilmont 1:fdd22bb7aa52 123 * ya' = ya + yb + yc + yd
emilmont 1:fdd22bb7aa52 124 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
emilmont 1:fdd22bb7aa52 125 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
emilmont 1:fdd22bb7aa52 126 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
emilmont 1:fdd22bb7aa52 127 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
emilmont 1:fdd22bb7aa52 128 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
emilmont 1:fdd22bb7aa52 129 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
emilmont 1:fdd22bb7aa52 130 *
emilmont 1:fdd22bb7aa52 131 */
emilmont 1:fdd22bb7aa52 132
emilmont 1:fdd22bb7aa52 133 /**
emilmont 1:fdd22bb7aa52 134 * @brief Core function for the Q31 CFFT butterfly process.
emilmont 1:fdd22bb7aa52 135 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
emilmont 1:fdd22bb7aa52 136 * @param[in] fftLen length of the FFT.
emilmont 1:fdd22bb7aa52 137 * @param[in] *pCoef points to twiddle coefficient buffer.
emilmont 1:fdd22bb7aa52 138 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
emilmont 1:fdd22bb7aa52 139 * @return none.
emilmont 1:fdd22bb7aa52 140 */
emilmont 1:fdd22bb7aa52 141
emilmont 1:fdd22bb7aa52 142 void arm_radix4_butterfly_q31(
emilmont 1:fdd22bb7aa52 143 q31_t * pSrc,
emilmont 1:fdd22bb7aa52 144 uint32_t fftLen,
emilmont 1:fdd22bb7aa52 145 q31_t * pCoef,
emilmont 1:fdd22bb7aa52 146 uint32_t twidCoefModifier)
emilmont 1:fdd22bb7aa52 147 {
emilmont 1:fdd22bb7aa52 148 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
emilmont 1:fdd22bb7aa52 149 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
emilmont 1:fdd22bb7aa52 150
emilmont 1:fdd22bb7aa52 151 q31_t xa, xb, xc, xd;
emilmont 1:fdd22bb7aa52 152 q31_t ya, yb, yc, yd;
emilmont 1:fdd22bb7aa52 153 q31_t xa_out, xb_out, xc_out, xd_out;
emilmont 1:fdd22bb7aa52 154 q31_t ya_out, yb_out, yc_out, yd_out;
emilmont 1:fdd22bb7aa52 155
emilmont 1:fdd22bb7aa52 156 q31_t *ptr1;
emilmont 1:fdd22bb7aa52 157 q63_t xaya, xbyb, xcyc, xdyd;
emilmont 1:fdd22bb7aa52 158 /* Total process is divided into three stages */
emilmont 1:fdd22bb7aa52 159
emilmont 1:fdd22bb7aa52 160 /* process first stage, middle stages, & last stage */
emilmont 1:fdd22bb7aa52 161
emilmont 1:fdd22bb7aa52 162
emilmont 1:fdd22bb7aa52 163 /* start of first stage process */
emilmont 1:fdd22bb7aa52 164
emilmont 1:fdd22bb7aa52 165 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 166 n2 = fftLen;
emilmont 1:fdd22bb7aa52 167 n1 = n2;
emilmont 1:fdd22bb7aa52 168 /* n2 = fftLen/4 */
emilmont 1:fdd22bb7aa52 169 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 170 i0 = 0u;
emilmont 1:fdd22bb7aa52 171 ia1 = 0u;
emilmont 1:fdd22bb7aa52 172
emilmont 1:fdd22bb7aa52 173 j = n2;
emilmont 1:fdd22bb7aa52 174
emilmont 1:fdd22bb7aa52 175 /* Calculation of first stage */
emilmont 1:fdd22bb7aa52 176 do
emilmont 1:fdd22bb7aa52 177 {
emilmont 1:fdd22bb7aa52 178 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 179 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 180 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 181 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 182 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 183
emilmont 1:fdd22bb7aa52 184 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
emilmont 1:fdd22bb7aa52 185
emilmont 1:fdd22bb7aa52 186 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 187 /* xa + xc */
emilmont 1:fdd22bb7aa52 188 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
emilmont 1:fdd22bb7aa52 189 /* xa - xc */
emilmont 1:fdd22bb7aa52 190 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
emilmont 1:fdd22bb7aa52 191
emilmont 1:fdd22bb7aa52 192 /* xb + xd */
emilmont 1:fdd22bb7aa52 193 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
emilmont 1:fdd22bb7aa52 194
emilmont 1:fdd22bb7aa52 195 /* ya + yc */
emilmont 1:fdd22bb7aa52 196 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 197 /* ya - yc */
emilmont 1:fdd22bb7aa52 198 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 199
emilmont 1:fdd22bb7aa52 200 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 201 pSrc[2u * i0] = (r1 + t1);
emilmont 1:fdd22bb7aa52 202 /* (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 203 r1 = r1 - t1;
emilmont 1:fdd22bb7aa52 204 /* yb + yd */
emilmont 1:fdd22bb7aa52 205 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 206
emilmont 1:fdd22bb7aa52 207 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 208 pSrc[(2u * i0) + 1u] = (s1 + t2);
emilmont 1:fdd22bb7aa52 209
emilmont 1:fdd22bb7aa52 210 /* (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 211 s1 = s1 - t2;
emilmont 1:fdd22bb7aa52 212
emilmont 1:fdd22bb7aa52 213 /* yb - yd */
emilmont 1:fdd22bb7aa52 214 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 215 /* xb - xd */
emilmont 1:fdd22bb7aa52 216 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
emilmont 1:fdd22bb7aa52 217
emilmont 1:fdd22bb7aa52 218 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 219 ia2 = 2u * ia1;
emilmont 1:fdd22bb7aa52 220 co2 = pCoef[ia2 * 2u];
emilmont 1:fdd22bb7aa52 221 si2 = pCoef[(ia2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 222
emilmont 1:fdd22bb7aa52 223 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
emilmont 1:fdd22bb7aa52 224 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
emilmont 1:fdd22bb7aa52 225 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 226
emilmont 1:fdd22bb7aa52 227 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
emilmont 1:fdd22bb7aa52 228 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
emilmont 1:fdd22bb7aa52 229 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 230
emilmont 1:fdd22bb7aa52 231 /* (xa - xc) + (yb - yd) */
emilmont 1:fdd22bb7aa52 232 r1 = r2 + t1;
emilmont 1:fdd22bb7aa52 233 /* (xa - xc) - (yb - yd) */
emilmont 1:fdd22bb7aa52 234 r2 = r2 - t1;
emilmont 1:fdd22bb7aa52 235
emilmont 1:fdd22bb7aa52 236 /* (ya - yc) - (xb - xd) */
emilmont 1:fdd22bb7aa52 237 s1 = s2 - t2;
emilmont 1:fdd22bb7aa52 238 /* (ya - yc) + (xb - xd) */
emilmont 1:fdd22bb7aa52 239 s2 = s2 + t2;
emilmont 1:fdd22bb7aa52 240
emilmont 1:fdd22bb7aa52 241 co1 = pCoef[ia1 * 2u];
emilmont 1:fdd22bb7aa52 242 si1 = pCoef[(ia1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 243
emilmont 1:fdd22bb7aa52 244 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
emilmont 1:fdd22bb7aa52 245 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
emilmont 1:fdd22bb7aa52 246 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 247
emilmont 1:fdd22bb7aa52 248 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
emilmont 1:fdd22bb7aa52 249 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
emilmont 1:fdd22bb7aa52 250 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 251
emilmont 1:fdd22bb7aa52 252 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 253 ia3 = 3u * ia1;
emilmont 1:fdd22bb7aa52 254 co3 = pCoef[ia3 * 2u];
emilmont 1:fdd22bb7aa52 255 si3 = pCoef[(ia3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 256
emilmont 1:fdd22bb7aa52 257 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
emilmont 1:fdd22bb7aa52 258 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
emilmont 1:fdd22bb7aa52 259 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 260
emilmont 1:fdd22bb7aa52 261 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
emilmont 1:fdd22bb7aa52 262 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
emilmont 1:fdd22bb7aa52 263 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 264
emilmont 1:fdd22bb7aa52 265 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 266 ia1 = ia1 + twidCoefModifier;
emilmont 1:fdd22bb7aa52 267
emilmont 1:fdd22bb7aa52 268 /* Updating input index */
emilmont 1:fdd22bb7aa52 269 i0 = i0 + 1u;
emilmont 1:fdd22bb7aa52 270
emilmont 1:fdd22bb7aa52 271 } while(--j);
emilmont 1:fdd22bb7aa52 272
emilmont 1:fdd22bb7aa52 273 /* end of first stage process */
emilmont 1:fdd22bb7aa52 274
emilmont 1:fdd22bb7aa52 275 /* data is in 5.27(q27) format */
emilmont 1:fdd22bb7aa52 276
emilmont 1:fdd22bb7aa52 277
emilmont 1:fdd22bb7aa52 278 /* start of Middle stages process */
emilmont 1:fdd22bb7aa52 279
emilmont 1:fdd22bb7aa52 280
emilmont 1:fdd22bb7aa52 281 /* each stage in middle stages provides two down scaling of the input */
emilmont 1:fdd22bb7aa52 282
emilmont 1:fdd22bb7aa52 283 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 284
emilmont 1:fdd22bb7aa52 285
emilmont 1:fdd22bb7aa52 286 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emilmont 1:fdd22bb7aa52 287 {
emilmont 1:fdd22bb7aa52 288 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 289 n1 = n2;
emilmont 1:fdd22bb7aa52 290 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 291 ia1 = 0u;
emilmont 1:fdd22bb7aa52 292
emilmont 1:fdd22bb7aa52 293 /* Calculation of first stage */
emilmont 1:fdd22bb7aa52 294 for (j = 0u; j <= (n2 - 1u); j++)
emilmont 1:fdd22bb7aa52 295 {
emilmont 1:fdd22bb7aa52 296 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 297 ia2 = ia1 + ia1;
emilmont 1:fdd22bb7aa52 298 ia3 = ia2 + ia1;
emilmont 1:fdd22bb7aa52 299 co1 = pCoef[ia1 * 2u];
emilmont 1:fdd22bb7aa52 300 si1 = pCoef[(ia1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 301 co2 = pCoef[ia2 * 2u];
emilmont 1:fdd22bb7aa52 302 si2 = pCoef[(ia2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 303 co3 = pCoef[ia3 * 2u];
emilmont 1:fdd22bb7aa52 304 si3 = pCoef[(ia3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 305 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 306 ia1 = ia1 + twidCoefModifier;
emilmont 1:fdd22bb7aa52 307
emilmont 1:fdd22bb7aa52 308 for (i0 = j; i0 < fftLen; i0 += n1)
emilmont 1:fdd22bb7aa52 309 {
emilmont 1:fdd22bb7aa52 310 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 311 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 312 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 313 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 314 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 315
emilmont 1:fdd22bb7aa52 316 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 317 /* xa + xc */
emilmont 1:fdd22bb7aa52 318 r1 = pSrc[2u * i0] + pSrc[2u * i2];
emilmont 1:fdd22bb7aa52 319 /* xa - xc */
emilmont 1:fdd22bb7aa52 320 r2 = pSrc[2u * i0] - pSrc[2u * i2];
emilmont 1:fdd22bb7aa52 321
emilmont 1:fdd22bb7aa52 322 /* ya + yc */
emilmont 1:fdd22bb7aa52 323 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
emilmont 1:fdd22bb7aa52 324 /* ya - yc */
emilmont 1:fdd22bb7aa52 325 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
emilmont 1:fdd22bb7aa52 326
emilmont 1:fdd22bb7aa52 327 /* xb + xd */
emilmont 1:fdd22bb7aa52 328 t1 = pSrc[2u * i1] + pSrc[2u * i3];
emilmont 1:fdd22bb7aa52 329
emilmont 1:fdd22bb7aa52 330 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 331 pSrc[2u * i0] = (r1 + t1) >> 2u;
emilmont 1:fdd22bb7aa52 332 /* xa + xc -(xb + xd) */
emilmont 1:fdd22bb7aa52 333 r1 = r1 - t1;
emilmont 1:fdd22bb7aa52 334
emilmont 1:fdd22bb7aa52 335 /* yb + yd */
emilmont 1:fdd22bb7aa52 336 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
emilmont 1:fdd22bb7aa52 337 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 338 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
emilmont 1:fdd22bb7aa52 339
emilmont 1:fdd22bb7aa52 340 /* (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 341 s1 = s1 - t2;
emilmont 1:fdd22bb7aa52 342
emilmont 1:fdd22bb7aa52 343 /* (yb - yd) */
emilmont 1:fdd22bb7aa52 344 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
emilmont 1:fdd22bb7aa52 345 /* (xb - xd) */
emilmont 1:fdd22bb7aa52 346 t2 = pSrc[2u * i1] - pSrc[2u * i3];
emilmont 1:fdd22bb7aa52 347
emilmont 1:fdd22bb7aa52 348 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
emilmont 1:fdd22bb7aa52 349 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
emilmont 1:fdd22bb7aa52 350 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 351
emilmont 1:fdd22bb7aa52 352 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
emilmont 1:fdd22bb7aa52 353 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
emilmont 1:fdd22bb7aa52 354 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 355
emilmont 1:fdd22bb7aa52 356 /* (xa - xc) + (yb - yd) */
emilmont 1:fdd22bb7aa52 357 r1 = r2 + t1;
emilmont 1:fdd22bb7aa52 358 /* (xa - xc) - (yb - yd) */
emilmont 1:fdd22bb7aa52 359 r2 = r2 - t1;
emilmont 1:fdd22bb7aa52 360
emilmont 1:fdd22bb7aa52 361 /* (ya - yc) - (xb - xd) */
emilmont 1:fdd22bb7aa52 362 s1 = s2 - t2;
emilmont 1:fdd22bb7aa52 363 /* (ya - yc) + (xb - xd) */
emilmont 1:fdd22bb7aa52 364 s2 = s2 + t2;
emilmont 1:fdd22bb7aa52 365
emilmont 1:fdd22bb7aa52 366 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
emilmont 1:fdd22bb7aa52 367 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
emilmont 1:fdd22bb7aa52 368 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 369
emilmont 1:fdd22bb7aa52 370 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
emilmont 1:fdd22bb7aa52 371 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
emilmont 1:fdd22bb7aa52 372 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 373
emilmont 1:fdd22bb7aa52 374 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
emilmont 1:fdd22bb7aa52 375 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
emilmont 1:fdd22bb7aa52 376 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 377
emilmont 1:fdd22bb7aa52 378 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
emilmont 1:fdd22bb7aa52 379 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
emilmont 1:fdd22bb7aa52 380 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 381 }
emilmont 1:fdd22bb7aa52 382 }
emilmont 1:fdd22bb7aa52 383 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 384 }
emilmont 1:fdd22bb7aa52 385
emilmont 1:fdd22bb7aa52 386 /* End of Middle stages process */
emilmont 1:fdd22bb7aa52 387
emilmont 1:fdd22bb7aa52 388 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
emilmont 1:fdd22bb7aa52 389 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
emilmont 1:fdd22bb7aa52 390 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
emilmont 1:fdd22bb7aa52 391 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
emilmont 1:fdd22bb7aa52 392
emilmont 1:fdd22bb7aa52 393
emilmont 1:fdd22bb7aa52 394 /* start of Last stage process */
emilmont 1:fdd22bb7aa52 395 /* Initializations for the last stage */
emilmont 1:fdd22bb7aa52 396 j = fftLen >> 2;
emilmont 1:fdd22bb7aa52 397 ptr1 = &pSrc[0];
emilmont 1:fdd22bb7aa52 398
emilmont 1:fdd22bb7aa52 399 /* Calculations of last stage */
emilmont 1:fdd22bb7aa52 400 do
emilmont 1:fdd22bb7aa52 401 {
emilmont 1:fdd22bb7aa52 402
emilmont 1:fdd22bb7aa52 403 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 404
emilmont 1:fdd22bb7aa52 405 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 406 xaya = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 407 xa = (q31_t) xaya;
emilmont 1:fdd22bb7aa52 408 ya = (q31_t) (xaya >> 32);
emilmont 1:fdd22bb7aa52 409
emilmont 1:fdd22bb7aa52 410 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 411 xbyb = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 412 xb = (q31_t) xbyb;
emilmont 1:fdd22bb7aa52 413 yb = (q31_t) (xbyb >> 32);
emilmont 1:fdd22bb7aa52 414
emilmont 1:fdd22bb7aa52 415 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 416 xcyc = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 417 xc = (q31_t) xcyc;
emilmont 1:fdd22bb7aa52 418 yc = (q31_t) (xcyc >> 32);
emilmont 1:fdd22bb7aa52 419
emilmont 1:fdd22bb7aa52 420 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 421 xdyd = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 422 xd = (q31_t) xdyd;
emilmont 1:fdd22bb7aa52 423 yd = (q31_t) (xdyd >> 32);
emilmont 1:fdd22bb7aa52 424
emilmont 1:fdd22bb7aa52 425 #else
emilmont 1:fdd22bb7aa52 426
emilmont 1:fdd22bb7aa52 427 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 428 xaya = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 429 ya = (q31_t) xaya;
emilmont 1:fdd22bb7aa52 430 xa = (q31_t) (xaya >> 32);
emilmont 1:fdd22bb7aa52 431
emilmont 1:fdd22bb7aa52 432 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 433 xbyb = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 434 yb = (q31_t) xbyb;
emilmont 1:fdd22bb7aa52 435 xb = (q31_t) (xbyb >> 32);
emilmont 1:fdd22bb7aa52 436
emilmont 1:fdd22bb7aa52 437 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 438 xcyc = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 439 yc = (q31_t) xcyc;
emilmont 1:fdd22bb7aa52 440 xc = (q31_t) (xcyc >> 32);
emilmont 1:fdd22bb7aa52 441
emilmont 1:fdd22bb7aa52 442 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 443 xdyd = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 444 yd = (q31_t) xdyd;
emilmont 1:fdd22bb7aa52 445 xd = (q31_t) (xdyd >> 32);
emilmont 1:fdd22bb7aa52 446
emilmont 1:fdd22bb7aa52 447
emilmont 1:fdd22bb7aa52 448 #endif
emilmont 1:fdd22bb7aa52 449
emilmont 1:fdd22bb7aa52 450 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 451 xa_out = xa + xb + xc + xd;
emilmont 1:fdd22bb7aa52 452
emilmont 1:fdd22bb7aa52 453 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 454 ya_out = ya + yb + yc + yd;
emilmont 1:fdd22bb7aa52 455
emilmont 1:fdd22bb7aa52 456 /* pointer updation for writing */
emilmont 1:fdd22bb7aa52 457 ptr1 = ptr1 - 8u;
emilmont 1:fdd22bb7aa52 458
emilmont 1:fdd22bb7aa52 459 /* writing xa' and ya' */
emilmont 1:fdd22bb7aa52 460 *ptr1++ = xa_out;
emilmont 1:fdd22bb7aa52 461 *ptr1++ = ya_out;
emilmont 1:fdd22bb7aa52 462
emilmont 1:fdd22bb7aa52 463 xc_out = (xa - xb + xc - xd);
emilmont 1:fdd22bb7aa52 464 yc_out = (ya - yb + yc - yd);
emilmont 1:fdd22bb7aa52 465
emilmont 1:fdd22bb7aa52 466 /* writing xc' and yc' */
emilmont 1:fdd22bb7aa52 467 *ptr1++ = xc_out;
emilmont 1:fdd22bb7aa52 468 *ptr1++ = yc_out;
emilmont 1:fdd22bb7aa52 469
emilmont 1:fdd22bb7aa52 470 xb_out = (xa + yb - xc - yd);
emilmont 1:fdd22bb7aa52 471 yb_out = (ya - xb - yc + xd);
emilmont 1:fdd22bb7aa52 472
emilmont 1:fdd22bb7aa52 473 /* writing xb' and yb' */
emilmont 1:fdd22bb7aa52 474 *ptr1++ = xb_out;
emilmont 1:fdd22bb7aa52 475 *ptr1++ = yb_out;
emilmont 1:fdd22bb7aa52 476
emilmont 1:fdd22bb7aa52 477 xd_out = (xa - yb - xc + yd);
emilmont 1:fdd22bb7aa52 478 yd_out = (ya + xb - yc - xd);
emilmont 1:fdd22bb7aa52 479
emilmont 1:fdd22bb7aa52 480 /* writing xd' and yd' */
emilmont 1:fdd22bb7aa52 481 *ptr1++ = xd_out;
emilmont 1:fdd22bb7aa52 482 *ptr1++ = yd_out;
emilmont 1:fdd22bb7aa52 483
emilmont 1:fdd22bb7aa52 484
emilmont 1:fdd22bb7aa52 485 } while(--j);
emilmont 1:fdd22bb7aa52 486
emilmont 1:fdd22bb7aa52 487 /* output is in 11.21(q21) format for the 1024 point */
emilmont 1:fdd22bb7aa52 488 /* output is in 9.23(q23) format for the 256 point */
emilmont 1:fdd22bb7aa52 489 /* output is in 7.25(q25) format for the 64 point */
emilmont 1:fdd22bb7aa52 490 /* output is in 5.27(q27) format for the 16 point */
emilmont 1:fdd22bb7aa52 491
emilmont 1:fdd22bb7aa52 492 /* End of last stage process */
emilmont 1:fdd22bb7aa52 493
emilmont 1:fdd22bb7aa52 494 }
emilmont 1:fdd22bb7aa52 495
emilmont 1:fdd22bb7aa52 496
emilmont 1:fdd22bb7aa52 497 /**
emilmont 1:fdd22bb7aa52 498 * @brief Core function for the Q31 CIFFT butterfly process.
emilmont 1:fdd22bb7aa52 499 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
emilmont 1:fdd22bb7aa52 500 * @param[in] fftLen length of the FFT.
emilmont 1:fdd22bb7aa52 501 * @param[in] *pCoef points to twiddle coefficient buffer.
emilmont 1:fdd22bb7aa52 502 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
emilmont 1:fdd22bb7aa52 503 * @return none.
emilmont 1:fdd22bb7aa52 504 */
emilmont 1:fdd22bb7aa52 505
emilmont 1:fdd22bb7aa52 506
emilmont 1:fdd22bb7aa52 507 /*
emilmont 1:fdd22bb7aa52 508 * Radix-4 IFFT algorithm used is :
emilmont 1:fdd22bb7aa52 509 *
emilmont 1:fdd22bb7aa52 510 * CIFFT uses same twiddle coefficients as CFFT Function
emilmont 1:fdd22bb7aa52 511 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
emilmont 1:fdd22bb7aa52 512 *
emilmont 1:fdd22bb7aa52 513 *
emilmont 1:fdd22bb7aa52 514 * IFFT is implemented with following changes in equations from FFT
emilmont 1:fdd22bb7aa52 515 *
emilmont 1:fdd22bb7aa52 516 * Input real and imaginary data:
emilmont 1:fdd22bb7aa52 517 * x(n) = xa + j * ya
emilmont 1:fdd22bb7aa52 518 * x(n+N/4 ) = xb + j * yb
emilmont 1:fdd22bb7aa52 519 * x(n+N/2 ) = xc + j * yc
emilmont 1:fdd22bb7aa52 520 * x(n+3N 4) = xd + j * yd
emilmont 1:fdd22bb7aa52 521 *
emilmont 1:fdd22bb7aa52 522 *
emilmont 1:fdd22bb7aa52 523 * Output real and imaginary data:
emilmont 1:fdd22bb7aa52 524 * x(4r) = xa'+ j * ya'
emilmont 1:fdd22bb7aa52 525 * x(4r+1) = xb'+ j * yb'
emilmont 1:fdd22bb7aa52 526 * x(4r+2) = xc'+ j * yc'
emilmont 1:fdd22bb7aa52 527 * x(4r+3) = xd'+ j * yd'
emilmont 1:fdd22bb7aa52 528 *
emilmont 1:fdd22bb7aa52 529 *
emilmont 1:fdd22bb7aa52 530 * Twiddle factors for radix-4 IFFT:
emilmont 1:fdd22bb7aa52 531 * Wn = co1 + j * (si1)
emilmont 1:fdd22bb7aa52 532 * W2n = co2 + j * (si2)
emilmont 1:fdd22bb7aa52 533 * W3n = co3 + j * (si3)
emilmont 1:fdd22bb7aa52 534
emilmont 1:fdd22bb7aa52 535 * The real and imaginary output values for the radix-4 butterfly are
emilmont 1:fdd22bb7aa52 536 * xa' = xa + xb + xc + xd
emilmont 1:fdd22bb7aa52 537 * ya' = ya + yb + yc + yd
emilmont 1:fdd22bb7aa52 538 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
emilmont 1:fdd22bb7aa52 539 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
emilmont 1:fdd22bb7aa52 540 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
emilmont 1:fdd22bb7aa52 541 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
emilmont 1:fdd22bb7aa52 542 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
emilmont 1:fdd22bb7aa52 543 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
emilmont 1:fdd22bb7aa52 544 *
emilmont 1:fdd22bb7aa52 545 */
emilmont 1:fdd22bb7aa52 546
emilmont 1:fdd22bb7aa52 547 void arm_radix4_butterfly_inverse_q31(
emilmont 1:fdd22bb7aa52 548 q31_t * pSrc,
emilmont 1:fdd22bb7aa52 549 uint32_t fftLen,
emilmont 1:fdd22bb7aa52 550 q31_t * pCoef,
emilmont 1:fdd22bb7aa52 551 uint32_t twidCoefModifier)
emilmont 1:fdd22bb7aa52 552 {
emilmont 1:fdd22bb7aa52 553 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
emilmont 1:fdd22bb7aa52 554 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
emilmont 1:fdd22bb7aa52 555 q31_t xa, xb, xc, xd;
emilmont 1:fdd22bb7aa52 556 q31_t ya, yb, yc, yd;
emilmont 1:fdd22bb7aa52 557 q31_t xa_out, xb_out, xc_out, xd_out;
emilmont 1:fdd22bb7aa52 558 q31_t ya_out, yb_out, yc_out, yd_out;
emilmont 1:fdd22bb7aa52 559
emilmont 1:fdd22bb7aa52 560 q31_t *ptr1;
emilmont 1:fdd22bb7aa52 561 q63_t xaya, xbyb, xcyc, xdyd;
emilmont 1:fdd22bb7aa52 562
emilmont 1:fdd22bb7aa52 563 /* input is be 1.31(q31) format for all FFT sizes */
emilmont 1:fdd22bb7aa52 564 /* Total process is divided into three stages */
emilmont 1:fdd22bb7aa52 565 /* process first stage, middle stages, & last stage */
emilmont 1:fdd22bb7aa52 566
emilmont 1:fdd22bb7aa52 567 /* Start of first stage process */
emilmont 1:fdd22bb7aa52 568
emilmont 1:fdd22bb7aa52 569 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 570 n2 = fftLen;
emilmont 1:fdd22bb7aa52 571 n1 = n2;
emilmont 1:fdd22bb7aa52 572 /* n2 = fftLen/4 */
emilmont 1:fdd22bb7aa52 573 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 574 i0 = 0u;
emilmont 1:fdd22bb7aa52 575 ia1 = 0u;
emilmont 1:fdd22bb7aa52 576
emilmont 1:fdd22bb7aa52 577 j = n2;
emilmont 1:fdd22bb7aa52 578
emilmont 1:fdd22bb7aa52 579 do
emilmont 1:fdd22bb7aa52 580 {
emilmont 1:fdd22bb7aa52 581
emilmont 1:fdd22bb7aa52 582 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
emilmont 1:fdd22bb7aa52 583
emilmont 1:fdd22bb7aa52 584 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 585 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 586 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 587 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 588 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 589
emilmont 1:fdd22bb7aa52 590 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 591 /* xa + xc */
emilmont 1:fdd22bb7aa52 592 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
emilmont 1:fdd22bb7aa52 593 /* xa - xc */
emilmont 1:fdd22bb7aa52 594 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
emilmont 1:fdd22bb7aa52 595
emilmont 1:fdd22bb7aa52 596 /* xb + xd */
emilmont 1:fdd22bb7aa52 597 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
emilmont 1:fdd22bb7aa52 598
emilmont 1:fdd22bb7aa52 599 /* ya + yc */
emilmont 1:fdd22bb7aa52 600 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 601 /* ya - yc */
emilmont 1:fdd22bb7aa52 602 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 603
emilmont 1:fdd22bb7aa52 604 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 605 pSrc[2u * i0] = (r1 + t1);
emilmont 1:fdd22bb7aa52 606 /* (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 607 r1 = r1 - t1;
emilmont 1:fdd22bb7aa52 608 /* yb + yd */
emilmont 1:fdd22bb7aa52 609 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 610 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 611 pSrc[(2u * i0) + 1u] = (s1 + t2);
emilmont 1:fdd22bb7aa52 612
emilmont 1:fdd22bb7aa52 613 /* (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 614 s1 = s1 - t2;
emilmont 1:fdd22bb7aa52 615
emilmont 1:fdd22bb7aa52 616 /* yb - yd */
emilmont 1:fdd22bb7aa52 617 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 618 /* xb - xd */
emilmont 1:fdd22bb7aa52 619 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
emilmont 1:fdd22bb7aa52 620
emilmont 1:fdd22bb7aa52 621 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 622 ia2 = 2u * ia1;
emilmont 1:fdd22bb7aa52 623 co2 = pCoef[ia2 * 2u];
emilmont 1:fdd22bb7aa52 624 si2 = pCoef[(ia2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 625
emilmont 1:fdd22bb7aa52 626 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
emilmont 1:fdd22bb7aa52 627 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
emilmont 1:fdd22bb7aa52 628 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 629
emilmont 1:fdd22bb7aa52 630 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
emilmont 1:fdd22bb7aa52 631 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
emilmont 1:fdd22bb7aa52 632 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 633
emilmont 1:fdd22bb7aa52 634 /* (xa - xc) - (yb - yd) */
emilmont 1:fdd22bb7aa52 635 r1 = r2 - t1;
emilmont 1:fdd22bb7aa52 636 /* (xa - xc) + (yb - yd) */
emilmont 1:fdd22bb7aa52 637 r2 = r2 + t1;
emilmont 1:fdd22bb7aa52 638
emilmont 1:fdd22bb7aa52 639 /* (ya - yc) + (xb - xd) */
emilmont 1:fdd22bb7aa52 640 s1 = s2 + t2;
emilmont 1:fdd22bb7aa52 641 /* (ya - yc) - (xb - xd) */
emilmont 1:fdd22bb7aa52 642 s2 = s2 - t2;
emilmont 1:fdd22bb7aa52 643
emilmont 1:fdd22bb7aa52 644 co1 = pCoef[ia1 * 2u];
emilmont 1:fdd22bb7aa52 645 si1 = pCoef[(ia1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 646
emilmont 1:fdd22bb7aa52 647 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
emilmont 1:fdd22bb7aa52 648 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
emilmont 1:fdd22bb7aa52 649 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 650
emilmont 1:fdd22bb7aa52 651 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
emilmont 1:fdd22bb7aa52 652 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
emilmont 1:fdd22bb7aa52 653 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 654
emilmont 1:fdd22bb7aa52 655 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 656 ia3 = 3u * ia1;
emilmont 1:fdd22bb7aa52 657 co3 = pCoef[ia3 * 2u];
emilmont 1:fdd22bb7aa52 658 si3 = pCoef[(ia3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 659
emilmont 1:fdd22bb7aa52 660 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
emilmont 1:fdd22bb7aa52 661 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
emilmont 1:fdd22bb7aa52 662 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 663
emilmont 1:fdd22bb7aa52 664 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
emilmont 1:fdd22bb7aa52 665 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
emilmont 1:fdd22bb7aa52 666 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 667
emilmont 1:fdd22bb7aa52 668 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 669 ia1 = ia1 + twidCoefModifier;
emilmont 1:fdd22bb7aa52 670
emilmont 1:fdd22bb7aa52 671 /* Updating input index */
emilmont 1:fdd22bb7aa52 672 i0 = i0 + 1u;
emilmont 1:fdd22bb7aa52 673
emilmont 1:fdd22bb7aa52 674 } while(--j);
emilmont 1:fdd22bb7aa52 675
emilmont 1:fdd22bb7aa52 676 /* data is in 5.27(q27) format */
emilmont 1:fdd22bb7aa52 677 /* each stage provides two down scaling of the input */
emilmont 1:fdd22bb7aa52 678
emilmont 1:fdd22bb7aa52 679
emilmont 1:fdd22bb7aa52 680 /* Start of Middle stages process */
emilmont 1:fdd22bb7aa52 681
emilmont 1:fdd22bb7aa52 682 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 683
emilmont 1:fdd22bb7aa52 684 /* Calculation of second stage to excluding last stage */
emilmont 1:fdd22bb7aa52 685 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emilmont 1:fdd22bb7aa52 686 {
emilmont 1:fdd22bb7aa52 687 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 688 n1 = n2;
emilmont 1:fdd22bb7aa52 689 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 690 ia1 = 0u;
emilmont 1:fdd22bb7aa52 691
emilmont 1:fdd22bb7aa52 692 for (j = 0; j <= (n2 - 1u); j++)
emilmont 1:fdd22bb7aa52 693 {
emilmont 1:fdd22bb7aa52 694 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 695 ia2 = ia1 + ia1;
emilmont 1:fdd22bb7aa52 696 ia3 = ia2 + ia1;
emilmont 1:fdd22bb7aa52 697 co1 = pCoef[ia1 * 2u];
emilmont 1:fdd22bb7aa52 698 si1 = pCoef[(ia1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 699 co2 = pCoef[ia2 * 2u];
emilmont 1:fdd22bb7aa52 700 si2 = pCoef[(ia2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 701 co3 = pCoef[ia3 * 2u];
emilmont 1:fdd22bb7aa52 702 si3 = pCoef[(ia3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 703 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 704 ia1 = ia1 + twidCoefModifier;
emilmont 1:fdd22bb7aa52 705
emilmont 1:fdd22bb7aa52 706 for (i0 = j; i0 < fftLen; i0 += n1)
emilmont 1:fdd22bb7aa52 707 {
emilmont 1:fdd22bb7aa52 708 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 709 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 710 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 711 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 712 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 713
emilmont 1:fdd22bb7aa52 714 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 715 /* xa + xc */
emilmont 1:fdd22bb7aa52 716 r1 = pSrc[2u * i0] + pSrc[2u * i2];
emilmont 1:fdd22bb7aa52 717 /* xa - xc */
emilmont 1:fdd22bb7aa52 718 r2 = pSrc[2u * i0] - pSrc[2u * i2];
emilmont 1:fdd22bb7aa52 719
emilmont 1:fdd22bb7aa52 720 /* ya + yc */
emilmont 1:fdd22bb7aa52 721 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
emilmont 1:fdd22bb7aa52 722 /* ya - yc */
emilmont 1:fdd22bb7aa52 723 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
emilmont 1:fdd22bb7aa52 724
emilmont 1:fdd22bb7aa52 725 /* xb + xd */
emilmont 1:fdd22bb7aa52 726 t1 = pSrc[2u * i1] + pSrc[2u * i3];
emilmont 1:fdd22bb7aa52 727
emilmont 1:fdd22bb7aa52 728 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 729 pSrc[2u * i0] = (r1 + t1) >> 2u;
emilmont 1:fdd22bb7aa52 730 /* xa + xc -(xb + xd) */
emilmont 1:fdd22bb7aa52 731 r1 = r1 - t1;
emilmont 1:fdd22bb7aa52 732 /* yb + yd */
emilmont 1:fdd22bb7aa52 733 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
emilmont 1:fdd22bb7aa52 734 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 735 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
emilmont 1:fdd22bb7aa52 736
emilmont 1:fdd22bb7aa52 737 /* (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 738 s1 = s1 - t2;
emilmont 1:fdd22bb7aa52 739
emilmont 1:fdd22bb7aa52 740 /* (yb - yd) */
emilmont 1:fdd22bb7aa52 741 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
emilmont 1:fdd22bb7aa52 742 /* (xb - xd) */
emilmont 1:fdd22bb7aa52 743 t2 = pSrc[2u * i1] - pSrc[2u * i3];
emilmont 1:fdd22bb7aa52 744
emilmont 1:fdd22bb7aa52 745 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
emilmont 1:fdd22bb7aa52 746 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
emilmont 1:fdd22bb7aa52 747 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
emilmont 1:fdd22bb7aa52 748
emilmont 1:fdd22bb7aa52 749 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
emilmont 1:fdd22bb7aa52 750 pSrc[(2u * i1) + 1u] =
emilmont 1:fdd22bb7aa52 751 (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
emilmont 1:fdd22bb7aa52 752 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
emilmont 1:fdd22bb7aa52 753
emilmont 1:fdd22bb7aa52 754 /* (xa - xc) - (yb - yd) */
emilmont 1:fdd22bb7aa52 755 r1 = r2 - t1;
emilmont 1:fdd22bb7aa52 756 /* (xa - xc) + (yb - yd) */
emilmont 1:fdd22bb7aa52 757 r2 = r2 + t1;
emilmont 1:fdd22bb7aa52 758
emilmont 1:fdd22bb7aa52 759 /* (ya - yc) + (xb - xd) */
emilmont 1:fdd22bb7aa52 760 s1 = s2 + t2;
emilmont 1:fdd22bb7aa52 761 /* (ya - yc) - (xb - xd) */
emilmont 1:fdd22bb7aa52 762 s2 = s2 - t2;
emilmont 1:fdd22bb7aa52 763
emilmont 1:fdd22bb7aa52 764 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
emilmont 1:fdd22bb7aa52 765 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
emilmont 1:fdd22bb7aa52 766 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 767
emilmont 1:fdd22bb7aa52 768 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
emilmont 1:fdd22bb7aa52 769 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
emilmont 1:fdd22bb7aa52 770 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 771
emilmont 1:fdd22bb7aa52 772 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
emilmont 1:fdd22bb7aa52 773 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
emilmont 1:fdd22bb7aa52 774 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 775
emilmont 1:fdd22bb7aa52 776 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
emilmont 1:fdd22bb7aa52 777 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
emilmont 1:fdd22bb7aa52 778 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 779 }
emilmont 1:fdd22bb7aa52 780 }
emilmont 1:fdd22bb7aa52 781 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 782 }
emilmont 1:fdd22bb7aa52 783
emilmont 1:fdd22bb7aa52 784 /* End of Middle stages process */
emilmont 1:fdd22bb7aa52 785
emilmont 1:fdd22bb7aa52 786 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
emilmont 1:fdd22bb7aa52 787 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
emilmont 1:fdd22bb7aa52 788 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
emilmont 1:fdd22bb7aa52 789 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
emilmont 1:fdd22bb7aa52 790
emilmont 1:fdd22bb7aa52 791
emilmont 1:fdd22bb7aa52 792 /* Start of last stage process */
emilmont 1:fdd22bb7aa52 793
emilmont 1:fdd22bb7aa52 794
emilmont 1:fdd22bb7aa52 795 /* Initializations for the last stage */
emilmont 1:fdd22bb7aa52 796 j = fftLen >> 2;
emilmont 1:fdd22bb7aa52 797 ptr1 = &pSrc[0];
emilmont 1:fdd22bb7aa52 798
emilmont 1:fdd22bb7aa52 799 /* Calculations of last stage */
emilmont 1:fdd22bb7aa52 800 do
emilmont 1:fdd22bb7aa52 801 {
emilmont 1:fdd22bb7aa52 802 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 803 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 804 xaya = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 805 xa = (q31_t) xaya;
emilmont 1:fdd22bb7aa52 806 ya = (q31_t) (xaya >> 32);
emilmont 1:fdd22bb7aa52 807
emilmont 1:fdd22bb7aa52 808 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 809 xbyb = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 810 xb = (q31_t) xbyb;
emilmont 1:fdd22bb7aa52 811 yb = (q31_t) (xbyb >> 32);
emilmont 1:fdd22bb7aa52 812
emilmont 1:fdd22bb7aa52 813 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 814 xcyc = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 815 xc = (q31_t) xcyc;
emilmont 1:fdd22bb7aa52 816 yc = (q31_t) (xcyc >> 32);
emilmont 1:fdd22bb7aa52 817
emilmont 1:fdd22bb7aa52 818 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 819 xdyd = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 820 xd = (q31_t) xdyd;
emilmont 1:fdd22bb7aa52 821 yd = (q31_t) (xdyd >> 32);
emilmont 1:fdd22bb7aa52 822
emilmont 1:fdd22bb7aa52 823 #else
emilmont 1:fdd22bb7aa52 824
emilmont 1:fdd22bb7aa52 825 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 826 xaya = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 827 ya = (q31_t) xaya;
emilmont 1:fdd22bb7aa52 828 xa = (q31_t) (xaya >> 32);
emilmont 1:fdd22bb7aa52 829
emilmont 1:fdd22bb7aa52 830 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 831 xbyb = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 832 yb = (q31_t) xbyb;
emilmont 1:fdd22bb7aa52 833 xb = (q31_t) (xbyb >> 32);
emilmont 1:fdd22bb7aa52 834
emilmont 1:fdd22bb7aa52 835 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 836 xcyc = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 837 yc = (q31_t) xcyc;
emilmont 1:fdd22bb7aa52 838 xc = (q31_t) (xcyc >> 32);
emilmont 1:fdd22bb7aa52 839
emilmont 1:fdd22bb7aa52 840 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 841 xdyd = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 842 yd = (q31_t) xdyd;
emilmont 1:fdd22bb7aa52 843 xd = (q31_t) (xdyd >> 32);
emilmont 1:fdd22bb7aa52 844
emilmont 1:fdd22bb7aa52 845
emilmont 1:fdd22bb7aa52 846 #endif
emilmont 1:fdd22bb7aa52 847
emilmont 1:fdd22bb7aa52 848 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 849 xa_out = xa + xb + xc + xd;
emilmont 1:fdd22bb7aa52 850
emilmont 1:fdd22bb7aa52 851 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 852 ya_out = ya + yb + yc + yd;
emilmont 1:fdd22bb7aa52 853
emilmont 1:fdd22bb7aa52 854 /* pointer updation for writing */
emilmont 1:fdd22bb7aa52 855 ptr1 = ptr1 - 8u;
emilmont 1:fdd22bb7aa52 856
emilmont 1:fdd22bb7aa52 857 /* writing xa' and ya' */
emilmont 1:fdd22bb7aa52 858 *ptr1++ = xa_out;
emilmont 1:fdd22bb7aa52 859 *ptr1++ = ya_out;
emilmont 1:fdd22bb7aa52 860
emilmont 1:fdd22bb7aa52 861 xc_out = (xa - xb + xc - xd);
emilmont 1:fdd22bb7aa52 862 yc_out = (ya - yb + yc - yd);
emilmont 1:fdd22bb7aa52 863
emilmont 1:fdd22bb7aa52 864 /* writing xc' and yc' */
emilmont 1:fdd22bb7aa52 865 *ptr1++ = xc_out;
emilmont 1:fdd22bb7aa52 866 *ptr1++ = yc_out;
emilmont 1:fdd22bb7aa52 867
emilmont 1:fdd22bb7aa52 868 xb_out = (xa - yb - xc + yd);
emilmont 1:fdd22bb7aa52 869 yb_out = (ya + xb - yc - xd);
emilmont 1:fdd22bb7aa52 870
emilmont 1:fdd22bb7aa52 871 /* writing xb' and yb' */
emilmont 1:fdd22bb7aa52 872 *ptr1++ = xb_out;
emilmont 1:fdd22bb7aa52 873 *ptr1++ = yb_out;
emilmont 1:fdd22bb7aa52 874
emilmont 1:fdd22bb7aa52 875 xd_out = (xa + yb - xc - yd);
emilmont 1:fdd22bb7aa52 876 yd_out = (ya - xb - yc + xd);
emilmont 1:fdd22bb7aa52 877
emilmont 1:fdd22bb7aa52 878 /* writing xd' and yd' */
emilmont 1:fdd22bb7aa52 879 *ptr1++ = xd_out;
emilmont 1:fdd22bb7aa52 880 *ptr1++ = yd_out;
emilmont 1:fdd22bb7aa52 881
emilmont 1:fdd22bb7aa52 882
emilmont 1:fdd22bb7aa52 883 } while(--j);
emilmont 1:fdd22bb7aa52 884
emilmont 1:fdd22bb7aa52 885 /* output is in 11.21(q21) format for the 1024 point */
emilmont 1:fdd22bb7aa52 886 /* output is in 9.23(q23) format for the 256 point */
emilmont 1:fdd22bb7aa52 887 /* output is in 7.25(q25) format for the 64 point */
emilmont 1:fdd22bb7aa52 888 /* output is in 5.27(q27) format for the 16 point */
emilmont 1:fdd22bb7aa52 889
emilmont 1:fdd22bb7aa52 890 /* End of last stage process */
emilmont 1:fdd22bb7aa52 891 }